From: Don Brace <don.brace@microchip.com>
To: <Kevin.Barnett@microchip.com>, <scott.teel@microchip.com>,
<Justin.Lindley@microchip.com>, <scott.benesh@microchip.com>,
<gerry.morong@microchip.com>, <mahesh.rajashekhara@microchip.com>,
<hch@infradead.org>, <jejb@linux.vnet.ibm.com>,
<joseph.szczypek@hpe.com>, <POSWALD@suse.com>
Cc: <linux-scsi@vger.kernel.org>
Subject: [PATCH V2 21/25] smartpqi: add additional logging for LUN resets
Date: Tue, 8 Dec 2020 13:38:37 -0600 [thread overview]
Message-ID: <160745631733.13450.16893289886113637507.stgit@brunhilda> (raw)
In-Reply-To: <160745609917.13450.11882890596851148601.stgit@brunhilda>
From: Kevin Barnett <kevin.barnett@microchip.com>
* Add additional logging to help in debugging issues
with LUN resets.
Reviewed-by: Mahesh Rajashekhara <mahesh.rajashekhara@microchip.com>
Reviewed-by: Scott Benesh <scott.benesh@microchip.com>
Reviewed-by: Scott Teel <scott.teel@microchip.com>
Signed-off-by: Kevin Barnett <kevin.barnett@microchip.com>
Signed-off-by: Don Brace <don.brace@microchip.com>
---
drivers/scsi/smartpqi/smartpqi_init.c | 125 +++++++++++++++++++++++----------
1 file changed, 89 insertions(+), 36 deletions(-)
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index 43e238b0c27d..f59f5a1827d3 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -84,7 +84,7 @@ static void pqi_ofa_setup_host_buffer(struct pqi_ctrl_info *ctrl_info);
static void pqi_ofa_free_host_buffer(struct pqi_ctrl_info *ctrl_info);
static int pqi_ofa_host_memory_update(struct pqi_ctrl_info *ctrl_info);
static int pqi_device_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info,
- struct pqi_scsi_dev *device, unsigned long timeout_secs);
+ struct pqi_scsi_dev *device, unsigned long timeout_msecs);
/* for flags argument to pqi_submit_raid_request_synchronous() */
#define PQI_SYNC_FLAGS_INTERRUPTABLE 0x1
@@ -335,11 +335,34 @@ static void pqi_wait_if_ctrl_blocked(struct pqi_ctrl_info *ctrl_info)
atomic_dec(&ctrl_info->num_blocked_threads);
}
+#define PQI_QUIESE_WARNING_TIMEOUT_SECS 10
+
static inline void pqi_ctrl_wait_until_quiesced(struct pqi_ctrl_info *ctrl_info)
{
+ unsigned long start_jiffies;
+ unsigned long warning_timeout;
+ bool displayed_warning;
+
+ displayed_warning = false;
+ start_jiffies = jiffies;
+ warning_timeout = (PQI_QUIESE_WARNING_TIMEOUT_SECS * PQI_HZ) + start_jiffies;
+
while (atomic_read(&ctrl_info->num_busy_threads) >
- atomic_read(&ctrl_info->num_blocked_threads))
+ atomic_read(&ctrl_info->num_blocked_threads)) {
+ if (time_after(jiffies, warning_timeout)) {
+ dev_warn(&ctrl_info->pci_dev->dev,
+ "waiting %u seconds for driver activity to quiesce\n",
+ jiffies_to_msecs(jiffies - start_jiffies) / 1000);
+ displayed_warning = true;
+ warning_timeout = (PQI_QUIESE_WARNING_TIMEOUT_SECS * PQI_HZ) + jiffies;
+ }
usleep_range(1000, 2000);
+ }
+
+ if (displayed_warning)
+ dev_warn(&ctrl_info->pci_dev->dev,
+ "driver activity quiesced after waiting for %u seconds\n",
+ jiffies_to_msecs(jiffies - start_jiffies) / 1000);
}
static inline bool pqi_device_offline(struct pqi_scsi_dev *device)
@@ -1670,7 +1693,7 @@ static int pqi_add_device(struct pqi_ctrl_info *ctrl_info,
return rc;
}
-#define PQI_PENDING_IO_TIMEOUT_SECS 20
+#define PQI_REMOVE_DEVICE_PENDING_IO_TIMEOUT_MSECS (20 * 1000)
static inline void pqi_remove_device(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device)
{
@@ -1678,7 +1701,8 @@ static inline void pqi_remove_device(struct pqi_ctrl_info *ctrl_info, struct pqi
pqi_device_remove_start(device);
- rc = pqi_device_wait_for_pending_io(ctrl_info, device, PQI_PENDING_IO_TIMEOUT_SECS);
+ rc = pqi_device_wait_for_pending_io(ctrl_info, device,
+ PQI_REMOVE_DEVICE_PENDING_IO_TIMEOUT_MSECS);
if (rc)
dev_err(&ctrl_info->pci_dev->dev,
"scsi %d:%d:%d:%d removing device with %d outstanding command(s)\n",
@@ -3064,7 +3088,7 @@ static void pqi_process_io_error(unsigned int iu_type,
}
}
-static int pqi_interpret_task_management_response(
+static int pqi_interpret_task_management_response(struct pqi_ctrl_info *ctrl_info,
struct pqi_task_management_response *response)
{
int rc;
@@ -3082,6 +3106,10 @@ static int pqi_interpret_task_management_response(
break;
}
+ if (rc)
+ dev_err(&ctrl_info->pci_dev->dev,
+ "Task Management Function error: %d (response code: %u)\n", rc, response->response_code);
+
return rc;
}
@@ -3150,9 +3178,8 @@ static int pqi_process_io_intr(struct pqi_ctrl_info *ctrl_info, struct pqi_queue
&((struct pqi_vendor_general_response *)response)->status);
break;
case PQI_RESPONSE_IU_TASK_MANAGEMENT:
- io_request->status =
- pqi_interpret_task_management_response(
- (void *)response);
+ io_request->status = pqi_interpret_task_management_response(ctrl_info,
+ (void *)response);
break;
case PQI_RESPONSE_IU_AIO_PATH_DISABLED:
pqi_aio_path_disabled(io_request);
@@ -5856,24 +5883,37 @@ static void pqi_fail_io_queued_for_device(struct pqi_ctrl_info *ctrl_info,
}
}
+#define PQI_PENDING_IO_WARNING_TIMEOUT_SECS 10
+
static int pqi_device_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info,
- struct pqi_scsi_dev *device, unsigned long timeout_secs)
+ struct pqi_scsi_dev *device, unsigned long timeout_msecs)
{
- unsigned long timeout;
+ int cmds_outstanding;
+ unsigned long start_jiffies;
+ unsigned long warning_timeout;
+ unsigned long msecs_waiting;
+ start_jiffies = jiffies;
+ warning_timeout = (PQI_PENDING_IO_WARNING_TIMEOUT_SECS * PQI_HZ) + start_jiffies;
- timeout = (timeout_secs * PQI_HZ) + jiffies;
-
- while (atomic_read(&device->scsi_cmds_outstanding)) {
+ while ((cmds_outstanding = atomic_read(&device->scsi_cmds_outstanding)) > 0) {
pqi_check_ctrl_health(ctrl_info);
if (pqi_ctrl_offline(ctrl_info))
return -ENXIO;
- if (timeout_secs != NO_TIMEOUT) {
- if (time_after(jiffies, timeout)) {
- dev_err(&ctrl_info->pci_dev->dev,
- "timed out waiting for pending I/O\n");
- return -ETIMEDOUT;
- }
+ msecs_waiting = jiffies_to_msecs(jiffies - start_jiffies);
+ if (msecs_waiting > timeout_msecs) {
+ dev_err(&ctrl_info->pci_dev->dev,
+ "scsi %d:%d:%d:%d: timed out after %lu seconds waiting for %d outstanding command(s)\n",
+ ctrl_info->scsi_host->host_no, device->bus, device->target,
+ device->lun, msecs_waiting / 1000, cmds_outstanding);
+ return -ETIMEDOUT;
+ }
+ if (time_after(jiffies, warning_timeout)) {
+ dev_warn(&ctrl_info->pci_dev->dev,
+ "scsi %d:%d:%d:%d: waiting %lu seconds for %d outstanding command(s)\n",
+ ctrl_info->scsi_host->host_no, device->bus, device->target,
+ device->lun, msecs_waiting / 1000, cmds_outstanding);
+ warning_timeout = (PQI_PENDING_IO_WARNING_TIMEOUT_SECS * PQI_HZ) + jiffies;
}
usleep_range(1000, 2000);
}
@@ -5889,13 +5929,15 @@ static void pqi_lun_reset_complete(struct pqi_io_request *io_request,
complete(waiting);
}
-#define PQI_LUN_RESET_TIMEOUT_SECS 30
#define PQI_LUN_RESET_POLL_COMPLETION_SECS 10
static int pqi_wait_for_lun_reset_completion(struct pqi_ctrl_info *ctrl_info,
struct pqi_scsi_dev *device, struct completion *wait)
{
int rc;
+ unsigned int wait_secs;
+
+ wait_secs = 0;
while (1) {
if (wait_for_completion_io_timeout(wait,
@@ -5909,13 +5951,21 @@ static int pqi_wait_for_lun_reset_completion(struct pqi_ctrl_info *ctrl_info,
rc = -ENXIO;
break;
}
+
+ wait_secs += PQI_LUN_RESET_POLL_COMPLETION_SECS;
+
+ dev_warn(&ctrl_info->pci_dev->dev,
+ "scsi %d:%d:%d:%d: waiting %u seconds for LUN reset to complete\n",
+ ctrl_info->scsi_host->host_no, device->bus, device->target, device->lun,
+ wait_secs);
}
return rc;
}
-static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info,
- struct pqi_scsi_dev *device)
+#define PQI_LUN_RESET_FIRMWARE_TIMEOUT_SECS 30
+
+static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device)
{
int rc;
struct pqi_io_request *io_request;
@@ -5937,8 +5987,7 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info,
sizeof(request->lun_number));
request->task_management_function = SOP_TASK_MANAGEMENT_LUN_RESET;
if (ctrl_info->tmf_iu_timeout_supported)
- put_unaligned_le16(PQI_LUN_RESET_TIMEOUT_SECS,
- &request->timeout);
+ put_unaligned_le16(PQI_LUN_RESET_FIRMWARE_TIMEOUT_SECS, &request->timeout);
pqi_start_io(ctrl_info, &ctrl_info->queue_groups[PQI_DEFAULT_QUEUE_GROUP], RAID_PATH,
io_request);
@@ -5952,29 +6001,33 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info,
return rc;
}
-#define PQI_LUN_RESET_RETRIES 3
-#define PQI_LUN_RESET_RETRY_INTERVAL_MSECS 10000
-#define PQI_LUN_RESET_PENDING_IO_TIMEOUT_SECS 120
+#define PQI_LUN_RESET_RETRIES 3
+#define PQI_LUN_RESET_RETRY_INTERVAL_MSECS (10 * 1000)
+#define PQI_LUN_RESET_PENDING_IO_TIMEOUT_MSECS (10 * 60 * 1000)
+#define PQI_LUN_RESET_FAILED_PENDING_IO_TIMEOUT_MSECS (2 * 60 * 1000)
-static int pqi_lun_reset_with_retries(struct pqi_ctrl_info *ctrl_info,
- struct pqi_scsi_dev *device)
+static int pqi_lun_reset_with_retries(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device)
{
- int rc;
+ int reset_rc;
+ int wait_rc;
unsigned int retries;
- unsigned long timeout_secs;
+ unsigned long timeout_msecs;
for (retries = 0;;) {
- rc = pqi_lun_reset(ctrl_info, device);
- if (rc == 0 || ++retries > PQI_LUN_RESET_RETRIES)
+ reset_rc = pqi_lun_reset(ctrl_info, device);
+ if (reset_rc == 0 || ++retries > PQI_LUN_RESET_RETRIES)
break;
msleep(PQI_LUN_RESET_RETRY_INTERVAL_MSECS);
}
- timeout_secs = rc ? PQI_LUN_RESET_PENDING_IO_TIMEOUT_SECS : NO_TIMEOUT;
+ timeout_msecs = reset_rc ? PQI_LUN_RESET_FAILED_PENDING_IO_TIMEOUT_MSECS :
+ PQI_LUN_RESET_PENDING_IO_TIMEOUT_MSECS;
- rc |= pqi_device_wait_for_pending_io(ctrl_info, device, timeout_secs);
+ wait_rc = pqi_device_wait_for_pending_io(ctrl_info, device, timeout_msecs);
+ if (wait_rc && reset_rc == 0)
+ reset_rc = wait_rc;
- return rc == 0 ? SUCCESS : FAILED;
+ return reset_rc == 0 ? SUCCESS : FAILED;
}
static int pqi_device_reset(struct pqi_ctrl_info *ctrl_info,
next prev parent reply other threads:[~2020-12-08 21:07 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-12-08 19:36 [PATCH V2 00/25] smartpqi updates Don Brace
2020-12-08 19:36 ` [PATCH V2 01/25] smartpqi: add support for product id Don Brace
2020-12-08 19:36 ` [PATCH V2 02/25] smartpqi: refactor aio submission code Don Brace
2020-12-08 19:36 ` [PATCH V2 03/25] smartpqi: refactor build sg list code Don Brace
2020-12-08 19:36 ` [PATCH V2 04/25] smartpqi: add support for raid5 and raid6 writes Don Brace
2020-12-08 19:37 ` [PATCH V2 05/25] smartpqi: add support for raid1 writes Don Brace
2020-12-08 19:37 ` [PATCH V2 06/25] smartpqi: add support for BMIC sense feature cmd and feature bits Don Brace
2020-12-08 19:37 ` [PATCH V2 07/25] smartpqi: update AIO Sub Page 0x02 support Don Brace
2020-12-08 19:37 ` [PATCH V2 08/25] smartpqi: add support for long firmware version Don Brace
2020-12-08 19:37 ` [PATCH V2 09/25] smartpqi: align code with oob driver Don Brace
2020-12-08 19:37 ` [PATCH V2 10/25] smartpqi: add stream detection Don Brace
2020-12-08 19:37 ` [PATCH V2 11/25] smartpqi: add host level stream detection enable Don Brace
2020-12-08 19:37 ` [PATCH V2 12/25] smartpqi: enable support for NVMe encryption Don Brace
2020-12-08 19:37 ` [PATCH V2 13/25] smartpqi: disable write_same for nvme hba disks Don Brace
2020-12-08 19:37 ` [PATCH V2 14/25] smartpqi: fix driver synchronization issues Don Brace
2020-12-08 19:38 ` [PATCH V2 15/25] smartpqi: fix host qdepth limit Don Brace
2020-12-08 19:38 ` [PATCH V2 16/25] smartpqi: convert snprintf to scnprintf Don Brace
2020-12-08 19:38 ` [PATCH V2 17/25] smartpqi: change timing of release of QRM memory during OFA Don Brace
2020-12-08 19:38 ` [PATCH V2 18/25] smartpqi: return busy indication for IOCTLs when ofa is active Don Brace
2020-12-08 19:38 ` [PATCH V2 19/25] smartpqi: add phy id support for the physical drives Don Brace
2020-12-08 19:38 ` [PATCH V2 20/25] smartpqi: update sas initiator_port_protocols and target_port_protocols Don Brace
2020-12-08 19:38 ` Don Brace [this message]
2020-12-08 19:38 ` [PATCH V2 22/25] smartpqi: update enclosure identifier in sysf Don Brace
2020-12-08 19:38 ` [PATCH V2 23/25] smartpqi: correct system hangs when resuming from hibernation Don Brace
2020-12-08 19:38 ` [PATCH V2 24/25] smartpqi: add new pci ids Don Brace
2020-12-08 19:39 ` [PATCH V2 25/25] smartpqi: update version to 2.1.6-005 Don Brace
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=160745631733.13450.16893289886113637507.stgit@brunhilda \
--to=don.brace@microchip.com \
--cc=Justin.Lindley@microchip.com \
--cc=Kevin.Barnett@microchip.com \
--cc=POSWALD@suse.com \
--cc=gerry.morong@microchip.com \
--cc=hch@infradead.org \
--cc=jejb@linux.vnet.ibm.com \
--cc=joseph.szczypek@hpe.com \
--cc=linux-scsi@vger.kernel.org \
--cc=mahesh.rajashekhara@microchip.com \
--cc=scott.benesh@microchip.com \
--cc=scott.teel@microchip.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).