All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] scsi/sd: make disk cmd retries configurable V2
@ 2020-09-29 17:01 Mike Christie
  2020-09-29 17:01 ` [PATCH 1/2] scsi: Add limitless cmd retry support Mike Christie
  2020-09-29 17:01 ` [PATCH 2/2] scsi sd: Allow user to config cmd retries Mike Christie
  0 siblings, 2 replies; 12+ messages in thread
From: Mike Christie @ 2020-09-29 17:01 UTC (permalink / raw)
  To: martin.petersen, bvanassche, linux-scsi, james.bottomley

The following patches made over Martin's staging branch allow
the user to config sd cmd retries. I didn't allow the user to
set any old value and kept the max at 5. However, you can now
turn it off and rely on the transport error handler to decide
when to stop retrying. You can also decrease retries for cases
for disks where there is no use in retrying 5 times.

V2:
- Drop excess ()
- Add comment about forcing completion in the request sense
case
- Use kstrtoint


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/2] scsi: Add limitless cmd retry support
  2020-09-29 17:01 [PATCH 0/2] scsi/sd: make disk cmd retries configurable V2 Mike Christie
@ 2020-09-29 17:01 ` Mike Christie
  2020-10-01  2:52   ` Bart Van Assche
  2020-09-29 17:01 ` [PATCH 2/2] scsi sd: Allow user to config cmd retries Mike Christie
  1 sibling, 1 reply; 12+ messages in thread
From: Mike Christie @ 2020-09-29 17:01 UTC (permalink / raw)
  To: martin.petersen, bvanassche, linux-scsi, james.bottomley

The next patch allows users to configure disk scsi cmd retries from
-1 up to a ULD specific value where -1 means infinite retries.

This patch adds infinite retry support to scsi-ml by just combining
common checks for retries into some helper functions, and then
checking for the -1/SCSI_CMD_RETRIES_NO_LIMIT.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
 drivers/scsi/scsi_error.c | 33 +++++++++++++++++++++++----------
 drivers/scsi/scsi_lib.c   | 29 +++++++++++++++++++----------
 drivers/scsi/scsi_priv.h  |  1 +
 3 files changed, 43 insertions(+), 20 deletions(-)

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 7d3571a2bd89..8b1311b08bfd 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -116,6 +116,14 @@ static int scsi_host_eh_past_deadline(struct Scsi_Host *shost)
 	return 1;
 }
 
+static bool scsi_cmd_retry_allowed(struct scsi_cmnd *cmd)
+{
+	if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
+		return true;
+
+	return (++cmd->retries <= cmd->allowed);
+}
+
 /**
  * scmd_eh_abort_handler - Handle command aborts
  * @work:	command to be aborted.
@@ -151,7 +159,7 @@ scmd_eh_abort_handler(struct work_struct *work)
 						    "eh timeout, not retrying "
 						    "aborted command\n"));
 			} else if (!scsi_noretry_cmd(scmd) &&
-			    (++scmd->retries <= scmd->allowed)) {
+				   scsi_cmd_retry_allowed(scmd)) {
 				SCSI_LOG_ERROR_RECOVERY(3,
 					scmd_printk(KERN_WARNING, scmd,
 						    "retry aborted command\n"));
@@ -1264,11 +1272,18 @@ int scsi_eh_get_sense(struct list_head *work_q,
 		 * upper level.
 		 */
 		if (rtn == SUCCESS)
-			/* we don't want this command reissued, just
-			 * finished with the sense data, so set
-			 * retries to the max allowed to ensure it
-			 * won't get reissued */
-			scmd->retries = scmd->allowed;
+			/*
+			 * We don't want this command reissued, just finished
+			 * with the sense data, so set retries to the max
+			 * allowed to ensure it won't get reissued. If the user
+			 * has requested infinite retries, we also want to
+			 * finish this command, so force completion by setting
+			 * retries and allowed to the same value.
+			 */
+			if (scmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
+				scmd->retries = scmd->allowed = 1;
+			else
+				scmd->retries = scmd->allowed;
 		else if (rtn != NEEDS_RETRY)
 			continue;
 
@@ -1944,8 +1959,7 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
 	 * the request was not marked fast fail.  Note that above,
 	 * even if the request is marked fast fail, we still requeue
 	 * for queue congestion conditions (QUEUE_FULL or BUSY) */
-	if ((++scmd->retries) <= scmd->allowed
-	    && !scsi_noretry_cmd(scmd)) {
+	if (scsi_cmd_retry_allowed(scmd) && !scsi_noretry_cmd(scmd)) {
 		return NEEDS_RETRY;
 	} else {
 		/*
@@ -2091,8 +2105,7 @@ void scsi_eh_flush_done_q(struct list_head *done_q)
 	list_for_each_entry_safe(scmd, next, done_q, eh_entry) {
 		list_del_init(&scmd->eh_entry);
 		if (scsi_device_online(scmd->device) &&
-		    !scsi_noretry_cmd(scmd) &&
-		    (++scmd->retries <= scmd->allowed)) {
+		    !scsi_noretry_cmd(scmd) && scsi_cmd_retry_allowed(scmd)) {
 			SCSI_LOG_ERROR_RECOVERY(3,
 				scmd_printk(KERN_INFO, scmd,
 					     "%s: flush retry cmd\n",
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 7affaaf8b98e..cfe0e17422b9 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -652,6 +652,23 @@ static void scsi_io_completion_reprep(struct scsi_cmnd *cmd,
 	scsi_mq_requeue_cmd(cmd);
 }
 
+static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd)
+{
+	struct request *req = cmd->request;
+	unsigned long wait_for;
+
+	if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
+		return false;
+
+	wait_for = (cmd->allowed + 1) * req->timeout;
+	if (time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
+		scmd_printk(KERN_ERR, cmd, "timing out command, waited %lus\n",
+			    wait_for/HZ);
+		return true;
+	}
+	return false;
+}
+
 /* Helper for scsi_io_completion() when special action required. */
 static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
 {
@@ -660,7 +677,6 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
 	int level = 0;
 	enum {ACTION_FAIL, ACTION_REPREP, ACTION_RETRY,
 	      ACTION_DELAYED_RETRY} action;
-	unsigned long wait_for = (cmd->allowed + 1) * req->timeout;
 	struct scsi_sense_hdr sshdr;
 	bool sense_valid;
 	bool sense_current = true;      /* false implies "deferred sense" */
@@ -765,8 +781,7 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
 	} else
 		action = ACTION_FAIL;
 
-	if (action != ACTION_FAIL &&
-	    time_before(cmd->jiffies_at_alloc + wait_for, jiffies))
+	if (action != ACTION_FAIL && scsi_cmd_runtime_exceeced(cmd))
 		action = ACTION_FAIL;
 
 	switch (action) {
@@ -1439,7 +1454,6 @@ static bool scsi_mq_lld_busy(struct request_queue *q)
 static void scsi_softirq_done(struct request *rq)
 {
 	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
-	unsigned long wait_for = (cmd->allowed + 1) * rq->timeout;
 	int disposition;
 
 	INIT_LIST_HEAD(&cmd->eh_entry);
@@ -1449,13 +1463,8 @@ static void scsi_softirq_done(struct request *rq)
 		atomic_inc(&cmd->device->ioerr_cnt);
 
 	disposition = scsi_decide_disposition(cmd);
-	if (disposition != SUCCESS &&
-	    time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
-		scmd_printk(KERN_ERR, cmd,
-			    "timing out command, waited %lus\n",
-			    wait_for/HZ);
+	if (disposition != SUCCESS && scsi_cmd_runtime_exceeced(cmd))
 		disposition = SUCCESS;
-	}
 
 	scsi_log_completion(cmd, disposition);
 
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index d12ada035961..180636d54982 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -15,6 +15,7 @@ struct scsi_host_template;
 struct Scsi_Host;
 struct scsi_nl_hdr;
 
+#define SCSI_CMD_RETRIES_NO_LIMIT -1
 
 /*
  * Scsi Error Handler Flags
-- 
2.18.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/2] scsi sd: Allow user to config cmd retries
  2020-09-29 17:01 [PATCH 0/2] scsi/sd: make disk cmd retries configurable V2 Mike Christie
  2020-09-29 17:01 ` [PATCH 1/2] scsi: Add limitless cmd retry support Mike Christie
@ 2020-09-29 17:01 ` Mike Christie
  2020-10-01  2:54   ` Bart Van Assche
  1 sibling, 1 reply; 12+ messages in thread
From: Mike Christie @ 2020-09-29 17:01 UTC (permalink / raw)
  To: martin.petersen, bvanassche, linux-scsi, james.bottomley

Some iSCSI targets went the traditional export N ports and then allowed
the initiator to multipath over them. Other targets went the opposite
direction and export 1 port, and then software on the target side
performs load balancing and failover to other targets via a iscsi
specific feature or IP takover.

The problem for the 2nd type of config is we quickly run out of our
five retries and get IO errors. In these setups we want to reduce
resource use on the initaitor side so we only wanted the one session
and no dm-multipath. To handle traditional multipath operations like
failover we do IP takover on the target side. So we would have a
iscsi target running on node1. Some monitoring software decides it's
dead or the node is overloaded, so it starts the iscsi target on
node2. The problem is for the failover case where we might have the
equivalent of a dm-multipath temp all paths down, or we just have to
try more than 5 nodes before finding a good one.

To handle this type of issue, this patch allows the user to config
the disk cmd retries from -1 to the current max of 5. -1 means
infinite retries and should be used for setups where some other
setting is going to control when to fail. For example iscsi has the
replacement/recovery timeout and fc (some users have used FC with
NPIV and done something similar as IP takover) has
dev_loss_tmo/fast_io_fail which will eventually expire and fail IO.

Note:
One alternative to this patch would be to make it transport class
setting, then have it hook in to where we can config the
scsi_cmnd->allowed value. I put the setting in sd_mod because I
thought users might want to lower retries for the general case.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
 drivers/scsi/sd.c | 101 ++++++++++++++++++++++++++++++++--------------
 drivers/scsi/sd.h |   1 +
 2 files changed, 72 insertions(+), 30 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 16503e22691e..162b996334e5 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -194,7 +194,7 @@ cache_type_store(struct device *dev, struct device_attribute *attr,
 	}
 
 	if (scsi_mode_sense(sdp, 0x08, 8, buffer, sizeof(buffer), SD_TIMEOUT,
-			    SD_MAX_RETRIES, &data, NULL))
+			    sdkp->max_retries, &data, NULL))
 		return -EINVAL;
 	len = min_t(size_t, sizeof(buffer), data.length - data.header_length -
 		  data.block_descriptor_length);
@@ -212,7 +212,7 @@ cache_type_store(struct device *dev, struct device_attribute *attr,
 	data.device_specific = 0;
 
 	if (scsi_mode_select(sdp, 1, sp, 8, buffer_data, len, SD_TIMEOUT,
-			     SD_MAX_RETRIES, &data, &sshdr)) {
+			     sdkp->max_retries, &data, &sshdr)) {
 		if (scsi_sense_valid(&sshdr))
 			sd_print_sense_hdr(sdkp, &sshdr);
 		return -EINVAL;
@@ -543,6 +543,39 @@ zoned_cap_show(struct device *dev, struct device_attribute *attr, char *buf)
 }
 static DEVICE_ATTR_RO(zoned_cap);
 
+static ssize_t
+max_retries_store(struct device *dev, struct device_attribute *attr,
+		  const char *buf, size_t count)
+{
+	struct scsi_disk *sdkp = to_scsi_disk(dev);
+	struct scsi_device *sdev = sdkp->device;
+	int retries, err;
+
+	err = kstrtoint(buf, 10, &retries);
+	if (err)
+		return err;
+
+	if (retries == SCSI_CMD_RETRIES_NO_LIMIT || retries <= SD_MAX_RETRIES) {
+		sdkp->max_retries = retries;
+		return count;
+	}
+
+	sdev_printk(KERN_ERR, sdev, "max_retries must be between -1 and %d\n",
+		    SD_MAX_RETRIES);
+	return -EINVAL;
+}
+
+static ssize_t
+max_retries_show(struct device *dev, struct device_attribute *attr,
+		 char *buf)
+{
+	struct scsi_disk *sdkp = to_scsi_disk(dev);
+
+	return sprintf(buf, "%d\n", sdkp->max_retries);
+}
+
+static DEVICE_ATTR_RW(max_retries);
+
 static struct attribute *sd_disk_attrs[] = {
 	&dev_attr_cache_type.attr,
 	&dev_attr_FUA.attr,
@@ -557,6 +590,7 @@ static struct attribute *sd_disk_attrs[] = {
 	&dev_attr_max_write_same_blocks.attr,
 	&dev_attr_max_medium_access_timeouts.attr,
 	&dev_attr_zoned_cap.attr,
+	&dev_attr_max_retries.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(sd_disk);
@@ -665,7 +699,8 @@ static void scsi_disk_put(struct scsi_disk *sdkp)
 static int sd_sec_submit(void *data, u16 spsp, u8 secp, void *buffer,
 		size_t len, bool send)
 {
-	struct scsi_device *sdev = data;
+	struct scsi_disk *sdkp = data;
+	struct scsi_device *sdev = sdkp->device;
 	u8 cdb[12] = { 0, };
 	int ret;
 
@@ -676,7 +711,7 @@ static int sd_sec_submit(void *data, u16 spsp, u8 secp, void *buffer,
 
 	ret = scsi_execute_req(sdev, cdb,
 			send ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
-			buffer, len, NULL, SD_TIMEOUT, SD_MAX_RETRIES, NULL);
+			buffer, len, NULL, SD_TIMEOUT, sdkp->max_retries, NULL);
 	return ret <= 0 ? ret : -EIO;
 }
 #endif /* CONFIG_BLK_SED_OPAL */
@@ -839,6 +874,7 @@ static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
 {
 	struct scsi_device *sdp = cmd->device;
 	struct request *rq = cmd->request;
+	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq));
 	u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
 	unsigned int data_len = 24;
@@ -862,7 +898,7 @@ static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
 	put_unaligned_be64(lba, &buf[8]);
 	put_unaligned_be32(nr_blocks, &buf[16]);
 
-	cmd->allowed = SD_MAX_RETRIES;
+	cmd->allowed = sdkp->max_retries;
 	cmd->transfersize = data_len;
 	rq->timeout = SD_TIMEOUT;
 
@@ -874,6 +910,7 @@ static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd,
 {
 	struct scsi_device *sdp = cmd->device;
 	struct request *rq = cmd->request;
+	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq));
 	u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
 	u32 data_len = sdp->sector_size;
@@ -893,7 +930,7 @@ static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd,
 	put_unaligned_be64(lba, &cmd->cmnd[2]);
 	put_unaligned_be32(nr_blocks, &cmd->cmnd[10]);
 
-	cmd->allowed = SD_MAX_RETRIES;
+	cmd->allowed = sdkp->max_retries;
 	cmd->transfersize = data_len;
 	rq->timeout = unmap ? SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT;
 
@@ -905,6 +942,7 @@ static blk_status_t sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd,
 {
 	struct scsi_device *sdp = cmd->device;
 	struct request *rq = cmd->request;
+	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq));
 	u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
 	u32 data_len = sdp->sector_size;
@@ -924,7 +962,7 @@ static blk_status_t sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd,
 	put_unaligned_be32(lba, &cmd->cmnd[2]);
 	put_unaligned_be16(nr_blocks, &cmd->cmnd[7]);
 
-	cmd->allowed = SD_MAX_RETRIES;
+	cmd->allowed = sdkp->max_retries;
 	cmd->transfersize = data_len;
 	rq->timeout = unmap ? SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT;
 
@@ -1056,7 +1094,7 @@ static blk_status_t sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
 	}
 
 	cmd->transfersize = sdp->sector_size;
-	cmd->allowed = SD_MAX_RETRIES;
+	cmd->allowed = sdkp->max_retries;
 
 	/*
 	 * For WRITE SAME the data transferred via the DATA OUT buffer is
@@ -1078,6 +1116,7 @@ static blk_status_t sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
 static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd)
 {
 	struct request *rq = cmd->request;
+	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 
 	/* flush requests don't perform I/O, zero the S/G table */
 	memset(&cmd->sdb, 0, sizeof(cmd->sdb));
@@ -1085,7 +1124,7 @@ static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd)
 	cmd->cmnd[0] = SYNCHRONIZE_CACHE;
 	cmd->cmd_len = 10;
 	cmd->transfersize = 0;
-	cmd->allowed = SD_MAX_RETRIES;
+	cmd->allowed = sdkp->max_retries;
 
 	rq->timeout = rq->q->rq_timeout * SD_FLUSH_TIMEOUT_MULTIPLIER;
 	return BLK_STS_OK;
@@ -1262,7 +1301,7 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
 	 */
 	cmd->transfersize = sdp->sector_size;
 	cmd->underflow = nr_blocks << 9;
-	cmd->allowed = SD_MAX_RETRIES;
+	cmd->allowed = sdkp->max_retries;
 	cmd->sdb.length = nr_blocks * sdp->sector_size;
 
 	SCSI_LOG_HLQUEUE(1,
@@ -1609,7 +1648,7 @@ static unsigned int sd_check_events(struct gendisk *disk, unsigned int clearing)
 	if (scsi_block_when_processing_errors(sdp)) {
 		struct scsi_sense_hdr sshdr = { 0, };
 
-		retval = scsi_test_unit_ready(sdp, SD_TIMEOUT, SD_MAX_RETRIES,
+		retval = scsi_test_unit_ready(sdp, SD_TIMEOUT, sdkp->max_retries,
 					      &sshdr);
 
 		/* failed to execute TUR, assume media not present */
@@ -1666,7 +1705,7 @@ static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr)
 		 * flush everything.
 		 */
 		res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, sshdr,
-				timeout, SD_MAX_RETRIES, 0, RQF_PM, NULL);
+				timeout, sdkp->max_retries, 0, RQF_PM, NULL);
 		if (res == 0)
 			break;
 	}
@@ -1761,7 +1800,8 @@ static char sd_pr_type(enum pr_type type)
 static int sd_pr_command(struct block_device *bdev, u8 sa,
 		u64 key, u64 sa_key, u8 type, u8 flags)
 {
-	struct scsi_device *sdev = scsi_disk(bdev->bd_disk)->device;
+	struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk);
+	struct scsi_device *sdev = sdkp->device;
 	struct scsi_sense_hdr sshdr;
 	int result;
 	u8 cmd[16] = { 0, };
@@ -1777,7 +1817,7 @@ static int sd_pr_command(struct block_device *bdev, u8 sa,
 	data[20] = flags;
 
 	result = scsi_execute_req(sdev, cmd, DMA_TO_DEVICE, &data, sizeof(data),
-			&sshdr, SD_TIMEOUT, SD_MAX_RETRIES, NULL);
+			&sshdr, SD_TIMEOUT, sdkp->max_retries, NULL);
 
 	if (driver_byte(result) == DRIVER_SENSE &&
 	    scsi_sense_valid(&sshdr)) {
@@ -2114,7 +2154,7 @@ sd_spinup_disk(struct scsi_disk *sdkp)
 			the_result = scsi_execute_req(sdkp->device, cmd,
 						      DMA_NONE, NULL, 0,
 						      &sshdr, SD_TIMEOUT,
-						      SD_MAX_RETRIES, NULL);
+						      sdkp->max_retries, NULL);
 
 			/*
 			 * If the drive has indicated to us that it
@@ -2170,7 +2210,7 @@ sd_spinup_disk(struct scsi_disk *sdkp)
 					cmd[4] |= 1 << 4;
 				scsi_execute_req(sdkp->device, cmd, DMA_NONE,
 						 NULL, 0, &sshdr,
-						 SD_TIMEOUT, SD_MAX_RETRIES,
+						 SD_TIMEOUT, sdkp->max_retries,
 						 NULL);
 				spintime_expire = jiffies + 100 * HZ;
 				spintime = 1;
@@ -2312,7 +2352,7 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp,
 
 		the_result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE,
 					buffer, RC16_LEN, &sshdr,
-					SD_TIMEOUT, SD_MAX_RETRIES, NULL);
+					SD_TIMEOUT, sdkp->max_retries, NULL);
 
 		if (media_not_present(sdkp, &sshdr))
 			return -ENODEV;
@@ -2397,7 +2437,7 @@ static int read_capacity_10(struct scsi_disk *sdkp, struct scsi_device *sdp,
 
 		the_result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE,
 					buffer, 8, &sshdr,
-					SD_TIMEOUT, SD_MAX_RETRIES, NULL);
+					SD_TIMEOUT, sdkp->max_retries, NULL);
 
 		if (media_not_present(sdkp, &sshdr))
 			return -ENODEV;
@@ -2582,12 +2622,12 @@ sd_print_capacity(struct scsi_disk *sdkp,
 
 /* called with buffer of length 512 */
 static inline int
-sd_do_mode_sense(struct scsi_device *sdp, int dbd, int modepage,
+sd_do_mode_sense(struct scsi_disk *sdkp, int dbd, int modepage,
 		 unsigned char *buffer, int len, struct scsi_mode_data *data,
 		 struct scsi_sense_hdr *sshdr)
 {
-	return scsi_mode_sense(sdp, dbd, modepage, buffer, len,
-			       SD_TIMEOUT, SD_MAX_RETRIES, data,
+	return scsi_mode_sense(sdkp->device, dbd, modepage, buffer, len,
+			       SD_TIMEOUT, sdkp->max_retries, data,
 			       sshdr);
 }
 
@@ -2610,14 +2650,14 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer)
 	}
 
 	if (sdp->use_192_bytes_for_3f) {
-		res = sd_do_mode_sense(sdp, 0, 0x3F, buffer, 192, &data, NULL);
+		res = sd_do_mode_sense(sdkp, 0, 0x3F, buffer, 192, &data, NULL);
 	} else {
 		/*
 		 * First attempt: ask for all pages (0x3F), but only 4 bytes.
 		 * We have to start carefully: some devices hang if we ask
 		 * for more than is available.
 		 */
-		res = sd_do_mode_sense(sdp, 0, 0x3F, buffer, 4, &data, NULL);
+		res = sd_do_mode_sense(sdkp, 0, 0x3F, buffer, 4, &data, NULL);
 
 		/*
 		 * Second attempt: ask for page 0 When only page 0 is
@@ -2626,13 +2666,13 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer)
 		 * CDB.
 		 */
 		if (!scsi_status_is_good(res))
-			res = sd_do_mode_sense(sdp, 0, 0, buffer, 4, &data, NULL);
+			res = sd_do_mode_sense(sdkp, 0, 0, buffer, 4, &data, NULL);
 
 		/*
 		 * Third attempt: ask 255 bytes, as we did earlier.
 		 */
 		if (!scsi_status_is_good(res))
-			res = sd_do_mode_sense(sdp, 0, 0x3F, buffer, 255,
+			res = sd_do_mode_sense(sdkp, 0, 0x3F, buffer, 255,
 					       &data, NULL);
 	}
 
@@ -2694,7 +2734,7 @@ sd_read_cache_type(struct scsi_disk *sdkp, unsigned char *buffer)
 	}
 
 	/* cautiously ask */
-	res = sd_do_mode_sense(sdp, dbd, modepage, buffer, first_len,
+	res = sd_do_mode_sense(sdkp, dbd, modepage, buffer, first_len,
 			&data, &sshdr);
 
 	if (!scsi_status_is_good(res))
@@ -2726,7 +2766,7 @@ sd_read_cache_type(struct scsi_disk *sdkp, unsigned char *buffer)
 
 	/* Get the data */
 	if (len > first_len)
-		res = sd_do_mode_sense(sdp, dbd, modepage, buffer, len,
+		res = sd_do_mode_sense(sdkp, dbd, modepage, buffer, len,
 				&data, &sshdr);
 
 	if (scsi_status_is_good(res)) {
@@ -2845,7 +2885,7 @@ static void sd_read_app_tag_own(struct scsi_disk *sdkp, unsigned char *buffer)
 		return;
 
 	res = scsi_mode_sense(sdp, 1, 0x0a, buffer, 36, SD_TIMEOUT,
-			      SD_MAX_RETRIES, &data, &sshdr);
+			      sdkp->max_retries, &data, &sshdr);
 
 	if (!scsi_status_is_good(res) || !data.header_length ||
 	    data.length < 6) {
@@ -3368,6 +3408,7 @@ static int sd_probe(struct device *dev)
 	sdkp->driver = &sd_template;
 	sdkp->disk = gd;
 	sdkp->index = index;
+	sdkp->max_retries = SD_MAX_RETRIES;
 	atomic_set(&sdkp->openers, 0);
 	atomic_set(&sdkp->device->ioerr_cnt, 0);
 
@@ -3431,7 +3472,7 @@ static int sd_probe(struct device *dev)
 	sd_revalidate_disk(gd);
 
 	if (sdkp->security) {
-		sdkp->opal_dev = init_opal_dev(sdp, &sd_sec_submit);
+		sdkp->opal_dev = init_opal_dev(sdkp, &sd_sec_submit);
 		if (sdkp->opal_dev)
 			sd_printk(KERN_NOTICE, sdkp, "supports TCG Opal\n");
 	}
@@ -3546,7 +3587,7 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start)
 		return -ENODEV;
 
 	res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr,
-			SD_TIMEOUT, SD_MAX_RETRIES, 0, RQF_PM, NULL);
+			SD_TIMEOUT, sdkp->max_retries, 0, RQF_PM, NULL);
 	if (res) {
 		sd_print_result(sdkp, "Start/Stop Unit failed", res);
 		if (driver_byte(res) == DRIVER_SENSE)
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index a3aad608bc38..b59136c4125b 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -90,6 +90,7 @@ struct scsi_disk {
 #endif
 	atomic_t	openers;
 	sector_t	capacity;	/* size in logical blocks */
+	int		max_retries;
 	u32		max_xfer_blocks;
 	u32		opt_xfer_blocks;
 	u32		max_ws_blocks;
-- 
2.18.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] scsi: Add limitless cmd retry support
  2020-09-29 17:01 ` [PATCH 1/2] scsi: Add limitless cmd retry support Mike Christie
@ 2020-10-01  2:52   ` Bart Van Assche
  2020-10-01 15:22     ` Mike Christie
  0 siblings, 1 reply; 12+ messages in thread
From: Bart Van Assche @ 2020-10-01  2:52 UTC (permalink / raw)
  To: Mike Christie, martin.petersen, linux-scsi, james.bottomley

On 2020-09-29 10:01, Mike Christie wrote:
> +static bool scsi_cmd_retry_allowed(struct scsi_cmnd *cmd)
> +{
> +	if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
> +		return true;
> +
> +	return (++cmd->retries <= cmd->allowed);
> +}

Did checkpatch complain about the parentheses in the above return statement?

Anyway:

Reviewed-by: Bart Van Assche <bvanassche@acm.org>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] scsi sd: Allow user to config cmd retries
  2020-09-29 17:01 ` [PATCH 2/2] scsi sd: Allow user to config cmd retries Mike Christie
@ 2020-10-01  2:54   ` Bart Van Assche
  0 siblings, 0 replies; 12+ messages in thread
From: Bart Van Assche @ 2020-10-01  2:54 UTC (permalink / raw)
  To: Mike Christie, martin.petersen, linux-scsi, james.bottomley

On 2020-09-29 10:01, Mike Christie wrote:
> Some iSCSI targets went the traditional export N ports and then allowed
> the initiator to multipath over them. Other targets went the opposite
> direction and export 1 port, and then software on the target side
> performs load balancing and failover to other targets via a iscsi
> specific feature or IP takover.

Reviewed-by: Bart Van Assche <bvanassche@acm.org>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] scsi: Add limitless cmd retry support
  2020-10-01  2:52   ` Bart Van Assche
@ 2020-10-01 15:22     ` Mike Christie
  0 siblings, 0 replies; 12+ messages in thread
From: Mike Christie @ 2020-10-01 15:22 UTC (permalink / raw)
  To: Bart Van Assche, martin.petersen, linux-scsi, james.bottomley

On 9/30/20 9:52 PM, Bart Van Assche wrote:
> On 2020-09-29 10:01, Mike Christie wrote:
>> +static bool scsi_cmd_retry_allowed(struct scsi_cmnd *cmd)
>> +{
>> +	if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
>> +		return true;
>> +
>> +	return (++cmd->retries <= cmd->allowed);
>> +}
> 
> Did checkpatch complain about the parentheses in the above return statement?
> 

Ah sorry. It does not.

I dropped it like I mentioned in the 0/2 patch. It looks like when I re-edited my comments I accidentally used the first version of the patch and it snuck back in.

I'll resend. It's better to get it right now instead of wasting time having someone send a cleanup patch later.

> Anyway:
> 
> Reviewed-by: Bart Van Assche <bvanassche@acm.org>
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] scsi: Add limitless cmd retry support
  2020-10-01 15:35 ` [PATCH 1/2] scsi: Add limitless cmd retry support Mike Christie
  2020-10-02  2:38   ` Bart Van Assche
@ 2020-10-07  3:47   ` Martin K. Petersen
  1 sibling, 0 replies; 12+ messages in thread
From: Martin K. Petersen @ 2020-10-07  3:47 UTC (permalink / raw)
  To: bvanassche, Mike Christie, linux-scsi, james.bottomley
  Cc: Martin K . Petersen

On Thu, 1 Oct 2020 10:35:53 -0500, Mike Christie wrote:

> The next patch allows users to configure disk scsi cmd retries from
> -1 up to a ULD specific value where -1 means infinite retries.
> 
> This patch adds infinite retry support to scsi-ml by just combining
> common checks for retries into some helper functions, and then
> checking for the -1/SCSI_CMD_RETRIES_NO_LIMIT.

Applied to 5.10/scsi-queue, thanks!

[1/2] scsi: core: Add limitless cmd retry support
      https://git.kernel.org/mkp/scsi/c/2a242d59d6b9
[2/2] scsi: sd: Allow user to configure command retries
      https://git.kernel.org/mkp/scsi/c/0610959fbbca

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] scsi: Add limitless cmd retry support
  2020-10-01 15:35 ` [PATCH 1/2] scsi: Add limitless cmd retry support Mike Christie
@ 2020-10-02  2:38   ` Bart Van Assche
  2020-10-07  3:47   ` Martin K. Petersen
  1 sibling, 0 replies; 12+ messages in thread
From: Bart Van Assche @ 2020-10-02  2:38 UTC (permalink / raw)
  To: Mike Christie, martin.petersen, linux-scsi, james.bottomley

On 2020-10-01 08:35, Mike Christie wrote:
> The next patch allows users to configure disk scsi cmd retries from
> -1 up to a ULD specific value where -1 means infinite retries.
> 
> This patch adds infinite retry support to scsi-ml by just combining
> common checks for retries into some helper functions, and then
> checking for the -1/SCSI_CMD_RETRIES_NO_LIMIT.

Reviewed-by: Bart Van Assche <bvanassche@acm.org>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/2] scsi: Add limitless cmd retry support
  2020-10-01 15:35 [PATCH V3 0/2] scsi/sd: make disk cmd retries configurable Mike Christie
@ 2020-10-01 15:35 ` Mike Christie
  2020-10-02  2:38   ` Bart Van Assche
  2020-10-07  3:47   ` Martin K. Petersen
  0 siblings, 2 replies; 12+ messages in thread
From: Mike Christie @ 2020-10-01 15:35 UTC (permalink / raw)
  To: martin.petersen, bvanassche, linux-scsi, james.bottomley

The next patch allows users to configure disk scsi cmd retries from
-1 up to a ULD specific value where -1 means infinite retries.

This patch adds infinite retry support to scsi-ml by just combining
common checks for retries into some helper functions, and then
checking for the -1/SCSI_CMD_RETRIES_NO_LIMIT.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
 drivers/scsi/scsi_error.c | 33 +++++++++++++++++++++++----------
 drivers/scsi/scsi_lib.c   | 29 +++++++++++++++++++----------
 drivers/scsi/scsi_priv.h  |  1 +
 3 files changed, 43 insertions(+), 20 deletions(-)

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 7d3571a2bd89..8b1311b08bfd 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -116,6 +116,14 @@ static int scsi_host_eh_past_deadline(struct Scsi_Host *shost)
 	return 1;
 }
 
+static bool scsi_cmd_retry_allowed(struct scsi_cmnd *cmd)
+{
+	if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
+		return true;
+
+	return ++cmd->retries <= cmd->allowed;
+}
+
 /**
  * scmd_eh_abort_handler - Handle command aborts
  * @work:	command to be aborted.
@@ -151,7 +159,7 @@ scmd_eh_abort_handler(struct work_struct *work)
 						    "eh timeout, not retrying "
 						    "aborted command\n"));
 			} else if (!scsi_noretry_cmd(scmd) &&
-			    (++scmd->retries <= scmd->allowed)) {
+				   scsi_cmd_retry_allowed(scmd)) {
 				SCSI_LOG_ERROR_RECOVERY(3,
 					scmd_printk(KERN_WARNING, scmd,
 						    "retry aborted command\n"));
@@ -1264,11 +1272,18 @@ int scsi_eh_get_sense(struct list_head *work_q,
 		 * upper level.
 		 */
 		if (rtn == SUCCESS)
-			/* we don't want this command reissued, just
-			 * finished with the sense data, so set
-			 * retries to the max allowed to ensure it
-			 * won't get reissued */
-			scmd->retries = scmd->allowed;
+			/*
+			 * We don't want this command reissued, just finished
+			 * with the sense data, so set retries to the max
+			 * allowed to ensure it won't get reissued. If the user
+			 * has requested infinite retries, we also want to
+			 * finish this command, so force completion by setting
+			 * retries and allowed to the same value.
+			 */
+			if (scmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
+				scmd->retries = scmd->allowed = 1;
+			else
+				scmd->retries = scmd->allowed;
 		else if (rtn != NEEDS_RETRY)
 			continue;
 
@@ -1944,8 +1959,7 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
 	 * the request was not marked fast fail.  Note that above,
 	 * even if the request is marked fast fail, we still requeue
 	 * for queue congestion conditions (QUEUE_FULL or BUSY) */
-	if ((++scmd->retries) <= scmd->allowed
-	    && !scsi_noretry_cmd(scmd)) {
+	if (scsi_cmd_retry_allowed(scmd) && !scsi_noretry_cmd(scmd)) {
 		return NEEDS_RETRY;
 	} else {
 		/*
@@ -2091,8 +2105,7 @@ void scsi_eh_flush_done_q(struct list_head *done_q)
 	list_for_each_entry_safe(scmd, next, done_q, eh_entry) {
 		list_del_init(&scmd->eh_entry);
 		if (scsi_device_online(scmd->device) &&
-		    !scsi_noretry_cmd(scmd) &&
-		    (++scmd->retries <= scmd->allowed)) {
+		    !scsi_noretry_cmd(scmd) && scsi_cmd_retry_allowed(scmd)) {
 			SCSI_LOG_ERROR_RECOVERY(3,
 				scmd_printk(KERN_INFO, scmd,
 					     "%s: flush retry cmd\n",
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 7affaaf8b98e..cfe0e17422b9 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -652,6 +652,23 @@ static void scsi_io_completion_reprep(struct scsi_cmnd *cmd,
 	scsi_mq_requeue_cmd(cmd);
 }
 
+static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd)
+{
+	struct request *req = cmd->request;
+	unsigned long wait_for;
+
+	if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
+		return false;
+
+	wait_for = (cmd->allowed + 1) * req->timeout;
+	if (time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
+		scmd_printk(KERN_ERR, cmd, "timing out command, waited %lus\n",
+			    wait_for/HZ);
+		return true;
+	}
+	return false;
+}
+
 /* Helper for scsi_io_completion() when special action required. */
 static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
 {
@@ -660,7 +677,6 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
 	int level = 0;
 	enum {ACTION_FAIL, ACTION_REPREP, ACTION_RETRY,
 	      ACTION_DELAYED_RETRY} action;
-	unsigned long wait_for = (cmd->allowed + 1) * req->timeout;
 	struct scsi_sense_hdr sshdr;
 	bool sense_valid;
 	bool sense_current = true;      /* false implies "deferred sense" */
@@ -765,8 +781,7 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
 	} else
 		action = ACTION_FAIL;
 
-	if (action != ACTION_FAIL &&
-	    time_before(cmd->jiffies_at_alloc + wait_for, jiffies))
+	if (action != ACTION_FAIL && scsi_cmd_runtime_exceeced(cmd))
 		action = ACTION_FAIL;
 
 	switch (action) {
@@ -1439,7 +1454,6 @@ static bool scsi_mq_lld_busy(struct request_queue *q)
 static void scsi_softirq_done(struct request *rq)
 {
 	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
-	unsigned long wait_for = (cmd->allowed + 1) * rq->timeout;
 	int disposition;
 
 	INIT_LIST_HEAD(&cmd->eh_entry);
@@ -1449,13 +1463,8 @@ static void scsi_softirq_done(struct request *rq)
 		atomic_inc(&cmd->device->ioerr_cnt);
 
 	disposition = scsi_decide_disposition(cmd);
-	if (disposition != SUCCESS &&
-	    time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
-		scmd_printk(KERN_ERR, cmd,
-			    "timing out command, waited %lus\n",
-			    wait_for/HZ);
+	if (disposition != SUCCESS && scsi_cmd_runtime_exceeced(cmd))
 		disposition = SUCCESS;
-	}
 
 	scsi_log_completion(cmd, disposition);
 
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index d12ada035961..180636d54982 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -15,6 +15,7 @@ struct scsi_host_template;
 struct Scsi_Host;
 struct scsi_nl_hdr;
 
+#define SCSI_CMD_RETRIES_NO_LIMIT -1
 
 /*
  * Scsi Error Handler Flags
-- 
2.18.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] scsi: Add limitless cmd retry support
  2020-09-22  3:10   ` Bart Van Assche
@ 2020-09-22 16:01     ` Mike Christie
  0 siblings, 0 replies; 12+ messages in thread
From: Mike Christie @ 2020-09-22 16:01 UTC (permalink / raw)
  To: Bart Van Assche, martin.petersen, linux-scsi, james.bottomley

On 9/21/20 10:10 PM, Bart Van Assche wrote:
> On 2020-09-21 11:48, Mike Christie wrote:
>> +static bool scsi_cmd_retry_allowed(struct scsi_cmnd *cmd)
>> +{
>> +	if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
>> +		return true;
>> +
>> +	return (++cmd->retries <= cmd->allowed);
>> +}
> 
> Something minor: how about leaving out the parentheses from the above return
> statement?

Will do.

> 
>> @@ -1268,7 +1276,10 @@ int scsi_eh_get_sense(struct list_head *work_q,
>>  			 * finished with the sense data, so set
>>  			 * retries to the max allowed to ensure it
>>  			 * won't get reissued */
>> -			scmd->retries = scmd->allowed;
>> +			if (scmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
>> +				scmd->retries = scmd->allowed = 1;
>> +			else
>> +				scmd->retries = scmd->allowed;
> 
> Please make sure that the comment above the if-statement remains an accurate
> description of the code. The comment only explains why scmd->retries is
> modified but not why scmd->allowed is modified if scmd->allowed ==
> SCSI_CMD_RETRIES_NO_LIMIT.

Ok.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] scsi: Add limitless cmd retry support
  2020-09-21 18:48 ` [PATCH 1/2] scsi: Add limitless cmd retry support Mike Christie
@ 2020-09-22  3:10   ` Bart Van Assche
  2020-09-22 16:01     ` Mike Christie
  0 siblings, 1 reply; 12+ messages in thread
From: Bart Van Assche @ 2020-09-22  3:10 UTC (permalink / raw)
  To: Mike Christie, martin.petersen, linux-scsi, james.bottomley

On 2020-09-21 11:48, Mike Christie wrote:
> +static bool scsi_cmd_retry_allowed(struct scsi_cmnd *cmd)
> +{
> +	if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
> +		return true;
> +
> +	return (++cmd->retries <= cmd->allowed);
> +}

Something minor: how about leaving out the parentheses from the above return
statement?

> @@ -1268,7 +1276,10 @@ int scsi_eh_get_sense(struct list_head *work_q,
>  			 * finished with the sense data, so set
>  			 * retries to the max allowed to ensure it
>  			 * won't get reissued */
> -			scmd->retries = scmd->allowed;
> +			if (scmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
> +				scmd->retries = scmd->allowed = 1;
> +			else
> +				scmd->retries = scmd->allowed;

Please make sure that the comment above the if-statement remains an accurate
description of the code. The comment only explains why scmd->retries is
modified but not why scmd->allowed is modified if scmd->allowed ==
SCSI_CMD_RETRIES_NO_LIMIT.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/2] scsi: Add limitless cmd retry support
  2020-09-21 18:48 [PATCH 0/2] scsi/sd: make cmd retries configurable Mike Christie
@ 2020-09-21 18:48 ` Mike Christie
  2020-09-22  3:10   ` Bart Van Assche
  0 siblings, 1 reply; 12+ messages in thread
From: Mike Christie @ 2020-09-21 18:48 UTC (permalink / raw)
  To: martin.petersen, linux-scsi, james.bottomley

The next patch allows users to configure disk scsi cmd retries from
-1 up to a ULD specific value where -1 means infinite retries.

This patch adds infinite retry support to scsi-ml by just combining
common checks for retries into some helper functions, and then
checking for the -1/SCSI_CMD_RETRIES_NO_LIMIT.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
 drivers/scsi/scsi_error.c | 21 +++++++++++++++------
 drivers/scsi/scsi_lib.c   | 29 +++++++++++++++++++----------
 drivers/scsi/scsi_priv.h  |  1 +
 3 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 927b1e641842..8d237152581f 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -116,6 +116,14 @@ static int scsi_host_eh_past_deadline(struct Scsi_Host *shost)
 	return 1;
 }
 
+static bool scsi_cmd_retry_allowed(struct scsi_cmnd *cmd)
+{
+	if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
+		return true;
+
+	return (++cmd->retries <= cmd->allowed);
+}
+
 /**
  * scmd_eh_abort_handler - Handle command aborts
  * @work:	command to be aborted.
@@ -151,7 +159,7 @@ scmd_eh_abort_handler(struct work_struct *work)
 						    "eh timeout, not retrying "
 						    "aborted command\n"));
 			} else if (!scsi_noretry_cmd(scmd) &&
-			    (++scmd->retries <= scmd->allowed)) {
+				   scsi_cmd_retry_allowed(scmd)) {
 				SCSI_LOG_ERROR_RECOVERY(3,
 					scmd_printk(KERN_WARNING, scmd,
 						    "retry aborted command\n"));
@@ -1268,7 +1276,10 @@ int scsi_eh_get_sense(struct list_head *work_q,
 			 * finished with the sense data, so set
 			 * retries to the max allowed to ensure it
 			 * won't get reissued */
-			scmd->retries = scmd->allowed;
+			if (scmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
+				scmd->retries = scmd->allowed = 1;
+			else
+				scmd->retries = scmd->allowed;
 		else if (rtn != NEEDS_RETRY)
 			continue;
 
@@ -1944,8 +1955,7 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
 	 * the request was not marked fast fail.  Note that above,
 	 * even if the request is marked fast fail, we still requeue
 	 * for queue congestion conditions (QUEUE_FULL or BUSY) */
-	if ((++scmd->retries) <= scmd->allowed
-	    && !scsi_noretry_cmd(scmd)) {
+	if (scsi_cmd_retry_allowed(scmd) && !scsi_noretry_cmd(scmd)) {
 		return NEEDS_RETRY;
 	} else {
 		/*
@@ -2091,8 +2101,7 @@ void scsi_eh_flush_done_q(struct list_head *done_q)
 	list_for_each_entry_safe(scmd, next, done_q, eh_entry) {
 		list_del_init(&scmd->eh_entry);
 		if (scsi_device_online(scmd->device) &&
-		    !scsi_noretry_cmd(scmd) &&
-		    (++scmd->retries <= scmd->allowed)) {
+		    !scsi_noretry_cmd(scmd) && scsi_cmd_retry_allowed(scmd)) {
 			SCSI_LOG_ERROR_RECOVERY(3,
 				scmd_printk(KERN_INFO, scmd,
 					     "%s: flush retry cmd\n",
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 06056e9ec333..37b953553362 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -653,6 +653,23 @@ static void scsi_io_completion_reprep(struct scsi_cmnd *cmd,
 	scsi_mq_requeue_cmd(cmd);
 }
 
+static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd)
+{
+	struct request *req = cmd->request;
+	unsigned long wait_for;
+
+	if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
+		return false;
+
+	wait_for = (cmd->allowed + 1) * req->timeout;
+	if (time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
+		scmd_printk(KERN_ERR, cmd, "timing out command, waited %lus\n",
+			    wait_for/HZ);
+		return true;
+	}
+	return false;
+}
+
 /* Helper for scsi_io_completion() when special action required. */
 static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
 {
@@ -661,7 +678,6 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
 	int level = 0;
 	enum {ACTION_FAIL, ACTION_REPREP, ACTION_RETRY,
 	      ACTION_DELAYED_RETRY} action;
-	unsigned long wait_for = (cmd->allowed + 1) * req->timeout;
 	struct scsi_sense_hdr sshdr;
 	bool sense_valid;
 	bool sense_current = true;      /* false implies "deferred sense" */
@@ -766,8 +782,7 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
 	} else
 		action = ACTION_FAIL;
 
-	if (action != ACTION_FAIL &&
-	    time_before(cmd->jiffies_at_alloc + wait_for, jiffies))
+	if (action != ACTION_FAIL && scsi_cmd_runtime_exceeced(cmd))
 		action = ACTION_FAIL;
 
 	switch (action) {
@@ -1440,7 +1455,6 @@ static bool scsi_mq_lld_busy(struct request_queue *q)
 static void scsi_softirq_done(struct request *rq)
 {
 	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
-	unsigned long wait_for = (cmd->allowed + 1) * rq->timeout;
 	int disposition;
 
 	INIT_LIST_HEAD(&cmd->eh_entry);
@@ -1450,13 +1464,8 @@ static void scsi_softirq_done(struct request *rq)
 		atomic_inc(&cmd->device->ioerr_cnt);
 
 	disposition = scsi_decide_disposition(cmd);
-	if (disposition != SUCCESS &&
-	    time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
-		scmd_printk(KERN_ERR, cmd,
-			    "timing out command, waited %lus\n",
-			    wait_for/HZ);
+	if (disposition != SUCCESS && scsi_cmd_runtime_exceeced(cmd))
 		disposition = SUCCESS;
-	}
 
 	scsi_log_completion(cmd, disposition);
 
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 22b6585e28b4..f8c46cc82e9f 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -15,6 +15,7 @@ struct scsi_host_template;
 struct Scsi_Host;
 struct scsi_nl_hdr;
 
+#define SCSI_CMD_RETRIES_NO_LIMIT -1
 
 /*
  * Scsi Error Handler Flags
-- 
2.18.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2020-10-07  3:48 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-29 17:01 [PATCH 0/2] scsi/sd: make disk cmd retries configurable V2 Mike Christie
2020-09-29 17:01 ` [PATCH 1/2] scsi: Add limitless cmd retry support Mike Christie
2020-10-01  2:52   ` Bart Van Assche
2020-10-01 15:22     ` Mike Christie
2020-09-29 17:01 ` [PATCH 2/2] scsi sd: Allow user to config cmd retries Mike Christie
2020-10-01  2:54   ` Bart Van Assche
  -- strict thread matches above, loose matches on Subject: below --
2020-10-01 15:35 [PATCH V3 0/2] scsi/sd: make disk cmd retries configurable Mike Christie
2020-10-01 15:35 ` [PATCH 1/2] scsi: Add limitless cmd retry support Mike Christie
2020-10-02  2:38   ` Bart Van Assche
2020-10-07  3:47   ` Martin K. Petersen
2020-09-21 18:48 [PATCH 0/2] scsi/sd: make cmd retries configurable Mike Christie
2020-09-21 18:48 ` [PATCH 1/2] scsi: Add limitless cmd retry support Mike Christie
2020-09-22  3:10   ` Bart Van Assche
2020-09-22 16:01     ` Mike Christie

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.