All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ewan Milne <emilne@redhat.com>
To: Hannes Reinecke <hare@suse.de>
Cc: James Bottomley <jbottomley@parallels.com>,
	linux-scsi@vger.kernel.org, Joern Engel <joern@logfs.org>,
	James Smart <james.smart@emulex.com>,
	Ren Mingxin <renmx@cn.fujitsu.com>,
	Roland Dreier <roland@purestorage.com>,
	Bryn Reeves <bmr@redhat.com>,
	Christoph Hellwig <hch@infradead.org>
Subject: Re: [PATCH 7/7] scsi: Add 'eh_deadline' to limit SCSI EH runtime
Date: Thu, 27 Jun 2013 10:33:09 -0400	[thread overview]
Message-ID: <1372343589.3871.406.camel@localhost.localdomain> (raw)
In-Reply-To: <1370862713-41323-8-git-send-email-hare@suse.de>

The eh_deadline changes allow for a significant improvement
in multipath failover time.  It works very well in our testing.
I do have a few corrections, see below:

On Mon, 2013-06-10 at 13:11 +0200, Hannes Reinecke wrote:
> This patchs adds an 'eh_deadline' attribute to the scsi
> host which limits the overall runtime of the SCSI EH.
> When a command is failed the start time of the EH is stored
> in 'last_reset'. If the overall runtime of the SCSI EH is longer
> than last_reset + eh_deadline, the EH is short-circuited and
> falls through to issue a host reset only.
> 
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> ---
>  drivers/scsi/hosts.c      |   7 +++
>  drivers/scsi/scsi_error.c | 142 +++++++++++++++++++++++++++++++++++++++++++---
>  drivers/scsi/scsi_sysfs.c |  37 ++++++++++++
>  include/scsi/scsi_host.h  |   2 +-
>  4 files changed, 180 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
> index df0c3c7..c8d828f 100644
> --- a/drivers/scsi/hosts.c
> +++ b/drivers/scsi/hosts.c
> @@ -316,6 +316,12 @@ static void scsi_host_dev_release(struct device *dev)
>  	kfree(shost);
>  }
>  
> +static unsigned int shost_eh_deadline;
> +
> +module_param_named(eh_deadline, shost_eh_deadline, uint, S_IRUGO|S_IWUSR);
> +MODULE_PARM_DESC(eh_deadline,
> +		 "SCSI EH deadline in seconds (should be between 1 and 2^32-1)");
> +
>  static struct device_type scsi_host_type = {
>  	.name =		"scsi_host",
>  	.release =	scsi_host_dev_release,
> @@ -388,6 +394,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
>  	shost->unchecked_isa_dma = sht->unchecked_isa_dma;
>  	shost->use_clustering = sht->use_clustering;
>  	shost->ordered_tag = sht->ordered_tag;
> +	shost->eh_deadline = shost_eh_deadline;

This should be shost->eh_deadline = shost_eh_deadline * HZ; since the
module parameter is specified in seconds, while eh_deadline is added to
a jiffies value in sdev_eh_deadline().

>  
>  	if (sht->supported_mode == MODE_UNKNOWN)
>  		/* means we didn't set it ... default to INITIATOR */
> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
> index 467cb3c..cf30475 100644
> --- a/drivers/scsi/scsi_error.c
> +++ b/drivers/scsi/scsi_error.c
> @@ -91,6 +91,31 @@ void scsi_schedule_eh(struct Scsi_Host *shost)
>  }
>  EXPORT_SYMBOL_GPL(scsi_schedule_eh);
>  
> +static int sdev_eh_deadline(struct Scsi_Host *shost,
> +			   unsigned long eh_start)
> +{
> +	if (!shost->eh_deadline)
> +		return 0;
> +
> +	if (shost->last_reset != 0 &&
> +	    time_before(shost->last_reset, eh_start))
> +		eh_start = shost->last_reset;
> +
> +	if (time_before(jiffies,
> +			eh_start + shost->eh_deadline))
> +		return 0;
> +
> +	return 1;
> +}
> +
> +static int scsi_host_eh_deadline(struct Scsi_Host *shost)
> +{
> +	if (!shost->last_reset)
> +		return 0;
> +
> +	return sdev_eh_deadline(shost, shost->last_reset);
> +}
> +
>  /**
>   * scsi_eh_abort_handler - Handle command aborts
>   * @work:	sdev on which commands should be aborted.
> @@ -102,13 +127,15 @@ scsi_eh_abort_handler(struct work_struct *work)
>  		container_of(work, struct scsi_device, abort_work);
>  	struct scsi_cmnd *scmd, *tmp;
>  	LIST_HEAD(abort_list);
> -	unsigned long flags;
> +	unsigned long flags, eh_start;
>  	int rtn;
>  
>  	spin_lock_irqsave(&sdev->list_lock, flags);
>  	list_splice_init(&sdev->eh_abort_list, &abort_list);
>  	spin_unlock_irqrestore(&sdev->list_lock, flags);
>  
> +	eh_start = jiffies;
> +
>  	list_for_each_entry_safe(scmd, tmp, &abort_list, eh_entry) {
>  		list_del_init(&scmd->eh_entry);
>  		if (sdev->sdev_state == SDEV_CANCEL) {
> @@ -119,6 +146,13 @@ scsi_eh_abort_handler(struct work_struct *work)
>  			scsi_finish_command(scmd);
>  			continue;
>  		}
> +		if (sdev_eh_deadline(sdev->host, eh_start)) {
> +			SCSI_LOG_ERROR_RECOVERY(3,
> +				scmd_printk(KERN_INFO, scmd,
> +					     "eh timeout, not aborting\n"));
> +			list_move_tail(&scmd->eh_entry, &abort_list);
> +			goto start_eh;
> +		}
>  		SCSI_LOG_ERROR_RECOVERY(3,
>  			scmd_printk(KERN_INFO, scmd,
>  				    "aborting command %p\n", scmd));
> @@ -151,6 +185,12 @@ scsi_eh_abort_handler(struct work_struct *work)
>  		return;
>  
>  start_eh:
> +	spin_lock_irqsave(sdev->host->host_lock, flags);
> +	if (sdev->host->eh_deadline &&
> +	    (!sdev->host->last_reset ||
> +	     time_before(eh_start, sdev->host->last_reset)))
> +		sdev->host->last_reset = eh_start;
> +	spin_unlock_irqrestore(sdev->host->host_lock, flags);
>  	list_for_each_entry_safe(scmd, tmp, &abort_list, eh_entry) {
>  		scmd->result |= DID_TIME_OUT << 16;
>  		if (!scsi_eh_scmd_add(scmd, 0)) {
> @@ -232,6 +272,9 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)
>  		if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY))
>  			goto out_unlock;
>  
> +	if (sdev->eh_deadline && !shost->last_reset)
> +		shost->last_reset = jiffies;
> +

I think this is supposed to be if (shost->eh_deadline ... since the
eh_deadline field is added to struct Scsi_Host by this patch.

>  	ret = 1;
>  	scmd->eh_eflags |= eh_flag;
>  	list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
> @@ -1052,13 +1095,25 @@ int scsi_eh_get_sense(struct list_head *work_q,
>  		      struct list_head *done_q)
>  {
>  	struct scsi_cmnd *scmd, *next;
> +	struct Scsi_Host *shost;
>  	int rtn;
> +	unsigned long flags;
>  
>  	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
>  		if ((scmd->eh_eflags & SCSI_EH_CANCEL_CMD) ||
>  		    SCSI_SENSE_VALID(scmd))
>  			continue;
>  
> +		shost = scmd->device->host;
> +		spin_lock_irqsave(shost->host_lock, flags);
> +		if (scsi_host_eh_deadline(shost)) {
> +			spin_unlock_irqrestore(shost->host_lock, flags);
> +			SCSI_LOG_ERROR_RECOVERY(3,
> +				shost_printk(KERN_INFO, shost,
> +					    "skip %s, eh timeout\n", __func__));
> +			break;
> +		}
> +		spin_unlock_irqrestore(shost->host_lock, flags);
>  		SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd,
>  						  "%s: requesting sense\n",
>  						  current->comm));
> @@ -1143,11 +1198,22 @@ static int scsi_eh_test_devices(struct list_head *cmd_list,
>  	struct scsi_cmnd *scmd, *next;
>  	struct scsi_device *sdev;
>  	int finish_cmds;
> +	unsigned long flags;
>  
>  	while (!list_empty(cmd_list)) {
>  		scmd = list_entry(cmd_list->next, struct scsi_cmnd, eh_entry);
>  		sdev = scmd->device;
>  
> +		if (!try_stu) {
> +			spin_lock_irqsave(sdev->host->host_lock, flags);
> +			if (scsi_host_eh_deadline(sdev->host)) {
> +				spin_unlock_irqrestore(sdev->host->host_lock,
> +						       flags);

I think a list_splice_init(cmd_list, work_q); is needed here, otherwise
scmds that are still on the cmd_list will be orphaned.  There should
also be a SCSI_LOG_ERROR_RECOVERY() as was done in other places.

> +				break;
> +			}
> +			spin_unlock_irqrestore(sdev->host->host_lock, flags);
> +		}
> +
>  		finish_cmds = !scsi_device_online(scmd->device) ||
>  			(try_stu && !scsi_eh_try_stu(scmd) &&
>  			 !scsi_eh_tur(scmd)) ||
> @@ -1183,14 +1249,26 @@ static int scsi_eh_abort_cmds(struct list_head *work_q,
>  	struct scsi_cmnd *scmd, *next;
>  	LIST_HEAD(check_list);
>  	int rtn;
> +	struct Scsi_Host *shost;
> +	unsigned long flags;
>  
>  	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
>  		if (!(scmd->eh_eflags & SCSI_EH_CANCEL_CMD))
>  			continue;
> +		shost = scmd->device->host;
> +		spin_lock_irqsave(shost->host_lock, flags);
> +		if (scsi_host_eh_deadline(shost)) {
> +			spin_unlock_irqrestore(shost->host_lock, flags);

I think a list_splice_init(&check_list, work_q); is needed here,
otherwise scmds that are on the check_list will be orphaned.

> +			SCSI_LOG_ERROR_RECOVERY(3,
> +				shost_printk(KERN_INFO, shost,
> +					    "skip %s, eh timeout\n", __func__));
> +			return 1;
> +		}
> +		spin_unlock_irqrestore(shost->host_lock, flags);
>  		SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:"
>  						  "0x%p\n", current->comm,
>  						  scmd));
> -		rtn = scsi_try_to_abort_cmd(scmd->device->host->hostt, scmd);
> +		rtn = scsi_try_to_abort_cmd(shost->hostt, scmd);
>  		if (rtn == SUCCESS || rtn == FAST_IO_FAIL) {
>  			scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD;
>  			if (rtn == FAST_IO_FAIL)
> @@ -1248,8 +1326,18 @@ static int scsi_eh_stu(struct Scsi_Host *shost,
>  {
>  	struct scsi_cmnd *scmd, *stu_scmd, *next;
>  	struct scsi_device *sdev;
> +	unsigned long flags;
>  
>  	shost_for_each_device(sdev, shost) {
> +		spin_lock_irqsave(shost->host_lock, flags);
> +		if (scsi_host_eh_deadline(shost)) {
> +			spin_unlock_irqrestore(shost->host_lock, flags);
> +			SCSI_LOG_ERROR_RECOVERY(3,
> +				shost_printk(KERN_INFO, shost,
> +					    "skip %s, eh timeout\n", __func__));
> +			break;
> +		}
> +		spin_unlock_irqrestore(shost->host_lock, flags);
>  		stu_scmd = NULL;
>  		list_for_each_entry(scmd, work_q, eh_entry)
>  			if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) &&
> @@ -1302,9 +1390,19 @@ static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,
>  {
>  	struct scsi_cmnd *scmd, *bdr_scmd, *next;
>  	struct scsi_device *sdev;
> +	unsigned long flags;
>  	int rtn;
>  
>  	shost_for_each_device(sdev, shost) {
> +		spin_lock_irqsave(shost->host_lock, flags);
> +		if (scsi_host_eh_deadline(shost)) {
> +			spin_unlock_irqrestore(shost->host_lock, flags);
> +			SCSI_LOG_ERROR_RECOVERY(3,
> +				shost_printk(KERN_INFO, shost,
> +					    "skip %s, eh timeout\n", __func__));
> +			break;
> +		}
> +		spin_unlock_irqrestore(shost->host_lock, flags);
>  		bdr_scmd = NULL;
>  		list_for_each_entry(scmd, work_q, eh_entry)
>  			if (scmd->device == sdev) {
> @@ -1364,6 +1462,19 @@ static int scsi_eh_target_reset(struct Scsi_Host *shost,
>  		struct scsi_cmnd *next, *scmd;
>  		int rtn;
>  		unsigned int id;
> +		unsigned long flags;
> +
> +		spin_lock_irqsave(shost->host_lock, flags);
> +		if (scsi_host_eh_deadline(shost)) {
> +			spin_unlock_irqrestore(shost->host_lock, flags);
> +			/* push back on work queue for further processing */

I think a list_splice_init(&check_list, work_q); is needed here,
otherwise scmds that are on the check_list will be orphaned.

> +			list_splice_init(&tmp_list, work_q);
> +			SCSI_LOG_ERROR_RECOVERY(3,
> +				shost_printk(KERN_INFO, shost,
> +					    "skip %s, eh timeout\n", __func__));
> +			return list_empty(work_q);
> +		}
> +		spin_unlock_irqrestore(shost->host_lock, flags);
>  
>  		scmd = list_entry(tmp_list.next, struct scsi_cmnd, eh_entry);
>  		id = scmd_id(scmd);
> @@ -1408,6 +1519,7 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost,
>  	LIST_HEAD(check_list);
>  	unsigned int channel;
>  	int rtn;
> +	unsigned long flags;
>  
>  	/*
>  	 * we really want to loop over the various channels, and do this on
> @@ -1417,6 +1529,16 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost,
>  	 */
>  
>  	for (channel = 0; channel <= shost->max_channel; channel++) {
> +		spin_lock_irqsave(shost->host_lock, flags);
> +		if (scsi_host_eh_deadline(shost)) {
> +			spin_unlock_irqrestore(shost->host_lock, flags);

I think a list_splice_init(&check_list, work_q); is needed here,
otherwise scmds that are on the check_list will be orphaned.

> +			SCSI_LOG_ERROR_RECOVERY(3,
> +				shost_printk(KERN_INFO, shost,
> +					    "skip %s, eh timeout\n", __func__));
> +			return list_empty(work_q);
> +		}
> +		spin_unlock_irqrestore(shost->host_lock, flags);
> +
>  		chan_scmd = NULL;
>  		list_for_each_entry(scmd, work_q, eh_entry) {
>  			if (channel == scmd_channel(scmd)) {
> @@ -1822,8 +1944,9 @@ static void scsi_restart_operations(struct Scsi_Host *shost)
>  	 * will be requests for character device operations, and also for
>  	 * ioctls to queued block devices.
>  	 */
> -	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n",
> -					  __func__));
> +	SCSI_LOG_ERROR_RECOVERY(3,
> +		printk("scsi_eh_%d waking up host to restart\n",
> +		       shost->host_no));
>  
>  	spin_lock_irqsave(shost->host_lock, flags);
>  	if (scsi_host_set_state(shost, SHOST_RUNNING))
> @@ -1950,6 +2073,10 @@ static void scsi_unjam_host(struct Scsi_Host *shost)
>  		if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q))
>  			scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);
>  
> +	spin_lock_irqsave(shost->host_lock, flags);
> +	if (sdev->eh_deadline)

I think this is supposed to be if (shost->eh_deadline ... since the
eh_deadline field is added to struct Scsi_Host by this patch.

> +		shost->last_reset = 0;
> +	spin_unlock_irqrestore(shost->host_lock, flags);
>  	scsi_eh_flush_done_q(&eh_done_q);
>  }
>  
> @@ -1976,7 +2103,7 @@ int scsi_error_handler(void *data)
>  		if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) ||
>  		    shost->host_failed != shost->host_busy) {
>  			SCSI_LOG_ERROR_RECOVERY(1,
> -				printk("Error handler scsi_eh_%d sleeping\n",
> +				printk("scsi_eh_%d: sleeping\n",
>  					shost->host_no));
>  			schedule();
>  			continue;
> @@ -1984,8 +2111,9 @@ int scsi_error_handler(void *data)
>  
>  		__set_current_state(TASK_RUNNING);
>  		SCSI_LOG_ERROR_RECOVERY(1,
> -			printk("Error handler scsi_eh_%d waking up\n",
> -				shost->host_no));
> +			printk("scsi_eh_%d: waking up %d/%d/%d\n",
> +			       shost->host_no, shost->host_eh_scheduled,
> +			       shost->host_failed, shost->host_busy));
>  
>  		/*
>  		 * We have a host that is failing for some reason.  Figure out
> diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
> index af64c1c..3c1742f 100644
> --- a/drivers/scsi/scsi_sysfs.c
> +++ b/drivers/scsi/scsi_sysfs.c
> @@ -281,6 +281,42 @@ exit_store_host_reset:
>  
>  static DEVICE_ATTR(host_reset, S_IWUSR, NULL, store_host_reset);
>  
> +static ssize_t
> +show_shost_eh_deadline(struct device *dev,
> +		      struct device_attribute *attr, char *buf)
> +{
> +	struct Scsi_Host *shost = class_to_shost(dev);
> +
> +	return sprintf(buf, "%d\n", shost->eh_deadline);

I think that the attribute should be specified in seconds, so
this should be shost->eh_deadline / HZ.

> +}
> +
> +static ssize_t
> +store_shost_eh_deadline(struct device *dev, struct device_attribute *attr,
> +		const char *buf, size_t count)
> +{
> +	struct Scsi_Host *shost = class_to_shost(dev);
> +	int ret = -EINVAL;
> +	int timeout;
> +	unsigned long flags;
> +
> +	if (shost->transportt->eh_strategy_handler)
> +		return ret;
> +
> +	if (sscanf(buf, "%d\n", &timeout) == 1) {
> +		spin_lock_irqsave(shost->host_lock, flags);
> +		if (scsi_host_in_recovery(shost))
> +			ret = -EBUSY;
> +		else {
> +			shost->eh_deadline = timeout;

I think the deadline should be specified in seconds, so this
should be shost->eh_deadline = timeout * HZ;

> +			ret = count;
> +		}
> +		spin_unlock_irqrestore(shost->host_lock, flags);
> +	}
> +	return ret;
> +}
> +
> +static DEVICE_ATTR(eh_deadline, S_IRUGO | S_IWUSR, show_shost_eh_deadline, store_shost_eh_deadline);
> +
>  shost_rd_attr(unique_id, "%u\n");
>  shost_rd_attr(host_busy, "%hu\n");
>  shost_rd_attr(cmd_per_lun, "%hd\n");
> @@ -308,6 +344,7 @@ static struct attribute *scsi_sysfs_shost_attrs[] = {
>  	&dev_attr_prot_capabilities.attr,
>  	&dev_attr_prot_guard_type.attr,
>  	&dev_attr_host_reset.attr,
> +	&dev_attr_eh_deadline.attr,
>  	NULL
>  };
>  
> diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
> index 7552435..ca87486 100644
> --- a/include/scsi/scsi_host.h
> +++ b/include/scsi/scsi_host.h
> @@ -598,7 +598,7 @@ struct Scsi_Host {
>  	unsigned int host_eh_scheduled;    /* EH scheduled without command */
>      
>  	unsigned int host_no;  /* Used for IOCTL_GET_IDLUN, /proc/scsi et al. */
> -	int resetting; /* if set, it means that last_reset is a valid value */
> +	int eh_deadline; /* Deadline for EH runtime */
>  	unsigned long last_reset;
>  
>  	/*



  reply	other threads:[~2013-06-27 14:33 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-06-10 11:11 [PATCH 0/7] Limit overall SCSI EH runtime Hannes Reinecke
2013-06-10 11:11 ` [PATCH 1/7] dpt_i2o: Remove DPTI_STATE_IOCTL Hannes Reinecke
2013-06-10 11:11 ` [PATCH 2/7] dpt_i2o: return SCSI_MLQUEUE_HOST_BUSY when in reset Hannes Reinecke
2013-06-10 11:11 ` [PATCH 3/7] advansys: Remove 'last_reset' references Hannes Reinecke
2013-06-10 11:11 ` [PATCH 4/7] tmscsim: Move 'last_reset' into host structure Hannes Reinecke
2013-06-10 11:11 ` [PATCH 5/7] dc395: Move 'last_reset' into internal " Hannes Reinecke
2013-06-10 11:11 ` [PATCH 6/7] scsi: remove check for 'resetting' Hannes Reinecke
2013-06-10 11:11 ` [PATCH 7/7] scsi: Add 'eh_deadline' to limit SCSI EH runtime Hannes Reinecke
2013-06-27 14:33   ` Ewan Milne [this message]
2013-06-28  7:14     ` Hannes Reinecke
2013-06-28 12:54       ` Ewan Milne
2013-06-28  7:29   ` Bart Van Assche
2013-06-28  7:42     ` Hannes Reinecke
2013-06-27  9:23 ` [PATCH 0/7] Limit overall " Ren Mingxin
2013-07-01  6:50 [PATCHv2 " Hannes Reinecke
2013-07-01  6:50 ` [PATCH 7/7] scsi: Add 'eh_deadline' to limit " Hannes Reinecke
2013-09-20  7:48   ` Ren Mingxin
2013-10-16 19:22   ` James Bottomley
2013-10-17 14:27     ` Ewan Milne
2013-10-23  9:25     ` Hannes Reinecke
2013-10-23  7:46       ` James Bottomley
2013-10-23  9:49         ` Hannes Reinecke

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1372343589.3871.406.camel@localhost.localdomain \
    --to=emilne@redhat.com \
    --cc=bmr@redhat.com \
    --cc=hare@suse.de \
    --cc=hch@infradead.org \
    --cc=james.smart@emulex.com \
    --cc=jbottomley@parallels.com \
    --cc=joern@logfs.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=renmx@cn.fujitsu.com \
    --cc=roland@purestorage.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.