Linux-NVME Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH v2] nvme-fabrics: reject I/O to offline device
@ 2020-01-01 16:45 Victor Gladkov
  2020-01-07 16:17 ` Hannes Reinecke
  2020-01-08 19:47 ` James Smart
  0 siblings, 2 replies; 8+ messages in thread
From: Victor Gladkov @ 2020-01-01 16:45 UTC (permalink / raw)
  To: Sagi Grimberg, James Smart, linux-nvme

Issue Description:
Commands get stuck while Host NVMe controller (TCP or RDMA) is in reconnect state. 
NVMe controller enters into reconnect state when it loses connection with the target. It tries to reconnect every 10 seconds (default) until successful reconnection or until reconnect time-out is reached. The default reconnect time out is 10 minutes.
This behavior differs from iSCSI, where commands issued during the reconnect state are returned with the following error: "rejecting I/O to offline device"

Fix Description:
Following your suggestions:
1. Add a new session parameter called "fail_fast_tmo".
	The timeout is measured in seconds from the moment the controller starts reconnecting; any command issued after the timeout has expired is rejected.
	The new parameter value may be passed during 'connect', and its default value is 0 (NVMF_DEF_FAIL_FAST_TMO in this version of the patch).
	A value of 0 means no timeout (the same as the current behavior).
2. Add a controller flag of "failfast_expired".
3. Add a dedicated delayed_work that updates the "failfast_expired" controller flag.
4. When entering CONNECTING, schedule the delayed_work based on failfast timeout value. 
	On transition out of CONNECTING, cancel the delayed work item and ensure failfast_expired is false.
	If delayed work item expires: set "failfast_expired" flag to true.
5. Update nvmf_fail_nonready_command() to check the "failfast_expired" controller flag.

branch nvme/for-5.5
---------------------------------
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2a84e14..8e30e03 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -93,6 +93,8 @@
 static void nvme_put_subsystem(struct nvme_subsystem *subsys);
 static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
 					   unsigned nsid);
+static void nvme_start_failfast_work(struct nvme_ctrl *ctrl);
+static void nvme_stop_failfast_work(struct nvme_ctrl *ctrl);
  static void nvme_set_queue_dying(struct nvme_ns *ns)
 {
@@ -339,6 +341,8 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 		case NVME_CTRL_CONNECTING:
 			changed = true;
 			/* FALLTHRU */
+			if(old_state == NVME_CTRL_CONNECTING)
+				nvme_stop_failfast_work(ctrl);
 		default:
 			break;
 		}
@@ -359,6 +363,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 		case NVME_CTRL_RESETTING:
 			changed = true;
 			/* FALLTHRU */
+			nvme_start_failfast_work(ctrl);
 		default:
 			break;
 		}
@@ -1033,6 +1038,36 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
 +static void nvme_failfast_work(struct work_struct *work)
+{
+	struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
+			struct nvme_ctrl, failfast_work);
+
+	spin_lock_irq(&ctrl->lock);
+	if (ctrl->state == NVME_CTRL_CONNECTING) {
+		set_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
+		dev_info(ctrl->device, "failfast expired set for controller %s\n", ctrl->opts->subsysnqn);
+	}
+	spin_unlock_irq(&ctrl->lock);
+}
+
+static void nvme_start_failfast_work(struct nvme_ctrl *ctrl)
+{
+	if (unlikely(ctrl->opts->fail_fast_tmo == 0))
+		return;
+
+	schedule_delayed_work(&ctrl->failfast_work, ctrl->opts->fail_fast_tmo * HZ);
+}
+
+static void nvme_stop_failfast_work(struct nvme_ctrl *ctrl)
+{
+	if (unlikely(ctrl->opts->fail_fast_tmo == 0))
+		return;
+
+	cancel_delayed_work_sync(&ctrl->failfast_work);
+	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
+}
+
 static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
 {
 	struct nvme_command c = { };
@@ -3979,6 +4014,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 {
 	nvme_mpath_stop(ctrl);
 	nvme_stop_keep_alive(ctrl);
+	nvme_stop_failfast_work(ctrl);
 	flush_work(&ctrl->async_event_work);
 	cancel_work_sync(&ctrl->fw_act_work);
 }
@@ -4043,6 +4079,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	int ret;
 	ctrl->state = NVME_CTRL_NEW;
+	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
 	spin_lock_init(&ctrl->lock);
 	mutex_init(&ctrl->scan_lock);
 	INIT_LIST_HEAD(&ctrl->namespaces);
@@ -4057,6 +4094,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	init_waitqueue_head(&ctrl->state_wq);
 	INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
+	INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work);
 	memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
 	ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 74b8818..e952e5d 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -549,6 +549,7 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
 {
 	if (ctrl->state != NVME_CTRL_DELETING &&
 	    ctrl->state != NVME_CTRL_DEAD &&
+	    !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
 	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
 		return BLK_STS_RESOURCE;
@@ -612,6 +613,7 @@ bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
 	{ NVMF_OPT_NR_WRITE_QUEUES,	"nr_write_queues=%d"	},
 	{ NVMF_OPT_NR_POLL_QUEUES,	"nr_poll_queues=%d"	},
 	{ NVMF_OPT_TOS,			"tos=%d"		},
+	{ NVMF_OPT_FAIL_FAST_TMO,	"fail_fast_tmo=%d"	},
 	{ NVMF_OPT_ERR,			NULL			}
 };
@@ -630,6 +632,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 	opts->nr_io_queues = num_online_cpus();
 	opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
 	opts->kato = NVME_DEFAULT_KATO;
+	opts->fail_fast_tmo = NVMF_DEF_FAIL_FAST_TMO;
 	opts->duplicate_connect = false;
 	opts->hdr_digest = false;
 	opts->data_digest = false;
@@ -751,6 +754,17 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 				pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n");
 			ctrl_loss_tmo = token;
 			break;
+		case NVMF_OPT_FAIL_FAST_TMO:
+			if (match_int(args, &token)) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			if (token)
+				pr_warn("fail_fast_tmo != 0, I/O will failed on reconnect controller after %d sec\n", token);
+
+			opts->fail_fast_tmo  = token;
+			break;
 		case NVMF_OPT_HOSTNQN:
 			if (opts->host) {
 				pr_err("hostnqn already user-assigned: %s\n",
@@ -881,11 +895,14 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 		opts->nr_poll_queues = 0;
 		opts->duplicate_connect = true;
 	}
-	if (ctrl_loss_tmo < 0)
+	if (ctrl_loss_tmo < 0){
 		opts->max_reconnects = -1;
-	else
+	}else{
 		opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
 						opts->reconnect_delay);
+		if(ctrl_loss_tmo < opts->fail_fast_tmo)
+			pr_warn("failfast tmo (%d) larger than controller loss tmo (%d)\n", opts->fail_fast_tmo, ctrl_loss_tmo);
+	}

 	if (!opts->host) {
 		kref_get(&nvmf_default_host->ref);
@@ -985,7 +1002,7 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts)
 #define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
 				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
 				 NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
-				 NVMF_OPT_DISABLE_SQFLOW)
+				 NVMF_OPT_DISABLE_SQFLOW|NVMF_OPT_FAIL_FAST_TMO)

static struct nvme_ctrl *
nvmf_create_ctrl(struct device *dev, const char *buf)
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index a0ec40a..fd8c7dd 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -15,6 +15,8 @@
 #define NVMF_DEF_RECONNECT_DELAY	10
/* default to 600 seconds of reconnect attempts before giving up */
 #define NVMF_DEF_CTRL_LOSS_TMO		600
+/* default to 0: don't fail fast IO commands  */
+#define NVMF_DEF_FAIL_FAST_TMO		0

/*
  * Define a host as seen by the target.  We allocate one at boot, but also
@@ -56,6 +58,7 @@ enum {
 	NVMF_OPT_NR_WRITE_QUEUES = 1 << 17,
 	NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
 	NVMF_OPT_TOS		= 1 << 19,
+	NVMF_OPT_FAIL_FAST_TMO	= 1 << 20,
 };
/**
@@ -89,6 +92,7 @@ enum {
  * @nr_write_queues: number of queues for write I/O
  * @nr_poll_queues: number of queues for polling I/O
  * @tos: type of service
+ * @fail_fast_tmo: Fast I/O fail timeout in seconds;
  */
 struct nvmf_ctrl_options {
 	unsigned		mask;
@@ -111,6 +115,7 @@ struct nvmf_ctrl_options {
 	unsigned int		nr_write_queues;
 	unsigned int		nr_poll_queues;
 	int			tos;
+	unsigned int	fail_fast_tmo;
 };

 /*
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 1024fec..b6a199e 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -256,6 +256,7 @@ struct nvme_ctrl {
 	struct work_struct scan_work;
 	struct work_struct async_event_work;
 	struct delayed_work ka_work;
+	struct delayed_work failfast_work;
 	struct nvme_command ka_cmd;
 	struct work_struct fw_act_work;
 	unsigned long events;
@@ -289,6 +290,8 @@ struct nvme_ctrl {
 	u16 icdoff;
 	u16 maxcmd;
 	int nr_reconnects;
+	unsigned long flags;
+#define NVME_CTRL_FAILFAST_EXPIRED	0
 	struct nvmf_ctrl_options *opts;

 	struct page *discard_page;

-----------------
Regards,
Victor


_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] nvme-fabrics: reject I/O to offline device
  2020-01-01 16:45 [PATCH v2] nvme-fabrics: reject I/O to offline device Victor Gladkov
@ 2020-01-07 16:17 ` Hannes Reinecke
  2020-01-08 19:47 ` James Smart
  1 sibling, 0 replies; 8+ messages in thread
From: Hannes Reinecke @ 2020-01-07 16:17 UTC (permalink / raw)
  To: Victor Gladkov, Sagi Grimberg, James Smart, linux-nvme

On 1/1/20 5:45 PM, Victor Gladkov wrote:
> Issue Description:
> Commands get stuck while Host NVMe controller (TCP or RDMA) is in reconnect state. 
> NVMe controller enters into reconnect state when it loses connection with the target. It tries to reconnect every 10 seconds (default) until successful reconnection or until reconnect time-out is reached. The default reconnect time out is 10 minutes.
> This behavior is different than ISCSI where Commands during reconnect state returns with the following error: "rejecting I/O to offline device"
> 
> Fix Description:
> Following your suggestions:
> 1. Add a new session parameter called "fast_fail_tmo". 
> 	The timeout is measured in seconds from the controller reconnect, any command beyond that timeout is rejected. 
> 	The new parameter value may be passed during 'connect', and its default value is 30 seconds. 
> 	A value of 0 means no timeout (in similar to current behavior).
> 2. Add a controller flag of "failfast_expired".
> 3. Add dedicated delayed_work that update the "failfast_expired" controller flag.
> 4. When entering CONNECTING, schedule the delayed_work based on failfast timeout value. 
> 	If transition out of CONNECTING, terminate delayed work item and ensure failfast_expired is false.
> 	If delayed work item expires: set "failfast_expired" flag to true.
> 5. Update nvmf_fail_nonready_command() with check the "failfast_expired" controller flag.
> 
> branch nvme/for-5.5
> ---------------------------------
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 2a84e14..8e30e03 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -93,6 +93,8 @@
>  static void nvme_put_subsystem(struct nvme_subsystem *subsys);
>  static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
>  					   unsigned nsid);
> +static void nvme_start_failfast_work(struct nvme_ctrl *ctrl);
> +static void nvme_stop_failfast_work(struct nvme_ctrl *ctrl);
>   static void nvme_set_queue_dying(struct nvme_ns *ns)
>  {
> @@ -339,6 +341,8 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
>  		case NVME_CTRL_CONNECTING:
>  			changed = true;
>  			/* FALLTHRU */
> +			if(old_state == NVME_CTRL_CONNECTING)
> +				nvme_stop_failfast_work(ctrl);
>  		default:
>  			break;
>  		}
> @@ -359,6 +363,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
>  		case NVME_CTRL_RESETTING:
>  			changed = true;
>  			/* FALLTHRU */
> +			nvme_start_failfast_work(ctrl);
>  		default:
>  			break;
>  		}
> @@ -1033,6 +1038,36 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
>  }
>  EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
>  +static void nvme_failfast_work(struct work_struct *work)
> +{
> +	struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
> +			struct nvme_ctrl, failfast_work);
> +
> +	spin_lock_irq(&ctrl->lock);
> +	if (ctrl->state == NVME_CTRL_CONNECTING) {
> +		set_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
> +		dev_info(ctrl->device, "failfast expired set for controller %s\n", ctrl->opts->subsysnqn);
> +	}
> +	spin_unlock_irq(&ctrl->lock);
> +}
> +
> +static void nvme_start_failfast_work(struct nvme_ctrl *ctrl)
> +{
> +	if (unlikely(ctrl->opts->fail_fast_tmo == 0))
> +		return;
> +
> +	schedule_delayed_work(&ctrl->failfast_work, ctrl->opts->fail_fast_tmo * HZ);
> +}
> +
> +static void nvme_stop_failfast_work(struct nvme_ctrl *ctrl)
> +{
> +	if (unlikely(ctrl->opts->fail_fast_tmo == 0))
> +		return;
> +
> +	cancel_delayed_work_sync(&ctrl->failfast_work);
> +	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
> +}
> +
>  static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
>  {
>  	struct nvme_command c = { };
> @@ -3979,6 +4014,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
>  {
>  	nvme_mpath_stop(ctrl);
>  	nvme_stop_keep_alive(ctrl);
> +	nvme_stop_failfast_work(ctrl);
>  	flush_work(&ctrl->async_event_work);
>  	cancel_work_sync(&ctrl->fw_act_work);
>  }
> @@ -4043,6 +4079,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>  	int ret;
>  	ctrl->state = NVME_CTRL_NEW;
> +	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
>  	spin_lock_init(&ctrl->lock);
>  	mutex_init(&ctrl->scan_lock);
>  	INIT_LIST_HEAD(&ctrl->namespaces);
> @@ -4057,6 +4094,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>  	init_waitqueue_head(&ctrl->state_wq);
>  	INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
> +	INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work);
>  	memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
>  	ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
> diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
> index 74b8818..e952e5d 100644
> --- a/drivers/nvme/host/fabrics.c
> +++ b/drivers/nvme/host/fabrics.c
> @@ -549,6 +549,7 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
>  {
>  	if (ctrl->state != NVME_CTRL_DELETING &&
>  	    ctrl->state != NVME_CTRL_DEAD &&
> +	    !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
>  	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
>  		return BLK_STS_RESOURCE;
> @@ -612,6 +613,7 @@ bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
>  	{ NVMF_OPT_NR_WRITE_QUEUES,	"nr_write_queues=%d"	},
>  	{ NVMF_OPT_NR_POLL_QUEUES,	"nr_poll_queues=%d"	},
>  	{ NVMF_OPT_TOS,			"tos=%d"		},
> +	{ NVMF_OPT_FAIL_FAST_TMO,	"fail_fast_tmo=%d"	},
>  	{ NVMF_OPT_ERR,			NULL			}
>  };
> @@ -630,6 +632,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
>  	opts->nr_io_queues = num_online_cpus();
>  	opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
>  	opts->kato = NVME_DEFAULT_KATO;
> +	opts->fail_fast_tmo = NVMF_DEF_FAIL_FAST_TMO;
>  	opts->duplicate_connect = false;
>  	opts->hdr_digest = false;
>  	opts->data_digest = false;
> @@ -751,6 +754,17 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
>  				pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n");
>  			ctrl_loss_tmo = token;
>  			break;
> +		case NVMF_OPT_FAIL_FAST_TMO:
> +			if (match_int(args, &token)) {
> +				ret = -EINVAL;
> +				goto out;
> +			}
> +
> +			if (token)
> +				pr_warn("fail_fast_tmo != 0, I/O will failed on reconnect controller after %d sec\n", token);
> +
> +			opts->fail_fast_tmo  = token;
> +			break;
>  		case NVMF_OPT_HOSTNQN:
>  			if (opts->host) {
>  				pr_err("hostnqn already user-assigned: %s\n",
> @@ -881,11 +895,14 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
>  		opts->nr_poll_queues = 0;
>  		opts->duplicate_connect = true;
>  	}
> -	if (ctrl_loss_tmo < 0)
> +	if (ctrl_loss_tmo < 0){
>  		opts->max_reconnects = -1;
> -	else
> +	}else{
>  		opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
>  						opts->reconnect_delay);
> +		if(ctrl_loss_tmo < opts->fail_fast_tmo)
> +			pr_warn("failfast tmo (%d) larger than controller loss tmo (%d)\n", opts->fail_fast_tmo, ctrl_loss_tmo);
> +	}
> 
>  	if (!opts->host) {
>  		kref_get(&nvmf_default_host->ref);
> @@ -985,7 +1002,7 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts)
>  #define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
>  				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
>  				 NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
> -				 NVMF_OPT_DISABLE_SQFLOW)
> +				 NVMF_OPT_DISABLE_SQFLOW|NVMF_OPT_FAIL_FAST_TMO)
> 
> static struct nvme_ctrl *
> nvmf_create_ctrl(struct device *dev, const char *buf)
> diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
> index a0ec40a..fd8c7dd 100644
> --- a/drivers/nvme/host/fabrics.h
> +++ b/drivers/nvme/host/fabrics.h
> @@ -15,6 +15,8 @@
>  #define NVMF_DEF_RECONNECT_DELAY	10
> /* default to 600 seconds of reconnect attempts before giving up */
>  #define NVMF_DEF_CTRL_LOSS_TMO		600
> +/* default to 0: don't fail fast IO commands  */
> +#define NVMF_DEF_FAIL_FAST_TMO		0
> 
> /*
>   * Define a host as seen by the target.  We allocate one at boot, but also
> @@ -56,6 +58,7 @@ enum {
>  	NVMF_OPT_NR_WRITE_QUEUES = 1 << 17,
>  	NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
>  	NVMF_OPT_TOS		= 1 << 19,
> +	NVMF_OPT_FAIL_FAST_TMO	= 1 << 20,
>  };
> /**
> @@ -89,6 +92,7 @@ enum {
>   * @nr_write_queues: number of queues for write I/O
>   * @nr_poll_queues: number of queues for polling I/O
>   * @tos: type of service
> + * @fast_fail_tmo: Fast I/O fail timeout in seconds;
>   */
>  struct nvmf_ctrl_options {
>  	unsigned		mask;
> @@ -111,6 +115,7 @@ struct nvmf_ctrl_options {
>  	unsigned int		nr_write_queues;
>  	unsigned int		nr_poll_queues;
>  	int			tos;
> +	unsigned int	fail_fast_tmo;
>  };
> 
>  /*
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 1024fec..b6a199e 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -256,6 +256,7 @@ struct nvme_ctrl {
>  	struct work_struct scan_work;
>  	struct work_struct async_event_work;
>  	struct delayed_work ka_work;
> +	struct delayed_work failfast_work;
>  	struct nvme_command ka_cmd;
>  	struct work_struct fw_act_work;
>  	unsigned long events;
> @@ -289,6 +290,8 @@ struct nvme_ctrl {
>  	u16 icdoff;
>  	u16 maxcmd;
>  	int nr_reconnects;
> +	unsigned long flags;
> +#define NVME_CTRL_FAILFAST_EXPIRED	0
>  	struct nvmf_ctrl_options *opts;
> 
>  	struct page *discard_page;
> 
In principle it looks good, but:

What happens if the controller reconnects _after_ failfast has triggered?
From my reading it would simply establish the connection, so I/O _could_
continue to be served from this path.
But seeing that I/O has already been failed due to failfast tmo the
system will most likely have taken corrective action, such as failing
over to another path or informing the cluster manager etc.
So who's going to inform the upper layers that the path has become live
again?

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		      Teamlead Storage & Networking
hare@suse.de			                  +49 911 74053 688
SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), GF: Felix Imendörffer

_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] nvme-fabrics: reject I/O to offline device
  2020-01-01 16:45 [PATCH v2] nvme-fabrics: reject I/O to offline device Victor Gladkov
  2020-01-07 16:17 ` Hannes Reinecke
@ 2020-01-08 19:47 ` James Smart
  2020-01-15 15:42   ` Victor Gladkov
  2020-01-26 10:06   ` Victor Gladkov
  1 sibling, 2 replies; 8+ messages in thread
From: James Smart @ 2020-01-08 19:47 UTC (permalink / raw)
  To: Victor Gladkov, Sagi Grimberg, linux-nvme

On 1/1/2020 8:45 AM, Victor Gladkov wrote:
> Issue Description:
> Commands get stuck while Host NVMe controller (TCP or RDMA) is in reconnect state.
> NVMe controller enters into reconnect state when it loses connection with the target. It tries to reconnect every 10 seconds (default) until successful reconnection or until reconnect time-out is reached. The default reconnect time out is 10 minutes.
> This behavior is different than ISCSI where Commands during reconnect state returns with the following error: "rejecting I/O to offline device"
>
> Fix Description:
> Following your suggestions:
> 1. Add a new session parameter called "fast_fail_tmo".
> 	The timeout is measured in seconds from the controller reconnect, any command beyond that timeout is rejected.
> 	The new parameter value may be passed during 'connect', and its default value is 30 seconds.
> 	A value of 0 means no timeout (in similar to current behavior).
> 2. Add a controller flag of "failfast_expired".
> 3. Add dedicated delayed_work that update the "failfast_expired" controller flag.
> 4. When entering CONNECTING, schedule the delayed_work based on failfast timeout value.
> 	If transition out of CONNECTING, terminate delayed work item and ensure failfast_expired is false.
> 	If delayed work item expires: set "failfast_expired" flag to true.
> 5. Update nvmf_fail_nonready_command() with check the "failfast_expired" controller flag.
>
> branch nvme/for-5.5
> ---------------------------------
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 2a84e14..8e30e03 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -93,6 +93,8 @@
>   static void nvme_put_subsystem(struct nvme_subsystem *subsys);
>   static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
>   					   unsigned nsid);
> +static void nvme_start_failfast_work(struct nvme_ctrl *ctrl);
> +static void nvme_stop_failfast_work(struct nvme_ctrl *ctrl);
>    static void nvme_set_queue_dying(struct nvme_ns *ns)
>   {
> @@ -339,6 +341,8 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
>   		case NVME_CTRL_CONNECTING:
>   			changed = true;
>   			/* FALLTHRU */
> +			if(old_state == NVME_CTRL_CONNECTING)
> +				nvme_stop_failfast_work(ctrl);

What you have is fine, but a simple reorg to avoid the if-test works fine too:
e.g.
@@ -334,9 +334,11 @@ bool nvme_change_ctrl_state(struct nvme_
      switch (new_state) {
      case NVME_CTRL_LIVE:
          switch (old_state) {
+        case NVME_CTRL_CONNECTING:
+            nvme_stop_failfast_work(ctrl);
+            /* FALLTHRU */
          case NVME_CTRL_NEW:
          case NVME_CTRL_RESETTING:
-        case NVME_CTRL_CONNECTING:
              changed = true;
              /* FALLTHRU */
          default:


>   		default:
>   			break;
>   		}
> @@ -359,6 +363,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
>   		case NVME_CTRL_RESETTING:
>   			changed = true;
>   			/* FALLTHRU */
> +			nvme_start_failfast_work(ctrl);
>   		default:
>   			break;
>   		}
> @@ -1033,6 +1038,36 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
>   }
>   EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
>   +static void nvme_failfast_work(struct work_struct *work)
> +{
> +	struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
> +			struct nvme_ctrl, failfast_work);
> +
> +	spin_lock_irq(&ctrl->lock);
> +	if (ctrl->state == NVME_CTRL_CONNECTING) {
> +		set_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
> +		dev_info(ctrl->device, "failfast expired set for controller %s\n", ctrl->opts->subsysnqn);
> +	}
> +	spin_unlock_irq(&ctrl->lock);
> +}
> +
> +static void nvme_start_failfast_work(struct nvme_ctrl *ctrl)
> +{
> +	if (unlikely(ctrl->opts->fail_fast_tmo == 0))
> +		return;
> +
> +	schedule_delayed_work(&ctrl->failfast_work, ctrl->opts->fail_fast_tmo * HZ);
> +}
> +
> +static void nvme_stop_failfast_work(struct nvme_ctrl *ctrl)
> +{
> +	if (unlikely(ctrl->opts->fail_fast_tmo == 0))
> +		return;
> +
> +	cancel_delayed_work_sync(&ctrl->failfast_work);
> +	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
> +}
> +

Any reason they aren't added higher in the file to avoid the prototypes ?

>   static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
>   {
>   	struct nvme_command c = { };
> @@ -3979,6 +4014,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
>   {
>   	nvme_mpath_stop(ctrl);
>   	nvme_stop_keep_alive(ctrl);
> +	nvme_stop_failfast_work(ctrl);
>   	flush_work(&ctrl->async_event_work);
>   	cancel_work_sync(&ctrl->fw_act_work);
>   }
> @@ -4043,6 +4079,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>   	int ret;
>   	ctrl->state = NVME_CTRL_NEW;
> +	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
>   	spin_lock_init(&ctrl->lock);
>   	mutex_init(&ctrl->scan_lock);
>   	INIT_LIST_HEAD(&ctrl->namespaces);
> @@ -4057,6 +4094,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>   	init_waitqueue_head(&ctrl->state_wq);
>   	INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
> +	INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work);
>   	memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
>   	ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
> diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
> index 74b8818..e952e5d 100644
> --- a/drivers/nvme/host/fabrics.c
> +++ b/drivers/nvme/host/fabrics.c
> @@ -549,6 +549,7 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
>   {
>   	if (ctrl->state != NVME_CTRL_DELETING &&
>   	    ctrl->state != NVME_CTRL_DEAD &&
> +	    !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
>   	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
>   		return BLK_STS_RESOURCE;
> @@ -612,6 +613,7 @@ bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
>   	{ NVMF_OPT_NR_WRITE_QUEUES,	"nr_write_queues=%d"	},
>   	{ NVMF_OPT_NR_POLL_QUEUES,	"nr_poll_queues=%d"	},
>   	{ NVMF_OPT_TOS,			"tos=%d"		},
> +	{ NVMF_OPT_FAIL_FAST_TMO,	"fail_fast_tmo=%d"	},
>   	{ NVMF_OPT_ERR,			NULL			}
>   };
> @@ -630,6 +632,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
>   	opts->nr_io_queues = num_online_cpus();
>   	opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
>   	opts->kato = NVME_DEFAULT_KATO;
> +	opts->fail_fast_tmo = NVMF_DEF_FAIL_FAST_TMO;
>   	opts->duplicate_connect = false;
>   	opts->hdr_digest = false;
>   	opts->data_digest = false;
> @@ -751,6 +754,17 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
>   				pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n");
>   			ctrl_loss_tmo = token;
>   			break;
> +		case NVMF_OPT_FAIL_FAST_TMO:
> +			if (match_int(args, &token)) {
> +				ret = -EINVAL;
> +				goto out;
> +			}
> +
> +			if (token)
> +				pr_warn("fail_fast_tmo != 0, I/O will failed on reconnect controller after %d sec\n", token);
> +
> +			opts->fail_fast_tmo  = token;
> +			break;
>   		case NVMF_OPT_HOSTNQN:
>   			if (opts->host) {
>   				pr_err("hostnqn already user-assigned: %s\n",
> @@ -881,11 +895,14 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
>   		opts->nr_poll_queues = 0;
>   		opts->duplicate_connect = true;
>   	}
> -	if (ctrl_loss_tmo < 0)
> +	if (ctrl_loss_tmo < 0){
>   		opts->max_reconnects = -1;
> -	else
> +	}else{
style issues - need spaces before uses of  { and }
e.g.   "if (ctrl_loss_tmo < 0) {" and "} else {"

>   		opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
>   						opts->reconnect_delay);
> +		if(ctrl_loss_tmo < opts->fail_fast_tmo)
> +			pr_warn("failfast tmo (%d) larger than controller loss tmo (%d)\n", opts->fail_fast_tmo, ctrl_loss_tmo);
> +	}
>
>   	if (!opts->host) {
>   		kref_get(&nvmf_default_host->ref);
> @@ -985,7 +1002,7 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts)
>   #define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
>   				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
>   				 NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
> -				 NVMF_OPT_DISABLE_SQFLOW)
> +				 NVMF_OPT_DISABLE_SQFLOW|NVMF_OPT_FAIL_FAST_TMO)

spaces here too, or line break/continuation

>
> static struct nvme_ctrl *
> nvmf_create_ctrl(struct device *dev, const char *buf)
> diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
> index a0ec40a..fd8c7dd 100644
> --- a/drivers/nvme/host/fabrics.h
> +++ b/drivers/nvme/host/fabrics.h
> @@ -15,6 +15,8 @@
>   #define NVMF_DEF_RECONNECT_DELAY	10
> /* default to 600 seconds of reconnect attempts before giving up */
>   #define NVMF_DEF_CTRL_LOSS_TMO		600
> +/* default to 0: don't fail fast IO commands  */
> +#define NVMF_DEF_FAIL_FAST_TMO		0

I can agree with this - default behavior is old behavior - but given we 
are talking many minutes for old behavior, perhaps we should be making 0 
be immediate failure and not preserve the old behavior ?? Thoughts from 
anyone ?

>
> /*
>    * Define a host as seen by the target.  We allocate one at boot, but also
> @@ -56,6 +58,7 @@ enum {
>   	NVMF_OPT_NR_WRITE_QUEUES = 1 << 17,
>   	NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
>   	NVMF_OPT_TOS		= 1 << 19,
> +	NVMF_OPT_FAIL_FAST_TMO	= 1 << 20,
>   };
> /**
> @@ -89,6 +92,7 @@ enum {
>    * @nr_write_queues: number of queues for write I/O
>    * @nr_poll_queues: number of queues for polling I/O
>    * @tos: type of service
> + * @fast_fail_tmo: Fast I/O fail timeout in seconds;
>    */
>   struct nvmf_ctrl_options {
>   	unsigned		mask;
> @@ -111,6 +115,7 @@ struct nvmf_ctrl_options {
>   	unsigned int		nr_write_queues;
>   	unsigned int		nr_poll_queues;
>   	int			tos;
> +	unsigned int	fail_fast_tmo;
>   };
>
>   /*
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 1024fec..b6a199e 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -256,6 +256,7 @@ struct nvme_ctrl {
>   	struct work_struct scan_work;
>   	struct work_struct async_event_work;
>   	struct delayed_work ka_work;
> +	struct delayed_work failfast_work;
>   	struct nvme_command ka_cmd;
>   	struct work_struct fw_act_work;
>   	unsigned long events;
> @@ -289,6 +290,8 @@ struct nvme_ctrl {
>   	u16 icdoff;
>   	u16 maxcmd;
>   	int nr_reconnects;
> +	unsigned long flags;
> +#define NVME_CTRL_FAILFAST_EXPIRED	0
>   	struct nvmf_ctrl_options *opts;
>
>   	struct page *discard_page;
>
> -----------------
> Regards,
> Victor
>

overall - looks good.   I'd like to see what the answer is to what our 
default action should be (see above, fail_fast = 0) and agree with the 
multipath comment Hannes had.

-- james


_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH v2] nvme-fabrics: reject I/O to offline device
  2020-01-08 19:47 ` James Smart
@ 2020-01-15 15:42   ` Victor Gladkov
  2020-01-26 10:06   ` Victor Gladkov
  1 sibling, 0 replies; 8+ messages in thread
From: Victor Gladkov @ 2020-01-15 15:42 UTC (permalink / raw)
  To: linux-nvme; +Cc: Hannes Reinecke, Sagi Grimberg, James Smart

1. Added multipath support for this patch.
2. Small refactoring (according to the review)

On 1/8/2020 9:47 PM, James Smart wrote:
> 
> I can agree with this - default behavior is old behavior - but given we are
> talking many minutes for old behavior, perhaps we should be making 0 be
> immediate failure and not preserve the old behavior ?? Thoughts from
> anyone ?
> 
> 
> overall - looks good.   I'd like to see what the answer is to what our default
> action should be (see above, fail_fast = 0) and agree with the multipath
> comment Hannes had.

James, please see your comment on Tue Dec 17 10:03:26 PST 2019
http://lists.infradead.org/pipermail/linux-nvme/2019-December/028483.html

>> +	if (fail_fast_tmo < 0)
>> +		opts->fail_fast_tmo_ns = -1;
>would prefer it be set to 0 to mean disabled. [J.S.]



On 1/7/2020 6:18 PM, Hannes Reinecke wrote:
> 
> What happens if the controller reconnects _after_ failfast has triggered?
> From my reading it would simply establish the connection, so I/O _could_
> continue to be served from this path.
> But seeing that I/O has already been failed due to failfast tmo the system will
> most likely have taken corrective action, such as failing over to another path
> or informing the cluster manager etc.
> So who's going to inform the upper layers that the path has become live again?
>

The function nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) schedules a requeue_work 
for all namespaces on the controller.
See drivers/nvme/host/core.c:
bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
		enum nvme_ctrl_state new_state)
{
	..............................................................................
	if (changed && ctrl->state == NVME_CTRL_LIVE)
		nvme_kick_requeue_lists(ctrl);
	return changed;
}

---------------------------------
---------------------------------

The updated patch:

branch nvme/for-5.5
---------------------------------
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2a84e14..aa51d6a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -321,6 +321,37 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved)
 }
 EXPORT_SYMBOL_GPL(nvme_cancel_request);

+static void nvme_failfast_work(struct work_struct *work)
+{
+	struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
+			struct nvme_ctrl, failfast_work);
+
+	spin_lock_irq(&ctrl->lock);
+	if (ctrl->state == NVME_CTRL_CONNECTING) {
+		set_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
+		dev_info(ctrl->device, "failfast expired set for controller %s\n", ctrl->opts->subsysnqn);
+		nvme_kick_requeue_lists(ctrl);
+	}
+	spin_unlock_irq(&ctrl->lock);
+}
+
+static void nvme_start_failfast_work(struct nvme_ctrl *ctrl)
+{
+	if (unlikely(ctrl->opts->fail_fast_tmo == 0))
+		return;
+
+	schedule_delayed_work(&ctrl->failfast_work, ctrl->opts->fail_fast_tmo * HZ);
+}
+
+static void nvme_stop_failfast_work(struct nvme_ctrl *ctrl)
+{
+	if (unlikely(ctrl->opts->fail_fast_tmo == 0))
+		return;
+
+	cancel_delayed_work_sync(&ctrl->failfast_work);
+	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
+}
+
 bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 		enum nvme_ctrl_state new_state)
 {
@@ -334,9 +365,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 	switch (new_state) {
 	case NVME_CTRL_LIVE:
 		switch (old_state) {
+		case NVME_CTRL_CONNECTING:
+			nvme_stop_failfast_work(ctrl);
 		case NVME_CTRL_NEW:
 		case NVME_CTRL_RESETTING:
-		case NVME_CTRL_CONNECTING:
 			changed = true;
 			/* FALLTHRU */
 		default:
@@ -355,8 +387,9 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 		break;
 	case NVME_CTRL_CONNECTING:
 		switch (old_state) {
-		case NVME_CTRL_NEW:
 		case NVME_CTRL_RESETTING:
+			nvme_start_failfast_work(ctrl);
+		case NVME_CTRL_NEW:
 			changed = true;
 			/* FALLTHRU */
 		default:
@@ -3979,6 +4012,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 {
 	nvme_mpath_stop(ctrl);
 	nvme_stop_keep_alive(ctrl);
+	nvme_stop_failfast_work(ctrl);
 	flush_work(&ctrl->async_event_work);
 	cancel_work_sync(&ctrl->fw_act_work);
 }
@@ -4043,6 +4077,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	int ret;

 	ctrl->state = NVME_CTRL_NEW;
+	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
 	spin_lock_init(&ctrl->lock);
 	mutex_init(&ctrl->scan_lock);
 	INIT_LIST_HEAD(&ctrl->namespaces);
@@ -4057,6 +4092,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	init_waitqueue_head(&ctrl->state_wq);

 	INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
+	INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work);
 	memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
 	ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 74b8818..16e5464 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -549,6 +549,7 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
 {
 	if (ctrl->state != NVME_CTRL_DELETING &&
 	    ctrl->state != NVME_CTRL_DEAD &&
+	    !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
 	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
 		return BLK_STS_RESOURCE;

@@ -612,6 +613,7 @@ bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
 	{ NVMF_OPT_NR_WRITE_QUEUES,	"nr_write_queues=%d"	},
 	{ NVMF_OPT_NR_POLL_QUEUES,	"nr_poll_queues=%d"	},
 	{ NVMF_OPT_TOS,			"tos=%d"		},
+	{ NVMF_OPT_FAIL_FAST_TMO,	"fail_fast_tmo=%d"	},
 	{ NVMF_OPT_ERR,			NULL			}
 };

@@ -630,6 +632,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 	opts->nr_io_queues = num_online_cpus();
 	opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
 	opts->kato = NVME_DEFAULT_KATO;
+	opts->fail_fast_tmo = NVMF_DEF_FAIL_FAST_TMO;
 	opts->duplicate_connect = false;
 	opts->hdr_digest = false;
 	opts->data_digest = false;
@@ -751,6 +754,17 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 				pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n");
 			ctrl_loss_tmo = token;
 			break;
+		case NVMF_OPT_FAIL_FAST_TMO:
+			if (match_int(args, &token)) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			if (token)
+				pr_warn("fail_fast_tmo != 0, I/O will failed on reconnect controller after %d sec\n", token);
+
+			opts->fail_fast_tmo  = token;
+			break;
 		case NVMF_OPT_HOSTNQN:
 			if (opts->host) {
 				pr_err("hostnqn already user-assigned: %s\n",
@@ -881,11 +895,14 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 		opts->nr_poll_queues = 0;
 		opts->duplicate_connect = true;
 	}
-	if (ctrl_loss_tmo < 0)
+	if (ctrl_loss_tmo < 0) {
 		opts->max_reconnects = -1;
-	else
+	} else {
 		opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
 						opts->reconnect_delay);
+		if(ctrl_loss_tmo < opts->fail_fast_tmo)
+			pr_warn("failfast tmo (%d) larger than controller loss tmo (%d)\n", opts->fail_fast_tmo, ctrl_loss_tmo);
+	}

 	if (!opts->host) {
 		kref_get(&nvmf_default_host->ref);
@@ -985,7 +1002,7 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts)
 #define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
 				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
 				 NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
-				 NVMF_OPT_DISABLE_SQFLOW)
+				 NVMF_OPT_DISABLE_SQFLOW | NVMF_OPT_FAIL_FAST_TMO)

 static struct nvme_ctrl *
 nvmf_create_ctrl(struct device *dev, const char *buf)
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index a0ec40a..fd8c7dd 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -15,6 +15,8 @@
 #define NVMF_DEF_RECONNECT_DELAY	10
 /* default to 600 seconds of reconnect attempts before giving up */
 #define NVMF_DEF_CTRL_LOSS_TMO		600
+/* default to 30 seconds of fail fast IO commands  */
+#define NVMF_DEF_FAIL_FAST_TMO		0

 /*
  * Define a host as seen by the target.  We allocate one at boot, but also
@@ -56,6 +58,7 @@ enum {
 	NVMF_OPT_NR_WRITE_QUEUES = 1 << 17,
 	NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
 	NVMF_OPT_TOS		= 1 << 19,
+	NVMF_OPT_FAIL_FAST_TMO	= 1 << 20,
 };

 /**
@@ -89,6 +92,7 @@ enum {
  * @nr_write_queues: number of queues for write I/O
  * @nr_poll_queues: number of queues for polling I/O
  * @tos: type of service
+ * @fast_fail_tmo_ns: Fast I/O fail timeout in nanoseconds;
  */
 struct nvmf_ctrl_options {
 	unsigned		mask;
@@ -111,6 +115,7 @@ struct nvmf_ctrl_options {
 	unsigned int		nr_write_queues;
 	unsigned int		nr_poll_queues;
 	int			tos;
+	unsigned int	fail_fast_tmo;
 };

 /*
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 797c183..4edcaf1 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -281,9 +281,11 @@ static bool nvme_available_path(struct nvme_ns_head *head)

 	list_for_each_entry_rcu(ns, &head->list, siblings) {
 		switch (ns->ctrl->state) {
+		case NVME_CTRL_CONNECTING:
+		    if(test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags))
+				break;
 		case NVME_CTRL_LIVE:
 		case NVME_CTRL_RESETTING:
-		case NVME_CTRL_CONNECTING:
 			/* fallthru */
 			return true;
 		default:
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 1024fec..b6a199e 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -256,6 +256,7 @@ struct nvme_ctrl {
 	struct work_struct scan_work;
 	struct work_struct async_event_work;
 	struct delayed_work ka_work;
+	struct delayed_work failfast_work;
 	struct nvme_command ka_cmd;
 	struct work_struct fw_act_work;
 	unsigned long events;
@@ -289,6 +290,8 @@ struct nvme_ctrl {
 	u16 icdoff;
 	u16 maxcmd;
 	int nr_reconnects;
+	unsigned long flags;
+#define NVME_CTRL_FAILFAST_EXPIRED	0
 	struct nvmf_ctrl_options *opts;

 	struct page *discard_page;

-----------------
Regards,
Victor
_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH v2] nvme-fabrics: reject I/O to offline device
  2020-01-08 19:47 ` James Smart
  2020-01-15 15:42   ` Victor Gladkov
@ 2020-01-26 10:06   ` Victor Gladkov
  2020-01-30 15:08     ` Christoph Hellwig
  1 sibling, 1 reply; 8+ messages in thread
From: Victor Gladkov @ 2020-01-26 10:06 UTC (permalink / raw)
  To: linux-nvme; +Cc: Hannes Reinecke, Sagi Grimberg, James Smart

On 1/15/2020 5:43 PM, Victor Gladkov wrote:
> 1. Added multipath support for this patch.
> 2. Small refactoring (according to the review)

Anyone have any comments on the latest proposed patch?

Regards,
Victor
_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] nvme-fabrics: reject I/O to offline device
  2020-01-26 10:06   ` Victor Gladkov
@ 2020-01-30 15:08     ` Christoph Hellwig
  2020-02-03 13:40       ` Victor Gladkov
  0 siblings, 1 reply; 8+ messages in thread
From: Christoph Hellwig @ 2020-01-30 15:08 UTC (permalink / raw)
  To: Victor Gladkov; +Cc: James Smart, Hannes Reinecke, linux-nvme, Sagi Grimberg

On Sun, Jan 26, 2020 at 10:06:20AM +0000, Victor Gladkov wrote:
> On 1/15/2020 5:43 PM, Victor Gladkov wrote:
> > 1. Added multipath support for this patch.
> > 2. Small refactoring (according to the review)
> 
> Anyone have any comments on the latest proposed patch?

Where is the latest patch?  I didn't see a repost.  To kick off a
discussion please just resend it, in proper patch format and with the
proper style to make reviewing it easier.

_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH v2] nvme-fabrics: reject I/O to offline device
  2020-01-30 15:08     ` Christoph Hellwig
@ 2020-02-03 13:40       ` Victor Gladkov
  2020-02-03 17:08         ` Mike Snitzer
  0 siblings, 1 reply; 8+ messages in thread
From: Victor Gladkov @ 2020-02-03 13:40 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: James Smart, Hannes Reinecke, linux-nvme, Sagi Grimberg

On Thu, Jan 30, 2020 at 17:09, Christoph Hellwig wrote:
> On Sun, Jan 26, 2020 at 10:06:20AM +0000, Victor Gladkov wrote:
> > On 1/15/2020 5:43 PM, Victor Gladkov wrote:
> > > 1. Added multipath support for this patch.
> > > 2. Small refactoring (according to the review)
> >
> > Anyone have any comments on the latest proposed patch?
> 
> Where is the latest patch?  I didn't see a repost.  To kick off a discussion
> please just resend it, in proper patch format and with the proper style to make
> reviewing it easier.

Patch updated for branch nvme/for-5.6
---------------------------------
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 6ec0350..c00efb2 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -321,6 +321,37 @@
 }
 EXPORT_SYMBOL_GPL(nvme_cancel_request);

+static void nvme_failfast_work(struct work_struct *work)
+{
+	struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
+			struct nvme_ctrl, failfast_work);
+
+	spin_lock_irq(&ctrl->lock);
+	if (ctrl->state == NVME_CTRL_CONNECTING) {
+		set_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
+		dev_info(ctrl->device, "failfast expired set for controller %s\n", ctrl->opts->subsysnqn);
+		nvme_kick_requeue_lists(ctrl);
+	}
+	spin_unlock_irq(&ctrl->lock);
+}
+
+static void nvme_start_failfast_work(struct nvme_ctrl *ctrl)
+{
+	if (unlikely(ctrl->opts->fail_fast_tmo == 0))
+		return;
+
+	schedule_delayed_work(&ctrl->failfast_work, ctrl->opts->fail_fast_tmo * HZ);
+}
+
+static void nvme_stop_failfast_work(struct nvme_ctrl *ctrl)
+{
+	if (unlikely(ctrl->opts->fail_fast_tmo == 0))
+		return;
+
+	cancel_delayed_work_sync(&ctrl->failfast_work);
+	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
+}
+
 bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 		enum nvme_ctrl_state new_state)
 {
@@ -334,9 +365,10 @@
 	switch (new_state) {
 	case NVME_CTRL_LIVE:
 		switch (old_state) {
+		case NVME_CTRL_CONNECTING:
+			nvme_stop_failfast_work(ctrl);
 		case NVME_CTRL_NEW:
 		case NVME_CTRL_RESETTING:
-		case NVME_CTRL_CONNECTING:
 			changed = true;
 			/* FALLTHRU */
 		default:
@@ -355,8 +387,9 @@
 		break;
 	case NVME_CTRL_CONNECTING:
 		switch (old_state) {
-		case NVME_CTRL_NEW:
 		case NVME_CTRL_RESETTING:
+			nvme_start_failfast_work(ctrl);
+		case NVME_CTRL_NEW:
 			changed = true;
 			/* FALLTHRU */
 		default:
@@ -3960,6 +3993,7 @@
 {
 	nvme_mpath_stop(ctrl);
 	nvme_stop_keep_alive(ctrl);
+	nvme_stop_failfast_work(ctrl);
 	flush_work(&ctrl->async_event_work);
 	cancel_work_sync(&ctrl->fw_act_work);
 }
@@ -4024,6 +4058,7 @@
 	int ret;

 	ctrl->state = NVME_CTRL_NEW;
+	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
 	spin_lock_init(&ctrl->lock);
 	mutex_init(&ctrl->scan_lock);
 	INIT_LIST_HEAD(&ctrl->namespaces);
@@ -4038,6 +4073,7 @@
 	init_waitqueue_head(&ctrl->state_wq);

 	INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
+	INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work);
 	memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
 	ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 74b8818..d5fa25e 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -549,6 +549,7 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
 {
 	if (ctrl->state != NVME_CTRL_DELETING &&
 	    ctrl->state != NVME_CTRL_DEAD &&
+	    !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
 	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
 		return BLK_STS_RESOURCE;

@@ -612,6 +613,7 @@ bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
 	{ NVMF_OPT_NR_WRITE_QUEUES,	"nr_write_queues=%d"	},
 	{ NVMF_OPT_NR_POLL_QUEUES,	"nr_poll_queues=%d"	},
 	{ NVMF_OPT_TOS,			"tos=%d"		},
+	{ NVMF_OPT_FAIL_FAST_TMO,	"fail_fast_tmo=%d"	},
 	{ NVMF_OPT_ERR,			NULL			}
 };

@@ -630,6 +632,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 	opts->nr_io_queues = num_online_cpus();
 	opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
 	opts->kato = NVME_DEFAULT_KATO;
+	opts->fail_fast_tmo = NVMF_DEF_FAIL_FAST_TMO;
 	opts->duplicate_connect = false;
 	opts->hdr_digest = false;
 	opts->data_digest = false;
@@ -751,6 +754,17 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 				pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n");
 			ctrl_loss_tmo = token;
 			break;
+		case NVMF_OPT_FAIL_FAST_TMO:
+			if (match_int(args, &token)) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			if (token)
+				pr_warn("fail_fast_tmo != 0, I/O will failed on reconnect controller after %d sec\n", token);
+
+			opts->fail_fast_tmo  = token;
+			break;
 		case NVMF_OPT_HOSTNQN:
 			if (opts->host) {
 				pr_err("hostnqn already user-assigned: %s\n",
@@ -881,11 +895,14 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 		opts->nr_poll_queues = 0;
 		opts->duplicate_connect = true;
 	}
-	if (ctrl_loss_tmo < 0)
+	if (ctrl_loss_tmo < 0) {
 		opts->max_reconnects = -1;
-	else
+	} else {
 		opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
 						opts->reconnect_delay);
+		if(ctrl_loss_tmo < opts->fail_fast_tmo)
+			pr_warn("failfast tmo (%d) larger than controller loss tmo (%d)\n", opts->fail_fast_tmo, ctrl_loss_tmo);
+	}

 	if (!opts->host) {
 		kref_get(&nvmf_default_host->ref);
@@ -985,7 +1002,7 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts)
 #define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
 				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
 				 NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
-				 NVMF_OPT_DISABLE_SQFLOW)
+				 NVMF_OPT_DISABLE_SQFLOW | NVMF_OPT_FAIL_FAST_TMO)

 static struct nvme_ctrl *
 nvmf_create_ctrl(struct device *dev, const char *buf)
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index a0ec40a..fd8c7dd 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -15,6 +15,8 @@
 #define NVMF_DEF_RECONNECT_DELAY	10
 /* default to 600 seconds of reconnect attempts before giving up */
 #define NVMF_DEF_CTRL_LOSS_TMO		600
+/* default to 30 seconds of fail fast IO commands  */
+#define NVMF_DEF_FAIL_FAST_TMO		0

 /*
  * Define a host as seen by the target.  We allocate one at boot, but also
@@ -56,6 +58,7 @@ enum {
 	NVMF_OPT_NR_WRITE_QUEUES = 1 << 17,
 	NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
 	NVMF_OPT_TOS		= 1 << 19,
+	NVMF_OPT_FAIL_FAST_TMO	= 1 << 20,
 };

 /**
@@ -89,6 +92,7 @@ enum {
  * @nr_write_queues: number of queues for write I/O
  * @nr_poll_queues: number of queues for polling I/O
  * @tos: type of service
+ * @fast_fail_tmo_ns: Fast I/O fail timeout in nanoseconds;
  */
 struct nvmf_ctrl_options {
 	unsigned		mask;
@@ -111,6 +115,7 @@ struct nvmf_ctrl_options {
 	unsigned int		nr_write_queues;
 	unsigned int		nr_poll_queues;
 	int			tos;
+	unsigned int	fail_fast_tmo;
 };

 /*
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 797c183..4edcaf1 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -281,9 +281,11 @@ static bool nvme_available_path(struct nvme_ns_head *head)

 	list_for_each_entry_rcu(ns, &head->list, siblings) {
 		switch (ns->ctrl->state) {
+		case NVME_CTRL_CONNECTING:
+		    if(test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags))
+				break;
 		case NVME_CTRL_LIVE:
 		case NVME_CTRL_RESETTING:
-		case NVME_CTRL_CONNECTING:
 			/* fallthru */
 			return true;
 		default:
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 1024fec..b6a199e 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -256,6 +256,7 @@ struct nvme_ctrl {
 	struct work_struct scan_work;
 	struct work_struct async_event_work;
 	struct delayed_work ka_work;
+	struct delayed_work failfast_work;
 	struct nvme_command ka_cmd;
 	struct work_struct fw_act_work;
 	unsigned long events;
@@ -289,6 +290,8 @@ struct nvme_ctrl {
 	u16 icdoff;
 	u16 maxcmd;
 	int nr_reconnects;
+	unsigned long flags;
+#define NVME_CTRL_FAILFAST_EXPIRED	0
 	struct nvmf_ctrl_options *opts;

 	struct page *discard_page;
-----------------
Regards,
Victor

_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] nvme-fabrics: reject I/O to offline device
  2020-02-03 13:40       ` Victor Gladkov
@ 2020-02-03 17:08         ` Mike Snitzer
  0 siblings, 0 replies; 8+ messages in thread
From: Mike Snitzer @ 2020-02-03 17:08 UTC (permalink / raw)
  To: Victor Gladkov
  Cc: Christoph Hellwig, Sagi Grimberg, James Smart, linux-nvme,
	Hannes Reinecke

On Mon, Feb 03 2020 at  8:40am -0500,
Victor Gladkov <Victor.Gladkov@kioxia.com> wrote:

> On Thu, Jan 30, 2020 at 17:09, Christoph Hellwig wrote:
> > On Sun, Jan 26, 2020 at 10:06:20AM +0000, Victor Gladkov wrote:
> > > On 1/15/2020 5:43 PM, Victor Gladkov wrote:
> > > > 1. Added multipath support for this patch.
> > > > 2. Small refactoring (according to the review)
> > >
> > > Anyone have any comments on the latest proposed patch?
> > 
> > Where is the latest patch?  I didn't see a repost.  To kick off a discussion
> > please just resend it, in proper patch format and with the proper style to make
> > reviewing it easier.
> 
> Patch updated for branch nvme/for-5.6

Christoph meant you should send a patch with a version bump in the subject
(e.g. [PATCH v3]) and a proper patch header. No maintainer can pick this
patch up the way you've posted it (not without forcing them to do more work
than they should).

Some nits below.

> diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
> index a0ec40a..fd8c7dd 100644
> --- a/drivers/nvme/host/fabrics.h
> +++ b/drivers/nvme/host/fabrics.h
> @@ -15,6 +15,8 @@
>  #define NVMF_DEF_RECONNECT_DELAY	10
>  /* default to 600 seconds of reconnect attempts before giving up */
>  #define NVMF_DEF_CTRL_LOSS_TMO		600
> +/* default to 30 seconds of fail fast IO commands  */
> +#define NVMF_DEF_FAIL_FAST_TMO		0

Your comment says "default to 30 seconds" yet in practice 0 is used?

>  /*
>   * Define a host as seen by the target.  We allocate one at boot, but also
> @@ -56,6 +58,7 @@ enum {
>  	NVMF_OPT_NR_WRITE_QUEUES = 1 << 17,
>  	NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
>  	NVMF_OPT_TOS		= 1 << 19,
> +	NVMF_OPT_FAIL_FAST_TMO	= 1 << 20,
>  };
> 
>  /**
> @@ -89,6 +92,7 @@ enum {
>   * @nr_write_queues: number of queues for write I/O
>   * @nr_poll_queues: number of queues for polling I/O
>   * @tos: type of service
> + * @fast_fail_tmo_ns: Fast I/O fail timeout in nanoseconds;
>   */
>  struct nvmf_ctrl_options {
>  	unsigned		mask;
> @@ -111,6 +115,7 @@ struct nvmf_ctrl_options {
>  	unsigned int		nr_write_queues;
>  	unsigned int		nr_poll_queues;
>  	int			tos;
> +	unsigned int	fail_fast_tmo;
>  };

Above you document "@fast_fail_tmo_ns" but then add the member with name
"fail_fast_tmo".

>  /*
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index 797c183..4edcaf1 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -281,9 +281,11 @@ static bool nvme_available_path(struct nvme_ns_head *head)
> 
>  	list_for_each_entry_rcu(ns, &head->list, siblings) {
>  		switch (ns->ctrl->state) {
> +		case NVME_CTRL_CONNECTING:
> +		    if(test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags))
> +				break;

James suggested this case order; when he did, he included /* fallthru */.
It helps to have it where appropriate (like this case).

>  		case NVME_CTRL_LIVE:
>  		case NVME_CTRL_RESETTING:
> -		case NVME_CTRL_CONNECTING:
>  			/* fallthru */
>  			return true;
>  		default:
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 1024fec..b6a199e 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -256,6 +256,7 @@ struct nvme_ctrl {
>  	struct work_struct scan_work;
>  	struct work_struct async_event_work;
>  	struct delayed_work ka_work;
> +	struct delayed_work failfast_work;
>  	struct nvme_command ka_cmd;
>  	struct work_struct fw_act_work;
>  	unsigned long events;
> @@ -289,6 +290,8 @@ struct nvme_ctrl {
>  	u16 icdoff;
>  	u16 maxcmd;
>  	int nr_reconnects;
> +	unsigned long flags;
> +#define NVME_CTRL_FAILFAST_EXPIRED	0
>  	struct nvmf_ctrl_options *opts;
> 
>  	struct page *discard_page;

Seems awkward to define bits for "flags" within the body of the struct;
but I'll defer to others' judgement.

Mike


_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, back to index

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-01 16:45 [PATCH v2] nvme-fabrics: reject I/O to offline device Victor Gladkov
2020-01-07 16:17 ` Hannes Reinecke
2020-01-08 19:47 ` James Smart
2020-01-15 15:42   ` Victor Gladkov
2020-01-26 10:06   ` Victor Gladkov
2020-01-30 15:08     ` Christoph Hellwig
2020-02-03 13:40       ` Victor Gladkov
2020-02-03 17:08         ` Mike Snitzer

Linux-NVME Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-nvme/0 linux-nvme/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-nvme linux-nvme/ https://lore.kernel.org/linux-nvme \
		linux-nvme@lists.infradead.org
	public-inbox-index linux-nvme

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.infradead.lists.linux-nvme


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git