Linux-NVME Archive on lore.kernel.org
 help / color / Atom feed
From: Victor Gladkov <Victor.Gladkov@kioxia.com>
To: Hannes Reinecke <hare@suse.de>, Sagi Grimberg <sagi@grimberg.me>,
	"James Smart" <james.smart@broadcom.com>,
	"linux-nvme@lists.infradead.org" <linux-nvme@lists.infradead.org>
Subject: RE: [PATCH] nvme-fabrics: reject I/O to offline device
Date: Mon, 9 Dec 2019 15:30:14 +0000
Message-ID: <d7953accf06e418a893b9cc6017b981a@kioxia.com> (raw)
In-Reply-To: <e009d8fe-74ec-8c87-30ec-b1ac657b2aa8@suse.de>

On 12/8/19 14:18 PM, Hannes Reinecke wrote:
> 
> On 12/6/19 11:18 PM, Sagi Grimberg wrote:
> >
> >>> ---
> >>> diff --git a/drivers/nvme/host/fabrics.c
> >>> b/drivers/nvme/host/fabrics.c index 74b8818..b58abc1 100644
> >>> --- a/drivers/nvme/host/fabrics.c
> >>> +++ b/drivers/nvme/host/fabrics.c
> >>> @@ -549,6 +549,8 @@ blk_status_t nvmf_fail_nonready_command(struct
> >>> nvme_ctrl *ctrl,
> >>>   {
> >>>          if (ctrl->state != NVME_CTRL_DELETING &&
> >>>              ctrl->state != NVME_CTRL_DEAD &&
> >>> +           !(ctrl->state == NVME_CTRL_CONNECTING &&
> >>> +            ((ktime_get_ns() - rq->start_time_ns) >
> >>> jiffies_to_nsecs(rq->timeout))) &&
> >>>              !blk_noretry_request(rq) && !(rq->cmd_flags &
> >>> REQ_NVME_MPATH))
> >>>                  return BLK_STS_RESOURCE;
> >>>
> >>
> >> Did you test this to ensure it's doing what you expect. I'm not sure
> >> that all the timers are set right at this point. Most I/O's timeout
> >> from a deadline time stamped at blk_mq_start_request(). But that
> >> routine is actually called by the transports post the
> >> nvmf_check_ready/fail_nonready calls.  E.g. the io is not yet in
> >> flight, thus queued, and the blk-mq internal queuing doesn't count
> >> against the io timeout.  I can't see anything that guarantees
> >> start_time_ns is set.
> >
> > I'm not sure this behavior for failing I/O always desired? some
> > consumers would actually not want the I/O to fail prematurely if we
> > are not multipathing...
> >
> > I think we need a fail_fast_tmo set in when establishing the
> > controller to get it right.
> >
> Agreed. This whole patch looks like someone is trying to reimplement
> fast_io_fail_tmo / dev_loss_tmo.
> As we're moving into unreliable fabrics I guess we'll need a similar mechanism.
> 
> Cheers,
> 
> Hannes


Following your suggestions, I added a new session parameter called "fail_fast_tmo". 
The timeout is measured in seconds from the controller reconnect, any command beyond that timeout is rejected. 
The new parameter value may be passed during ‘connect’, and its default value is 30 seconds. 
A value of -1 means no timeout (similar to the current behavior).

---
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 74b8818..ed6b911 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -406,6 +406,7 @@
 	}

 	ctrl->cntlid = le16_to_cpu(res.u16);
+	ctrl->start_reconnect_ns = ktime_get_ns();

 out_free_data:
 	kfree(data);
@@ -474,8 +475,12 @@
 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
 {
 	if (ctrl->opts->max_reconnects == -1 ||
-	    ctrl->nr_reconnects < ctrl->opts->max_reconnects)
+	    ctrl->nr_reconnects < ctrl->opts->max_reconnects){
+		if(ctrl->nr_reconnects == 0)
+			ctrl->start_reconnect_ns = ktime_get_ns();
+
 		return true;
+	}

 	return false;
 }
@@ -549,6 +554,8 @@
 {
 	if (ctrl->state != NVME_CTRL_DELETING &&
 	    ctrl->state != NVME_CTRL_DEAD &&
+            !(ctrl->state == NVME_CTRL_CONNECTING && ctrl->opts->fail_fast_tmo_ns >= 0 &&
+            ((ktime_get_ns() - ctrl->start_reconnect_ns) >  ctrl->opts->fail_fast_tmo_ns)) &&
 	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
 		return BLK_STS_RESOURCE;

@@ -612,6 +619,7 @@
 	{ NVMF_OPT_NR_WRITE_QUEUES,	"nr_write_queues=%d"	},
 	{ NVMF_OPT_NR_POLL_QUEUES,	"nr_poll_queues=%d"	},
 	{ NVMF_OPT_TOS,			"tos=%d"		},
+	{ NVMF_OPT_FAIL_FAST_TMO,	"fail_fast_tmo=%d"	},
 	{ NVMF_OPT_ERR,			NULL			}
 };

@@ -623,6 +631,7 @@
 	int token, ret = 0;
 	size_t nqnlen  = 0;
 	int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO;
+	int fail_fast_tmo = NVMF_DEF_FAIL_FAST_TMO;
 	uuid_t hostid;

 	/* Set defaults */
@@ -751,6 +760,16 @@
 				pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n");
 			ctrl_loss_tmo = token;
 			break;
+		case NVMF_OPT_FAIL_FAST_TMO:
+			if (match_int(args, &token)) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			if (token < 0)
+				pr_warn("fail_fast_tmo < 0, I/O will wait for reconnect/loss controller\n");
+			fail_fast_tmo = token;
+			break;
 		case NVMF_OPT_HOSTNQN:
 			if (opts->host) {
 				pr_err("hostnqn already user-assigned: %s\n",
@@ -887,6 +906,11 @@
 		opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
 						opts->reconnect_delay);

+	if (fail_fast_tmo < 0)
+		opts->fail_fast_tmo_ns = -1;
+	else
+		opts->fail_fast_tmo_ns = fail_fast_tmo * NSEC_PER_SEC;
+
 	if (!opts->host) {
 		kref_get(&nvmf_default_host->ref);
 		opts->host = nvmf_default_host;
@@ -985,7 +1009,7 @@
 #define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
 				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
 				 NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
-				 NVMF_OPT_DISABLE_SQFLOW)
+				 NVMF_OPT_DISABLE_SQFLOW|NVMF_OPT_FAIL_FAST_TMO)

 static struct nvme_ctrl *
 nvmf_create_ctrl(struct device *dev, const char *buf)
@@ -1198,7 +1222,6 @@
 	class_destroy(nvmf_class);
 	nvmf_host_put(nvmf_default_host);

-	BUILD_BUG_ON(sizeof(struct nvmf_common_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvmf_connect_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvmf_property_get_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvmf_property_set_command) != 64);

diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 93f08d7..235c767 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -15,6 +15,8 @@
 #define NVMF_DEF_RECONNECT_DELAY	10
 /* default to 600 seconds of reconnect attempts before giving up */
 #define NVMF_DEF_CTRL_LOSS_TMO		600
+/* default to 30 seconds before failing fast I/O commands */
+#define NVMF_DEF_FAIL_FAST_TMO		30

 /*
  * Define a host as seen by the target.  We allocate one at boot, but also
@@ -56,6 +58,7 @@
 	NVMF_OPT_NR_WRITE_QUEUES = 1 << 17,
 	NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
 	NVMF_OPT_TOS		= 1 << 19,
+	NVMF_OPT_FAIL_FAST_TMO	= 1 << 20,
 };

 /**
@@ -89,6 +92,7 @@
  * @nr_write_queues: number of queues for write I/O
  * @nr_poll_queues: number of queues for polling I/O
  * @tos: type of service
+ * @fail_fast_tmo_ns: fast I/O fail timeout in nanoseconds
  */
 struct nvmf_ctrl_options {
 	unsigned		mask;
@@ -111,6 +115,7 @@
 	unsigned int		nr_write_queues;
 	unsigned int		nr_poll_queues;
 	int			tos;
+	u64			fail_fast_tmo_ns;
 };

 /*

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 34ac79c..9b5f6d9 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -282,6 +282,7 @@
 	u16 icdoff;
 	u16 maxcmd;
 	int nr_reconnects;
+	u64 start_reconnect_ns;
 	struct nvmf_ctrl_options *opts;

 	struct page *discard_page;

---
Regards,
Victor
_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

  reply index

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-01  7:59 Victor Gladkov
2019-12-02 22:26 ` Chaitanya Kulkarni
2019-12-02 22:47 ` James Smart
2019-12-03 10:04   ` Victor Gladkov
2019-12-03 16:19     ` James Smart
2019-12-04  8:28       ` Victor Gladkov
2019-12-06  0:38         ` James Smart
2019-12-06 22:18           ` Sagi Grimberg
2019-12-08 12:31             ` Hannes Reinecke
2019-12-09 15:30               ` Victor Gladkov [this message]
2019-12-17 18:03                 ` James Smart
2019-12-17 21:46                 ` Sagi Grimberg
2019-12-18 22:20                   ` James Smart
2019-12-15 12:33               ` Victor Gladkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d7953accf06e418a893b9cc6017b981a@kioxia.com \
    --to=victor.gladkov@kioxia.com \
    --cc=hare@suse.de \
    --cc=james.smart@broadcom.com \
    --cc=linux-nvme@lists.infradead.org \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-NVME Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-nvme/0 linux-nvme/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-nvme linux-nvme/ https://lore.kernel.org/linux-nvme \
		linux-nvme@lists.infradead.org
	public-inbox-index linux-nvme

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.infradead.lists.linux-nvme


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git