Linux-NVME Archive on lore.kernel.org
 help / color / Atom feed
From: Victor Gladkov <Victor.Gladkov@kioxia.com>
To: Hannes Reinecke <hare@suse.de>, Sagi Grimberg <sagi@grimberg.me>,
	"James Smart" <james.smart@broadcom.com>,
	"linux-nvme@lists.infradead.org" <linux-nvme@lists.infradead.org>
Subject: RE: [PATCH] nvme-fabrics: reject I/O to offline device
Date: Mon, 9 Dec 2019 15:30:14 +0000
Message-ID: <d7953accf06e418a893b9cc6017b981a@kioxia.com> (raw)
In-Reply-To: <e009d8fe-74ec-8c87-30ec-b1ac657b2aa8@suse.de>

On 12/8/19 14:18 PM, Hannes Reinecke wrote:
> 
> On 12/6/19 11:18 PM, Sagi Grimberg wrote:
> >
> >>> ---
> >>> diff --git a/drivers/nvme/host/fabrics.c
> >>> b/drivers/nvme/host/fabrics.c index 74b8818..b58abc1 100644
> >>> --- a/drivers/nvme/host/fabrics.c
> >>> +++ b/drivers/nvme/host/fabrics.c
> >>> @@ -549,6 +549,8 @@ blk_status_t nvmf_fail_nonready_command(struct
> >>> nvme_ctrl *ctrl,
> >>>   {
> >>>          if (ctrl->state != NVME_CTRL_DELETING &&
> >>>              ctrl->state != NVME_CTRL_DEAD &&
> >>> +           !(ctrl->state == NVME_CTRL_CONNECTING &&
> >>> +            ((ktime_get_ns() - rq->start_time_ns) >
> >>> jiffies_to_nsecs(rq->timeout))) &&
> >>>              !blk_noretry_request(rq) && !(rq->cmd_flags &
> >>> REQ_NVME_MPATH))
> >>>                  return BLK_STS_RESOURCE;
> >>>
> >>
> >> Did you test this to ensure it's doing what you expect. I'm not sure
> >> that all the timers are set right at this point. Most I/O's timeout
> >> from a deadline time stamped at blk_mq_start_request(). But that
> >> routine is actually called by the transports post the
> >> nvmf_check_ready/fail_nonready calls.  E.g. the io is not yet in
> >> flight, thus queued, and the blk-mq internal queuing doesn't count
> >> against the io timeout.  I can't see anything that guarantees
> >> start_time_ns is set.
> >
> > I'm not sure this behavior for failing I/O always desired? some
> > consumers would actually not want the I/O to fail prematurely if we
> > are not multipathing...
> >
> > I think we need a fail_fast_tmo set in when establishing the
> > controller to get it right.
> >
> Agreed. This whole patch looks like someone is trying to reimplement
> fast_io_fail_tmo / dev_loss_tmo.
> As we're moving into unreliable fabrics I guess we'll need a similar mechanism.
> 
> Cheers,
> 
> Hannes


Following your suggestions, I added a new session parameter called "fail_fast_tmo". 
The timeout is measured in seconds from the controller reconnect, any command beyond that timeout is rejected. 
The new parameter value may be passed during ‘connect’, and its default value is 30 seconds. 
A value of -1 means no timeout (similar to the current behavior).

---
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 74b8818..ed6b911 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -406,6 +406,7 @@
 	}

 	ctrl->cntlid = le16_to_cpu(res.u16);
+	ctrl->start_reconnect_ns = ktime_get_ns();

 out_free_data:
 	kfree(data);
@@ -474,8 +475,12 @@
 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
 {
 	if (ctrl->opts->max_reconnects == -1 ||
-	    ctrl->nr_reconnects < ctrl->opts->max_reconnects)
+	    ctrl->nr_reconnects < ctrl->opts->max_reconnects){
+		if(ctrl->nr_reconnects == 0)
+			ctrl->start_reconnect_ns = ktime_get_ns();
+
 		return true;
+	}

 	return false;
 }
@@ -549,6 +554,8 @@
 {
 	if (ctrl->state != NVME_CTRL_DELETING &&
 	    ctrl->state != NVME_CTRL_DEAD &&
+            !(ctrl->state == NVME_CTRL_CONNECTING && ctrl->opts->fail_fast_tmo_ns >= 0 &&
+            ((ktime_get_ns() - ctrl->start_reconnect_ns) >  ctrl->opts->fail_fast_tmo_ns)) &&
 	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
 		return BLK_STS_RESOURCE;

@@ -612,6 +619,7 @@
 	{ NVMF_OPT_NR_WRITE_QUEUES,	"nr_write_queues=%d"	},
 	{ NVMF_OPT_NR_POLL_QUEUES,	"nr_poll_queues=%d"	},
 	{ NVMF_OPT_TOS,			"tos=%d"		},
+	{ NVMF_OPT_FAIL_FAST_TMO,	"fail_fast_tmo=%d"	},
 	{ NVMF_OPT_ERR,			NULL			}
 };

@@ -623,6 +631,7 @@
 	int token, ret = 0;
 	size_t nqnlen  = 0;
 	int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO;
+	int fail_fast_tmo = NVMF_DEF_FAIL_FAST_TMO;
 	uuid_t hostid;

 	/* Set defaults */
@@ -751,6 +760,16 @@
 				pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n");
 			ctrl_loss_tmo = token;
 			break;
+		case NVMF_OPT_FAIL_FAST_TMO:
+			if (match_int(args, &token)) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			if (token < 0)
+				pr_warn("fail_fast_tmo < 0, I/O will wait for reconnect/loss controller\n");
+			fail_fast_tmo = token;
+			break;
 		case NVMF_OPT_HOSTNQN:
 			if (opts->host) {
 				pr_err("hostnqn already user-assigned: %s\n",
@@ -887,6 +906,11 @@
 		opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
 						opts->reconnect_delay);

+	if (fail_fast_tmo < 0)
+		opts->fail_fast_tmo_ns = -1;
+	else
+		opts->fail_fast_tmo_ns = fail_fast_tmo * NSEC_PER_SEC;
+
 	if (!opts->host) {
 		kref_get(&nvmf_default_host->ref);
 		opts->host = nvmf_default_host;
@@ -985,7 +1009,7 @@
 #define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
 				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
 				 NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
-				 NVMF_OPT_DISABLE_SQFLOW)
+				 NVMF_OPT_DISABLE_SQFLOW|NVMF_OPT_FAIL_FAST_TMO)

 static struct nvme_ctrl *
 nvmf_create_ctrl(struct device *dev, const char *buf)
@@ -1198,7 +1222,6 @@
 	class_destroy(nvmf_class);
 	nvmf_host_put(nvmf_default_host);

-	BUILD_BUG_ON(sizeof(struct nvmf_common_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvmf_connect_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvmf_property_get_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvmf_property_set_command) != 64);

diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 93f08d7..235c767 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -15,6 +15,8 @@
 #define NVMF_DEF_RECONNECT_DELAY	10
 /* default to 600 seconds of reconnect attempts before giving up */
 #define NVMF_DEF_CTRL_LOSS_TMO		600
+/* default to 30 seconds before failing fast I/O commands */
+#define NVMF_DEF_FAIL_FAST_TMO		30

 /*
  * Define a host as seen by the target.  We allocate one at boot, but also
@@ -56,6 +58,7 @@
 	NVMF_OPT_NR_WRITE_QUEUES = 1 << 17,
 	NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
 	NVMF_OPT_TOS		= 1 << 19,
+	NVMF_OPT_FAIL_FAST_TMO	= 1 << 20,
 };

 /**
@@ -89,6 +92,7 @@
  * @nr_write_queues: number of queues for write I/O
  * @nr_poll_queues: number of queues for polling I/O
  * @tos: type of service
+ * @fail_fast_tmo_ns: fast I/O fail timeout in nanoseconds
  */
 struct nvmf_ctrl_options {
 	unsigned		mask;
@@ -111,6 +115,7 @@
 	unsigned int		nr_write_queues;
 	unsigned int		nr_poll_queues;
 	int			tos;
+	u64			fail_fast_tmo_ns;
 };

 /*

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 34ac79c..9b5f6d9 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -282,6 +282,7 @@
 	u16 icdoff;
 	u16 maxcmd;
 	int nr_reconnects;
+	u64 start_reconnect_ns;
 	struct nvmf_ctrl_options *opts;

 	struct page *discard_page;

---
Regards,
Victor
_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

  reply index

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-01  7:59 Victor Gladkov
2019-12-02 22:26 ` Chaitanya Kulkarni
2019-12-02 22:47 ` James Smart
2019-12-03 10:04   ` Victor Gladkov
2019-12-03 16:19     ` James Smart
2019-12-04  8:28       ` Victor Gladkov
2019-12-06  0:38         ` James Smart
2019-12-06 22:18           ` Sagi Grimberg
2019-12-08 12:31             ` Hannes Reinecke
2019-12-09 15:30               ` Victor Gladkov [this message]
2019-12-17 18:03                 ` James Smart
2019-12-17 21:46                 ` Sagi Grimberg
2019-12-18 22:20                   ` James Smart
2019-12-15 12:33               ` Victor Gladkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d7953accf06e418a893b9cc6017b981a@kioxia.com \
    --to=victor.gladkov@kioxia.com \
    --cc=hare@suse.de \
    --cc=james.smart@broadcom.com \
    --cc=linux-nvme@lists.infradead.org \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-NVME Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-nvme/0 linux-nvme/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-nvme linux-nvme/ https://lore.kernel.org/linux-nvme \
		linux-nvme@lists.infradead.org
	public-inbox-index linux-nvme

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.infradead.lists.linux-nvme


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git