From: James Smart <jsmart2021@gmail.com>
Subject: [RFC 7/7] nvme_fc: add dev_loss_tmo timeout and remoteport resume support
Date: Thu,  4 May 2017 11:07:37 -0700
Message-ID: <20170504180737.5472-8-jsmart2021@gmail.com>
In-Reply-To: <20170504180737.5472-1-jsmart2021@gmail.com>

From: James Smart <jsmart2021@gmail.com>

This patch adds the dev_loss_tmo functionality to the transport.

When a remoteport is unregistered (connectivity lost), it is marked
DELETED and the following is performed on all the controllers on the
remoteport: the controller is reset to delete the current association.
Once the association is terminated, the dev_loss_tmo timer is started.
A reconnect is not scheduled as there is no connectivity. Note: the
start of the dev_loss_tmo timer is in the generic
delete-association/create-new-association path. Thus it will be started
regardless of whether the reset was due to remote port connectivity
loss, a controller reset, or a transport run-time error.
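
In rough pseudo-C, the per-controller handling on connectivity loss
boils down to the following (a condensed view of the helper added by
this patch; locking, reference counting and error handling omitted):

	if (!dev_loss_tmo) {
		/* dev_loss_tmo == 0: no grace period, delete immediately */
		__nvme_fc_del_ctrl(ctrl);
	} else if (ctrl->ctrl.state == NVME_CTRL_LIVE) {
		/* reset tears down the association; the reset path arms
		 * dev_loss_work and, seeing no connectivity, does not
		 * schedule a reconnect
		 */
		nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING);
		queue_work(nvme_fc_wq, &ctrl->reset_work);
	} else if (ctrl->ctrl.state == NVME_CTRL_RECONNECTING) {
		/* association already gone; stop futile reconnect attempts */
		cancel_delayed_work_sync(&ctrl->connect_work);
	}
	/* RESETTING or DELETING: nothing further to do */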

When a remoteport is registered (connectivity established), the
transport searches the list of remoteport structures that have pending
deletions (controllers waiting to have dev_loss_tmo fire, thus
preventing remoteport deletion). The transport looks for a matching
wwnn/wwpn. If one is found, the remoteport is transitioned back to
ONLINE, and the following occurs on all controllers on the remoteport:
any controller in the RECONNECTING state has a reconnect attempt kicked
off immediately; a controller in the RESETTING state will start a
reconnect attempt through its natural transition to RECONNECTING.
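
A condensed sketch of the resume path (the authoritative version is
nvme_fc_attach_to_suspended_rport()/nvme_fc_resume_controller() in the
diff below; locking and reference counting omitted):

	list_for_each_entry(rport, &lport->endp_list, endp_list) {
		if (rport->remoteport.node_name != pinfo->node_name ||
		    rport->remoteport.port_name != pinfo->port_name)
			continue;

		rport->remoteport.port_state = FC_OBJSTATE_ONLINE;

		list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
			if (ctrl->ctrl.state == NVME_CTRL_RECONNECTING)
				/* reconnects were suppressed; kick one off now */
				queue_delayed_work(nvme_fc_wq,
						   &ctrl->connect_work, 0);
	}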

Once a controller successfully reconnects to a new association, any
dev_loss_tmo timer for it is terminated.

If the dev_loss_tmo timer for a controller fires before connectivity is
re-established, the controller is deleted.
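
For reference, a hypothetical LLDD-side registration using the new
timeout might look as follows. The variable names (wwnn, wwpn, port_id,
localport) and the 60 second value are illustrative only; dev_loss_tmo
is the nvme_fc_port_info field introduced earlier in this series:

	struct nvme_fc_port_info pinfo = {
		.node_name	= wwnn,
		.port_name	= wwpn,
		.port_role	= FC_PORT_ROLE_NVME_TARGET,
		.port_id	= port_id,
		.dev_loss_tmo	= 60,	/* seconds; 0 means delete immediately */
	};
	struct nvme_fc_remote_port *rport;
	int ret;

	/* if the WWNs match a remoteport still waiting out dev_loss_tmo,
	 * that remoteport is resumed rather than a new one allocated
	 */
	ret = nvme_fc_register_remoteport(localport, &pinfo, &rport);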

Signed-off-by: James Smart <james.smart@broadcom.com>
---
 drivers/nvme/host/fc.c | 225 ++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 214 insertions(+), 11 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 484b7d55676c..a3d4b061fe39 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -167,6 +167,7 @@ struct nvme_fc_ctrl {
 	struct work_struct	delete_work;
 	struct work_struct	reset_work;
 	struct delayed_work	connect_work;
+	struct delayed_work	dev_loss_work;
 	u32			dev_loss_tmo;
 
 	struct kref		ref;
@@ -433,6 +434,86 @@ nvme_fc_rport_get(struct nvme_fc_rport *rport)
 	return kref_get_unless_zero(&rport->ref);
 }
 
+static void
+nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
+{
+	switch (ctrl->ctrl.state) {
+	case NVME_CTRL_RECONNECTING:
+		/*
+		 * As all reconnects were suppressed, schedule a
+		 * connect.
+		 */
+		queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, 0);
+		break;
+
+	case NVME_CTRL_RESETTING:
+		/*
+		 * Controller is already in the process of terminating the
+		 * association. No need to do anything further. The reconnect
+		 * step will naturally occur after the reset completes.
+		 */
+		break;
+
+	default:
+		/* no action to take - let it delete */
+		break;
+	}
+}
+
+static struct nvme_fc_rport *
+nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport,
+				struct nvme_fc_port_info *pinfo)
+{
+	struct nvme_fc_rport *rport;
+	struct nvme_fc_ctrl *ctrl;
+	unsigned long flags;
+
+	spin_lock_irqsave(&nvme_fc_lock, flags);
+
+	list_for_each_entry(rport, &lport->endp_list, endp_list) {
+		if (rport->remoteport.node_name != pinfo->node_name ||
+		    rport->remoteport.port_name != pinfo->port_name)
+			continue;
+
+		if (!nvme_fc_rport_get(rport)) {
+			rport = ERR_PTR(-ENOLCK);
+			goto out_done;
+		}
+
+		spin_unlock_irqrestore(&nvme_fc_lock, flags);
+
+		spin_lock_irqsave(&rport->lock, flags);
+
+		/* has it been unregistered */
+		if (rport->remoteport.port_state != FC_OBJSTATE_DELETED) {
+			/* means lldd called us twice */
+			spin_unlock_irqrestore(&rport->lock, flags);
+			nvme_fc_rport_put(rport);
+			return ERR_PTR(-ESTALE);
+		}
+
+		rport->remoteport.port_state = FC_OBJSTATE_ONLINE;
+
+		/*
+		 * kick off a reconnect attempt on all associations to the
+	 * remote port. A successful reconnect will resume i/o.
+		 */
+		list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
+			nvme_fc_resume_controller(ctrl);
+
+		spin_unlock_irqrestore(&rport->lock, flags);
+
+		return rport;
+	}
+
+	rport = NULL;
+
+out_done:
+	spin_unlock_irqrestore(&nvme_fc_lock, flags);
+
+	return rport;
+}
+
 /**
  * nvme_fc_register_remoteport - transport entry point called by an
  *                              LLDD to register the existence of a NVME
@@ -465,22 +546,45 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
 		goto out_reghost_failed;
 	}
 
+	if (!nvme_fc_lport_get(lport)) {
+		ret = -ESHUTDOWN;
+		goto out_reghost_failed;
+	}
+
+	/*
+	 * look to see if there is already a remoteport that is waiting
+	 * for a reconnect (within dev_loss_tmo) with the same WWN's.
+	 * If so, transition to it and reconnect.
+	 */
+	newrec = nvme_fc_attach_to_suspended_rport(lport, pinfo);
+
+	/* found an rport, but something about its state is bad */
+	if (IS_ERR(newrec)) {
+		ret = PTR_ERR(newrec);
+		goto out_lport_put;
+
+	/* found existing rport, which was resumed */
+	} else if (newrec) {
+		/* Ignore pinfo->dev_loss_tmo. Leave rport and ctrl's as is */
+
+		nvme_fc_lport_put(lport);
+		*portptr = &newrec->remoteport;
+		return 0;
+	}
+
+	/* nothing found - allocate a new remoteport struct */
+
 	newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz),
 			 GFP_KERNEL);
 	if (!newrec) {
 		ret = -ENOMEM;
-		goto out_reghost_failed;
-	}
-
-	if (!nvme_fc_lport_get(lport)) {
-		ret = -ESHUTDOWN;
-		goto out_kfree_rport;
+		goto out_lport_put;
 	}
 
 	idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL);
 	if (idx < 0) {
 		ret = -ENOSPC;
-		goto out_lport_put;
+		goto out_kfree_rport;
 	}
 
 	INIT_LIST_HEAD(&newrec->endp_list);
@@ -510,10 +614,10 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
 	*portptr = &newrec->remoteport;
 	return 0;
 
-out_lport_put:
-	nvme_fc_lport_put(lport);
 out_kfree_rport:
 	kfree(newrec);
+out_lport_put:
+	nvme_fc_lport_put(lport);
 out_reghost_failed:
 	*portptr = NULL;
 	return ret;
@@ -544,6 +648,74 @@ nvme_fc_abort_lsops(struct nvme_fc_rport *rport)
 	return 0;
 }
 
+static void
+nvmet_fc_start_dev_loss_tmo(struct nvme_fc_ctrl *ctrl, u32 dev_loss_tmo)
+{
+	/* if dev_loss_tmo==0, dev loss is immediate */
+	if (!dev_loss_tmo) {
+		dev_info(ctrl->ctrl.device,
+			"NVME-FC{%d}: controller connectivity lost. "
+			"Deleting controller.\n",
+			ctrl->cnum);
+		__nvme_fc_del_ctrl(ctrl);
+		return;
+	}
+
+	dev_info(ctrl->ctrl.device,
+		"NVME-FC{%d}: controller connectivity lost. Awaiting reconnect\n",
+		ctrl->cnum);
+
+	switch (ctrl->ctrl.state) {
+	case NVME_CTRL_LIVE:
+		/*
+		 * Schedule a controller reset. The reset will terminate
+		 * the association and schedule the dev_loss_tmo timer.
+		 * The reconnect after terminating the association will
+		 * note the rport state and will not be scheduled.
+		 * The controller will sit in that state, with io
+		 * suspended at the block layer, until either dev_loss_tmo
+		 * expires or the remoteport is re-registered. If
+		 * re-registered, an immediate connect attempt will be
+		 * made.
+		 */
+		if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) ||
+		    !queue_work(nvme_fc_wq, &ctrl->reset_work))
+			__nvme_fc_del_ctrl(ctrl);
+		break;
+
+	case NVME_CTRL_RECONNECTING:
+		/*
+		 * The association has already been terminated and
+		 * dev_loss_tmo scheduled. The controller is either in
+		 * the process of connecting or has scheduled a
+		 * reconnect attempt.
+		 * If in the process of connecting, it will fail due
+		 * to loss of connectivity to the remoteport, and the
+		 * reconnect will not be scheduled as there is no
+		 * connectivity.
+		 * If awaiting the reconnect, terminate it as it'll only
+		 * fail.
+		 */
+		cancel_delayed_work_sync(&ctrl->connect_work);
+		break;
+
+	case NVME_CTRL_RESETTING:
+		/*
+		 * Controller is already in the process of terminating the
+		 * association. No need to do anything further. The reconnect
+		 * step will kick in naturally after the association is
+		 * terminated, detecting the lack of connectivity, and not
+		 * attempt a reconnect or schedule one.
+		 */
+		break;
+
+	case NVME_CTRL_DELETING:
+	default:
+		/* no action to take - let it delete */
+		break;
+	}
+}
+
 /**
  * nvme_fc_unregister_remoteport - transport entry point called by an
  *                              LLDD to deregister/remove a previously
@@ -573,15 +745,20 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr)
 	}
 	portptr->port_state = FC_OBJSTATE_DELETED;
 
-	/* tear down all associations to the remote port */
 	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
-		__nvme_fc_del_ctrl(ctrl);
+		nvmet_fc_start_dev_loss_tmo(ctrl, portptr->dev_loss_tmo);
 
 	spin_unlock_irqrestore(&rport->lock, flags);
 
 	nvme_fc_abort_lsops(rport);
 
+	/*
+	 * Release the reference; once all controllers go away (which
+	 * should only occur after dev_loss_tmo expires), the rport can
+	 * be torn down.
+	 */
 	nvme_fc_rport_put(rport);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport);
@@ -2434,6 +2611,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 		nvme_queue_async_events(&ctrl->ctrl);
 	}
 
+	cancel_delayed_work_sync(&ctrl->dev_loss_work);
+
 	return 0;	/* Success */
 
 out_term_aen_ops:
@@ -2552,6 +2731,7 @@ nvme_fc_delete_ctrl_work(struct work_struct *work)
 
 	cancel_work_sync(&ctrl->reset_work);
 	cancel_delayed_work_sync(&ctrl->connect_work);
+	cancel_delayed_work_sync(&ctrl->dev_loss_work);
 
 	/*
 	 * kill the association on the link side.  this will block
@@ -2666,6 +2846,9 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
 		return;
 	}
 
+	queue_delayed_work(nvme_fc_wq, &ctrl->dev_loss_work,
+			ctrl->dev_loss_tmo * HZ);
+
 	if (nvme_fc_rport_is_online(ctrl->rport)) {
 		ret = nvme_fc_create_association(ctrl);
 		if (ret)
@@ -2733,6 +2916,25 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
 			ctrl->cnum);
 }
 
+static void
+nvme_fc_dev_loss_ctrl_work(struct work_struct *work)
+{
+	struct nvme_fc_ctrl *ctrl =
+			container_of(to_delayed_work(work),
+				struct nvme_fc_ctrl, dev_loss_work);
+
+	if (ctrl->ctrl.state != NVME_CTRL_DELETING) {
+		dev_warn(ctrl->ctrl.device,
+			"NVME-FC{%d}: Device failed to reconnect within "
+			"dev_loss_tmo (%d seconds). Deleting controller\n",
+			ctrl->cnum, ctrl->dev_loss_tmo);
+		if (__nvme_fc_del_ctrl(ctrl))
+			dev_warn(ctrl->ctrl.device,
+				"NVME-FC{%d}: delete request failed\n",
+				ctrl->cnum);
+	}
+}
+
 
 static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
 	.queue_rq	= nvme_fc_queue_rq,
@@ -2891,6 +3093,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work);
 	INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work);
 	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
+	INIT_DELAYED_WORK(&ctrl->dev_loss_work, nvme_fc_dev_loss_ctrl_work);
 	spin_lock_init(&ctrl->lock);
 
 	/* io queue count */
-- 
2.11.0


Thread overview: 11+ messages
2017-05-04 18:07 [RFC 0/7] nvme_fc: add dev_loss_tmo support jsmart2021
2017-05-04 18:07 ` [RFC 1/7] nvme_fc: change ctlr state assignments during reset/reconnect jsmart2021
2017-05-04 18:07 ` [RFC 2/7] nvme_fc: add a dev_loss_tmo field to the remoteport jsmart2021
2017-05-04 18:07 ` [RFC 3/7] nvme_fc: add dev_loss_tmo to controller jsmart2021
2017-05-04 18:07 ` [RFC 4/7] nvme_fc: check connectivity before initiating reconnects jsmart2021
2017-05-04 18:07 ` [RFC 5/7] nvme_fc: change failure code on remoteport connectivity loss jsmart2021
2017-05-04 18:07 ` [RFC 6/7] nvme_fc: move remote port get/put/free location jsmart2021
2017-05-04 18:07 ` jsmart2021 [this message]
2017-05-04 19:24 ` [RFC 0/7] nvme_fc: add dev_loss_tmo support James Smart
2017-05-04 21:07   ` Christoph Hellwig
2017-05-04 23:17     ` James Smart
