* [PATCH 0/2] nvme_fc: add uevent to allow dynamic connects
@ 2017-05-05 23:13 jsmart2021
  2017-05-05 23:13 ` [PATCH 1/2] nvme_fc: create fc class and transport device jsmart2021
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: jsmart2021 @ 2017-05-05 23:13 UTC (permalink / raw)


From: James Smart <jsmart2021@gmail.com>

These patches add support for generating a uevent which can be
 caught and converted to an nvme-cli connect-all request for the
FC initiator and target ports.

James Smart (2):
  nvme_fc: create fc class and transport device
  nvme_fc: add uevent for auto-connect

 drivers/nvme/host/fc.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 79 insertions(+), 4 deletions(-)

-- 
2.11.0


* [PATCH 1/2] nvme_fc: create fc class and transport device
  2017-05-05 23:13 [PATCH 0/2] nvme_fc: add uevent to allow dynamic connects jsmart2021
@ 2017-05-05 23:13 ` jsmart2021
  2017-05-05 23:13 ` [PATCH 2/2] nvme_fc: add uevent for auto-connect jsmart2021
  2017-05-08 11:11 ` [PATCH 0/2] nvme_fc: add uevent to allow dynamic connects Johannes Thumshirn
  2 siblings, 0 replies; 7+ messages in thread
From: jsmart2021 @ 2017-05-05 23:13 UTC (permalink / raw)


From: James Smart <jsmart2021@gmail.com>

Added a new fc class, and a device node for the nvme_fc transport
under it. I expect the fc class will eventually be the location where
the SCSI and FC transports merge in the future.
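
With this in place the transport device is expected to show up in
sysfs along the lines of (illustrative path, not verified output):

    /sys/class/fc/nvme_fc_transport

This is also the kobject the uevent added in patch 2 is posted against.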

Signed-off-by: James Smart <james.smart at broadcom.com>
---
 drivers/nvme/host/fc.c | 35 +++++++++++++++++++++++++++++++----
 1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index e8f4cbdb9b5f..50dbe0b2f1fd 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -217,6 +217,9 @@ static DEFINE_IDA(nvme_fc_ctrl_cnt);
 
 static struct workqueue_struct *nvme_fc_wq;
 
+static struct class *fc_class;
+static struct device *nvmefc_device;
+
 
 
 /* *********************** FC-NVME Port Management ************************ */
@@ -2966,17 +2969,38 @@ static int __init nvme_fc_init_module(void)
 {
 	int ret;
 
+	fc_class = class_create(THIS_MODULE, "fc");
+	if (IS_ERR(fc_class)) {
+		pr_err("couldn't register class fc\n");
+		return PTR_ERR(fc_class);
+	}
+
+	nvmefc_device = device_create(fc_class, NULL, MKDEV(0, 0), NULL,
+				"nvme_fc_transport");
+	if (IS_ERR(nvmefc_device)) {
+		pr_err("couldn't create nvme_fc device!\n");
+		ret = PTR_ERR(nvmefc_device);
+		goto out_destroy_class;
+	}
+
 	nvme_fc_wq = create_workqueue("nvme_fc_wq");
-	if (!nvme_fc_wq)
-		return -ENOMEM;
+	if (!nvme_fc_wq) {
+		ret = -ENOMEM;
+		goto out_destroy_device;
+	}
 
 	ret = nvmf_register_transport(&nvme_fc_transport);
 	if (ret)
-		goto err;
+		goto out_destroy_workqueue;
 
 	return 0;
-err:
+
+out_destroy_workqueue:
 	destroy_workqueue(nvme_fc_wq);
+out_destroy_device:
+	device_destroy(fc_class, MKDEV(0, 0));
+out_destroy_class:
+	class_destroy(fc_class);
 	return ret;
 }
 
@@ -2992,6 +3016,9 @@ static void __exit nvme_fc_exit_module(void)
 
 	ida_destroy(&nvme_fc_local_port_cnt);
 	ida_destroy(&nvme_fc_ctrl_cnt);
+
+	device_destroy(fc_class, MKDEV(0, 0));
+	class_destroy(fc_class);
 }
 
 module_init(nvme_fc_init_module);
-- 
2.11.0


* [PATCH 2/2] nvme_fc: add uevent for auto-connect
  2017-05-05 23:13 [PATCH 0/2] nvme_fc: add uevent to allow dynamic connects jsmart2021
  2017-05-05 23:13 ` [PATCH 1/2] nvme_fc: create fc class and transport device jsmart2021
@ 2017-05-05 23:13 ` jsmart2021
  2017-05-08 11:17   ` Hannes Reinecke
  2017-05-08 11:11 ` [PATCH 0/2] nvme_fc: add uevent to allow dynamic connects Johannes Thumshirn
  2 siblings, 1 reply; 7+ messages in thread
From: jsmart2021 @ 2017-05-05 23:13 UTC (permalink / raw)


From: James Smart <jsmart2021@gmail.com>

To support auto-connecting to FC-NVME devices upon their dynamic
appearance, add a uevent that can kick off connection scripts.
uevent is posted against the nvme_fc transport device.

Added checking to stop the "feature" of connecting to the same
subsystem multiple times on FC devices.

Tested with the following rule to kick an nvme-cli connect-all for the
FC initiator and FC target ports. This is just an example for testing
and not intended for real life use.

ACTION=="change", SUBSYSTEM=="fc", ENV{NVMEFC_DISCVRY}=="*", \
	RUN+="/bin/sh -c '/usr/local/sbin/nvme connect-all --transport=fc $env{NVMEFC_DISCVRY} >> /tmp/nvme_fc.log'"
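
For testing, one way to watch for the event and inspect the
NVMEFC_DISCVRY payload is (a rough sketch; exact udevadm flags may
vary with the udev version in use):

    udevadm monitor --kernel --udev --property --subsystem-match=fc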

Signed-off-by: James Smart <james.smart at broadcom.com>
---
 drivers/nvme/host/fc.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 50dbe0b2f1fd..512d3c521dcd 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -392,6 +392,24 @@ nvme_fc_unregister_localport(struct nvme_fc_local_port *portptr)
 }
 EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport);
 
+static void
+nvme_fc_signal_discovery_scan(struct nvme_fc_lport *lport,
+		struct nvme_fc_rport *rport)
+{
+	char buffer[128];	/* NVMEFC_DISCVRY=...*/
+	char *envp[2] = {buffer, NULL};
+
+	if (!(rport->remoteport.port_role & FC_PORT_ROLE_NVME_DISCOVERY))
+		return;
+
+	snprintf(buffer, sizeof(buffer),
+		"NVMEFC_DISCVRY=--host-traddr=nn-0x%016llx:pn-0x%016llx"
+		" --traddr=nn-0x%016llx:pn-0x%016llx",
+		lport->localport.node_name, lport->localport.port_name,
+		rport->remoteport.node_name, rport->remoteport.port_name);
+	kobject_uevent_env(&nvmefc_device->kobj, KOBJ_CHANGE, envp);
+}
+
 /**
  * nvme_fc_register_remoteport - transport entry point called by an
  *                              LLDD to register the existence of a NVME
@@ -456,6 +474,8 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
 	list_add_tail(&newrec->endp_list, &lport->endp_list);
 	spin_unlock_irqrestore(&nvme_fc_lock, flags);
 
+	nvme_fc_signal_discovery_scan(lport, newrec);
+
 	*portptr = &newrec->remoteport;
 	return 0;
 
@@ -2707,6 +2727,19 @@ static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
 };
 
 
+static inline bool
+__nvme_fc_options_match(struct nvmf_ctrl_options *opts,
+			struct nvme_fc_ctrl *ctrl)
+{
+	if (strcmp(opts->subsysnqn, ctrl->ctrl.opts->subsysnqn) ||
+	    strcmp(opts->host->nqn, ctrl->ctrl.opts->host->nqn) ||
+	    memcmp(&opts->host->id, &ctrl->ctrl.opts->host->id,
+				sizeof(uuid_be)))
+		return false;
+
+	return true;
+}
+
 static struct nvme_ctrl *
 nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	struct nvme_fc_lport *lport, struct nvme_fc_rport *rport)
@@ -2714,6 +2747,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	struct nvme_fc_ctrl *ctrl;
 	unsigned long flags;
 	int ret, idx;
+	bool found = false;
 
 	if (!(rport->remoteport.port_role &
 		(FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
@@ -2721,6 +2755,20 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 		goto out_fail;
 	}
 
+	spin_lock_irqsave(&rport->lock, flags);
+	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
+		if (__nvme_fc_options_match(opts, ctrl)) {
+			found = true;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&rport->lock, flags);
+
+	if (found) {
+		ret = -EALREADY;
+		goto out_fail;
+	}
+
 	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
 	if (!ctrl) {
 		ret = -ENOMEM;
-- 
2.11.0


* [PATCH 0/2] nvme_fc: add uevent to allow dynamic connects
  2017-05-05 23:13 [PATCH 0/2] nvme_fc: add uevent to allow dynamic connects jsmart2021
  2017-05-05 23:13 ` [PATCH 1/2] nvme_fc: create fc class and transport device jsmart2021
  2017-05-05 23:13 ` [PATCH 2/2] nvme_fc: add uevent for auto-connect jsmart2021
@ 2017-05-08 11:11 ` Johannes Thumshirn
  2017-05-08 17:22   ` James Smart
  2 siblings, 1 reply; 7+ messages in thread
From: Johannes Thumshirn @ 2017-05-08 11:11 UTC (permalink / raw)


On 05/06/2017 01:13 AM, jsmart2021@gmail.com wrote:
> From: James Smart <jsmart2021 at gmail.com>
> 
> These patches add support for generating a uevent which can be
> caught and converted to an nvme-cli connect-all request for the
> FC initiator and target ports.

IMHO this should be a bit more generic. We do have two fabrics, FC and
RDMA (we can leave out PCIe here I think). *Iff* we want to do this we
at least need to do this for FC _and_ RDMA.


-- 
Johannes Thumshirn                                          Storage
jthumshirn at suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850


* [PATCH 2/2] nvme_fc: add uevent for auto-connect
  2017-05-05 23:13 ` [PATCH 2/2] nvme_fc: add uevent for auto-connect jsmart2021
@ 2017-05-08 11:17   ` Hannes Reinecke
  2017-05-08 18:17     ` James Smart
  0 siblings, 1 reply; 7+ messages in thread
From: Hannes Reinecke @ 2017-05-08 11:17 UTC (permalink / raw)


On 05/06/2017 01:13 AM, jsmart2021@gmail.com wrote:
> From: James Smart <jsmart2021 at gmail.com>
> 
> To support auto-connecting to FC-NVME devices upon their dynamic
> appearance, add a uevent that can kick off connection scripts.
> uevent is posted against the nvme_fc transport device.
> 
> Added checking to stop the "feature" of connecting to the same
> subsystem multiple times on FC devices.
> 
> Tested with the following rule to kick an nvme-cli connect-all for the
> FC initiator and FC target ports. This is just an example for testing
> and not intended for real life use.
> 
> ACTION=="change", SUBSYSTEM=="fc", ENV{NVMEFC_DISCVRY}=="*", \
> 	RUN+="/bin/sh -c '/usr/local/sbin/nvme connect-all --transport=fc $env{NVMEFC_DISCVRY} >> /tmp/nvme_fc.log'"
> 
> Signed-off-by: James Smart <james.smart at broadcom.com>
> ---
>  drivers/nvme/host/fc.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 48 insertions(+)
> 
Hmm.

I'm not sure if that will work for the proposed multipath extensions for
NVMe.

From my understanding NVMe will drop all queues and do a full
reconnection upon failure, correct?

So if there is a multipath failure NVMe will have to drop all failed
paths and reconnect.
Which means that if we have an all paths down scenario _all_ paths are
down, and need to be reconnected.
Consequently the root-fs becomes inaccessible for a brief period of
time, and relying on things like udev to do a reconnect will probably
not work.

Also, any other driver (with the notable exception of S/390 ones) (ok,
and iSCSI) does an autoconnect.
And we ran into _so_ many configuration problems due to that fact.
zfcp finally caved in and moved to autoconnect, too, precisely to avoid
all these configuration issues.

So what makes NVMe special that it cannot do autoconnect within the driver?

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare at suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)


* [PATCH 0/2] nvme_fc: add uevent to allow dynamic connects
  2017-05-08 11:11 ` [PATCH 0/2] nvme_fc: add uevent to allow dynamic connects Johannes Thumshirn
@ 2017-05-08 17:22   ` James Smart
  0 siblings, 0 replies; 7+ messages in thread
From: James Smart @ 2017-05-08 17:22 UTC (permalink / raw)


On 5/8/2017 4:11 AM, Johannes Thumshirn wrote:
> On 05/06/2017 01:13 AM, jsmart2021@gmail.com wrote:
>> From: James Smart <jsmart2021 at gmail.com>
>>
>> These patches add support for generating a uevent which can be
>> caught and converted to an nvme-cli connect-all request for the
>> FC initiator and target ports.
>
> IMHO this should be a bit more generic. We do have two fabrics, FC and
> RDMA (we can leave out PCIe here I think). *Iff* we want to do this we
> at least need to do this for FC _and_ RDMA.

It's not really possible due to differences in fabrics.

With Ethernet/IP fabrics, the local host doesn't necessarily know about 
the activation/termination of other IP addresses on the network, nor 
what IP addresses map to NVME subsystems (discovery controllers or 
storage controllers) nor the IP port numbers for the subsystems. Thus, 
something (admin action, config scripts, etc) must at least supply how 
to start the discovery process - specifying the location of the storage 
controllers or at least the location(s) of the discovery controllers. 
Even then, dynamic changes need something to restart that scan or be 
told where to rescan. None of the initial or dynamic connectivity info 
can come from the RDMA transport.

With FC fabrics, given FC has its own dynamic discovery engine, it can
recognize when FC ports have nvme subsystems present - discovery
controllers and/or storage controllers. It can determine when they come
into existence, go away, or are in existence and have had some kind of
state change (thus, please rescan). FC fabrics still use the same NVME
discovery engines that RDMA uses - based on the discovery controllers.
It uses the FC discovery engine to then initiate the discovery
controller scans, eliminating the admin/config steps mandated for RDMA.
The other thing about FC is that connections are very centric to the
initiator FC ports, due to zoning. Thus, every FC connection must
specify the initiator FC port and the target FC port.
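
For illustration, with made-up WWNN/WWPN values, the uevent payload
from patch 2 ends up expanding the udev rule into something like:

    nvme connect-all --transport=fc \
        --host-traddr=nn-0x20000090fa942779:pn-0x10000090fa942779 \
        --traddr=nn-0x200000109b346d7c:pn-0x100000109b346d7c

carrying both the initiator and target FC ports as required.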

The user is free to not put the udev scripts in place and to utilize the
same methods used by RDMA. However, the FC storage environment has
always had dynamic auto-connection and re-connection for SCSI storage,
and the ecosystem has the same expectation for NVME storage. Therefore
I believe FC should have the support in the kernel and allow the
user to put the udev scripts in place to support the auto-connection.

In the future, although there is currently a desire to keep connectivity
management in user space, supporting boot/swap (and low-memory configs)
on NVME fabric storage will likely require moving the discovery
controller attachment into the kernel rather than leaving it in the
nvme cli (iscsi discussions ring a bell?). With FC, boot support does
not require things like iBFT; it acts like SCSI, making it much easier.


-- james


* [PATCH 2/2] nvme_fc: add uevent for auto-connect
  2017-05-08 11:17   ` Hannes Reinecke
@ 2017-05-08 18:17     ` James Smart
  0 siblings, 0 replies; 7+ messages in thread
From: James Smart @ 2017-05-08 18:17 UTC (permalink / raw)


On 5/8/2017 4:17 AM, Hannes Reinecke wrote:
> On 05/06/2017 01:13 AM, jsmart2021@gmail.com wrote:
>> From: James Smart <jsmart2021 at gmail.com>
>>
>> To support auto-connecting to FC-NVME devices upon their dynamic
>> appearance, add a uevent that can kick off connection scripts.
>> uevent is posted against the nvme_fc transport device.
>>
> I'm not sure if that will work for the proposed multipath extensions for
> NVMe.

I don't know why this should conflict with multipath.


>
> From my understanding NVMe will drop all queues and do a full
> reconnection upon failure, correct?

Sure... but these are all actions "below the nvme device".

The nvme storage device presented to the OS, the namespace, is issuing 
io's to the controller block queue. When a controller errors or is lost, 
the nvme fabrics level (currently the transport) will stop the 
controller block queue and all outstanding requests are terminated and 
returned to the block queue for requeuing. The transport initially tries 
to reconnect, and if the reconnect fails, a timer is started to retry 
the reconnect in a few seconds. This repeats until a controller time 
limit expires (ctrl_loss_tmo), at which point the controller block 
queues are torn down. FC differs from RDMA in that it won't try to
reconnect if there isn't connectivity, and it sets the controller time
limit to the smaller of the SCSI FC transport dev_loss_tmo (passed to
it via the driver) and the ctrl_loss_tmo requested at connect time.
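
For reference, the ctrl_loss_tmo in question is the one requested on
the connect itself, e.g. (made-up addresses and NQN; assumes an
nvme-cli new enough to expose --ctrl-loss-tmo):

    nvme connect --transport=fc \
        --host-traddr=nn-0x20000090fa942779:pn-0x10000090fa942779 \
        --traddr=nn-0x200000109b346d7c:pn-0x100000109b346d7c \
        --nqn=nqn.2017-05.com.example:subsys1 --ctrl-loss-tmo=60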

So, while the reconnect is pending, the block queues are stopped and 
idle. If the transport successfully completes a reconnect before the 
timer expires, the controller's block queues are then released and io 
starts again.

This patch changes nothing in the behavior - it only keys the FC 
reconnect attempt to device appearance (note: if fc port is connected, 
the same timers used by rdma still occur on FC).

The patch does add one other thing though.  If the time limit did
expire and the controller was torn down, in order to "get the path back"
a new create_controller request has to be made.  The patch does, for FC,
key this to device appearance, so it's automated.  This is likely 
different from RDMA where a system script/daemon is periodically trying 
to connect again (device was there so keep trying to see if it comes 
back) or it depends on some administrative action to create the controller.


>
> So if there is a multipath failure NVMe will have to drop all failed
> paths and reconnect.
> Which means that if we have an all paths down scenario _all_ paths are
> down, and need to be reconnected.
> Consequently the root-fs becomes inaccessible for a brief period of
> time, and relying on things like udev to do a reconnect will probably
> not work.

As for multipathing:
1) if md-like multipathing is done, I believe it means there are 
separate nvme storage devices (each a namespace on top of a
controller). Thus each device is a "path". Paths would be added with the
appearance of a new nvme storage device, and when they are torn down, 
the path would go away.  I assume multipathing would also become aware 
of when the device is "stopped/blocked" due to its controller queues 
being stopped.

2) if a lighter-weight multipathing is done, say within the nvme layer, 
the rescanning of the nvme namespaces would pair the controller up with
the nvme storage device, thus each set of controller blk queues would be the
"path". Thus, when a controller's queues are "stopped/blocked" the nvme 
device knows and stops using that path. And when they go away, the 
"path" is removed.

We could talk further about options when the last path is gone but... 
back to this patch - you'll note nothing in this section has anything to 
do with the patch. The patch changes nothing in the overall nvme device 
or controller behaviors. The only thing the patch does is specific to 
the bottom levels of the FC transport - keying reconnects and/or new
device scanning to FC target device connectivity announcements.


> Also, any other driver (with the notable exception of S/390 ones) (ok,
> and iSCSI) does an autoconnect.
> And we ran into _so_ many configuration problems due to that fact.
> zfcp finally caved in and moved to autoconnect, too, precisely to avoid
> all these configuration issues.
>
> So what makes NVMe special that it cannot do autoconnect within the driver?

Well, this gets back to the response I just sent to Johannes.  NVME
discovery requires connecting to a discovery controller, reading the
discovery log records (sounds similar to iscsi and iSNS), and then,
from the discovery log records, connecting to nvme subsystems,
resulting in the nvme controllers. This functionality is currently not
in the kernel; it's in the nvme cli as the "connect-all" functionality
when talking to a discovery controller. For FC, as it knows of the
presence of discovery controllers on the FC fabric, this patch is how
we're invoking that functionality. Note: there's nothing in FC that
would provide the content of the discovery log records so that it
could skip the discovery controller connection. For RDMA, its lack of
connectivity knowledge prevents it from doing auto-connect once it's
torn down the controller after a ctrl_loss_tmo.
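
For reference, the two-step manual equivalent of what the udev rule
automates is roughly the following (made-up FC addresses and subsystem
NQN, purely illustrative):

    nvme discover --transport=fc \
        --host-traddr=nn-0x20000090fa942779:pn-0x10000090fa942779 \
        --traddr=nn-0x200000109b346d7c:pn-0x100000109b346d7c

    nvme connect --transport=fc \
        --host-traddr=nn-0x20000090fa942779:pn-0x10000090fa942779 \
        --traddr=nn-0x200000109b346d7c:pn-0x100000109b346d7c \
        --nqn=nqn.2017-05.com.example:subsys1

where connect-all simply folds the discover and the per-log-entry
connects into one command.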

-- james


end of thread

Thread overview: 7+ messages
2017-05-05 23:13 [PATCH 0/2] nvme_fc: add uevent to allow dynamic connects jsmart2021
2017-05-05 23:13 ` [PATCH 1/2] nvme_fc: create fc class and transport device jsmart2021
2017-05-05 23:13 ` [PATCH 2/2] nvme_fc: add uevent for auto-connect jsmart2021
2017-05-08 11:17   ` Hannes Reinecke
2017-05-08 18:17     ` James Smart
2017-05-08 11:11 ` [PATCH 0/2] nvme_fc: add uevent to allow dynamic connects Johannes Thumshirn
2017-05-08 17:22   ` James Smart
