All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v1 0/3] nvmet-rdma automatic port re-activation
@ 2018-04-12  8:06 Sagi Grimberg
  2018-04-12  8:06 ` [PATCH v1 1/3] nvmet-rdma: automatic listening " Sagi Grimberg
                   ` (4 more replies)
  0 siblings, 5 replies; 16+ messages in thread
From: Sagi Grimberg @ 2018-04-12  8:06 UTC (permalink / raw)


When an RDMA device goes away we must destroy all its associated
RDMA resources. RDMA device resets also manifest as device removal
events, and a short while later the device comes back. We want to
re-activate a port listener on this RDMA device when it comes back
into the system.

In order to make it happen, we save the RDMA device node_guid on a
ULP listener representation (nvmet_rdma_port) and when a RDMA device
comes into the system, we check if there is a listener port that needs
to be re-activated.

In addition, reflect the port state to the sysadmin nicely with a patch
to nvmetcli.

Changes from v0 (rfc):
- renamed tractive to trstate
- trstate configfs file without addr_ prefix to prevent json serialization on it
- nvmet_rdma_port_enable_work self requeue delay was increased to 5 seconds

Israel Rukshin (2):
  nvmet: Add fabrics ops to port
  nvmet: Add port transport state flag

Sagi Grimberg (1):
  nvmet-rdma: automatic listening port re-activation

 drivers/nvme/target/configfs.c |  11 ++
 drivers/nvme/target/core.c     |  17 ++-
 drivers/nvme/target/nvmet.h    |   3 +
 drivers/nvme/target/rdma.c     | 237 ++++++++++++++++++++++++++---------------
 4 files changed, 175 insertions(+), 93 deletions(-)

-- 
2.14.1

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v1 1/3] nvmet-rdma: automatic listening port re-activation
  2018-04-12  8:06 [PATCH v1 0/3] nvmet-rdma automatic port re-activation Sagi Grimberg
@ 2018-04-12  8:06 ` Sagi Grimberg
  2018-04-12 13:08   ` Israel Rukshin
  2018-04-12  8:06 ` [PATCH v1 2/3] nvmet: Add fabrics ops to port Sagi Grimberg
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 16+ messages in thread
From: Sagi Grimberg @ 2018-04-12  8:06 UTC (permalink / raw)


In case the device goes away (or resets) we get a device
removal event (or .remove ib_client callback). So what
we want is to destroy the listening cm_id and re-activate
(or enable) when the same device comes back. Hence we introduce
nvmet_rdma_port which stores the ib_device node guid, and when
a new device comes into the system (ib_client .add callback) we
search for an existing listener port on this device and reconfigure
the listener cm_id.

Signed-off-by: Sagi Grimberg <sagi at grimberg.me>
---
 drivers/nvme/target/rdma.c | 229 ++++++++++++++++++++++++++++-----------------
 1 file changed, 141 insertions(+), 88 deletions(-)

diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 52e0c5d579a7..b0bc716de96d 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -118,6 +118,15 @@ struct nvmet_rdma_device {
 	struct list_head	entry;
 };
 
+struct nvmet_rdma_port {
+	struct nvmet_port	*nport;
+	struct sockaddr_storage addr;
+	struct rdma_cm_id	*cm_id;
+	__be64			node_guid;
+	struct list_head	entry;
+	struct delayed_work	enable_work;
+};
+
 static bool nvmet_rdma_use_srq;
 module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444);
 MODULE_PARM_DESC(use_srq, "Use shared receive queue.");
@@ -129,6 +138,9 @@ static DEFINE_MUTEX(nvmet_rdma_queue_mutex);
 static LIST_HEAD(device_list);
 static DEFINE_MUTEX(device_list_mutex);
 
+static LIST_HEAD(port_list);
+static DEFINE_MUTEX(port_list_mutex);
+
 static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp);
 static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc);
 static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
@@ -1127,6 +1139,7 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id,
 static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
 		struct rdma_cm_event *event)
 {
+	struct nvmet_rdma_port *port = cm_id->context;
 	struct nvmet_rdma_device *ndev;
 	struct nvmet_rdma_queue *queue;
 	int ret = -EINVAL;
@@ -1142,7 +1155,7 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
 		ret = -ENOMEM;
 		goto put_device;
 	}
-	queue->port = cm_id->context;
+	queue->port = port->nport;
 
 	if (queue->host_qid == 0) {
 		/* Let inflight controller teardown complete */
@@ -1249,53 +1262,6 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
 	schedule_work(&queue->release_work);
 }
 
-/**
- * nvme_rdma_device_removal() - Handle RDMA device removal
- * @cm_id:	rdma_cm id, used for nvmet port
- * @queue:      nvmet rdma queue (cm id qp_context)
- *
- * DEVICE_REMOVAL event notifies us that the RDMA device is about
- * to unplug. Note that this event can be generated on a normal
- * queue cm_id and/or a device bound listener cm_id (where in this
- * case queue will be null).
- *
- * We registered an ib_client to handle device removal for queues,
- * so we only need to handle the listening port cm_ids. In this case
- * we nullify the priv to prevent double cm_id destruction and destroying
- * the cm_id implicitely by returning a non-zero rc to the callout.
- */
-static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
-		struct nvmet_rdma_queue *queue)
-{
-	struct nvmet_port *port;
-
-	if (queue) {
-		/*
-		 * This is a queue cm_id. we have registered
-		 * an ib_client to handle queues removal
-		 * so don't interfear and just return.
-		 */
-		return 0;
-	}
-
-	port = cm_id->context;
-
-	/*
-	 * This is a listener cm_id. Make sure that
-	 * future remove_port won't invoke a double
-	 * cm_id destroy. use atomic xchg to make sure
-	 * we don't compete with remove_port.
-	 */
-	if (xchg(&port->priv, NULL) != cm_id)
-		return 0;
-
-	/*
-	 * We need to return 1 so that the core will destroy
-	 * it's own ID.  What a great API design..
-	 */
-	return 1;
-}
-
 static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		struct rdma_cm_event *event)
 {
@@ -1322,8 +1288,7 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		nvmet_rdma_queue_disconnect(queue);
 		break;
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
-		ret = nvmet_rdma_device_removal(cm_id, queue);
-		break;
+		break; /* handled by nvmet_rdma_remove_one */
 	case RDMA_CM_EVENT_REJECTED:
 		pr_debug("Connection rejected: %s\n",
 			 rdma_reject_msg(cm_id, event->status));
@@ -1359,34 +1324,12 @@ static void nvmet_rdma_delete_ctrl(struct nvmet_ctrl *ctrl)
 	mutex_unlock(&nvmet_rdma_queue_mutex);
 }
 
-static int nvmet_rdma_add_port(struct nvmet_port *port)
+static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port)
 {
+	struct sockaddr *addr = (struct sockaddr *)&port->addr;
 	struct rdma_cm_id *cm_id;
-	struct sockaddr_storage addr = { };
-	__kernel_sa_family_t af;
 	int ret;
 
-	switch (port->disc_addr.adrfam) {
-	case NVMF_ADDR_FAMILY_IP4:
-		af = AF_INET;
-		break;
-	case NVMF_ADDR_FAMILY_IP6:
-		af = AF_INET6;
-		break;
-	default:
-		pr_err("address family %d not supported\n",
-				port->disc_addr.adrfam);
-		return -EINVAL;
-	}
-
-	ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr,
-			port->disc_addr.trsvcid, &addr);
-	if (ret) {
-		pr_err("malformed ip/port passed: %s:%s\n",
-			port->disc_addr.traddr, port->disc_addr.trsvcid);
-		return ret;
-	}
-
 	cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port,
 			RDMA_PS_TCP, IB_QPT_RC);
 	if (IS_ERR(cm_id)) {
@@ -1404,23 +1347,22 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
 		goto out_destroy_id;
 	}
 
-	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr);
+	ret = rdma_bind_addr(cm_id, addr);
 	if (ret) {
-		pr_err("binding CM ID to %pISpcs failed (%d)\n",
-			(struct sockaddr *)&addr, ret);
+		pr_err("binding CM ID to %pISpcs failed (%d)\n", addr, ret);
 		goto out_destroy_id;
 	}
 
 	ret = rdma_listen(cm_id, 128);
 	if (ret) {
-		pr_err("listening to %pISpcs failed (%d)\n",
-			(struct sockaddr *)&addr, ret);
+		pr_err("listening to %pISpcs failed (%d)\n", addr, ret);
 		goto out_destroy_id;
 	}
 
-	pr_info("enabling port %d (%pISpcs)\n",
-		le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr);
-	port->priv = cm_id;
+	port->cm_id = cm_id;
+	if (cm_id->device)
+		port->node_guid = cm_id->device->node_guid;
+
 	return 0;
 
 out_destroy_id:
@@ -1428,18 +1370,100 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
 	return ret;
 }
 
-static void nvmet_rdma_remove_port(struct nvmet_port *port)
+static void nvmet_rdma_enable_port_work(struct work_struct *w)
+{
+	struct nvmet_rdma_port *port = container_of(to_delayed_work(w),
+			struct nvmet_rdma_port, enable_work);
+	int ret;
+
+	ret = nvmet_rdma_enable_port(port);
+	if (ret)
+		schedule_delayed_work(&port->enable_work, 5 * HZ);
+}
+
+static int nvmet_rdma_add_port(struct nvmet_port *nport)
+{
+	struct nvmet_rdma_port *port;
+	__kernel_sa_family_t af;
+	int ret;
+
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+
+	switch (nport->disc_addr.adrfam) {
+	case NVMF_ADDR_FAMILY_IP4:
+		af = AF_INET;
+		break;
+	case NVMF_ADDR_FAMILY_IP6:
+		af = AF_INET6;
+		break;
+	default:
+		pr_err("address family %d not supported\n",
+				nport->disc_addr.adrfam);
+		ret = -EINVAL;
+		goto out_free_port;
+	}
+
+	ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr,
+			nport->disc_addr.trsvcid, &port->addr);
+	if (ret) {
+		pr_err("malformed ip/port passed: %s:%s\n",
+			nport->disc_addr.traddr, nport->disc_addr.trsvcid);
+		goto out_free_port;
+	}
+
+	ret = nvmet_rdma_enable_port(port);
+	if(ret)
+		goto out_free_port;
+
+	pr_info("enabling port %d (%pISpcs)\n",
+		le16_to_cpu(nport->disc_addr.portid),
+		(struct sockaddr *)&port->addr);
+
+	nport->priv = port;
+	port->nport = nport;
+	INIT_DELAYED_WORK(&port->enable_work, nvmet_rdma_enable_port_work);
+
+	mutex_lock(&port_list_mutex);
+	list_add_tail(&port->entry, &port_list);
+	mutex_unlock(&port_list_mutex);
+
+	return 0;
+
+out_free_port:
+	kfree(port);
+	return ret;
+}
+
+static void nvmet_rdma_disable_port(struct nvmet_rdma_port *port)
 {
-	struct rdma_cm_id *cm_id = xchg(&port->priv, NULL);
+	struct rdma_cm_id *cm_id = port->cm_id;
 
+	port->cm_id = NULL;
 	if (cm_id)
 		rdma_destroy_id(cm_id);
 }
 
+static void nvmet_rdma_remove_port(struct nvmet_port *nport)
+{
+	struct nvmet_rdma_port *port = nport->priv;
+
+	mutex_lock(&port_list_mutex);
+	list_del(&port->entry);
+	mutex_unlock(&port_list_mutex);
+
+	cancel_delayed_work_sync(&port->enable_work);
+
+	nvmet_rdma_disable_port(port);
+	kfree(port);
+}
+
 static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
-		struct nvmet_port *port, char *traddr)
+		struct nvmet_port *nport, char *traddr)
 {
-	struct rdma_cm_id *cm_id = port->priv;
+	struct nvmet_rdma_port *port = nport->priv;
+	struct rdma_cm_id *cm_id = port->cm_id;
 
 	if (inet_addr_is_any((struct sockaddr *)&cm_id->route.addr.src_addr)) {
 		struct nvmet_rdma_rsp *rsp =
@@ -1449,7 +1473,7 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
 
 		sprintf(traddr, "%pISc", addr);
 	} else {
-		memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
+		memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE);
 	}
 }
 
@@ -1466,9 +1490,26 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
 	.disc_traddr		= nvmet_rdma_disc_port_addr,
 };
 
+static void nvmet_rdma_add_one(struct ib_device *ib_device)
+{
+	struct nvmet_rdma_port *port, *n;
+
+	mutex_lock(&port_list_mutex);
+	list_for_each_entry_safe(port, n, &port_list, entry) {
+		if (port->node_guid != ib_device->node_guid)
+			continue;
+
+		pr_info("device added, enabling port %d\n",
+			le16_to_cpu(port->nport->disc_addr.portid));
+		schedule_delayed_work(&port->enable_work, HZ);
+	}
+	mutex_unlock(&port_list_mutex);
+}
+
 static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
 {
 	struct nvmet_rdma_queue *queue, *tmp;
+	struct nvmet_rdma_port *port, *n;
 	struct nvmet_rdma_device *ndev;
 	bool found = false;
 
@@ -1481,6 +1522,17 @@ static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data
 	}
 	mutex_unlock(&device_list_mutex);
 
+	mutex_lock(&port_list_mutex);
+	list_for_each_entry_safe(port, n, &port_list, entry) {
+		if (port->node_guid != ib_device->node_guid)
+			continue;
+
+		pr_info("device removal, disabling port %d\n",
+			le16_to_cpu(port->nport->disc_addr.portid));
+		nvmet_rdma_disable_port(port);
+	}
+	mutex_unlock(&port_list_mutex);
+
 	if (!found)
 		return;
 
@@ -1494,7 +1546,7 @@ static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data
 		if (queue->dev->device != ib_device)
 			continue;
 
-		pr_info("Removing queue %d\n", queue->idx);
+		pr_info("device removal, removing queue %d\n", queue->idx);
 		list_del_init(&queue->queue_list);
 		__nvmet_rdma_queue_disconnect(queue);
 	}
@@ -1505,6 +1557,7 @@ static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data
 
 static struct ib_client nvmet_rdma_ib_client = {
 	.name   = "nvmet_rdma",
+	.add = nvmet_rdma_add_one,
 	.remove = nvmet_rdma_remove_one
 };
 
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v1 2/3] nvmet: Add fabrics ops to port
  2018-04-12  8:06 [PATCH v1 0/3] nvmet-rdma automatic port re-activation Sagi Grimberg
  2018-04-12  8:06 ` [PATCH v1 1/3] nvmet-rdma: automatic listening " Sagi Grimberg
@ 2018-04-12  8:06 ` Sagi Grimberg
  2018-04-12  8:06 ` [PATCH v1 3/3] nvmet: Add port transport state flag Sagi Grimberg
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 16+ messages in thread
From: Sagi Grimberg @ 2018-04-12  8:06 UTC (permalink / raw)


From: Israel Rukshin <israelr@mellanox.com>

Instead of getting the nvmet_fabrics_ops from nvmet_transports array
each time we want to use it, just take it directly from the port.

Signed-off-by: Israel Rukshin <israelr at mellanox.com>
Reviewed-by: Max Gurtovoy <maxg at mellanox.com>
Signed-off-by: Sagi Grimberg <sagi at grimberg.me>
---
 drivers/nvme/target/core.c  | 9 ++++-----
 drivers/nvme/target/nvmet.h | 1 +
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index e95424f172fd..e95e9cd30647 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -189,21 +189,20 @@ int nvmet_enable_port(struct nvmet_port *port)
 		return ret;
 	}
 
+	port->ops = ops;
 	port->enabled = true;
 	return 0;
 }
 
 void nvmet_disable_port(struct nvmet_port *port)
 {
-	const struct nvmet_fabrics_ops *ops;
-
 	lockdep_assert_held(&nvmet_config_sem);
 
 	port->enabled = false;
 
-	ops = nvmet_transports[port->disc_addr.trtype];
-	ops->remove_port(port);
-	module_put(ops->owner);
+	port->ops->remove_port(port);
+	module_put(port->ops->owner);
+	port->ops = NULL;
 }
 
 static void nvmet_keep_alive_timer(struct work_struct *work)
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 15fd84ab21f8..bc04e5db3a12 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -98,6 +98,7 @@ struct nvmet_port {
 	struct list_head		referrals;
 	void				*priv;
 	bool				enabled;
+	const struct nvmet_fabrics_ops 	*ops;
 };
 
 static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v1 3/3] nvmet: Add port transport state flag
  2018-04-12  8:06 [PATCH v1 0/3] nvmet-rdma automatic port re-activation Sagi Grimberg
  2018-04-12  8:06 ` [PATCH v1 1/3] nvmet-rdma: automatic listening " Sagi Grimberg
  2018-04-12  8:06 ` [PATCH v1 2/3] nvmet: Add fabrics ops to port Sagi Grimberg
@ 2018-04-12  8:06 ` Sagi Grimberg
  2018-04-13 17:14   ` Christoph Hellwig
  2018-04-12  8:06 ` [PATCH 4/3 v1 nvmetcli] nvmetcli: expose nvmet port status and state Sagi Grimberg
  2018-04-13 17:00 ` [PATCH v1 0/3] nvmet-rdma automatic port re-activation Christoph Hellwig
  4 siblings, 1 reply; 16+ messages in thread
From: Sagi Grimberg @ 2018-04-12  8:06 UTC (permalink / raw)


From: Israel Rukshin <israelr@mellanox.com>

The trstate port flag means that the nvmet transport state is up and ready
for receiving requests from the host. It differs from the enabled port state
(a port with a subsystem symbolic link), which doesn't guarantee this. The
trstate flag is necessary in case the physical ports go down while nvmet
ports are enabled. In this case the port state remains enabled but trstate
becomes "down".

Signed-off-by: Israel Rukshin <israelr at mellanox.com>
Reviewed-by: Max Gurtovoy <maxg at mellanox.com>
Signed-off-by: Sagi Grimberg <sagi at grimberg.me>
---
 drivers/nvme/target/configfs.c | 11 +++++++++++
 drivers/nvme/target/core.c     |  8 ++++++++
 drivers/nvme/target/nvmet.h    |  2 ++
 drivers/nvme/target/rdma.c     |  8 ++++++++
 4 files changed, 29 insertions(+)

diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index ad9ff27234b5..6501b2655508 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -265,6 +265,16 @@ static ssize_t nvmet_addr_trtype_store(struct config_item *item,
 
 CONFIGFS_ATTR(nvmet_, addr_trtype);
 
+static ssize_t nvmet_trstate_show(struct config_item *item, char *page)
+{
+	struct nvmet_port *port = to_nvmet_port(item);
+
+	return sprintf(page, "%s\n",
+		nvmet_is_port_active(port) ? "up" : "down");
+}
+
+CONFIGFS_ATTR_RO(nvmet_, trstate);
+
 /*
  * Namespace structures & file operation functions below
  */
@@ -870,6 +880,7 @@ static struct configfs_attribute *nvmet_port_attrs[] = {
 	&nvmet_attr_addr_traddr,
 	&nvmet_attr_addr_trsvcid,
 	&nvmet_attr_addr_trtype,
+	&nvmet_attr_trstate,
 	NULL,
 };
 
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index e95e9cd30647..96ba6a1a504c 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -205,6 +205,14 @@ void nvmet_disable_port(struct nvmet_port *port)
 	port->ops = NULL;
 }
 
+bool nvmet_is_port_active(struct nvmet_port *port)
+{
+	if (port->ops && port->ops->is_port_active)
+		return port->ops->is_port_active(port);
+
+	return port->enabled;
+}
+
 static void nvmet_keep_alive_timer(struct work_struct *work)
 {
 	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index bc04e5db3a12..30981d738d37 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -212,6 +212,7 @@ struct nvmet_fabrics_ops {
 	void (*delete_ctrl)(struct nvmet_ctrl *ctrl);
 	void (*disc_traddr)(struct nvmet_req *req,
 			struct nvmet_port *port, char *traddr);
+	bool (*is_port_active)(struct nvmet_port *port);
 };
 
 #define NVMET_MAX_INLINE_BIOVEC	8
@@ -309,6 +310,7 @@ void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops);
 
 int nvmet_enable_port(struct nvmet_port *port);
 void nvmet_disable_port(struct nvmet_port *port);
+bool nvmet_is_port_active(struct nvmet_port *port);
 
 void nvmet_referral_enable(struct nvmet_port *parent, struct nvmet_port *port);
 void nvmet_referral_disable(struct nvmet_port *port);
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index b0bc716de96d..fdd651d3853c 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1477,6 +1477,13 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
 	}
 }
 
+static bool nvmet_rdma_is_port_active(struct nvmet_port *nport)
+{
+	struct nvmet_rdma_port *port = nport->priv;
+
+	return port->cm_id ? true : false;
+}
+
 static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
 	.owner			= THIS_MODULE,
 	.type			= NVMF_TRTYPE_RDMA,
@@ -1484,6 +1491,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
 	.msdbd			= 1,
 	.has_keyed_sgls		= 1,
 	.add_port		= nvmet_rdma_add_port,
+	.is_port_active		= nvmet_rdma_is_port_active,
 	.remove_port		= nvmet_rdma_remove_port,
 	.queue_response		= nvmet_rdma_queue_response,
 	.delete_ctrl		= nvmet_rdma_delete_ctrl,
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 4/3 v1 nvmetcli] nvmetcli: expose nvmet port status and state
  2018-04-12  8:06 [PATCH v1 0/3] nvmet-rdma automatic port re-activation Sagi Grimberg
                   ` (2 preceding siblings ...)
  2018-04-12  8:06 ` [PATCH v1 3/3] nvmet: Add port transport state flag Sagi Grimberg
@ 2018-04-12  8:06 ` Sagi Grimberg
  2018-04-12 11:25   ` Nitzan Carmi
  2018-04-13 17:00 ` [PATCH v1 0/3] nvmet-rdma automatic port re-activation Christoph Hellwig
  4 siblings, 1 reply; 16+ messages in thread
From: Sagi Grimberg @ 2018-04-12  8:06 UTC (permalink / raw)


status reflects if any subsystems are bound and can
accept existing connections. state reflects the
physical state of the port. Also colorize UI if
port state is down.

Signed-off-by: Sagi Grimberg <sagi at grimberg.me>
---
 nvmet/nvme.py | 11 +++++++++++
 nvmetcli      |  9 ++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/nvmet/nvme.py b/nvmet/nvme.py
index c245a4240dd9..290de2807930 100644
--- a/nvmet/nvme.py
+++ b/nvmet/nvme.py
@@ -174,6 +174,17 @@ class CFSNode(object):
             self._enable = int(file_fd.read().strip())
         return self._enable
 
+    def get_trstate(self):
+        self._check_self()
+        _trstate = None
+        path = "%s/trstate" % self.path
+        if not os.path.isfile(path):
+            return None
+
+        with open(path, 'r') as file_fd:
+            _trstate = file_fd.read().strip()
+        return _trstate
+
     def set_enable(self, value):
         self._check_self()
         path = "%s/enable" % self.path
diff --git a/nvmetcli b/nvmetcli
index 6b102a235450..fc6b0856c249 100755
--- a/nvmetcli
+++ b/nvmetcli
@@ -388,7 +388,14 @@ class UIPortNode(UINode):
         if trsvcid != "none":
             info.append("trsvcid=%s" % trsvcid)
         enabled = not (not self.cfnode.subsystems and not list(self.cfnode.referrals))
-        return (", ".join(info), True if enabled else 0)
+        info.append("status=" + ("enabled" if enabled else "disabled"))
+        if not enabled:
+            ret = 0
+        else:
+            trstate = self.cfnode.get_trstate()
+            info.append("state=" + trstate)
+            ret = True if trstate == "up" else False
+        return (", ".join(info), ret)
 
 class UIPortSubsystemsNode(UINode):
     def __init__(self, parent):
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 4/3 v1 nvmetcli] nvmetcli: expose nvmet port status and state
  2018-04-12  8:06 ` [PATCH 4/3 v1 nvmetcli] nvmetcli: expose nvmet port status and state Sagi Grimberg
@ 2018-04-12 11:25   ` Nitzan Carmi
  2018-04-12 12:34     ` Sagi Grimberg
  0 siblings, 1 reply; 16+ messages in thread
From: Nitzan Carmi @ 2018-04-12 11:25 UTC (permalink / raw)


On 12/04/2018 11:06, Sagi Grimberg wrote:
> status reflects if any subsystems are bound and can
> accept existing connections. state reflects the
> physical state of the port. Also colorize UI if
> port state is down.
> 
> Signed-off-by: Sagi Grimberg <sagi at grimberg.me>
> ---
>   nvmet/nvme.py | 11 +++++++++++
>   nvmetcli      |  9 ++++++++-
>   2 files changed, 19 insertions(+), 1 deletion(-)
> 
> diff --git a/nvmet/nvme.py b/nvmet/nvme.py
> index c245a4240dd9..290de2807930 100644
> --- a/nvmet/nvme.py
> +++ b/nvmet/nvme.py
> @@ -174,6 +174,17 @@ class CFSNode(object):

get_trstate is supposed to be available only for port nodes,
so I think it is better to put it under class Port (which
inherits from class CFSNode), rather than from class CFSNode itself.

>               self._enable = int(file_fd.read().strip())
>           return self._enable
>   
> +    def get_trstate(self):
> +        self._check_self()
> +        _trstate = None
> +        path = "%s/trstate" % self.path
> +        if not os.path.isfile(path):
> +            return None
> +
> +        with open(path, 'r') as file_fd:
> +            _trstate = file_fd.read().strip()
> +        return _trstate
> +
>       def set_enable(self, value):
>           self._check_self()
>           path = "%s/enable" % self.path
> diff --git a/nvmetcli b/nvmetcli
> index 6b102a235450..fc6b0856c249 100755
> --- a/nvmetcli
> +++ b/nvmetcli
> @@ -388,7 +388,14 @@ class UIPortNode(UINode):
>           if trsvcid != "none":
>               info.append("trsvcid=%s" % trsvcid)
>           enabled = not (not self.cfnode.subsystems and not list(self.cfnode.referrals))
> -        return (", ".join(info), True if enabled else 0)
> +        info.append("status=" + ("enabled" if enabled else "disabled"))
> +        if not enabled:
> +            ret = 0
> +        else:
> +            trstate = self.cfnode.get_trstate()
> +            info.append("state=" + trstate)
> +            ret = True if trstate == "up" else False
> +        return (", ".join(info), ret)
>   
>   class UIPortSubsystemsNode(UINode):
>       def __init__(self, parent):
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 4/3 v1 nvmetcli] nvmetcli: expose nvmet port status and state
  2018-04-12 11:25   ` Nitzan Carmi
@ 2018-04-12 12:34     ` Sagi Grimberg
  0 siblings, 0 replies; 16+ messages in thread
From: Sagi Grimberg @ 2018-04-12 12:34 UTC (permalink / raw)



> get_trstate is supposed to be available only for port nodes,
> so I think it is better to put it under class Port (which
> inherits from class CFSNode), rather than from class CFSNode itself.

You are absolutely correct, thanks!

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v1 1/3] nvmet-rdma: automatic listening port re-activation
  2018-04-12  8:06 ` [PATCH v1 1/3] nvmet-rdma: automatic listening " Sagi Grimberg
@ 2018-04-12 13:08   ` Israel Rukshin
  0 siblings, 0 replies; 16+ messages in thread
From: Israel Rukshin @ 2018-04-12 13:08 UTC (permalink / raw)


Looks good,

Reviewed-by: Israel Rukshin <israelr at mellanox.com>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v1 0/3] nvmet-rdma automatic port re-activation
  2018-04-12  8:06 [PATCH v1 0/3] nvmet-rdma automatic port re-activation Sagi Grimberg
                   ` (3 preceding siblings ...)
  2018-04-12  8:06 ` [PATCH 4/3 v1 nvmetcli] nvmetcli: expose nvmet port status and state Sagi Grimberg
@ 2018-04-13 17:00 ` Christoph Hellwig
  2018-04-15  8:53   ` Sagi Grimberg
  4 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2018-04-13 17:00 UTC (permalink / raw)


On Thu, Apr 12, 2018@11:06:52AM +0300, Sagi Grimberg wrote:
> When a RDMA device goes away we must destroy all it's associated
> RDMA resources. RDMa device resets also manifest as device removal
> events and a short while after they come back. We want to re-activate
> a port listener on this RDMA device when it comes back in to the system.

I really detest this series.  It just shows how messed up the whole
IB core interaction is.  The right way to fix this is to stop treating
a IB device reset as a device removal, and give it a different event.

And also make sure we have a single unified event system instead of
three separate ones.

> 
> In order to make it happen, we save the RDMA device node_guid on a
> ULP listener representation (nvmet_rdma_port) and when a RDMA device
> comes into the system, we check if there is a listener port that needs
> to be re-activated.
> 
> In addition, reflect the port state to the sysadmin nicely with a patch
> to nvmetcli.
> 
> Changes from v0 (rfc):
> - renamed tractive to trstate
> - trstate configfs file without addr_ prefix to prevent json serialization on it
> - nvmet_rdma_port_enable_work self requeue delay was increased to 5 seconds
> 
> Israel Rukshin (2):
>   nvmet: Add fabrics ops to port
>   nvmet: Add port transport state flag
> 
> Sagi Grimberg (1):
>   nvmet-rdma: automatic listening port re-activation
> 
>  drivers/nvme/target/configfs.c |  11 ++
>  drivers/nvme/target/core.c     |  17 ++-
>  drivers/nvme/target/nvmet.h    |   3 +
>  drivers/nvme/target/rdma.c     | 237 ++++++++++++++++++++++++++---------------
>  4 files changed, 175 insertions(+), 93 deletions(-)
> 
> -- 
> 2.14.1
---end quoted text---

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v1 3/3] nvmet: Add port transport state flag
  2018-04-12  8:06 ` [PATCH v1 3/3] nvmet: Add port transport state flag Sagi Grimberg
@ 2018-04-13 17:14   ` Christoph Hellwig
  2018-04-15  8:54     ` Sagi Grimberg
  0 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2018-04-13 17:14 UTC (permalink / raw)


> +{
> +	struct nvmet_port *port = to_nvmet_port(item);
> +
> +	return sprintf(page, "%s\n",
> +		nvmet_is_port_active(port) ? "up" : "down");
> +}
> +
> +CONFIGFS_ATTR_RO(nvmet_, trstate);
> +
>  /*
>   * Namespace structures & file operation functions below
>   */
> @@ -870,6 +880,7 @@ static struct configfs_attribute *nvmet_port_attrs[] = {
>  	&nvmet_attr_addr_traddr,
>  	&nvmet_attr_addr_trsvcid,
>  	&nvmet_attr_addr_trtype,
> +	&nvmet_attr_trstate,

Please don't create attributes without a group ever.  Just create a
different group which isn't serialized, e.g. state as the group name
and then transport as the attribute name inside it, or something similar.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v1 0/3] nvmet-rdma automatic port re-activation
  2018-04-13 17:00 ` [PATCH v1 0/3] nvmet-rdma automatic port re-activation Christoph Hellwig
@ 2018-04-15  8:53   ` Sagi Grimberg
  2018-04-17 15:43     ` Christoph Hellwig
  2018-04-20  2:48     ` Doug Ledford
  0 siblings, 2 replies; 16+ messages in thread
From: Sagi Grimberg @ 2018-04-15  8:53 UTC (permalink / raw)



>> When a RDMA device goes away we must destroy all it's associated
>> RDMA resources. RDMa device resets also manifest as device removal
>> events and a short while after they come back. We want to re-activate
>> a port listener on this RDMA device when it comes back in to the system.
> 
> I really detest this series.  It just shows how messed up the whole
> IB core interaction is.  The right way to fix this is to stop treating
> a IB device reset as a device removal, and give it a different event.
> 
> And also make sure we have a single unified event system instead of
> three separate ones.

I've raised this claim before, but got resistance from Doug:
https://www.spinics.net/lists/linux-rdma/msg59815.html

At that point, in the lack of change in the RDMA interface, this
is pretty much what we can do to support device resets...

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v1 3/3] nvmet: Add port transport state flag
  2018-04-13 17:14   ` Christoph Hellwig
@ 2018-04-15  8:54     ` Sagi Grimberg
  2018-04-17 15:28       ` Christoph Hellwig
  0 siblings, 1 reply; 16+ messages in thread
From: Sagi Grimberg @ 2018-04-15  8:54 UTC (permalink / raw)




>> +{
>> +	struct nvmet_port *port = to_nvmet_port(item);
>> +
>> +	return sprintf(page, "%s\n",
>> +		nvmet_is_port_active(port) ? "up" : "down");
>> +}
>> +
>> +CONFIGFS_ATTR_RO(nvmet_, trstate);
>> +
>>   /*
>>    * Namespace structures & file operation functions below
>>    */
>> @@ -870,6 +880,7 @@ static struct configfs_attribute *nvmet_port_attrs[] = {
>>   	&nvmet_attr_addr_traddr,
>>   	&nvmet_attr_addr_trsvcid,
>>   	&nvmet_attr_addr_trtype,
>> +	&nvmet_attr_trstate,
> 
> Please don't create attributes without a group ever.  Just create a
> different group which isn't serialized, e.g. state as the group name
> and then transport as the attribute name inside it, or something similar.

We already have ns enable which is not within a group. How is that
different?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v1 3/3] nvmet: Add port transport state flag
  2018-04-15  8:54     ` Sagi Grimberg
@ 2018-04-17 15:28       ` Christoph Hellwig
  2018-05-16 12:40         ` Max Gurtovoy
  0 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2018-04-17 15:28 UTC (permalink / raw)


On Sun, Apr 15, 2018@11:54:59AM +0300, Sagi Grimberg wrote:
>
>
>>> +{
>>> +	struct nvmet_port *port = to_nvmet_port(item);
>>> +
>>> +	return sprintf(page, "%s\n",
>>> +		nvmet_is_port_active(port) ? "up" : "down");
>>> +}
>>> +
>>> +CONFIGFS_ATTR_RO(nvmet_, trstate);
>>> +
>>>   /*
>>>    * Namespace structures & file operation functions below
>>>    */
>>> @@ -870,6 +880,7 @@ static struct configfs_attribute *nvmet_port_attrs[] = {
>>>   	&nvmet_attr_addr_traddr,
>>>   	&nvmet_attr_addr_trsvcid,
>>>   	&nvmet_attr_addr_trtype,
>>> +	&nvmet_attr_trstate,
>>
>> Please don't create attributes without a group ever.  Just create a
>> different group which isn't serialized, e.g. state as the group name
>> and then transport as the attribute name inside it, or something similar.
>
> We already have ns enable which is not within a group. How is that
> different?

enable is a magic trigger that we can't handle automatically when
restoring the state in nvmetcli.  trstate is a read-only field that
makes sense to show with generic code.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v1 0/3] nvmet-rdma automatic port re-activation
  2018-04-15  8:53   ` Sagi Grimberg
@ 2018-04-17 15:43     ` Christoph Hellwig
  2018-04-20  2:48     ` Doug Ledford
  1 sibling, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2018-04-17 15:43 UTC (permalink / raw)


On Sun, Apr 15, 2018@11:53:02AM +0300, Sagi Grimberg wrote:
>>> When a RDMA device goes away we must destroy all its associated
>>> RDMA resources. RDMA device resets also manifest as device removal
>>> events and a short while after they come back. We want to re-activate
>>> a port listener on this RDMA device when it comes back into the system.
>>
>> I really detest this series.  It just shows how messed up the whole
>> IB core interaction is.  The right way to fix this is to stop treating
>> an IB device reset as a device removal, and give it a different event.
>>
>> And also make sure we have a single unified event system instead of
>> three separate ones.
>
> I've raised this claim before, but got resistance from Doug:
> https://www.spinics.net/lists/linux-rdma/msg59815.html
>
> At that point, in the lack of change in the RDMA interface, this
> is pretty much what we can do to support device resets...

Meh.  I really hate all that rubbish boilerplate code.  Nothing
in here is protocol specific, so even without a proper reset
event, it is something that should be done in common code.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v1 0/3] nvmet-rdma automatic port re-activation
  2018-04-15  8:53   ` Sagi Grimberg
  2018-04-17 15:43     ` Christoph Hellwig
@ 2018-04-20  2:48     ` Doug Ledford
  1 sibling, 0 replies; 16+ messages in thread
From: Doug Ledford @ 2018-04-20  2:48 UTC (permalink / raw)


On Sun, 2018-04-15@11:53 +0300, Sagi Grimberg wrote:
> > > When a RDMA device goes away we must destroy all its associated
> > > RDMA resources. RDMA device resets also manifest as device removal
> > > events and a short while after they come back. We want to re-activate
> > > a port listener on this RDMA device when it comes back into the system.
> > 
> > I really detest this series.  It just shows how messed up the whole
> > IB core interaction is.  The right way to fix this is to stop treating
> > an IB device reset as a device removal, and give it a different event.
> > 
> > And also make sure we have a single unified event system instead of
> > three separate ones.
> 
> I've raised this claim before, but got resistance from Doug:
> https://www.spinics.net/lists/linux-rdma/msg59815.html

I'm not the sole maintainer any more.  If you want to appeal to Jason's
better sensibilities in an attempt to get someone on your side and make
a difference, I won't take offense.

> At that point, in the lack of change in the RDMA interface, this
> is pretty much what we can do to support device resets...
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Doug Ledford <dledford at redhat.com>
    GPG KeyID: B826A3330E572FDD
    Key fingerprint = AE6B 1BDA 122B 23B4 265B  1274 B826 A333 0E57 2FDD
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part
URL: <http://lists.infradead.org/pipermail/linux-nvme/attachments/20180419/cfab87ee/attachment.sig>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v1 3/3] nvmet: Add port transport state flag
  2018-04-17 15:28       ` Christoph Hellwig
@ 2018-05-16 12:40         ` Max Gurtovoy
  0 siblings, 0 replies; 16+ messages in thread
From: Max Gurtovoy @ 2018-05-16 12:40 UTC (permalink / raw)




On 4/17/2018 6:28 PM, Christoph Hellwig wrote:
> On Sun, Apr 15, 2018@11:54:59AM +0300, Sagi Grimberg wrote:
>>
>>
>>>> +{
>>>> +	struct nvmet_port *port = to_nvmet_port(item);
>>>> +
>>>> +	return sprintf(page, "%s\n",
>>>> +		nvmet_is_port_active(port) ? "up" : "down");
>>>> +}
>>>> +
>>>> +CONFIGFS_ATTR_RO(nvmet_, trstate);
>>>> +
>>>>    /*
>>>>     * Namespace structures & file operation functions below
>>>>     */
>>>> @@ -870,6 +880,7 @@ static struct configfs_attribute *nvmet_port_attrs[] = {
>>>>    	&nvmet_attr_addr_traddr,
>>>>    	&nvmet_attr_addr_trsvcid,
>>>>    	&nvmet_attr_addr_trtype,
>>>> +	&nvmet_attr_trstate,
>>>
>>> Please don't create attributes without a group ever.  Just create a
>>> different group which isn't serialized, e.g. state as the group name
>>> and then transport as the attribute name inside it, or something similar.
>>
>> We already have ns enable which is not within a group. How is that
>> different?
> 
> enable is a magic trigger that we can't handle automatically when
> restoring the state in nvmetcli.  trstate is a read-only field that
> makes sense to show with generic code.
> 

Sagi,
are you going to send another version for this one with the above changes?
If so, a fix should be made also in nvmetcli...

-Max.

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2018-05-16 12:40 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-12  8:06 [PATCH v1 0/3] nvmet-rdma automatic port re-activation Sagi Grimberg
2018-04-12  8:06 ` [PATCH v1 1/3] nvmet-rdma: automatic listening " Sagi Grimberg
2018-04-12 13:08   ` Israel Rukshin
2018-04-12  8:06 ` [PATCH v1 2/3] nvmet: Add fabrics ops to port Sagi Grimberg
2018-04-12  8:06 ` [PATCH v1 3/3] nvmet: Add port transport state flag Sagi Grimberg
2018-04-13 17:14   ` Christoph Hellwig
2018-04-15  8:54     ` Sagi Grimberg
2018-04-17 15:28       ` Christoph Hellwig
2018-05-16 12:40         ` Max Gurtovoy
2018-04-12  8:06 ` [PATCH 4/3 v1 nvmetcli] nvmetcli: expose nvmet port status and state Sagi Grimberg
2018-04-12 11:25   ` Nitzan Carmi
2018-04-12 12:34     ` Sagi Grimberg
2018-04-13 17:00 ` [PATCH v1 0/3] nvmet-rdma automatic port re-activation Christoph Hellwig
2018-04-15  8:53   ` Sagi Grimberg
2018-04-17 15:43     ` Christoph Hellwig
2018-04-20  2:48     ` Doug Ledford

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.