All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC 1/2] RDMA/CM: move rdma_id_private into include/rdma/rdma_cm.h
       [not found] ` <cover.1517418595.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
@ 2018-01-30 16:59   ` Steve Wise
       [not found]     ` <a85bb48eb9fc8846c81118a6777ab9ccbd27e9d7.1517418595.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
  2018-01-30 16:59   ` [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information Steve Wise
  1 sibling, 1 reply; 35+ messages in thread
From: Steve Wise @ 2018-01-30 16:59 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA, jgg-VPRAkNaXOzVWk0Htik3J/w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, leon-DgEjT+Ai2ygdnm+yROfE0A

So the resource tracking services in core/nldev.c can see useful
information about cm_ids.

There are other approaches.  I just moved rdma_id_private to develop
this prototype quickly, and it was simple.  Other approaches include:

1) move the nldev cm_id dumpit functions into cma.c, and have nldev.c
call it.  This, however puts a ib_core->rdma_cm module dependency which
makes the two modules interdependent in both directions.  Thus, rdma_cm
would have to be merged into ib_core.  This might not be a bad idea with
all the kernel rdma ULPs now using the rdma_cm.

2) move the specific attributes that are being dumped from the
rdma_id_private struct to the rdma_cm_id struct, so nldev.c has access
to them.

Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
---
 drivers/infiniband/core/cma.c | 40 ----------------------------------------
 include/rdma/rdma_cm.h        | 41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 40 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e66963c..72ad52b 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -327,46 +327,6 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
  * We do this by disabling removal notification while a callback is in process,
  * and reporting it after the callback completes.
  */
-struct rdma_id_private {
-	struct rdma_cm_id	id;
-
-	struct rdma_bind_list	*bind_list;
-	struct hlist_node	node;
-	struct list_head	list; /* listen_any_list or cma_device.list */
-	struct list_head	listen_list; /* per device listens */
-	struct cma_device	*cma_dev;
-	struct list_head	mc_list;
-
-	int			internal_id;
-	enum rdma_cm_state	state;
-	spinlock_t		lock;
-	struct mutex		qp_mutex;
-
-	struct completion	comp;
-	atomic_t		refcount;
-	struct mutex		handler_mutex;
-
-	int			backlog;
-	int			timeout_ms;
-	struct ib_sa_query	*query;
-	int			query_id;
-	union {
-		struct ib_cm_id	*ib;
-		struct iw_cm_id	*iw;
-	} cm_id;
-
-	u32			seq_num;
-	u32			qkey;
-	u32			qp_num;
-	pid_t			owner;
-	u32			options;
-	u8			srq;
-	u8			tos;
-	bool			tos_set;
-	u8			reuseaddr;
-	u8			afonly;
-	enum ib_gid_type	gid_type;
-};
 
 struct cma_multicast {
 	struct rdma_id_private *id_priv;
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 6538a5c..5984225 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -157,6 +157,47 @@ struct rdma_cm_id {
 	u8			 port_num;
 };
 
+struct rdma_id_private {
+	struct rdma_cm_id	id;
+
+	struct rdma_bind_list	*bind_list;
+	struct hlist_node	node;
+	struct list_head	list; /* listen_any_list or cma_device.list */
+	struct list_head	listen_list; /* per device listens */
+	struct cma_device	*cma_dev;
+	struct list_head	mc_list;
+
+	int			internal_id;
+	enum rdma_cm_state	state;
+	spinlock_t		lock;
+	struct mutex		qp_mutex;
+
+	struct completion	comp;
+	atomic_t		refcount;
+	struct mutex		handler_mutex;
+
+	int			backlog;
+	int			timeout_ms;
+	struct ib_sa_query	*query;
+	int			query_id;
+	union {
+		struct ib_cm_id	*ib;
+		struct iw_cm_id	*iw;
+	} cm_id;
+
+	u32			seq_num;
+	u32			qkey;
+	u32			qp_num;
+	pid_t			owner;
+	u32			options;
+	u8			srq;
+	u8			tos;
+	bool			tos_set;
+	u8			reuseaddr;
+	u8			afonly;
+	enum ib_gid_type	gid_type;
+};
+
 /**
  * rdma_create_id - Create an RDMA identifier.
  *
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
       [not found] ` <cover.1517418595.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
  2018-01-30 16:59   ` [PATCH RFC 1/2] RDMA/CM: move rdma_id_private into include/rdma/rdma_cm.h Steve Wise
@ 2018-01-30 16:59   ` Steve Wise
       [not found]     ` <531889e6a24f7919dec71734c91298d266aa9721.1517418595.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
  1 sibling, 1 reply; 35+ messages in thread
From: Steve Wise @ 2018-01-30 16:59 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA, jgg-VPRAkNaXOzVWk0Htik3J/w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, leon-DgEjT+Ai2ygdnm+yROfE0A

Implement RDMA nldev netlink interface to get detailed CM_ID information.

Because cm_id's are attached to rdma devices in various work queue contexts,
the pid and task information at device-attach time is sometimes not useful.
For example, an nvme/f host connection ends up being bound to a device
in a work queue context and the resulting pid at attach time no longer exists
after connection setup.  So instead we mark all cm_id's created via the
rdma_ucm as "user", and all others as "kernel".  This required tweaking
the restrack code a little.  It also required wrapping some rdma_cm
functions to allow passing the module name string.

Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
---
 drivers/infiniband/core/cma.c      |  55 ++++++---
 drivers/infiniband/core/nldev.c    | 245 +++++++++++++++++++++++++++++++++++++
 drivers/infiniband/core/restrack.c |  29 ++++-
 drivers/infiniband/core/ucma.c     |   8 +-
 include/rdma/rdma_cm.h             |  24 +++-
 include/rdma/restrack.h            |   4 +
 include/uapi/rdma/rdma_netlink.h   |  30 +++++
 7 files changed, 365 insertions(+), 30 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 72ad52b..51fbfa1 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -465,6 +465,9 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
 	id_priv->id.route.addr.dev_addr.transport =
 		rdma_node_get_transport(cma_dev->device->node_type);
 	list_add_tail(&id_priv->list, &cma_dev->id_list);
+	id_priv->id.res.type = RDMA_RESTRACK_CM_ID;
+	id_priv->id.res.kern_name = id_priv->id.caller;
+	rdma_restrack_add(&id_priv->id.res);
 }
 
 static void cma_attach_to_dev(struct rdma_id_private *id_priv,
@@ -737,10 +740,10 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
 		complete(&id_priv->comp);
 }
 
-struct rdma_cm_id *rdma_create_id(struct net *net,
-				  rdma_cm_event_handler event_handler,
-				  void *context, enum rdma_port_space ps,
-				  enum ib_qp_type qp_type)
+struct rdma_cm_id *__rdma_create_id(struct net *net,
+				    rdma_cm_event_handler event_handler,
+				    void *context, enum rdma_port_space ps,
+				    enum ib_qp_type qp_type, const char *caller)
 {
 	struct rdma_id_private *id_priv;
 
@@ -748,7 +751,10 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
 	if (!id_priv)
 		return ERR_PTR(-ENOMEM);
 
-	id_priv->owner = task_pid_nr(current);
+	if (caller)
+		id_priv->id.caller = caller;
+	else
+		id_priv->owner = task_pid_nr(current);
 	id_priv->state = RDMA_CM_IDLE;
 	id_priv->id.context = context;
 	id_priv->id.event_handler = event_handler;
@@ -768,7 +774,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
 
 	return &id_priv->id;
 }
-EXPORT_SYMBOL(rdma_create_id);
+EXPORT_SYMBOL(__rdma_create_id);
 
 static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
 {
@@ -1628,6 +1634,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 	mutex_unlock(&id_priv->handler_mutex);
 
 	if (id_priv->cma_dev) {
+		rdma_restrack_del(&id_priv->id.res);
 		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
 			if (id_priv->cm_id.ib)
 				ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -1786,9 +1793,10 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
 		ib_event->param.req_rcvd.primary_path->service_id;
 	int ret;
 
-	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
+	id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
 			    listen_id->event_handler, listen_id->context,
-			    listen_id->ps, ib_event->param.req_rcvd.qp_type);
+			    listen_id->ps, ib_event->param.req_rcvd.qp_type,
+			    listen_id->caller);
 	if (IS_ERR(id))
 		return NULL;
 
@@ -1843,8 +1851,8 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
 	struct net *net = listen_id->route.addr.dev_addr.net;
 	int ret;
 
-	id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
-			    listen_id->ps, IB_QPT_UD);
+	id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
+			      listen_id->ps, IB_QPT_UD, listen_id->caller);
 	if (IS_ERR(id))
 		return NULL;
 
@@ -2110,10 +2118,11 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 		goto out;
 
 	/* Create a new RDMA id for the new IW CM ID */
-	new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
-				   listen_id->id.event_handler,
-				   listen_id->id.context,
-				   RDMA_PS_TCP, IB_QPT_RC);
+	new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
+				     listen_id->id.event_handler,
+				     listen_id->id.context,
+				     RDMA_PS_TCP, IB_QPT_RC,
+				     listen_id->id.caller);
 	if (IS_ERR(new_cm_id)) {
 		ret = -ENOMEM;
 		goto out;
@@ -2238,8 +2247,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 	if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
 		return;
 
-	id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
-			    id_priv->id.qp_type);
+	id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
+			      id_priv->id.qp_type, id_priv->id.caller);
 	if (IS_ERR(id))
 		return;
 
@@ -3347,8 +3356,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 
 	return 0;
 err2:
-	if (id_priv->cma_dev)
+	if (id_priv->cma_dev) {
+		rdma_restrack_del(&id_priv->id.res);
 		cma_release_dev(id_priv);
+	}
 err1:
 	cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
 	return ret;
@@ -3731,14 +3742,18 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
 	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
 }
 
-int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
+int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+		  const char *caller)
 {
 	struct rdma_id_private *id_priv;
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 
-	id_priv->owner = task_pid_nr(current);
+	if (caller)
+		id_priv->id.caller = caller;
+	else
+		id_priv->owner = task_pid_nr(current);
 
 	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
 		return -EINVAL;
@@ -3778,7 +3793,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 	rdma_reject(id, NULL, 0);
 	return ret;
 }
-EXPORT_SYMBOL(rdma_accept);
+EXPORT_SYMBOL(__rdma_accept);
 
 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
 {
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index fa8655e..a4091b5 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -34,6 +34,7 @@
 #include <linux/pid.h>
 #include <linux/pid_namespace.h>
 #include <net/netlink.h>
+#include <rdma/rdma_cm.h>
 #include <rdma/rdma_netlink.h>
 
 #include "core_priv.h"
@@ -71,6 +72,22 @@
 	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
 	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
 						    .len = TASK_COMM_LEN },
+	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type = NLA_NESTED },
+	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type = NLA_NESTED },
+	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
+	[RDMA_NLDEV_ATTR_RES_IPV4_SADDR]	= {
+				.len = FIELD_SIZEOF(struct iphdr, saddr) },
+	[RDMA_NLDEV_ATTR_RES_IPV4_DADDR]	= {
+				.len = FIELD_SIZEOF(struct iphdr, saddr) },
+	[RDMA_NLDEV_ATTR_RES_IPV6_SADDR]	= {
+				.len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
+	[RDMA_NLDEV_ATTR_RES_IPV6_DADDR]	= {
+				.len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
+	[RDMA_NLDEV_ATTR_RES_IP_SPORT]		= { .type = NLA_U16 },
+	[RDMA_NLDEV_ATTR_RES_IP_DPORT]		= { .type = NLA_U16 },
+	[RDMA_NLDEV_ATTR_RES_DEV_TYPE]		= { .type = NLA_U8 },
+	[RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE]	= { .type = NLA_U8 },
+	[RDMA_NLDEV_ATTR_RES_NETWORK_TYPE]	= { .type = NLA_U8 },
 };
 
 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
@@ -182,6 +199,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
 		[RDMA_RESTRACK_PD] = "pd",
 		[RDMA_RESTRACK_CQ] = "cq",
 		[RDMA_RESTRACK_QP] = "qp",
+		[RDMA_RESTRACK_CM_ID] = "cm_id",
 	};
 
 	struct rdma_restrack_root *res = &device->res;
@@ -284,6 +302,99 @@ static int fill_res_qp_entry(struct sk_buff *msg,
 	return -EMSGSIZE;
 }
 
+static int fill_res_cm_id_entry(struct sk_buff *msg,
+				struct rdma_cm_id *cm_id, uint32_t port)
+{
+	struct rdma_id_private *id_priv;
+	struct nlattr *entry_attr;
+
+	if (port && port != cm_id->port_num)
+		return 0;
+
+	id_priv = container_of(cm_id, struct rdma_id_private, id);
+	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY);
+	if (!entry_attr)
+		goto out;
+
+	if (cm_id->port_num &&
+	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
+		goto err;
+
+	if (id_priv->qp_num &&
+	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
+		goto err;
+
+	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
+		goto err;
+
+	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
+		goto err;
+	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
+		goto err;
+	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_DEV_TYPE,
+		       id_priv->id.route.addr.dev_addr.dev_type))
+		goto err;
+	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE,
+		       id_priv->id.route.addr.dev_addr.transport))
+		goto err;
+
+	if (cm_id->route.addr.src_addr.ss_family == AF_INET) {
+		struct sockaddr_in *sin;
+
+		sin = (struct sockaddr_in *)&cm_id->route.addr.src_addr;
+		if (nla_put_in_addr(msg, RDMA_NLDEV_ATTR_RES_IPV4_SADDR,
+				    sin->sin_addr.s_addr))
+			goto err;
+		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_SPORT,
+				  sin->sin_port))
+			goto err;
+
+		sin = (struct sockaddr_in *)&cm_id->route.addr.dst_addr;
+		if (nla_put_in_addr(msg, RDMA_NLDEV_ATTR_RES_IPV4_DADDR,
+				    sin->sin_addr.s_addr))
+			goto err;
+		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_DPORT,
+				  sin->sin_port))
+			goto err;
+	} else {
+		struct sockaddr_in6 *sin6;
+
+		sin6 = (struct sockaddr_in6 *)&cm_id->route.addr.src_addr;
+		if (nla_put_in6_addr(msg, RDMA_NLDEV_ATTR_RES_IPV6_SADDR,
+				     &sin6->sin6_addr))
+			goto err;
+		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_SPORT,
+				  sin6->sin6_port))
+			goto err;
+
+		sin6 = (struct sockaddr_in6 *)&cm_id->route.addr.dst_addr;
+		if (nla_put_in6_addr(msg, RDMA_NLDEV_ATTR_RES_IPV6_DADDR,
+				     &sin6->sin6_addr))
+			goto err;
+		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_DPORT,
+				  sin6->sin6_port))
+			goto err;
+	}
+
+	if (id_priv->id.caller) {
+		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
+				   id_priv->id.caller))
+			goto err;
+	} else {
+		/* CMA keeps the owning pid. */
+		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, id_priv->owner))
+			goto err;
+	}
+
+	nla_nest_end(msg, entry_attr);
+	return 0;
+
+err:
+	nla_nest_cancel(msg, entry_attr);
+out:
+	return -EMSGSIZE;
+}
+
 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 			  struct netlink_ext_ack *extack)
 {
@@ -686,6 +797,137 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
 	return ret;
 }
 
+static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb,
+				      struct netlink_callback *cb)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	struct rdma_restrack_entry *res;
+	int err, ret = 0, idx = 0;
+	struct nlattr *table_attr;
+	struct ib_device *device;
+	int start = cb->args[0];
+	struct rdma_cm_id *cm_id = NULL;
+	struct nlmsghdr *nlh;
+	u32 index, port = 0;
+
+	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+			  nldev_policy, NULL);
+	/*
+	 * Right now, we are expecting the device index to get QP information,
+	 * but it is possible to extend this code to return all devices in
+	 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
+	 * if it doesn't exist, we will iterate over all devices.
+	 *
+	 * But it is not needed for now.
+	 */
+	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+		return -EINVAL;
+
+	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+	device = ib_device_get_by_index(index);
+	if (!device)
+		return -EINVAL;
+
+	/*
+	 * If no PORT_INDEX is supplied, we will return all QPs from that device
+	 */
+	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
+		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+		if (!rdma_is_port_valid(device, port)) {
+			ret = -EINVAL;
+			goto err_index;
+		}
+	}
+
+	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+		RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_QP_GET),
+		0, NLM_F_MULTI);
+
+	if (fill_nldev_handle(skb, device)) {
+		ret = -EMSGSIZE;
+		goto err;
+	}
+
+	table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_CM_ID);
+	if (!table_attr) {
+		ret = -EMSGSIZE;
+		goto err;
+	}
+
+	down_read(&device->res.rwsem);
+	hash_for_each_possible(device->res.hash, res, node,
+			       RDMA_RESTRACK_CM_ID) {
+		if (idx < start)
+			goto next;
+
+		if ((rdma_is_kernel_res(res) &&
+		     task_active_pid_ns(current) != &init_pid_ns) ||
+		    (!rdma_is_kernel_res(res) &&
+		     task_active_pid_ns(current) !=
+		     task_active_pid_ns(res->task)))
+			/*
+			 * 1. Kernel QPs should be visible in init namsapce only
+			 * 2. Preent only QPs visible in the current namespace
+			 */
+			goto next;
+
+		if (!rdma_restrack_get(res))
+			/*
+			 * Resource is under release now, but we are not
+			 * relesing lock now, so it will be released in
+			 * our next pass, once we will get ->next pointer.
+			 */
+			goto next;
+
+		cm_id = container_of(res, struct rdma_cm_id, res);
+
+		up_read(&device->res.rwsem);
+		ret = fill_res_cm_id_entry(skb, cm_id, port);
+		down_read(&device->res.rwsem);
+		/*
+		 * Return resource back, but it won't be released till
+		 * the &device->res.rwsem will be released for write.
+		 */
+		rdma_restrack_put(res);
+
+		if (ret == -EMSGSIZE)
+			/*
+			 * There is a chance to optimize here.
+			 * It can be done by using list_prepare_entry
+			 * and list_for_each_entry_continue afterwards.
+			 */
+			break;
+		if (ret)
+			goto res_err;
+next:		idx++;
+	}
+	up_read(&device->res.rwsem);
+
+	nla_nest_end(skb, table_attr);
+	nlmsg_end(skb, nlh);
+	cb->args[0] = idx;
+
+	/*
+	 * No more CM_IDs to fill, cancel the message and
+	 * return 0 to mark end of dumpit.
+	 */
+	if (!cm_id)
+		goto err;
+
+	put_device(&device->dev);
+	return skb->len;
+
+res_err:
+	nla_nest_cancel(skb, table_attr);
+	up_read(&device->res.rwsem);
+
+err:
+	nlmsg_cancel(skb, nlh);
+
+err_index:
+	put_device(&device->dev);
+	return ret;
+}
 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
 	[RDMA_NLDEV_CMD_GET] = {
 		.doit = nldev_get_doit,
@@ -712,6 +954,9 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
 		 * too.
 		 */
 	},
+	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
+		.dump = nldev_res_get_cm_id_dumpit,
+	},
 };
 
 void __init nldev_init(void)
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 857637b..bb13169 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -3,6 +3,7 @@
  * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
  */
 
+#include <rdma/rdma_cm.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/restrack.h>
 #include <linux/mutex.h>
@@ -67,6 +68,7 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
 	struct ib_pd *pd;
 	struct ib_cq *cq;
 	struct ib_qp *qp;
+	struct rdma_cm_id *cm_id;
 
 	switch (type) {
 	case RDMA_RESTRACK_PD:
@@ -85,6 +87,10 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
 		xrcd = container_of(res, struct ib_xrcd, res);
 		dev = xrcd->device;
 		break;
+	case RDMA_RESTRACK_CM_ID:
+		cm_id = container_of(res, struct rdma_cm_id, res);
+		dev = cm_id->device;
+		break;
 	default:
 		WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
 		return NULL;
@@ -93,6 +99,27 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
 	return dev;
 }
 
+static bool res_is_user(struct rdma_restrack_entry *res)
+{
+	enum rdma_restrack_type type = res->type;
+	bool is_user;
+
+	switch (type) {
+	case RDMA_RESTRACK_CM_ID: {
+		struct rdma_cm_id *cm_id;
+
+		cm_id = container_of(res, struct rdma_cm_id, res);
+		is_user = !cm_id->caller;
+		break;
+	}
+	default:
+		is_user = !uaccess_kernel();
+		break;
+	}
+
+	return is_user;
+}
+
 void rdma_restrack_add(struct rdma_restrack_entry *res)
 {
 	struct ib_device *dev = res_to_dev(res);
@@ -100,7 +127,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
 	if (!dev)
 		return;
 
-	if (!uaccess_kernel()) {
+	if (res_is_user(res)) {
 		get_task_struct(current);
 		res->task = current;
 		res->kern_name = NULL;
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index d67219d..f7f0282 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -476,8 +476,8 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
 		return -ENOMEM;
 
 	ctx->uid = cmd.uid;
-	ctx->cm_id = rdma_create_id(current->nsproxy->net_ns,
-				    ucma_event_handler, ctx, cmd.ps, qp_type);
+	ctx->cm_id = __rdma_create_id(current->nsproxy->net_ns,
+			      ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
 	if (IS_ERR(ctx->cm_id)) {
 		ret = PTR_ERR(ctx->cm_id);
 		goto err1;
@@ -1084,12 +1084,12 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
 	if (cmd.conn_param.valid) {
 		ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
 		mutex_lock(&file->mut);
-		ret = rdma_accept(ctx->cm_id, &conn_param);
+		ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
 		if (!ret)
 			ctx->uid = cmd.uid;
 		mutex_unlock(&file->mut);
 	} else
-		ret = rdma_accept(ctx->cm_id, NULL);
+		ret = __rdma_accept(ctx->cm_id, NULL, NULL);
 
 	ucma_put_ctx(ctx);
 	return ret;
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 5984225..73dac9b 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -155,6 +155,12 @@ struct rdma_cm_id {
 	enum rdma_port_space	 ps;
 	enum ib_qp_type		 qp_type;
 	u8			 port_num;
+	const char *caller;
+
+	/*
+	 * Internal to RDMA/core, don't use in the drivers
+	 */
+	struct rdma_restrack_entry     res;
 };
 
 struct rdma_id_private {
@@ -198,6 +204,11 @@ struct rdma_id_private {
 	enum ib_gid_type	gid_type;
 };
 
+struct rdma_cm_id *__rdma_create_id(struct net *net,
+				  rdma_cm_event_handler event_handler,
+				  void *context, enum rdma_port_space ps,
+				  enum ib_qp_type qp_type, const char *caller);
+
 /**
  * rdma_create_id - Create an RDMA identifier.
  *
@@ -210,10 +221,9 @@ struct rdma_id_private {
  *
  * The id holds a reference on the network namespace until it is destroyed.
  */
-struct rdma_cm_id *rdma_create_id(struct net *net,
-				  rdma_cm_event_handler event_handler,
-				  void *context, enum rdma_port_space ps,
-				  enum ib_qp_type qp_type);
+#define rdma_create_id(net, event_handler, context, ps, qp_type) \
+	__rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \
+			 KBUILD_MODNAME)
 
 /**
   * rdma_destroy_id - Destroys an RDMA identifier.
@@ -325,6 +335,9 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
  */
 int rdma_listen(struct rdma_cm_id *id, int backlog);
 
+int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+		  const char *caller);
+
 /**
  * rdma_accept - Called to accept a connection request or response.
  * @id: Connection identifier associated with the request.
@@ -340,7 +353,8 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
  * state of the qp associated with the id is modified to error, such that any
  * previously posted receive buffers would be flushed.
  */
-int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
+#define rdma_accept(id, conn_param) \
+	__rdma_accept((id), (conn_param),  KBUILD_MODNAME)
 
 /**
  * rdma_notify - Notifies the RDMA CM of an asynchronous event that has
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index c2d8116..a794e0e 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -33,6 +33,10 @@ enum rdma_restrack_type {
 	 */
 	RDMA_RESTRACK_XRCD,
 	/**
+	 * @RDMA_RESTRACK_CM_ID: Connection Manager ID (CM_ID)
+	 */
+	RDMA_RESTRACK_CM_ID,
+	/**
 	 * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
 	 */
 	RDMA_RESTRACK_MAX
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 17e59be..4ed21ee 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -240,6 +240,8 @@ enum rdma_nldev_command {
 
 	RDMA_NLDEV_CMD_RES_QP_GET, /* can dump */
 
+	RDMA_NLDEV_CMD_RES_CM_ID_GET, /* can dump */
+
 	RDMA_NLDEV_NUM_OPS
 };
 
@@ -352,6 +354,34 @@ enum rdma_nldev_attr {
 	 */
 	RDMA_NLDEV_ATTR_RES_KERN_NAME,		/* string */
 
+	RDMA_NLDEV_ATTR_RES_CM_ID,		/* nested table */
+	RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,	/* nested table */
+	/*
+	 * rdma_cm_id port space.
+	 */
+	RDMA_NLDEV_ATTR_RES_PS,			/* u32 */
+	/*
+	 * IP Addresses/port attributes.
+	 */
+	RDMA_NLDEV_ATTR_RES_IPV4_SADDR,		/* u8[4] */
+	RDMA_NLDEV_ATTR_RES_IPV4_DADDR,		/* u8[4] */
+	RDMA_NLDEV_ATTR_RES_IPV6_SADDR,		/* u8[16] */
+	RDMA_NLDEV_ATTR_RES_IPV6_DADDR,		/* u8[16] */
+	RDMA_NLDEV_ATTR_RES_IP_SPORT,		/* BE u16 */
+	RDMA_NLDEV_ATTR_RES_IP_DPORT,		/* BE u16 */
+	/*
+	 * ARPHRD_INFINIBAND, ARPHRD_ETHER, ...
+	 */
+	RDMA_NLDEV_ATTR_RES_DEV_TYPE,		/* u8 */
+	/*
+	 * enum enum rdma_transport_type (IB, IWARP, ...)
+	 */
+	RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE,	/* u8 */
+	/*
+	 * enum rdma_network_type (IB, IPv4, IPv6,...)
+	 */
+	RDMA_NLDEV_ATTR_RES_NETWORK_TYPE,	/* u8 */
+
 	RDMA_NLDEV_ATTR_MAX
 };
 #endif /* _UAPI_RDMA_NETLINK_H */
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH RFC 0/2] cm_id resource tracking
@ 2018-01-31 17:09 Steve Wise
       [not found] ` <cover.1517418595.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Steve Wise @ 2018-01-31 17:09 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA, jgg-VPRAkNaXOzVWk0Htik3J/w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, leon-DgEjT+Ai2ygdnm+yROfE0A

This series adds rdma_cm_id tracking to the new resource tracking
database.  The patches are on top of Jason's merged rdma-next 
branch [1].

Please comment!

Thanks,

Steve.

[1] git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git
branch wip/for-linus-merged.

Steve Wise (2):
  RDMA/CM: move rdma_id_private into include/rdma/rdma_cm.h
  RDMA/nldev: provide detailed CM_ID information

 drivers/infiniband/core/cma.c      |  95 ++++++--------
 drivers/infiniband/core/nldev.c    | 245 +++++++++++++++++++++++++++++++++++++
 drivers/infiniband/core/restrack.c |  29 ++++-
 drivers/infiniband/core/ucma.c     |   8 +-
 include/rdma/rdma_cm.h             |  65 +++++++++-
 include/rdma/restrack.h            |   4 +
 include/uapi/rdma/rdma_netlink.h   |  30 +++++
 7 files changed, 406 insertions(+), 70 deletions(-)

-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH RFC 1/2] RDMA/CM: move rdma_id_private into include/rdma/rdma_cm.h
       [not found]     ` <a85bb48eb9fc8846c81118a6777ab9ccbd27e9d7.1517418595.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
@ 2018-01-31 20:42       ` Parav Pandit
       [not found]         ` <VI1PR0502MB300809BAC31D5CBC0FA2311CD1FB0-o1MPJYiShExKsLr+rGaxW8DSnupUy6xnnBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Parav Pandit @ 2018-01-31 20:42 UTC (permalink / raw)
  To: Steve Wise, dledford-H+wXaHxf7aLQT0dZR+AlfA, Jason Gunthorpe
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, leon-DgEjT+Ai2ygdnm+yROfE0A

Hi Steve,

> -----Original Message-----
> From: linux-rdma-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org [mailto:linux-rdma-
> owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org] On Behalf Of Steve Wise
> Sent: Tuesday, January 30, 2018 10:59 AM
> To: dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org; Jason Gunthorpe <jgg-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
> Subject: [PATCH RFC 1/2] RDMA/CM: move rdma_id_private into
> include/rdma/rdma_cm.h
> 
> So the resource tracking services in core/nldev.c can see useful information
> about cm_ids.
> 
> There are other approaches.  I just moved rdma_id_private to develop this
> prototype quickly, and it was simple.  Other approaches include:
> 
> 1) move the nldev cm_id dumpit functions into cma.c, and have nldev.c call it.
> This, however puts a ib_core->rdma_cm module dependency which makes the
> two modules interdependent in both directions.  Thus, rdma_cm would have to
> be merged into ib_core.  This might not be a bad idea with all the kernel rdma
> ULPs now using the rdma_cm.
> 
> 2) move the specific attributes that are being dumped from the rdma_id_private
> struct to the rdma_cm_id struct, so nldev.c has access to them.
> 

I prefer to see drivers/infiniband/core/cma_priv.h to contain rdma_cm_id_private structure.
This allows not to expose that to ULPs which includes include/rdma/rdma_cm.h.
This also includes keeping cm_id state enum within cma_priv.h.
Core files include this new cma_priv.h.

This approach is similar to core_priv.h where structures are not place in include/rdma/ib_*.h.
I actually have that cleanup patch as part of other patches that I have been doing.
But it was not worth enough as stand along patch, so didn't send it.
But not that this RFC is present, I think it should be done in cma_priv.h.

> Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
> ---
>  drivers/infiniband/core/cma.c | 40 ----------------------------------------
>  include/rdma/rdma_cm.h        | 41
> +++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 41 insertions(+), 40 deletions(-)
> 
> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index
> e66963c..72ad52b 100644
> --- a/drivers/infiniband/core/cma.c
> +++ b/drivers/infiniband/core/cma.c
> @@ -327,46 +327,6 @@ struct ib_device *cma_get_ib_dev(struct cma_device
> *cma_dev)
>   * We do this by disabling removal notification while a callback is in process,
>   * and reporting it after the callback completes.
>   */
> -struct rdma_id_private {
> -	struct rdma_cm_id	id;
> -
> -	struct rdma_bind_list	*bind_list;
> -	struct hlist_node	node;
> -	struct list_head	list; /* listen_any_list or cma_device.list */
> -	struct list_head	listen_list; /* per device listens */
> -	struct cma_device	*cma_dev;
> -	struct list_head	mc_list;
> -
> -	int			internal_id;
> -	enum rdma_cm_state	state;
> -	spinlock_t		lock;
> -	struct mutex		qp_mutex;
> -
> -	struct completion	comp;
> -	atomic_t		refcount;
> -	struct mutex		handler_mutex;
> -
> -	int			backlog;
> -	int			timeout_ms;
> -	struct ib_sa_query	*query;
> -	int			query_id;
> -	union {
> -		struct ib_cm_id	*ib;
> -		struct iw_cm_id	*iw;
> -	} cm_id;
> -
> -	u32			seq_num;
> -	u32			qkey;
> -	u32			qp_num;
> -	pid_t			owner;
> -	u32			options;
> -	u8			srq;
> -	u8			tos;
> -	bool			tos_set;
> -	u8			reuseaddr;
> -	u8			afonly;
> -	enum ib_gid_type	gid_type;
> -};
> 
>  struct cma_multicast {
>  	struct rdma_id_private *id_priv;
> diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index
> 6538a5c..5984225 100644
> --- a/include/rdma/rdma_cm.h
> +++ b/include/rdma/rdma_cm.h
> @@ -157,6 +157,47 @@ struct rdma_cm_id {
>  	u8			 port_num;
>  };
> 
> +struct rdma_id_private {
> +	struct rdma_cm_id	id;
> +
> +	struct rdma_bind_list	*bind_list;
> +	struct hlist_node	node;
> +	struct list_head	list; /* listen_any_list or cma_device.list */
> +	struct list_head	listen_list; /* per device listens */
> +	struct cma_device	*cma_dev;
> +	struct list_head	mc_list;
> +
> +	int			internal_id;
> +	enum rdma_cm_state	state;
> +	spinlock_t		lock;
> +	struct mutex		qp_mutex;
> +
> +	struct completion	comp;
> +	atomic_t		refcount;
> +	struct mutex		handler_mutex;
> +
> +	int			backlog;
> +	int			timeout_ms;
> +	struct ib_sa_query	*query;
> +	int			query_id;
> +	union {
> +		struct ib_cm_id	*ib;
> +		struct iw_cm_id	*iw;
> +	} cm_id;
> +
> +	u32			seq_num;
> +	u32			qkey;
> +	u32			qp_num;
> +	pid_t			owner;
> +	u32			options;
> +	u8			srq;
> +	u8			tos;
> +	bool			tos_set;
> +	u8			reuseaddr;
> +	u8			afonly;
> +	enum ib_gid_type	gid_type;
> +};
> +
>  /**
>   * rdma_create_id - Create an RDMA identifier.
>   *
> --
> 1.8.3.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body
> of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
       [not found]     ` <531889e6a24f7919dec71734c91298d266aa9721.1517418595.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
@ 2018-01-31 20:47       ` Parav Pandit
       [not found]         ` <VI1PR0502MB3008805F1A6056F50A12DEDBD1FB0-o1MPJYiShExKsLr+rGaxW8DSnupUy6xnnBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
  2018-02-01  8:49       ` Leon Romanovsky
  1 sibling, 1 reply; 35+ messages in thread
From: Parav Pandit @ 2018-01-31 20:47 UTC (permalink / raw)
  To: Steve Wise, dledford-H+wXaHxf7aLQT0dZR+AlfA, Jason Gunthorpe
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, leon-DgEjT+Ai2ygdnm+yROfE0A



> -----Original Message-----
> From: linux-rdma-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org [mailto:linux-rdma-
> owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org] On Behalf Of Steve Wise
> Sent: Tuesday, January 30, 2018 10:59 AM
> To: dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org; Jason Gunthorpe <jgg-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
> Subject: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
> 
> Implement RDMA nldev netlink interface to get detailed CM_ID information.
> 
> Because cm_id's are attached to rdma devices in various work queue contexts,
> the pid and task information at device-attach time is sometimes not useful.
> For example, an nvme/f host connection ends up being bound to a device in a
> work queue context and the resulting pid at attach time no longer exists after
> connection setup.  So instead we mark all cm_id's created via the rdma_ucm as
> "user", and all others as "kernel".  This required tweaking the restrack code a
> little.  It also required wrapping some rdma_cm functions to allow passing the
> module name string.
> 
> Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
> ---
>  drivers/infiniband/core/cma.c      |  55 ++++++---
>  drivers/infiniband/core/nldev.c    | 245
> +++++++++++++++++++++++++++++++++++++
>  drivers/infiniband/core/restrack.c |  29 ++++-
>  drivers/infiniband/core/ucma.c     |   8 +-
>  include/rdma/rdma_cm.h             |  24 +++-
>  include/rdma/restrack.h            |   4 +
>  include/uapi/rdma/rdma_netlink.h   |  30 +++++
>  7 files changed, 365 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index
> 72ad52b..51fbfa1 100644
> --- a/drivers/infiniband/core/cma.c
> +++ b/drivers/infiniband/core/cma.c
> @@ -465,6 +465,9 @@ static void _cma_attach_to_dev(struct rdma_id_private
> *id_priv,
>  	id_priv->id.route.addr.dev_addr.transport =
>  		rdma_node_get_transport(cma_dev->device->node_type);
>  	list_add_tail(&id_priv->list, &cma_dev->id_list);
> +	id_priv->id.res.type = RDMA_RESTRACK_CM_ID;
> +	id_priv->id.res.kern_name = id_priv->id.caller;
> +	rdma_restrack_add(&id_priv->id.res);
>  }
> 
>  static void cma_attach_to_dev(struct rdma_id_private *id_priv, @@ -737,10
> +740,10 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
>  		complete(&id_priv->comp);
>  }
> 
> -struct rdma_cm_id *rdma_create_id(struct net *net,
> -				  rdma_cm_event_handler event_handler,
> -				  void *context, enum rdma_port_space ps,
> -				  enum ib_qp_type qp_type)
> +struct rdma_cm_id *__rdma_create_id(struct net *net,
> +				    rdma_cm_event_handler event_handler,
> +				    void *context, enum rdma_port_space ps,
> +				    enum ib_qp_type qp_type, const char *caller)
>  {
>  	struct rdma_id_private *id_priv;
> 
> @@ -748,7 +751,10 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
>  	if (!id_priv)
>  		return ERR_PTR(-ENOMEM);
> 
> -	id_priv->owner = task_pid_nr(current);
> +	if (caller)
> +		id_priv->id.caller = caller;
> +	else
> +		id_priv->owner = task_pid_nr(current);
>  	id_priv->state = RDMA_CM_IDLE;
>  	id_priv->id.context = context;
>  	id_priv->id.event_handler = event_handler; @@ -768,7 +774,7 @@
> struct rdma_cm_id *rdma_create_id(struct net *net,
> 
>  	return &id_priv->id;
>  }
> -EXPORT_SYMBOL(rdma_create_id);
> +EXPORT_SYMBOL(__rdma_create_id);
> 
>  static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)  {
> @@ -1628,6 +1634,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
>  	mutex_unlock(&id_priv->handler_mutex);
> 
>  	if (id_priv->cma_dev) {
> +		rdma_restrack_del(&id_priv->id.res);
>  		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
>  			if (id_priv->cm_id.ib)
>  				ib_destroy_cm_id(id_priv->cm_id.ib);
> @@ -1786,9 +1793,10 @@ static struct rdma_id_private
> *cma_new_conn_id(struct rdma_cm_id *listen_id,
>  		ib_event->param.req_rcvd.primary_path->service_id;
>  	int ret;
> 
> -	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
> +	id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
>  			    listen_id->event_handler, listen_id->context,
> -			    listen_id->ps, ib_event->param.req_rcvd.qp_type);
> +			    listen_id->ps, ib_event->param.req_rcvd.qp_type,
> +			    listen_id->caller);
>  	if (IS_ERR(id))
>  		return NULL;
> 
> @@ -1843,8 +1851,8 @@ static struct rdma_id_private
> *cma_new_udp_id(struct rdma_cm_id *listen_id,
>  	struct net *net = listen_id->route.addr.dev_addr.net;
>  	int ret;
> 
> -	id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
> -			    listen_id->ps, IB_QPT_UD);
> +	id = __rdma_create_id(net, listen_id->event_handler, listen_id-
> >context,
> +			      listen_id->ps, IB_QPT_UD, listen_id->caller);
>  	if (IS_ERR(id))
>  		return NULL;
> 
> @@ -2110,10 +2118,11 @@ static int iw_conn_req_handler(struct iw_cm_id
> *cm_id,
>  		goto out;
> 
>  	/* Create a new RDMA id for the new IW CM ID */
> -	new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> -				   listen_id->id.event_handler,
> -				   listen_id->id.context,
> -				   RDMA_PS_TCP, IB_QPT_RC);
> +	new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> +				     listen_id->id.event_handler,
> +				     listen_id->id.context,
> +				     RDMA_PS_TCP, IB_QPT_RC,
> +				     listen_id->id.caller);
>  	if (IS_ERR(new_cm_id)) {
>  		ret = -ENOMEM;
>  		goto out;
> @@ -2238,8 +2247,8 @@ static void cma_listen_on_dev(struct rdma_id_private
> *id_priv,
>  	if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev-
> >device, 1))
>  		return;
> 
> -	id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
> -			    id_priv->id.qp_type);
> +	id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
> +			      id_priv->id.qp_type, id_priv->id.caller);
>  	if (IS_ERR(id))
>  		return;
> 
> @@ -3347,8 +3356,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct
> sockaddr *addr)
> 
>  	return 0;
>  err2:
> -	if (id_priv->cma_dev)
> +	if (id_priv->cma_dev) {
> +		rdma_restrack_del(&id_priv->id.res);
>  		cma_release_dev(id_priv);
> +	}
>  err1:
>  	cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
>  	return ret;
> @@ -3731,14 +3742,18 @@ static int cma_send_sidr_rep(struct
> rdma_id_private *id_priv,
>  	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);  }
> 
> -int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> *conn_param)
> +int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> *conn_param,
> +		  const char *caller)
>  {
>  	struct rdma_id_private *id_priv;
>  	int ret;
> 
>  	id_priv = container_of(id, struct rdma_id_private, id);
> 
> -	id_priv->owner = task_pid_nr(current);
> +	if (caller)
> +		id_priv->id.caller = caller;
> +	else
> +		id_priv->owner = task_pid_nr(current);
> 
>  	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
>  		return -EINVAL;
> @@ -3778,7 +3793,7 @@ int rdma_accept(struct rdma_cm_id *id, struct
> rdma_conn_param *conn_param)
>  	rdma_reject(id, NULL, 0);
>  	return ret;
>  }
> -EXPORT_SYMBOL(rdma_accept);
> +EXPORT_SYMBOL(__rdma_accept);
> 
>  int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)  { diff --git
> a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index
> fa8655e..a4091b5 100644
> --- a/drivers/infiniband/core/nldev.c
> +++ b/drivers/infiniband/core/nldev.c
> @@ -34,6 +34,7 @@
>  #include <linux/pid.h>
>  #include <linux/pid_namespace.h>
>  #include <net/netlink.h>
> +#include <rdma/rdma_cm.h>
>  #include <rdma/rdma_netlink.h>
> 
>  #include "core_priv.h"
> @@ -71,6 +72,22 @@
>  	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
>  	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type =
> NLA_NUL_STRING,
>  						    .len = TASK_COMM_LEN },
> +	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type =
> NLA_NESTED },
> +	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type =
> NLA_NESTED },
> +	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
> +	[RDMA_NLDEV_ATTR_RES_IPV4_SADDR]	= {
> +				.len = FIELD_SIZEOF(struct iphdr, saddr) },
> +	[RDMA_NLDEV_ATTR_RES_IPV4_DADDR]	= {
> +				.len = FIELD_SIZEOF(struct iphdr, saddr) },
> +	[RDMA_NLDEV_ATTR_RES_IPV6_SADDR]	= {
> +				.len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
> +	[RDMA_NLDEV_ATTR_RES_IPV6_DADDR]	= {
> +				.len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
> +	[RDMA_NLDEV_ATTR_RES_IP_SPORT]		= { .type = NLA_U16 },
> +	[RDMA_NLDEV_ATTR_RES_IP_DPORT]		= { .type = NLA_U16 },
> +	[RDMA_NLDEV_ATTR_RES_DEV_TYPE]		= { .type = NLA_U8 },
> +	[RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE]	= { .type = NLA_U8 },
> +	[RDMA_NLDEV_ATTR_RES_NETWORK_TYPE]	= { .type = NLA_U8 },
>  };
> 
>  static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device) @@ -
> 182,6 +199,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device
> *device)
>  		[RDMA_RESTRACK_PD] = "pd",
>  		[RDMA_RESTRACK_CQ] = "cq",
>  		[RDMA_RESTRACK_QP] = "qp",
> +		[RDMA_RESTRACK_CM_ID] = "cm_id",

May be rdmacm_id a better name to avoid confusion with ib_cm/iw_cm ids?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH RFC 1/2] RDMA/CM: move rdma_id_private into include/rdma/rdma_cm.h
       [not found]         ` <VI1PR0502MB300809BAC31D5CBC0FA2311CD1FB0-o1MPJYiShExKsLr+rGaxW8DSnupUy6xnnBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
@ 2018-01-31 20:50           ` Steve Wise
  0 siblings, 0 replies; 35+ messages in thread
From: Steve Wise @ 2018-01-31 20:50 UTC (permalink / raw)
  To: 'Parav Pandit',
	dledford-H+wXaHxf7aLQT0dZR+AlfA, 'Jason Gunthorpe'
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, leon-DgEjT+Ai2ygdnm+yROfE0A

> 
> Hi Steve,
> 
> > -----Original Message-----
> > From: linux-rdma-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org [mailto:linux-rdma-
> > owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org] On Behalf Of Steve Wise
> > Sent: Tuesday, January 30, 2018 10:59 AM
> > To: dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org; Jason Gunthorpe <jgg-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> > Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
> > Subject: [PATCH RFC 1/2] RDMA/CM: move rdma_id_private into
> > include/rdma/rdma_cm.h
> >
> > So the resource tracking services in core/nldev.c can see useful
information
> > about cm_ids.
> >
> > There are other approaches.  I just moved rdma_id_private to develop
this
> > prototype quickly, and it was simple.  Other approaches include:
> >
> > 1) move the nldev cm_id dumpit functions into cma.c, and have nldev.c
call it.
> > This, however puts a ib_core->rdma_cm module dependency which makes the
> > two modules interdependent in both directions.  Thus, rdma_cm would have
to
> > be merged into ib_core.  This might not be a bad idea with all the
kernel rdma
> > ULPs now using the rdma_cm.
> >
> > 2) move the specific attributes that are being dumped from the
> rdma_id_private
> > struct to the rdma_cm_id struct, so nldev.c has access to them.
> >
> 
> I prefer to see drivers/infiniband/core/cma_priv.h to contain
> rdma_cm_id_private structure.
> This allows not to expose that to ULPs which includes
include/rdma/rdma_cm.h.
> This also includes keeping cm_id state enum within cma_priv.h.
> Core files include this new cma_priv.h.
> 
> This approach is similar to core_priv.h where structures are not place in
> include/rdma/ib_*.h.
> I actually have that cleanup patch as part of other patches that I have
been
> doing.
> But it was not worth enough as stand along patch, so didn't send it.
> But not that this RFC is present, I think it should be done in cma_priv.h.

Hey Parav,  Good idea.  That works for me.


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
       [not found]         ` <VI1PR0502MB3008805F1A6056F50A12DEDBD1FB0-o1MPJYiShExKsLr+rGaxW8DSnupUy6xnnBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
@ 2018-01-31 20:56           ` Steve Wise
  2018-01-31 21:18             ` Parav Pandit
  0 siblings, 1 reply; 35+ messages in thread
From: Steve Wise @ 2018-01-31 20:56 UTC (permalink / raw)
  To: 'Parav Pandit',
	dledford-H+wXaHxf7aLQT0dZR+AlfA, 'Jason Gunthorpe'
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, leon-DgEjT+Ai2ygdnm+yROfE0A

> > 182,6 +199,7 @@ static int fill_res_info(struct sk_buff *msg, struct
ib_device
> > *device)
> >  		[RDMA_RESTRACK_PD] = "pd",
> >  		[RDMA_RESTRACK_CQ] = "cq",
> >  		[RDMA_RESTRACK_QP] = "qp",
> > +		[RDMA_RESTRACK_CM_ID] = "cm_id",
> 
> May be rdmacm_id a better name to avoid confusion with ib_cm/iw_cm ids?

Eventually, I could see the ib_cm and/or iw_cm info also getting dumped as
part of this.  So perhaps the name "cm_id" is ok?  I prefer it also because
its shorter (lazy typer! :) )



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
  2018-01-31 20:56           ` Steve Wise
@ 2018-01-31 21:18             ` Parav Pandit
       [not found]               ` <VI1PR0502MB30088B50BEA14B4C05EA2BC7D1FB0-o1MPJYiShExKsLr+rGaxW8DSnupUy6xnnBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Parav Pandit @ 2018-01-31 21:18 UTC (permalink / raw)
  To: Steve Wise, dledford-H+wXaHxf7aLQT0dZR+AlfA, Jason Gunthorpe
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, leon-DgEjT+Ai2ygdnm+yROfE0A



> -----Original Message-----
> From: linux-rdma-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org [mailto:linux-rdma-
> owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org] On Behalf Of Steve Wise
> Sent: Wednesday, January 31, 2018 2:56 PM
> To: Parav Pandit <parav-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>; dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org; Jason
> Gunthorpe <jgg-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
> Subject: RE: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
> 
> > > 182,6 +199,7 @@ static int fill_res_info(struct sk_buff *msg, struct
> ib_device
> > > *device)
> > >  		[RDMA_RESTRACK_PD] = "pd",
> > >  		[RDMA_RESTRACK_CQ] = "cq",
> > >  		[RDMA_RESTRACK_QP] = "qp",
> > > +		[RDMA_RESTRACK_CM_ID] = "cm_id",
> >
> > May be rdmacm_id a better name to avoid confusion with ib_cm/iw_cm ids?
> 
> Eventually, I could see the ib_cm and/or iw_cm info also getting dumped as part
Yes. But it is still coming through id of rdmacm_id as transport id information.
> of this.  So perhaps the name "cm_id" is ok? 
It's just that longer name removes the ambiguity.
I am not too particular about this particular naming.

> I prefer it also because its shorter (lazy typer! :) )
Sure, shorter names are useful to end user.
But I think this is handled well at iproute2 tool level.
For example below two commands achieves the same task.
#ip address show
#ip a s
So may be shorter naming at command level is better left at user space tool level.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
       [not found]               ` <VI1PR0502MB30088B50BEA14B4C05EA2BC7D1FB0-o1MPJYiShExKsLr+rGaxW8DSnupUy6xnnBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
@ 2018-02-01  8:01                 ` Leon Romanovsky
       [not found]                   ` <20180201080109.GG2055-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Leon Romanovsky @ 2018-02-01  8:01 UTC (permalink / raw)
  To: Parav Pandit
  Cc: Steve Wise, dledford-H+wXaHxf7aLQT0dZR+AlfA, Jason Gunthorpe,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

[-- Attachment #1: Type: text/plain, Size: 2346 bytes --]

On Wed, Jan 31, 2018 at 09:18:39PM +0000, Parav Pandit wrote:
>
>
> > -----Original Message-----
> > From: linux-rdma-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org [mailto:linux-rdma-
> > owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org] On Behalf Of Steve Wise
> > Sent: Wednesday, January 31, 2018 2:56 PM
> > To: Parav Pandit <parav-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>; dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org; Jason
> > Gunthorpe <jgg-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> > Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
> > Subject: RE: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
> >
> > > > 182,6 +199,7 @@ static int fill_res_info(struct sk_buff *msg, struct
> > ib_device
> > > > *device)
> > > >  		[RDMA_RESTRACK_PD] = "pd",
> > > >  		[RDMA_RESTRACK_CQ] = "cq",
> > > >  		[RDMA_RESTRACK_QP] = "qp",
> > > > +		[RDMA_RESTRACK_CM_ID] = "cm_id",
> > >
> > > May be rdmacm_id a better name to avoid confusion with ib_cm/iw_cm ids?
> >
> > Eventually, I could see the ib_cm and/or iw_cm info also getting dumped as part
> Yes. But it is still coming through id of rdmacm_id as transport id information.
> > of this.  So perhaps the name "cm_id" is ok?
> It's just that longer name removes the ambiguity.
> I am not too particular about this particular naming.
>
> > I prefer it also because its shorter (lazy typer! :) )
> Sure, shorter names are useful to end user.
> But I think this is handled well at iproute2 tool level.
> For example below two commands achieves the same task.
> #ip address show
> #ip a s
> So may be shorter naming at command level is better left at user space tool level.

The shorter naming is working on commands and not on objects.
In rdamtool, you can write "rdma d s" instead of "rdma dev show", but
the name of device (mlx5_0) or object (qp) should be full.

Regarding the name, I personally think that cm_id is better because it
is general and applicable both to ib_cm and iw_cm. The separation
between them can be done with introduction of new netlink attribute, e.g.
cm_id_type.

Thanks

> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
       [not found]     ` <531889e6a24f7919dec71734c91298d266aa9721.1517418595.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
  2018-01-31 20:47       ` Parav Pandit
@ 2018-02-01  8:49       ` Leon Romanovsky
       [not found]         ` <20180201084944.GH2055-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
  1 sibling, 1 reply; 35+ messages in thread
From: Leon Romanovsky @ 2018-02-01  8:49 UTC (permalink / raw)
  To: Steve Wise
  Cc: dledford-H+wXaHxf7aLQT0dZR+AlfA, jgg-VPRAkNaXOzVWk0Htik3J/w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

[-- Attachment #1: Type: text/plain, Size: 24362 bytes --]

On Tue, Jan 30, 2018 at 08:59:11AM -0800, Steve Wise wrote:
> Implement RDMA nldev netlink interface to get detailed CM_ID information.
>
> Because cm_id's are attached to rdma devices in various work queue contexts,
> the pid and task information at device-attach time is sometimes not useful.
> For example, an nvme/f host connection ends up being bound to a device
> in a work queue context and the resulting pid at attach time no longer exists
> after connection setup.  So instead we mark all cm_id's created via the
> rdma_ucm as "user", and all others as "kernel".  This required tweaking
> the restrack code a little.  It also required wrapping some rdma_cm
> functions to allow passing the module name string.
>
> Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
> ---
>  drivers/infiniband/core/cma.c      |  55 ++++++---
>  drivers/infiniband/core/nldev.c    | 245 +++++++++++++++++++++++++++++++++++++
>  drivers/infiniband/core/restrack.c |  29 ++++-
>  drivers/infiniband/core/ucma.c     |   8 +-
>  include/rdma/rdma_cm.h             |  24 +++-
>  include/rdma/restrack.h            |   4 +
>  include/uapi/rdma/rdma_netlink.h   |  30 +++++
>  7 files changed, 365 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
> index 72ad52b..51fbfa1 100644
> --- a/drivers/infiniband/core/cma.c
> +++ b/drivers/infiniband/core/cma.c
> @@ -465,6 +465,9 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
>  	id_priv->id.route.addr.dev_addr.transport =
>  		rdma_node_get_transport(cma_dev->device->node_type);
>  	list_add_tail(&id_priv->list, &cma_dev->id_list);
> +	id_priv->id.res.type = RDMA_RESTRACK_CM_ID;
> +	id_priv->id.res.kern_name = id_priv->id.caller;

Steve, I don't like it, I worked hard to hide it from the users of restrack,
and don't see reason why the same trick as with ib_create_cq/ib_create_pd won't
work here.

> +	rdma_restrack_add(&id_priv->id.res);
>  }
>
>  static void cma_attach_to_dev(struct rdma_id_private *id_priv,
> @@ -737,10 +740,10 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
>  		complete(&id_priv->comp);
>  }
>
> -struct rdma_cm_id *rdma_create_id(struct net *net,
> -				  rdma_cm_event_handler event_handler,
> -				  void *context, enum rdma_port_space ps,
> -				  enum ib_qp_type qp_type)
> +struct rdma_cm_id *__rdma_create_id(struct net *net,
> +				    rdma_cm_event_handler event_handler,
> +				    void *context, enum rdma_port_space ps,
> +				    enum ib_qp_type qp_type, const char *caller)
>  {
>  	struct rdma_id_private *id_priv;
>
> @@ -748,7 +751,10 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
>  	if (!id_priv)
>  		return ERR_PTR(-ENOMEM);
>
> -	id_priv->owner = task_pid_nr(current);
> +	if (caller)
> +		id_priv->id.caller = caller;
> +	else
> +		id_priv->owner = task_pid_nr(current);

See the comment above

>  	id_priv->state = RDMA_CM_IDLE;
>  	id_priv->id.context = context;
>  	id_priv->id.event_handler = event_handler;

Not saying that we need to do it now, but it is important to write, most
probably we can drop certain initialization from rdma_create_id() adn
reuse rdma_restrack_put/_get.

> @@ -768,7 +774,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
>
>  	return &id_priv->id;
>  }
> -EXPORT_SYMBOL(rdma_create_id);
> +EXPORT_SYMBOL(__rdma_create_id);
>
>  static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
>  {
> @@ -1628,6 +1634,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
>  	mutex_unlock(&id_priv->handler_mutex);
>
>  	if (id_priv->cma_dev) {
> +		rdma_restrack_del(&id_priv->id.res);

You should count all created cm_ids and not only binded.

>  		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
>  			if (id_priv->cm_id.ib)
>  				ib_destroy_cm_id(id_priv->cm_id.ib);
> @@ -1786,9 +1793,10 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
>  		ib_event->param.req_rcvd.primary_path->service_id;
>  	int ret;
>
> -	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
> +	id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
>  			    listen_id->event_handler, listen_id->context,
> -			    listen_id->ps, ib_event->param.req_rcvd.qp_type);
> +			    listen_id->ps, ib_event->param.req_rcvd.qp_type,
> +			    listen_id->caller);

I think the cleanest way will be to create some struct and pass pointer to it so
you can unfold all relevant data inside of __rdma_create_id().

>  	if (IS_ERR(id))
>  		return NULL;
>
> @@ -1843,8 +1851,8 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
>  	struct net *net = listen_id->route.addr.dev_addr.net;
>  	int ret;
>
> -	id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
> -			    listen_id->ps, IB_QPT_UD);
> +	id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
> +			      listen_id->ps, IB_QPT_UD, listen_id->caller);
>  	if (IS_ERR(id))
>  		return NULL;
>
> @@ -2110,10 +2118,11 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
>  		goto out;
>
>  	/* Create a new RDMA id for the new IW CM ID */
> -	new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> -				   listen_id->id.event_handler,
> -				   listen_id->id.context,
> -				   RDMA_PS_TCP, IB_QPT_RC);
> +	new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> +				     listen_id->id.event_handler,
> +				     listen_id->id.context,
> +				     RDMA_PS_TCP, IB_QPT_RC,
> +				     listen_id->id.caller);
>  	if (IS_ERR(new_cm_id)) {
>  		ret = -ENOMEM;
>  		goto out;
> @@ -2238,8 +2247,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
>  	if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
>  		return;
>
> -	id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
> -			    id_priv->id.qp_type);
> +	id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
> +			      id_priv->id.qp_type, id_priv->id.caller);
>  	if (IS_ERR(id))
>  		return;
>
> @@ -3347,8 +3356,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
>
>  	return 0;
>  err2:
> -	if (id_priv->cma_dev)
> +	if (id_priv->cma_dev) {
> +		rdma_restrack_del(&id_priv->id.res);
>  		cma_release_dev(id_priv);
> +	}
>  err1:
>  	cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
>  	return ret;
> @@ -3731,14 +3742,18 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
>  	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
>  }
>
> -int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
> +int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
> +		  const char *caller)
>  {
>  	struct rdma_id_private *id_priv;
>  	int ret;
>
>  	id_priv = container_of(id, struct rdma_id_private, id);
>
> -	id_priv->owner = task_pid_nr(current);
> +	if (caller)
> +		id_priv->id.caller = caller;
> +	else
> +		id_priv->owner = task_pid_nr(current);
>
>  	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
>  		return -EINVAL;
> @@ -3778,7 +3793,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
>  	rdma_reject(id, NULL, 0);
>  	return ret;
>  }
> -EXPORT_SYMBOL(rdma_accept);
> +EXPORT_SYMBOL(__rdma_accept);
>
>  int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
>  {
> diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
> index fa8655e..a4091b5 100644
> --- a/drivers/infiniband/core/nldev.c
> +++ b/drivers/infiniband/core/nldev.c
> @@ -34,6 +34,7 @@
>  #include <linux/pid.h>
>  #include <linux/pid_namespace.h>
>  #include <net/netlink.h>
> +#include <rdma/rdma_cm.h>
>  #include <rdma/rdma_netlink.h>
>
>  #include "core_priv.h"
> @@ -71,6 +72,22 @@
>  	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
>  	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
>  						    .len = TASK_COMM_LEN },
> +	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type = NLA_NESTED },

I would like to use this opportunity. There is CM_ID, so users will be
able to query nldev directly on this ID (once we implement .doit), but
can we found a proper abstraction for other objects (PD, CQ, QP e.t.c.)?

I want to have that all resources will have something similar to ifindex/ibindex.


> +	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type = NLA_NESTED },
> +	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
> +	[RDMA_NLDEV_ATTR_RES_IPV4_SADDR]	= {
> +				.len = FIELD_SIZEOF(struct iphdr, saddr) },
> +	[RDMA_NLDEV_ATTR_RES_IPV4_DADDR]	= {
> +				.len = FIELD_SIZEOF(struct iphdr, saddr) },
> +	[RDMA_NLDEV_ATTR_RES_IPV6_SADDR]	= {
> +				.len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
> +	[RDMA_NLDEV_ATTR_RES_IPV6_DADDR]	= {
> +				.len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
> +	[RDMA_NLDEV_ATTR_RES_IP_SPORT]		= { .type = NLA_U16 },
> +	[RDMA_NLDEV_ATTR_RES_IP_DPORT]		= { .type = NLA_U16 },
> +	[RDMA_NLDEV_ATTR_RES_DEV_TYPE]		= { .type = NLA_U8 },
> +	[RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE]	= { .type = NLA_U8 },
> +	[RDMA_NLDEV_ATTR_RES_NETWORK_TYPE]	= { .type = NLA_U8 },
>  };
>
>  static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
> @@ -182,6 +199,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
>  		[RDMA_RESTRACK_PD] = "pd",
>  		[RDMA_RESTRACK_CQ] = "cq",
>  		[RDMA_RESTRACK_QP] = "qp",
> +		[RDMA_RESTRACK_CM_ID] = "cm_id",
>  	};
>
>  	struct rdma_restrack_root *res = &device->res;
> @@ -284,6 +302,99 @@ static int fill_res_qp_entry(struct sk_buff *msg,
>  	return -EMSGSIZE;
>  }
>
> +static int fill_res_cm_id_entry(struct sk_buff *msg,
> +				struct rdma_cm_id *cm_id, uint32_t port)
> +{
> +	struct rdma_id_private *id_priv;
> +	struct nlattr *entry_attr;
> +
> +	if (port && port != cm_id->port_num)
> +		return 0;
> +
> +	id_priv = container_of(cm_id, struct rdma_id_private, id);
> +	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY);
> +	if (!entry_attr)
> +		goto out;
> +
> +	if (cm_id->port_num &&
> +	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
> +		goto err;
> +
> +	if (id_priv->qp_num &&
> +	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
> +		goto err;
> +
> +	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
> +		goto err;
> +
> +	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
> +		goto err;
> +	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
> +		goto err;
> +	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_DEV_TYPE,
> +		       id_priv->id.route.addr.dev_addr.dev_type))
> +		goto err;
> +	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE,
> +		       id_priv->id.route.addr.dev_addr.transport))
> +		goto err;
> +
> +	if (cm_id->route.addr.src_addr.ss_family == AF_INET) {
> +		struct sockaddr_in *sin;
> +
> +		sin = (struct sockaddr_in *)&cm_id->route.addr.src_addr;
> +		if (nla_put_in_addr(msg, RDMA_NLDEV_ATTR_RES_IPV4_SADDR,
> +				    sin->sin_addr.s_addr))
> +			goto err;
> +		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_SPORT,
> +				  sin->sin_port))
> +			goto err;
> +
> +		sin = (struct sockaddr_in *)&cm_id->route.addr.dst_addr;
> +		if (nla_put_in_addr(msg, RDMA_NLDEV_ATTR_RES_IPV4_DADDR,
> +				    sin->sin_addr.s_addr))
> +			goto err;
> +		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_DPORT,
> +				  sin->sin_port))
> +			goto err;
> +	} else {
> +		struct sockaddr_in6 *sin6;
> +
> +		sin6 = (struct sockaddr_in6 *)&cm_id->route.addr.src_addr;
> +		if (nla_put_in6_addr(msg, RDMA_NLDEV_ATTR_RES_IPV6_SADDR,
> +				     &sin6->sin6_addr))
> +			goto err;
> +		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_SPORT,
> +				  sin6->sin6_port))
> +			goto err;
> +
> +		sin6 = (struct sockaddr_in6 *)&cm_id->route.addr.dst_addr;
> +		if (nla_put_in6_addr(msg, RDMA_NLDEV_ATTR_RES_IPV6_DADDR,
> +				     &sin6->sin6_addr))
> +			goto err;
> +		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_DPORT,
> +				  sin6->sin6_port))
> +			goto err;
> +	}
> +
> +	if (id_priv->id.caller) {
> +		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
> +				   id_priv->id.caller))
> +			goto err;
> +	} else {
> +		/* CMA keeps the owning pid. */
> +		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, id_priv->owner))
> +			goto err;
> +	}
> +
> +	nla_nest_end(msg, entry_attr);
> +	return 0;
> +
> +err:
> +	nla_nest_cancel(msg, entry_attr);
> +out:
> +	return -EMSGSIZE;
> +}
> +
>  static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
>  			  struct netlink_ext_ack *extack)
>  {
> @@ -686,6 +797,137 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
>  	return ret;
>  }
>
> +static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb,
> +				      struct netlink_callback *cb)
> +{
> +	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
> +	struct rdma_restrack_entry *res;
> +	int err, ret = 0, idx = 0;
> +	struct nlattr *table_attr;
> +	struct ib_device *device;
> +	int start = cb->args[0];
> +	struct rdma_cm_id *cm_id = NULL;
> +	struct nlmsghdr *nlh;
> +	u32 index, port = 0;
> +
> +	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
> +			  nldev_policy, NULL);
> +	/*
> +	 * Right now, we are expecting the device index to get QP information,
> +	 * but it is possible to extend this code to return all devices in
> +	 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
> +	 * if it doesn't exist, we will iterate over all devices.
> +	 *
> +	 * But it is not needed for now.
> +	 */
> +	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
> +		return -EINVAL;
> +
> +	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
> +	device = ib_device_get_by_index(index);
> +	if (!device)
> +		return -EINVAL;
> +
> +	/*
> +	 * If no PORT_INDEX is supplied, we will return all QPs from that device

QPs->CM_IDs

> +	 */
> +	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
> +		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
> +		if (!rdma_is_port_valid(device, port)) {
> +			ret = -EINVAL;
> +			goto err_index;
> +		}
> +	}
> +
> +	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
> +		RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_QP_GET),

RDMA_NLDEV_CMD_RES_QP_GET -> RDMA_NLDEV_CMD_RES_CM_ID_GET

> +		0, NLM_F_MULTI);
> +
> +	if (fill_nldev_handle(skb, device)) {
> +		ret = -EMSGSIZE;
> +		goto err;
> +	}
> +
> +	table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_CM_ID);
> +	if (!table_attr) {
> +		ret = -EMSGSIZE;
> +		goto err;
> +	}
> +
> +	down_read(&device->res.rwsem);
> +	hash_for_each_possible(device->res.hash, res, node,
> +			       RDMA_RESTRACK_CM_ID) {
> +		if (idx < start)
> +			goto next;
> +
> +		if ((rdma_is_kernel_res(res) &&
> +		     task_active_pid_ns(current) != &init_pid_ns) ||
> +		    (!rdma_is_kernel_res(res) &&
> +		     task_active_pid_ns(current) !=
> +		     task_active_pid_ns(res->task)))
> +			/*
> +			 * 1. Kernel QPs should be visible in init namsapce only
> +			 * 2. Preent only QPs visible in the current namespace
> +			 */
> +			goto next;
> +
> +		if (!rdma_restrack_get(res))
> +			/*
> +			 * Resource is under release now, but we are not
> +			 * relesing lock now, so it will be released in
> +			 * our next pass, once we will get ->next pointer.
> +			 */
> +			goto next;
> +
> +		cm_id = container_of(res, struct rdma_cm_id, res);
> +
> +		up_read(&device->res.rwsem);
> +		ret = fill_res_cm_id_entry(skb, cm_id, port);
> +		down_read(&device->res.rwsem);
> +		/*
> +		 * Return resource back, but it won't be released till
> +		 * the &device->res.rwsem will be released for write.
> +		 */
> +		rdma_restrack_put(res);
> +
> +		if (ret == -EMSGSIZE)
> +			/*
> +			 * There is a chance to optimize here.
> +			 * It can be done by using list_prepare_entry
> +			 * and list_for_each_entry_continue afterwards.
> +			 */
> +			break;
> +		if (ret)
> +			goto res_err;
> +next:		idx++;
> +	}
> +	up_read(&device->res.rwsem);
> +
> +	nla_nest_end(skb, table_attr);
> +	nlmsg_end(skb, nlh);
> +	cb->args[0] = idx;
> +
> +	/*
> +	 * No more CM_IDs to fill, cancel the message and
> +	 * return 0 to mark end of dumpit.
> +	 */
> +	if (!cm_id)
> +		goto err;
> +
> +	put_device(&device->dev);
> +	return skb->len;
> +
> +res_err:
> +	nla_nest_cancel(skb, table_attr);
> +	up_read(&device->res.rwsem);
> +
> +err:
> +	nlmsg_cancel(skb, nlh);
> +
> +err_index:
> +	put_device(&device->dev);
> +	return ret;
> +}
>  static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
>  	[RDMA_NLDEV_CMD_GET] = {
>  		.doit = nldev_get_doit,
> @@ -712,6 +954,9 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
>  		 * too.
>  		 */
>  	},
> +	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
> +		.dump = nldev_res_get_cm_id_dumpit,
> +	},
>  };
>
>  void __init nldev_init(void)
> diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
> index 857637b..bb13169 100644
> --- a/drivers/infiniband/core/restrack.c
> +++ b/drivers/infiniband/core/restrack.c
> @@ -3,6 +3,7 @@
>   * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
>   */
>
> +#include <rdma/rdma_cm.h>
>  #include <rdma/ib_verbs.h>
>  #include <rdma/restrack.h>
>  #include <linux/mutex.h>
> @@ -67,6 +68,7 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
>  	struct ib_pd *pd;
>  	struct ib_cq *cq;
>  	struct ib_qp *qp;
> +	struct rdma_cm_id *cm_id;
>
>  	switch (type) {
>  	case RDMA_RESTRACK_PD:
> @@ -85,6 +87,10 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
>  		xrcd = container_of(res, struct ib_xrcd, res);
>  		dev = xrcd->device;
>  		break;
> +	case RDMA_RESTRACK_CM_ID:
> +		cm_id = container_of(res, struct rdma_cm_id, res);
> +		dev = cm_id->device;
> +		break;
>  	default:
>  		WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
>  		return NULL;
> @@ -93,6 +99,27 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
>  	return dev;
>  }
>
> +static bool res_is_user(struct rdma_restrack_entry *res)
> +{
> +	enum rdma_restrack_type type = res->type;
> +	bool is_user;
> +
> +	switch (type) {
> +	case RDMA_RESTRACK_CM_ID: {
> +		struct rdma_cm_id *cm_id;
> +
> +		cm_id = container_of(res, struct rdma_cm_id, res);
> +		is_user = !cm_id->caller;
> +		break;
> +	}
> +	default:
> +		is_user = !uaccess_kernel();
> +		break;
> +	}
> +
> +	return is_user;
> +}
> +
>  void rdma_restrack_add(struct rdma_restrack_entry *res)
>  {
>  	struct ib_device *dev = res_to_dev(res);
> @@ -100,7 +127,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
>  	if (!dev)
>  		return;
>
> -	if (!uaccess_kernel()) {
> +	if (res_is_user(res)) {
>  		get_task_struct(current);
>  		res->task = current;
>  		res->kern_name = NULL;
> diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
> index d67219d..f7f0282 100644
> --- a/drivers/infiniband/core/ucma.c
> +++ b/drivers/infiniband/core/ucma.c
> @@ -476,8 +476,8 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
>  		return -ENOMEM;
>
>  	ctx->uid = cmd.uid;
> -	ctx->cm_id = rdma_create_id(current->nsproxy->net_ns,
> -				    ucma_event_handler, ctx, cmd.ps, qp_type);
> +	ctx->cm_id = __rdma_create_id(current->nsproxy->net_ns,
> +			      ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
>  	if (IS_ERR(ctx->cm_id)) {
>  		ret = PTR_ERR(ctx->cm_id);
>  		goto err1;
> @@ -1084,12 +1084,12 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
>  	if (cmd.conn_param.valid) {
>  		ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
>  		mutex_lock(&file->mut);
> -		ret = rdma_accept(ctx->cm_id, &conn_param);
> +		ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
>  		if (!ret)
>  			ctx->uid = cmd.uid;
>  		mutex_unlock(&file->mut);
>  	} else
> -		ret = rdma_accept(ctx->cm_id, NULL);
> +		ret = __rdma_accept(ctx->cm_id, NULL, NULL);
>
>  	ucma_put_ctx(ctx);
>  	return ret;
> diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
> index 5984225..73dac9b 100644
> --- a/include/rdma/rdma_cm.h
> +++ b/include/rdma/rdma_cm.h
> @@ -155,6 +155,12 @@ struct rdma_cm_id {
>  	enum rdma_port_space	 ps;
>  	enum ib_qp_type		 qp_type;
>  	u8			 port_num;
> +	const char *caller;
> +
> +	/*
> +	 * Internal to RDMA/core, don't use in the drivers
> +	 */
> +	struct rdma_restrack_entry     res;
>  };
>
>  struct rdma_id_private {
> @@ -198,6 +204,11 @@ struct rdma_id_private {
>  	enum ib_gid_type	gid_type;
>  };
>
> +struct rdma_cm_id *__rdma_create_id(struct net *net,
> +				  rdma_cm_event_handler event_handler,
> +				  void *context, enum rdma_port_space ps,
> +				  enum ib_qp_type qp_type, const char *caller);
> +
>  /**
>   * rdma_create_id - Create an RDMA identifier.
>   *
> @@ -210,10 +221,9 @@ struct rdma_id_private {
>   *
>   * The id holds a reference on the network namespace until it is destroyed.
>   */
> -struct rdma_cm_id *rdma_create_id(struct net *net,
> -				  rdma_cm_event_handler event_handler,
> -				  void *context, enum rdma_port_space ps,
> -				  enum ib_qp_type qp_type);
> +#define rdma_create_id(net, event_handler, context, ps, qp_type) \
> +	__rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \
> +			 KBUILD_MODNAME)
>
>  /**
>    * rdma_destroy_id - Destroys an RDMA identifier.
> @@ -325,6 +335,9 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
>   */
>  int rdma_listen(struct rdma_cm_id *id, int backlog);
>
> +int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
> +		  const char *caller);
> +
>  /**
>   * rdma_accept - Called to accept a connection request or response.
>   * @id: Connection identifier associated with the request.
> @@ -340,7 +353,8 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
>   * state of the qp associated with the id is modified to error, such that any
>   * previously posted receive buffers would be flushed.
>   */
> -int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
> +#define rdma_accept(id, conn_param) \
> +	__rdma_accept((id), (conn_param),  KBUILD_MODNAME)
>
>  /**
>   * rdma_notify - Notifies the RDMA CM of an asynchronous event that has
> diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
> index c2d8116..a794e0e 100644
> --- a/include/rdma/restrack.h
> +++ b/include/rdma/restrack.h
> @@ -33,6 +33,10 @@ enum rdma_restrack_type {
>  	 */
>  	RDMA_RESTRACK_XRCD,
>  	/**
> +	 * @RDMA_RESTRACK_CM_ID: Connection Manager ID (CM_ID)
> +	 */
> +	RDMA_RESTRACK_CM_ID,
> +	/**
>  	 * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
>  	 */
>  	RDMA_RESTRACK_MAX
> diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
> index 17e59be..4ed21ee 100644
> --- a/include/uapi/rdma/rdma_netlink.h
> +++ b/include/uapi/rdma/rdma_netlink.h
> @@ -240,6 +240,8 @@ enum rdma_nldev_command {
>
>  	RDMA_NLDEV_CMD_RES_QP_GET, /* can dump */
>
> +	RDMA_NLDEV_CMD_RES_CM_ID_GET, /* can dump */
> +
>  	RDMA_NLDEV_NUM_OPS
>  };
>
> @@ -352,6 +354,34 @@ enum rdma_nldev_attr {
>  	 */
>  	RDMA_NLDEV_ATTR_RES_KERN_NAME,		/* string */
>
> +	RDMA_NLDEV_ATTR_RES_CM_ID,		/* nested table */
> +	RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,	/* nested table */
> +	/*
> +	 * rdma_cm_id port space.
> +	 */
> +	RDMA_NLDEV_ATTR_RES_PS,			/* u32 */
> +	/*
> +	 * IP Addresses/port attributes.
> +	 */
> +	RDMA_NLDEV_ATTR_RES_IPV4_SADDR,		/* u8[4] */
> +	RDMA_NLDEV_ATTR_RES_IPV4_DADDR,		/* u8[4] */
> +	RDMA_NLDEV_ATTR_RES_IPV6_SADDR,		/* u8[16] */
> +	RDMA_NLDEV_ATTR_RES_IPV6_DADDR,		/* u8[16] */
> +	RDMA_NLDEV_ATTR_RES_IP_SPORT,		/* BE u16 */
> +	RDMA_NLDEV_ATTR_RES_IP_DPORT,		/* BE u16 */

Can you please document the meaning of S (source) and D (destination)
in regards of this netlink output? It is needed to remove ambiguity.

> +	/*
> +	 * ARPHRD_INFINIBAND, ARPHRD_ETHER, ...
> +	 */
> +	RDMA_NLDEV_ATTR_RES_DEV_TYPE,		/* u8 */
> +	/*
> +	 * enum enum rdma_transport_type (IB, IWARP, ...)
> +	 */
> +	RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE,	/* u8 */
> +	/*
> +	 * enum rdma_network_type (IB, IPv4, IPv6,...)
> +	 */
> +	RDMA_NLDEV_ATTR_RES_NETWORK_TYPE,	/* u8 */
> +
>  	RDMA_NLDEV_ATTR_MAX
>  };
>  #endif /* _UAPI_RDMA_NETLINK_H */
> --
> 1.8.3.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
       [not found]         ` <20180201084944.GH2055-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
@ 2018-02-01 16:07           ` Steve Wise
  2018-02-04 15:05             ` Leon Romanovsky
  2018-02-01 17:53           ` Jason Gunthorpe
  1 sibling, 1 reply; 35+ messages in thread
From: Steve Wise @ 2018-02-01 16:07 UTC (permalink / raw)
  To: 'Leon Romanovsky'
  Cc: dledford-H+wXaHxf7aLQT0dZR+AlfA, jgg-VPRAkNaXOzVWk0Htik3J/w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA


> 
> On Tue, Jan 30, 2018 at 08:59:11AM -0800, Steve Wise wrote:
> > Implement RDMA nldev netlink interface to get detailed CM_ID
information.
> >
> > Because cm_id's are attached to rdma devices in various work queue
contexts,
> > the pid and task information at device-attach time is sometimes not
useful.
> > For example, an nvme/f host connection ends up being bound to a device
> > in a work queue context and the resulting pid at attach time no longer
exists
> > after connection setup.  So instead we mark all cm_id's created via the
> > rdma_ucm as "user", and all others as "kernel".  This required tweaking
> > the restrack code a little.  It also required wrapping some rdma_cm
> > functions to allow passing the module name string.
> >
> > Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
> > ---
> >  drivers/infiniband/core/cma.c      |  55 ++++++---
> >  drivers/infiniband/core/nldev.c    | 245
> +++++++++++++++++++++++++++++++++++++
> >  drivers/infiniband/core/restrack.c |  29 ++++-
> >  drivers/infiniband/core/ucma.c     |   8 +-
> >  include/rdma/rdma_cm.h             |  24 +++-
> >  include/rdma/restrack.h            |   4 +
> >  include/uapi/rdma/rdma_netlink.h   |  30 +++++
> >  7 files changed, 365 insertions(+), 30 deletions(-)
> >
> > diff --git a/drivers/infiniband/core/cma.c
b/drivers/infiniband/core/cma.c
> > index 72ad52b..51fbfa1 100644
> > --- a/drivers/infiniband/core/cma.c
> > +++ b/drivers/infiniband/core/cma.c
> > @@ -465,6 +465,9 @@ static void _cma_attach_to_dev(struct
> rdma_id_private *id_priv,
> >  	id_priv->id.route.addr.dev_addr.transport =
> >  		rdma_node_get_transport(cma_dev->device->node_type);
> >  	list_add_tail(&id_priv->list, &cma_dev->id_list);
> > +	id_priv->id.res.type = RDMA_RESTRACK_CM_ID;
> > +	id_priv->id.res.kern_name = id_priv->id.caller;
> 
> Steve, I don't like it, I worked hard to hide it from the users of
restrack,
> and don't see reason why the same trick as with ib_create_cq/ib_create_pd
> won't
> work here.

I am doing the same trick, no?  rdma_create_id() is a static inline that
passes KBUILD_MODNAME.  The issue is that at the time the rdma_cm_id is
created, it is not associated with any ib_device.  That only happens at
cma_attach time.  So how can the resource be added if there is no device?

> 
> > +	rdma_restrack_add(&id_priv->id.res);
> >  }
> >
> >  static void cma_attach_to_dev(struct rdma_id_private *id_priv,
> > @@ -737,10 +740,10 @@ static void cma_deref_id(struct rdma_id_private
> *id_priv)
> >  		complete(&id_priv->comp);
> >  }
> >
> > -struct rdma_cm_id *rdma_create_id(struct net *net,
> > -				  rdma_cm_event_handler event_handler,
> > -				  void *context, enum rdma_port_space ps,
> > -				  enum ib_qp_type qp_type)
> > +struct rdma_cm_id *__rdma_create_id(struct net *net,
> > +				    rdma_cm_event_handler event_handler,
> > +				    void *context, enum rdma_port_space ps,
> > +				    enum ib_qp_type qp_type, const char
*caller)
> >  {
> >  	struct rdma_id_private *id_priv;
> >
> > @@ -748,7 +751,10 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
> >  	if (!id_priv)
> >  		return ERR_PTR(-ENOMEM);
> >
> > -	id_priv->owner = task_pid_nr(current);
> > +	if (caller)
> > +		id_priv->id.caller = caller;
> > +	else
> > +		id_priv->owner = task_pid_nr(current);
> 
> See the comment above

There is no ib_device at this point, so caller (and owner) must be saved
until the cm_id is bound to a device (or possibly devices for listening
ids).

> 
> >  	id_priv->state = RDMA_CM_IDLE;
> >  	id_priv->id.context = context;
> >  	id_priv->id.event_handler = event_handler;
> 
> Not saying that we need to do it now, but it is important to write, most
> probably we can drop certain initialization from rdma_create_id() adn
> reuse rdma_restrack_put/_get.
> 

I don't understand this comment.  Can you please elaborate? 

> > @@ -768,7 +774,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
> >
> >  	return &id_priv->id;
> >  }
> > -EXPORT_SYMBOL(rdma_create_id);
> > +EXPORT_SYMBOL(__rdma_create_id);
> >
> >  static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp
*qp)
> >  {
> > @@ -1628,6 +1634,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
> >  	mutex_unlock(&id_priv->handler_mutex);
> >
> >  	if (id_priv->cma_dev) {
> > +		rdma_restrack_del(&id_priv->id.res);
> 
> You should count all created cm_ids and not only binded.

No ib_device if they aren't bound.

> 
> >  		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
> >  			if (id_priv->cm_id.ib)
> >  				ib_destroy_cm_id(id_priv->cm_id.ib);
> > @@ -1786,9 +1793,10 @@ static struct rdma_id_private
> *cma_new_conn_id(struct rdma_cm_id *listen_id,
> >  		ib_event->param.req_rcvd.primary_path->service_id;
> >  	int ret;
> >
> > -	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
> > +	id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
> >  			    listen_id->event_handler, listen_id->context,
> > -			    listen_id->ps,
ib_event->param.req_rcvd.qp_type);
> > +			    listen_id->ps, ib_event->param.req_rcvd.qp_type,
> > +			    listen_id->caller);
> 
> I think the cleanest way will be to create some struct and pass pointer to
it so
> you can unfold all relevant data inside of __rdma_create_id().
> 

Why is that cleaner?  Marshall up the data into a struct, pass a ptr,
unmarshall it all...


> >  	if (IS_ERR(id))
> >  		return NULL;
> >
> > @@ -1843,8 +1851,8 @@ static struct rdma_id_private
> *cma_new_udp_id(struct rdma_cm_id *listen_id,
> >  	struct net *net = listen_id->route.addr.dev_addr.net;
> >  	int ret;
> >
> > -	id = rdma_create_id(net, listen_id->event_handler,
listen_id->context,
> > -			    listen_id->ps, IB_QPT_UD);
> > +	id = __rdma_create_id(net, listen_id->event_handler, listen_id-
> >context,
> > +			      listen_id->ps, IB_QPT_UD, listen_id->caller);
> >  	if (IS_ERR(id))
> >  		return NULL;
> >
> > @@ -2110,10 +2118,11 @@ static int iw_conn_req_handler(struct iw_cm_id
> *cm_id,
> >  		goto out;
> >
> >  	/* Create a new RDMA id for the new IW CM ID */
> > -	new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> > -				   listen_id->id.event_handler,
> > -				   listen_id->id.context,
> > -				   RDMA_PS_TCP, IB_QPT_RC);
> > +	new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> > +				     listen_id->id.event_handler,
> > +				     listen_id->id.context,
> > +				     RDMA_PS_TCP, IB_QPT_RC,
> > +				     listen_id->id.caller);
> >  	if (IS_ERR(new_cm_id)) {
> >  		ret = -ENOMEM;
> >  		goto out;
> > @@ -2238,8 +2247,8 @@ static void cma_listen_on_dev(struct
> rdma_id_private *id_priv,
> >  	if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev-
> >device, 1))
> >  		return;
> >
> > -	id = rdma_create_id(net, cma_listen_handler, id_priv,
id_priv->id.ps,
> > -			    id_priv->id.qp_type);
> > +	id = __rdma_create_id(net, cma_listen_handler, id_priv,
id_priv->id.ps,
> > +			      id_priv->id.qp_type, id_priv->id.caller);
> >  	if (IS_ERR(id))
> >  		return;
> >
> > @@ -3347,8 +3356,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct
> sockaddr *addr)
> >
> >  	return 0;
> >  err2:
> > -	if (id_priv->cma_dev)
> > +	if (id_priv->cma_dev) {
> > +		rdma_restrack_del(&id_priv->id.res);
> >  		cma_release_dev(id_priv);
> > +	}
> >  err1:
> >  	cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
> >  	return ret;
> > @@ -3731,14 +3742,18 @@ static int cma_send_sidr_rep(struct
> rdma_id_private *id_priv,
> >  	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
> >  }
> >
> > -int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> *conn_param)
> > +int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> *conn_param,
> > +		  const char *caller)
> >  {
> >  	struct rdma_id_private *id_priv;
> >  	int ret;
> >
> >  	id_priv = container_of(id, struct rdma_id_private, id);
> >
> > -	id_priv->owner = task_pid_nr(current);
> > +	if (caller)
> > +		id_priv->id.caller = caller;
> > +	else
> > +		id_priv->owner = task_pid_nr(current);
> >
> >  	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
> >  		return -EINVAL;
> > @@ -3778,7 +3793,7 @@ int rdma_accept(struct rdma_cm_id *id, struct
> rdma_conn_param *conn_param)
> >  	rdma_reject(id, NULL, 0);
> >  	return ret;
> >  }
> > -EXPORT_SYMBOL(rdma_accept);
> > +EXPORT_SYMBOL(__rdma_accept);
> >
> >  int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
> >  {
> > diff --git a/drivers/infiniband/core/nldev.c
b/drivers/infiniband/core/nldev.c
> > index fa8655e..a4091b5 100644
> > --- a/drivers/infiniband/core/nldev.c
> > +++ b/drivers/infiniband/core/nldev.c
> > @@ -34,6 +34,7 @@
> >  #include <linux/pid.h>
> >  #include <linux/pid_namespace.h>
> >  #include <net/netlink.h>
> > +#include <rdma/rdma_cm.h>
> >  #include <rdma/rdma_netlink.h>
> >
> >  #include "core_priv.h"
> > @@ -71,6 +72,22 @@
> >  	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
> >  	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type =
> NLA_NUL_STRING,
> >  						    .len = TASK_COMM_LEN },
> > +	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type =
> NLA_NESTED },
> 
> I would like to use this opportunity. There is CM_ID, so users will be
> able to query nldev directly on this ID (once we implement .doit), but
> can we found a proper abstraction for other objects (PD, CQ, QP e.t.c.)?
> 
> I want to have that all resources will have something similar to
ifindex/ibindex.
> 

Pds, cqs, and qps, all have  a device-unique number.  So
ibindex/restrack_type/object_id should work.  But cm_id's don't have that.
Similar to a socket I guess.  So I'm not sure how to identify cm_ids other
than by the ipaddresses/ip ports.

> 
> > +	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type =
> NLA_NESTED },
> > +	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
> > +	[RDMA_NLDEV_ATTR_RES_IPV4_SADDR]	= {
> > +				.len = FIELD_SIZEOF(struct iphdr, saddr) },
> > +	[RDMA_NLDEV_ATTR_RES_IPV4_DADDR]	= {
> > +				.len = FIELD_SIZEOF(struct iphdr, saddr) },
> > +	[RDMA_NLDEV_ATTR_RES_IPV6_SADDR]	= {
> > +				.len = FIELD_SIZEOF(struct ipv6hdr, saddr)
},
> > +	[RDMA_NLDEV_ATTR_RES_IPV6_DADDR]	= {
> > +				.len = FIELD_SIZEOF(struct ipv6hdr, saddr)
},
> > +	[RDMA_NLDEV_ATTR_RES_IP_SPORT]		= { .type = NLA_U16 },
> > +	[RDMA_NLDEV_ATTR_RES_IP_DPORT]		= { .type = NLA_U16 },
> > +	[RDMA_NLDEV_ATTR_RES_DEV_TYPE]		= { .type = NLA_U8 },
> > +	[RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE]	= { .type = NLA_U8 },
> > +	[RDMA_NLDEV_ATTR_RES_NETWORK_TYPE]	= { .type = NLA_U8 },
> >  };
> >
> >  static int fill_nldev_handle(struct sk_buff *msg, struct ib_device
*device)
> > @@ -182,6 +199,7 @@ static int fill_res_info(struct sk_buff *msg, struct
> ib_device *device)
> >  		[RDMA_RESTRACK_PD] = "pd",
> >  		[RDMA_RESTRACK_CQ] = "cq",
> >  		[RDMA_RESTRACK_QP] = "qp",
> > +		[RDMA_RESTRACK_CM_ID] = "cm_id",
> >  	};
> >
> >  	struct rdma_restrack_root *res = &device->res;
> > @@ -284,6 +302,99 @@ static int fill_res_qp_entry(struct sk_buff *msg,
> >  	return -EMSGSIZE;
> >  }
> >
> > +static int fill_res_cm_id_entry(struct sk_buff *msg,
> > +				struct rdma_cm_id *cm_id, uint32_t port)
> > +{
> > +	struct rdma_id_private *id_priv;
> > +	struct nlattr *entry_attr;
> > +
> > +	if (port && port != cm_id->port_num)
> > +		return 0;
> > +
> > +	id_priv = container_of(cm_id, struct rdma_id_private, id);
> > +	entry_attr = nla_nest_start(msg,
> RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY);
> > +	if (!entry_attr)
> > +		goto out;
> > +
> > +	if (cm_id->port_num &&
> > +	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id-
> >port_num))
> > +		goto err;
> > +
> > +	if (id_priv->qp_num &&
> > +	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv-
> >qp_num))
> > +		goto err;
> > +
> > +	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
> > +		goto err;
> > +
> > +	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
> > +		goto err;
> > +	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
> > +		goto err;
> > +	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_DEV_TYPE,
> > +		       id_priv->id.route.addr.dev_addr.dev_type))
> > +		goto err;
> > +	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE,
> > +		       id_priv->id.route.addr.dev_addr.transport))
> > +		goto err;
> > +
> > +	if (cm_id->route.addr.src_addr.ss_family == AF_INET) {
> > +		struct sockaddr_in *sin;
> > +
> > +		sin = (struct sockaddr_in *)&cm_id->route.addr.src_addr;
> > +		if (nla_put_in_addr(msg,
> RDMA_NLDEV_ATTR_RES_IPV4_SADDR,
> > +				    sin->sin_addr.s_addr))
> > +			goto err;
> > +		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_SPORT,
> > +				  sin->sin_port))
> > +			goto err;
> > +
> > +		sin = (struct sockaddr_in *)&cm_id->route.addr.dst_addr;
> > +		if (nla_put_in_addr(msg,
> RDMA_NLDEV_ATTR_RES_IPV4_DADDR,
> > +				    sin->sin_addr.s_addr))
> > +			goto err;
> > +		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_DPORT,
> > +				  sin->sin_port))
> > +			goto err;
> > +	} else {
> > +		struct sockaddr_in6 *sin6;
> > +
> > +		sin6 = (struct sockaddr_in6 *)&cm_id->route.addr.src_addr;
> > +		if (nla_put_in6_addr(msg,
> RDMA_NLDEV_ATTR_RES_IPV6_SADDR,
> > +				     &sin6->sin6_addr))
> > +			goto err;
> > +		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_SPORT,
> > +				  sin6->sin6_port))
> > +			goto err;
> > +
> > +		sin6 = (struct sockaddr_in6 *)&cm_id->route.addr.dst_addr;
> > +		if (nla_put_in6_addr(msg,
> RDMA_NLDEV_ATTR_RES_IPV6_DADDR,
> > +				     &sin6->sin6_addr))
> > +			goto err;
> > +		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_DPORT,
> > +				  sin6->sin6_port))
> > +			goto err;
> > +	}
> > +
> > +	if (id_priv->id.caller) {
> > +		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
> > +				   id_priv->id.caller))
> > +			goto err;
> > +	} else {
> > +		/* CMA keeps the owning pid. */
> > +		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, id_priv-
> >owner))
> > +			goto err;
> > +	}
> > +
> > +	nla_nest_end(msg, entry_attr);
> > +	return 0;
> > +
> > +err:
> > +	nla_nest_cancel(msg, entry_attr);
> > +out:
> > +	return -EMSGSIZE;
> > +}
> > +
> >  static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
> >  			  struct netlink_ext_ack *extack)
> >  {
> > @@ -686,6 +797,137 @@ static int nldev_res_get_qp_dumpit(struct sk_buff
> *skb,
> >  	return ret;
> >  }
> >
> > +static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb,
> > +				      struct netlink_callback *cb)
> > +{
> > +	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
> > +	struct rdma_restrack_entry *res;
> > +	int err, ret = 0, idx = 0;
> > +	struct nlattr *table_attr;
> > +	struct ib_device *device;
> > +	int start = cb->args[0];
> > +	struct rdma_cm_id *cm_id = NULL;
> > +	struct nlmsghdr *nlh;
> > +	u32 index, port = 0;
> > +
> > +	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
> > +			  nldev_policy, NULL);
> > +	/*
> > +	 * Right now, we are expecting the device index to get QP
information,
> > +	 * but it is possible to extend this code to return all devices in
> > +	 * one shot by checking the existence of
> RDMA_NLDEV_ATTR_DEV_INDEX.
> > +	 * if it doesn't exist, we will iterate over all devices.
> > +	 *
> > +	 * But it is not needed for now.
> > +	 */
> > +	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
> > +		return -EINVAL;
> > +
> > +	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
> > +	device = ib_device_get_by_index(index);
> > +	if (!device)
> > +		return -EINVAL;
> > +
> > +	/*
> > +	 * If no PORT_INDEX is supplied, we will return all QPs from that
device
> 
> QPs->CM_IDs
> 

Oops!

> > +	 */
> > +	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
> > +		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
> > +		if (!rdma_is_port_valid(device, port)) {
> > +			ret = -EINVAL;
> > +			goto err_index;
> > +		}
> > +	}
> > +
> > +	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
> > +		RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
> RDMA_NLDEV_CMD_RES_QP_GET),
> 
> RDMA_NLDEV_CMD_RES_QP_GET -> RDMA_NLDEV_CMD_RES_CM_ID_GET
>

Oops2!

 
> > +		0, NLM_F_MULTI);
> > +
> > +	if (fill_nldev_handle(skb, device)) {
> > +		ret = -EMSGSIZE;
> > +		goto err;
> > +	}
> > +
> > +	table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_CM_ID);
> > +	if (!table_attr) {
> > +		ret = -EMSGSIZE;
> > +		goto err;
> > +	}
> > +
> > +	down_read(&device->res.rwsem);
> > +	hash_for_each_possible(device->res.hash, res, node,
> > +			       RDMA_RESTRACK_CM_ID) {
> > +		if (idx < start)
> > +			goto next;
> > +
> > +		if ((rdma_is_kernel_res(res) &&
> > +		     task_active_pid_ns(current) != &init_pid_ns) ||
> > +		    (!rdma_is_kernel_res(res) &&
> > +		     task_active_pid_ns(current) !=
> > +		     task_active_pid_ns(res->task)))
> > +			/*
> > +			 * 1. Kernel QPs should be visible in init namsapce
only
> > +			 * 2. Preent only QPs visible in the current
namespace
> > +			 */
> > +			goto next;
> > +
> > +		if (!rdma_restrack_get(res))
> > +			/*
> > +			 * Resource is under release now, but we are not
> > +			 * relesing lock now, so it will be released in
> > +			 * our next pass, once we will get ->next pointer.
> > +			 */
> > +			goto next;
> > +
> > +		cm_id = container_of(res, struct rdma_cm_id, res);
> > +
> > +		up_read(&device->res.rwsem);
> > +		ret = fill_res_cm_id_entry(skb, cm_id, port);
> > +		down_read(&device->res.rwsem);
> > +		/*
> > +		 * Return resource back, but it won't be released till
> > +		 * the &device->res.rwsem will be released for write.
> > +		 */
> > +		rdma_restrack_put(res);
> > +
> > +		if (ret == -EMSGSIZE)
> > +			/*
> > +			 * There is a chance to optimize here.
> > +			 * It can be done by using list_prepare_entry
> > +			 * and list_for_each_entry_continue afterwards.
> > +			 */
> > +			break;
> > +		if (ret)
> > +			goto res_err;
> > +next:		idx++;
> > +	}
> > +	up_read(&device->res.rwsem);
> > +
> > +	nla_nest_end(skb, table_attr);
> > +	nlmsg_end(skb, nlh);
> > +	cb->args[0] = idx;
> > +
> > +	/*
> > +	 * No more CM_IDs to fill, cancel the message and
> > +	 * return 0 to mark end of dumpit.
> > +	 */
> > +	if (!cm_id)
> > +		goto err;
> > +
> > +	put_device(&device->dev);
> > +	return skb->len;
> > +
> > +res_err:
> > +	nla_nest_cancel(skb, table_attr);
> > +	up_read(&device->res.rwsem);
> > +
> > +err:
> > +	nlmsg_cancel(skb, nlh);
> > +
> > +err_index:
> > +	put_device(&device->dev);
> > +	return ret;
> > +}
> >  static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
> >  	[RDMA_NLDEV_CMD_GET] = {
> >  		.doit = nldev_get_doit,
> > @@ -712,6 +954,9 @@ static int nldev_res_get_qp_dumpit(struct sk_buff
> *skb,
> >  		 * too.
> >  		 */
> >  	},
> > +	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
> > +		.dump = nldev_res_get_cm_id_dumpit,
> > +	},
> >  };
> >
> >  void __init nldev_init(void)
> > diff --git a/drivers/infiniband/core/restrack.c
> b/drivers/infiniband/core/restrack.c
> > index 857637b..bb13169 100644
> > --- a/drivers/infiniband/core/restrack.c
> > +++ b/drivers/infiniband/core/restrack.c
> > @@ -3,6 +3,7 @@
> >   * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
> >   */
> >
> > +#include <rdma/rdma_cm.h>
> >  #include <rdma/ib_verbs.h>
> >  #include <rdma/restrack.h>
> >  #include <linux/mutex.h>
> > @@ -67,6 +68,7 @@ static struct ib_device *res_to_dev(struct
> rdma_restrack_entry *res)
> >  	struct ib_pd *pd;
> >  	struct ib_cq *cq;
> >  	struct ib_qp *qp;
> > +	struct rdma_cm_id *cm_id;
> >
> >  	switch (type) {
> >  	case RDMA_RESTRACK_PD:
> > @@ -85,6 +87,10 @@ static struct ib_device *res_to_dev(struct
> rdma_restrack_entry *res)
> >  		xrcd = container_of(res, struct ib_xrcd, res);
> >  		dev = xrcd->device;
> >  		break;
> > +	case RDMA_RESTRACK_CM_ID:
> > +		cm_id = container_of(res, struct rdma_cm_id, res);
> > +		dev = cm_id->device;
> > +		break;
> >  	default:
> >  		WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
> >  		return NULL;
> > @@ -93,6 +99,27 @@ static struct ib_device *res_to_dev(struct
> rdma_restrack_entry *res)
> >  	return dev;
> >  }
> >
> > +static bool res_is_user(struct rdma_restrack_entry *res)
> > +{
> > +	enum rdma_restrack_type type = res->type;
> > +	bool is_user;
> > +
> > +	switch (type) {
> > +	case RDMA_RESTRACK_CM_ID: {
> > +		struct rdma_cm_id *cm_id;
> > +
> > +		cm_id = container_of(res, struct rdma_cm_id, res);
> > +		is_user = !cm_id->caller;
> > +		break;
> > +	}
> > +	default:
> > +		is_user = !uaccess_kernel();
> > +		break;
> > +	}
> > +
> > +	return is_user;
> > +}
> > +
> >  void rdma_restrack_add(struct rdma_restrack_entry *res)
> >  {
> >  	struct ib_device *dev = res_to_dev(res);
> > @@ -100,7 +127,7 @@ void rdma_restrack_add(struct rdma_restrack_entry
> *res)
> >  	if (!dev)
> >  		return;
> >
> > -	if (!uaccess_kernel()) {
> > +	if (res_is_user(res)) {
> >  		get_task_struct(current);
> >  		res->task = current;
> >  		res->kern_name = NULL;
> > diff --git a/drivers/infiniband/core/ucma.c
b/drivers/infiniband/core/ucma.c
> > index d67219d..f7f0282 100644
> > --- a/drivers/infiniband/core/ucma.c
> > +++ b/drivers/infiniband/core/ucma.c
> > @@ -476,8 +476,8 @@ static ssize_t ucma_create_id(struct ucma_file
*file,
> const char __user *inbuf,
> >  		return -ENOMEM;
> >
> >  	ctx->uid = cmd.uid;
> > -	ctx->cm_id = rdma_create_id(current->nsproxy->net_ns,
> > -				    ucma_event_handler, ctx, cmd.ps,
qp_type);
> > +	ctx->cm_id = __rdma_create_id(current->nsproxy->net_ns,
> > +			      ucma_event_handler, ctx, cmd.ps, qp_type,
NULL);
> >  	if (IS_ERR(ctx->cm_id)) {
> >  		ret = PTR_ERR(ctx->cm_id);
> >  		goto err1;
> > @@ -1084,12 +1084,12 @@ static ssize_t ucma_accept(struct ucma_file
*file,
> const char __user *inbuf,
> >  	if (cmd.conn_param.valid) {
> >  		ucma_copy_conn_param(ctx->cm_id, &conn_param,
> &cmd.conn_param);
> >  		mutex_lock(&file->mut);
> > -		ret = rdma_accept(ctx->cm_id, &conn_param);
> > +		ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
> >  		if (!ret)
> >  			ctx->uid = cmd.uid;
> >  		mutex_unlock(&file->mut);
> >  	} else
> > -		ret = rdma_accept(ctx->cm_id, NULL);
> > +		ret = __rdma_accept(ctx->cm_id, NULL, NULL);
> >
> >  	ucma_put_ctx(ctx);
> >  	return ret;
> > diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
> > index 5984225..73dac9b 100644
> > --- a/include/rdma/rdma_cm.h
> > +++ b/include/rdma/rdma_cm.h
> > @@ -155,6 +155,12 @@ struct rdma_cm_id {
> >  	enum rdma_port_space	 ps;
> >  	enum ib_qp_type		 qp_type;
> >  	u8			 port_num;
> > +	const char *caller;
> > +
> > +	/*
> > +	 * Internal to RDMA/core, don't use in the drivers
> > +	 */
> > +	struct rdma_restrack_entry     res;
> >  };
> >
> >  struct rdma_id_private {
> > @@ -198,6 +204,11 @@ struct rdma_id_private {
> >  	enum ib_gid_type	gid_type;
> >  };
> >
> > +struct rdma_cm_id *__rdma_create_id(struct net *net,
> > +				  rdma_cm_event_handler event_handler,
> > +				  void *context, enum rdma_port_space ps,
> > +				  enum ib_qp_type qp_type, const char
*caller);
> > +
> >  /**
> >   * rdma_create_id - Create an RDMA identifier.
> >   *
> > @@ -210,10 +221,9 @@ struct rdma_id_private {
> >   *
> >   * The id holds a reference on the network namespace until it is
destroyed.
> >   */
> > -struct rdma_cm_id *rdma_create_id(struct net *net,
> > -				  rdma_cm_event_handler event_handler,
> > -				  void *context, enum rdma_port_space ps,
> > -				  enum ib_qp_type qp_type);
> > +#define rdma_create_id(net, event_handler, context, ps, qp_type) \
> > +	__rdma_create_id((net), (event_handler), (context), (ps), (qp_type),
\
> > +			 KBUILD_MODNAME)
> >
> >  /**
> >    * rdma_destroy_id - Destroys an RDMA identifier.
> > @@ -325,6 +335,9 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct
> ib_qp_attr *qp_attr,
> >   */
> >  int rdma_listen(struct rdma_cm_id *id, int backlog);
> >
> > +int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> *conn_param,
> > +		  const char *caller);
> > +
> >  /**
> >   * rdma_accept - Called to accept a connection request or response.
> >   * @id: Connection identifier associated with the request.
> > @@ -340,7 +353,8 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct
> ib_qp_attr *qp_attr,
> >   * state of the qp associated with the id is modified to error, such
that any
> >   * previously posted receive buffers would be flushed.
> >   */
> > -int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> *conn_param);
> > +#define rdma_accept(id, conn_param) \
> > +	__rdma_accept((id), (conn_param),  KBUILD_MODNAME)
> >
> >  /**
> >   * rdma_notify - Notifies the RDMA CM of an asynchronous event that has
> > diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
> > index c2d8116..a794e0e 100644
> > --- a/include/rdma/restrack.h
> > +++ b/include/rdma/restrack.h
> > @@ -33,6 +33,10 @@ enum rdma_restrack_type {
> >  	 */
> >  	RDMA_RESTRACK_XRCD,
> >  	/**
> > +	 * @RDMA_RESTRACK_CM_ID: Connection Manager ID (CM_ID)
> > +	 */
> > +	RDMA_RESTRACK_CM_ID,
> > +	/**
> >  	 * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
> >  	 */
> >  	RDMA_RESTRACK_MAX
> > diff --git a/include/uapi/rdma/rdma_netlink.h
> b/include/uapi/rdma/rdma_netlink.h
> > index 17e59be..4ed21ee 100644
> > --- a/include/uapi/rdma/rdma_netlink.h
> > +++ b/include/uapi/rdma/rdma_netlink.h
> > @@ -240,6 +240,8 @@ enum rdma_nldev_command {
> >
> >  	RDMA_NLDEV_CMD_RES_QP_GET, /* can dump */
> >
> > +	RDMA_NLDEV_CMD_RES_CM_ID_GET, /* can dump */
> > +
> >  	RDMA_NLDEV_NUM_OPS
> >  };
> >
> > @@ -352,6 +354,34 @@ enum rdma_nldev_attr {
> >  	 */
> >  	RDMA_NLDEV_ATTR_RES_KERN_NAME,		/* string */
> >
> > +	RDMA_NLDEV_ATTR_RES_CM_ID,		/* nested table */
> > +	RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,	/* nested table */
> > +	/*
> > +	 * rdma_cm_id port space.
> > +	 */
> > +	RDMA_NLDEV_ATTR_RES_PS,			/* u32 */
> > +	/*
> > +	 * IP Addresses/port attributes.
> > +	 */
> > +	RDMA_NLDEV_ATTR_RES_IPV4_SADDR,		/* u8[4] */
> > +	RDMA_NLDEV_ATTR_RES_IPV4_DADDR,		/* u8[4] */
> > +	RDMA_NLDEV_ATTR_RES_IPV6_SADDR,		/* u8[16] */
> > +	RDMA_NLDEV_ATTR_RES_IPV6_DADDR,		/* u8[16] */
> > +	RDMA_NLDEV_ATTR_RES_IP_SPORT,		/* BE u16 */
> > +	RDMA_NLDEV_ATTR_RES_IP_DPORT,		/* BE u16 */
> 
> Can you please document the meaning of S (source) and D (destination)
> in regards of this netlink output? It is needed to remove ambiguity.

Sure. 

> 
> > +	/*
> > +	 * ARPHRD_INFINIBAND, ARPHRD_ETHER, ...
> > +	 */
> > +	RDMA_NLDEV_ATTR_RES_DEV_TYPE,		/* u8 */
> > +	/*
> > +	 * enum enum rdma_transport_type (IB, IWARP, ...)
> > +	 */
> > +	RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE,	/* u8 */
> > +	/*
> > +	 * enum rdma_network_type (IB, IPv4, IPv6,...)
> > +	 */
> > +	RDMA_NLDEV_ATTR_RES_NETWORK_TYPE,	/* u8 */
> > +
> >  	RDMA_NLDEV_ATTR_MAX
> >  };
> >  #endif /* _UAPI_RDMA_NETLINK_H */
> > --
> > 1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
       [not found]                   ` <20180201080109.GG2055-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
@ 2018-02-01 17:50                     ` Jason Gunthorpe
       [not found]                       ` <20180201175028.GS17053-uk2M96/98Pc@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Jason Gunthorpe @ 2018-02-01 17:50 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Parav Pandit, Steve Wise, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Thu, Feb 01, 2018 at 10:01:09AM +0200, Leon Romanovsky wrote:

> Regarding the name, I personally think that cm_id is better because it
> is general and applicable both to ib_cm and iw_cm. The separation
> between them can be done with introduction of new netlink attribute, e.g.
> cm_id_type.

In netlink the attributes should be self-describing, or intrinsically
related to something mandatory and fundamental about their container
(eg AF_ family in rtnl)

What information do we actually need from the cm_id in various
protocol families?

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
       [not found]         ` <20180201084944.GH2055-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
  2018-02-01 16:07           ` Steve Wise
@ 2018-02-01 17:53           ` Jason Gunthorpe
       [not found]             ` <20180201175353.GU17053-uk2M96/98Pc@public.gmane.org>
  1 sibling, 1 reply; 35+ messages in thread
From: Jason Gunthorpe @ 2018-02-01 17:53 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Steve Wise, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Thu, Feb 01, 2018 at 10:49:44AM +0200, Leon Romanovsky wrote:

> > +	RDMA_NLDEV_ATTR_RES_IPV4_SADDR,		/* u8[4] */
> > +	RDMA_NLDEV_ATTR_RES_IPV4_DADDR,		/* u8[4] */
> > +	RDMA_NLDEV_ATTR_RES_IPV6_SADDR,		/* u8[16] */
> > +	RDMA_NLDEV_ATTR_RES_IPV6_DADDR,		/* u8[16] */
> > +	RDMA_NLDEV_ATTR_RES_IP_SPORT,		/* BE u16 */
> > +	RDMA_NLDEV_ATTR_RES_IP_DPORT,		/* BE u16 */
> 
> Can you please document the meaning of S (source) and D (destination)
> in regards of this netlink output? It is needed to remove ambiguity.

And no on BE's in netlink, I think.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
       [not found]                       ` <20180201175028.GS17053-uk2M96/98Pc@public.gmane.org>
@ 2018-02-01 18:14                         ` Steve Wise
  0 siblings, 0 replies; 35+ messages in thread
From: Steve Wise @ 2018-02-01 18:14 UTC (permalink / raw)
  To: 'Jason Gunthorpe', 'Leon Romanovsky'
  Cc: 'Parav Pandit',
	dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

> 
> On Thu, Feb 01, 2018 at 10:01:09AM +0200, Leon Romanovsky wrote:
> 
> > Regarding the name, I personally think that cm_id is better because it
> > is general and applicable both to ib_cm and iw_cm. The separation
> > between them can be done with introduction of new netlink attribute,
e.g.
> > cm_id_type.
> 
> In netlink the attributes should be self-describing, or intrinsically
> related to something mandatory and fundamental about their container
> (eg AF_ family in rtnl)
> 
> What information do we actually need from the cm_id in various
> protocol families?
> 

Looking at iw_cm_id and iwcm_id_private: tos, mapped, state, flags,
refcount.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
       [not found]             ` <20180201175353.GU17053-uk2M96/98Pc@public.gmane.org>
@ 2018-02-01 18:18               ` Steve Wise
  2018-02-01 18:32                 ` Jason Gunthorpe
  0 siblings, 1 reply; 35+ messages in thread
From: Steve Wise @ 2018-02-01 18:18 UTC (permalink / raw)
  To: 'Jason Gunthorpe', 'Leon Romanovsky'
  Cc: dledford-H+wXaHxf7aLQT0dZR+AlfA, linux-rdma-u79uwXL29TY76Z2rM5mHXA



> -----Original Message-----
> From: Jason Gunthorpe [mailto:jgg-uk2M96/98Pc@public.gmane.org]
> Sent: Thursday, February 01, 2018 11:54 AM
> To: Leon Romanovsky
> Cc: Steve Wise; dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org; linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> Subject: Re: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID
information
> 
> On Thu, Feb 01, 2018 at 10:49:44AM +0200, Leon Romanovsky wrote:
> 
> > > +	RDMA_NLDEV_ATTR_RES_IPV4_SADDR,		/* u8[4] */
> > > +	RDMA_NLDEV_ATTR_RES_IPV4_DADDR,		/* u8[4] */
> > > +	RDMA_NLDEV_ATTR_RES_IPV6_SADDR,		/* u8[16] */
> > > +	RDMA_NLDEV_ATTR_RES_IPV6_DADDR,		/* u8[16] */
> > > +	RDMA_NLDEV_ATTR_RES_IP_SPORT,		/* BE u16 */
> > > +	RDMA_NLDEV_ATTR_RES_IP_DPORT,		/* BE u16 */
> >
> > Can you please document the meaning of S (source) and D (destination)
> > in regards of this netlink output? It is needed to remove ambiguity.
> 
> And no on BE's in netlink, I think.


Sure there are, see nla_put_be32() for example. I'm using nla_put_net16() to
store the ports.  Perhaps I need to change the above comment to ne16 instead
of BE u16?





--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
  2018-02-01 18:18               ` Steve Wise
@ 2018-02-01 18:32                 ` Jason Gunthorpe
       [not found]                   ` <20180201183232.GV17053-uk2M96/98Pc@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Jason Gunthorpe @ 2018-02-01 18:32 UTC (permalink / raw)
  To: Steve Wise
  Cc: 'Leon Romanovsky',
	dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Thu, Feb 01, 2018 at 12:18:05PM -0600, Steve Wise wrote:
> 
> 
> > From: Jason Gunthorpe [mailto:jgg-uk2M96/98Pc@public.gmane.org]
> > Sent: Thursday, February 01, 2018 11:54 AM
> > To: Leon Romanovsky
> > Cc: Steve Wise; dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org; linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> > Subject: Re: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID
> information
> > 
> > On Thu, Feb 01, 2018 at 10:49:44AM +0200, Leon Romanovsky wrote:
> > 
> > > > +	RDMA_NLDEV_ATTR_RES_IPV4_SADDR,		/* u8[4] */
> > > > +	RDMA_NLDEV_ATTR_RES_IPV4_DADDR,		/* u8[4] */
> > > > +	RDMA_NLDEV_ATTR_RES_IPV6_SADDR,		/* u8[16] */
> > > > +	RDMA_NLDEV_ATTR_RES_IPV6_DADDR,		/* u8[16] */
> > > > +	RDMA_NLDEV_ATTR_RES_IP_SPORT,		/* BE u16 */
> > > > +	RDMA_NLDEV_ATTR_RES_IP_DPORT,		/* BE u16 */
> > >
> > > Can you please document the meaning of S (source) and D (destination)
> > > in regards of this netlink output? It is needed to remove ambiguity.
> > 
> > And no on BE's in netlink, I think.
> 
> 
> Sure there are, see nla_put_be32() for example. I'm using nla_put_net16() to
> store the ports.  Perhaps I need to change the above comment to ne16 instead
> of BE u16?

There is no reason thes ports should be be, so don't make it be.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
       [not found]                   ` <20180201183232.GV17053-uk2M96/98Pc@public.gmane.org>
@ 2018-02-01 18:37                     ` Steve Wise
  2018-02-01 22:01                       ` Jason Gunthorpe
  0 siblings, 1 reply; 35+ messages in thread
From: Steve Wise @ 2018-02-01 18:37 UTC (permalink / raw)
  To: 'Jason Gunthorpe'
  Cc: 'Leon Romanovsky',
	dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

> > > On Thu, Feb 01, 2018 at 10:49:44AM +0200, Leon Romanovsky wrote:
> > >
> > > > > +	RDMA_NLDEV_ATTR_RES_IPV4_SADDR,		/* u8[4] */
> > > > > +	RDMA_NLDEV_ATTR_RES_IPV4_DADDR,		/* u8[4] */
> > > > > +	RDMA_NLDEV_ATTR_RES_IPV6_SADDR,		/* u8[16] */
> > > > > +	RDMA_NLDEV_ATTR_RES_IPV6_DADDR,		/* u8[16] */
> > > > > +	RDMA_NLDEV_ATTR_RES_IP_SPORT,		/* BE u16 */
> > > > > +	RDMA_NLDEV_ATTR_RES_IP_DPORT,		/* BE u16 */
> > > >
> > > > Can you please document the meaning of S (source) and D
(destination)
> > > > in regards of this netlink output? It is needed to remove ambiguity.
> > >
> > > And no on BE's in netlink, I think.
> >
> >
> > Sure there are, see nla_put_be32() for example. I'm using
nla_put_net16() to
> > store the ports.  Perhaps I need to change the above comment to ne16
instead
> > of BE u16?
> 
> There is no reason thes ports should be be, so don't make it be.

Well, they're BE on the wire.  That's the reason.  And other APIs return
them to user space in their NBO, like getsockname().  

You would rather the nldev code puts it in HBO before sending it up?  

Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
  2018-02-01 18:37                     ` Steve Wise
@ 2018-02-01 22:01                       ` Jason Gunthorpe
  0 siblings, 0 replies; 35+ messages in thread
From: Jason Gunthorpe @ 2018-02-01 22:01 UTC (permalink / raw)
  To: Steve Wise
  Cc: 'Leon Romanovsky',
	dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Thu, Feb 01, 2018 at 12:37:31PM -0600, Steve Wise wrote:
> > > > On Thu, Feb 01, 2018 at 10:49:44AM +0200, Leon Romanovsky wrote:
> > > >
> > > > > > +	RDMA_NLDEV_ATTR_RES_IPV4_SADDR,		/* u8[4] */
> > > > > > +	RDMA_NLDEV_ATTR_RES_IPV4_DADDR,		/* u8[4] */
> > > > > > +	RDMA_NLDEV_ATTR_RES_IPV6_SADDR,		/* u8[16] */
> > > > > > +	RDMA_NLDEV_ATTR_RES_IPV6_DADDR,		/* u8[16] */
> > > > > > +	RDMA_NLDEV_ATTR_RES_IP_SPORT,		/* BE u16 */
> > > > > > +	RDMA_NLDEV_ATTR_RES_IP_DPORT,		/* BE u16 */
> > > > >
> > > > > Can you please document the meaning of S (source) and D
> (destination)
> > > > > in regards of this netlink output? It is needed to remove ambiguity.
> > > >
> > > > And no on BE's in netlink, I think.
> > >
> > >
> > > Sure there are, see nla_put_be32() for example. I'm using
> nla_put_net16() to
> > > store the ports.  Perhaps I need to change the above comment to ne16
> instead
> > > of BE u16?
> > 
> > There is no reason thes ports should be be, so don't make it be.
> 
> Well, they're BE on the wire.  That's the reason.  And other APIs return
> them to user space in their NBO, like getsockname().  
> 
> You would rather the nldev code puts it in HBO before sending it up?  

Yes, we have far too much 'guess the endian' in our uapi, it doesn't
make sense in modern HW, the swap costs nothing.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
  2018-02-01 16:07           ` Steve Wise
@ 2018-02-04 15:05             ` Leon Romanovsky
       [not found]               ` <20180204150553.GH27780-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
                                 ` (3 more replies)
  0 siblings, 4 replies; 35+ messages in thread
From: Leon Romanovsky @ 2018-02-04 15:05 UTC (permalink / raw)
  To: Steve Wise
  Cc: dledford-H+wXaHxf7aLQT0dZR+AlfA, jgg-VPRAkNaXOzVWk0Htik3J/w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

[-- Attachment #1: Type: text/plain, Size: 10956 bytes --]

On Thu, Feb 01, 2018 at 10:07:20AM -0600, Steve Wise wrote:
>
> >
> > On Tue, Jan 30, 2018 at 08:59:11AM -0800, Steve Wise wrote:
> > > Implement RDMA nldev netlink interface to get detailed CM_ID
> information.
> > >
> > > Because cm_id's are attached to rdma devices in various work queue
> contexts,
> > > the pid and task information at device-attach time is sometimes not
> useful.
> > > For example, an nvme/f host connection ends up being bound to a device
> > > in a work queue context and the resulting pid at attach time no longer
> exists
> > > after connection setup.  So instead we mark all cm_id's created via the
> > > rdma_ucm as "user", and all others as "kernel".  This required tweaking
> > > the restrack code a little.  It also required wrapping some rdma_cm
> > > functions to allow passing the module name string.
> > >
> > > Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
> > > ---
> > >  drivers/infiniband/core/cma.c      |  55 ++++++---
> > >  drivers/infiniband/core/nldev.c    | 245
> > +++++++++++++++++++++++++++++++++++++
> > >  drivers/infiniband/core/restrack.c |  29 ++++-
> > >  drivers/infiniband/core/ucma.c     |   8 +-
> > >  include/rdma/rdma_cm.h             |  24 +++-
> > >  include/rdma/restrack.h            |   4 +
> > >  include/uapi/rdma/rdma_netlink.h   |  30 +++++
> > >  7 files changed, 365 insertions(+), 30 deletions(-)
> > >
> > > diff --git a/drivers/infiniband/core/cma.c
> b/drivers/infiniband/core/cma.c
> > > index 72ad52b..51fbfa1 100644
> > > --- a/drivers/infiniband/core/cma.c
> > > +++ b/drivers/infiniband/core/cma.c
> > > @@ -465,6 +465,9 @@ static void _cma_attach_to_dev(struct
> > rdma_id_private *id_priv,
> > >  	id_priv->id.route.addr.dev_addr.transport =
> > >  		rdma_node_get_transport(cma_dev->device->node_type);
> > >  	list_add_tail(&id_priv->list, &cma_dev->id_list);
> > > +	id_priv->id.res.type = RDMA_RESTRACK_CM_ID;
> > > +	id_priv->id.res.kern_name = id_priv->id.caller;
> >
> > Steve, I don't like it, I worked hard to hide it from the users of
> restrack,
> > and don't see reason why the same trick as with ib_create_cq/ib_create_pd
> > won't
> > work here.
>
> I am doing the same trick, no?  rdma_create_id() is a static inline that
> passes KBUILD_MODNAME.  The issue is that at the time the rdma_cm_id is
> created, it is not associated with any ib_device.  That only happens at
> cma_attach time.  So how can the resource be added if there is no device?
>

So maybe, we don't need to add resource to the DB at rdma_create_id
stage and do it in cma_attach only, and in that stage you will update
the kern_name with KBUILD_MODNAME.

> >
> > > +	rdma_restrack_add(&id_priv->id.res);
> > >  }
> > >
> > >  static void cma_attach_to_dev(struct rdma_id_private *id_priv,
> > > @@ -737,10 +740,10 @@ static void cma_deref_id(struct rdma_id_private
> > *id_priv)
> > >  		complete(&id_priv->comp);
> > >  }
> > >
> > > -struct rdma_cm_id *rdma_create_id(struct net *net,
> > > -				  rdma_cm_event_handler event_handler,
> > > -				  void *context, enum rdma_port_space ps,
> > > -				  enum ib_qp_type qp_type)
> > > +struct rdma_cm_id *__rdma_create_id(struct net *net,
> > > +				    rdma_cm_event_handler event_handler,
> > > +				    void *context, enum rdma_port_space ps,
> > > +				    enum ib_qp_type qp_type, const char
> *caller)
> > >  {
> > >  	struct rdma_id_private *id_priv;
> > >
> > > @@ -748,7 +751,10 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
> > >  	if (!id_priv)
> > >  		return ERR_PTR(-ENOMEM);
> > >
> > > -	id_priv->owner = task_pid_nr(current);
> > > +	if (caller)
> > > +		id_priv->id.caller = caller;
> > > +	else
> > > +		id_priv->owner = task_pid_nr(current);
> >
> > See the comment above
>
> There is no ib_device at this point, so caller (and owner) must be saved
> until the cm_id is bound to a device (or possibly devices for listening
> ids).

Why do we need previous owner? Can it be that rdma_create_id was
performed by one process and cma_attach by another?

>
> >
> > >  	id_priv->state = RDMA_CM_IDLE;
> > >  	id_priv->id.context = context;
> > >  	id_priv->id.event_handler = event_handler;
> >
> > Not saying that we need to do it now, but it is important to write, most
> > probably we can drop certain initialization from rdma_create_id() adn
> > reuse rdma_restrack_put/_get.
> >
>
> I don't understand this comment.  Can you please elaborate?

Most probably, we will be able to drop id_priv->qp_mutex in the future,
but let's not discuss it now, it is not related to this series.

>
> > > @@ -768,7 +774,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
> > >
> > >  	return &id_priv->id;
> > >  }
> > > -EXPORT_SYMBOL(rdma_create_id);
> > > +EXPORT_SYMBOL(__rdma_create_id);
> > >
> > >  static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp
> *qp)
> > >  {
> > > @@ -1628,6 +1634,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
> > >  	mutex_unlock(&id_priv->handler_mutex);
> > >
> > >  	if (id_priv->cma_dev) {
> > > +		rdma_restrack_del(&id_priv->id.res);
> >
> > You should count all created cm_ids and not only binded.
>
> No ib_device if they aren't bound.
>
> >
> > >  		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
> > >  			if (id_priv->cm_id.ib)
> > >  				ib_destroy_cm_id(id_priv->cm_id.ib);
> > > @@ -1786,9 +1793,10 @@ static struct rdma_id_private
> > *cma_new_conn_id(struct rdma_cm_id *listen_id,
> > >  		ib_event->param.req_rcvd.primary_path->service_id;
> > >  	int ret;
> > >
> > > -	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
> > > +	id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
> > >  			    listen_id->event_handler, listen_id->context,
> > > -			    listen_id->ps,
> ib_event->param.req_rcvd.qp_type);
> > > +			    listen_id->ps, ib_event->param.req_rcvd.qp_type,
> > > +			    listen_id->caller);
> >
> > I think the cleanest way will be to create some struct and pass pointer to
> it so
> > you can unfold all relevant data inside of __rdma_create_id().
> >
>
> Why is that cleaner?  Marshall up the data into a struct, pass a ptr,
> unmarshall it all...

I counted 6 arguments, and for me, it smells like something wrong.

>
>
> > >  	if (IS_ERR(id))
> > >  		return NULL;
> > >
> > > @@ -1843,8 +1851,8 @@ static struct rdma_id_private
> > *cma_new_udp_id(struct rdma_cm_id *listen_id,
> > >  	struct net *net = listen_id->route.addr.dev_addr.net;
> > >  	int ret;
> > >
> > > -	id = rdma_create_id(net, listen_id->event_handler,
> listen_id->context,
> > > -			    listen_id->ps, IB_QPT_UD);
> > > +	id = __rdma_create_id(net, listen_id->event_handler, listen_id-
> > >context,
> > > +			      listen_id->ps, IB_QPT_UD, listen_id->caller);
> > >  	if (IS_ERR(id))
> > >  		return NULL;
> > >
> > > @@ -2110,10 +2118,11 @@ static int iw_conn_req_handler(struct iw_cm_id
> > *cm_id,
> > >  		goto out;
> > >
> > >  	/* Create a new RDMA id for the new IW CM ID */
> > > -	new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> > > -				   listen_id->id.event_handler,
> > > -				   listen_id->id.context,
> > > -				   RDMA_PS_TCP, IB_QPT_RC);
> > > +	new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> > > +				     listen_id->id.event_handler,
> > > +				     listen_id->id.context,
> > > +				     RDMA_PS_TCP, IB_QPT_RC,
> > > +				     listen_id->id.caller);
> > >  	if (IS_ERR(new_cm_id)) {
> > >  		ret = -ENOMEM;
> > >  		goto out;
> > > @@ -2238,8 +2247,8 @@ static void cma_listen_on_dev(struct
> > rdma_id_private *id_priv,
> > >  	if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev-
> > >device, 1))
> > >  		return;
> > >
> > > -	id = rdma_create_id(net, cma_listen_handler, id_priv,
> id_priv->id.ps,
> > > -			    id_priv->id.qp_type);
> > > +	id = __rdma_create_id(net, cma_listen_handler, id_priv,
> id_priv->id.ps,
> > > +			      id_priv->id.qp_type, id_priv->id.caller);
> > >  	if (IS_ERR(id))
> > >  		return;
> > >
> > > @@ -3347,8 +3356,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct
> > sockaddr *addr)
> > >
> > >  	return 0;
> > >  err2:
> > > -	if (id_priv->cma_dev)
> > > +	if (id_priv->cma_dev) {
> > > +		rdma_restrack_del(&id_priv->id.res);
> > >  		cma_release_dev(id_priv);
> > > +	}
> > >  err1:
> > >  	cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
> > >  	return ret;
> > > @@ -3731,14 +3742,18 @@ static int cma_send_sidr_rep(struct
> > rdma_id_private *id_priv,
> > >  	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
> > >  }
> > >
> > > -int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> > *conn_param)
> > > +int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> > *conn_param,
> > > +		  const char *caller)
> > >  {
> > >  	struct rdma_id_private *id_priv;
> > >  	int ret;
> > >
> > >  	id_priv = container_of(id, struct rdma_id_private, id);
> > >
> > > -	id_priv->owner = task_pid_nr(current);
> > > +	if (caller)
> > > +		id_priv->id.caller = caller;
> > > +	else
> > > +		id_priv->owner = task_pid_nr(current);
> > >
> > >  	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
> > >  		return -EINVAL;
> > > @@ -3778,7 +3793,7 @@ int rdma_accept(struct rdma_cm_id *id, struct
> > rdma_conn_param *conn_param)
> > >  	rdma_reject(id, NULL, 0);
> > >  	return ret;
> > >  }
> > > -EXPORT_SYMBOL(rdma_accept);
> > > +EXPORT_SYMBOL(__rdma_accept);
> > >
> > >  int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
> > >  {
> > > diff --git a/drivers/infiniband/core/nldev.c
> b/drivers/infiniband/core/nldev.c
> > > index fa8655e..a4091b5 100644
> > > --- a/drivers/infiniband/core/nldev.c
> > > +++ b/drivers/infiniband/core/nldev.c
> > > @@ -34,6 +34,7 @@
> > >  #include <linux/pid.h>
> > >  #include <linux/pid_namespace.h>
> > >  #include <net/netlink.h>
> > > +#include <rdma/rdma_cm.h>
> > >  #include <rdma/rdma_netlink.h>
> > >
> > >  #include "core_priv.h"
> > > @@ -71,6 +72,22 @@
> > >  	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
> > >  	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type =
> > NLA_NUL_STRING,
> > >  						    .len = TASK_COMM_LEN },
> > > +	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type =
> > NLA_NESTED },
> >
> > I would like to use this opportunity. There is CM_ID, so users will be
> > able to query nldev directly on this ID (once we implement .doit), but
> > can we found a proper abstraction for other objects (PD, CQ, QP e.t.c.)?
> >
> > I want to have that all resources will have something similar to
> ifindex/ibindex.
> >
>
> Pds, cqs, and qps, all have  a device-unique number.  So
> ibindex/restrack_type/object_id should work.  But cm_id's don't have that.
> Similar to a socket I guess.  So I'm not sure how to identify cm_ids other
> than by the ipaddresses/ip ports.

It is opposite, cm_id is a unique number, but other objects don't have
such. What about PD and CQ?

We can declare that access to QP .doit willbe based on QPN, PD .doit
will be based on local_dma_lkey, but what will be CQ identifier?

Thanks

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
       [not found]               ` <20180204150553.GH27780-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
@ 2018-02-05 15:33                 ` Steve Wise
  2018-02-05 15:43                   ` Leon Romanovsky
  2018-02-05 20:00                   ` Jason Gunthorpe
  0 siblings, 2 replies; 35+ messages in thread
From: Steve Wise @ 2018-02-05 15:33 UTC (permalink / raw)
  To: 'Leon Romanovsky'
  Cc: dledford-H+wXaHxf7aLQT0dZR+AlfA, jgg-VPRAkNaXOzVWk0Htik3J/w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

Hey Leon,

> On Thu, Feb 01, 2018 at 10:07:20AM -0600, Steve Wise wrote:
> >
> > >
> > > On Tue, Jan 30, 2018 at 08:59:11AM -0800, Steve Wise wrote:
> > > > Implement RDMA nldev netlink interface to get detailed CM_ID
> > information.
> > > >
> > > > Because cm_id's are attached to rdma devices in various work queue
> > contexts,
> > > > the pid and task information at device-attach time is sometimes not
> > useful.
> > > > For example, an nvme/f host connection ends up being bound to a
device
> > > > in a work queue context and the resulting pid at attach time no
longer
> > exists
> > > > after connection setup.  So instead we mark all cm_id's created via
the
> > > > rdma_ucm as "user", and all others as "kernel".  This required
tweaking
> > > > the restrack code a little.  It also required wrapping some rdma_cm
> > > > functions to allow passing the module name string.
> > > >
> > > > Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
> > > > ---
> > > >  drivers/infiniband/core/cma.c      |  55 ++++++---
> > > >  drivers/infiniband/core/nldev.c    | 245
> > > +++++++++++++++++++++++++++++++++++++
> > > >  drivers/infiniband/core/restrack.c |  29 ++++-
> > > >  drivers/infiniband/core/ucma.c     |   8 +-
> > > >  include/rdma/rdma_cm.h             |  24 +++-
> > > >  include/rdma/restrack.h            |   4 +
> > > >  include/uapi/rdma/rdma_netlink.h   |  30 +++++
> > > >  7 files changed, 365 insertions(+), 30 deletions(-)
> > > >
> > > > diff --git a/drivers/infiniband/core/cma.c
> > b/drivers/infiniband/core/cma.c
> > > > index 72ad52b..51fbfa1 100644
> > > > --- a/drivers/infiniband/core/cma.c
> > > > +++ b/drivers/infiniband/core/cma.c
> > > > @@ -465,6 +465,9 @@ static void _cma_attach_to_dev(struct
> > > rdma_id_private *id_priv,
> > > >  	id_priv->id.route.addr.dev_addr.transport =
> > > >  		rdma_node_get_transport(cma_dev->device->node_type);
> > > >  	list_add_tail(&id_priv->list, &cma_dev->id_list);
> > > > +	id_priv->id.res.type = RDMA_RESTRACK_CM_ID;
> > > > +	id_priv->id.res.kern_name = id_priv->id.caller;
> > >
> > > Steve, I don't like it, I worked hard to hide it from the users of
> > restrack,
> > > and don't see reason why the same trick as with
ib_create_cq/ib_create_pd
> > > won't
> > > work here.
> >
> > I am doing the same trick, no?  rdma_create_id() is a static inline that
> > passes KBUILD_MODNAME.  The issue is that at the time the rdma_cm_id is
> > created, it is not associated with any ib_device.  That only happens at
> > cma_attach time.  So how can the resource be added if there is no
device?
> >
> 
> So maybe, we don't need to add resource to the DB at rdma_create_id
> stage and do it in cma_attach only, and in that stage you will update
> the kern_name with KBUILD_MODNAME.

Yea, I'll look into this. 

> 
> > >
> > > > +	rdma_restrack_add(&id_priv->id.res);
> > > >  }
> > > >
> > > >  static void cma_attach_to_dev(struct rdma_id_private *id_priv,
> > > > @@ -737,10 +740,10 @@ static void cma_deref_id(struct
rdma_id_private
> > > *id_priv)
> > > >  		complete(&id_priv->comp);
> > > >  }
> > > >
> > > > -struct rdma_cm_id *rdma_create_id(struct net *net,
> > > > -				  rdma_cm_event_handler
event_handler,
> > > > -				  void *context, enum
rdma_port_space ps,
> > > > -				  enum ib_qp_type qp_type)
> > > > +struct rdma_cm_id *__rdma_create_id(struct net *net,
> > > > +				    rdma_cm_event_handler
event_handler,
> > > > +				    void *context, enum
rdma_port_space ps,
> > > > +				    enum ib_qp_type qp_type, const
char
> > *caller)
> > > >  {
> > > >  	struct rdma_id_private *id_priv;
> > > >
> > > > @@ -748,7 +751,10 @@ struct rdma_cm_id *rdma_create_id(struct net
> *net,
> > > >  	if (!id_priv)
> > > >  		return ERR_PTR(-ENOMEM);
> > > >
> > > > -	id_priv->owner = task_pid_nr(current);
> > > > +	if (caller)
> > > > +		id_priv->id.caller = caller;
> > > > +	else
> > > > +		id_priv->owner = task_pid_nr(current);
> > >
> > > See the comment above
> >
> > There is no ib_device at this point, so caller (and owner) must be saved
> > until the cm_id is bound to a device (or possibly devices for listening
> > ids).
> 
> Why do we need previous owner? Can it be that rdma_create_id was
> performed by one process and cma_attach by another?

Yes.  Connection setup events are processed on rdma_cm (and ib_cm/iw_cm)
workqueue threads.   Here's one example:  The application creates a cm_id,
binds to 0.0.0.0/0 and listens.  But there are no rdma devices at this
point.  There is a cm_id owned by the application, but not bound to any
device.  Then, lets say mlx4 and cxgb4 both get inserted.  The rdma_cm will
discover the new rdma devices, create and attach child cm_ids to those
devices.  This is done in a workq thread driven off of the ib_client
device_add upcall.  So what we really want to show is that these per-device
cm_ids are owned by the same application. 

There are other cases, that I've seen with just nvme/f host.   I'll produce
more examples to help us understand if there is a better path than what I've
proposed in this patch.

> 
> >
> > >
> > > >  	id_priv->state = RDMA_CM_IDLE;
> > > >  	id_priv->id.context = context;
> > > >  	id_priv->id.event_handler = event_handler;
> > >
> > > Not saying that we need to do it now, but it is important to write,
most
> > > probably we can drop certain initialization from rdma_create_id() adn
> > > reuse rdma_restrack_put/_get.
> > >
> >
> > I don't understand this comment.  Can you please elaborate?
> 
> Most probably, we will be able to drop id_priv->qp_mutex in the future,
> but let's not discuss it now, it is not related to this series.
> 
> >
> > > > @@ -768,7 +774,7 @@ struct rdma_cm_id *rdma_create_id(struct net
> *net,
> > > >
> > > >  	return &id_priv->id;
> > > >  }
> > > > -EXPORT_SYMBOL(rdma_create_id);
> > > > +EXPORT_SYMBOL(__rdma_create_id);
> > > >
> > > >  static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct
ib_qp
> > *qp)
> > > >  {
> > > > @@ -1628,6 +1634,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
> > > >  	mutex_unlock(&id_priv->handler_mutex);
> > > >
> > > >  	if (id_priv->cma_dev) {
> > > > +		rdma_restrack_del(&id_priv->id.res);
> > >
> > > You should count all created cm_ids and not only binded.
> >
> > No ib_device if they aren't bound.
> >
> > >
> > > >  		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
> > > >  			if (id_priv->cm_id.ib)
> > > >  				ib_destroy_cm_id(id_priv->cm_id.ib);
> > > > @@ -1786,9 +1793,10 @@ static struct rdma_id_private
> > > *cma_new_conn_id(struct rdma_cm_id *listen_id,
> > > >  		ib_event->param.req_rcvd.primary_path->service_id;
> > > >  	int ret;
> > > >
> > > > -	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
> > > > +	id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
> > > >  			    listen_id->event_handler,
listen_id->context,
> > > > -			    listen_id->ps,
> > ib_event->param.req_rcvd.qp_type);
> > > > +			    listen_id->ps,
ib_event->param.req_rcvd.qp_type,
> > > > +			    listen_id->caller);
> > >
> > > I think the cleanest way will be to create some struct and pass
pointer to
> > it so
> > > you can unfold all relevant data inside of __rdma_create_id().
> > >
> >
> > Why is that cleaner?  Marshall up the data into a struct, pass a ptr,
> > unmarshall it all...
> 
> I counted 6 arguments, and for me, it smells like something wrong.
> 

I'll look into changing this. 

> >
> >
> > > >  	if (IS_ERR(id))
> > > >  		return NULL;
> > > >
> > > > @@ -1843,8 +1851,8 @@ static struct rdma_id_private
> > > *cma_new_udp_id(struct rdma_cm_id *listen_id,
> > > >  	struct net *net = listen_id->route.addr.dev_addr.net;
> > > >  	int ret;
> > > >
> > > > -	id = rdma_create_id(net, listen_id->event_handler,
> > listen_id->context,
> > > > -			    listen_id->ps, IB_QPT_UD);
> > > > +	id = __rdma_create_id(net, listen_id->event_handler,
listen_id-
> > > >context,
> > > > +			      listen_id->ps, IB_QPT_UD,
listen_id->caller);
> > > >  	if (IS_ERR(id))
> > > >  		return NULL;
> > > >
> > > > @@ -2110,10 +2118,11 @@ static int iw_conn_req_handler(struct
> iw_cm_id
> > > *cm_id,
> > > >  		goto out;
> > > >
> > > >  	/* Create a new RDMA id for the new IW CM ID */
> > > > -	new_cm_id =
rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> > > > -				   listen_id->id.event_handler,
> > > > -				   listen_id->id.context,
> > > > -				   RDMA_PS_TCP, IB_QPT_RC);
> > > > +	new_cm_id =
__rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> > > > +				     listen_id->id.event_handler,
> > > > +				     listen_id->id.context,
> > > > +				     RDMA_PS_TCP, IB_QPT_RC,
> > > > +				     listen_id->id.caller);
> > > >  	if (IS_ERR(new_cm_id)) {
> > > >  		ret = -ENOMEM;
> > > >  		goto out;
> > > > @@ -2238,8 +2247,8 @@ static void cma_listen_on_dev(struct
> > > rdma_id_private *id_priv,
> > > >  	if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev-
> > > >device, 1))
> > > >  		return;
> > > >
> > > > -	id = rdma_create_id(net, cma_listen_handler, id_priv,
> > id_priv->id.ps,
> > > > -			    id_priv->id.qp_type);
> > > > +	id = __rdma_create_id(net, cma_listen_handler, id_priv,
> > id_priv->id.ps,
> > > > +			      id_priv->id.qp_type,
id_priv->id.caller);
> > > >  	if (IS_ERR(id))
> > > >  		return;
> > > >
> > > > @@ -3347,8 +3356,10 @@ int rdma_bind_addr(struct rdma_cm_id *id,
> struct
> > > sockaddr *addr)
> > > >
> > > >  	return 0;
> > > >  err2:
> > > > -	if (id_priv->cma_dev)
> > > > +	if (id_priv->cma_dev) {
> > > > +		rdma_restrack_del(&id_priv->id.res);
> > > >  		cma_release_dev(id_priv);
> > > > +	}
> > > >  err1:
> > > >  	cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
> > > >  	return ret;
> > > > @@ -3731,14 +3742,18 @@ static int cma_send_sidr_rep(struct
> > > rdma_id_private *id_priv,
> > > >  	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
> > > >  }
> > > >
> > > > -int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> > > *conn_param)
> > > > +int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> > > *conn_param,
> > > > +		  const char *caller)
> > > >  {
> > > >  	struct rdma_id_private *id_priv;
> > > >  	int ret;
> > > >
> > > >  	id_priv = container_of(id, struct rdma_id_private, id);
> > > >
> > > > -	id_priv->owner = task_pid_nr(current);
> > > > +	if (caller)
> > > > +		id_priv->id.caller = caller;
> > > > +	else
> > > > +		id_priv->owner = task_pid_nr(current);
> > > >
> > > >  	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
> > > >  		return -EINVAL;
> > > > @@ -3778,7 +3793,7 @@ int rdma_accept(struct rdma_cm_id *id, struct
> > > rdma_conn_param *conn_param)
> > > >  	rdma_reject(id, NULL, 0);
> > > >  	return ret;
> > > >  }
> > > > -EXPORT_SYMBOL(rdma_accept);
> > > > +EXPORT_SYMBOL(__rdma_accept);
> > > >
> > > >  int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
> > > >  {
> > > > diff --git a/drivers/infiniband/core/nldev.c
> > b/drivers/infiniband/core/nldev.c
> > > > index fa8655e..a4091b5 100644
> > > > --- a/drivers/infiniband/core/nldev.c
> > > > +++ b/drivers/infiniband/core/nldev.c
> > > > @@ -34,6 +34,7 @@
> > > >  #include <linux/pid.h>
> > > >  #include <linux/pid_namespace.h>
> > > >  #include <net/netlink.h>
> > > > +#include <rdma/rdma_cm.h>
> > > >  #include <rdma/rdma_netlink.h>
> > > >
> > > >  #include "core_priv.h"
> > > > @@ -71,6 +72,22 @@
> > > >  	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32
},
> > > >  	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type =
> > > NLA_NUL_STRING,
> > > >  						    .len =
TASK_COMM_LEN },
> > > > +	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type =
> > > NLA_NESTED },
> > >
> > > I would like to use this opportunity. There is CM_ID, so users will be
> > > able to query nldev directly on this ID (once we implement .doit), but
> > > can we found a proper abstraction for other objects (PD, CQ, QP
e.t.c.)?
> > >
> > > I want to have that all resources will have something similar to
> > ifindex/ibindex.
> > >
> >
> > Pds, cqs, and qps, all have  a device-unique number.  So
> > ibindex/restrack_type/object_id should work.  But cm_id's don't have
that.
> > Similar to a socket I guess.  So I'm not sure how to identify cm_ids
other
> > than by the ipaddresses/ip ports.
> 
> It is opposite, cm_id is a unique number, but other objects don't have
> such. What about PD and CQ?
> 
> We can declare that access to QP .doit willbe based on QPN, PD .doit
> will be based on local_dma_lkey, but what will be CQ identifier?
> 

Now that I've thought about this more, I realize the core verbs data
structures don't have identifiers for most objects.  I'm not sure why the
qp_num is exposed, but I guess it is because the qp num values must be
exchanged between both ends of an IB connection to setup the connection.    

The  local_dma_lkey  value will not be unique for each PD.  For all iwarp
devices the local_dma_lkey is 0.  (I think the same is true for IB devices).
So PD has the same issue as CQ.   

I suppose we can use rkey for MRs, but if the MR is not in the VALID state,
the rkey isn't necessarily accurate (the LSB of the rkey can be changed by
the application with each REG_MR operation).

The cm_id doesn't have any unique identifier either, except maybe the
combination of src/dst addresses and port space. 

With iw_cxgb4, CQs, PDs, and MRs all have a unique identifier just like a QP
does.  I expect all hw implementations of rdma have numeric identifiers for
CQs, PDs, QPs, etc.    They aren't exposed to the core API though.  

Perhaps we add a device-unique (or globally unique) identifier as part of
the restrack struct and use that for GET/SET?

Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
  2018-02-05 15:33                 ` Steve Wise
@ 2018-02-05 15:43                   ` Leon Romanovsky
       [not found]                     ` <20180205154351.GG2567-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
  2018-02-05 20:00                   ` Jason Gunthorpe
  1 sibling, 1 reply; 35+ messages in thread
From: Leon Romanovsky @ 2018-02-05 15:43 UTC (permalink / raw)
  To: Steve Wise
  Cc: dledford-H+wXaHxf7aLQT0dZR+AlfA, jgg-VPRAkNaXOzVWk0Htik3J/w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

[-- Attachment #1: Type: text/plain, Size: 1562 bytes --]

On Mon, Feb 05, 2018 at 09:33:07AM -0600, Steve Wise wrote:
> Hey Leon,
>
> >
> > It is opposite, cm_id is a unique number, but other objects don't have
> > such. What about PD and CQ?
> >
> > We can declare that access to QP .doit willbe based on QPN, PD .doit
> > will be based on local_dma_lkey, but what will be CQ identifier?
> >
>
> Now that I've thought about this more, I realize the core verbs data
> structures don't have identifiers for most objects.  I'm not sure why the
> qp_num is exposed, but I guess it is because the qp num values must be
> exchanged between both ends of an IB connection to setup the connection.
>
> The  local_dma_lkey  value will not be unique for each PD.  For all iwarp
> devices the local_dma_lkey is 0.  (I think the same is true for IB devices).
> So PD has the same issue as CQ.
>
> I suppose we can use rkey for MRs, but if the MR is not in the VALID state,
> the rkey isn't necessarily accurate (the LSB of the rkey can be changed by
> the application with each REG_MR operation).
>
> The cm_id doesn't have any unique identifier either, except maybe the
> combination of src/dst addresses and port space.
>
> With iw_cxgb4, CQs, PDs, and MRs all have a unique identifier just like a QP
> does.  I expect all hw implementations of rdma have numeric identifiers for
> CQs, PDs, QPs, etc.    They aren't exposed to the core API though.
>
> Perhaps we add a device-unique (or globally unique) identifier as part of
> the restrack struct and use that for GET/SET?

+1,
I like it, it is very clean.

Thanks

>
> Steve.
>

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
       [not found]                     ` <20180205154351.GG2567-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
@ 2018-02-05 17:06                       ` Steve Wise
  0 siblings, 0 replies; 35+ messages in thread
From: Steve Wise @ 2018-02-05 17:06 UTC (permalink / raw)
  To: 'Leon Romanovsky'
  Cc: dledford-H+wXaHxf7aLQT0dZR+AlfA, jgg-VPRAkNaXOzVWk0Htik3J/w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA


> > >
> > > It is opposite, cm_id is a unique number, but other objects don't have
> > > such. What about PD and CQ?
> > >
> > > We can declare that access to QP .doit willbe based on QPN, PD .doit
> > > will be based on local_dma_lkey, but what will be CQ identifier?
> > >
> >
> > Now that I've thought about this more, I realize the core verbs data
> > structures don't have identifiers for most objects.  I'm not sure why
the
> > qp_num is exposed, but I guess it is because the qp num values must be
> > exchanged between both ends of an IB connection to setup the connection.
> >
> > The  local_dma_lkey  value will not be unique for each PD.  For all
iwarp
> > devices the local_dma_lkey is 0.  (I think the same is true for IB
devices).
> > So PD has the same issue as CQ.
> >
> > I suppose we can use rkey for MRs, but if the MR is not in the VALID
state,
> > the rkey isn't necessarily accurate (the LSB of the rkey can be changed
by
> > the application with each REG_MR operation).
> >
> > The cm_id doesn't have any unique identifier either, except maybe the
> > combination of src/dst addresses and port space.
> >
> > With iw_cxgb4, CQs, PDs, and MRs all have a unique identifier just like
a QP
> > does.  I expect all hw implementations of rdma have numeric identifiers
for
> > CQs, PDs, QPs, etc.    They aren't exposed to the core API though.
> >
> > Perhaps we add a device-unique (or globally unique) identifier as part
of
> > the restrack struct and use that for GET/SET?
> 
> +1,
> I like it, it is very clean.
> 
> Thanks

This should be done to a subsequent series.  Perhaps when GET/SET are
actually implemented...

Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
  2018-02-04 15:05             ` Leon Romanovsky
       [not found]               ` <20180204150553.GH27780-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
@ 2018-02-05 17:12               ` Steve Wise
  2018-02-05 19:06               ` Steve Wise
  2018-02-05 19:35               ` Steve Wise
  3 siblings, 0 replies; 35+ messages in thread
From: Steve Wise @ 2018-02-05 17:12 UTC (permalink / raw)
  To: 'Leon Romanovsky'
  Cc: dledford-H+wXaHxf7aLQT0dZR+AlfA, jgg-VPRAkNaXOzVWk0Htik3J/w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

> Hey Leon,
> 

...

> > > > > diff --git a/drivers/infiniband/core/cma.c
> > > b/drivers/infiniband/core/cma.c
> > > > > index 72ad52b..51fbfa1 100644
> > > > > --- a/drivers/infiniband/core/cma.c
> > > > > +++ b/drivers/infiniband/core/cma.c
> > > > > @@ -465,6 +465,9 @@ static void _cma_attach_to_dev(struct
> > > > rdma_id_private *id_priv,
> > > > >  	id_priv->id.route.addr.dev_addr.transport =
> > > > >  		rdma_node_get_transport(cma_dev->device->node_type);
> > > > >  	list_add_tail(&id_priv->list, &cma_dev->id_list);
> > > > > +	id_priv->id.res.type = RDMA_RESTRACK_CM_ID;
> > > > > +	id_priv->id.res.kern_name = id_priv->id.caller;
> > > >
> > > > Steve, I don't like it, I worked hard to hide it from the users of
> > > restrack,
> > > > and don't see reason why the same trick as with
> ib_create_cq/ib_create_pd
> > > > won't
> > > > work here.
> > >
> > > I am doing the same trick, no?  rdma_create_id() is a static inline
that
> > > passes KBUILD_MODNAME.  The issue is that at the time the rdma_cm_id
is
> > > created, it is not associated with any ib_device.  That only happens
at
> > > cma_attach time.  So how can the resource be added if there is no
device?
> > >
> >
> > So maybe, we don't need to add resource to the DB at rdma_create_id
> > stage and do it in cma_attach only, and in that stage you will update
> > the kern_name with KBUILD_MODNAME.
> 
> Yea, I'll look into this.

The current patch is only adding the resource in _cma_attach_to_dev().  And
using KBUILD_MODNAME in _cma_attach_to_dev() would always end up with
"[rdma_cm]".   That's why the caller string is saved in rdma_create_id()
static inline function; so it will get the true module name.  

I don't see any change needed here.

Thanks,

Steve.


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
  2018-02-04 15:05             ` Leon Romanovsky
       [not found]               ` <20180204150553.GH27780-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
  2018-02-05 17:12               ` Steve Wise
@ 2018-02-05 19:06               ` Steve Wise
  2018-02-05 19:35               ` Steve Wise
  3 siblings, 0 replies; 35+ messages in thread
From: Steve Wise @ 2018-02-05 19:06 UTC (permalink / raw)
  To: 'Leon Romanovsky'
  Cc: dledford-H+wXaHxf7aLQT0dZR+AlfA, jgg-VPRAkNaXOzVWk0Htik3J/w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

> > >
> > > There is no ib_device at this point, so caller (and owner) must be
saved
> > > until the cm_id is bound to a device (or possibly devices for
listening
> > > ids).
> >
> > Why do we need previous owner? Can it be that rdma_create_id was
> > performed by one process and cma_attach by another?
> 
> Yes.  Connection setup events are processed on rdma_cm (and ib_cm/iw_cm)
> workqueue threads.   Here's one example:  The application creates a cm_id,
> binds to 0.0.0.0/0 and listens.  But there are no rdma devices at this
point.  There
> is a cm_id owned by the application, but not bound to any device.  Then,
lets say
> mlx4 and cxgb4 both get inserted.  The rdma_cm will  discover the new rdma
> devices, create and attach child cm_ids to those devices.  This is done in
a workq
> thread driven off of the ib_client device_add upcall.  So what we really
want to
> show is that these per-device cm_ids are owned by the same application.
> 
> There are other cases, that I've seen with just nvme/f host.   I'll
produce more
> examples to help us understand if there is a better path than what I've
proposed
> in this patch.

Here are trace events for setting up an nvmef connection.   They trace
_rdma_create_id(), _cma_attach_to_dev(), and _rdma_accept().  This was
gathered with the following patch on top of cm_id my series (just so you
understand where I took these stack traces).  Beyond the patch at the end of
this email, I show the pr_debug() output at create_id() and attach_to_dev()
for the cm_ids created as part of the nvmf connection setup.  

>From this, I conclude 2 things:  

1) the KBUILD_MODNAME needs to come from the rdma_create_id() or
rdma_accept() calls since _cma_attach_to_dev can be done in the rdma_cm
workqueue context.

2) uaccess_kernel() is not an indicator of a kernel rdma application.  At
least in the context of cm_ids.  So I simply key on if the cm_id is created
by rdma_ucm module or not.  If the cm_id is created by rdma_ucm, the pid is
saved and the kern_name is null, Otherwise, the pid is 0 and the kern_name
is set to the module name of the caller of rdma_create_id() or rdma_accept()
static inlines using KBUILD_MODNAME.

Debug patch:

----
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index ddd8174..5e17a27 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -479,6 +479,8 @@ static void _cma_attach_to_dev(struct rdma_id_private
*id_priv,
        list_add_tail(&id_priv->list, &cma_dev->id_list);
        id_priv->id.res.type = RDMA_RESTRACK_CM_ID;
        id_priv->id.res.kern_name = id_priv->id.caller;
+       pr_debug("caller/pid from create_id %s/%u current pid %u
uaccess_kernel() %u\n", id_priv->id.caller, id_priv->owner,
task_pid_nr(current), uaccess_kernel());
+       WARN_ON(1);
        rdma_restrack_add(&id_priv->id.res);
 }

@@ -763,6 +765,8 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
        if (!id_priv)
                return ERR_PTR(-ENOMEM);

+       pr_debug("caller/pid %s/%u uaccess_kernel() %u\n", caller,
task_pid_nr(current), uaccess_kernel());
+       WARN_ON(1);
        if (caller)
                id_priv->id.caller = caller;
        else
@@ -3762,6 +3766,9 @@ int __rdma_accept(struct rdma_cm_id *id, struct
rdma_conn_param *conn_param,

        id_priv = container_of(id, struct rdma_id_private, id);

+       pr_debug("caller/pid from create_id %s/%u current pid %u
uaccess_kernel() %u\n", id_priv->id.caller, id_priv->owner,
task_pid_nr(current), uaccess_kernel());
+       WARN_ON(1);
+
        if (caller)
                id_priv->id.caller = caller;
        else

------

Nvmf connection setup traces:

This is the nvmf target creating the cm_id and binding/listening:

create_id - notice uaccess_kernel() is 0, even though this is a kernel
application:

[42241.528534] __rdma_create_id: caller/pid nvmet_rdma/9591 uaccess_kernel()
0
...
[42241.717259]  nvmet_rdma_add_port+0x98/0x1c0 [nvmet_rdma]
[42241.723261]  nvmet_enable_port+0x36/0xd0 [nvmet]
[42241.733007]  nvmet_port_subsys_allow_link+0xfd/0x130 [nvmet]
...

attach_to_dev - same issue - uaccess_kernel() returns 0 for a kernel
application cm_id:

[42241.838916] _cma_attach_to_dev: caller/pid from create_id nvmet_rdma/0
current pid 9591 uaccess_kernel() 0
...
[42242.030931]  cma_attach_to_dev+0x12/0x40 [rdma_cm]
[42242.036485]  cma_acquire_dev+0x2d1/0x380 [rdma_cm]
[42242.042029]  rdma_bind_addr+0x783/0x830 [rdma_cm]
[42242.053275]  nvmet_rdma_add_port+0xc9/0x1c0 [nvmet_rdma]
[42242.059321]  nvmet_enable_port+0x36/0xd0 [nvmet]
[42242.069128]  nvmet_port_subsys_allow_link+0xfd/0x130 [nvmet]

Here is the nvmf host side creating and connecting:

create_id - again uaccess_kernel() indicates this is user, but the
application is kernel:

[42508.786910] __rdma_create_id: caller/pid nvme_rdma/9602 uaccess_kernel()
0
...
[42508.978059]  nvme_rdma_alloc_queue+0x8c/0x180 [nvme_rdma]
[42508.984182]  nvme_rdma_configure_admin_queue+0x1d/0x2a0 [nvme_rdma]
[42508.991178]  nvme_rdma_create_ctrl+0x36c/0x626 [nvme_rdma]
[42508.997397]  nvmf_create_ctrl.isra.6+0x72f/0x900 [nvme_fabrics]
[42509.008708]  nvmf_dev_write+0x87/0xec [nvme_fabrics]

attach_to_dev, as part of RESOLVE_ADDR/ROUTE.  Note uaccess_kernel() is 1
here, and it is called from ib_core, so the real module name would be lost
if it wasn't saved at create_id time.

[42509.123809] _cma_attach_to_dev: caller/pid from create_id nvme_rdma/0
current pid 3340 uaccess_kernel() 1
...
[42509.322662]  cma_attach_to_dev+0x12/0x40 [rdma_cm]
[42509.328198]  cma_acquire_dev+0x2d1/0x380 [rdma_cm]
[42509.333726]  addr_handler+0xca/0x1a0 [rdma_cm]
[42509.338910]  process_one_req+0x87/0x120 [ib_core]
[42509.344350]  process_one_work+0x141/0x340
[42509.349091]  worker_thread+0x47/0x3e0

Here is the target side getting the incoming connection request and
creating/attaching the child cm_id:

Create_id.  Note the uaccess_kernel() is 1 and the module is rdma_cm:

[42509.406886] __rdma_create_id: caller/pid nvmet_rdma/3340 uaccess_kernel()
1
...
[42509.606618]  iw_conn_req_handler+0x6e/0x1f0 [rdma_cm]
[42509.633472]  cm_work_handler+0xd6a/0xd80 [iw_cm]
[42509.638810]  process_one_work+0x141/0x340
[42509.643528]  worker_thread+0x47/0x3e0

Attach_to_dev - caller is again rdma_cm.

[42509.700727] _cma_attach_to_dev: caller/pid from create_id nvmet_rdma/0
current pid 3340 uaccess_kernel() 1
...
[42509.899997]  cma_attach_to_dev+0x12/0x40 [rdma_cm]
[42509.905527]  cma_acquire_dev+0x2d1/0x380 [rdma_cm]
[42509.911056]  iw_conn_req_handler+0xc1/0x1f0 [rdma_cm]
[42509.933385]  cm_work_handler+0xd6a/0xd80 [iw_cm]
[42509.938710]  process_one_work+0x141/0x340
[42509.943428]  worker_thread+0x47/0x3e0

And here the nvmf target accepts the connection:

[42510.000986] __rdma_accept: caller/pid from create_id nvmet_rdma/0 current
pid 3340 uaccess_kernel() 1
...
[42510.199502]  nvmet_rdma_queue_connect+0x6ac/0x990 [nvmet_rdma]
[42510.206071]  iw_conn_req_handler+0x16f/0x1f0 [rdma_cm]
[42510.211949]  cm_work_handler+0xd6a/0xd80 [iw_cm]
[42510.217297]  process_one_work+0x141/0x340
[42510.222036]  worker_thread+0x47/0x3e0

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* RE: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
  2018-02-04 15:05             ` Leon Romanovsky
                                 ` (2 preceding siblings ...)
  2018-02-05 19:06               ` Steve Wise
@ 2018-02-05 19:35               ` Steve Wise
  3 siblings, 0 replies; 35+ messages in thread
From: Steve Wise @ 2018-02-05 19:35 UTC (permalink / raw)
  To: 'Leon Romanovsky'
  Cc: dledford-H+wXaHxf7aLQT0dZR+AlfA, jgg-VPRAkNaXOzVWk0Htik3J/w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

> > > > > @@ -1786,9 +1793,10 @@ static struct rdma_id_private
> > > > *cma_new_conn_id(struct rdma_cm_id *listen_id,
> > > > >  		ib_event->param.req_rcvd.primary_path->service_id;
> > > > >  	int ret;
> > > > >
> > > > > -	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
> > > > > +	id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
> > > > >  			    listen_id->event_handler,
listen_id->context,
> > > > > -			    listen_id->ps,
> > > ib_event->param.req_rcvd.qp_type);
> > > > > +			    listen_id->ps, ib_event-
> >param.req_rcvd.qp_type,
> > > > > +			    listen_id->caller);
> > > >
> > > > I think the cleanest way will be to create some struct and pass
pointer to
> > > it so
> > > > you can unfold all relevant data inside of __rdma_create_id().
> > > >
> > >
> > > Why is that cleaner?  Marshall up the data into a struct, pass a ptr,
> > > unmarshall it all...
> >
> > I counted 6 arguments, and for me, it smells like something wrong.
> >
> 
> I'll look into changing this.
> 

Changing this will force changing all the applications using
rdma_create_id().  I'd rather not do that as part of this series.  It
dilutes the subject of the series.

Does anyone else care either way? 

Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
  2018-02-05 15:33                 ` Steve Wise
  2018-02-05 15:43                   ` Leon Romanovsky
@ 2018-02-05 20:00                   ` Jason Gunthorpe
       [not found]                     ` <20180205200020.GH11446-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
  1 sibling, 1 reply; 35+ messages in thread
From: Jason Gunthorpe @ 2018-02-05 20:00 UTC (permalink / raw)
  To: Steve Wise
  Cc: 'Leon Romanovsky',
	dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Mon, Feb 05, 2018 at 09:33:07AM -0600, Steve Wise wrote:

> Perhaps we add a device-unique (or globally unique) identifier as part of
> the restrack struct and use that for GET/SET?

What use is an ID if the user can't associate that with something
meaningful?

For userspace, a tuple of the filehandle and existing the per-file
handle # would make some sense as a unique ID, but userspace is going
to have a pointer not necessarily the handle number :(

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
       [not found]                     ` <20180205200020.GH11446-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2018-02-05 20:28                       ` Steve Wise
  2018-02-05 20:36                         ` Jason Gunthorpe
  0 siblings, 1 reply; 35+ messages in thread
From: Steve Wise @ 2018-02-05 20:28 UTC (permalink / raw)
  To: 'Jason Gunthorpe'
  Cc: 'Leon Romanovsky',
	dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

> On Mon, Feb 05, 2018 at 09:33:07AM -0600, Steve Wise wrote:
> 
> > Perhaps we add a device-unique (or globally unique) identifier as part
of
> > the restrack struct and use that for GET/SET?
> 
> What use is an ID if the user can't associate that with something
> meaningful?

It uniquely identifies the widget you want to GET or SET.  Ideally, having
it be something meaningful is nice, but if we want to support GET/SET on
various single objects, we need identifiers.  And assigning them as part of
restrack would do the trick.  

However, I don't see GET/SET being useful for QP, CQ, PD, CM_ID, nor MR
resources.  Just DUMP.  I do see perhaps devices and links having GET/SET.
For instance setting up stuff for RXE and SIW soft devices. 

> 
> For userspace, a tuple of the filehandle and existing the per-file
> handle # would make some sense as a unique ID, but userspace is going
> to have a pointer not necessarily the handle number :(
> 

I'm not following you here.  Who does filehandle/handle# pertain to rdmatool
fetching objects via netlnk and possibly setting them?  

Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
  2018-02-05 20:28                       ` Steve Wise
@ 2018-02-05 20:36                         ` Jason Gunthorpe
       [not found]                           ` <20180205203608.GJ11446-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Jason Gunthorpe @ 2018-02-05 20:36 UTC (permalink / raw)
  To: Steve Wise
  Cc: 'Leon Romanovsky',
	dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Mon, Feb 05, 2018 at 02:28:02PM -0600, Steve Wise wrote:
> > On Mon, Feb 05, 2018 at 09:33:07AM -0600, Steve Wise wrote:
> > 
> > > Perhaps we add a device-unique (or globally unique) identifier as part
> of
> > > the restrack struct and use that for GET/SET?
> > 
> > What use is an ID if the user can't associate that with something
> > meaningful?
> 
> It uniquely identifies the widget you want to GET or SET.  Ideally, having
> it be something meaningful is nice, but if we want to support GET/SET on
> various single objects, we need identifiers.  And assigning them as part of
> restrack would do the trick.  
> 
> However, I don't see GET/SET being useful for QP, CQ, PD, CM_ID, nor MR
> resources.  Just DUMP.  I do see perhaps devices and links having GET/SET.
> For instance setting up stuff for RXE and SIW soft devices. 

Well exactly.. I don't expect GET/SET for restrack objects..

> > For userspace, a tuple of the filehandle and existing the per-file
> > handle # would make some sense as a unique ID, but userspace is going
> > to have a pointer not necessarily the handle number :(
> 
> I'm not following you here.  Who does filehandle/handle# pertain to rdmatool
> fetching objects via netlnk and possibly setting them?  

It is the other side of the question - when rdmatool presents me an
object how do I figure out what it is in 'my world' as userspace.

Like a PD for instance.. If I have two in a process how do I know
which is which compared to a dump?

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
       [not found]                           ` <20180205203608.GJ11446-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2018-02-05 20:53                             ` Steve Wise
  2018-02-05 21:16                               ` Jason Gunthorpe
  0 siblings, 1 reply; 35+ messages in thread
From: Steve Wise @ 2018-02-05 20:53 UTC (permalink / raw)
  To: 'Jason Gunthorpe'
  Cc: 'Leon Romanovsky',
	dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

> 
> > > For userspace, a tuple of the filehandle and existing the per-file
> > > handle # would make some sense as a unique ID, but userspace is going
> > > to have a pointer not necessarily the handle number :(
> >
> > I'm not following you here.  Who does filehandle/handle# pertain to
rdmatool
> > fetching objects via netlnk and possibly setting them?
> 
> It is the other side of the question - when rdmatool presents me an
> object how do I figure out what it is in 'my world' as userspace.
> 
> Like a PD for instance.. If I have two in a process how do I know
> which is which compared to a dump?

You don't. :)  As it stands now, you can tell only if a given PD is in your
process or not based on the process id.

Even if we create a device-unique ID for each restrack object, to make it
useful to for userspace analysis would require exposing that ID in the
various user object structures (or via query methods).   EG having
pd->res_id, cq->res_id, etc...

Steve. 


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
  2018-02-05 20:53                             ` Steve Wise
@ 2018-02-05 21:16                               ` Jason Gunthorpe
       [not found]                                 ` <20180205211618.GL11446-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Jason Gunthorpe @ 2018-02-05 21:16 UTC (permalink / raw)
  To: Steve Wise
  Cc: 'Leon Romanovsky',
	dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Mon, Feb 05, 2018 at 02:53:23PM -0600, Steve Wise wrote:

> Even if we create a device-unique ID for each restrack object, to make it
> useful to for userspace analysis would require exposing that ID in the
> various user object structures (or via query methods).   EG having
> pd->res_id, cq->res_id, etc...

As I said, we have that, it is the fd, handle # tuple and most verbs
objects expose the handle # in the user visible part of the object
struct..

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
       [not found]                                 ` <20180205211618.GL11446-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2018-02-05 22:16                                   ` Steve Wise
  2018-02-05 22:20                                     ` Jason Gunthorpe
  2018-02-05 22:22                                     ` Steve Wise
  0 siblings, 2 replies; 35+ messages in thread
From: Steve Wise @ 2018-02-05 22:16 UTC (permalink / raw)
  To: 'Jason Gunthorpe'
  Cc: 'Leon Romanovsky',
	dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

> 
> On Mon, Feb 05, 2018 at 02:53:23PM -0600, Steve Wise wrote:
> 
> > Even if we create a device-unique ID for each restrack object, to make
it
> > useful to for userspace analysis would require exposing that ID in the
> > various user object structures (or via query methods).   EG having
> > pd->res_id, cq->res_id, etc...
> 
> As I said, we have that, it is the fd, handle # tuple and most verbs
> objects expose the handle # in the user visible part of the object
> struct..

Can you please show me an example of the handle # for some object like PD or
CQ?  This doesn't address kernel users' object, though, correct?

Thanks,

Steve.




--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
  2018-02-05 22:16                                   ` Steve Wise
@ 2018-02-05 22:20                                     ` Jason Gunthorpe
       [not found]                                       ` <20180205222025.GC10095-uk2M96/98Pc@public.gmane.org>
  2018-02-05 22:22                                     ` Steve Wise
  1 sibling, 1 reply; 35+ messages in thread
From: Jason Gunthorpe @ 2018-02-05 22:20 UTC (permalink / raw)
  To: Steve Wise
  Cc: 'Leon Romanovsky',
	dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Mon, Feb 05, 2018 at 04:16:37PM -0600, Steve Wise wrote:
> > 
> > On Mon, Feb 05, 2018 at 02:53:23PM -0600, Steve Wise wrote:
> > 
> > > Even if we create a device-unique ID for each restrack object, to make
> it
> > > useful to for userspace analysis would require exposing that ID in the
> > > various user object structures (or via query methods).   EG having
> > > pd->res_id, cq->res_id, etc...
> > 
> > As I said, we have that, it is the fd, handle # tuple and most verbs
> > objects expose the handle # in the user visible part of the object
> > struct..
> 
> Can you please show me an example of the handle # for some object like PD or
> CQ?

>From rdma-core verbs.h:

struct ibv_pd {
        struct ibv_context     *context;
        uint32_t                handle;
};

> This doesn't address kernel users' object, though, correct?

Right.

And id'ing a file descriptor gets kinda hard too..

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
  2018-02-05 22:16                                   ` Steve Wise
  2018-02-05 22:20                                     ` Jason Gunthorpe
@ 2018-02-05 22:22                                     ` Steve Wise
  1 sibling, 0 replies; 35+ messages in thread
From: Steve Wise @ 2018-02-05 22:22 UTC (permalink / raw)
  To: 'Jason Gunthorpe'
  Cc: 'Leon Romanovsky',
	dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

> >
> > As I said, we have that, it is the fd, handle # tuple and most verbs
> > objects expose the handle # in the user visible part of the object
> > struct..
> 
> Can you please show me an example of the handle # for some object like PD
or
> CQ?  This doesn't address kernel users' object, though, correct?


Never mind, I see: ibv_pd.handle,  ibv_mr.handle, etc...


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
       [not found]                                       ` <20180205222025.GC10095-uk2M96/98Pc@public.gmane.org>
@ 2018-02-06  8:40                                         ` Leon Romanovsky
       [not found]                                           ` <20180206084019.GL2567-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
  0 siblings, 1 reply; 35+ messages in thread
From: Leon Romanovsky @ 2018-02-06  8:40 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Steve Wise, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

[-- Attachment #1: Type: text/plain, Size: 1069 bytes --]

On Mon, Feb 05, 2018 at 03:20:25PM -0700, Jason Gunthorpe wrote:
> On Mon, Feb 05, 2018 at 04:16:37PM -0600, Steve Wise wrote:
> > >
> > > On Mon, Feb 05, 2018 at 02:53:23PM -0600, Steve Wise wrote:
> > >
> > > > Even if we create a device-unique ID for each restrack object, to make
> > it
> > > > useful to for userspace analysis would require exposing that ID in the
> > > > various user object structures (or via query methods).   EG having
> > > > pd->res_id, cq->res_id, etc...
> > >
> > > As I said, we have that, it is the fd, handle # tuple and most verbs
> > > objects expose the handle # in the user visible part of the object
> > > struct..
> >
> > Can you please show me an example of the handle # for some object like PD or
> > CQ?
>
> From rdma-core verbs.h:
>
> struct ibv_pd {
>         struct ibv_context     *context;
>         uint32_t                handle;
> };
>
> > This doesn't address kernel users' object, though, correct?
>
> Right.
>
> And id'ing a file descriptor gets kinda hard too..

And how should we handle kernel objects?

>
> Jason

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
       [not found]                                           ` <20180206084019.GL2567-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
@ 2018-02-06 15:25                                             ` Jason Gunthorpe
  0 siblings, 0 replies; 35+ messages in thread
From: Jason Gunthorpe @ 2018-02-06 15:25 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Steve Wise, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Tue, Feb 06, 2018 at 10:40:19AM +0200, Leon Romanovsky wrote:
> On Mon, Feb 05, 2018 at 03:20:25PM -0700, Jason Gunthorpe wrote:
> > On Mon, Feb 05, 2018 at 04:16:37PM -0600, Steve Wise wrote:
> > > >
> > > > On Mon, Feb 05, 2018 at 02:53:23PM -0600, Steve Wise wrote:
> > > >
> > > > > Even if we create a device-unique ID for each restrack object, to make
> > > it
> > > > > useful to for userspace analysis would require exposing that ID in the
> > > > > various user object structures (or via query methods).   EG having
> > > > > pd->res_id, cq->res_id, etc...
> > > >
> > > > As I said, we have that, it is the fd, handle # tuple and most verbs
> > > > objects expose the handle # in the user visible part of the object
> > > > struct..
> > >
> > > Can you please show me an example of the handle # for some object like PD or
> > > CQ?
> >
> > From rdma-core verbs.h:
> >
> > struct ibv_pd {
> >         struct ibv_context     *context;
> >         uint32_t                handle;
> > };
> >
> > > This doesn't address kernel users' object, though, correct?
> >
> > Right.
> >
> > And id'ing a file descriptor gets kinda hard too..
> 
> And how should we handle kernel objects?

hash'd pointer to internal struct?

Jason

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

end of thread, other threads:[~2018-02-06 15:25 UTC | newest]

Thread overview: 35+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-31 17:09 [PATCH RFC 0/2] cm_id resource tracking Steve Wise
     [not found] ` <cover.1517418595.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2018-01-30 16:59   ` [PATCH RFC 1/2] RDMA/CM: move rdma_id_private into include/rdma/rdma_cm.h Steve Wise
     [not found]     ` <a85bb48eb9fc8846c81118a6777ab9ccbd27e9d7.1517418595.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2018-01-31 20:42       ` Parav Pandit
     [not found]         ` <VI1PR0502MB300809BAC31D5CBC0FA2311CD1FB0-o1MPJYiShExKsLr+rGaxW8DSnupUy6xnnBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2018-01-31 20:50           ` Steve Wise
2018-01-30 16:59   ` [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information Steve Wise
     [not found]     ` <531889e6a24f7919dec71734c91298d266aa9721.1517418595.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2018-01-31 20:47       ` Parav Pandit
     [not found]         ` <VI1PR0502MB3008805F1A6056F50A12DEDBD1FB0-o1MPJYiShExKsLr+rGaxW8DSnupUy6xnnBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2018-01-31 20:56           ` Steve Wise
2018-01-31 21:18             ` Parav Pandit
     [not found]               ` <VI1PR0502MB30088B50BEA14B4C05EA2BC7D1FB0-o1MPJYiShExKsLr+rGaxW8DSnupUy6xnnBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2018-02-01  8:01                 ` Leon Romanovsky
     [not found]                   ` <20180201080109.GG2055-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2018-02-01 17:50                     ` Jason Gunthorpe
     [not found]                       ` <20180201175028.GS17053-uk2M96/98Pc@public.gmane.org>
2018-02-01 18:14                         ` Steve Wise
2018-02-01  8:49       ` Leon Romanovsky
     [not found]         ` <20180201084944.GH2055-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2018-02-01 16:07           ` Steve Wise
2018-02-04 15:05             ` Leon Romanovsky
     [not found]               ` <20180204150553.GH27780-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2018-02-05 15:33                 ` Steve Wise
2018-02-05 15:43                   ` Leon Romanovsky
     [not found]                     ` <20180205154351.GG2567-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2018-02-05 17:06                       ` Steve Wise
2018-02-05 20:00                   ` Jason Gunthorpe
     [not found]                     ` <20180205200020.GH11446-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2018-02-05 20:28                       ` Steve Wise
2018-02-05 20:36                         ` Jason Gunthorpe
     [not found]                           ` <20180205203608.GJ11446-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2018-02-05 20:53                             ` Steve Wise
2018-02-05 21:16                               ` Jason Gunthorpe
     [not found]                                 ` <20180205211618.GL11446-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2018-02-05 22:16                                   ` Steve Wise
2018-02-05 22:20                                     ` Jason Gunthorpe
     [not found]                                       ` <20180205222025.GC10095-uk2M96/98Pc@public.gmane.org>
2018-02-06  8:40                                         ` Leon Romanovsky
     [not found]                                           ` <20180206084019.GL2567-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2018-02-06 15:25                                             ` Jason Gunthorpe
2018-02-05 22:22                                     ` Steve Wise
2018-02-05 17:12               ` Steve Wise
2018-02-05 19:06               ` Steve Wise
2018-02-05 19:35               ` Steve Wise
2018-02-01 17:53           ` Jason Gunthorpe
     [not found]             ` <20180201175353.GU17053-uk2M96/98Pc@public.gmane.org>
2018-02-01 18:18               ` Steve Wise
2018-02-01 18:32                 ` Jason Gunthorpe
     [not found]                   ` <20180201183232.GV17053-uk2M96/98Pc@public.gmane.org>
2018-02-01 18:37                     ` Steve Wise
2018-02-01 22:01                       ` Jason Gunthorpe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.