* [PATCH RESEND v1 rdma-next 2/6] RDMA/nldev: provide detailed CM_ID information
[not found] ` <cover.1518552800.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
@ 2018-01-30 16:59 ` Steve Wise
[not found] ` <1510713d3e0f14b101bea7dc9e02084e46e580ec.1518552800.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2018-02-01 16:20 ` [PATCH RESEND v1 rdma-next 1/6] RDMA/CM: move rdma_id_private to cma_priv.h Steve Wise
` (4 subsequent siblings)
5 siblings, 1 reply; 14+ messages in thread
From: Steve Wise @ 2018-01-30 16:59 UTC (permalink / raw)
To: jgg-VPRAkNaXOzVWk0Htik3J/w, dledford-H+wXaHxf7aLQT0dZR+AlfA
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, leon-DgEjT+Ai2ygdnm+yROfE0A
Implement RDMA nldev netlink interface to get detailed CM_ID information.
Because cm_ids are attached to rdma devices in various work queue
contexts, the pid and task information at device-attach time is sometimes
not useful. For example, an NVMe-oF (nvme/f) host connection cm_id ends up
being bound to a device in a work queue context and the resulting pid at
attach time no longer exists after connection setup. So instead we mark all
cm_ids created via the rdma_ucm as "user", and all others as "kernel".
This required tweaking the restrack code a little. It also required
wrapping some rdma_cm functions to allow passing the module name string.
Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
---
drivers/infiniband/core/cma.c | 55 ++++++---
drivers/infiniband/core/nldev.c | 246 +++++++++++++++++++++++++++++++++++++
drivers/infiniband/core/restrack.c | 15 ++-
drivers/infiniband/core/ucma.c | 8 +-
include/rdma/rdma_cm.h | 24 +++-
include/rdma/restrack.h | 4 +
include/uapi/rdma/rdma_netlink.h | 30 +++++
7 files changed, 352 insertions(+), 30 deletions(-)
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 203519e..61ea800 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -466,6 +466,9 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
list_add_tail(&id_priv->list, &cma_dev->id_list);
+ id_priv->id.res.type = RDMA_RESTRACK_CM_ID;
+ id_priv->id.res.kern_name = id_priv->id.caller;
+ rdma_restrack_add(&id_priv->id.res);
}
static void cma_attach_to_dev(struct rdma_id_private *id_priv,
@@ -738,10 +741,10 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
complete(&id_priv->comp);
}
-struct rdma_cm_id *rdma_create_id(struct net *net,
- rdma_cm_event_handler event_handler,
- void *context, enum rdma_port_space ps,
- enum ib_qp_type qp_type)
+struct rdma_cm_id *__rdma_create_id(struct net *net,
+ rdma_cm_event_handler event_handler,
+ void *context, enum rdma_port_space ps,
+ enum ib_qp_type qp_type, const char *caller)
{
struct rdma_id_private *id_priv;
@@ -749,7 +752,10 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
if (!id_priv)
return ERR_PTR(-ENOMEM);
- id_priv->owner = task_pid_nr(current);
+ if (caller)
+ id_priv->id.caller = caller;
+ else
+ id_priv->owner = task_pid_nr(current);
id_priv->state = RDMA_CM_IDLE;
id_priv->id.context = context;
id_priv->id.event_handler = event_handler;
@@ -769,7 +775,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
return &id_priv->id;
}
-EXPORT_SYMBOL(rdma_create_id);
+EXPORT_SYMBOL(__rdma_create_id);
static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
@@ -1629,6 +1635,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
mutex_unlock(&id_priv->handler_mutex);
if (id_priv->cma_dev) {
+ rdma_restrack_del(&id_priv->id.res);
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
if (id_priv->cm_id.ib)
ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -1787,9 +1794,10 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
ib_event->param.req_rcvd.primary_path->service_id;
int ret;
- id = rdma_create_id(listen_id->route.addr.dev_addr.net,
+ id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
listen_id->event_handler, listen_id->context,
- listen_id->ps, ib_event->param.req_rcvd.qp_type);
+ listen_id->ps, ib_event->param.req_rcvd.qp_type,
+ listen_id->caller);
if (IS_ERR(id))
return NULL;
@@ -1844,8 +1852,8 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
struct net *net = listen_id->route.addr.dev_addr.net;
int ret;
- id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
- listen_id->ps, IB_QPT_UD);
+ id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
+ listen_id->ps, IB_QPT_UD, listen_id->caller);
if (IS_ERR(id))
return NULL;
@@ -2111,10 +2119,11 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
goto out;
/* Create a new RDMA id for the new IW CM ID */
- new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
- listen_id->id.event_handler,
- listen_id->id.context,
- RDMA_PS_TCP, IB_QPT_RC);
+ new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
+ listen_id->id.event_handler,
+ listen_id->id.context,
+ RDMA_PS_TCP, IB_QPT_RC,
+ listen_id->id.caller);
if (IS_ERR(new_cm_id)) {
ret = -ENOMEM;
goto out;
@@ -2239,8 +2248,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
return;
- id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
- id_priv->id.qp_type);
+ id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
+ id_priv->id.qp_type, id_priv->id.caller);
if (IS_ERR(id))
return;
@@ -3348,8 +3357,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
return 0;
err2:
- if (id_priv->cma_dev)
+ if (id_priv->cma_dev) {
+ rdma_restrack_del(&id_priv->id.res);
cma_release_dev(id_priv);
+ }
err1:
cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
return ret;
@@ -3732,14 +3743,18 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
}
-int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
+int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ const char *caller)
{
struct rdma_id_private *id_priv;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- id_priv->owner = task_pid_nr(current);
+ if (caller)
+ id_priv->id.caller = caller;
+ else
+ id_priv->owner = task_pid_nr(current);
if (!cma_comp(id_priv, RDMA_CM_CONNECT))
return -EINVAL;
@@ -3779,7 +3794,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
rdma_reject(id, NULL, 0);
return ret;
}
-EXPORT_SYMBOL(rdma_accept);
+EXPORT_SYMBOL(__rdma_accept);
int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
{
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index fa8655e..13f5c46 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -34,9 +34,11 @@
#include <linux/pid.h>
#include <linux/pid_namespace.h>
#include <net/netlink.h>
+#include <rdma/rdma_cm.h>
#include <rdma/rdma_netlink.h>
#include "core_priv.h"
+#include "cma_priv.h"
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
@@ -71,6 +73,22 @@
[RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
.len = TASK_COMM_LEN },
+ [RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_IPV4_SADDR] = {
+ .len = FIELD_SIZEOF(struct iphdr, saddr) },
+ [RDMA_NLDEV_ATTR_RES_IPV4_DADDR] = {
+ .len = FIELD_SIZEOF(struct iphdr, saddr) },
+ [RDMA_NLDEV_ATTR_RES_IPV6_SADDR] = {
+ .len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
+ [RDMA_NLDEV_ATTR_RES_IPV6_DADDR] = {
+ .len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
+ [RDMA_NLDEV_ATTR_RES_IP_SPORT] = { .type = NLA_U16 },
+ [RDMA_NLDEV_ATTR_RES_IP_DPORT] = { .type = NLA_U16 },
+ [RDMA_NLDEV_ATTR_RES_DEV_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_NETWORK_TYPE] = { .type = NLA_U8 },
};
static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
@@ -182,6 +200,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
[RDMA_RESTRACK_PD] = "pd",
[RDMA_RESTRACK_CQ] = "cq",
[RDMA_RESTRACK_QP] = "qp",
+ [RDMA_RESTRACK_CM_ID] = "cm_id",
};
struct rdma_restrack_root *res = &device->res;
@@ -284,6 +303,99 @@ static int fill_res_qp_entry(struct sk_buff *msg,
return -EMSGSIZE;
}
+/*
+ * fill_res_cm_id_entry() - add one RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY nest
+ * @msg:   netlink message being built
+ * @cm_id: CM ID to describe
+ * @port:  port filter; 0 disables filtering
+ *
+ * Returns 0 when the entry was added, or when @cm_id was skipped because
+ * its port does not match @port.  Returns -EMSGSIZE when the nest could
+ * not be started or any attribute could not be added; in the latter case
+ * the partially built nest is cancelled.
+ */
+static int fill_res_cm_id_entry(struct sk_buff *msg,
+				struct rdma_cm_id *cm_id, uint32_t port)
+{
+	struct rdma_id_private *id_priv;
+	struct nlattr *entry_attr;
+
+	if (port && port != cm_id->port_num)
+		return 0;
+
+	id_priv = container_of(cm_id, struct rdma_id_private, id);
+	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY);
+	if (!entry_attr)
+		goto out;
+
+	/* Port index and local QP number are only reported once they are set */
+	if (cm_id->port_num &&
+	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
+		goto err;
+
+	if (id_priv->qp_num &&
+	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
+		goto err;
+
+	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
+		goto err;
+
+	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
+		goto err;
+	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
+		goto err;
+	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_DEV_TYPE,
+		       id_priv->id.route.addr.dev_addr.dev_type))
+		goto err;
+	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE,
+		       id_priv->id.route.addr.dev_addr.transport))
+		goto err;
+
+	if (cm_id->route.addr.src_addr.ss_family == AF_INET) {
+		struct sockaddr_in *sin;
+
+		/*
+		 * sin_port is already in network byte order and
+		 * nla_put_net16() takes a __be16, so pass it through
+		 * unchanged, exactly as the IPv6 branch below does.
+		 * Converting with be16_to_cpu() first would put a
+		 * byte-swapped port on the wire on little-endian hosts.
+		 */
+		sin = (struct sockaddr_in *)&cm_id->route.addr.src_addr;
+		if (nla_put_in_addr(msg, RDMA_NLDEV_ATTR_RES_IPV4_SADDR,
+				    sin->sin_addr.s_addr))
+			goto err;
+		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_SPORT,
+				  sin->sin_port))
+			goto err;
+
+		sin = (struct sockaddr_in *)&cm_id->route.addr.dst_addr;
+		if (nla_put_in_addr(msg, RDMA_NLDEV_ATTR_RES_IPV4_DADDR,
+				    sin->sin_addr.s_addr))
+			goto err;
+		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_DPORT,
+				  sin->sin_port))
+			goto err;
+	} else {
+		struct sockaddr_in6 *sin6;
+
+		/*
+		 * NOTE(review): every family other than AF_INET is reported
+		 * through the IPv6 attributes - confirm this is intended for
+		 * non-IP address families.
+		 */
+		sin6 = (struct sockaddr_in6 *)&cm_id->route.addr.src_addr;
+		if (nla_put_in6_addr(msg, RDMA_NLDEV_ATTR_RES_IPV6_SADDR,
+				     &sin6->sin6_addr))
+			goto err;
+		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_SPORT,
+				  sin6->sin6_port))
+			goto err;
+
+		sin6 = (struct sockaddr_in6 *)&cm_id->route.addr.dst_addr;
+		if (nla_put_in6_addr(msg, RDMA_NLDEV_ATTR_RES_IPV6_DADDR,
+				     &sin6->sin6_addr))
+			goto err;
+		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_DPORT,
+				  sin6->sin6_port))
+			goto err;
+	}
+
+	/* A caller name marks a kernel cm_id; otherwise report the pid */
+	if (id_priv->id.caller) {
+		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
+				   id_priv->id.caller))
+			goto err;
+	} else {
+		/* CMA keeps the owning pid. */
+		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, id_priv->owner))
+			goto err;
+	}
+
+	nla_nest_end(msg, entry_attr);
+	return 0;
+
+err:
+	nla_nest_cancel(msg, entry_attr);
+out:
+	return -EMSGSIZE;
+}
+
static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -686,6 +798,137 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
return ret;
}
+/*
+ * nldev_res_get_cm_id_dumpit() - netlink dump callback listing CM_IDs
+ * @skb: message buffer to fill
+ * @cb:  dump state; cb->args[0] holds the index to resume from
+ *
+ * The request must carry RDMA_NLDEV_ATTR_DEV_INDEX and may carry
+ * RDMA_NLDEV_ATTR_PORT_INDEX to restrict the dump to a single port.
+ *
+ * Returns skb->len when at least one CM_ID passed the visibility checks
+ * in this pass.  Otherwise the message is cancelled and ret is returned,
+ * which is 0 when there was simply nothing left to dump.  Returns -EINVAL
+ * for a malformed request, unknown device or invalid port, and -EMSGSIZE
+ * when the message header, device handle or table nest did not fit.
+ */
+static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb,
+				      struct netlink_callback *cb)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	struct rdma_restrack_entry *res;
+	int err, ret = 0, idx = 0;
+	struct nlattr *table_attr;
+	struct ib_device *device;
+	int start = cb->args[0];
+	struct rdma_cm_id *cm_id = NULL;
+	struct nlmsghdr *nlh;
+	u32 index, port = 0;
+
+	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+			  nldev_policy, NULL);
+	/*
+	 * Right now, we are expecting the device index to get CM_ID
+	 * information, but it is possible to extend this code to return all
+	 * devices in one shot by checking the existence of
+	 * RDMA_NLDEV_ATTR_DEV_INDEX. If it doesn't exist, we will iterate
+	 * over all devices.
+	 *
+	 * But it is not needed for now.
+	 */
+	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+		return -EINVAL;
+
+	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+	device = ib_device_get_by_index(index);
+	if (!device)
+		return -EINVAL;
+
+	/*
+	 * If no PORT_INDEX is supplied, return all CM_IDs from that device
+	 */
+	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
+		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+		if (!rdma_is_port_valid(device, port)) {
+			ret = -EINVAL;
+			goto err_index;
+		}
+	}
+
+	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_CM_ID_GET),
+			0, NLM_F_MULTI);
+	/*
+	 * nlh is handed to nlmsg_end() below, so do not continue without a
+	 * header. Nothing was added to skb yet, hence nothing to cancel.
+	 */
+	if (!nlh) {
+		ret = -EMSGSIZE;
+		goto err_index;
+	}
+
+	if (fill_nldev_handle(skb, device)) {
+		ret = -EMSGSIZE;
+		goto err;
+	}
+
+	table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_CM_ID);
+	if (!table_attr) {
+		ret = -EMSGSIZE;
+		goto err;
+	}
+
+	down_read(&device->res.rwsem);
+	hash_for_each_possible(device->res.hash, res, node,
+			       RDMA_RESTRACK_CM_ID) {
+		/* Skip entries already reported by a previous pass */
+		if (idx < start)
+			goto next;
+
+		if ((rdma_is_kernel_res(res) &&
+		     task_active_pid_ns(current) != &init_pid_ns) ||
+		    (!rdma_is_kernel_res(res) &&
+		     task_active_pid_ns(current) !=
+		     task_active_pid_ns(res->task)))
+			/*
+			 * 1. Kernel CM_IDs should be visible in init
+			 *    namespace only
+			 * 2. Present only CM_IDs visible in the current
+			 *    namespace
+			 */
+			goto next;
+
+		if (!rdma_restrack_get(res))
+			/*
+			 * Resource is under release now, but we are not
+			 * releasing lock now, so it will be released in
+			 * our next pass, once we will get ->next pointer.
+			 */
+			goto next;
+
+		cm_id = container_of(res, struct rdma_cm_id, res);
+
+		up_read(&device->res.rwsem);
+		ret = fill_res_cm_id_entry(skb, cm_id, port);
+		down_read(&device->res.rwsem);
+		/*
+		 * Return resource back, but it won't be released till
+		 * the &device->res.rwsem will be released for write.
+		 */
+		rdma_restrack_put(res);
+
+		if (ret == -EMSGSIZE)
+			/*
+			 * There is a chance to optimize here.
+			 * It can be done by using list_prepare_entry
+			 * and list_for_each_entry_continue afterwards.
+			 */
+			break;
+		if (ret)
+			goto res_err;
+next:		idx++;
+	}
+	up_read(&device->res.rwsem);
+
+	nla_nest_end(skb, table_attr);
+	nlmsg_end(skb, nlh);
+	/* Remember where to resume on the next dump pass */
+	cb->args[0] = idx;
+
+	/*
+	 * No more CM_IDs to fill, cancel the message and
+	 * return 0 to mark end of dumpit.
+	 */
+	if (!cm_id)
+		goto err;
+
+	put_device(&device->dev);
+	return skb->len;
+
+res_err:
+	nla_nest_cancel(skb, table_attr);
+	up_read(&device->res.rwsem);
+
+err:
+	nlmsg_cancel(skb, nlh);
+
+err_index:
+	put_device(&device->dev);
+	return ret;
+}
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
[RDMA_NLDEV_CMD_GET] = {
.doit = nldev_get_doit,
@@ -712,6 +955,9 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
* too.
*/
},
+ [RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
+ .dump = nldev_res_get_cm_id_dumpit,
+ },
};
void __init nldev_init(void)
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 83bce7e..6385914 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -3,12 +3,15 @@
* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
*/
+#include <rdma/rdma_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/restrack.h>
#include <linux/mutex.h>
#include <linux/sched/task.h>
#include <linux/pid_namespace.h>
+#include "cma_priv.h"
+
void rdma_restrack_init(struct rdma_restrack_root *res)
{
init_rwsem(&res->rwsem);
@@ -44,7 +47,7 @@ static void set_kern_name(struct rdma_restrack_entry *res)
struct ib_qp *qp;
if (type != RDMA_RESTRACK_QP)
- /* PD and CQ types already have this name embedded in */
+ /* Other types already have this name embedded in */
return;
qp = container_of(res, struct ib_qp, res);
@@ -61,6 +64,7 @@ static void set_kern_name(struct rdma_restrack_entry *res)
static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
{
enum rdma_restrack_type type = res->type;
+ struct rdma_cm_id *cm_id;
struct ib_device *dev;
struct ib_xrcd *xrcd;
struct ib_pd *pd;
@@ -84,6 +88,10 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
xrcd = container_of(res, struct ib_xrcd, res);
dev = xrcd->device;
break;
+ case RDMA_RESTRACK_CM_ID:
+ cm_id = container_of(res, struct rdma_cm_id, res);
+ dev = cm_id->device;
+ break;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
return NULL;
@@ -95,6 +103,7 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
static bool res_is_user(struct rdma_restrack_entry *res)
{
enum rdma_restrack_type type = res->type;
+ struct rdma_cm_id *cm_id;
struct ib_xrcd *xrcd;
struct ib_pd *pd;
struct ib_cq *cq;
@@ -119,6 +128,10 @@ static bool res_is_user(struct rdma_restrack_entry *res)
xrcd = container_of(res, struct ib_xrcd, res);
is_user = xrcd->inode;
break;
+ case RDMA_RESTRACK_CM_ID:
+ cm_id = container_of(res, struct rdma_cm_id, res);
+ is_user = !cm_id->caller;
+ break;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
}
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index d67219d..f7f0282 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -476,8 +476,8 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
return -ENOMEM;
ctx->uid = cmd.uid;
- ctx->cm_id = rdma_create_id(current->nsproxy->net_ns,
- ucma_event_handler, ctx, cmd.ps, qp_type);
+ ctx->cm_id = __rdma_create_id(current->nsproxy->net_ns,
+ ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
if (IS_ERR(ctx->cm_id)) {
ret = PTR_ERR(ctx->cm_id);
goto err1;
@@ -1084,12 +1084,12 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
if (cmd.conn_param.valid) {
ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
mutex_lock(&file->mut);
- ret = rdma_accept(ctx->cm_id, &conn_param);
+ ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
if (!ret)
ctx->uid = cmd.uid;
mutex_unlock(&file->mut);
} else
- ret = rdma_accept(ctx->cm_id, NULL);
+ ret = __rdma_accept(ctx->cm_id, NULL, NULL);
ucma_put_ctx(ctx);
return ret;
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 6538a5c..3e90501 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -155,8 +155,19 @@ struct rdma_cm_id {
enum rdma_port_space ps;
enum ib_qp_type qp_type;
u8 port_num;
+ const char *caller;
+
+ /*
+ * Internal to RDMA/core, don't use in the drivers
+ */
+ struct rdma_restrack_entry res;
};
+/*
+ * __rdma_create_id - back end of rdma_create_id() taking the caller name.
+ * @caller: name stored in the new id's ->caller field. When NULL, the pid
+ *          of the current task is recorded as the id's owner instead and
+ *          resource tracking treats the id as a user one.
+ *
+ * The rdma_create_id() macro passes KBUILD_MODNAME as @caller. The
+ * remaining parameters are the ones documented for rdma_create_id().
+ */
+struct rdma_cm_id *__rdma_create_id(struct net *net,
+ rdma_cm_event_handler event_handler,
+ void *context, enum rdma_port_space ps,
+ enum ib_qp_type qp_type, const char *caller);
+
/**
* rdma_create_id - Create an RDMA identifier.
*
@@ -169,10 +180,9 @@ struct rdma_cm_id {
*
* The id holds a reference on the network namespace until it is destroyed.
*/
-struct rdma_cm_id *rdma_create_id(struct net *net,
- rdma_cm_event_handler event_handler,
- void *context, enum rdma_port_space ps,
- enum ib_qp_type qp_type);
+#define rdma_create_id(net, event_handler, context, ps, qp_type) \
+ __rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \
+ KBUILD_MODNAME)
/**
* rdma_destroy_id - Destroys an RDMA identifier.
@@ -284,6 +294,9 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
*/
int rdma_listen(struct rdma_cm_id *id, int backlog);
+/*
+ * __rdma_accept - back end of rdma_accept() taking the caller name.
+ * @caller: when non-NULL, stored as the id's ->caller; when NULL, the pid
+ *          of the current task is recorded as the id's owner instead.
+ *
+ * The rdma_accept() macro passes KBUILD_MODNAME as @caller. The remaining
+ * parameters are the ones documented for rdma_accept().
+ */
+int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ const char *caller);
+
/**
* rdma_accept - Called to accept a connection request or response.
* @id: Connection identifier associated with the request.
@@ -299,7 +312,8 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
* state of the qp associated with the id is modified to error, such that any
* previously posted receive buffers would be flushed.
*/
-int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
+#define rdma_accept(id, conn_param) \
+ __rdma_accept((id), (conn_param), KBUILD_MODNAME)
/**
* rdma_notify - Notifies the RDMA CM of an asynchronous event that has
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index c2d8116..a794e0e 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -33,6 +33,10 @@ enum rdma_restrack_type {
*/
RDMA_RESTRACK_XRCD,
/**
+ * @RDMA_RESTRACK_CM_ID: Connection Manager ID (CM_ID)
+ */
+ RDMA_RESTRACK_CM_ID,
+ /**
* @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
*/
RDMA_RESTRACK_MAX
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 17e59be..13f0bed 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -240,6 +240,8 @@ enum rdma_nldev_command {
RDMA_NLDEV_CMD_RES_QP_GET, /* can dump */
+ RDMA_NLDEV_CMD_RES_CM_ID_GET, /* can dump */
+
RDMA_NLDEV_NUM_OPS
};
@@ -352,6 +354,34 @@ enum rdma_nldev_attr {
*/
RDMA_NLDEV_ATTR_RES_KERN_NAME, /* string */
+ RDMA_NLDEV_ATTR_RES_CM_ID, /* nested table */
+ RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY, /* nested table */
+ /*
+ * rdma_cm_id port space.
+ */
+ RDMA_NLDEV_ATTR_RES_PS, /* u32 */
+ /*
+ * Source and destination IP address and port attributes.
+ */
+ RDMA_NLDEV_ATTR_RES_IPV4_SADDR, /* u8[4] */
+ RDMA_NLDEV_ATTR_RES_IPV4_DADDR, /* u8[4] */
+ RDMA_NLDEV_ATTR_RES_IPV6_SADDR, /* u8[16] */
+ RDMA_NLDEV_ATTR_RES_IPV6_DADDR, /* u8[16] */
+ RDMA_NLDEV_ATTR_RES_IP_SPORT, /* u16 */
+ RDMA_NLDEV_ATTR_RES_IP_DPORT, /* u16 */
+ /*
+ * ARPHRD_INFINIBAND, ARPHRD_ETHER, ...
+ */
+ RDMA_NLDEV_ATTR_RES_DEV_TYPE, /* u8 */
+ /*
+ * enum rdma_transport_type (IB, IWARP, ...)
+ */
+ RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE, /* u8 */
+ /*
+ * enum rdma_network_type (IB, IPv4, IPv6,...)
+ */
+ RDMA_NLDEV_ATTR_RES_NETWORK_TYPE, /* u8 */
+
RDMA_NLDEV_ATTR_MAX
};
#endif /* _UAPI_RDMA_NETLINK_H */
--
1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH RESEND v1 rdma-next 1/6] RDMA/CM: move rdma_id_private to cma_priv.h
[not found] ` <cover.1518552800.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2018-01-30 16:59 ` [PATCH RESEND v1 rdma-next 2/6] RDMA/nldev: provide detailed CM_ID information Steve Wise
@ 2018-02-01 16:20 ` Steve Wise
2018-02-01 16:58 ` [PATCH RESEND v1 rdma-next 3/6] RDMA/nldev: provide detailed CQ information Steve Wise
` (3 subsequent siblings)
5 siblings, 0 replies; 14+ messages in thread
From: Steve Wise @ 2018-02-01 16:20 UTC (permalink / raw)
To: jgg-VPRAkNaXOzVWk0Htik3J/w, dledford-H+wXaHxf7aLQT0dZR+AlfA
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, leon-DgEjT+Ai2ygdnm+yROfE0A
Move struct rdma_id_private to a new header cma_priv.h so the resource
tracking services in core/nldev.c can read useful information about cm_ids.
Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
---
drivers/infiniband/core/cma.c | 41 +--------------------
drivers/infiniband/core/cma_priv.h | 75 ++++++++++++++++++++++++++++++++++++++
2 files changed, 76 insertions(+), 40 deletions(-)
create mode 100644 drivers/infiniband/core/cma_priv.h
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e66963c..203519e 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -62,6 +62,7 @@
#include <rdma/iw_cm.h>
#include "core_priv.h"
+#include "cma_priv.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
@@ -327,46 +328,6 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
* We do this by disabling removal notification while a callback is in process,
* and reporting it after the callback completes.
*/
-struct rdma_id_private {
- struct rdma_cm_id id;
-
- struct rdma_bind_list *bind_list;
- struct hlist_node node;
- struct list_head list; /* listen_any_list or cma_device.list */
- struct list_head listen_list; /* per device listens */
- struct cma_device *cma_dev;
- struct list_head mc_list;
-
- int internal_id;
- enum rdma_cm_state state;
- spinlock_t lock;
- struct mutex qp_mutex;
-
- struct completion comp;
- atomic_t refcount;
- struct mutex handler_mutex;
-
- int backlog;
- int timeout_ms;
- struct ib_sa_query *query;
- int query_id;
- union {
- struct ib_cm_id *ib;
- struct iw_cm_id *iw;
- } cm_id;
-
- u32 seq_num;
- u32 qkey;
- u32 qp_num;
- pid_t owner;
- u32 options;
- u8 srq;
- u8 tos;
- bool tos_set;
- u8 reuseaddr;
- u8 afonly;
- enum ib_gid_type gid_type;
-};
struct cma_multicast {
struct rdma_id_private *id_priv;
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
new file mode 100644
index 0000000..1b6dfbf
--- /dev/null
+++ b/drivers/infiniband/core/cma_priv.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc. All rights reserved.
+ * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _CMA_PRIV_H
+#define _CMA_PRIV_H
+
+/*
+ * Private state the RDMA CM keeps around every rdma_cm_id.  The public
+ * handle is the embedded @id member; core code maps an rdma_cm_id back to
+ * this structure with container_of().
+ *
+ * The include guard keeps the definition from being seen twice when this
+ * header is pulled into a translation unit more than once.
+ */
+struct rdma_id_private {
+	struct rdma_cm_id	id;
+
+	struct rdma_bind_list	*bind_list;
+	struct hlist_node	node;
+	struct list_head	list; /* listen_any_list or cma_device.list */
+	struct list_head	listen_list; /* per device listens */
+	struct cma_device	*cma_dev;
+	struct list_head	mc_list;
+
+	int			internal_id;
+	enum rdma_cm_state	state;
+	spinlock_t		lock;
+	struct mutex		qp_mutex;
+
+	struct completion	comp;
+	atomic_t		refcount;
+	struct mutex		handler_mutex;
+
+	int			backlog;
+	int			timeout_ms;
+	struct ib_sa_query	*query;
+	int			query_id;
+	union {
+		struct ib_cm_id	*ib;
+		struct iw_cm_id	*iw;
+	} cm_id;
+
+	u32			seq_num;
+	u32			qkey;
+	u32			qp_num;
+	pid_t			owner; /* pid of the creating/accepting task */
+	u32			options;
+	u8			srq;
+	u8			tos;
+	bool			tos_set;
+	u8			reuseaddr;
+	u8			afonly;
+	enum ib_gid_type	gid_type;
+};
+
+#endif /* _CMA_PRIV_H */
--
1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH RESEND v1 rdma-next 3/6] RDMA/nldev: provide detailed CQ information
[not found] ` <cover.1518552800.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2018-01-30 16:59 ` [PATCH RESEND v1 rdma-next 2/6] RDMA/nldev: provide detailed CM_ID information Steve Wise
2018-02-01 16:20 ` [PATCH RESEND v1 rdma-next 1/6] RDMA/CM: move rdma_id_private to cma_priv.h Steve Wise
@ 2018-02-01 16:58 ` Steve Wise
[not found] ` <624bf08040287a4c558f3b84d2bd60a9423b8dca.1518552800.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2018-02-01 20:51 ` [PATCH RESEND v1 rdma-next 5/6] RDMA/nldev: provide detailed MR information Steve Wise
` (2 subsequent siblings)
5 siblings, 1 reply; 14+ messages in thread
From: Steve Wise @ 2018-02-01 16:58 UTC (permalink / raw)
To: jgg-VPRAkNaXOzVWk0Htik3J/w, dledford-H+wXaHxf7aLQT0dZR+AlfA
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, leon-DgEjT+Ai2ygdnm+yROfE0A
Implement the RDMA nldev netlink interface for dumping detailed
CQ information.
Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
---
drivers/infiniband/core/nldev.c | 172 +++++++++++++++++++++++++++++++++++++++
include/uapi/rdma/rdma_netlink.h | 8 ++
2 files changed, 180 insertions(+)
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 13f5c46..34fb0d3 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -89,6 +89,11 @@
[RDMA_NLDEV_ATTR_RES_DEV_TYPE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_RES_NETWORK_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_CQ] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 },
};
static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
@@ -396,6 +401,51 @@ static int fill_res_cm_id_entry(struct sk_buff *msg,
return -EMSGSIZE;
}
+/*
+ * Add one RDMA_NLDEV_ATTR_RES_CQ_ENTRY nest describing @cq to @msg.
+ *
+ * Returns 0 on success.  Returns -EMSGSIZE when the nest could not be
+ * started or an attribute could not be added; a partially built nest is
+ * cancelled before returning.
+ */
+static int fill_res_cq_entry(struct sk_buff *msg,
+			     struct ib_cq *cq)
+{
+	struct rdma_restrack_entry *res = &cq->res;
+	struct nlattr *nest;
+
+	nest = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CQ_ENTRY);
+	if (!nest)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe) ||
+	    nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
+			      atomic_read(&cq->usecnt), 0))
+		goto cancel;
+
+	if (rdma_is_kernel_res(res)) {
+		/*
+		 * Kernel CQ: the poll context is only valid here, and the
+		 * owner is identified by the kernel name.
+		 */
+		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX,
+			       cq->poll_ctx) ||
+		    nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
+				   res->kern_name))
+			goto cancel;
+	} else {
+		/*
+		 * User CQ: report the pid; the netlink user can read
+		 * /proc/PID/comm to get the name of the task.
+		 */
+		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
+				task_pid_vnr(res->task)))
+			goto cancel;
+	}
+
+	nla_nest_end(msg, nest);
+	return 0;
+
+cancel:
+	nla_nest_cancel(msg, nest);
+	return -EMSGSIZE;
+}
+
static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -929,6 +979,125 @@ static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb,
put_device(&device->dev);
return ret;
}
+
+static int nldev_res_get_cq_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct rdma_restrack_entry *res;
+ int err, ret = 0, idx = 0;
+ struct nlattr *table_attr;
+ struct ib_device *device;
+ int start = cb->args[0];
+ struct ib_cq *cq = NULL;
+ struct nlmsghdr *nlh;
+ u32 index;
+
+ err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NULL);
+ /*
+ * Right now, we are expecting the device index to get CQ information,
+ * but it is possible to extend this code to return all devices in
+ * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
+ * if it doesn't exist, we will iterate over all devices.
+ *
+ * But it is not needed for now.
+ */
+ if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(index);
+ if (!device)
+ return -EINVAL;
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_CQ_GET),
+ 0, NLM_F_MULTI);
+
+ if (fill_nldev_handle(skb, device)) {
+ ret = -EMSGSIZE;
+ goto err;
+ }
+
+ table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_CQ);
+ if (!table_attr) {
+ ret = -EMSGSIZE;
+ goto err;
+ }
+
+ down_read(&device->res.rwsem);
+ hash_for_each_possible(device->res.hash, res, node, RDMA_RESTRACK_CQ) {
+ if (idx < start)
+ goto next;
+
+ if ((rdma_is_kernel_res(res) &&
+ task_active_pid_ns(current) != &init_pid_ns) ||
+ (!rdma_is_kernel_res(res) &&
+ task_active_pid_ns(current) !=
+ task_active_pid_ns(res->task)))
+ /*
+ * 1. Kernel CQs should be visible in init namespace only
+ * 2. Present only CQs visible in the current namespace
+ */
+ goto next;
+
+ if (!rdma_restrack_get(res))
+ /*
+ * Resource is under release now, but we are not
+ * releasing the lock now, so it will be released in
+ * our next pass, once we get the ->next pointer.
+ */
+ goto next;
+
+ cq = container_of(res, struct ib_cq, res);
+
+ up_read(&device->res.rwsem);
+ ret = fill_res_cq_entry(skb, cq);
+ down_read(&device->res.rwsem);
+ /*
+ * Return resource back, but it won't be released till
+ * the &device->res.rwsem will be released for write.
+ */
+ rdma_restrack_put(res);
+
+ if (ret == -EMSGSIZE)
+ /*
+ * There is a chance to optimize here.
+ * It can be done by using list_prepare_entry
+ * and list_for_each_entry_continue afterwards.
+ */
+ break;
+ if (ret)
+ goto res_err;
+next: idx++;
+ }
+ up_read(&device->res.rwsem);
+
+ nla_nest_end(skb, table_attr);
+ nlmsg_end(skb, nlh);
+ cb->args[0] = idx;
+
+ /*
+ * No more CQs to fill, cancel the message and
+ * return 0 to mark end of dumpit.
+ */
+ if (!cq)
+ goto err;
+
+ put_device(&device->dev);
+ return skb->len;
+
+res_err:
+ nla_nest_cancel(skb, table_attr);
+ up_read(&device->res.rwsem);
+
+err:
+ nlmsg_cancel(skb, nlh);
+ put_device(&device->dev);
+ return ret;
+}
+
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
[RDMA_NLDEV_CMD_GET] = {
.doit = nldev_get_doit,
@@ -958,6 +1127,9 @@ static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb,
[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
.dump = nldev_res_get_cm_id_dumpit,
},
+ [RDMA_NLDEV_CMD_RES_CQ_GET] = {
+ .dump = nldev_res_get_cq_dumpit,
+ },
};
void __init nldev_init(void)
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 13f0bed..fa677ef 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -242,6 +242,8 @@ enum rdma_nldev_command {
RDMA_NLDEV_CMD_RES_CM_ID_GET, /* can dump */
+ RDMA_NLDEV_CMD_RES_CQ_GET, /* can dump */
+
RDMA_NLDEV_NUM_OPS
};
@@ -382,6 +384,12 @@ enum rdma_nldev_attr {
*/
RDMA_NLDEV_ATTR_RES_NETWORK_TYPE, /* u8 */
+ RDMA_NLDEV_ATTR_RES_CQ, /* nested table */
+ RDMA_NLDEV_ATTR_RES_CQ_ENTRY, /* nested table */
+ RDMA_NLDEV_ATTR_RES_CQE, /* u32 */
+ RDMA_NLDEV_ATTR_RES_USECNT, /* u64 */
+ RDMA_NLDEV_ATTR_RES_POLL_CTX, /* u8 */
+
RDMA_NLDEV_ATTR_MAX
};
#endif /* _UAPI_RDMA_NETLINK_H */
--
1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH RESEND v1 rdma-next 5/6] RDMA/nldev: provide detailed MR information
[not found] ` <cover.1518552800.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
` (2 preceding siblings ...)
2018-02-01 16:58 ` [PATCH RESEND v1 rdma-next 3/6] RDMA/nldev: provide detailed CQ information Steve Wise
@ 2018-02-01 20:51 ` Steve Wise
[not found] ` <be7f33b8ce1e7311e9ed408267d8475166e48d9b.1518552800.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2018-02-01 22:41 ` [PATCH RESEND v1 rdma-next 4/6] iw_cxgb4: initialize ib_mr fields for user mrs Steve Wise
2018-02-02 21:24 ` [PATCH RESEND v1 rdma-next 6/6] RDMA/nldev: provide detailed PD information Steve Wise
5 siblings, 1 reply; 14+ messages in thread
From: Steve Wise @ 2018-02-01 20:51 UTC (permalink / raw)
To: jgg-VPRAkNaXOzVWk0Htik3J/w, dledford-H+wXaHxf7aLQT0dZR+AlfA
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, leon-DgEjT+Ai2ygdnm+yROfE0A
Implement the RDMA nldev netlink interface for dumping detailed
MR information.
Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
---
drivers/infiniband/core/nldev.c | 174 +++++++++++++++++++++++++++++++++++
drivers/infiniband/core/restrack.c | 48 +++++++---
drivers/infiniband/core/uverbs_cmd.c | 6 ++
drivers/infiniband/core/verbs.c | 3 +
include/rdma/ib_verbs.h | 5 +
include/rdma/restrack.h | 4 +
include/uapi/rdma/rdma_netlink.h | 10 ++
7 files changed, 238 insertions(+), 12 deletions(-)
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 34fb0d3..8d96f3e 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -94,6 +94,13 @@
[RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 },
[RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_PGSIZE] = { .type = NLA_U32 },
};
static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
@@ -206,6 +213,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
[RDMA_RESTRACK_CQ] = "cq",
[RDMA_RESTRACK_QP] = "qp",
[RDMA_RESTRACK_CM_ID] = "cm_id",
+ [RDMA_RESTRACK_MR] = "mr",
};
struct rdma_restrack_root *res = &device->res;
@@ -446,6 +454,51 @@ static int fill_res_cq_entry(struct sk_buff *msg,
return -EMSGSIZE;
}
+static int fill_res_mr_entry(struct sk_buff *msg,
+ struct ib_mr *mr)
+{
+ struct rdma_restrack_entry *res = &mr->res;
+ struct nlattr *entry_attr;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY);
+ if (!entry_attr)
+ goto out;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
+ goto err;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
+ goto err;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA, mr->iova, 0))
+ goto err;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 0))
+ goto err;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PGSIZE, mr->page_size))
+ goto err;
+
+ /*
+ * Existence of task means that it is a user MR; the netlink
+ * user is invited to read /proc/PID/comm to get the name of
+ * the task, and res->kern_name is not used in that case.
+ */
+ if (rdma_is_kernel_res(res)) {
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
+ res->kern_name))
+ goto err;
+ } else {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
+ task_pid_vnr(res->task)))
+ goto err;
+ }
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+out:
+ return -EMSGSIZE;
+}
+
static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -1098,6 +1151,124 @@ static int nldev_res_get_cq_dumpit(struct sk_buff *skb,
return ret;
}
+static int nldev_res_get_mr_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct rdma_restrack_entry *res;
+ int err, ret = 0, idx = 0;
+ struct nlattr *table_attr;
+ struct ib_device *device;
+ int start = cb->args[0];
+ struct ib_mr *mr = NULL;
+ struct nlmsghdr *nlh;
+ u32 index;
+
+ err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NULL);
+ /*
+ * Right now, we are expecting the device index to get MR information,
+ * but it is possible to extend this code to return all devices in
+ * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
+ * if it doesn't exist, we will iterate over all devices.
+ *
+ * But it is not needed for now.
+ */
+ if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(index);
+ if (!device)
+ return -EINVAL;
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_MR_GET),
+ 0, NLM_F_MULTI);
+
+ if (fill_nldev_handle(skb, device)) {
+ ret = -EMSGSIZE;
+ goto err;
+ }
+
+ table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_MR);
+ if (!table_attr) {
+ ret = -EMSGSIZE;
+ goto err;
+ }
+
+ down_read(&device->res.rwsem);
+ hash_for_each_possible(device->res.hash, res, node, RDMA_RESTRACK_MR) {
+ if (idx < start)
+ goto next;
+
+ if ((rdma_is_kernel_res(res) &&
+ task_active_pid_ns(current) != &init_pid_ns) ||
+ (!rdma_is_kernel_res(res) &&
+ task_active_pid_ns(current) !=
+ task_active_pid_ns(res->task)))
+ /*
+ * 1. Kernel MRs should be visible in init namespace only
+ * 2. Present only MRs visible in the current namespace
+ */
+ goto next;
+
+ if (!rdma_restrack_get(res))
+ /*
+ * Resource is under release now, but we are not
+ * releasing the lock now, so it will be released in
+ * our next pass, once we get the ->next pointer.
+ */
+ goto next;
+
+ mr = container_of(res, struct ib_mr, res);
+
+ up_read(&device->res.rwsem);
+ ret = fill_res_mr_entry(skb, mr);
+ down_read(&device->res.rwsem);
+ /*
+ * Return resource back, but it won't be released till
+ * the &device->res.rwsem will be released for write.
+ */
+ rdma_restrack_put(res);
+
+ if (ret == -EMSGSIZE)
+ /*
+ * There is a chance to optimize here.
+ * It can be done by using list_prepare_entry
+ * and list_for_each_entry_continue afterwards.
+ */
+ break;
+ if (ret)
+ goto res_err;
+next: idx++;
+ }
+ up_read(&device->res.rwsem);
+
+ nla_nest_end(skb, table_attr);
+ nlmsg_end(skb, nlh);
+ cb->args[0] = idx;
+
+ /*
+ * No more MRs to fill, cancel the message and
+ * return 0 to mark end of dumpit.
+ */
+ if (!mr)
+ goto err;
+
+ put_device(&device->dev);
+ return skb->len;
+
+res_err:
+ nla_nest_cancel(skb, table_attr);
+ up_read(&device->res.rwsem);
+
+err:
+ nlmsg_cancel(skb, nlh);
+ put_device(&device->dev);
+ return ret;
+}
+
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
[RDMA_NLDEV_CMD_GET] = {
.doit = nldev_get_doit,
@@ -1130,6 +1301,9 @@ static int nldev_res_get_cq_dumpit(struct sk_buff *skb,
[RDMA_NLDEV_CMD_RES_CQ_GET] = {
.dump = nldev_res_get_cq_dumpit,
},
+ [RDMA_NLDEV_CMD_RES_MR_GET] = {
+ .dump = nldev_res_get_mr_dumpit,
+ },
};
void __init nldev_init(void)
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 6385914..d3ad0ab 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -43,22 +43,36 @@ int rdma_restrack_count(struct rdma_restrack_root *res,
static void set_kern_name(struct rdma_restrack_entry *res)
{
- enum rdma_restrack_type type = res->type;
- struct ib_qp *qp;
+ struct ib_pd *pd = NULL;
- if (type != RDMA_RESTRACK_QP)
- /* Other types already have this name embedded in */
- return;
+ switch (res->type) {
+ case RDMA_RESTRACK_QP: {
+ struct ib_qp *qp;
- qp = container_of(res, struct ib_qp, res);
- if (!qp->pd) {
- WARN_ONCE(true, "XRC QPs are not supported\n");
- /* Survive, despite the programmer's error */
- res->kern_name = " ";
- return;
+ qp = container_of(res, struct ib_qp, res);
+ if (qp->pd) {
+ pd = qp->pd;
+ } else {
+ WARN_ONCE(true, "XRC QPs are not supported\n");
+ /* Survive, despite the programmer's error */
+ res->kern_name = " ";
+ }
+ break;
+ }
+ case RDMA_RESTRACK_MR: {
+ struct ib_mr *mr;
+
+ mr = container_of(res, struct ib_mr, res);
+ pd = mr->pd;
+ break;
+ }
+ default:
+ /* Other types set kern_name directly */
+ break;
}
- res->kern_name = qp->pd->res.kern_name;
+ if (pd)
+ res->kern_name = pd->res.kern_name;
}
static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
@@ -70,6 +84,7 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
+ struct ib_mr *mr;
switch (type) {
case RDMA_RESTRACK_PD:
@@ -92,6 +107,10 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
cm_id = container_of(res, struct rdma_cm_id, res);
dev = cm_id->device;
break;
+ case RDMA_RESTRACK_MR:
+ mr = container_of(res, struct ib_mr, res);
+ dev = mr->device;
+ break;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
return NULL;
@@ -108,6 +127,7 @@ static bool res_is_user(struct rdma_restrack_entry *res)
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
+ struct ib_mr *mr;
bool is_user = false;
switch (type) {
@@ -132,6 +152,10 @@ static bool res_is_user(struct rdma_restrack_entry *res)
cm_id = container_of(res, struct rdma_cm_id, res);
is_user = !cm_id->caller;
break;
+ case RDMA_RESTRACK_MR:
+ mr = container_of(res, struct ib_mr, res);
+ is_user = mr->pd->uobject;
+ break;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
}
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 256934d..3f026c4 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -694,6 +694,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
mr->pd = pd;
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
+ mr->res.type = RDMA_RESTRACK_MR;
+ rdma_restrack_add(&mr->res);
uobj->object = mr;
@@ -819,6 +821,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
struct ib_uverbs_dereg_mr cmd;
struct ib_uobject *uobj;
int ret = -EINVAL;
+ struct ib_mr *mr;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
@@ -828,6 +831,9 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
if (IS_ERR(uobj))
return PTR_ERR(uobj);
+ mr = uobj->object;
+ rdma_restrack_del(&mr->res);
+
ret = uobj_remove_commit(uobj);
return ret ?: in_len;
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 16ebc63..c3265f7 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1623,6 +1623,7 @@ int ib_dereg_mr(struct ib_mr *mr)
struct ib_pd *pd = mr->pd;
int ret;
+ rdma_restrack_del(&mr->res);
ret = mr->device->dereg_mr(mr);
if (!ret)
atomic_dec(&pd->usecnt);
@@ -1659,6 +1660,8 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
mr->need_inval = false;
+ mr->res.type = RDMA_RESTRACK_MR;
+ rdma_restrack_add(&mr->res);
}
return mr;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 5263c86..33d8c5d 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1771,6 +1771,11 @@ struct ib_mr {
struct ib_uobject *uobject; /* user */
struct list_head qp_entry; /* FR */
};
+
+ /*
+ * Implementation details of the RDMA core, don't use in drivers:
+ */
+ struct rdma_restrack_entry res;
};
struct ib_mw {
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index a794e0e..bfd1140 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -37,6 +37,10 @@ enum rdma_restrack_type {
*/
RDMA_RESTRACK_CM_ID,
/**
+ * @RDMA_RESTRACK_MR: Memory Region (MR)
+ */
+ RDMA_RESTRACK_MR,
+ /**
* @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
*/
RDMA_RESTRACK_MAX
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index fa677ef..6adaeaa 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -244,6 +244,8 @@ enum rdma_nldev_command {
RDMA_NLDEV_CMD_RES_CQ_GET, /* can dump */
+ RDMA_NLDEV_CMD_RES_MR_GET, /* can dump */
+
RDMA_NLDEV_NUM_OPS
};
@@ -390,6 +392,14 @@ enum rdma_nldev_attr {
RDMA_NLDEV_ATTR_RES_USECNT, /* u64 */
RDMA_NLDEV_ATTR_RES_POLL_CTX, /* u8 */
+ RDMA_NLDEV_ATTR_RES_MR, /* nested table */
+ RDMA_NLDEV_ATTR_RES_MR_ENTRY, /* nested table */
+ RDMA_NLDEV_ATTR_RES_RKEY, /* u32 */
+ RDMA_NLDEV_ATTR_RES_LKEY, /* u32 */
+ RDMA_NLDEV_ATTR_RES_IOVA, /* u64 */
+ RDMA_NLDEV_ATTR_RES_MRLEN, /* u64 */
+ RDMA_NLDEV_ATTR_RES_PGSIZE, /* u32 */
+
RDMA_NLDEV_ATTR_MAX
};
#endif /* _UAPI_RDMA_NETLINK_H */
--
1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH RESEND v1 rdma-next 4/6] iw_cxgb4: initialize ib_mr fields for user mrs
[not found] ` <cover.1518552800.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
` (3 preceding siblings ...)
2018-02-01 20:51 ` [PATCH RESEND v1 rdma-next 5/6] RDMA/nldev: provide detailed MR information Steve Wise
@ 2018-02-01 22:41 ` Steve Wise
2018-02-02 21:24 ` [PATCH RESEND v1 rdma-next 6/6] RDMA/nldev: provide detailed PD information Steve Wise
5 siblings, 0 replies; 14+ messages in thread
From: Steve Wise @ 2018-02-01 22:41 UTC (permalink / raw)
To: jgg-VPRAkNaXOzVWk0Htik3J/w, dledford-H+wXaHxf7aLQT0dZR+AlfA
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, leon-DgEjT+Ai2ygdnm+yROfE0A
Some of the struct ib_mr fields weren't getting initialized. This was
benign, but will cause problems when dumping the mr resource via
nldev/restrack.
Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
---
drivers/infiniband/hw/cxgb4/mem.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 7e0eb20..e90f2fd 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -391,6 +391,9 @@ static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag)
mhp->attr.stag = stag;
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+ mhp->ibmr.length = mhp->attr.len;
+ mhp->ibmr.iova = mhp->attr.va_fbo;
+ mhp->ibmr.page_size = 1U << (mhp->attr.page_size + 12);
pr_debug("mmid 0x%x mhp %p\n", mmid, mhp);
return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
}
--
1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH RESEND v1 rdma-next 6/6] RDMA/nldev: provide detailed PD information
[not found] ` <cover.1518552800.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
` (4 preceding siblings ...)
2018-02-01 22:41 ` [PATCH RESEND v1 rdma-next 4/6] iw_cxgb4: initialize ib_mr fields for user mrs Steve Wise
@ 2018-02-02 21:24 ` Steve Wise
[not found] ` <f272bfd34c913d540bd74008cccfa2544d8e100f.1518552800.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
5 siblings, 1 reply; 14+ messages in thread
From: Steve Wise @ 2018-02-02 21:24 UTC (permalink / raw)
To: jgg-VPRAkNaXOzVWk0Htik3J/w, dledford-H+wXaHxf7aLQT0dZR+AlfA
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, leon-DgEjT+Ai2ygdnm+yROfE0A
Implement the RDMA nldev netlink interface for dumping detailed PD
information.
Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
---
drivers/infiniband/core/nldev.c | 173 +++++++++++++++++++++++++++++++++++++++
include/uapi/rdma/rdma_netlink.h | 8 ++
2 files changed, 181 insertions(+)
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 8d96f3e..42cbec0 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -101,6 +101,11 @@
[RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 },
[RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 },
[RDMA_NLDEV_ATTR_RES_PGSIZE] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_PD] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_PD_FLAGS] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY] = { .type = NLA_U32 },
};
static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
@@ -499,6 +504,53 @@ static int fill_res_mr_entry(struct sk_buff *msg,
return -EMSGSIZE;
}
+static int fill_res_pd_entry(struct sk_buff *msg,
+ struct ib_pd *pd)
+{
+ struct rdma_restrack_entry *res = &pd->res;
+ struct nlattr *entry_attr;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_PD_ENTRY);
+ if (!entry_attr)
+ goto out;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
+ pd->local_dma_lkey))
+ goto err;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
+ atomic_read(&pd->usecnt), 0))
+ goto err;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PD_FLAGS, pd->flags))
+ goto err;
+ if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
+ pd->unsafe_global_rkey))
+ goto err;
+
+ /*
+ * Existence of task means that it is a user PD; the netlink
+ * user is invited to read /proc/PID/comm to get the name of
+ * the task, and res->kern_name is not used in that case.
+ */
+ if (rdma_is_kernel_res(res)) {
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
+ res->kern_name))
+ goto err;
+ } else {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
+ task_pid_vnr(res->task)))
+ goto err;
+ }
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+out:
+ return -EMSGSIZE;
+}
+
static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -1269,6 +1321,124 @@ static int nldev_res_get_mr_dumpit(struct sk_buff *skb,
return ret;
}
+static int nldev_res_get_pd_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct rdma_restrack_entry *res;
+ int err, ret = 0, idx = 0;
+ struct nlattr *table_attr;
+ struct ib_device *device;
+ int start = cb->args[0];
+ struct ib_pd *pd = NULL;
+ struct nlmsghdr *nlh;
+ u32 index;
+
+ err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NULL);
+ /*
+ * Right now, we are expecting the device index to get PD information,
+ * but it is possible to extend this code to return all devices in
+ * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
+ * if it doesn't exist, we will iterate over all devices.
+ *
+ * But it is not needed for now.
+ */
+ if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(index);
+ if (!device)
+ return -EINVAL;
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_PD_GET),
+ 0, NLM_F_MULTI);
+
+ if (fill_nldev_handle(skb, device)) {
+ ret = -EMSGSIZE;
+ goto err;
+ }
+
+ table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_PD);
+ if (!table_attr) {
+ ret = -EMSGSIZE;
+ goto err;
+ }
+
+ down_read(&device->res.rwsem);
+ hash_for_each_possible(device->res.hash, res, node, RDMA_RESTRACK_PD) {
+ if (idx < start)
+ goto next;
+
+ if ((rdma_is_kernel_res(res) &&
+ task_active_pid_ns(current) != &init_pid_ns) ||
+ (!rdma_is_kernel_res(res) &&
+ task_active_pid_ns(current) !=
+ task_active_pid_ns(res->task)))
+ /*
+ * 1. Kernel PDs should be visible in init namespace only
+ * 2. Present only PDs visible in the current namespace
+ */
+ goto next;
+
+ if (!rdma_restrack_get(res))
+ /*
+ * Resource is under release now, but we are not
+ * releasing the lock now, so it will be released in
+ * our next pass, once we get the ->next pointer.
+ */
+ goto next;
+
+ pd = container_of(res, struct ib_pd, res);
+
+ up_read(&device->res.rwsem);
+ ret = fill_res_pd_entry(skb, pd);
+ down_read(&device->res.rwsem);
+ /*
+ * Return resource back, but it won't be released till
+ * the &device->res.rwsem will be released for write.
+ */
+ rdma_restrack_put(res);
+
+ if (ret == -EMSGSIZE)
+ /*
+ * There is a chance to optimize here.
+ * It can be done by using list_prepare_entry
+ * and list_for_each_entry_continue afterwards.
+ */
+ break;
+ if (ret)
+ goto res_err;
+next: idx++;
+ }
+ up_read(&device->res.rwsem);
+
+ nla_nest_end(skb, table_attr);
+ nlmsg_end(skb, nlh);
+ cb->args[0] = idx;
+
+ /*
+ * No more PDs to fill, cancel the message and
+ * return 0 to mark end of dumpit.
+ */
+ if (!pd)
+ goto err;
+
+ put_device(&device->dev);
+ return skb->len;
+
+res_err:
+ nla_nest_cancel(skb, table_attr);
+ up_read(&device->res.rwsem);
+
+err:
+ nlmsg_cancel(skb, nlh);
+ put_device(&device->dev);
+ return ret;
+}
+
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
[RDMA_NLDEV_CMD_GET] = {
.doit = nldev_get_doit,
@@ -1304,6 +1474,9 @@ static int nldev_res_get_mr_dumpit(struct sk_buff *skb,
[RDMA_NLDEV_CMD_RES_MR_GET] = {
.dump = nldev_res_get_mr_dumpit,
},
+ [RDMA_NLDEV_CMD_RES_PD_GET] = {
+ .dump = nldev_res_get_pd_dumpit,
+ },
};
void __init nldev_init(void)
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 6adaeaa..4aeb59c 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -246,6 +246,8 @@ enum rdma_nldev_command {
RDMA_NLDEV_CMD_RES_MR_GET, /* can dump */
+ RDMA_NLDEV_CMD_RES_PD_GET, /* can dump */
+
RDMA_NLDEV_NUM_OPS
};
@@ -400,6 +402,12 @@ enum rdma_nldev_attr {
RDMA_NLDEV_ATTR_RES_MRLEN, /* u64 */
RDMA_NLDEV_ATTR_RES_PGSIZE, /* u32 */
+ RDMA_NLDEV_ATTR_RES_PD, /* nested table */
+ RDMA_NLDEV_ATTR_RES_PD_ENTRY, /* nested table */
+ RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY, /* u32 */
+ RDMA_NLDEV_ATTR_RES_PD_FLAGS, /* u32 */
+ RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY, /* u32 */
+
RDMA_NLDEV_ATTR_MAX
};
#endif /* _UAPI_RDMA_NETLINK_H */
--
1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH RESEND v1 rdma-next 0/6] cm_id, cq, mr, and pd resource tracking
@ 2018-02-13 20:13 Steve Wise
[not found] ` <cover.1518552800.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
0 siblings, 1 reply; 14+ messages in thread
From: Steve Wise @ 2018-02-13 20:13 UTC (permalink / raw)
To: jgg-VPRAkNaXOzVWk0Htik3J/w, dledford-H+wXaHxf7aLQT0dZR+AlfA
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, leon-DgEjT+Ai2ygdnm+yROfE0A
NOTE: I added cq, mr, and pd resources in this series.
This series adds rdma_cm_id, ib_cq, ib_mr, and ib_pd information to the
new resource tracking database. The patches are on top of Jason's merged
rdma-next branch [1] plus the recent restrack fix destined for rdma-rc
[2]. I'll rebase everything on rdma-next once it rebases on 4.16-rc2.
Changes since v0 RFC:
- move rdma_id_private into new header cma_priv.h
- fixed up code comments as suggested
- fixed RDMA_NLDEV_CMD_RES_QP_GET cut/paste error
- no BE attributes
- add cq, mr, pd resource information
- remove RFC tag
Thanks,
Steve.
[1] git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git
branch wip/for-linus-merged.
[2] https://www.spinics.net/lists/linux-rdma/msg60672.html
Steve Wise (6):
RDMA/CM: move rdma_id_private to cma_priv.h
RDMA/nldev: provide detailed CM_ID information
RDMA/nldev: provide detailed CQ information
iw_cxgb4: initialize ib_mr fields for user mrs
RDMA/nldev: provide detailed MR information
RDMA/nldev: provide detailed PD information
drivers/infiniband/core/cma.c | 96 ++---
drivers/infiniband/core/cma_priv.h | 75 ++++
drivers/infiniband/core/nldev.c | 811 ++++++++++++++++++++++++++++++++++-
drivers/infiniband/core/restrack.c | 61 ++-
drivers/infiniband/core/ucma.c | 8 +-
drivers/infiniband/core/uverbs_cmd.c | 6 +
drivers/infiniband/core/verbs.c | 3 +
drivers/infiniband/hw/cxgb4/mem.c | 3 +
include/rdma/ib_verbs.h | 5 +
include/rdma/rdma_cm.h | 24 +-
include/rdma/restrack.h | 8 +
include/uapi/rdma/rdma_netlink.h | 56 +++
12 files changed, 1052 insertions(+), 104 deletions(-)
create mode 100644 drivers/infiniband/core/cma_priv.h
--
1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH RESEND v1 rdma-next 2/6] RDMA/nldev: provide detailed CM_ID information
[not found] ` <1510713d3e0f14b101bea7dc9e02084e46e580ec.1518552800.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
@ 2018-02-13 20:43 ` Parav Pandit
[not found] ` <VI1PR0502MB300833C7A62DB411AB8E7982D1F60-o1MPJYiShExKsLr+rGaxW8DSnupUy6xnnBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
0 siblings, 1 reply; 14+ messages in thread
From: Parav Pandit @ 2018-02-13 20:43 UTC (permalink / raw)
To: Steve Wise, Jason Gunthorpe, dledford-H+wXaHxf7aLQT0dZR+AlfA
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, leon-DgEjT+Ai2ygdnm+yROfE0A
> -----Original Message-----
> From: linux-rdma-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org [mailto:linux-rdma-
> owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org] On Behalf Of Steve Wise
> Sent: Tuesday, January 30, 2018 10:59 AM
> To: Jason Gunthorpe <jgg-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>; dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org
> Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
> Subject: [PATCH RESEND v1 rdma-next 2/6] RDMA/nldev: provide detailed
> CM_ID information
>
> Implement RDMA nldev netlink interface to get detailed CM_ID information.
>
> Because cm_id's are attached to rdma devices in various work queue contexts,
> the pid and task information at device-attach time is sometimes not useful. For
> example, an nvme/f host connection cm_id ends up being bound to a device in a
> work queue context and the resulting pid at attach time no longer exists after
> connection setup. So instead we mark all cm_id's created via the rdma_ucm as
> "user", and all others as "kernel".
> This required tweaking the restrack code a little. It also required wrapping some
> rdma_cm functions to allow passing the module name string.
>
> Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
> ---
> drivers/infiniband/core/cma.c | 55 ++++++---
> drivers/infiniband/core/nldev.c | 246
> +++++++++++++++++++++++++++++++++++++
> drivers/infiniband/core/restrack.c | 15 ++-
> drivers/infiniband/core/ucma.c | 8 +-
> include/rdma/rdma_cm.h | 24 +++-
> include/rdma/restrack.h | 4 +
> include/uapi/rdma/rdma_netlink.h | 30 +++++
> 7 files changed, 352 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index
> 203519e..61ea800 100644
> --- a/drivers/infiniband/core/cma.c
> +++ b/drivers/infiniband/core/cma.c
> @@ -466,6 +466,9 @@ static void _cma_attach_to_dev(struct rdma_id_private
> *id_priv,
> id_priv->id.route.addr.dev_addr.transport =
> rdma_node_get_transport(cma_dev->device->node_type);
> list_add_tail(&id_priv->list, &cma_dev->id_list);
> + id_priv->id.res.type = RDMA_RESTRACK_CM_ID;
> + id_priv->id.res.kern_name = id_priv->id.caller;
> + rdma_restrack_add(&id_priv->id.res);
> }
>
> static void cma_attach_to_dev(struct rdma_id_private *id_priv, @@ -738,10
> +741,10 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
> complete(&id_priv->comp);
> }
>
> -struct rdma_cm_id *rdma_create_id(struct net *net,
> - rdma_cm_event_handler event_handler,
> - void *context, enum rdma_port_space ps,
> - enum ib_qp_type qp_type)
> +struct rdma_cm_id *__rdma_create_id(struct net *net,
> + rdma_cm_event_handler event_handler,
> + void *context, enum rdma_port_space ps,
> + enum ib_qp_type qp_type, const char *caller)
> {
> struct rdma_id_private *id_priv;
>
> @@ -749,7 +752,10 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
> if (!id_priv)
> return ERR_PTR(-ENOMEM);
>
> - id_priv->owner = task_pid_nr(current);
> + if (caller)
> + id_priv->id.caller = caller;
> + else
> + id_priv->owner = task_pid_nr(current);
> id_priv->state = RDMA_CM_IDLE;
> id_priv->id.context = context;
> id_priv->id.event_handler = event_handler; @@ -769,7 +775,7 @@
> struct rdma_cm_id *rdma_create_id(struct net *net,
>
> return &id_priv->id;
> }
> -EXPORT_SYMBOL(rdma_create_id);
> +EXPORT_SYMBOL(__rdma_create_id);
>
> static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) {
> @@ -1629,6 +1635,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
> mutex_unlock(&id_priv->handler_mutex);
>
> if (id_priv->cma_dev) {
> + rdma_restrack_del(&id_priv->id.res);
> if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
> if (id_priv->cm_id.ib)
> ib_destroy_cm_id(id_priv->cm_id.ib);
> @@ -1787,9 +1794,10 @@ static struct rdma_id_private
> *cma_new_conn_id(struct rdma_cm_id *listen_id,
> ib_event->param.req_rcvd.primary_path->service_id;
> int ret;
>
> - id = rdma_create_id(listen_id->route.addr.dev_addr.net,
> + id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
> listen_id->event_handler, listen_id->context,
> - listen_id->ps, ib_event->param.req_rcvd.qp_type);
> + listen_id->ps, ib_event->param.req_rcvd.qp_type,
> + listen_id->caller);
> if (IS_ERR(id))
> return NULL;
>
> @@ -1844,8 +1852,8 @@ static struct rdma_id_private
> *cma_new_udp_id(struct rdma_cm_id *listen_id,
> struct net *net = listen_id->route.addr.dev_addr.net;
> int ret;
>
> - id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
> - listen_id->ps, IB_QPT_UD);
> + id = __rdma_create_id(net, listen_id->event_handler, listen_id-
> >context,
> + listen_id->ps, IB_QPT_UD, listen_id->caller);
> if (IS_ERR(id))
> return NULL;
>
> @@ -2111,10 +2119,11 @@ static int iw_conn_req_handler(struct iw_cm_id
> *cm_id,
> goto out;
>
> /* Create a new RDMA id for the new IW CM ID */
> - new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> - listen_id->id.event_handler,
> - listen_id->id.context,
> - RDMA_PS_TCP, IB_QPT_RC);
> + new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> + listen_id->id.event_handler,
> + listen_id->id.context,
> + RDMA_PS_TCP, IB_QPT_RC,
> + listen_id->id.caller);
> if (IS_ERR(new_cm_id)) {
> ret = -ENOMEM;
> goto out;
> @@ -2239,8 +2248,8 @@ static void cma_listen_on_dev(struct rdma_id_private
> *id_priv,
> if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev-
> >device, 1))
> return;
>
> - id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
> - id_priv->id.qp_type);
> + id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
> + id_priv->id.qp_type, id_priv->id.caller);
> if (IS_ERR(id))
> return;
>
> @@ -3348,8 +3357,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct
> sockaddr *addr)
>
> return 0;
> err2:
> - if (id_priv->cma_dev)
> + if (id_priv->cma_dev) {
> + rdma_restrack_del(&id_priv->id.res);
> cma_release_dev(id_priv);
> + }
> err1:
> cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
> return ret;
> @@ -3732,14 +3743,18 @@ static int cma_send_sidr_rep(struct
> rdma_id_private *id_priv,
> return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); }
>
> -int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> *conn_param)
> +int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> *conn_param,
> + const char *caller)
> {
> struct rdma_id_private *id_priv;
> int ret;
>
> id_priv = container_of(id, struct rdma_id_private, id);
>
> - id_priv->owner = task_pid_nr(current);
> + if (caller)
> + id_priv->id.caller = caller;
> + else
> + id_priv->owner = task_pid_nr(current);
>
> if (!cma_comp(id_priv, RDMA_CM_CONNECT))
> return -EINVAL;
> @@ -3779,7 +3794,7 @@ int rdma_accept(struct rdma_cm_id *id, struct
> rdma_conn_param *conn_param)
> rdma_reject(id, NULL, 0);
> return ret;
> }
> -EXPORT_SYMBOL(rdma_accept);
> +EXPORT_SYMBOL(__rdma_accept);
>
> int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) { diff --git
> a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index
> fa8655e..13f5c46 100644
> --- a/drivers/infiniband/core/nldev.c
> +++ b/drivers/infiniband/core/nldev.c
> @@ -34,9 +34,11 @@
> #include <linux/pid.h>
> #include <linux/pid_namespace.h>
> #include <net/netlink.h>
> +#include <rdma/rdma_cm.h>
> #include <rdma/rdma_netlink.h>
>
> #include "core_priv.h"
> +#include "cma_priv.h"
>
> static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
> [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
> @@ -71,6 +73,22 @@
> [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
> [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type =
> NLA_NUL_STRING,
> .len = TASK_COMM_LEN },
> + [RDMA_NLDEV_ATTR_RES_CM_ID] = { .type =
> NLA_NESTED },
> + [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type =
> NLA_NESTED },
> + [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
> + [RDMA_NLDEV_ATTR_RES_IPV4_SADDR] = {
> + .len = FIELD_SIZEOF(struct iphdr, saddr) },
> + [RDMA_NLDEV_ATTR_RES_IPV4_DADDR] = {
> + .len = FIELD_SIZEOF(struct iphdr, saddr) },
> + [RDMA_NLDEV_ATTR_RES_IPV6_SADDR] = {
> + .len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
> + [RDMA_NLDEV_ATTR_RES_IPV6_DADDR] = {
> + .len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
> + [RDMA_NLDEV_ATTR_RES_IP_SPORT] = { .type = NLA_U16 },
> + [RDMA_NLDEV_ATTR_RES_IP_DPORT] = { .type = NLA_U16 },
> + [RDMA_NLDEV_ATTR_RES_DEV_TYPE] = { .type = NLA_U8 },
> + [RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE] = { .type = NLA_U8 },
> + [RDMA_NLDEV_ATTR_RES_NETWORK_TYPE] = { .type = NLA_U8 },
> };
>
> static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device) @@ -
> 182,6 +200,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device
> *device)
> [RDMA_RESTRACK_PD] = "pd",
> [RDMA_RESTRACK_CQ] = "cq",
> [RDMA_RESTRACK_QP] = "qp",
> + [RDMA_RESTRACK_CM_ID] = "cm_id",
> };
>
> struct rdma_restrack_root *res = &device->res; @@ -284,6 +303,99
> @@ static int fill_res_qp_entry(struct sk_buff *msg,
> return -EMSGSIZE;
> }
>
> +static int fill_res_cm_id_entry(struct sk_buff *msg,
> + struct rdma_cm_id *cm_id, uint32_t port) {
> + struct rdma_id_private *id_priv;
> + struct nlattr *entry_attr;
> +
> + if (port && port != cm_id->port_num)
> + return 0;
> +
> + id_priv = container_of(cm_id, struct rdma_id_private, id);
> + entry_attr = nla_nest_start(msg,
> RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY);
> + if (!entry_attr)
> + goto out;
> +
> + if (cm_id->port_num &&
> + nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id-
> >port_num))
> + goto err;
> +
> + if (id_priv->qp_num &&
> + nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv-
> >qp_num))
> + goto err;
> +
> + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
> + goto err;
> +
> + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
> + goto err;
> + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
> + goto err;
> + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_DEV_TYPE,
> + id_priv->id.route.addr.dev_addr.dev_type))
> + goto err;
> + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE,
> + id_priv->id.route.addr.dev_addr.transport))
> + goto err;
> +
> + if (cm_id->route.addr.src_addr.ss_family == AF_INET) {
> + struct sockaddr_in *sin;
> +
> + sin = (struct sockaddr_in *)&cm_id->route.addr.src_addr;
> + if (nla_put_in_addr(msg,
> RDMA_NLDEV_ATTR_RES_IPV4_SADDR,
> + sin->sin_addr.s_addr))
> + goto err;
> + if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_SPORT,
> + be16_to_cpu(sin->sin_port)))
> + goto err;
> +
> + sin = (struct sockaddr_in *)&cm_id->route.addr.dst_addr;
> + if (nla_put_in_addr(msg,
> RDMA_NLDEV_ATTR_RES_IPV4_DADDR,
> + sin->sin_addr.s_addr))
> + goto err;
> + if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_DPORT,
> + be16_to_cpu(sin->sin_port)))
> + goto err;
> + } else {
> + struct sockaddr_in6 *sin6;
> +
> + sin6 = (struct sockaddr_in6 *)&cm_id->route.addr.src_addr;
> + if (nla_put_in6_addr(msg,
> RDMA_NLDEV_ATTR_RES_IPV6_SADDR,
> + &sin6->sin6_addr))
> + goto err;
> + if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_SPORT,
> + sin6->sin6_port))
> + goto err;
> +
> + sin6 = (struct sockaddr_in6 *)&cm_id->route.addr.dst_addr;
> + if (nla_put_in6_addr(msg,
> RDMA_NLDEV_ATTR_RES_IPV6_DADDR,
> + &sin6->sin6_addr))
> + goto err;
> + if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_DPORT,
> + sin6->sin6_port))
> + goto err;
> + }
> +
> + if (id_priv->id.caller) {
> + if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
> + id_priv->id.caller))
> + goto err;
> + } else {
> + /* CMA keeps the owning pid. */
> + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, id_priv-
> >owner))
> + goto err;
> + }
> +
> + nla_nest_end(msg, entry_attr);
> + return 0;
> +
> +err:
> + nla_nest_cancel(msg, entry_attr);
> +out:
> + return -EMSGSIZE;
> +}
> +
> static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
> struct netlink_ext_ack *extack)
> {
> @@ -686,6 +798,137 @@ static int nldev_res_get_qp_dumpit(struct sk_buff
> *skb,
> return ret;
> }
>
> +static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb,
> + struct netlink_callback *cb)
> +{
> + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
> + struct rdma_restrack_entry *res;
> + int err, ret = 0, idx = 0;
> + struct nlattr *table_attr;
> + struct ib_device *device;
> + int start = cb->args[0];
> + struct rdma_cm_id *cm_id = NULL;
> + struct nlmsghdr *nlh;
> + u32 index, port = 0;
> +
> + err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
> + nldev_policy, NULL);
> + /*
> + * Right now, we are expecting the device index to get CM_ID information,
> + * but it is possible to extend this code to return all devices in
> + * one shot by checking the existence of
> RDMA_NLDEV_ATTR_DEV_INDEX.
> + * if it doesn't exist, we will iterate over all devices.
> + *
> + * But it is not needed for now.
> + */
> + if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
> + return -EINVAL;
> +
> + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
> + device = ib_device_get_by_index(index);
> + if (!device)
> + return -EINVAL;
> +
> + /*
> + * If no PORT_INDEX is supplied, return all CM_IDs from that device
> + */
> + if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
> + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
> + if (!rdma_is_port_valid(device, port)) {
> + ret = -EINVAL;
> + goto err_index;
> + }
> + }
> +
> + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
> + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
> RDMA_NLDEV_CMD_RES_CM_ID_GET),
> + 0, NLM_F_MULTI);
> +
> + if (fill_nldev_handle(skb, device)) {
> + ret = -EMSGSIZE;
> + goto err;
> + }
> +
> + table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_CM_ID);
> + if (!table_attr) {
> + ret = -EMSGSIZE;
> + goto err;
> + }
> +
> + down_read(&device->res.rwsem);
> + hash_for_each_possible(device->res.hash, res, node,
> + RDMA_RESTRACK_CM_ID) {
> + if (idx < start)
> + goto next;
> +
> + if ((rdma_is_kernel_res(res) &&
> + task_active_pid_ns(current) != &init_pid_ns) ||
> + (!rdma_is_kernel_res(res) &&
> + task_active_pid_ns(current) !=
> + task_active_pid_ns(res->task)))
> + /*
> + * 1. Kernel QPs should be visible in init namsapce only
> + * 2. Preent only QPs visible in the current namespace
Present only
> + */
> + goto next;
> +
> + if (!rdma_restrack_get(res))
> + /*
> + * Resource is under release now, but we are not
> + * releasing lock now, so it will be released in
> + * our next pass, once we will get ->next pointer.
> + */
> + goto next;
> +
> + cm_id = container_of(res, struct rdma_cm_id, res);
> +
> + up_read(&device->res.rwsem);
> + ret = fill_res_cm_id_entry(skb, cm_id, port);
> + down_read(&device->res.rwsem);
> + /*
> + * Return resource back, but it won't be released till
> + * the &device->res.rwsem will be released for write.
> + */
> + rdma_restrack_put(res);
> +
> + if (ret == -EMSGSIZE)
> + /*
> + * There is a chance to optimize here.
> + * It can be done by using list_prepare_entry
> + * and list_for_each_entry_continue afterwards.
> + */
> + break;
> + if (ret)
> + goto res_err;
> +next: idx++;
> + }
> + up_read(&device->res.rwsem);
> +
> + nla_nest_end(skb, table_attr);
> + nlmsg_end(skb, nlh);
> + cb->args[0] = idx;
> +
> + /*
> + * No more CM_IDs to fill, cancel the message and
> + * return 0 to mark end of dumpit.
> + */
> + if (!cm_id)
> + goto err;
> +
> + put_device(&device->dev);
> + return skb->len;
> +
> +res_err:
> + nla_nest_cancel(skb, table_attr);
> + up_read(&device->res.rwsem);
> +
> +err:
> + nlmsg_cancel(skb, nlh);
> +
> +err_index:
> + put_device(&device->dev);
> + return ret;
> +}
> static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
> [RDMA_NLDEV_CMD_GET] = {
> .doit = nldev_get_doit,
> @@ -712,6 +955,9 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
> * too.
> */
> },
> + [RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
> + .dump = nldev_res_get_cm_id_dumpit,
> + },
> };
>
> void __init nldev_init(void)
> diff --git a/drivers/infiniband/core/restrack.c
> b/drivers/infiniband/core/restrack.c
> index 83bce7e..6385914 100644
> --- a/drivers/infiniband/core/restrack.c
> +++ b/drivers/infiniband/core/restrack.c
> @@ -3,12 +3,15 @@
> * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
> */
>
> +#include <rdma/rdma_cm.h>
> #include <rdma/ib_verbs.h>
> #include <rdma/restrack.h>
> #include <linux/mutex.h>
> #include <linux/sched/task.h>
> #include <linux/pid_namespace.h>
>
> +#include "cma_priv.h"
> +
> void rdma_restrack_init(struct rdma_restrack_root *res) {
> init_rwsem(&res->rwsem);
> @@ -44,7 +47,7 @@ static void set_kern_name(struct rdma_restrack_entry
> *res)
> struct ib_qp *qp;
>
> if (type != RDMA_RESTRACK_QP)
> - /* PD and CQ types already have this name embedded in */
> + /* Other types already have this name embedded in */
> return;
>
> qp = container_of(res, struct ib_qp, res); @@ -61,6 +64,7 @@ static
> void set_kern_name(struct rdma_restrack_entry *res) static struct ib_device
> *res_to_dev(struct rdma_restrack_entry *res) {
> enum rdma_restrack_type type = res->type;
> + struct rdma_cm_id *cm_id;
> struct ib_device *dev;
> struct ib_xrcd *xrcd;
> struct ib_pd *pd;
> @@ -84,6 +88,10 @@ static struct ib_device *res_to_dev(struct
> rdma_restrack_entry *res)
> xrcd = container_of(res, struct ib_xrcd, res);
> dev = xrcd->device;
> break;
> + case RDMA_RESTRACK_CM_ID:
> + cm_id = container_of(res, struct rdma_cm_id, res);
> + dev = cm_id->device;
> + break;
> default:
> WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
> return NULL;
> @@ -95,6 +103,7 @@ static struct ib_device *res_to_dev(struct
> rdma_restrack_entry *res) static bool res_is_user(struct rdma_restrack_entry
> *res) {
> enum rdma_restrack_type type = res->type;
> + struct rdma_cm_id *cm_id;
> struct ib_xrcd *xrcd;
> struct ib_pd *pd;
> struct ib_cq *cq;
> @@ -119,6 +128,10 @@ static bool res_is_user(struct rdma_restrack_entry
> *res)
> xrcd = container_of(res, struct ib_xrcd, res);
> is_user = xrcd->inode;
> break;
> + case RDMA_RESTRACK_CM_ID:
> + cm_id = container_of(res, struct rdma_cm_id, res);
> + is_user = !cm_id->caller;
> + break;
> default:
> WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
> }
> diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
> index d67219d..f7f0282 100644
> --- a/drivers/infiniband/core/ucma.c
> +++ b/drivers/infiniband/core/ucma.c
> @@ -476,8 +476,8 @@ static ssize_t ucma_create_id(struct ucma_file *file,
> const char __user *inbuf,
> return -ENOMEM;
>
> ctx->uid = cmd.uid;
> - ctx->cm_id = rdma_create_id(current->nsproxy->net_ns,
> - ucma_event_handler, ctx, cmd.ps, qp_type);
> + ctx->cm_id = __rdma_create_id(current->nsproxy->net_ns,
> + ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
> if (IS_ERR(ctx->cm_id)) {
> ret = PTR_ERR(ctx->cm_id);
> goto err1;
> @@ -1084,12 +1084,12 @@ static ssize_t ucma_accept(struct ucma_file *file,
> const char __user *inbuf,
> if (cmd.conn_param.valid) {
> ucma_copy_conn_param(ctx->cm_id, &conn_param,
> &cmd.conn_param);
> mutex_lock(&file->mut);
> - ret = rdma_accept(ctx->cm_id, &conn_param);
> + ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
> if (!ret)
> ctx->uid = cmd.uid;
> mutex_unlock(&file->mut);
> } else
> - ret = rdma_accept(ctx->cm_id, NULL);
> + ret = __rdma_accept(ctx->cm_id, NULL, NULL);
>
> ucma_put_ctx(ctx);
> return ret;
> diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index
> 6538a5c..3e90501 100644
> --- a/include/rdma/rdma_cm.h
> +++ b/include/rdma/rdma_cm.h
> @@ -155,8 +155,19 @@ struct rdma_cm_id {
> enum rdma_port_space ps;
> enum ib_qp_type qp_type;
> u8 port_num;
> + const char *caller;
> +
> + /*
> + * Internal to RDMA/core, don't use in the drivers
> + */
> + struct rdma_restrack_entry res;
I didn't review the whole code; can this be kept in rdma_cm_priv_id instead?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH RESEND v1 rdma-next 2/6] RDMA/nldev: provide detailed CM_ID information
[not found] ` <VI1PR0502MB300833C7A62DB411AB8E7982D1F60-o1MPJYiShExKsLr+rGaxW8DSnupUy6xnnBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
@ 2018-02-13 20:59 ` Steve Wise
2018-02-13 21:13 ` Parav Pandit
0 siblings, 1 reply; 14+ messages in thread
From: Steve Wise @ 2018-02-13 20:59 UTC (permalink / raw)
To: 'Parav Pandit', 'Jason Gunthorpe',
dledford-H+wXaHxf7aLQT0dZR+AlfA
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, leon-DgEjT+Ai2ygdnm+yROfE0A
...
> > + /*
> > + * 1. Kernel QPs should be visible in init namsapce
only
> > + * 2. Preent only QPs visible in the current
namespace
> Present only
Yup. Thanks.
...
> > diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index
> > 6538a5c..3e90501 100644
> > --- a/include/rdma/rdma_cm.h
> > +++ b/include/rdma/rdma_cm.h
> > @@ -155,8 +155,19 @@ struct rdma_cm_id {
> > enum rdma_port_space ps;
> > enum ib_qp_type qp_type;
> > u8 port_num;
> > + const char *caller;
> > +
> > + /*
> > + * Internal to RDMA/core, don't use in the drivers
> > + */
> > + struct rdma_restrack_entry res;
> I didn't review the whole code; can this be kept in rdma_cm_priv_id
instead?
Yes. I guess that probably makes sense. The other objects (ib_qp, ib_cq,
etc) don't have a public/private separation though.
Steve.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH RESEND v1 rdma-next 2/6] RDMA/nldev: provide detailed CM_ID information
2018-02-13 20:59 ` Steve Wise
@ 2018-02-13 21:13 ` Parav Pandit
0 siblings, 0 replies; 14+ messages in thread
From: Parav Pandit @ 2018-02-13 21:13 UTC (permalink / raw)
To: Steve Wise, Jason Gunthorpe, dledford-H+wXaHxf7aLQT0dZR+AlfA
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, leon-DgEjT+Ai2ygdnm+yROfE0A
> -----Original Message-----
> From: Steve Wise [mailto:swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org]
> Sent: Tuesday, February 13, 2018 2:59 PM
> To: Parav Pandit <parav-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>; Jason Gunthorpe
> <jgg-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>; dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org
> Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
> Subject: RE: [PATCH RESEND v1 rdma-next 2/6] RDMA/nldev: provide detailed
> CM_ID information
>
> ...
>
> > > + /*
> > > + * 1. Kernel QPs should be visible in init namsapce
> only
> > > + * 2. Preent only QPs visible in the current
> namespace
> > Present only
>
>
> Yup. Thanks.
>
> ...
>
> > > diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index
> > > 6538a5c..3e90501 100644
> > > --- a/include/rdma/rdma_cm.h
> > > +++ b/include/rdma/rdma_cm.h
> > > @@ -155,8 +155,19 @@ struct rdma_cm_id {
> > > enum rdma_port_space ps;
> > > enum ib_qp_type qp_type;
> > > u8 port_num;
> > > + const char *caller;
> > > +
> > > + /*
> > > + * Internal to RDMA/core, don't use in the drivers
> > > + */
> > > + struct rdma_restrack_entry res;
> > I didn't review the whole code; can this be kept in rdma_cm_priv_id
> instead?
>
> Yes. I guess that probably makes sense. The other objects (ib_qp, ib_cq,
> etc) don't have a public/private separation though.
>
True. rdma_cm_priv_id has a lot more private fields than rdma_cm_id, so I think it makes sense to avoid exposing them to ULPs.
Creating such a private structure for cq or qp would obviously be overkill at this point anyway.
> Steve.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH RESEND v1 rdma-next 6/6] RDMA/nldev: provide detailed PD information
[not found] ` <f272bfd34c913d540bd74008cccfa2544d8e100f.1518552800.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
@ 2018-02-14 13:33 ` Leon Romanovsky
0 siblings, 0 replies; 14+ messages in thread
From: Leon Romanovsky @ 2018-02-14 13:33 UTC (permalink / raw)
To: Steve Wise
Cc: jgg-VPRAkNaXOzVWk0Htik3J/w, dledford-H+wXaHxf7aLQT0dZR+AlfA,
linux-rdma-u79uwXL29TY76Z2rM5mHXA
[-- Attachment #1: Type: text/plain, Size: 492 bytes --]
On Fri, Feb 02, 2018 at 01:24:44PM -0800, Steve Wise wrote:
> Implement the RDMA nldev netlink interface for dumping detailed PD
> information.
>
> Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
> ---
> drivers/infiniband/core/nldev.c | 173 +++++++++++++++++++++++++++++++++++++++
> include/uapi/rdma/rdma_netlink.h | 8 ++
> 2 files changed, 181 insertions(+)
>
Thanks,
Reviewed-by: Leon Romanovsky <leonro-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH RESEND v1 rdma-next 5/6] RDMA/nldev: provide detailed MR information
[not found] ` <be7f33b8ce1e7311e9ed408267d8475166e48d9b.1518552800.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
@ 2018-02-14 13:43 ` Leon Romanovsky
[not found] ` <20180214134346.GZ2197-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
0 siblings, 1 reply; 14+ messages in thread
From: Leon Romanovsky @ 2018-02-14 13:43 UTC (permalink / raw)
To: Steve Wise
Cc: jgg-VPRAkNaXOzVWk0Htik3J/w, dledford-H+wXaHxf7aLQT0dZR+AlfA,
linux-rdma-u79uwXL29TY76Z2rM5mHXA
[-- Attachment #1: Type: text/plain, Size: 13474 bytes --]
On Thu, Feb 01, 2018 at 12:51:16PM -0800, Steve Wise wrote:
> Implement the RDMA nldev netlink interface for dumping detailed
> MR information.
>
> Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
> ---
> drivers/infiniband/core/nldev.c | 174 +++++++++++++++++++++++++++++++++++
> drivers/infiniband/core/restrack.c | 48 +++++++---
> drivers/infiniband/core/uverbs_cmd.c | 6 ++
> drivers/infiniband/core/verbs.c | 3 +
> include/rdma/ib_verbs.h | 5 +
> include/rdma/restrack.h | 4 +
> include/uapi/rdma/rdma_netlink.h | 10 ++
> 7 files changed, 238 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
> index 34fb0d3..8d96f3e 100644
> --- a/drivers/infiniband/core/nldev.c
> +++ b/drivers/infiniband/core/nldev.c
> @@ -94,6 +94,13 @@
> [RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 },
> [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 },
> [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 },
> + [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED },
> + [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED },
> + [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
> + [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 },
> + [RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 },
> + [RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 },
> + [RDMA_NLDEV_ATTR_RES_PGSIZE] = { .type = NLA_U32 },
> };
>
> static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
> @@ -206,6 +213,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
> [RDMA_RESTRACK_CQ] = "cq",
> [RDMA_RESTRACK_QP] = "qp",
> [RDMA_RESTRACK_CM_ID] = "cm_id",
> + [RDMA_RESTRACK_MR] = "mr",
> };
>
> struct rdma_restrack_root *res = &device->res;
> @@ -446,6 +454,51 @@ static int fill_res_cq_entry(struct sk_buff *msg,
> return -EMSGSIZE;
> }
>
> +static int fill_res_mr_entry(struct sk_buff *msg,
> + struct ib_mr *mr)
> +{
> + struct rdma_restrack_entry *res = &mr->res;
> + struct nlattr *entry_attr;
> +
> + entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY);
> + if (!entry_attr)
> + goto out;
> +
> + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
> + goto err;
> + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
> + goto err;
> + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA, mr->iova, 0))
> + goto err;
> + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 0))
> + goto err;
> + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PGSIZE, mr->page_size))
> + goto err;
> +
> + /*
> + * Existence of task means that it is user MR and netlink
> + * user is invited to go and read /proc/PID/comm to get name
> + * of the task file and res->task_com should be NULL.
> + */
> + if (rdma_is_kernel_res(res)) {
> + if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
> + res->kern_name))
> + goto err;
> + } else {
> + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
> + task_pid_vnr(res->task)))
> + goto err;
> + }
> +
> + nla_nest_end(msg, entry_attr);
> + return 0;
> +
> +err:
> + nla_nest_cancel(msg, entry_attr);
> +out:
> + return -EMSGSIZE;
> +}
> +
> static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
> struct netlink_ext_ack *extack)
> {
> @@ -1098,6 +1151,124 @@ static int nldev_res_get_cq_dumpit(struct sk_buff *skb,
> return ret;
> }
>
> +static int nldev_res_get_mr_dumpit(struct sk_buff *skb,
> + struct netlink_callback *cb)
> +{
> + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
> + struct rdma_restrack_entry *res;
> + int err, ret = 0, idx = 0;
> + struct nlattr *table_attr;
> + struct ib_device *device;
> + int start = cb->args[0];
> + struct ib_mr *mr = NULL;
> + struct nlmsghdr *nlh;
> + u32 index;
> +
> + err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
> + nldev_policy, NULL);
> + /*
> + * Right now, we are expecting the device index to get MR information,
> + * but it is possible to extend this code to return all devices in
> + * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
> + * if it doesn't exist, we will iterate over all devices.
> + *
> + * But it is not needed for now.
> + */
> + if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
> + return -EINVAL;
> +
> + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
> + device = ib_device_get_by_index(index);
> + if (!device)
> + return -EINVAL;
> +
> + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
> + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_MR_GET),
> + 0, NLM_F_MULTI);
> +
> + if (fill_nldev_handle(skb, device)) {
> + ret = -EMSGSIZE;
> + goto err;
> + }
> +
> + table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_MR);
> + if (!table_attr) {
> + ret = -EMSGSIZE;
> + goto err;
> + }
> +
> + down_read(&device->res.rwsem);
> + hash_for_each_possible(device->res.hash, res, node, RDMA_RESTRACK_MR) {
> + if (idx < start)
> + goto next;
> +
> + if ((rdma_is_kernel_res(res) &&
> + task_active_pid_ns(current) != &init_pid_ns) ||
> + (!rdma_is_kernel_res(res) &&
> + task_active_pid_ns(current) !=
> + task_active_pid_ns(res->task)))
> + /*
> + * 1. Kernel MRs should be visible in init namespace only
> + * 2. Present only MRs visible in the current namespace
> + */
> + goto next;
> +
> + if (!rdma_restrack_get(res))
> + /*
> + * Resource is under release now, but we are not
> + * releasing lock now, so it will be released in
> + * our next pass, once we will get ->next pointer.
> + */
> + goto next;
> +
> + mr = container_of(res, struct ib_mr, res);
> +
> + up_read(&device->res.rwsem);
> + ret = fill_res_mr_entry(skb, mr);
> + down_read(&device->res.rwsem);
> + /*
> + * Return resource back, but it won't be released till
> + * the &device->res.rwsem will be released for write.
> + */
> + rdma_restrack_put(res);
> +
> + if (ret == -EMSGSIZE)
> + /*
> + * There is a chance to optimize here.
> + * It can be done by using list_prepare_entry
> + * and list_for_each_entry_continue afterwards.
> + */
> + break;
> + if (ret)
> + goto res_err;
> +next: idx++;
> + }
> + up_read(&device->res.rwsem);
> +
> + nla_nest_end(skb, table_attr);
> + nlmsg_end(skb, nlh);
> + cb->args[0] = idx;
> +
> + /*
> + * No more MRs to fill, cancel the message and
> + * return 0 to mark end of dumpit.
> + */
> + if (!mr)
> + goto err;
> +
> + put_device(&device->dev);
> + return skb->len;
> +
> +res_err:
> + nla_nest_cancel(skb, table_attr);
> + up_read(&device->res.rwsem);
> +
> +err:
> + nlmsg_cancel(skb, nlh);
> + put_device(&device->dev);
> + return ret;
> +}
> +
> static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
> [RDMA_NLDEV_CMD_GET] = {
> .doit = nldev_get_doit,
> @@ -1130,6 +1301,9 @@ static int nldev_res_get_cq_dumpit(struct sk_buff *skb,
> [RDMA_NLDEV_CMD_RES_CQ_GET] = {
> .dump = nldev_res_get_cq_dumpit,
> },
> + [RDMA_NLDEV_CMD_RES_MR_GET] = {
> + .dump = nldev_res_get_mr_dumpit,
> + },
> };
>
> void __init nldev_init(void)
> diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
> index 6385914..d3ad0ab 100644
> --- a/drivers/infiniband/core/restrack.c
> +++ b/drivers/infiniband/core/restrack.c
> @@ -43,22 +43,36 @@ int rdma_restrack_count(struct rdma_restrack_root *res,
>
> static void set_kern_name(struct rdma_restrack_entry *res)
> {
> - enum rdma_restrack_type type = res->type;
> - struct ib_qp *qp;
> + struct ib_pd *pd = NULL;
>
> - if (type != RDMA_RESTRACK_QP)
> - /* Other types already have this name embedded in */
> - return;
> + switch (res->type) {
> + case RDMA_RESTRACK_QP: {
> + struct ib_qp *qp;
>
> - qp = container_of(res, struct ib_qp, res);
> - if (!qp->pd) {
> - WARN_ONCE(true, "XRC QPs are not supported\n");
> - /* Survive, despite the programmer's error */
> - res->kern_name = " ";
> - return;
> + qp = container_of(res, struct ib_qp, res);
> + if (qp->pd) {
> + pd = qp->pd;
> + } else {
> + WARN_ONCE(true, "XRC QPs are not supported\n");
> + /* Survive, despite the programmer's error */
> + res->kern_name = " ";
> + }
> + break;
> + }
> + case RDMA_RESTRACK_MR: {
> + struct ib_mr *mr;
> +
> + mr = container_of(res, struct ib_mr, res);
> + pd = mr->pd;
> + break;
> + }
> + default:
> + /* Other types set kern_name directly */
> + break;
> }
>
> - res->kern_name = qp->pd->res.kern_name;
> + if (pd)
> + res->kern_name = pd->res.kern_name;
> }
>
> static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
> @@ -70,6 +84,7 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
> struct ib_pd *pd;
> struct ib_cq *cq;
> struct ib_qp *qp;
> + struct ib_mr *mr;
>
> switch (type) {
> case RDMA_RESTRACK_PD:
> @@ -92,6 +107,10 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
> cm_id = container_of(res, struct rdma_cm_id, res);
> dev = cm_id->device;
> break;
> + case RDMA_RESTRACK_MR:
> + mr = container_of(res, struct ib_mr, res);
> + dev = mr->device;
> + break;
> default:
> WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
> return NULL;
> @@ -108,6 +127,7 @@ static bool res_is_user(struct rdma_restrack_entry *res)
> struct ib_pd *pd;
> struct ib_cq *cq;
> struct ib_qp *qp;
> + struct ib_mr *mr;
> bool is_user = false;
>
> switch (type) {
> @@ -132,6 +152,10 @@ static bool res_is_user(struct rdma_restrack_entry *res)
> cm_id = container_of(res, struct rdma_cm_id, res);
> is_user = !cm_id->caller;
> break;
> + case RDMA_RESTRACK_MR:
> + mr = container_of(res, struct ib_mr, res);
> + is_user = mr->pd->uobject;
> + break;
> default:
> WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
> }
> diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
> index 256934d..3f026c4 100644
> --- a/drivers/infiniband/core/uverbs_cmd.c
> +++ b/drivers/infiniband/core/uverbs_cmd.c
> @@ -694,6 +694,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
> mr->pd = pd;
> mr->uobject = uobj;
> atomic_inc(&pd->usecnt);
> + mr->res.type = RDMA_RESTRACK_MR;
> + rdma_restrack_add(&mr->res);
>
> uobj->object = mr;
>
> @@ -819,6 +821,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
> struct ib_uverbs_dereg_mr cmd;
> struct ib_uobject *uobj;
> int ret = -EINVAL;
> + struct ib_mr *mr;
>
> if (copy_from_user(&cmd, buf, sizeof cmd))
> return -EFAULT;
> @@ -828,6 +831,9 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
> if (IS_ERR(uobj))
> return PTR_ERR(uobj);
>
> + mr = uobj->object;
> + rdma_restrack_del(&mr->res);
> +
This is the wrong function in which to call rdma_restrack_del(). The best
place to put rdma_restrack_add() is right after the device->xxx() call, and
the best place for rdma_restrack_del() is right before the device->yyy() call.
Thanks
> ret = uobj_remove_commit(uobj);
>
> return ret ?: in_len;
> diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
> index 16ebc63..c3265f7 100644
> --- a/drivers/infiniband/core/verbs.c
> +++ b/drivers/infiniband/core/verbs.c
> @@ -1623,6 +1623,7 @@ int ib_dereg_mr(struct ib_mr *mr)
> struct ib_pd *pd = mr->pd;
> int ret;
>
> + rdma_restrack_del(&mr->res);
> ret = mr->device->dereg_mr(mr);
> if (!ret)
> atomic_dec(&pd->usecnt);
> @@ -1659,6 +1660,8 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
> mr->uobject = NULL;
> atomic_inc(&pd->usecnt);
> mr->need_inval = false;
> + mr->res.type = RDMA_RESTRACK_MR;
> + rdma_restrack_add(&mr->res);
> }
>
> return mr;
> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index 5263c86..33d8c5d 100644
> --- a/include/rdma/ib_verbs.h
> +++ b/include/rdma/ib_verbs.h
> @@ -1771,6 +1771,11 @@ struct ib_mr {
> struct ib_uobject *uobject; /* user */
> struct list_head qp_entry; /* FR */
> };
> +
> + /*
> + * Implementation details of the RDMA core, don't use in drivers:
> + */
> + struct rdma_restrack_entry res;
> };
>
> struct ib_mw {
> diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
> index a794e0e..bfd1140 100644
> --- a/include/rdma/restrack.h
> +++ b/include/rdma/restrack.h
> @@ -37,6 +37,10 @@ enum rdma_restrack_type {
> */
> RDMA_RESTRACK_CM_ID,
> /**
> + * @RDMA_RESTRACK_MR: Memory Region (MR)
> + */
> + RDMA_RESTRACK_MR,
> + /**
> * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
> */
> RDMA_RESTRACK_MAX
> diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
> index fa677ef..6adaeaa 100644
> --- a/include/uapi/rdma/rdma_netlink.h
> +++ b/include/uapi/rdma/rdma_netlink.h
> @@ -244,6 +244,8 @@ enum rdma_nldev_command {
>
> RDMA_NLDEV_CMD_RES_CQ_GET, /* can dump */
>
> + RDMA_NLDEV_CMD_RES_MR_GET, /* can dump */
> +
> RDMA_NLDEV_NUM_OPS
> };
>
> @@ -390,6 +392,14 @@ enum rdma_nldev_attr {
> RDMA_NLDEV_ATTR_RES_USECNT, /* u64 */
> RDMA_NLDEV_ATTR_RES_POLL_CTX, /* u8 */
>
> + RDMA_NLDEV_ATTR_RES_MR, /* nested table */
> + RDMA_NLDEV_ATTR_RES_MR_ENTRY, /* nested table */
> + RDMA_NLDEV_ATTR_RES_RKEY, /* u32 */
> + RDMA_NLDEV_ATTR_RES_LKEY, /* u32 */
> + RDMA_NLDEV_ATTR_RES_IOVA, /* u64 */
> + RDMA_NLDEV_ATTR_RES_MRLEN, /* u64 */
> + RDMA_NLDEV_ATTR_RES_PGSIZE, /* u32 */
> +
> RDMA_NLDEV_ATTR_MAX
> };
> #endif /* _UAPI_RDMA_NETLINK_H */
> --
> 1.8.3.1
>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH RESEND v1 rdma-next 5/6] RDMA/nldev: provide detailed MR information
[not found] ` <20180214134346.GZ2197-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
@ 2018-02-14 16:31 ` Steve Wise
0 siblings, 0 replies; 14+ messages in thread
From: Steve Wise @ 2018-02-14 16:31 UTC (permalink / raw)
To: 'Leon Romanovsky'
Cc: jgg-VPRAkNaXOzVWk0Htik3J/w, dledford-H+wXaHxf7aLQT0dZR+AlfA,
linux-rdma-u79uwXL29TY76Z2rM5mHXA
Hey Leon,
...
> > diff --git a/drivers/infiniband/core/uverbs_cmd.c
> b/drivers/infiniband/core/uverbs_cmd.c
> > index 256934d..3f026c4 100644
> > --- a/drivers/infiniband/core/uverbs_cmd.c
> > +++ b/drivers/infiniband/core/uverbs_cmd.c
> > @@ -694,6 +694,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file
> *file,
> > mr->pd = pd;
> > mr->uobject = uobj;
> > atomic_inc(&pd->usecnt);
> > + mr->res.type = RDMA_RESTRACK_MR;
> > + rdma_restrack_add(&mr->res);
> >
> > uobj->object = mr;
> >
> > @@ -819,6 +821,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file
> *file,
> > struct ib_uverbs_dereg_mr cmd;
> > struct ib_uobject *uobj;
> > int ret = -EINVAL;
> > + struct ib_mr *mr;
> >
> > if (copy_from_user(&cmd, buf, sizeof cmd))
> > return -EFAULT;
> > @@ -828,6 +831,9 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file
> *file,
> > if (IS_ERR(uobj))
> > return PTR_ERR(uobj);
> >
> > + mr = uobj->object;
> > + rdma_restrack_del(&mr->res);
> > +
>
> It is wrong function to rdma_restrack_del(). The best place to put
> rdma_restrack_add() is right after device->xxx() call and
> rdma_restrack_del() is right before device->yyy() call.
>
> Thanks
You're right. In fact, the resource is already being deleted from the
restrack db in ib_dereg_mr() called by uverbs_free_mr(). So the above chunk
is not needed at all.
Thanks,
Steve.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH RESEND v1 rdma-next 3/6] RDMA/nldev: provide detailed CQ information
[not found] ` <624bf08040287a4c558f3b84d2bd60a9423b8dca.1518552800.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
@ 2018-02-15 13:28 ` Leon Romanovsky
0 siblings, 0 replies; 14+ messages in thread
From: Leon Romanovsky @ 2018-02-15 13:28 UTC (permalink / raw)
To: Steve Wise
Cc: jgg-VPRAkNaXOzVWk0Htik3J/w, dledford-H+wXaHxf7aLQT0dZR+AlfA,
linux-rdma-u79uwXL29TY76Z2rM5mHXA
[-- Attachment #1: Type: text/plain, Size: 492 bytes --]
On Thu, Feb 01, 2018 at 08:58:48AM -0800, Steve Wise wrote:
> Implement the RDMA nldev netlink interface for dumping detailed
> CQ information.
>
> Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
> ---
> drivers/infiniband/core/nldev.c | 172 +++++++++++++++++++++++++++++++++++++++
> include/uapi/rdma/rdma_netlink.h | 8 ++
> 2 files changed, 180 insertions(+)
>
Thanks,
Reviewed-by: Leon Romanovsky <leonro-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2018-02-15 13:28 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-13 20:13 [PATCH RESEND v1 rdma-next 0/6] cm_id, cq, mr, and pd resource tracking Steve Wise
[not found] ` <cover.1518552800.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2018-01-30 16:59 ` [PATCH RESEND v1 rdma-next 2/6] RDMA/nldev: provide detailed CM_ID information Steve Wise
[not found] ` <1510713d3e0f14b101bea7dc9e02084e46e580ec.1518552800.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2018-02-13 20:43 ` Parav Pandit
[not found] ` <VI1PR0502MB300833C7A62DB411AB8E7982D1F60-o1MPJYiShExKsLr+rGaxW8DSnupUy6xnnBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2018-02-13 20:59 ` Steve Wise
2018-02-13 21:13 ` Parav Pandit
2018-02-01 16:20 ` [PATCH RESEND v1 rdma-next 1/6] RDMA/CM: move rdma_id_private to cma_priv.h Steve Wise
2018-02-01 16:58 ` [PATCH RESEND v1 rdma-next 3/6] RDMA/nldev: provide detailed CQ information Steve Wise
[not found] ` <624bf08040287a4c558f3b84d2bd60a9423b8dca.1518552800.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2018-02-15 13:28 ` Leon Romanovsky
2018-02-01 20:51 ` [PATCH RESEND v1 rdma-next 5/6] RDMA/nldev: provide detailed MR information Steve Wise
[not found] ` <be7f33b8ce1e7311e9ed408267d8475166e48d9b.1518552800.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2018-02-14 13:43 ` Leon Romanovsky
[not found] ` <20180214134346.GZ2197-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2018-02-14 16:31 ` Steve Wise
2018-02-01 22:41 ` [PATCH RESEND v1 rdma-next 4/6] iw_cxgb4: initialize ib_mr fields for user mrs Steve Wise
2018-02-02 21:24 ` [PATCH RESEND v1 rdma-next 6/6] RDMA/nldev: provide detailed PD information Steve Wise
[not found] ` <f272bfd34c913d540bd74008cccfa2544d8e100f.1518552800.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2018-02-14 13:33 ` Leon Romanovsky
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.