All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V3 0/6] IB Netlink Interface and RDMA CM exports
@ 2010-12-13 16:22 Nir Muchtar
       [not found] ` <1292257370-24391-1-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Nir Muchtar @ 2010-12-13 16:22 UTC (permalink / raw)
  To: rolandd-FYB4Gu1CFyUAvxtiuMwx3w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w, nirm-smomgflXvOZWk0Htik3J/w

IB Netlink infrastructure and module for rdma_cm

This patch set provides means for communicating internal data from
IB modules to the userspace.
It is composed of two components:
1. The main ib_netlink infrastructure which lives and is initialized by ib_core.
2. additional clients which are implemented inside existing IB modules.
   Clients are responsible for adding/removing their modules during init/exit
   to/from the infrastructure.
   They also supply an array of callbacks for the infrastructure to call
   based on the module/operation type. 

ib_netlink uses the standard Netlink module and defines a new Netlink unit
(NETLINK_INFINIBAND) in netlink.h.
Upon receiving a request from userspace, it finds the target client
using the add/remove mechanism, and then uses client's callback table to call
the callback which is associated with the requested op, using the 
netlink_dump_start helper function.
The callbacks must be of the form:
int (*dump)(struct sk_buff *skb, struct netlink_callback *cb)
and must use the netlink_callback context in order to save state when called 
multiple times.
There is no guarantee that the returned data will be consistent as data
structures can change between calls.
The exact format of the returned data is unknown to ib_netlink itself.
It is shared between the kernel and userspace in the form of common headers.

Changelog:
1. Callbacks are now called via netlink_dump_start.
2. Op dependent callbacks are now by the infrastructure itself using
   supplied callback tables.
3. src/dst addresses are now returned as an attribute. (A very large one...)

A quick and dirty userspace demo application is attached for reference.
Here's a sample output:
Type  Port  PID    Net_dev    Src Address          Dst Address          Space  State           QPN      
IB    1     27404  ib0        192.168.168.3/7174   N/A                  TCP    LISTEN          0        
IB    2     27415  ib1        192.168.2.3/7174     N/A                  TCP    LISTEN          0        
IB    1     30     ib0        192.168.168.3/7174   192.168.168.2/57354  TCP    CONNECT         590854   
IB    2     15     ib1        192.168.2.3/7174     192.168.2.4/33290    TCP    CONNECT         590855   

And here's the source:

#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>

#include <sys/socket.h>
#include <sys/types.h>
#include <dirent.h>

#include <netinet/in.h>
#include <linux/netlink.h>
#include <netlink/attr.h>
#include "rdma_cma.h"
#include "ib_netlink.h"

#include <sys/ioctl.h>
#include <net/if.h>
#include <net/if_arp.h>

#define MAX_PAYLOAD 8192

void get_ifname(int index, char *if_name)
{
	struct ifreq req;
	int sock = socket(AF_INET, SOCK_DGRAM, 0);

	strcpy(if_name, "N/A");
	req.ifr_ifindex = index;
	if (index != 0) {
		
		if (ioctl(sock, SIOCGIFNAME, &req) < 0) {
			fprintf(stderr, "SIOCGIFNAME failed for index %d\n", index);
		}
		else {
			strcpy(if_name, req.ifr_name);
		}
	}
}
/*
void get_devname(const char *if_name, char *dev_name)
{
	char path[128];
	DIR *dir;
	struct dirent *dirent;

	strcpy(dev_name, "N/A");
	sprintf(path, "/sys/class/net/%s/device/infiniband", if_name);
	if ((dir = opendir(path)) == NULL) {
		return;
	}
	while ((dirent = readdir(dir)) != NULL) {
		if (strcmp(dirent->d_name, ".") && 
		    strcmp(dirent->d_name, "..")) {
			strcpy(dev_name, dirent->d_name);
			break;
		}
	}
	//closedir(dir);
}
*/

static const char *format_rdma_cm_state(enum rdma_cm_state s)
{
	switch (s) {
	case RDMA_CM_IDLE:           return "IDLE";
	case RDMA_CM_ADDR_QUERY:     return "ADDR_QUERY";
	case RDMA_CM_ADDR_RESOLVED:  return "ADDR_RESOLVED";
	case RDMA_CM_ROUTE_QUERY:    return "ROUTE_QUERY";
	case RDMA_CM_ROUTE_RESOLVED: return "ROUTE_RESOLVED";
	case RDMA_CM_CONNECT:        return "CONNECT";
	case RDMA_CM_DISCONNECT:     return "DISCONNECT";
	case RDMA_CM_ADDR_BOUND:     return "ADDR_BOUND";
	case RDMA_CM_LISTEN:         return "LISTEN";
	case RDMA_CM_DEVICE_REMOVAL: return "DEVICE_REMOVAL";
	case RDMA_CM_DESTROYING:     return "DESTROYING";
	default: 	         return "N/A";
	}
}

static const char *format_port_space(enum rdma_port_space ps)
{
	switch (ps) {
	case RDMA_PS_SDP:       return "SDP";
	case RDMA_PS_IPOIB:     return "IPOIB";
	case RDMA_PS_TCP:       return "TCP";
	case RDMA_PS_UDP:       return "UDP";
	default: 	        return "N/A";
	}
}

static const char *format_node_type(enum rdma_node_type nt)
{
	switch (nt) {
	case ARPHRD_INFINIBAND:	return "IB";
	case ARPHRD_ETHER: 	return "IW";
	default:		return "N/A";
	}
}

static int format_address(void *addr, char *buff)
{
	struct sockaddr_in *addr_in = (struct sockaddr_in *)addr;
	if (addr_in->sin_addr.s_addr) {
		sprintf(buff, "%s/%d", inet_ntoa(addr_in->sin_addr), ntohs(addr_in->sin_port));
	} 
	else
		sprintf(buff, "N/A");
	return 0;
}

int main()
{
	struct sockaddr_nl src_addr, dest_addr;
	struct msghdr msg;
	struct iovec iov;
	int sock_fd;
	struct rdma_cm_id_stats *cur_id_stats;
	char tmp_buf[64];
	int len;
	char if_name[64];
	//char dev_name[64];
	struct nlmsghdr *nlh = NULL;
	int ret;
	//u32 ret1=256, ret2=4;
	struct nlattr * tb[10];

	sock_fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_INFINIBAND);
	//setsockopt(sock_fd, SOL_SOCKET, SO_RCVBUF, &ret1, ret2);
	//getsockopt(sock_fd, SOL_SOCKET, SO_RCVBUF, &ret1, &ret2);
	//printf("rcvbuf=%d len=%d\n", ret1, ret2);

	if (sock_fd < 0) {
		printf("Failed to create socket. Error: %s (%d)\n", strerror(errno), errno);
		return -1;
	}

	memset(&src_addr, 0, sizeof(src_addr));
	src_addr.nl_family = AF_NETLINK;
	src_addr.nl_pid = getpid();
	src_addr.nl_groups = 0;  /* not in mcast groups */
	bind(sock_fd, (struct sockaddr*)&src_addr, sizeof(src_addr));

	memset(&dest_addr, 0, sizeof(dest_addr));
	dest_addr.nl_family = AF_NETLINK;
	dest_addr.nl_pid = 0;   /* For Linux Kernel */
	dest_addr.nl_groups = 0; /* unicast */

	nlh=(struct nlmsghdr *)malloc(NLMSG_SPACE(MAX_PAYLOAD));
	nlh->nlmsg_len = NLMSG_SPACE(MAX_PAYLOAD);
	nlh->nlmsg_pid = getpid();
	nlh->nlmsg_flags = NLM_F_REQUEST;
	nlh->nlmsg_type = IBNL_GET_TYPE(IBNL_RDMA_CM, IBNL_RDMA_CM_STATS);

	iov.iov_base = (void *)nlh;
	iov.iov_len = nlh->nlmsg_len;
	msg.msg_name = (void *)&dest_addr;
	msg.msg_namelen = sizeof(dest_addr);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	sendmsg(sock_fd, &msg, 0);
	printf("%-5s %-5s %-6s %-10s %-25s %-25s %-6s %-15s %-8s \n",
		"Type", "Port", "PID", "Net_dev", "Src Address",
		"Dst Address", "Space", "State", "QPN");
	while (1) {
		memset(nlh, 0, NLMSG_SPACE(MAX_PAYLOAD));
		iov.iov_base = (void *)nlh;
		iov.iov_len = NLMSG_SPACE(MAX_PAYLOAD);
		msg.msg_name = (void *)&dest_addr;
		msg.msg_namelen = sizeof(dest_addr);
		msg.msg_iov = &iov;
		msg.msg_iovlen = 1;

		len = recvmsg(sock_fd, &msg, 0);
		if (len <= 0)
			break;
		cur_id_stats = NLMSG_DATA(nlh);
		while ((ret = NLMSG_OK(nlh, len)) != 0) {
			if (nlh->nlmsg_type == NLMSG_DONE) {
				close(sock_fd);
				return 0;
			}
			cur_id_stats = NLMSG_DATA(nlh);
			
			get_ifname(cur_id_stats->bound_dev_if, if_name);
			//get_devname(if_name, dev_name);
			printf("%-5s %-5d %-6u %-10s ", 
			       format_node_type(cur_id_stats->nt), 
			       cur_id_stats->port_num,
			       cur_id_stats->pid,
			       if_name);
			nla_parse(tb, IBNL_RDMA_CM_NUM_ATTR, (struct nlattr *)(cur_id_stats+1),
			          nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*cur_id_stats)), NULL);
			format_address(nla_data(tb[IBNL_RDMA_CM_ATTR_SRC_ADDR]), tmp_buf);
			printf("%-25s ",tmp_buf);
			format_address(nla_data(tb[IBNL_RDMA_CM_ATTR_DST_ADDR]), tmp_buf);
			printf("%-25s ",tmp_buf);
			printf("%-6s %-15s 0x%-8x \n",
			       format_port_space(cur_id_stats->ps),
			       format_rdma_cm_state(cur_id_stats->cm_state),
			       cur_id_stats->qp_num);
			nlh = NLMSG_NEXT(nlh, len);
		}
	}
	close(sock_fd);
	return 0;
}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH V3 1/6] IB Netlink Infrastructure
       [not found] ` <1292257370-24391-1-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
@ 2010-12-13 16:22   ` Nir Muchtar
       [not found]     ` <1292257370-24391-2-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
  2010-12-13 16:22   ` [PATCH V3 2/6] IB Core: Error Handler Nir Muchtar
                     ` (5 subsequent siblings)
  6 siblings, 1 reply; 28+ messages in thread
From: Nir Muchtar @ 2010-12-13 16:22 UTC (permalink / raw)
  To: rolandd-FYB4Gu1CFyUAvxtiuMwx3w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w, nirm-smomgflXvOZWk0Htik3J/w

The basic IB netlink infrastructure.
It allows for registration of IB clients for which data is to be exported.
It supplies message construction callbacks.

Signed-off-by: Nir Muchtar <nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
---
 drivers/infiniband/core/netlink.c |  179 +++++++++++++++++++++++++++++++++++++
 include/linux/netlink.h           |    1 +
 include/rdma/ib_netlink.h         |   59 ++++++++++++
 3 files changed, 239 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/core/netlink.c
 create mode 100644 include/rdma/ib_netlink.h

diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
new file mode 100644
index 0000000..c004f90
--- /dev/null
+++ b/drivers/infiniband/core/netlink.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2010 Voltaire Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
+
+#include <linux/netlink.h>
+
+#include <net/netlink.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <rdma/ib_netlink.h>
+
+struct ibnl_client {
+	struct list_head list;
+	int index;
+	int nops;
+	int (**cb_table)(struct sk_buff *skb,
+			struct netlink_callback *nlcb);
+};
+
+static DEFINE_MUTEX(ibnl_mutex);
+static struct sock *nls;
+static LIST_HEAD(client_list);
+
+int ibnl_add_client(int index, int nops,
+		    int (*cb_table[])(struct sk_buff *skb,
+				      struct netlink_callback *nlcb))
+{
+	struct ibnl_client *cur;
+	struct ibnl_client *nl_client = kmalloc(sizeof *nl_client, GFP_ATOMIC);
+
+	if (!nl_client)
+		return -ENOMEM;
+	nl_client->index = index;
+	nl_client->nops = nops;
+	nl_client->cb_table = cb_table;
+	mutex_lock(&ibnl_mutex);
+	list_for_each_entry(cur, &client_list, list) {
+		if (cur->index == index) {
+			pr_warn("Client for %d already exists\n", index);
+			mutex_unlock(&ibnl_mutex);
+			kfree(nl_client);
+			return -EINVAL;
+		}
+	}
+	list_add_tail(&nl_client->list, &client_list);
+	mutex_unlock(&ibnl_mutex);
+	return 0;
+}
+EXPORT_SYMBOL(ibnl_add_client);
+
+int ibnl_remove_client(int index)
+{
+	struct ibnl_client *cur, *next;
+
+	mutex_lock(&ibnl_mutex);
+	list_for_each_entry_safe(cur, next, &client_list, list) {
+		if (cur->index == index) {
+			list_del(&(cur->list));
+			mutex_unlock(&ibnl_mutex);
+			kfree(cur);
+			return 0;
+		}
+	}
+	pr_warn("Can't remove callback for client idx %d. Not found\n", index);
+	mutex_unlock(&ibnl_mutex);
+	return -EINVAL;
+}
+EXPORT_SYMBOL(ibnl_remove_client);
+
+void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
+		   int len, int client, int op)
+{
+	unsigned char *prev_tail;
+
+	prev_tail = skb_tail_pointer(skb);
+	*nlh = NLMSG_NEW(skb, 0, seq, IBNL_GET_TYPE(client, op),
+			len, NLM_F_MULTI);
+	(*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail;
+	return NLMSG_DATA(*nlh);
+nlmsg_failure:
+	nlmsg_trim(skb, prev_tail);
+	return NULL;
+}
+EXPORT_SYMBOL(ibnl_put_msg);
+
+int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
+		  int len, void *data, int type)
+{
+	unsigned char *prev_tail;
+
+	prev_tail = skb_tail_pointer(skb);
+	NLA_PUT(skb, type, len, data);
+	nlh->nlmsg_len += skb_tail_pointer(skb) - prev_tail;
+	return 0;
+nla_put_failure:
+	nlmsg_trim(skb, prev_tail - nlh->nlmsg_len);
+	return -EMSGSIZE;
+}
+EXPORT_SYMBOL(ibnl_put_attr);
+
+static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	struct ibnl_client *client;
+	int type = nlh->nlmsg_type;
+	int index = IBNL_GET_CLIENT(type);
+	int op = IBNL_GET_OP(type);
+	list_for_each_entry(client, &client_list, list) {
+		if (client->index == index) {
+			if (op < 0 || op >= client->nops ||
+			    !client->cb_table[IBNL_GET_OP(op)])
+				return -EINVAL;
+			return netlink_dump_start(nls, skb, nlh,
+						  client->cb_table[op], NULL);
+		}
+	}
+	pr_info("Index %d wasn't found in client list\n", index);
+	return -EINVAL;
+}
+
+static void ibnl_rcv(struct sk_buff *skb)
+{
+	mutex_lock(&ibnl_mutex);
+	netlink_rcv_skb(skb, &ibnl_rcv_msg);
+	mutex_unlock(&ibnl_mutex);
+}
+
+int ibnl_init(void)
+{
+	nls = netlink_kernel_create(&init_net, NETLINK_INFINIBAND, 0, ibnl_rcv,
+				    NULL, THIS_MODULE);
+	if (!nls) {
+		pr_warn("Failed to create netlink socket\n");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void ibnl_cleanup(void)
+{
+	struct ibnl_client *cur, *next;
+
+	mutex_lock(&ibnl_mutex);
+	list_for_each_entry_safe(cur, next, &client_list, list) {
+		list_del(&(cur->list));
+		kfree(cur);
+	}
+	mutex_unlock(&ibnl_mutex);
+	netlink_kernel_release(nls);
+}
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 1235669..c9693f9 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -24,6 +24,7 @@
 /* leave room for NETLINK_DM (DM Events) */
 #define NETLINK_SCSITRANSPORT	18	/* SCSI Transports */
 #define NETLINK_ECRYPTFS	19
+#define NETLINK_INFINIBAND	20
 
 #define MAX_LINKS 32		
 
diff --git a/include/rdma/ib_netlink.h b/include/rdma/ib_netlink.h
new file mode 100644
index 0000000..0db338c
--- /dev/null
+++ b/include/rdma/ib_netlink.h
@@ -0,0 +1,59 @@
+#ifndef _IBNETLINK_H
+#define _IBNETLINK_H
+
+#define IBNL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10)
+#define IBNL_GET_OP(type) (type & ((1 << 10) - 1))
+#define IBNL_GET_TYPE(client, op) ((client << 10) + op)
+
+#ifdef __KERNEL__
+
+int ibnl_init(void);
+void ibnl_cleanup(void);
+
+/**
+ * Add a a client to the list of IB netlink exporters.
+ * @index: Index of the added client
+ * @nops: Number of supported ops by the added client.
+ * @cb_table: A table for op->callback
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int ibnl_add_client(int index, int nops,
+		    int (*cb_table[])(struct sk_buff *skb,
+				      struct netlink_callback *nlcb));
+
+/**
+ * Remove a client from IB netlink.
+ * @index: Index of the removed IB client.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int ibnl_remove_client(int index);
+
+/**
+ * Put a new message in a supplied skb.
+ * @skb: The netlink skb.
+ * @nlh: Pointer to put the header of the new netlink message.
+ * @seq: The message sequence number.
+ * @len: The requested message length to allocate.
+ * @client: Calling IB netlink client.
+ * @op: message content op.
+ * Returns the allocated buffer on success and NULL on failure.
+ */
+void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
+		   int len, int client, int op);
+/**
+ * Put a new attribute in a supplied skb.
+ * @skb: The netlink skb.
+ * @nlh: Header of the netlink message to append the attribute to.
+ * @len: The length of the attribute data.
+ * @data: The attribute data to put.
+ * @type: The attribute type.
+ * Returns the 0 and a negative error code on failure.
+ */
+int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
+		  int len, void *data, int type);
+
+#endif /* __KERNEL__ */
+
+#endif /* _IBNETLINK_H */
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH V3 2/6] IB Core: Error Handler
       [not found] ` <1292257370-24391-1-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
  2010-12-13 16:22   ` [PATCH V3 1/6] IB Netlink Infrastructure Nir Muchtar
@ 2010-12-13 16:22   ` Nir Muchtar
  2010-12-13 16:22   ` [PATCH V3 3/6] IB Core Run Netlink Nir Muchtar
                     ` (4 subsequent siblings)
  6 siblings, 0 replies; 28+ messages in thread
From: Nir Muchtar @ 2010-12-13 16:22 UTC (permalink / raw)
  To: rolandd-FYB4Gu1CFyUAvxtiuMwx3w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w, nirm-smomgflXvOZWk0Htik3J/w

Added missing error handling in ib_core init. (Wasn't intentional right?)

Signed-off-by: Nir Muchtar <nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
---
 drivers/infiniband/core/device.c |   11 +++++++++--
 1 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index a19effa..6e06e37 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -719,15 +719,22 @@ static int __init ib_core_init(void)
 	int ret;
 
 	ret = ib_sysfs_setup();
-	if (ret)
+	if (ret) {
 		printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
+		goto err;
+	}
 
 	ret = ib_cache_setup();
 	if (ret) {
 		printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
-		ib_sysfs_cleanup();
+		goto err_sysfs;
 	}
 
+	return 0;
+
+err_sysfs:
+	ib_sysfs_cleanup();
+err:
 	return ret;
 }
 
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH V3 3/6] IB Core Run Netlink
       [not found] ` <1292257370-24391-1-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
  2010-12-13 16:22   ` [PATCH V3 1/6] IB Netlink Infrastructure Nir Muchtar
  2010-12-13 16:22   ` [PATCH V3 2/6] IB Core: Error Handler Nir Muchtar
@ 2010-12-13 16:22   ` Nir Muchtar
  2010-12-13 16:22   ` [PATCH V3 4/6] RDMA CM: Export State Enum Nir Muchtar
                     ` (3 subsequent siblings)
  6 siblings, 0 replies; 28+ messages in thread
From: Nir Muchtar @ 2010-12-13 16:22 UTC (permalink / raw)
  To: rolandd-FYB4Gu1CFyUAvxtiuMwx3w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w, nirm-smomgflXvOZWk0Htik3J/w

Include and initialize IB netlink from IB core.

Signed-off-by: Nir Muchtar <nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
---
 drivers/infiniband/core/Makefile |    2 +-
 drivers/infiniband/core/device.c |   11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index cb1ab3e..c8bbaef 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=	ib_uverbs.o ib_ucm.o \
 					$(user_access-y)
 
 ib_core-y :=			packer.o ud_header.o verbs.o sysfs.o \
-				device.o fmr_pool.o cache.o
+				device.o fmr_pool.o cache.o netlink.o
 ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
 
 ib_mad-y :=			mad.o smi.o agent.o mad_rmpp.o
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 6e06e37..3229102 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -40,6 +40,8 @@
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
 
+#include <net/ib_netlink.h>
+
 #include "core_priv.h"
 
 MODULE_AUTHOR("Roland Dreier");
@@ -730,8 +732,16 @@ static int __init ib_core_init(void)
 		goto err_sysfs;
 	}
 
+	ret = ibnl_init();
+	if (ret) {
+		printk(KERN_WARNING "Couldn't init IB netlink interface\n");
+		goto err_cache;
+	}
+
 	return 0;
 
+err_cache:
+	ib_cache_cleanup();
 err_sysfs:
 	ib_sysfs_cleanup();
 err:
@@ -740,6 +750,7 @@ err:
 
 static void __exit ib_core_cleanup(void)
 {
+	ibnl_cleanup();
 	ib_cache_cleanup();
 	ib_sysfs_cleanup();
 	/* Make sure that any pending umem accounting work is done. */
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH V3 4/6] RDMA CM: Export State Enum
       [not found] ` <1292257370-24391-1-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
                     ` (2 preceding siblings ...)
  2010-12-13 16:22   ` [PATCH V3 3/6] IB Core Run Netlink Nir Muchtar
@ 2010-12-13 16:22   ` Nir Muchtar
  2010-12-13 16:22   ` [PATCH V3 5/6] RDMA CM: Save Owning PID Nir Muchtar
                     ` (2 subsequent siblings)
  6 siblings, 0 replies; 28+ messages in thread
From: Nir Muchtar @ 2010-12-13 16:22 UTC (permalink / raw)
  To: rolandd-FYB4Gu1CFyUAvxtiuMwx3w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w, nirm-smomgflXvOZWk0Htik3J/w

exported enum cma_state into rdma_cm.h

Signed-off-by: Nir Muchtar <nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
---
 drivers/infiniband/core/cma.c |  166 +++++++++++++++++++----------------------
 include/rdma/rdma_cm.h        |   14 ++++
 2 files changed, 92 insertions(+), 88 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 6884da2..5821f93 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -89,20 +89,6 @@ struct cma_device {
 	struct list_head	id_list;
 };
 
-enum cma_state {
-	CMA_IDLE,
-	CMA_ADDR_QUERY,
-	CMA_ADDR_RESOLVED,
-	CMA_ROUTE_QUERY,
-	CMA_ROUTE_RESOLVED,
-	CMA_CONNECT,
-	CMA_DISCONNECT,
-	CMA_ADDR_BOUND,
-	CMA_LISTEN,
-	CMA_DEVICE_REMOVAL,
-	CMA_DESTROYING
-};
-
 struct rdma_bind_list {
 	struct idr		*ps;
 	struct hlist_head	owners;
@@ -126,7 +112,7 @@ struct rdma_id_private {
 	struct list_head	mc_list;
 
 	int			internal_id;
-	enum cma_state		state;
+	enum rdma_cm_state	state;
 	spinlock_t		lock;
 	struct mutex		qp_mutex;
 
@@ -164,8 +150,8 @@ struct cma_multicast {
 struct cma_work {
 	struct work_struct	work;
 	struct rdma_id_private	*id;
-	enum cma_state		old_state;
-	enum cma_state		new_state;
+	enum rdma_cm_state	old_state;
+	enum rdma_cm_state	new_state;
 	struct rdma_cm_event	event;
 };
 
@@ -216,7 +202,7 @@ struct sdp_hah {
 #define CMA_VERSION 0x00
 #define SDP_MAJ_VERSION 0x2
 
-static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
+static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
 {
 	unsigned long flags;
 	int ret;
@@ -228,7 +214,7 @@ static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
 }
 
 static int cma_comp_exch(struct rdma_id_private *id_priv,
-			 enum cma_state comp, enum cma_state exch)
+			 enum rdma_cm_state comp, enum rdma_cm_state exch)
 {
 	unsigned long flags;
 	int ret;
@@ -240,11 +226,11 @@ static int cma_comp_exch(struct rdma_id_private *id_priv,
 	return ret;
 }
 
-static enum cma_state cma_exch(struct rdma_id_private *id_priv,
-			       enum cma_state exch)
+static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
+				   enum rdma_cm_state exch)
 {
 	unsigned long flags;
-	enum cma_state old;
+	enum rdma_cm_state old;
 
 	spin_lock_irqsave(&id_priv->lock, flags);
 	old = id_priv->state;
@@ -408,7 +394,7 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
 }
 
 static int cma_disable_callback(struct rdma_id_private *id_priv,
-			      enum cma_state state)
+				enum rdma_cm_state state)
 {
 	mutex_lock(&id_priv->handler_mutex);
 	if (id_priv->state != state) {
@@ -432,7 +418,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
 	if (!id_priv)
 		return ERR_PTR(-ENOMEM);
 
-	id_priv->state = CMA_IDLE;
+	id_priv->state = RDMA_CM_IDLE;
 	id_priv->id.context = context;
 	id_priv->id.event_handler = event_handler;
 	id_priv->id.ps = ps;
@@ -838,16 +824,16 @@ static void cma_cancel_listens(struct rdma_id_private *id_priv)
 }
 
 static void cma_cancel_operation(struct rdma_id_private *id_priv,
-				 enum cma_state state)
+				 enum rdma_cm_state state)
 {
 	switch (state) {
-	case CMA_ADDR_QUERY:
+	case RDMA_CM_ADDR_QUERY:
 		rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
 		break;
-	case CMA_ROUTE_QUERY:
+	case RDMA_CM_ROUTE_QUERY:
 		cma_cancel_route(id_priv);
 		break;
-	case CMA_LISTEN:
+	case RDMA_CM_LISTEN:
 		if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)
 				&& !id_priv->cma_dev)
 			cma_cancel_listens(id_priv);
@@ -898,10 +884,10 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
 void rdma_destroy_id(struct rdma_cm_id *id)
 {
 	struct rdma_id_private *id_priv;
-	enum cma_state state;
+	enum rdma_cm_state state;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	state = cma_exch(id_priv, CMA_DESTROYING);
+	state = cma_exch(id_priv, RDMA_CM_DESTROYING);
 	cma_cancel_operation(id_priv, state);
 
 	mutex_lock(&lock);
@@ -992,9 +978,9 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	int ret = 0;
 
 	if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
-		cma_disable_callback(id_priv, CMA_CONNECT)) ||
+		cma_disable_callback(id_priv, RDMA_CM_CONNECT)) ||
 	    (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
-		cma_disable_callback(id_priv, CMA_DISCONNECT)))
+		cma_disable_callback(id_priv, RDMA_CM_DISCONNECT)))
 		return 0;
 
 	memset(&event, 0, sizeof event);
@@ -1025,7 +1011,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		event.status = -ETIMEDOUT; /* fall through */
 	case IB_CM_DREQ_RECEIVED:
 	case IB_CM_DREP_RECEIVED:
-		if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
+		if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
+				   RDMA_CM_DISCONNECT))
 			goto out;
 		event.event = RDMA_CM_EVENT_DISCONNECTED;
 		break;
@@ -1052,7 +1039,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	if (ret) {
 		/* Destroy the CM ID by returning a non-zero value. */
 		id_priv->cm_id.ib = NULL;
-		cma_exch(id_priv, CMA_DESTROYING);
+		cma_exch(id_priv, RDMA_CM_DESTROYING);
 		mutex_unlock(&id_priv->handler_mutex);
 		rdma_destroy_id(&id_priv->id);
 		return ret;
@@ -1109,7 +1096,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
 	rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	id_priv->state = CMA_CONNECT;
+	id_priv->state = RDMA_CM_CONNECT;
 	return id_priv;
 
 destroy_id:
@@ -1149,7 +1136,7 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
 	}
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	id_priv->state = CMA_CONNECT;
+	id_priv->state = RDMA_CM_CONNECT;
 	return id_priv;
 err:
 	rdma_destroy_id(id);
@@ -1178,7 +1165,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	int offset, ret;
 
 	listen_id = cm_id->context;
-	if (cma_disable_callback(listen_id, CMA_LISTEN))
+	if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
 		return -ECONNABORTED;
 
 	memset(&event, 0, sizeof event);
@@ -1217,7 +1204,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		 * while we're accessing the cm_id.
 		 */
 		mutex_lock(&lock);
-		if (cma_comp(conn_id, CMA_CONNECT) &&
+		if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
 		    !cma_is_ud_ps(conn_id->id.ps))
 			ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
 		mutex_unlock(&lock);
@@ -1229,7 +1216,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	conn_id->cm_id.ib = NULL;
 
 release_conn_id:
-	cma_exch(conn_id, CMA_DESTROYING);
+	cma_exch(conn_id, RDMA_CM_DESTROYING);
 	mutex_unlock(&conn_id->handler_mutex);
 	rdma_destroy_id(&conn_id->id);
 
@@ -1300,7 +1287,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 	struct sockaddr_in *sin;
 	int ret = 0;
 
-	if (cma_disable_callback(id_priv, CMA_CONNECT))
+	if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
 		return 0;
 
 	memset(&event, 0, sizeof event);
@@ -1343,7 +1330,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 	if (ret) {
 		/* Destroy the CM ID by returning a non-zero value. */
 		id_priv->cm_id.iw = NULL;
-		cma_exch(id_priv, CMA_DESTROYING);
+		cma_exch(id_priv, RDMA_CM_DESTROYING);
 		mutex_unlock(&id_priv->handler_mutex);
 		rdma_destroy_id(&id_priv->id);
 		return ret;
@@ -1365,7 +1352,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	struct ib_device_attr attr;
 
 	listen_id = cm_id->context;
-	if (cma_disable_callback(listen_id, CMA_LISTEN))
+	if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
 		return -ECONNABORTED;
 
 	/* Create a new RDMA id for the new IW CM ID */
@@ -1378,7 +1365,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	}
 	conn_id = container_of(new_cm_id, struct rdma_id_private, id);
 	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
-	conn_id->state = CMA_CONNECT;
+	conn_id->state = RDMA_CM_CONNECT;
 
 	dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr);
 	if (!dev) {
@@ -1429,7 +1416,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	if (ret) {
 		/* User wants to destroy the CM ID */
 		conn_id->cm_id.iw = NULL;
-		cma_exch(conn_id, CMA_DESTROYING);
+		cma_exch(conn_id, RDMA_CM_DESTROYING);
 		mutex_unlock(&conn_id->handler_mutex);
 		rdma_destroy_id(&conn_id->id);
 		goto out;
@@ -1520,7 +1507,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 
 	dev_id_priv = container_of(id, struct rdma_id_private, id);
 
-	dev_id_priv->state = CMA_ADDR_BOUND;
+	dev_id_priv->state = RDMA_CM_ADDR_BOUND;
 	memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
 	       ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
 
@@ -1552,14 +1539,14 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	if (id_priv->state == CMA_IDLE) {
+	if (id_priv->state == RDMA_CM_IDLE) {
 		((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
 		ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
 		if (ret)
 			return ret;
 	}
 
-	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
+	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
 		return -EINVAL;
 
 	id_priv->backlog = backlog;
@@ -1585,7 +1572,7 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
 	return 0;
 err:
 	id_priv->backlog = 0;
-	cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
+	cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
 	return ret;
 }
 EXPORT_SYMBOL(rdma_listen);
@@ -1611,8 +1598,8 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
 		route->num_paths = 1;
 		*route->path_rec = *path_rec;
 	} else {
-		work->old_state = CMA_ROUTE_QUERY;
-		work->new_state = CMA_ADDR_RESOLVED;
+		work->old_state = RDMA_CM_ROUTE_QUERY;
+		work->new_state = RDMA_CM_ADDR_RESOLVED;
 		work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
 		work->event.status = status;
 	}
@@ -1670,7 +1657,7 @@ static void cma_work_handler(struct work_struct *_work)
 		goto out;
 
 	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
-		cma_exch(id_priv, CMA_DESTROYING);
+		cma_exch(id_priv, RDMA_CM_DESTROYING);
 		destroy = 1;
 	}
 out:
@@ -1688,12 +1675,12 @@ static void cma_ndev_work_handler(struct work_struct *_work)
 	int destroy = 0;
 
 	mutex_lock(&id_priv->handler_mutex);
-	if (id_priv->state == CMA_DESTROYING ||
-	    id_priv->state == CMA_DEVICE_REMOVAL)
+	if (id_priv->state == RDMA_CM_DESTROYING ||
+	    id_priv->state == RDMA_CM_DEVICE_REMOVAL)
 		goto out;
 
 	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
-		cma_exch(id_priv, CMA_DESTROYING);
+		cma_exch(id_priv, RDMA_CM_DESTROYING);
 		destroy = 1;
 	}
 
@@ -1717,8 +1704,8 @@ static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
 
 	work->id = id_priv;
 	INIT_WORK(&work->work, cma_work_handler);
-	work->old_state = CMA_ROUTE_QUERY;
-	work->new_state = CMA_ROUTE_RESOLVED;
+	work->old_state = RDMA_CM_ROUTE_QUERY;
+	work->new_state = RDMA_CM_ROUTE_RESOLVED;
 	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
 
 	route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
@@ -1747,7 +1734,8 @@ int rdma_set_ib_paths(struct rdma_cm_id *id,
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
+	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
+			   RDMA_CM_ROUTE_RESOLVED))
 		return -EINVAL;
 
 	id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths,
@@ -1760,7 +1748,7 @@ int rdma_set_ib_paths(struct rdma_cm_id *id,
 	id->route.num_paths = num_paths;
 	return 0;
 err:
-	cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
+	cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED);
 	return ret;
 }
 EXPORT_SYMBOL(rdma_set_ib_paths);
@@ -1775,8 +1763,8 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
 
 	work->id = id_priv;
 	INIT_WORK(&work->work, cma_work_handler);
-	work->old_state = CMA_ROUTE_QUERY;
-	work->new_state = CMA_ROUTE_RESOLVED;
+	work->old_state = RDMA_CM_ROUTE_QUERY;
+	work->new_state = RDMA_CM_ROUTE_RESOLVED;
 	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
 	queue_work(cma_wq, &work->work);
 	return 0;
@@ -1840,8 +1828,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 		goto err2;
 	}
 
-	work->old_state = CMA_ROUTE_QUERY;
-	work->new_state = CMA_ROUTE_RESOLVED;
+	work->old_state = RDMA_CM_ROUTE_QUERY;
+	work->new_state = RDMA_CM_ROUTE_RESOLVED;
 	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
 	work->event.status = 0;
 
@@ -1863,7 +1851,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
+	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
 		return -EINVAL;
 
 	atomic_inc(&id_priv->refcount);
@@ -1892,7 +1880,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
 
 	return 0;
 err:
-	cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
+	cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
 	cma_deref_id(id_priv);
 	return ret;
 }
@@ -1957,7 +1945,8 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 	 * we're trying to acquire it.
 	 */
 	mutex_lock(&lock);
-	if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
+	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY,
+			   RDMA_CM_ADDR_RESOLVED)) {
 		mutex_unlock(&lock);
 		goto out;
 	}
@@ -1967,7 +1956,8 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 	mutex_unlock(&lock);
 
 	if (status) {
-		if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
+		if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
+				   RDMA_CM_ADDR_BOUND))
 			goto out;
 		event.event = RDMA_CM_EVENT_ADDR_ERROR;
 		event.status = status;
@@ -1978,7 +1968,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 	}
 
 	if (id_priv->id.event_handler(&id_priv->id, &event)) {
-		cma_exch(id_priv, CMA_DESTROYING);
+		cma_exch(id_priv, RDMA_CM_DESTROYING);
 		mutex_unlock(&id_priv->handler_mutex);
 		cma_deref_id(id_priv);
 		rdma_destroy_id(&id_priv->id);
@@ -2023,8 +2013,8 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
 
 	work->id = id_priv;
 	INIT_WORK(&work->work, cma_work_handler);
-	work->old_state = CMA_ADDR_QUERY;
-	work->new_state = CMA_ADDR_RESOLVED;
+	work->old_state = RDMA_CM_ADDR_QUERY;
+	work->new_state = RDMA_CM_ADDR_RESOLVED;
 	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
 	queue_work(cma_wq, &work->work);
 	return 0;
@@ -2053,13 +2043,13 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	if (id_priv->state == CMA_IDLE) {
+	if (id_priv->state == RDMA_CM_IDLE) {
 		ret = cma_bind_addr(id, src_addr, dst_addr);
 		if (ret)
 			return ret;
 	}
 
-	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
+	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
 		return -EINVAL;
 
 	atomic_inc(&id_priv->refcount);
@@ -2075,7 +2065,7 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 
 	return 0;
 err:
-	cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
+	cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
 	cma_deref_id(id_priv);
 	return ret;
 }
@@ -2253,7 +2243,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 		return -EAFNOSUPPORT;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
+	if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND))
 		return -EINVAL;
 
 	ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
@@ -2285,7 +2275,7 @@ err2:
 		mutex_unlock(&lock);
 	}
 err1:
-	cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
+	cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
 	return ret;
 }
 EXPORT_SYMBOL(rdma_bind_addr);
@@ -2358,7 +2348,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
 	struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
 	int ret = 0;
 
-	if (cma_disable_callback(id_priv, CMA_CONNECT))
+	if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
 		return 0;
 
 	memset(&event, 0, sizeof event);
@@ -2404,7 +2394,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
 	if (ret) {
 		/* Destroy the CM ID by returning a non-zero value. */
 		id_priv->cm_id.ib = NULL;
-		cma_exch(id_priv, CMA_DESTROYING);
+		cma_exch(id_priv, RDMA_CM_DESTROYING);
 		mutex_unlock(&id_priv->handler_mutex);
 		rdma_destroy_id(&id_priv->id);
 		return ret;
@@ -2570,7 +2560,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
+	if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
 		return -EINVAL;
 
 	if (!id->qp) {
@@ -2597,7 +2587,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 
 	return 0;
 err:
-	cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
+	cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
 	return ret;
 }
 EXPORT_SYMBOL(rdma_connect);
@@ -2683,7 +2673,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	if (!cma_comp(id_priv, CMA_CONNECT))
+	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
 		return -EINVAL;
 
 	if (!id->qp && conn_param) {
@@ -2812,8 +2802,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 	int ret;
 
 	id_priv = mc->id_priv;
-	if (cma_disable_callback(id_priv, CMA_ADDR_BOUND) &&
-	    cma_disable_callback(id_priv, CMA_ADDR_RESOLVED))
+	if (cma_disable_callback(id_priv, RDMA_CM_ADDR_BOUND) &&
+	    cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED))
 		return 0;
 
 	mutex_lock(&id_priv->qp_mutex);
@@ -2837,7 +2827,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 
 	ret = id_priv->id.event_handler(&id_priv->id, &event);
 	if (ret) {
-		cma_exch(id_priv, CMA_DESTROYING);
+		cma_exch(id_priv, RDMA_CM_DESTROYING);
 		mutex_unlock(&id_priv->handler_mutex);
 		rdma_destroy_id(&id_priv->id);
 		return 0;
@@ -3020,8 +3010,8 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
-	    !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+	if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
+	    !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
 		return -EINVAL;
 
 	mc = kmalloc(sizeof *mc, GFP_KERNEL);
@@ -3186,19 +3176,19 @@ static void cma_add_one(struct ib_device *device)
 static int cma_remove_id_dev(struct rdma_id_private *id_priv)
 {
 	struct rdma_cm_event event;
-	enum cma_state state;
+	enum rdma_cm_state state;
 	int ret = 0;
 
 	/* Record that we want to remove the device */
-	state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
-	if (state == CMA_DESTROYING)
+	state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
+	if (state == RDMA_CM_DESTROYING)
 		return 0;
 
 	cma_cancel_operation(id_priv, state);
 	mutex_lock(&id_priv->handler_mutex);
 
 	/* Check for destruction from another callback. */
-	if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
+	if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
 		goto out;
 
 	memset(&event, 0, sizeof event);
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 4fae903..c766da9 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -111,6 +111,20 @@ struct rdma_cm_event {
 	} param;
 };
 
+enum rdma_cm_state {
+	RDMA_CM_IDLE,
+	RDMA_CM_ADDR_QUERY,
+	RDMA_CM_ADDR_RESOLVED,
+	RDMA_CM_ROUTE_QUERY,
+	RDMA_CM_ROUTE_RESOLVED,
+	RDMA_CM_CONNECT,
+	RDMA_CM_DISCONNECT,
+	RDMA_CM_ADDR_BOUND,
+	RDMA_CM_LISTEN,
+	RDMA_CM_DEVICE_REMOVAL,
+	RDMA_CM_DESTROYING
+};
+
 struct rdma_cm_id;
 
 /**
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH V3 5/6] RDMA CM: Save Owning PID
       [not found] ` <1292257370-24391-1-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
                     ` (3 preceding siblings ...)
  2010-12-13 16:22   ` [PATCH V3 4/6] RDMA CM: Export State Enum Nir Muchtar
@ 2010-12-13 16:22   ` Nir Muchtar
       [not found]     ` <1292257370-24391-6-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
  2010-12-13 16:22   ` [PATCH V3 6/6] RDMA CM: Netlink Client Nir Muchtar
  2010-12-14 18:27   ` [PATCH V3 0/6] IB Netlink Interface and RDMA CM exports Jason Gunthorpe
  6 siblings, 1 reply; 28+ messages in thread
From: Nir Muchtar @ 2010-12-13 16:22 UTC (permalink / raw)
  To: rolandd-FYB4Gu1CFyUAvxtiuMwx3w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w, nirm-smomgflXvOZWk0Htik3J/w

Save owning PID to id-priv when creating id's/accepting connections.

Signed-off-by: Nir Muchtar <nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
---
 drivers/infiniband/core/cma.c |    8 ++++++++
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 5821f93..9629a90 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -134,6 +134,7 @@ struct rdma_id_private {
 	u32			qp_num;
 	u8			srq;
 	u8			tos;
+	pid_t			owner;
 };
 
 struct cma_multicast {
@@ -418,6 +419,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
 	if (!id_priv)
 		return ERR_PTR(-ENOMEM);
 
+	id_priv->owner = current->pid;
 	id_priv->state = RDMA_CM_IDLE;
 	id_priv->id.context = context;
 	id_priv->id.event_handler = event_handler;
@@ -2671,8 +2673,14 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 {
 	struct rdma_id_private *id_priv;
 	int ret;
+	unsigned long flags;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
+
+	spin_lock_irqsave(&id_priv->lock, flags);
+	id_priv->owner = current->pid;
+	spin_unlock_irqrestore(&id_priv->lock, flags);
+
 	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
 		return -EINVAL;
 
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH V3 6/6] RDMA CM: Netlink Client
       [not found] ` <1292257370-24391-1-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
                     ` (4 preceding siblings ...)
  2010-12-13 16:22   ` [PATCH V3 5/6] RDMA CM: Save Owning PID Nir Muchtar
@ 2010-12-13 16:22   ` Nir Muchtar
       [not found]     ` <1292257370-24391-7-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
  2010-12-14 18:27   ` [PATCH V3 0/6] IB Netlink Interface and RDMA CM exports Jason Gunthorpe
  6 siblings, 1 reply; 28+ messages in thread
From: Nir Muchtar @ 2010-12-13 16:22 UTC (permalink / raw)
  To: rolandd-FYB4Gu1CFyUAvxtiuMwx3w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w, nirm-smomgflXvOZWk0Htik3J/w

Add callbacks and data types for statistics export.
One callback is implemented that exports all of the current devices/ids.
Add/remove the callback to IB Netlink on init/cleanup.

Signed-off-by: Nir Muchtar <nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
---
 drivers/infiniband/core/cma.c |   77 +++++++++++++++++++++++++++++++++++++++++
 include/rdma/ib_netlink.h     |   15 ++++++++
 include/rdma/rdma_cm.h        |   10 +++++
 3 files changed, 102 insertions(+), 0 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 9629a90..e3280d3 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -51,6 +51,7 @@
 #include <rdma/ib_cm.h>
 #include <rdma/ib_sa.h>
 #include <rdma/iw_cm.h>
+#include <rdma/ib_netlink.h>
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("Generic RDMA CM Agent");
@@ -3251,6 +3252,79 @@ static void cma_remove_one(struct ib_device *device)
 	kfree(cma_dev);
 }
 
+static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nlmsghdr *nlh;
+	struct rdma_cm_id_stats *id_stats;
+	struct rdma_id_private *id_priv;
+	struct rdma_cm_id *id = NULL;
+	struct cma_device *cma_dev;
+	int i_dev, i_id;
+
+	/* We export all of the id's as a sequence of messages.
+	   Each id gets its own netlink message */	
+	mutex_lock(&lock);
+	i_dev = 0;
+	list_for_each_entry(cma_dev, &dev_list, list) {
+		if (i_dev < cb->args[0]) {
+			i_dev++;
+			continue;
+		}
+		i_id = 0;
+		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
+			if (i_id < cb->args[1]) {
+				i_id++;
+				continue;
+			}
+			id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,
+						sizeof *id_stats, IBNL_RDMA_CM,
+						IBNL_RDMA_CM_ID_STATS);
+			if (!id_stats)
+				goto out;
+			memset(id_stats, 0, sizeof *id_stats);
+			id = &id_priv->id;
+			id_stats->nt = id->route.addr.dev_addr.dev_type;
+			id_stats->port_num = id->port_num;
+			id_stats->bound_dev_if =
+				id->route.addr.dev_addr.bound_dev_if;
+
+			if (ibnl_put_attr(skb, nlh,
+					  sizeof id->route.addr.src_addr,
+					  &id->route.addr.src_addr,
+					  IBNL_RDMA_CM_ATTR_SRC_ADDR)) {
+				goto out;
+			}
+
+			if (ibnl_put_attr(skb, nlh,
+					  sizeof id->route.addr.dst_addr,
+					  &id->route.addr.dst_addr,
+					  IBNL_RDMA_CM_ATTR_DST_ADDR)) {
+				goto out;
+			}
+
+			id_stats->ps = id->ps;
+			id_stats->cm_state = id_priv->state;
+			id_stats->qp_num = id_priv->qp_num;
+			id_stats->pid = id_priv->owner;
+
+			i_id++;
+		}
+		cb->args[1] = 0;
+		i_dev++;
+	}
+out:
+	mutex_unlock(&lock);
+	cb->args[0] = i_dev;
+	cb->args[1] = i_id;
+
+	return skb->len;
+}
+
+static int (*cma_cb_table[])(struct sk_buff *skb,
+			     struct netlink_callback *cb) = {
+	[IBNL_RDMA_CM_ID_STATS] = cma_get_id_stats,
+};
+
 static int __init cma_init(void)
 {
 	int ret;
@@ -3266,6 +3340,8 @@ static int __init cma_init(void)
 	ret = ib_register_client(&cma_client);
 	if (ret)
 		goto err;
+	if (ibnl_add_client(IBNL_RDMA_CM, IBNL_RDMA_CM_NUM_OPS, cma_cb_table))
+		printk(KERN_WARNING "RDMA CM failed to add netlink callback\n");
 	return 0;
 
 err:
@@ -3278,6 +3354,7 @@ err:
 
 static void __exit cma_cleanup(void)
 {
+	ibnl_remove_client(IBNL_RDMA_CM);
 	ib_unregister_client(&cma_client);
 	unregister_netdevice_notifier(&cma_nb);
 	rdma_addr_unregister_client(&addr_client);
diff --git a/include/rdma/ib_netlink.h b/include/rdma/ib_netlink.h
index 0db338c..470318f 100644
--- a/include/rdma/ib_netlink.h
+++ b/include/rdma/ib_netlink.h
@@ -1,6 +1,21 @@
 #ifndef _IBNETLINK_H
 #define _IBNETLINK_H
 
+enum {
+	IBNL_RDMA_CM = 1
+};
+
+enum {
+	IBNL_RDMA_CM_ID_STATS = 0,
+	IBNL_RDMA_CM_NUM_OPS
+};
+
+enum {
+	IBNL_RDMA_CM_ATTR_SRC_ADDR = 1,
+	IBNL_RDMA_CM_ATTR_DST_ADDR,
+	IBNL_RDMA_CM_NUM_ATTR,
+};
+
 #define IBNL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10)
 #define IBNL_GET_OP(type) (type & ((1 << 10) - 1))
 #define IBNL_GET_TYPE(client, op) ((client << 10) + op)
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index c766da9..ec47f11 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -147,6 +147,16 @@ struct rdma_cm_id {
 	u8			 port_num;
 };
 
+struct rdma_cm_id_stats {
+	u8 nt;
+	u8 port_num;
+	u32 bound_dev_if;
+	u32 ps;
+	u8 cm_state;
+	u32 qp_num;
+	pid_t pid;
+};
+
 /**
  * rdma_create_id - Create an RDMA identifier.
  *
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH V3 0/6] IB Netlink Interface and RDMA CM exports
       [not found] ` <1292257370-24391-1-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
                     ` (5 preceding siblings ...)
  2010-12-13 16:22   ` [PATCH V3 6/6] RDMA CM: Netlink Client Nir Muchtar
@ 2010-12-14 18:27   ` Jason Gunthorpe
       [not found]     ` <20101214182746.GB2506-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  6 siblings, 1 reply; 28+ messages in thread
From: Jason Gunthorpe @ 2010-12-14 18:27 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Mon, Dec 13, 2010 at 06:22:44PM +0200, Nir Muchtar wrote:

> static int format_address(void *addr, char *buff)
> {
> 	struct sockaddr_in *addr_in = (struct sockaddr_in *)addr;
> 	if (addr_in->sin_addr.s_addr) {
> 		sprintf(buff, "%s/%d", inet_ntoa(addr_in->sin_addr), ntohs(addr_in->sin_port));
> 	} 
> 	else
> 		sprintf(buff, "N/A");
> 	return 0;
> }

This should be:

static void format_address(const void *addr,size_t alen,
			   char *buff, size_t len)
{
	const sockaddr_storage *ss = addr;
	if (ss->ss_family == AF_INET && alen == sizeof(sockaddr_in)) {
	        const sockaddr_in *ss4 = addr;
                char S[64];
                snprintf(buff,len,"%s-%d",inet_ntop(ss->ss_family,ss4->sin_addr,S,sizeof(S)),ntohs(ss4->sin_port));
		return;
        }
	if (ss->ss_family == AF_INET6 && alen == sizeof(sockaddr_in6)) {
	        const sockaddr_in6 *ss6 = addr;
                char S[64];
                snprintf(buff,len,"%s-%d",inet_ntop(ss->ss_family,ss6->sin6_addr,S,sizeof(S)),ntohs(ss6->sin6_port));
		return;
        }
	snprintf(buff,len,"??");
}

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH V3 1/6] IB Netlink Infrastructure
       [not found]     ` <1292257370-24391-2-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
@ 2010-12-14 18:34       ` Jason Gunthorpe
       [not found]         ` <20101214183401.GC2506-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Jason Gunthorpe @ 2010-12-14 18:34 UTC (permalink / raw)
  To: Nir Muchtar; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Mon, Dec 13, 2010 at 06:22:45PM +0200, Nir Muchtar wrote:

> +int ibnl_add_client(int index, int nops,
> +		    int (*cb_table[])(struct sk_buff *skb,
> +				      struct netlink_callback *nlcb))

If you are going this way, then I think it would be better to have
netlink_dump_start be optional - not many calls need it.

ibnl_add_client(const struct ibnl_desc *desc);

struct inl_op
{
  int (*dump_start)(sk_buff *skb,struct netlink_callback *nlcb);
  int (*get)(...);
};
struct ibnl_desc
{
   int index;
   int ops;
   const struct ibnl_op *ops;
};

Or something.

In any event cb_table[] needs to be const.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH V3 5/6] RDMA CM: Save Owning PID
       [not found]     ` <1292257370-24391-6-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
@ 2010-12-14 18:34       ` Jason Gunthorpe
       [not found]         ` <20101214183458.GD2506-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Jason Gunthorpe @ 2010-12-14 18:34 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Mon, Dec 13, 2010 at 06:22:49PM +0200, Nir Muchtar wrote:
> Save owning PID to id-priv when creating id's/accepting connections.

This should be called creator_pid, not owner - to avoid confusion.

It may be better to not include it at all. See next letter

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH V3 6/6] RDMA CM: Netlink Client
       [not found]     ` <1292257370-24391-7-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
@ 2010-12-14 18:45       ` Jason Gunthorpe
       [not found]         ` <20101214184514.GE2506-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Jason Gunthorpe @ 2010-12-14 18:45 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Mon, Dec 13, 2010 at 06:22:50PM +0200, Nir Muchtar wrote:
> +			if (ibnl_put_attr(skb, nlh,
> +					  sizeof id->route.addr.src_addr,
> +					  &id->route.addr.src_addr,
> +					  IBNL_RDMA_CM_ATTR_SRC_ADDR)) {
> +				goto out;
> +			}

The sizeof the attribute should be sizeof(sockaddr_in) or
sizeof(sockaddr_in6), not sizeof(sockaddr_storage).

Other rdma_cm code uses this sort of construct:

        memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
                                     sizeof(struct sockaddr_in) :
                                     sizeof(struct sockaddr_in6));

But I can't belive there isn't a kernel function for the length of 
a sockaddr...

> +struct rdma_cm_id_stats {
> +	u8 nt;
> +	u8 port_num;
> +	u32 bound_dev_if;
> +	u32 ps;
> +	u8 cm_state;
> +	u32 qp_num;
> +	pid_t pid;
> +};

Careful of alignment issues and and type issues:

struct rdma_cm_id_stats {
  u32 bound_dev_if;
  u32 resereved_for_ib_bound_dev_ib;
  u32 port_space;
  u32 creator_pid;
  u8 nt;  // Use a better name
  u8 port_num;
  u8 cm_state;
  u8 reserved;
};

Rather than pid I think it is better to include enough information to
cross reference the RDMA_CM fd aginst /proc/../fd. Ie to get the pid(s)
you trundle through proc looking for that signature. Isn't that what
ss does?

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH V3 0/6] IB Netlink Interface and RDMA CM exports
       [not found]     ` <20101214182746.GB2506-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2010-12-19 14:30       ` Nir Muchtar
  2010-12-20 21:55         ` Jason Gunthorpe
  0 siblings, 1 reply; 28+ messages in thread
From: Nir Muchtar @ 2010-12-19 14:30 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Tue, 2010-12-14 at 11:27 -0700, Jason Gunthorpe wrote:

> This should be:
> 
> static void format_address(const void *addr,size_t alen,
> 			   char *buff, size_t len)
> {
> 	const sockaddr_storage *ss = addr;
> 	if (ss->ss_family == AF_INET && alen == sizeof(sockaddr_in)) {
> 	        const sockaddr_in *ss4 = addr;
>                 char S[64];
>                 snprintf(buff,len,"%s-%d",inet_ntop(ss->ss_family,ss4->sin_addr,S,sizeof(S)),ntohs(ss4->sin_port));
> 		return;
>         }
> 	if (ss->ss_family == AF_INET6 && alen == sizeof(sockaddr_in6)) {
> 	        const sockaddr_in6 *ss6 = addr;
>                 char S[64];
>                 snprintf(buff,len,"%s-%d",inet_ntop(ss->ss_family,ss6->sin6_addr,S,sizeof(S)),ntohs(ss6->sin6_port));
> 		return;
>         }
> 	snprintf(buff,len,"??");
> }
> 
> Jason

Don't believe all this IPv6 hype ;)
seriously though, The demo application is solely meant as an
illustration for the means of communications with the current design.
It's going to be rewritten in the future.

Nir

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH V3 1/6] IB Netlink Infrastructure
       [not found]         ` <20101214183401.GC2506-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2010-12-19 14:34           ` Nir Muchtar
  0 siblings, 0 replies; 28+ messages in thread
From: Nir Muchtar @ 2010-12-19 14:34 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Tue, 2010-12-14 at 11:34 -0700, Jason Gunthorpe wrote:
> On Mon, Dec 13, 2010 at 06:22:45PM +0200, Nir Muchtar wrote:
> 
> > +int ibnl_add_client(int index, int nops,
> > +		    int (*cb_table[])(struct sk_buff *skb,
> > +				      struct netlink_callback *nlcb))
> 
> If you are going this way, then I think it would be better to have
> netlink_dump_start be optional - not many calls need it.
> 

Yes, we may need such functionality in the future, but I'm reluctant to
add APIs that are not currently used, So I think we should wait until we
actually need this addition. It will be very straightforward to add once
we decide to. Don't you agree?


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH V3 5/6] RDMA CM: Save Owning PID
       [not found]         ` <20101214183458.GD2506-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2010-12-19 14:36           ` Nir Muchtar
  2010-12-20 21:54             ` Jason Gunthorpe
  0 siblings, 1 reply; 28+ messages in thread
From: Nir Muchtar @ 2010-12-19 14:36 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Tue, 2010-12-14 at 11:34 -0700, Jason Gunthorpe wrote:
> On Mon, Dec 13, 2010 at 06:22:49PM +0200, Nir Muchtar wrote:
> > Save owning PID to id-priv when creating id's/accepting connections.
> 
> This should be called creator_pid, not owner - to avoid confusion.
> 

But it's meant to be the owner and not necessarily the creator.
That's why its value is replaced in rdma_accept.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH V3 6/6] RDMA CM: Netlink Client
       [not found]         ` <20101214184514.GE2506-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2010-12-19 14:47           ` Nir Muchtar
  2010-12-20  7:24             ` Or Gerlitz
                               ` (2 more replies)
  0 siblings, 3 replies; 28+ messages in thread
From: Nir Muchtar @ 2010-12-19 14:47 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Tue, 2010-12-14 at 11:45 -0700, Jason Gunthorpe wrote:
> On Mon, Dec 13, 2010 at 06:22:50PM +0200, Nir Muchtar wrote:
> > +			if (ibnl_put_attr(skb, nlh,
> > +					  sizeof id->route.addr.src_addr,
> > +					  &id->route.addr.src_addr,
> > +					  IBNL_RDMA_CM_ATTR_SRC_ADDR)) {
> > +				goto out;
> > +			}
> 
> The sizeof the attribute should be sizeof(sockaddr_in) or
> sizeof(sockaddr_in6), not sizeof(sockaddr_storage).
> 
> Other rdma_cm code uses this sort of construct:
> 
>         memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
>                                      sizeof(struct sockaddr_in) :
>                                      sizeof(struct sockaddr_in6));
> 

Hmm, I was under the impression that sockaddr_storage was what Sean had
asked for. 
Sean, is sockaddr_in6/sockaddr_in good enough?

Btw, Why do you think this should be changed? Message size
considerations?

> > +struct rdma_cm_id_stats {
> > +	u8 nt;
> > +	u8 port_num;
> > +	u32 bound_dev_if;
> > +	u32 ps;
> > +	u8 cm_state;
> > +	u32 qp_num;
> > +	pid_t pid;
> > +};
> 
> Careful of alignment issues and and type issues:
> 
> struct rdma_cm_id_stats {
>   u32 bound_dev_if;
>   u32 resereved_for_ib_bound_dev_ib;
>   u32 port_space;
>   u32 creator_pid;
>   u8 nt;  // Use a better name
>   u8 port_num;
>   u8 cm_state;
>   u8 reserved;
> };
> 

I'll fix that. Thanks.

> Rather than pid I think it is better to include enough information to
> cross reference the RDMA_CM fd aginst /proc/../fd. Ie to get the pid(s)
> you trundle through proc looking for that signature. Isn't that what
> ss does?
> 
> Jason

Do you mean the file descriptor which is associated with the
rdma_event_channel? The event channel is created using ucma and not cma.
I don't think there's access to that information from cma. Even if there
was such access, ucma doesn't save inode info that can be cross
referenced as sock does.
Also, what about kernel threads that own ID's?

Nir

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH V3 6/6] RDMA CM: Netlink Client
  2010-12-19 14:47           ` Nir Muchtar
@ 2010-12-20  7:24             ` Or Gerlitz
  2010-12-20 19:16             ` Hefty, Sean
  2010-12-20 21:52             ` Jason Gunthorpe
  2 siblings, 0 replies; 28+ messages in thread
From: Or Gerlitz @ 2010-12-20  7:24 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: Jason Gunthorpe, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	monis-smomgflXvOZWk0Htik3J/w

Nir Muchtar wrote:
>> struct rdma_cm_id_stats {
>>   u32 bound_dev_if;
>>   u32 resereved_for_ib_bound_dev_ib;

I guess we want ...for_ib_bound_dev_if and not _ib

> Do you mean the file descriptor which is associated with the
> rdma_event_channel? The event channel is created using ucma and not cma.

indeed, we can't assume that the ID was created through ucma

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH V3 6/6] RDMA CM: Netlink Client
  2010-12-19 14:47           ` Nir Muchtar
  2010-12-20  7:24             ` Or Gerlitz
@ 2010-12-20 19:16             ` Hefty, Sean
  2010-12-20 21:52             ` Jason Gunthorpe
  2 siblings, 0 replies; 28+ messages in thread
From: Hefty, Sean @ 2010-12-20 19:16 UTC (permalink / raw)
  To: Nir Muchtar, Jason Gunthorpe
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

> Hmm, I was under the impression that sockaddr_storage was what Sean had
> asked for.
> Sean, is sockaddr_in6/sockaddr_in good enough?

I was asking that the message structure be large enough to contain sockaddr_storage.  Jason suggested specifying the size separately, which also works.  My real concern is that the patches for AF_IB support define sockaddr_ib which is larger than sockaddr_in6.

- Sean

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH V3 6/6] RDMA CM: Netlink Client
  2010-12-19 14:47           ` Nir Muchtar
  2010-12-20  7:24             ` Or Gerlitz
  2010-12-20 19:16             ` Hefty, Sean
@ 2010-12-20 21:52             ` Jason Gunthorpe
  2 siblings, 0 replies; 28+ messages in thread
From: Jason Gunthorpe @ 2010-12-20 21:52 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Sun, Dec 19, 2010 at 04:47:32PM +0200, Nir Muchtar wrote:
> On Tue, 2010-12-14 at 11:45 -0700, Jason Gunthorpe wrote:
> > On Mon, Dec 13, 2010 at 06:22:50PM +0200, Nir Muchtar wrote:
> > > +			if (ibnl_put_attr(skb, nlh,
> > > +					  sizeof id->route.addr.src_addr,
> > > +					  &id->route.addr.src_addr,
> > > +					  IBNL_RDMA_CM_ATTR_SRC_ADDR)) {
> > > +				goto out;
> > > +			}
> > 
> > The sizeof the attribute should be sizeof(sockaddr_in) or
> > sizeof(sockaddr_in6), not sizeof(sockaddr_storage).
> > 
> > Other rdma_cm code uses this sort of construct:
> > 
> >         memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
> >                                      sizeof(struct sockaddr_in) :
> >                                      sizeof(struct sockaddr_in6));
> > 

> Hmm, I was under the impression that sockaddr_storage was what Sean
> had asked for.  Sean, is sockaddr_in6/sockaddr_in good enough?
>
> Btw, Why do you think this should be changed? Message size
> considerations?

Message size is one thing, but it is not really correct to have a
known-type sockaddr with the wrong size. Ie a sockaddr_in is fixed to
sizeof(sockaddr_in) so anytime it appears filled in it must be with
that size.

In POSIX sockets sockaddrs are always associated with a socklen_t to
specify the length of the sockaddr and generally that length must
match the type of the sockaddr. With this netlink scheme the socklen_t
is the length of the netlink attribute.

> > Rather than pid I think it is better to include enough information to
> > cross reference the RDMA_CM fd aginst /proc/../fd. Ie to get the pid(s)
> > you trundle through proc looking for that signature. Isn't that what
> > ss does?
 
> Do you mean the file descriptor which is associated with the
> rdma_event_channel? The event channel is created using ucma and not
> cma.

Yes.

> I don't think there's access to that information from cma. Even if there
> was such access, ucma doesn't save inode info that can be cross
> referenced as sock does.

Then maybe don't include anything for now. If it is this complex it
should be another attribute. See my prior comments about how threading
all the modules together into a coherent view of the QP space is
very desirable..

> Also, what about kernel threads that own ID's?

They have no inode and often no meaningful PID either.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH V3 5/6] RDMA CM: Save Owning PID
  2010-12-19 14:36           ` Nir Muchtar
@ 2010-12-20 21:54             ` Jason Gunthorpe
       [not found]               ` <20101220215433.GB12090-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Jason Gunthorpe @ 2010-12-20 21:54 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Sun, Dec 19, 2010 at 04:36:22PM +0200, Nir Muchtar wrote:
> On Tue, 2010-12-14 at 11:34 -0700, Jason Gunthorpe wrote:
> > On Mon, Dec 13, 2010 at 06:22:49PM +0200, Nir Muchtar wrote:
> > > Save owning PID to id-priv when creating id's/accepting connections.
> > 
> > This should be called creator_pid, not owner - to avoid confusion.

> But it's meant to be the owner and not necessarily the creator.
> That's why its value is replaced in rdma_accept.

There is no such thing as a single owner for a fd based resource
like a RDMA_CM ID. The FD can be held by multiple processes. This is
why the best you can do by storing PIDs at certain times is to
constuct a creator_pid. 

Owner pid can only be determined by cross-referencing the inode of the
FD that holds the ownership of the object to the processes that hold
that FD.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH V3 0/6] IB Netlink Interface and RDMA CM exports
  2010-12-19 14:30       ` Nir Muchtar
@ 2010-12-20 21:55         ` Jason Gunthorpe
  0 siblings, 0 replies; 28+ messages in thread
From: Jason Gunthorpe @ 2010-12-20 21:55 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Sun, Dec 19, 2010 at 04:30:27PM +0200, Nir Muchtar wrote:

> Don't believe all this IPv6 hype ;)
> seriously though, The demo application is solely meant as an
> illustration for the means of communications with the current design.
> It's going to be rewritten in the future.

Sure, but you need to test that IPv6 works properly to complete the
patches :)

Also, you'll note my version exposes the bug in formatting the netlink
messages with the wrong size of sockaddr.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH V3 5/6] RDMA CM: Save Owning PID
       [not found]               ` <20101220215433.GB12090-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2010-12-21 15:05                 ` Nir Muchtar
  2010-12-21 18:10                   ` Jason Gunthorpe
  0 siblings, 1 reply; 28+ messages in thread
From: Nir Muchtar @ 2010-12-21 15:05 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Moni Shoua

On Mon, 2010-12-20 at 14:54 -0700, Jason Gunthorpe wrote:
> On Sun, Dec 19, 2010 at 04:36:22PM +0200, Nir Muchtar wrote:
> > On Tue, 2010-12-14 at 11:34 -0700, Jason Gunthorpe wrote:
> > > On Mon, Dec 13, 2010 at 06:22:49PM +0200, Nir Muchtar wrote:
> > > > Save owning PID to id-priv when creating id's/accepting connections.
> > > 
> > > This should be called creator_pid, not owner - to avoid confusion.
> 
> > But it's meant to be the owner and not necessarily the creator.
> > That's why its value is replaced in rdma_accept.
> 
> There is no such thing as a single owner for a fd based resource
> like a RDMA_CM ID. The FD can be held by multiple processes. This is
> why the best you can do by storing PIDs at certain times is to
> constuct a creator_pid. 
> 
> Owner pid can only be determined by cross-referencing the inode of the
> FD that holds the ownership of the object to the processes that hold
> that FD.
> 
> Jason

But an RDMA CM ID is not a FD based resource. An event channel is, but I
want to export ID stats and not event channel stats.
Are you saying that there's a scenario in which an RDMA CM ID is shared
between multiple processes? 
Even if there is such a scenario, I think that by taking the PID of the
one that calls rdma_accept, the idea of an owner stays consistent. 
I don't mind the name btw, just tying it to something else, which isn't
necessarily related.

Nir

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH V3 5/6] RDMA CM: Save Owning PID
  2010-12-21 15:05                 ` Nir Muchtar
@ 2010-12-21 18:10                   ` Jason Gunthorpe
  2010-12-21 19:43                     ` Nir Muchtar
       [not found]                     ` <20101221181043.GD12090-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 2 replies; 28+ messages in thread
From: Jason Gunthorpe @ 2010-12-21 18:10 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Moni Shoua

On Tue, Dec 21, 2010 at 05:05:50PM +0200, Nir Muchtar wrote:

> But an RDMA CM ID is not a FD based resource. An event channel is, but I
> want to export ID stats and not event channel stats.
> Are you saying that there's a scenario in which an RDMA CM ID is shared
> between multiple processes?

It *is* a FD based resource. Nearly everything in Linux is.

It is tied to the ucma_fops FD (ie /dev/rdma_cm, aka the event
channel), which when closed calls ucma_free_ctx which calls
rdma_destroy_id. Processes that can access that FD can control the
RDMA CM IDs associated with it.

The only case where this is not true is in the kernel, and in that
instance the PID is meaningless - you'd be much better off exporting
the name of the module that allocated the RDMA CM ID.

> Even if there is such a scenario, I think that by taking the PID of the
> one that calls rdma_accept, the idea of an owner stays consistent. 
> I don't mind the name btw, just tying it to something else, which isn't
> necessarily related.

The proper thing for userspace is to tie it back to the FD that owns
the resource - which is the FD that destroys the resource when it is
closed.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH V3 5/6] RDMA CM: Save Owning PID
  2010-12-21 18:10                   ` Jason Gunthorpe
@ 2010-12-21 19:43                     ` Nir Muchtar
  2010-12-21 20:33                       ` Nir Muchtar
       [not found]                       ` <7E95F01E94AB484F83061FCFA35B39F8794E3F-QfUkFaTmzUSUvQqKE/ONIwC/G2K4zDHf@public.gmane.org>
       [not found]                     ` <20101221181043.GD12090-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  1 sibling, 2 replies; 28+ messages in thread
From: Nir Muchtar @ 2010-12-21 19:43 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Moni Shoua

Jason Gunthorpe wrote:

> > But an RDMA CM ID is not a FD based resource. An event channel is, but I
> > want to export ID stats and not event channel stats.
> > Are you saying that there's a scenario in which an RDMA CM ID is shared
> > between multiple processes?
> 
> It *is* a FD based resource. Nearly everything in Linux is.

Yes, nearly everything which is exported by Linux. but an RDMA CM ID
is not a resource that has to be exported to the userspace.
it's not FD based on its own.

> 
> It is tied to the ucma_fops FD (ie /dev/rdma_cm, aka the event
> channel), which when closed calls ucma_free_ctx which calls
> rdma_destroy_id. Processes that can access that FD can control the
> RDMA CM IDs associated with it.
> 

Right. Like I already mentioned, an event channel is indeed FD based.
However, an event channel doesn't (necessarily) have a one to one 
relation with IDs. This is also reflected in the need to create/destroy 
IDs separately from event channels, and also in the fact that you can
have many IDs for one event channel. These are some of the differences
from the socket interface, which immediately creates a kernel resource
for each FD that is opened.

> The only case where this is not true is in the kernel, and in that
> instance the PID is meaningless - you'd be much better off exporting
> the name of the module that allocated the RDMA CM ID.
> 

But the PID is not meaningless in this case.
More often then not, the owner will be a kernel thread and will have a
PID.

> > Even if there is such a scenario, I think that by taking the PID of the
> > one that calls rdma_accept, the idea of an owner stays consistent. 
> > I don't mind the name btw, just tying it to something else, which isn't
> > necessarily related.
> 
> The proper thing for userspace is to tie it back to the FD that owns
> the resource - which is the FD that destroys the resource when it is
> closed.
> 
> Jason

What destroys an ID is rdma_destroy_id.
When calling rdma_destroy_event_channel the ID might be destroyed as 
a side effect (Although according to the documentation you have to 
have all of your IDs destroyed prior to calling destroy_event_channel),
but why should we rely on something that might control the ID when we 
can rely on the ID itself? After all, this is the object we're really
interested in.

Nir

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH V3 5/6] RDMA CM: Save Owning PID
  2010-12-21 19:43                     ` Nir Muchtar
@ 2010-12-21 20:33                       ` Nir Muchtar
       [not found]                       ` <7E95F01E94AB484F83061FCFA35B39F8794E3F-QfUkFaTmzUSUvQqKE/ONIwC/G2K4zDHf@public.gmane.org>
  1 sibling, 0 replies; 28+ messages in thread
From: Nir Muchtar @ 2010-12-21 20:33 UTC (permalink / raw)
  To: Nir Muchtar, Jason Gunthorpe
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Moni Shoua

> These are some of the differences
> from the socket interface, which immediately creates a kernel resource
> for each FD that is opened.

Not immediately of course, but you understand my point hopefully...

Nir
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH V3 5/6] RDMA CM: Save Owning PID
       [not found]                       ` <7E95F01E94AB484F83061FCFA35B39F8794E3F-QfUkFaTmzUSUvQqKE/ONIwC/G2K4zDHf@public.gmane.org>
@ 2010-12-21 20:36                         ` Jason Gunthorpe
       [not found]                           ` <20101221203627.GE12090-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Jason Gunthorpe @ 2010-12-21 20:36 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Moni Shoua

On Tue, Dec 21, 2010 at 09:43:01PM +0200, Nir Muchtar wrote:
> Jason Gunthorpe wrote:
> 
> > > But an RDMA CM ID is not a FD based resource. An event channel is, but I
> > > want to export ID stats and not event channel stats.
> > > Are you saying that there's a scenario in which an RDMA CM ID is shared
> > > between multiple processes?
> > 
> > It *is* a FD based resource. Nearly everything in Linux is.
> 
> Yes, nearly everything which is exported by Linux. but an RDMA CM ID
> is not a resource that has to be exported to the userspace.
> it's not FD based on its own.

But we are talking about a PID which primarily a userspace identifier.

> > It is tied to the ucma_fops FD (ie /dev/rdma_cm, aka the event
> > channel), which when closed calls ucma_free_ctx which calls
> > rdma_destroy_id. Processes that can access that FD can control the
> > RDMA CM IDs associated with it.

> Right. Like I already mentioned, an event channel is indeed FD based.
> However, an event channel doesn't (necessarily) have a one to one 
> relation with IDs.

So? Each RDMA CM ID is owned by userspace returns a reference to the
FD that owns it, and there can be many CM ID's referencing one FD.

> > The only case where this is not true is in the kernel, and in that
> > instance the PID is meaningless - you'd be much better off exporting
> > the name of the module that allocated the RDMA CM ID.

> But the PID is not meaningless in this case.  More often then not,
> the owner will be a kernel thread and will have a PID.

Well, there will always be a PID, it just might not be at all
informative (ie it could be kthreadd, ksoftirqd or something).  My
understanding was that actual named kernel threads are not really
preferred anymore, work queues/thread pool/etc are looking more
popular.. (eg http://lwn.net/Articles/347822/)

So I don't see the special case that kernel consumers don't have an
event channel FD as relevant. If you really care about this returning
the name of the module that created the ID is much more useful.

> but why should we rely on something that might control the ID when we 
> can rely on the ID itself? After all, this is the object we're really
> interested in.

The question is how do your correlate the kernel ID with the userspace
process(es) that have ownership of it. The only way to do that is to
associate the ID with the event channel FD with the process(es) that
have access to it.

Unfortunately there is no other way to correlate the in-kernel ID with
the process(es) that control it.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH V3 5/6] RDMA CM: Save Owning PID
       [not found]                           ` <20101221203627.GE12090-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2010-12-22 16:03                             ` Nir Muchtar
  2010-12-22 22:10                               ` Jason Gunthorpe
  0 siblings, 1 reply; 28+ messages in thread
From: Nir Muchtar @ 2010-12-22 16:03 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Moni Shoua

On Tue, 2010-12-21 at 13:36 -0700, Jason Gunthorpe wrote:

> The question is how do your correlate the kernel ID with the userspace
> process(es) that have ownership of it. The only way to do that is to
> associate the ID with the event channel FD with the process(es) that
> have access to it.
> 
> Unfortunately there is no other way to correlate the in-kernel ID with
> the process(es) that control it.
> 
> Jason

Ok, seems like we're going in circles...
The fact is that there is a way, that, all arguments aside, just works,
and IMO is coherent with the current way that RDMA CM IDs are managed.

Right now this is also the only practical way to export this
information, because RDMA CM/UCM don't behave like sock does in terms of
inode information. 

Also, in practice, many RDMA CM IDs are, in fact, created by the kernel
(when new connections are established), so I'm not sure if what you're
suggesting is even feasible with the current design of RDMA CM.

I realize you disagree, but I still want to move forward with the rest,
so I'll separate this patch from the rest and we can have a separate
discussion over this in the future.

Nir

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH V3 5/6] RDMA CM: Save Owning PID
  2010-12-22 16:03                             ` Nir Muchtar
@ 2010-12-22 22:10                               ` Jason Gunthorpe
  0 siblings, 0 replies; 28+ messages in thread
From: Jason Gunthorpe @ 2010-12-22 22:10 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Moni Shoua

On Wed, Dec 22, 2010 at 06:03:33PM +0200, Nir Muchtar wrote:

> Right now this is also the only practical way to export this
> information, because RDMA CM/UCM don't behave like sock does in terms of
> inode information. 

Are you referring to how proc/fd currently shows
/dev/infiniband/rdma_cm for the link destination ?

I thought I saw something go by that added the struct file * address
or similar to proc/fdinfo/ but I see it did not get in. Grump.

> Also, in practice, many RDMA CM IDs are, in fact, created by the kernel
> (when new connections are established), so I'm not sure if what you're
> suggesting is even feasible with the current design of RDMA CM.

All RDMA CM IDs are assigned an idr number in ctx_idr and matched to a
ucma_file object before they become visible to userspace and thus
owned by a process. RDMA CM IDs that have not gone through this step
are floating about in the kernel and don't need to be matched to pids.

It is really straighfroward to do: run idr_for_each over ctx_idr, find the
ucma_contex->cm_id that matches the rdma_cm_id you are dumping, fill a
netlink attribute with the ctx id and something from
ucma_context->file to cross refrence with (the currently missing)
something in fdinfo. If you don't find an idr entry then don't emit
anything - it isn't exported to userspace.

Anyhow, I'd be happy if you stuck with creator_pid and left this for
future. But, you might want to consider dumping the ctx id from ucma -
since that is very useful.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH V3 5/6] RDMA CM: Save Owning PID
       [not found]                     ` <20101221181043.GD12090-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2010-12-23 12:21                       ` Or Gerlitz
  0 siblings, 0 replies; 28+ messages in thread
From: Or Gerlitz @ 2010-12-23 12:21 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Nir Muchtar, Roland Dreier, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	Sean Hefty, Andy Grover

Jason Gunthorpe wrote:
> Nir Muchtar wrote:
> It is tied to the ucma_fops FD (ie /dev/rdma_cm, aka the event
> channel), which when closed calls ucma_free_ctx which calls rdma_destroy_id. 
> Processes that can access that FD can control the RDMA CM IDs associated with it.
> The only case where this is not true is in the kernel, and in that
> instance the PID is meaningless - you'd be much better off exporting
> the name of the module that allocated the RDMA CM ID.

Jason,

I'd like to strengthen the point Nir was trying to make with real-life examples

1. the Linux iscsi initiator - has a daemon named iscsid which opens all the iscsi connections where a connection is over specific transport - e.g iscsi/tcp, iser, iscsi offloads etc. iscsid is talking with the kernel iscsi stack through netlink (...) and in that framework iser connections are started from the **kernel** i.e directly with the rdma-cm and not through librdmacm. With Nir's patches we'd like to be able to track these connections, e.g now I can see through the iscsi tools that I have four sessions (=connections), two of them with iscsi/tcp and two with iser, but I must use a dedicated tool/stack (the iscsi one) for that, wheres if I had a way generic way to track all the connections  made through the rdma-cm things will be much easier.

> # netstat -ntp
> Proto Recv-Q Send-Q Local Address               Foreign Address             State       PID/Program name
> tcp        0      0 192.168.20.1:45858          192.168.20.222:3260         ESTABLISHED 26793/iscsid
> tcp        0      0 192.168.20.1:45859          192.168.20.222:3260         ESTABLISHED 26793/iscsid

no sign for the iser connections, has to use dedicated tool

> # iscsiadm -m session
> iser: [76] 192.168.20.222:3260,1 iqn.iser.A
> tcp: [77] 192.168.20.222:3260,1 iqn.tcp.D
> iser: [79] 192.168.20.222:3260,1 iqn.iser.B
> tcp: [80] 192.168.20.222:3260,1 iqn.tcp.C

2. sdp - the code runs in the kernel, same story, the connections are not tracked, a dedicated "sdpnetstat" tool has to be used. Connections (sockets) are tied to processes and will be best if reported through generic netlink interface

3. rds - again, kernel level rdma-cm, here connections are not tied to processes, but no reporting as of today and dedicated "rds-info" tool has to be used.

I'm sure there are (nfs rdma) and will be more examples for kernel level usage of the rdma-cm for which the pid field is helpful, 

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2010-12-23 12:21 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-12-13 16:22 [PATCH V3 0/6] IB Netlink Interface and RDMA CM exports Nir Muchtar
     [not found] ` <1292257370-24391-1-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
2010-12-13 16:22   ` [PATCH V3 1/6] IB Netlink Infrastructure Nir Muchtar
     [not found]     ` <1292257370-24391-2-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
2010-12-14 18:34       ` Jason Gunthorpe
     [not found]         ` <20101214183401.GC2506-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2010-12-19 14:34           ` Nir Muchtar
2010-12-13 16:22   ` [PATCH V3 2/6] IB Core: Error Handler Nir Muchtar
2010-12-13 16:22   ` [PATCH V3 3/6] IB Core Run Netlink Nir Muchtar
2010-12-13 16:22   ` [PATCH V3 4/6] RDMA CM: Export State Enum Nir Muchtar
2010-12-13 16:22   ` [PATCH V3 5/6] RDMA CM: Save Owning PID Nir Muchtar
     [not found]     ` <1292257370-24391-6-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
2010-12-14 18:34       ` Jason Gunthorpe
     [not found]         ` <20101214183458.GD2506-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2010-12-19 14:36           ` Nir Muchtar
2010-12-20 21:54             ` Jason Gunthorpe
     [not found]               ` <20101220215433.GB12090-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2010-12-21 15:05                 ` Nir Muchtar
2010-12-21 18:10                   ` Jason Gunthorpe
2010-12-21 19:43                     ` Nir Muchtar
2010-12-21 20:33                       ` Nir Muchtar
     [not found]                       ` <7E95F01E94AB484F83061FCFA35B39F8794E3F-QfUkFaTmzUSUvQqKE/ONIwC/G2K4zDHf@public.gmane.org>
2010-12-21 20:36                         ` Jason Gunthorpe
     [not found]                           ` <20101221203627.GE12090-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2010-12-22 16:03                             ` Nir Muchtar
2010-12-22 22:10                               ` Jason Gunthorpe
     [not found]                     ` <20101221181043.GD12090-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2010-12-23 12:21                       ` Or Gerlitz
2010-12-13 16:22   ` [PATCH V3 6/6] RDMA CM: Netlink Client Nir Muchtar
     [not found]     ` <1292257370-24391-7-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
2010-12-14 18:45       ` Jason Gunthorpe
     [not found]         ` <20101214184514.GE2506-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2010-12-19 14:47           ` Nir Muchtar
2010-12-20  7:24             ` Or Gerlitz
2010-12-20 19:16             ` Hefty, Sean
2010-12-20 21:52             ` Jason Gunthorpe
2010-12-14 18:27   ` [PATCH V3 0/6] IB Netlink Interface and RDMA CM exports Jason Gunthorpe
     [not found]     ` <20101214182746.GB2506-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2010-12-19 14:30       ` Nir Muchtar
2010-12-20 21:55         ` Jason Gunthorpe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.