All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V2 0/5] IB Netlink Interface and RDMA CM exports
@ 2010-11-29 16:16 Nir Muchtar
       [not found] ` <1291047399-430-1-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 27+ messages in thread
From: Nir Muchtar @ 2010-11-29 16:16 UTC (permalink / raw)
  To: rolandd-FYB4Gu1CFyUAvxtiuMwx3w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w, nirm-smomgflXvOZWk0Htik3J/w

This patch set provides means for communicating internal data from
IB modules to the userspace.
It is composed of two components:
1. The main ib_netlink infrastructure which lives and is initialized by ib_core.
2. additional clients which are implemented inside existing IB modules.
   Clients are responsible for adding/removing their modules during init/exit
   to/from the infrastructure.
   They also supply callbacks for the infrastructure to call
   based on the module/operation type.

ib_netlink uses the standard Netlink module and defines a new Netlink unit
(NETLINK_INFINIBAND) in netlink.h.
Upon receiving a request from userspace, it finds the target client
using the add/remove mechanism, and then calls its supplied callback.
The callbacks are responsible for allocating skbuffs and Netlink messages
using an infrastructure function (ibnl_put).
This function actually does the work of skbuff allocations,
fragmentation and sending.
The exact format of the returned data is unknown to ib_netlink itself.
It is shared between the kernel and
userspace in the form of common headers.

Changelog:
1. Patch doesn't define new modules. (See above for new design)
2. Fixed ib_core init function - initialized cache regardless of sysfs.
   (Let me know if that was intentional)
3. Messages are now transported in the format of
   multiple netlink messages per skb and multiple skbs per reply.
4. Several changes by Sean's comments:
   - Exported enum cma_state (now enum rdma_cm_state).
   - Made some type changes.
5. Some other minor fixes

A quick and dirty userspace demo application is attached for reference.
Here's a sample output:
Type  Device   Port  PID    Net_dev    Src Address          Dst Address          Space  State           QPN      
IB    mthca0   1     27404  ib0        192.168.168.3/7174   N/A                  TCP    LISTEN          0        
IB    mthca0   2     27415  ib1        192.168.2.3/7174     N/A                  TCP    LISTEN          0        
IB    mthca0   1     30     ib0        192.168.168.3/7174   192.168.168.2/57354  TCP    CONNECT         590854   
IB    mthca0   2     15     ib1        192.168.2.3/7174     192.168.2.4/33290    TCP    CONNECT         590855   

And here's the source:

#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>

#include <sys/socket.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <linux/netlink.h>
#include "rdma_cma.h"
#include "ib_netlink.h"

#include <sys/ioctl.h>
#include <net/if.h>
#include <net/if_arp.h>

#define MAX_PAYLOAD 8192 

/*
 * Translate a network interface index into its name.
 *
 * @index: interface index as reported by the kernel; 0 means "no device".
 *
 * Returns "N/A" when index is 0 or when the lookup fails (a diagnostic is
 * written to stderr in the latter case).  Otherwise returns a pointer to a
 * static buffer that is overwritten by the next call, so the result must be
 * consumed before calling again.
 *
 * The lookup uses if_indextoname() rather than a socket + SIOCGIFNAME ioctl,
 * so no file descriptor is opened (and none can be leaked) per call.
 */
char *get_ifname(int index)
{
	static char name[IF_NAMESIZE];

	if (index == 0)
		return "N/A";

	if (!if_indextoname((unsigned int)index, name)) {
		fprintf(stderr, "Interface name lookup failed for index %d\n", index);
		return "N/A";
	}
	return name;
}

static const char *format_rdma_cm_state(enum rdma_cm_state s)
{
	switch (s) {
	case RDMA_CM_IDLE:           return "IDLE";
	case RDMA_CM_ADDR_QUERY:     return "ADDR_QUERY";
	case RDMA_CM_ADDR_RESOLVED:  return "ADDR_RESOLVED";
	case RDMA_CM_ROUTE_QUERY:    return "ROUTE_QUERY";
	case RDMA_CM_ROUTE_RESOLVED: return "ROUTE_RESOLVED";
	case RDMA_CM_CONNECT:        return "CONNECT";
	case RDMA_CM_DISCONNECT:     return "DISCONNECT";
	case RDMA_CM_ADDR_BOUND:     return "ADDR_BOUND";
	case RDMA_CM_LISTEN:         return "LISTEN";
	case RDMA_CM_DEVICE_REMOVAL: return "DEVICE_REMOVAL";
	case RDMA_CM_DESTROYING:     return "DESTROYING";
	default: 	         return "N/A";
	}
}

/*
 * Map an RDMA port space identifier to a short printable label.
 * Unrecognised port spaces yield "N/A".
 */
static const char *format_port_space(enum rdma_port_space ps)
{
	if (ps == RDMA_PS_SDP)
		return "SDP";
	if (ps == RDMA_PS_IPOIB)
		return "IPOIB";
	if (ps == RDMA_PS_TCP)
		return "TCP";
	if (ps == RDMA_PS_UDP)
		return "UDP";
	return "N/A";
}

/*
 * Map the reported node type to a two-letter transport label.
 *
 * The value is compared against ARPHRD_* link-layer constants:
 * ARPHRD_INFINIBAND gives "IB", ARPHRD_ETHER gives "IW", anything else "N/A".
 * NOTE(review): the parameter is declared as enum rdma_node_type but is
 * matched against ARPHRD_* values -- confirm what the kernel side stores.
 */
static const char *format_node_type(enum rdma_node_type nt)
{
	if (nt == ARPHRD_INFINIBAND)
		return "IB";
	if (nt == ARPHRD_ETHER)
		return "IW";
	return "N/A";
}

/*
 * Render an address/port pair as "a.b.c.d/port" into buff.
 *
 * @addr: address words in network byte order; only addr[0] is used.
 * @port: port in network byte order.
 * @buff: destination buffer supplied by the caller; no size is passed, so
 *        the caller must make it large enough for the result.
 *
 * Writes "N/A" instead when either addr[0] or port is zero.
 * Always returns 0.
 */
static int format_address(__be32 addr[4], __be16 port, char *buff)
{
	const struct in_addr *ip = (const struct in_addr *)addr;

	if (!addr[0] || !port) {
		sprintf(buff, "N/A");
		return 0;
	}

	sprintf(buff, "%s/%d", inet_ntoa(*ip), ntohs(port));
	return 0;
}

int main()
{
	struct sockaddr_nl src_addr, dest_addr;
	struct msghdr msg;
	struct iovec iov;
	int sock_fd;
	struct rdma_cm_id_stats *cur_id_stats;
	char tmp_buf[64];
	int len;
	char cur_name[64];
	struct nlmsghdr *nlh = NULL;
	int ret;

	sock_fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_INFINIBAND);

	if (sock_fd < 0) {
		printf("Failed to create socket. Error: %s (%d)\n", strerror(errno), errno);
		return -1;
	}

	memset(&src_addr, 0, sizeof(src_addr));
	src_addr.nl_family = AF_NETLINK;
	src_addr.nl_pid = getpid();
	src_addr.nl_groups = 0;  /* not in mcast groups */
	bind(sock_fd, (struct sockaddr*)&src_addr, sizeof(src_addr));

	memset(&dest_addr, 0, sizeof(dest_addr));
	dest_addr.nl_family = AF_NETLINK;
	dest_addr.nl_pid = 0;   /* For Linux Kernel */
	dest_addr.nl_groups = 0; /* unicast */

	nlh=(struct nlmsghdr *)malloc(NLMSG_SPACE(MAX_PAYLOAD));
	nlh->nlmsg_len = NLMSG_SPACE(MAX_PAYLOAD);
	nlh->nlmsg_pid = getpid();
	nlh->nlmsg_flags = NLM_F_REQUEST;
	nlh->nlmsg_type = IBNL_GET_TYPE(IBNL_RDMA_CM, IBNL_RDMA_CM_STATS);

	iov.iov_base = (void *)nlh;
	iov.iov_len = nlh->nlmsg_len;
	msg.msg_name = (void *)&dest_addr;
	msg.msg_namelen = sizeof(dest_addr);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	sendmsg(sock_fd, &msg, 0);

	printf("%-5s %-8s %-5s %-6s %-10s %-25s %-25s %-6s %-15s %-8s \n",
		"Type", "Device", "Port", "PID", "Net_dev", "Src Address",
		"Dst Address", "Space", "State", "QPN");
	while (1) {
		memset(nlh, 0, NLMSG_SPACE(MAX_PAYLOAD));
		iov.iov_base = (void *)nlh;
		iov.iov_len = NLMSG_SPACE(MAX_PAYLOAD);
		msg.msg_name = (void *)&dest_addr;
		msg.msg_namelen = sizeof(dest_addr);
		msg.msg_iov = &iov;
		msg.msg_iovlen = 1;

		len = recvmsg(sock_fd, &msg, 0);
		if (len <= 0)
			break;
		cur_id_stats = NLMSG_DATA(nlh);
		while ((ret = NLMSG_OK(nlh, len)) != 0) {
			if (nlh->nlmsg_type == NLMSG_DONE) {
				close(sock_fd);
				return 0;
			}
			if (IBNL_GET_OP(nlh->nlmsg_type) == IBNL_RDMA_CM_DEVICE_NAME) {
				strncpy(cur_name, NLMSG_DATA(nlh), 64);
			}
			else {
				cur_id_stats = NLMSG_DATA(nlh);
				
				printf("%-5s %-8s %-5d %-6u %-10s ", 
					format_node_type(cur_id_stats->nt), 
					cur_name, 
					cur_id_stats->port_num,
					cur_id_stats->pid,
					get_ifname(cur_id_stats->bound_dev_if));
				format_address(cur_id_stats->local_addr, cur_id_stats->local_port, tmp_buf);
				printf("%-25s ",tmp_buf);
				format_address(cur_id_stats->remote_addr, cur_id_stats->remote_port, tmp_buf);
				printf("%-25s ",tmp_buf);
				printf("%-6s %-15s 0x%-8x \n",
					format_port_space(cur_id_stats->ps),
					format_rdma_cm_state(cur_id_stats->cm_state),
					cur_id_stats->qp_num);
			}
			nlh = NLMSG_NEXT(nlh, len);
		}
	}
	close(sock_fd);
	return 0;
}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH V2 1/5] IB Netlink Infrastructure
       [not found] ` <1291047399-430-1-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
@ 2010-11-29 16:16   ` Nir Muchtar
       [not found]     ` <1291047399-430-2-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
  2010-11-29 16:16   ` [PATCH V2 2/5] IB Core: Error Handler Nir Muchtar
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 27+ messages in thread
From: Nir Muchtar @ 2010-11-29 16:16 UTC (permalink / raw)
  To: rolandd-FYB4Gu1CFyUAvxtiuMwx3w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w, nirm-smomgflXvOZWk0Htik3J/w

The basic IB netlink infrastructure.
It allows for registration of IB module for which data is to be exported.
It supplies skb/message construction callbacks.

Signed-off-by: Nir Muchtar <nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
---
 drivers/infiniband/core/netlink.c |  202 +++++++++++++++++++++++++++++++++++++
 include/linux/netlink.h           |    1 +
 include/rdma/ib_netlink.h         |   60 +++++++++++
 3 files changed, 263 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/core/netlink.c
 create mode 100644 include/rdma/ib_netlink.h

diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
new file mode 100644
index 0000000..4c53666
--- /dev/null
+++ b/drivers/infiniband/core/netlink.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2010 Voltaire Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
+
+#include <linux/netlink.h>
+
+#include <net/netlink.h>
+#include <net/net_namespace.h>
+
+#include <rdma/ib_netlink.h>
+
+struct ibnl_cb {
+	struct list_head list;
+	int module;
+	int (*get_data)(int op, struct sk_buff **nl_skb, int pid);
+};
+
+static DEFINE_MUTEX(ibnl_mutex);
+static struct sock *nls;
+static LIST_HEAD(cb_list);
+
+int ibnl_add_cb(int module,
+		int (*get_data)(int op, struct sk_buff **skb, int pid))
+{
+	struct ibnl_cb *cur;
+	struct ibnl_cb *nl_cb = kmalloc(sizeof *nl_cb, GFP_KERNEL);
+
+	if (!nl_cb)
+		return -ENOMEM;
+	nl_cb->module = module;
+	nl_cb->get_data = get_data;
+	mutex_lock(&ibnl_mutex);
+	list_for_each_entry(cur, &cb_list, list) {
+		if (cur->module == module) {
+			pr_warn("Callback for %d already exists\n", module);
+			mutex_unlock(&ibnl_mutex);
+			kfree(nl_cb);
+			return -EINVAL;
+		}
+	}
+	list_add_tail(&nl_cb->list, &cb_list);
+	mutex_unlock(&ibnl_mutex);
+	return 0;
+}
+EXPORT_SYMBOL(ibnl_add_cb);
+
+int ibnl_remove_cb(int module)
+{
+	struct ibnl_cb *cur, *next;
+
+	mutex_lock(&ibnl_mutex);
+	list_for_each_entry_safe(cur, next, &cb_list, list) {
+		if (cur->module == module) {
+			list_del(&(cur->list));
+			mutex_unlock(&ibnl_mutex);
+			kfree(cur);
+			return 0;
+		}
+	}
+	pr_warn("Can't remove callback for module %d. Not found\n", module);
+	mutex_unlock(&ibnl_mutex);
+	return -EINVAL;
+}
+EXPORT_SYMBOL(ibnl_remove_cb);
+
+static void ibnl_unicast(int pid, struct sk_buff *skb)
+{
+	netlink_unicast(nls, skb, pid, MSG_DONTWAIT);
+}
+
+static int ibnl_handle_request(int pid, int op, struct ibnl_cb *cb)
+{
+	int ret;
+	struct sk_buff **nl_skb;
+
+	nl_skb = kmalloc(sizeof *nl_skb, GFP_KERNEL);
+	if (!nl_skb) {
+		pr_info("Couldn't allocate skb\n");
+		ret = -ENOMEM;
+		goto err;
+	}
+	*nl_skb = NULL;
+	ret = cb->get_data(op, nl_skb, pid);
+	if (ret) {
+		if (ret != -EMSGSIZE)
+			goto err;
+	}
+	if (*nl_skb) {
+		ibnl_put(nl_skb, pid, 0, 0, 0, NLMSG_DONE);
+		ibnl_unicast(pid, *nl_skb);
+	}
+	kfree(nl_skb);
+	return 0;
+
+err:
+	kfree(nl_skb);
+	return ret;
+}
+
+void *ibnl_put(struct sk_buff **skb, int pid, int seq,
+	       int len, int module, int op)
+{
+	unsigned char *prev_tail;
+	struct nlmsghdr *nlh;
+
+	if (*skb && skb_tailroom(*skb) < (int)NLMSG_SPACE(len)) {
+		ibnl_unicast(pid, *skb);
+		*skb = NULL;
+	}
+	if (!*skb) {
+		*skb = alloc_skb(NLMSG_SPACE(NLMSG_GOODSIZE), GFP_KERNEL);
+		if (!*skb) {
+			pr_info("Couldn't allocate skb\n");
+			return NULL;
+		}
+	}
+	prev_tail = skb_tail_pointer(*skb);
+	nlh = NLMSG_NEW(*skb, 0, seq, IBNL_GET_TYPE(module, op),
+			len, NLM_F_MULTI);
+	nlh->nlmsg_len = skb_tail_pointer(*skb) - prev_tail;
+	return NLMSG_DATA(nlh);
+nlmsg_failure:
+	nlmsg_trim(*skb, prev_tail);
+	return NULL;
+}
+EXPORT_SYMBOL(ibnl_put);
+
+static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	struct ibnl_cb *cb;
+	int type = nlh->nlmsg_type;
+	int pid = nlh->nlmsg_pid;
+	int module = IBNL_GET_MODULE(type);
+
+	list_for_each_entry(cb, &cb_list, list) {
+		if (cb->module == module)
+			return ibnl_handle_request(pid, IBNL_GET_OP(type), cb);
+	}
+	pr_info("Callback for module %d not found\n", module);
+	return -EINVAL;
+}
+
+static void ibnl_rcv(struct sk_buff *skb)
+{
+	mutex_lock(&ibnl_mutex);
+	netlink_rcv_skb(skb, &ibnl_rcv_msg);
+	mutex_unlock(&ibnl_mutex);
+}
+
+int ibnl_init(void)
+{
+	nls = netlink_kernel_create(&init_net, NETLINK_INFINIBAND, 0, ibnl_rcv,
+				    NULL, THIS_MODULE);
+	if (!nls) {
+		pr_warn("Failed to create netlink socket\n");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void ibnl_cleanup(void)
+{
+	struct ibnl_cb *cur, *next;
+
+	mutex_lock(&ibnl_mutex);
+	list_for_each_entry_safe(cur, next, &cb_list, list) {
+		list_del(&(cur->list));
+		kfree(cur);
+	}
+	mutex_unlock(&ibnl_mutex);
+	netlink_kernel_release(nls);
+}
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 1235669..c9693f9 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -24,6 +24,7 @@
 /* leave room for NETLINK_DM (DM Events) */
 #define NETLINK_SCSITRANSPORT	18	/* SCSI Transports */
 #define NETLINK_ECRYPTFS	19
+#define NETLINK_INFINIBAND	20
 
 #define MAX_LINKS 32		
 
diff --git a/include/rdma/ib_netlink.h b/include/rdma/ib_netlink.h
new file mode 100644
index 0000000..ebe564d
--- /dev/null
+++ b/include/rdma/ib_netlink.h
@@ -0,0 +1,60 @@
+#ifndef _IBNETLINK_H
+#define _IBNETLINK_H
+
+#include <linux/rtnetlink.h>
+#include <rdma/rdma_cm.h>
+
+enum {
+	IBNL_RDMA_CM = 1
+};
+
+enum {
+	IBNL_RDMA_CM_STATS = 0,
+	IBNL_RDMA_CM_DEVICE_NAME,
+	IBNL_RDMA_CM_ID_STATS,
+};
+
+#define IBNL_GET_MODULE(type) ((type & (((1 << 6) - 1) << 10)) >> 10)
+#define IBNL_GET_OP(type) (type & ((1 << 10) - 1))
+#define IBNL_GET_TYPE(module, op) ((module << 10) + op)
+
+#ifdef __KERNEL__
+
+int ibnl_init(void);
+void ibnl_cleanup(void);
+
+/**
+ * Add a callback for an IB module to the IB netlink module.
+ * @module: The added IB module
+ * @get_size: A callback for obtaining the necessary size for the returned data
+ * @get_data: A callback for obtaining the data
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int ibnl_add_cb(int module,
+		int (*get_data)(int op, struct sk_buff **skb, int pid));
+
+/**
+ * Remove a callback for a registered IB module
+ * @module: The removed IB module
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int ibnl_remove_cb(int module);
+
+/**
+ * Put a new message in a supplied skb.
+ * @skb: The netlink skb.
+ * @pid: The destination pid
+ * @seq: The message sequence number.
+ * @len: The requested message length to allocate.
+ * @module: Calling IB netlink module.
+ * @op: message content op.
+ * Returns the allocated buffer on success and NULL on failure.
+ */
+void *ibnl_put(struct sk_buff **skb, int pid, int seq,
+	       int len, int module, int op);
+
+#endif /* __KERNEL__ */
+
+#endif /* _IBNETLINK_H */
-- 
1.7.0.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH V2 2/5] IB Core: Error Handler
       [not found] ` <1291047399-430-1-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
  2010-11-29 16:16   ` [PATCH V2 1/5] IB Netlink Infrastructure Nir Muchtar
@ 2010-11-29 16:16   ` Nir Muchtar
  2010-11-29 16:16   ` [PATCH V2 3/5] IB Core: Run Netlink Nir Muchtar
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 27+ messages in thread
From: Nir Muchtar @ 2010-11-29 16:16 UTC (permalink / raw)
  To: rolandd-FYB4Gu1CFyUAvxtiuMwx3w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w, nirm-smomgflXvOZWk0Htik3J/w

Added missing error handling in ib_core init. (Wasn't intentional right?)

Signed-off-by: Nir Muchtar <nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
---
 drivers/infiniband/core/device.c |   11 +++++++++--
 1 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index a19effa..6e06e37 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -719,15 +719,22 @@ static int __init ib_core_init(void)
 	int ret;
 
 	ret = ib_sysfs_setup();
-	if (ret)
+	if (ret) {
 		printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
+		goto err;
+	}
 
 	ret = ib_cache_setup();
 	if (ret) {
 		printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
-		ib_sysfs_cleanup();
+		goto err_sysfs;
 	}
 
+	return 0;
+
+err_sysfs:
+	ib_sysfs_cleanup();
+err:
 	return ret;
 }
 
-- 
1.7.0.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH V2 3/5] IB Core: Run Netlink
       [not found] ` <1291047399-430-1-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
  2010-11-29 16:16   ` [PATCH V2 1/5] IB Netlink Infrastructure Nir Muchtar
  2010-11-29 16:16   ` [PATCH V2 2/5] IB Core: Error Handler Nir Muchtar
@ 2010-11-29 16:16   ` Nir Muchtar
  2010-11-29 16:16   ` [PATCH V2 4/5] RDMA CM: Export State Enum Nir Muchtar
  2010-11-29 16:16   ` [PATCH V2 5/5] RDMA CM: Netlink Client Nir Muchtar
  4 siblings, 0 replies; 27+ messages in thread
From: Nir Muchtar @ 2010-11-29 16:16 UTC (permalink / raw)
  To: rolandd-FYB4Gu1CFyUAvxtiuMwx3w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w, nirm-smomgflXvOZWk0Htik3J/w

Include and initialize IB netlink from IB core.

Signed-off-by: Nir Muchtar <nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
---
 drivers/infiniband/core/Makefile |    2 +-
 drivers/infiniband/core/device.c |   11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index cb1ab3e..c8bbaef 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=	ib_uverbs.o ib_ucm.o \
 					$(user_access-y)
 
 ib_core-y :=			packer.o ud_header.o verbs.o sysfs.o \
-				device.o fmr_pool.o cache.o
+				device.o fmr_pool.o cache.o netlink.o
 ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
 
 ib_mad-y :=			mad.o smi.o agent.o mad_rmpp.o
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 6e06e37..3229102 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -40,6 +40,8 @@
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
 
+#include <net/ib_netlink.h>
+
 #include "core_priv.h"
 
 MODULE_AUTHOR("Roland Dreier");
@@ -730,8 +732,16 @@ static int __init ib_core_init(void)
 		goto err_sysfs;
 	}
 
+	ret = ibnl_init();
+	if (ret) {
+		printk(KERN_WARNING "Couldn't init IB netlink interface\n");
+		goto err_cache;
+	}
+
 	return 0;
 
+err_cache:
+	ib_cache_cleanup();
 err_sysfs:
 	ib_sysfs_cleanup();
 err:
@@ -740,6 +750,7 @@ err:
 
 static void __exit ib_core_cleanup(void)
 {
+	ibnl_cleanup();
 	ib_cache_cleanup();
 	ib_sysfs_cleanup();
 	/* Make sure that any pending umem accounting work is done. */
-- 
1.7.0.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH V2 4/5] RDMA CM: Export State Enum
       [not found] ` <1291047399-430-1-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
                     ` (2 preceding siblings ...)
  2010-11-29 16:16   ` [PATCH V2 3/5] IB Core: Run Netlink Nir Muchtar
@ 2010-11-29 16:16   ` Nir Muchtar
  2010-11-29 16:16   ` [PATCH V2 5/5] RDMA CM: Netlink Client Nir Muchtar
  4 siblings, 0 replies; 27+ messages in thread
From: Nir Muchtar @ 2010-11-29 16:16 UTC (permalink / raw)
  To: rolandd-FYB4Gu1CFyUAvxtiuMwx3w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w, nirm-smomgflXvOZWk0Htik3J/w

exported enum cma_state into rdma_cm.h

Signed-off-by: Nir Muchtar <nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
---
 drivers/infiniband/core/cma.c |  166 +++++++++++++++++++----------------------
 include/rdma/rdma_cm.h        |   14 ++++
 2 files changed, 92 insertions(+), 88 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 6884da2..5821f93 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -89,20 +89,6 @@ struct cma_device {
 	struct list_head	id_list;
 };
 
-enum cma_state {
-	CMA_IDLE,
-	CMA_ADDR_QUERY,
-	CMA_ADDR_RESOLVED,
-	CMA_ROUTE_QUERY,
-	CMA_ROUTE_RESOLVED,
-	CMA_CONNECT,
-	CMA_DISCONNECT,
-	CMA_ADDR_BOUND,
-	CMA_LISTEN,
-	CMA_DEVICE_REMOVAL,
-	CMA_DESTROYING
-};
-
 struct rdma_bind_list {
 	struct idr		*ps;
 	struct hlist_head	owners;
@@ -126,7 +112,7 @@ struct rdma_id_private {
 	struct list_head	mc_list;
 
 	int			internal_id;
-	enum cma_state		state;
+	enum rdma_cm_state	state;
 	spinlock_t		lock;
 	struct mutex		qp_mutex;
 
@@ -164,8 +150,8 @@ struct cma_multicast {
 struct cma_work {
 	struct work_struct	work;
 	struct rdma_id_private	*id;
-	enum cma_state		old_state;
-	enum cma_state		new_state;
+	enum rdma_cm_state	old_state;
+	enum rdma_cm_state	new_state;
 	struct rdma_cm_event	event;
 };
 
@@ -216,7 +202,7 @@ struct sdp_hah {
 #define CMA_VERSION 0x00
 #define SDP_MAJ_VERSION 0x2
 
-static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
+static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
 {
 	unsigned long flags;
 	int ret;
@@ -228,7 +214,7 @@ static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
 }
 
 static int cma_comp_exch(struct rdma_id_private *id_priv,
-			 enum cma_state comp, enum cma_state exch)
+			 enum rdma_cm_state comp, enum rdma_cm_state exch)
 {
 	unsigned long flags;
 	int ret;
@@ -240,11 +226,11 @@ static int cma_comp_exch(struct rdma_id_private *id_priv,
 	return ret;
 }
 
-static enum cma_state cma_exch(struct rdma_id_private *id_priv,
-			       enum cma_state exch)
+static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
+				   enum rdma_cm_state exch)
 {
 	unsigned long flags;
-	enum cma_state old;
+	enum rdma_cm_state old;
 
 	spin_lock_irqsave(&id_priv->lock, flags);
 	old = id_priv->state;
@@ -408,7 +394,7 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
 }
 
 static int cma_disable_callback(struct rdma_id_private *id_priv,
-			      enum cma_state state)
+				enum rdma_cm_state state)
 {
 	mutex_lock(&id_priv->handler_mutex);
 	if (id_priv->state != state) {
@@ -432,7 +418,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
 	if (!id_priv)
 		return ERR_PTR(-ENOMEM);
 
-	id_priv->state = CMA_IDLE;
+	id_priv->state = RDMA_CM_IDLE;
 	id_priv->id.context = context;
 	id_priv->id.event_handler = event_handler;
 	id_priv->id.ps = ps;
@@ -838,16 +824,16 @@ static void cma_cancel_listens(struct rdma_id_private *id_priv)
 }
 
 static void cma_cancel_operation(struct rdma_id_private *id_priv,
-				 enum cma_state state)
+				 enum rdma_cm_state state)
 {
 	switch (state) {
-	case CMA_ADDR_QUERY:
+	case RDMA_CM_ADDR_QUERY:
 		rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
 		break;
-	case CMA_ROUTE_QUERY:
+	case RDMA_CM_ROUTE_QUERY:
 		cma_cancel_route(id_priv);
 		break;
-	case CMA_LISTEN:
+	case RDMA_CM_LISTEN:
 		if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)
 				&& !id_priv->cma_dev)
 			cma_cancel_listens(id_priv);
@@ -898,10 +884,10 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
 void rdma_destroy_id(struct rdma_cm_id *id)
 {
 	struct rdma_id_private *id_priv;
-	enum cma_state state;
+	enum rdma_cm_state state;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	state = cma_exch(id_priv, CMA_DESTROYING);
+	state = cma_exch(id_priv, RDMA_CM_DESTROYING);
 	cma_cancel_operation(id_priv, state);
 
 	mutex_lock(&lock);
@@ -992,9 +978,9 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	int ret = 0;
 
 	if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
-		cma_disable_callback(id_priv, CMA_CONNECT)) ||
+		cma_disable_callback(id_priv, RDMA_CM_CONNECT)) ||
 	    (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
-		cma_disable_callback(id_priv, CMA_DISCONNECT)))
+		cma_disable_callback(id_priv, RDMA_CM_DISCONNECT)))
 		return 0;
 
 	memset(&event, 0, sizeof event);
@@ -1025,7 +1011,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		event.status = -ETIMEDOUT; /* fall through */
 	case IB_CM_DREQ_RECEIVED:
 	case IB_CM_DREP_RECEIVED:
-		if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
+		if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
+				   RDMA_CM_DISCONNECT))
 			goto out;
 		event.event = RDMA_CM_EVENT_DISCONNECTED;
 		break;
@@ -1052,7 +1039,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	if (ret) {
 		/* Destroy the CM ID by returning a non-zero value. */
 		id_priv->cm_id.ib = NULL;
-		cma_exch(id_priv, CMA_DESTROYING);
+		cma_exch(id_priv, RDMA_CM_DESTROYING);
 		mutex_unlock(&id_priv->handler_mutex);
 		rdma_destroy_id(&id_priv->id);
 		return ret;
@@ -1109,7 +1096,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
 	rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	id_priv->state = CMA_CONNECT;
+	id_priv->state = RDMA_CM_CONNECT;
 	return id_priv;
 
 destroy_id:
@@ -1149,7 +1136,7 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
 	}
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	id_priv->state = CMA_CONNECT;
+	id_priv->state = RDMA_CM_CONNECT;
 	return id_priv;
 err:
 	rdma_destroy_id(id);
@@ -1178,7 +1165,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	int offset, ret;
 
 	listen_id = cm_id->context;
-	if (cma_disable_callback(listen_id, CMA_LISTEN))
+	if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
 		return -ECONNABORTED;
 
 	memset(&event, 0, sizeof event);
@@ -1217,7 +1204,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		 * while we're accessing the cm_id.
 		 */
 		mutex_lock(&lock);
-		if (cma_comp(conn_id, CMA_CONNECT) &&
+		if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
 		    !cma_is_ud_ps(conn_id->id.ps))
 			ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
 		mutex_unlock(&lock);
@@ -1229,7 +1216,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	conn_id->cm_id.ib = NULL;
 
 release_conn_id:
-	cma_exch(conn_id, CMA_DESTROYING);
+	cma_exch(conn_id, RDMA_CM_DESTROYING);
 	mutex_unlock(&conn_id->handler_mutex);
 	rdma_destroy_id(&conn_id->id);
 
@@ -1300,7 +1287,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 	struct sockaddr_in *sin;
 	int ret = 0;
 
-	if (cma_disable_callback(id_priv, CMA_CONNECT))
+	if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
 		return 0;
 
 	memset(&event, 0, sizeof event);
@@ -1343,7 +1330,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 	if (ret) {
 		/* Destroy the CM ID by returning a non-zero value. */
 		id_priv->cm_id.iw = NULL;
-		cma_exch(id_priv, CMA_DESTROYING);
+		cma_exch(id_priv, RDMA_CM_DESTROYING);
 		mutex_unlock(&id_priv->handler_mutex);
 		rdma_destroy_id(&id_priv->id);
 		return ret;
@@ -1365,7 +1352,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	struct ib_device_attr attr;
 
 	listen_id = cm_id->context;
-	if (cma_disable_callback(listen_id, CMA_LISTEN))
+	if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
 		return -ECONNABORTED;
 
 	/* Create a new RDMA id for the new IW CM ID */
@@ -1378,7 +1365,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	}
 	conn_id = container_of(new_cm_id, struct rdma_id_private, id);
 	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
-	conn_id->state = CMA_CONNECT;
+	conn_id->state = RDMA_CM_CONNECT;
 
 	dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr);
 	if (!dev) {
@@ -1429,7 +1416,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	if (ret) {
 		/* User wants to destroy the CM ID */
 		conn_id->cm_id.iw = NULL;
-		cma_exch(conn_id, CMA_DESTROYING);
+		cma_exch(conn_id, RDMA_CM_DESTROYING);
 		mutex_unlock(&conn_id->handler_mutex);
 		rdma_destroy_id(&conn_id->id);
 		goto out;
@@ -1520,7 +1507,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 
 	dev_id_priv = container_of(id, struct rdma_id_private, id);
 
-	dev_id_priv->state = CMA_ADDR_BOUND;
+	dev_id_priv->state = RDMA_CM_ADDR_BOUND;
 	memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
 	       ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
 
@@ -1552,14 +1539,14 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	if (id_priv->state == CMA_IDLE) {
+	if (id_priv->state == RDMA_CM_IDLE) {
 		((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
 		ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
 		if (ret)
 			return ret;
 	}
 
-	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
+	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
 		return -EINVAL;
 
 	id_priv->backlog = backlog;
@@ -1585,7 +1572,7 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
 	return 0;
 err:
 	id_priv->backlog = 0;
-	cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
+	cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
 	return ret;
 }
 EXPORT_SYMBOL(rdma_listen);
@@ -1611,8 +1598,8 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
 		route->num_paths = 1;
 		*route->path_rec = *path_rec;
 	} else {
-		work->old_state = CMA_ROUTE_QUERY;
-		work->new_state = CMA_ADDR_RESOLVED;
+		work->old_state = RDMA_CM_ROUTE_QUERY;
+		work->new_state = RDMA_CM_ADDR_RESOLVED;
 		work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
 		work->event.status = status;
 	}
@@ -1670,7 +1657,7 @@ static void cma_work_handler(struct work_struct *_work)
 		goto out;
 
 	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
-		cma_exch(id_priv, CMA_DESTROYING);
+		cma_exch(id_priv, RDMA_CM_DESTROYING);
 		destroy = 1;
 	}
 out:
@@ -1688,12 +1675,12 @@ static void cma_ndev_work_handler(struct work_struct *_work)
 	int destroy = 0;
 
 	mutex_lock(&id_priv->handler_mutex);
-	if (id_priv->state == CMA_DESTROYING ||
-	    id_priv->state == CMA_DEVICE_REMOVAL)
+	if (id_priv->state == RDMA_CM_DESTROYING ||
+	    id_priv->state == RDMA_CM_DEVICE_REMOVAL)
 		goto out;
 
 	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
-		cma_exch(id_priv, CMA_DESTROYING);
+		cma_exch(id_priv, RDMA_CM_DESTROYING);
 		destroy = 1;
 	}
 
@@ -1717,8 +1704,8 @@ static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
 
 	work->id = id_priv;
 	INIT_WORK(&work->work, cma_work_handler);
-	work->old_state = CMA_ROUTE_QUERY;
-	work->new_state = CMA_ROUTE_RESOLVED;
+	work->old_state = RDMA_CM_ROUTE_QUERY;
+	work->new_state = RDMA_CM_ROUTE_RESOLVED;
 	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
 
 	route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
@@ -1747,7 +1734,8 @@ int rdma_set_ib_paths(struct rdma_cm_id *id,
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
+	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
+			   RDMA_CM_ROUTE_RESOLVED))
 		return -EINVAL;
 
 	id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths,
@@ -1760,7 +1748,7 @@ int rdma_set_ib_paths(struct rdma_cm_id *id,
 	id->route.num_paths = num_paths;
 	return 0;
 err:
-	cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
+	cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED);
 	return ret;
 }
 EXPORT_SYMBOL(rdma_set_ib_paths);
@@ -1775,8 +1763,8 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
 
 	work->id = id_priv;
 	INIT_WORK(&work->work, cma_work_handler);
-	work->old_state = CMA_ROUTE_QUERY;
-	work->new_state = CMA_ROUTE_RESOLVED;
+	work->old_state = RDMA_CM_ROUTE_QUERY;
+	work->new_state = RDMA_CM_ROUTE_RESOLVED;
 	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
 	queue_work(cma_wq, &work->work);
 	return 0;
@@ -1840,8 +1828,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 		goto err2;
 	}
 
-	work->old_state = CMA_ROUTE_QUERY;
-	work->new_state = CMA_ROUTE_RESOLVED;
+	work->old_state = RDMA_CM_ROUTE_QUERY;
+	work->new_state = RDMA_CM_ROUTE_RESOLVED;
 	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
 	work->event.status = 0;
 
@@ -1863,7 +1851,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
+	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
 		return -EINVAL;
 
 	atomic_inc(&id_priv->refcount);
@@ -1892,7 +1880,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
 
 	return 0;
 err:
-	cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
+	cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
 	cma_deref_id(id_priv);
 	return ret;
 }
@@ -1957,7 +1945,8 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 	 * we're trying to acquire it.
 	 */
 	mutex_lock(&lock);
-	if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
+	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY,
+			   RDMA_CM_ADDR_RESOLVED)) {
 		mutex_unlock(&lock);
 		goto out;
 	}
@@ -1967,7 +1956,8 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 	mutex_unlock(&lock);
 
 	if (status) {
-		if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
+		if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
+				   RDMA_CM_ADDR_BOUND))
 			goto out;
 		event.event = RDMA_CM_EVENT_ADDR_ERROR;
 		event.status = status;
@@ -1978,7 +1968,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 	}
 
 	if (id_priv->id.event_handler(&id_priv->id, &event)) {
-		cma_exch(id_priv, CMA_DESTROYING);
+		cma_exch(id_priv, RDMA_CM_DESTROYING);
 		mutex_unlock(&id_priv->handler_mutex);
 		cma_deref_id(id_priv);
 		rdma_destroy_id(&id_priv->id);
@@ -2023,8 +2013,8 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
 
 	work->id = id_priv;
 	INIT_WORK(&work->work, cma_work_handler);
-	work->old_state = CMA_ADDR_QUERY;
-	work->new_state = CMA_ADDR_RESOLVED;
+	work->old_state = RDMA_CM_ADDR_QUERY;
+	work->new_state = RDMA_CM_ADDR_RESOLVED;
 	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
 	queue_work(cma_wq, &work->work);
 	return 0;
@@ -2053,13 +2043,13 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	if (id_priv->state == CMA_IDLE) {
+	if (id_priv->state == RDMA_CM_IDLE) {
 		ret = cma_bind_addr(id, src_addr, dst_addr);
 		if (ret)
 			return ret;
 	}
 
-	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
+	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
 		return -EINVAL;
 
 	atomic_inc(&id_priv->refcount);
@@ -2075,7 +2065,7 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 
 	return 0;
 err:
-	cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
+	cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
 	cma_deref_id(id_priv);
 	return ret;
 }
@@ -2253,7 +2243,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 		return -EAFNOSUPPORT;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
+	if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND))
 		return -EINVAL;
 
 	ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
@@ -2285,7 +2275,7 @@ err2:
 		mutex_unlock(&lock);
 	}
 err1:
-	cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
+	cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
 	return ret;
 }
 EXPORT_SYMBOL(rdma_bind_addr);
@@ -2358,7 +2348,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
 	struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
 	int ret = 0;
 
-	if (cma_disable_callback(id_priv, CMA_CONNECT))
+	if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
 		return 0;
 
 	memset(&event, 0, sizeof event);
@@ -2404,7 +2394,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
 	if (ret) {
 		/* Destroy the CM ID by returning a non-zero value. */
 		id_priv->cm_id.ib = NULL;
-		cma_exch(id_priv, CMA_DESTROYING);
+		cma_exch(id_priv, RDMA_CM_DESTROYING);
 		mutex_unlock(&id_priv->handler_mutex);
 		rdma_destroy_id(&id_priv->id);
 		return ret;
@@ -2570,7 +2560,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
+	if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
 		return -EINVAL;
 
 	if (!id->qp) {
@@ -2597,7 +2587,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 
 	return 0;
 err:
-	cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
+	cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
 	return ret;
 }
 EXPORT_SYMBOL(rdma_connect);
@@ -2683,7 +2673,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	if (!cma_comp(id_priv, CMA_CONNECT))
+	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
 		return -EINVAL;
 
 	if (!id->qp && conn_param) {
@@ -2812,8 +2802,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 	int ret;
 
 	id_priv = mc->id_priv;
-	if (cma_disable_callback(id_priv, CMA_ADDR_BOUND) &&
-	    cma_disable_callback(id_priv, CMA_ADDR_RESOLVED))
+	if (cma_disable_callback(id_priv, RDMA_CM_ADDR_BOUND) &&
+	    cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED))
 		return 0;
 
 	mutex_lock(&id_priv->qp_mutex);
@@ -2837,7 +2827,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 
 	ret = id_priv->id.event_handler(&id_priv->id, &event);
 	if (ret) {
-		cma_exch(id_priv, CMA_DESTROYING);
+		cma_exch(id_priv, RDMA_CM_DESTROYING);
 		mutex_unlock(&id_priv->handler_mutex);
 		rdma_destroy_id(&id_priv->id);
 		return 0;
@@ -3020,8 +3010,8 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
-	    !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+	if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
+	    !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
 		return -EINVAL;
 
 	mc = kmalloc(sizeof *mc, GFP_KERNEL);
@@ -3186,19 +3176,19 @@ static void cma_add_one(struct ib_device *device)
 static int cma_remove_id_dev(struct rdma_id_private *id_priv)
 {
 	struct rdma_cm_event event;
-	enum cma_state state;
+	enum rdma_cm_state state;
 	int ret = 0;
 
 	/* Record that we want to remove the device */
-	state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
-	if (state == CMA_DESTROYING)
+	state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
+	if (state == RDMA_CM_DESTROYING)
 		return 0;
 
 	cma_cancel_operation(id_priv, state);
 	mutex_lock(&id_priv->handler_mutex);
 
 	/* Check for destruction from another callback. */
-	if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
+	if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
 		goto out;
 
 	memset(&event, 0, sizeof event);
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 4fae903..c766da9 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -111,6 +111,20 @@ struct rdma_cm_event {
 	} param;
 };
 
+enum rdma_cm_state {
+	RDMA_CM_IDLE,
+	RDMA_CM_ADDR_QUERY,
+	RDMA_CM_ADDR_RESOLVED,
+	RDMA_CM_ROUTE_QUERY,
+	RDMA_CM_ROUTE_RESOLVED,
+	RDMA_CM_CONNECT,
+	RDMA_CM_DISCONNECT,
+	RDMA_CM_ADDR_BOUND,
+	RDMA_CM_LISTEN,
+	RDMA_CM_DEVICE_REMOVAL,
+	RDMA_CM_DESTROYING
+};
+
 struct rdma_cm_id;
 
 /**
-- 
1.7.0.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH V2 5/5] RDMA CM: Netlink Client
       [not found] ` <1291047399-430-1-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
                     ` (3 preceding siblings ...)
  2010-11-29 16:16   ` [PATCH V2 4/5] RDMA CM: Export State Enum Nir Muchtar
@ 2010-11-29 16:16   ` Nir Muchtar
       [not found]     ` <1291047399-430-6-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
  4 siblings, 1 reply; 27+ messages in thread
From: Nir Muchtar @ 2010-11-29 16:16 UTC (permalink / raw)
  To: rolandd-FYB4Gu1CFyUAvxtiuMwx3w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w, nirm-smomgflXvOZWk0Htik3J/w

Add callbacks and data types for statistics export.
One callback is implemented that exports all of the current devices/ids.
Add/remove the callback to IB Netlink on init/cleanup.

Signed-off-by: Nir Muchtar <nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
---
 drivers/infiniband/core/cma.c |   97 +++++++++++++++++++++++++++++++++++++++++
 include/rdma/rdma_cm.h        |   14 ++++++
 2 files changed, 111 insertions(+), 0 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 5821f93..9c6ce73 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -51,6 +51,7 @@
 #include <rdma/ib_cm.h>
 #include <rdma/ib_sa.h>
 #include <rdma/iw_cm.h>
+#include <rdma/ib_netlink.h>
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("Generic RDMA CM Agent");
@@ -134,6 +135,7 @@ struct rdma_id_private {
 	u32			qp_num;
 	u8			srq;
 	u8			tos;
+	pid_t			owner;
 };
 
 struct cma_multicast {
@@ -418,6 +420,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
 	if (!id_priv)
 		return ERR_PTR(-ENOMEM);
 
+	id_priv->owner = current->pid;
 	id_priv->state = RDMA_CM_IDLE;
 	id_priv->id.context = context;
 	id_priv->id.event_handler = event_handler;
@@ -2671,8 +2674,14 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 {
 	struct rdma_id_private *id_priv;
 	int ret;
+	unsigned long flags;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
+
+	spin_lock_irqsave(&id_priv->lock, flags);
+	id_priv->owner = current->pid;
+	spin_unlock_irqrestore(&id_priv->lock, flags);
+
 	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
 		return -EINVAL;
 
@@ -3243,6 +3252,91 @@ static void cma_remove_one(struct ib_device *device)
 	kfree(cma_dev);
 }
 
+static int cma_get_stats(struct sk_buff **nl_skb, int pid)
+{
+	struct rdma_cm_id_stats *id_stats;
+	struct rdma_id_private *id_priv;
+	struct rdma_cm_id *id = NULL;
+	struct cma_device *cma_dev;
+	char *dev_name;
+	struct sockaddr_in *src, *dst;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct sockaddr_in6 *src6, *dst6;
+#endif
+	int seq = 0;
+
+	mutex_lock(&lock);
+	list_for_each_entry(cma_dev, &dev_list, list) {
+		dev_name = ibnl_put(nl_skb, pid, seq++,
+				    strlen(cma_dev->device->name) + 1,
+				    IBNL_RDMA_CM,
+				    IBNL_RDMA_CM_DEVICE_NAME);
+		if (!dev_name)
+			goto errmem;
+		strcpy(dev_name, cma_dev->device->name);
+		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
+			id_stats = ibnl_put(nl_skb, pid, seq++,
+					    sizeof *id_stats, IBNL_RDMA_CM,
+					    IBNL_RDMA_CM_ID_STATS);
+			if (!id_stats)
+				goto errmem;
+			memset(id_stats, 0, sizeof *id_stats);
+			id = &id_priv->id;
+			id_stats->nt = id->route.addr.dev_addr.dev_type;
+			id_stats->port_num = id->port_num;
+			id_stats->bound_dev_if =
+				id->route.addr.dev_addr.bound_dev_if;
+
+			if (id->route.addr.src_addr.ss_family == AF_INET &&
+			    id->route.addr.dst_addr.ss_family == AF_INET) {
+				src = (struct sockaddr_in *)
+					(&id->route.addr.src_addr);
+				dst = (struct sockaddr_in *)
+					(&id->route.addr.dst_addr);
+				id_stats->local_port = src->sin_port;
+				id_stats->remote_port = dst->sin_port;
+				id_stats->local_addr[0] = src->sin_addr.s_addr;
+				id_stats->remote_addr[0] = dst->sin_addr.s_addr;
+			}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+			if (id->route.addr.src_addr.ss_family == AF_INET6 &&
+			    id->route.addr.dst_addr.ss_family == AF_INET6) {
+				src6 = (struct sockaddr_in6 *)
+					(&id->route.addr.src_addr);
+				dst6 = (struct sockaddr_in6 *)
+					(&id->route.addr.dst_addr);
+
+				ipv6_addr_copy((struct in6_addr *)
+					       (id_stats->local_addr),
+					       &src6->sin6_addr);
+				ipv6_addr_copy((struct in6_addr *)
+					       (id_stats->remote_addr),
+					       &dst6->sin6_addr);
+			}
+#endif
+			id_stats->ps = id->ps;
+			id_stats->cm_state = id_priv->state;
+			id_stats->qp_num = id_priv->qp_num;
+			id_stats->pid = id_priv->owner;
+		}
+	}
+	mutex_unlock(&lock);
+	return 0;
+errmem:
+	printk(KERN_INFO "RDMA CM failed to export data.\n");
+	mutex_unlock(&lock);
+	return -ENOMEM;
+}
+
+static int cma_get_data(int op, struct sk_buff **nl_skb, int pid)
+{
+	if (op == IBNL_RDMA_CM_STATS)
+		return cma_get_stats(nl_skb, pid);
+	printk(KERN_NOTICE "RDMA CM Invalid netlink operation (%d)\n", op);
+	return -EINVAL;
+}
+
 static int __init cma_init(void)
 {
 	int ret;
@@ -3258,6 +3352,8 @@ static int __init cma_init(void)
 	ret = ib_register_client(&cma_client);
 	if (ret)
 		goto err;
+	if (ibnl_add_cb(IBNL_RDMA_CM, cma_get_data))
+		printk(KERN_WARNING "RDMA CM failed to add netlink callback\n");
 	return 0;
 
 err:
@@ -3270,6 +3366,7 @@ err:
 
 static void __exit cma_cleanup(void)
 {
+	ibnl_remove_cb(IBNL_RDMA_CM);
 	ib_unregister_client(&cma_client);
 	unregister_netdevice_notifier(&cma_nb);
 	rdma_addr_unregister_client(&addr_client);
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index c766da9..7f341bb 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -147,6 +147,20 @@ struct rdma_cm_id {
 	u8			 port_num;
 };
 
+struct rdma_cm_id_stats {
+	enum rdma_node_type nt;
+	int port_num;
+	int bound_dev_if;
+	__be16	local_port;
+	__be16	remote_port;
+	__be32	local_addr[4];
+	__be32	remote_addr[4];
+	enum rdma_port_space ps;
+	enum rdma_cm_state cm_state;
+	u32 qp_num;
+	pid_t pid;
+};
+
 /**
  * rdma_create_id - Create an RDMA identifier.
  *
-- 
1.7.0.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH V2 1/5] IB Netlink Infrastructure
       [not found]     ` <1291047399-430-2-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
@ 2010-11-29 18:21       ` Jason Gunthorpe
       [not found]         ` <20101129182159.GB16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 27+ messages in thread
From: Jason Gunthorpe @ 2010-11-29 18:21 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Mon, Nov 29, 2010 at 06:16:35PM +0200, Nir Muchtar wrote:
> The basic IB netlink infrastructure.
> It allows for registration of IB module for which data is to be exported.
> It supplies skb/message construction callbacks.

You need to set up the module aliases for autoloading, i.e. like:

MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_INFINIBAND);

> +enum {
> +	IBNL_RDMA_CM = 1
> +};
> +
> +enum {
> +	IBNL_RDMA_CM_STATS = 0,
> +	IBNL_RDMA_CM_DEVICE_NAME,
> +	IBNL_RDMA_CM_ID_STATS,
> +};

These belong in patch #5 - the one that adds them..

I don't want to see this as a RDMA_CM specific thing.

> +int ibnl_add_cb(int module,
> +		int (*get_data)(int op, struct sk_buff **skb, int pid));

The cb needs to have enough arguments to be able to call
netlink_dump_start.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH V2 5/5] RDMA CM: Netlink Client
       [not found]     ` <1291047399-430-6-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
@ 2010-11-29 19:11       ` Jason Gunthorpe
       [not found]         ` <20101129191136.GC16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  2010-11-30 16:13       ` Hefty, Sean
  1 sibling, 1 reply; 27+ messages in thread
From: Jason Gunthorpe @ 2010-11-29 19:11 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Mon, Nov 29, 2010 at 06:16:39PM +0200, Nir Muchtar wrote:
> Add callbacks and data types for statistics export.
> One callback is implemented that exports all of the current devices/ids.
> Add/remove the callback to IB Netlink on init/cleanup.

Please include the schema for the messages you are adding to netlink
in the comment so other people can review them easily.

Looks to me like you have messages of the form:

{
 [IBNL_RDMA_CM_DEVICE_NAME - char[]]
 [IBNL_RDMA_CM_ID_STATS - struct rdma_cm_id_stats]*
}*

As I've said before, I don't want to see this tied to RDMA_CM, that is
not general enough for a new userspace API. The use of
IBNL_RDMA_CM_DEVICE_NAME is very un-netlink-like and is just an ugly
hack to avoid addressing that problem.

How about messages of the form:
{
 IBNL_QP - struct ib_nl_qp
   IBNL_QP_ATTR - struct ib_qp_attr (or a reasonable subset)
   IBNL_QP_CM_INFO u8[] - struct ib_nl_qp_cm_info
   IBNL_QP_CM_SERVICE_ID u8[]
   IBNL_RDMA_CM_INFO - struct ib_nl_rdma_cm_info
   IBNL_RDMA_CM_SRC - u8[]  // This is a sockaddr_*
   IBNL_RDMA_CM_DST - u8[] 
}+

Meaning there is an array of IBNL_QP messages which contains various
attributes. Similar to how everything else in netlink works.

struct ib_nl_qp
{
        // String names for IB devices was a mistake, don't perpetuate it.
        __u32 ib_dev_if;
	__u32 qp_num;
        __s32 creator_pid; // -1 for kernel consumer
};

struct ib_nl_qp_cm_info
{
      u32 cm_state; // enum ib_cm_state
      u32 lap_state;
};

struct ib_nl_rdma_cm_info
{
	__u32 bound_dev_if;
        __u32 family;
	__u32 cm_state; // enum rdma_cm_state
};

This captures more information and doesn't tie things to RDMA_CM.

iWarp QPs would not export IBNL_QP_CM_INFO and QP_CM_SERVICE_ID, but
ideally we'd have a call out to the NIC to include the TCP diag
information for the underlying TCP socket since there is no other way
to access that.

Non RDMA-CM QPs (ie ipoib) would not include the RDMA_CM bits.

If you study how SS works you'll see it is similar, it uses a message
of type AF_INET/6.. and then includes attributes like
INET_DIAG_MEMINFO/INFO/CONG

> @@ -134,6 +135,7 @@ struct rdma_id_private {
>  	u32			qp_num;
>  	u8			srq;
>  	u8			tos;
> +	pid_t			owner;

Maybe a separate patch for this? It probably really belongs in
ib_uverbs. What about kernel consumers?

> +static int cma_get_stats(struct sk_buff **nl_skb, int pid)

You really have to use netlink_dump_start here, doing it like this
will deadlock. See how other places use NLM_F_DUMP as well.

> +struct rdma_cm_id_stats {
> +	enum rdma_node_type nt;
> +	int port_num;
> +	int bound_dev_if;
> +	__be16	local_port;
> +	__be16	remote_port;
> +	__be32	local_addr[4];
> +	__be32	remote_addr[4];
> +	enum rdma_port_space ps;
> +	enum rdma_cm_state cm_state;
> +	u32 qp_num;
> +	pid_t pid;
> +};

Putting enums in a user/kernel structure like this makes me nervous
that we'll have a 32/64 bit problem. It would be more consistent with
the uverbs stuff to use explicit fixed width types.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH V2 5/5] RDMA CM: Netlink Client
       [not found]         ` <20101129191136.GC16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2010-11-30 12:34           ` Or Gerlitz
       [not found]             ` <4CF4EF73.6060406-hKgKHo2Ms0FWk0Htik3J/w@public.gmane.org>
  2010-11-30 14:09           ` Nir Muchtar
  1 sibling, 1 reply; 27+ messages in thread
From: Or Gerlitz @ 2010-11-30 12:34 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Nir Muchtar, rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w

Jason Gunthorpe wrote:
> struct ib_nl_qp
>         // String names for IB devices was a mistake, don't perpetuate it.
>         __u32 ib_dev_if;

Jason,

Do you have a concrete suggestion and/or sketch for a patch
someone can work on to make this enumeration take place?

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH V2 1/5] IB Netlink Infrastructure
       [not found]         ` <20101129182159.GB16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2010-11-30 12:56           ` Nir Muchtar
  2010-11-30 17:51             ` Jason Gunthorpe
  0 siblings, 1 reply; 27+ messages in thread
From: Nir Muchtar @ 2010-11-30 12:56 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Mon, 2010-11-29 at 11:21 -0700, Jason Gunthorpe wrote:
> On Mon, Nov 29, 2010 at 06:16:35PM +0200, Nir Muchtar wrote:
> > The basic IB netlink infrastructure.
> > It allows for registration of IB module for which data is to be exported.
> > It supplies skb/message construction callbacks.
> 
> You need to setup the module aliases for autoloading ie like:
> 
> MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_INFINIBAND);

Do you mean adding this to ib_core?
I'm not sure we want to autoload ib_core whenever the userspace asks for
"socket(PF_NETLINK, SOCK_RAW, NETLINK_INFINIBAND)"
What's wrong with receiving a "protocol not supported" error?
If ib_core is not loaded then there's no data anyway.
Am I missing other possible effects of MODULE_ALIAS here?
This would make more sense if ib_netlink was in a separate module.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH V2 5/5] RDMA CM: Netlink Client
       [not found]         ` <20101129191136.GC16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  2010-11-30 12:34           ` Or Gerlitz
@ 2010-11-30 14:09           ` Nir Muchtar
  2010-11-30 18:19             ` Jason Gunthorpe
  1 sibling, 1 reply; 27+ messages in thread
From: Nir Muchtar @ 2010-11-30 14:09 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Mon, 2010-11-29 at 12:11 -0700, Jason Gunthorpe wrote:
> On Mon, Nov 29, 2010 at 06:16:39PM +0200, Nir Muchtar wrote:
> > Add callbacks and data types for statistics export.
> > One callback is implemented that exports all of the current devices/ids.
> > Add/remove the callback to IB Netlink on init/cleanup.
> 
> Please include the schema for the messages you are adding to netlink
> in the comment so other people can review them easily.
> 
> Looks to me like you have messages of the form:
> 
> {
>  [IBNL_RDMA_CM_DEVICE_NAME - char[]]
>  [IBNL_RDMA_CM_ID_STATS - struct rdma_cm_id_stats]*
> }*
> 

Yes that's the basic structure. I'll add an explanation next time.

> As I've said before, I don't want to see this tied to RDMA_CM, that is
> not general enough for a new userspace API. The use of
> IBNL_RDMA_CM_DEVICE_NAME is very un-netlink-like and is just an ugly
> hack to avoid addressing that problem.

This is done to save space in the netlink messages.
I am open to ideas for improvements.
I thought of another possibility: We can make another op for rdma
devices only with index mapping. This could create problems if a
device is added/removed between calls though.
See my question about your suggestion below.

> 
> How about messages of the form:
> {
>  IBNL_QP - struct ib_nl_qp
>    IBNL_QP_ATTR - struct ib_qp_attr (or a reasonable subset)
>    IBNL_QP_CM_INFO u8[] - struct ib_nl_qp_cm_info
>    IBNL_QP_CM_SERVICE_ID u8[]
>    IBNL_RDMA_CM_INFO - struct ib_nl_rdma_cm_info
>    IBNL_RDMA_CM_SRC - u8[]  // This is a sockaddr_*
>    IBNL_RDMA_CM_DST - u8[] 
> }+
> 
> Meaning there is an array of IBNL_QP messages which contains various
> attributes. Similar to how everything else in netlink works.
> 
> struct ib_nl_qp
> {
>         // String names for IB devices was a mistake, don't perpetuate it.

I don't know of an IB device index mapping like the one in netdevice.
Am I missing one? Do you mean we should create one?

>         __u32 ib_dev_if;
> 	__u32 qp_num;
>         __s32 creator_pid; // -1 for kernel consumer
> };
> 
> struct ib_nl_qp_cm_info
> {
>       u32 cm_state; // enum ib_cm_state
>       u32 lap_state;
> };
> 
> struct ib_nl_rdma_cm_info
> {
> 	__u32 bound_dev_if;
>         __u32 family;
> 	__u32 cm_state; // enum rdma_cm_state
> };
> 
> This captures more information and doesn't tie things to RDMA_CM.

My problem with making everything QP related is that not everything
necessarily is. For example, when rdma cm id's are created they are not
yet bound to QP's. I guess you could send all zeros in this case, but as
more and more of such exceptions are needed this framework will become a
bit unnatural. The current implementation is not tying anything to RDMA
CM. It allows other modules to export data exactly the way they need.

> 
> > +static int cma_get_stats(struct sk_buff **nl_skb, int pid)
> 
> You really have to use netlink_dump_start here, doing it like this
> will deadlock. See how other places use NLM_F_DUMP as well.

Well, I already reviewed netlink_dump_start, and this is how it works 
as far as I can see (please correct me if I'm wrong):
1. Allocates an skb
2. Calls its supplied dump cb
3. Calls its supplied done cb, if applicable
4. Appends NLMSG_DONE

This appears to be executed synchronously, within the context of the
calling thread. So I couldn't figure out how to use it for avoiding long
locking times.

Anyway, what I tried to achieve is a mechanism that allocates more skb's
as they are needed, and separate it from the calling module. Do you know
of an inherent way to make netlink_dump_start do that?

> 
> > +struct rdma_cm_id_stats {
> > +	enum rdma_node_type nt;
> > +	int port_num;
> > +	int bound_dev_if;
> > +	__be16	local_port;
> > +	__be16	remote_port;
> > +	__be32	local_addr[4];
> > +	__be32	remote_addr[4];
> > +	enum rdma_port_space ps;
> > +	enum rdma_cm_state cm_state;
> > +	u32 qp_num;
> > +	pid_t pid;
> > +};
> 
> Putting enums in a user/kernel structure like this makes me nervous
> that we'll have a 32/64 bit problem. It would be more consistent with
> the uverbs stuff to use explicit fixed width types.

Yes you're right. Also, I see now that this is not normally done this
way, so I'll drop the enums.

> 
> Jason

Thanks again for all your input.
Nir

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* RE: [PATCH V2 5/5] RDMA CM: Netlink Client
       [not found]     ` <1291047399-430-6-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
  2010-11-29 19:11       ` Jason Gunthorpe
@ 2010-11-30 16:13       ` Hefty, Sean
       [not found]         ` <CF9C39F99A89134C9CF9C4CCB68B8DDF25B8924212-osO9UTpF0USkrb+BlOpmy7fspsVTdybXVpNB7YpNyf8@public.gmane.org>
  1 sibling, 1 reply; 27+ messages in thread
From: Hefty, Sean @ 2010-11-30 16:13 UTC (permalink / raw)
  To: Nir Muchtar, rolandd-FYB4Gu1CFyUAvxtiuMwx3w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

> +struct rdma_cm_id_stats {
> +	enum rdma_node_type nt;
> +	int port_num;
> +	int bound_dev_if;
> +	__be16	local_port;
> +	__be16	remote_port;
> +	__be32	local_addr[4];
> +	__be32	remote_addr[4];

Please use sockaddr_storage, so that we can expand the rdma_cm beyond ipv4/6 address support.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH V2 5/5] RDMA CM: Netlink Client
       [not found]             ` <4CF4EF73.6060406-hKgKHo2Ms0FWk0Htik3J/w@public.gmane.org>
@ 2010-11-30 17:50               ` Jason Gunthorpe
  0 siblings, 0 replies; 27+ messages in thread
From: Jason Gunthorpe @ 2010-11-30 17:50 UTC (permalink / raw)
  To: Or Gerlitz
  Cc: Nir Muchtar, rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w

On Tue, Nov 30, 2010 at 02:34:59PM +0200, Or Gerlitz wrote:
> Jason Gunthorpe wrote:
> > struct ib_nl_qp
> >         // String names for IB devices was a mistake, don't perpetuate it.
> >         __u32 ib_dev_if;

> Do you have a concrete suggestion and/or sketch for a patch
> someone can work on to make this enumeration to take place? 

Oh, it is really easy, just add an ifindex member to struct
ib_device. alloc_name can be extended to compute an appropriate index,
see how this works in dev_new_index.

Add a 'ifindex' to the ib_device's sysfs directory that exports this
number.

In future we'd want to see a netlink query to get the RDMA device list
to replace trundling through sysfs.

I think this is really important from a netlink perspective, there are
going to be so many places you want to refer to an RDMA device.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH V2 1/5] IB Netlink Infrastructure
  2010-11-30 12:56           ` Nir Muchtar
@ 2010-11-30 17:51             ` Jason Gunthorpe
       [not found]               ` <20101130175152.GH16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 27+ messages in thread
From: Jason Gunthorpe @ 2010-11-30 17:51 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Tue, Nov 30, 2010 at 02:56:41PM +0200, Nir Muchtar wrote:

> Do you mean adding this to ib_core?  I'm not sure we want to
> autoload ib_core whenever the userspace asks for "socket(PF_NETLINK,
> SOCK_RAW, NETLINK_INFINIBAND)" What's wrong with receiving a
> "protocol not supported" error?  If ib_core is not loaded then
> there's no data anyway.  Am I missing other possible effects of
> MODULE_ALIAS here?

It seems to be kernel policy to annotate this sort of autoload
meta-data.

If the module is demand loaded or not is a userspace choice.

Apps using this should handle both cases.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH V2 5/5] RDMA CM: Netlink Client
  2010-11-30 14:09           ` Nir Muchtar
@ 2010-11-30 18:19             ` Jason Gunthorpe
       [not found]               ` <20101130181944.GI16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 27+ messages in thread
From: Jason Gunthorpe @ 2010-11-30 18:19 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Tue, Nov 30, 2010 at 04:09:31PM +0200, Nir Muchtar wrote:

> > struct ib_nl_qp
> > {
> >         // String names for IB devices was a mistake, don't perpetuate it.
> 
> I don't know of an IB device index mapping like the one in netdevice.
> Am I missing one? Do you mean we should create one?

Yes, definitely. It is very easy to do and goes hand-in-hand with the
typical netlink protocol design.
 
> >         __u32 ib_dev_if;
> > 	__u32 qp_num;
> >         __s32 creator_pid; // -1 for kernel consumer
> > };
> > 
> > struct ib_nl_qp_cm_info
> > {
> >       u32 cm_state; // enum ib_cm_state
> >       u32 lap_state;
> > };
> > 
> > struct ib_nl_rdma_cm_info
> > {
> > 	__u32 bound_dev_if;
> >         __u32 family;
> > 	__u32 cm_state; // enum rdma_cm_state
> > };
> > 
> > This captures more information and doesn't tie things to RDMA_CM.
> 
> My problem with making everything QP related is that not everything
> necessarily is. For example, when creating rdma cm id's they are still 
> not bound to QP's. I guess you could send all zeros in this case, but as
> more and more of such exceptions are needed this framework will become a
> bit unnatural. The current implementation is not tying anything to RDMA
> CM. It allows other modules to export data exactly the way they need.

Well, I was outlining how I think the QP-centric information can be
returned. You are right that we also have non-QP info, like listening
objects, and I think that can be best returned with a separate
query. Trying to conflate them seems like it would be
trouble. Certainly, as I've described IBNL_QP messages should only
refer to active QPs.

Remember you can have as many queries as you like, this is just the QP
object query.

I guess an alternative would be to have many tables: RDMA_CM, QP, and
IB_CM and then rely on userspace to 'join' them by ifindex+QPN - but
that seems like a lot of work in userspace and I think pretty much
everyone is going to want to have the joined data.

> > > +static int cma_get_stats(struct sk_buff **nl_skb, int pid)
> > 
> > You really have to use netlink_dump_start here, doing it like this
> > will deadlock. See how other places use NLM_F_DUMP as well.
> 
> Well, I already reviewed netlink_dump_start, and this is how it works 
> as much as I can see (Please correct me if I'm wrong):
> 1. Allocates an skb
> 2. Calls its supplied dump cb
> 3. Calls its supplied done cb if applicable
> 4. Appends NLMSG_DONE

No, this isn't quite right. The dumpcb is also called after userspace
calls recvmsg(), which continues the dump once the buffer is
freed. The idea is to return a bit of the table on every dump call
back.

The way it is used is:
 1. Userspace does send()
 2. Kernel calls netlink_dump_start()
 3. netlink_dump_start calls callback which returns non-zero
 4. send() returns in userspace
 5. Userspace does recv()
 6. Kernel copies the data from #3 into userspace
 7. netlink_dump calls callback which returns non-zero
 8. recv() returns in userspace
 [...]

> Thanks again for all your input.

Thanks for working on this!

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH V2 5/5] RDMA CM: Netlink Client
       [not found]         ` <CF9C39F99A89134C9CF9C4CCB68B8DDF25B8924212-osO9UTpF0USkrb+BlOpmy7fspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2010-11-30 19:01           ` Jason Gunthorpe
  0 siblings, 0 replies; 27+ messages in thread
From: Jason Gunthorpe @ 2010-11-30 19:01 UTC (permalink / raw)
  To: Hefty, Sean
  Cc: Nir Muchtar, rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Tue, Nov 30, 2010 at 08:13:45AM -0800, Hefty, Sean wrote:
> > +struct rdma_cm_id_stats {
> > +	enum rdma_node_type nt;
> > +	int port_num;
> > +	int bound_dev_if;
> > +	__be16	local_port;
> > +	__be16	remote_port;
> > +	__be32	local_addr[4];
> > +	__be32	remote_addr[4];
> 
> Please use sockaddr_storage, so that we can expand the rdma_cm
> beyond ipv4/6 address support.

Actually for netlink the proper thing to do here is to place the
sockaddr in a sub-attribute that can be variable-sized. Do not put
variably sized data into a struct like this.

Further, sockaddr_storage is not actually ABI guaranteed to be
constant in size, it is just the current largest sockaddr
possible. This is why the sockaddr size in/out parameter associated
with all sockaddrs is so important.

Don't put sockaddrs in fixed-size structures for user/kernel
interfaces.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH V2 5/5] RDMA CM: Netlink Client
       [not found]               ` <20101130181944.GI16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2010-12-01 15:58                 ` Nir Muchtar
  2010-12-01 18:35                   ` Jason Gunthorpe
  0 siblings, 1 reply; 27+ messages in thread
From: Nir Muchtar @ 2010-12-01 15:58 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Tue, 2010-11-30 at 11:19 -0700, Jason Gunthorpe wrote:

> > I don't know of an IB device index mapping like the one in netdevice.
> > Am I missing one? Do you mean we should create one?
> 
> Yes, definately. It is very easy to do and goes hand-in-hand with the
> typical netlink protocol design.

I agree, but this is a bit out of scope for the current patches and I
think this kind of change should be given some thought. It needs to
supply userspace with mapping functions and I don't think it will be
that easy to complete. The patch in its current state uses names but it
doesn't perpetuate their use because the rdma cm export is separate from
the infrastructure. Once we have such an ability, it will be very easy
to use here.

> Well, I was outlining how I think the QP-centric information can be
> returned. You are right that we also have non-QP info, like listening
> objects, and I think that can be best returned with a seperate
> query. Trying to conflate them seems like it would be
> trouble. Certainly, as I've described IBNL_QP messages should only
> refer to active QPs.
> 
> Remember you can have as many queries as you like, this is just the QP
> object query.
> 
> I guess an alternative would be to have many tables: RDMA_CM, QP, and
> IB_CM and then rely on userspace to 'join' them by ifindex+QPN - but
> that seems like alot of work in userspace and I think pretty much
> everyone is going to want to have the joined data.

So we are in agreement that more than one export type is required here.
I do agree that your suggestion will make sense once we try to export QP
related data, so maybe we can agree that I will fully support such a
scheme, so it will be easy to implement later. By that I mean that the
infrastructure will allow adding arbitrary attributes to messages (in
type and in size). What do you think?

> No, this isn't quite right. The dumpcb is also called after userspace
> calls recvmsg(), which continues the dump once the buffer is
> freed. The idea is to return a bit of the table on every dump call
> back.
> 
> The way it is used is:
>  1. Userspace does send()
>  2. Kernel calls netlink_dump_start()
>  3. netlink_dump_start calls callback which returns non-zero
>  4. send() returns in userspace
>  5. Userspace does recv()
>  6. Kernel copies the data from #3 into userspace
>  7. netlink_dump calls callback which returns non-zero
>  8. recv() returns in userspace

Yes that's correct, but inet_diag takes care of the last two steps by
updating its cb index, and not dump_start. If we use it that way we can
have problems with changes in data structure on subsequent recv calls,
so if we want to keep it the same we would still need to employ locking.
I don't see a way to keep the same data without locking and without a
session mechanism of some sort.

Nir

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH V2 1/5] IB Netlink Infrastructure
       [not found]               ` <20101130175152.GH16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2010-12-01 16:05                 ` Nir Muchtar
  0 siblings, 0 replies; 27+ messages in thread
From: Nir Muchtar @ 2010-12-01 16:05 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Tue, 2010-11-30 at 10:51 -0700, Jason Gunthorpe wrote:

> On Tue, Nov 30, 2010 at 02:56:41PM +0200, Nir Muchtar wrote:
> 
> > Do you mean adding this to ib_core?  I'm not sure we want to
> > autoload ib_core whenever the userspace asks for "socket(PF_NETLINK,
> > SOCK_RAW, NETLINK_INFINIBAND)" What's wrong with receiving a
> > "protocol not supported" error?  If ib_core is not loaded then
> > there's no data anyway.  Am I missing other possible effects of
> > MODULE_ALIAS here?
> 
> It seems to be kernel policy to annotate this sort of autoload
> meta-data.
> 
> If the module is demand loaded or not is a userspace choice.
> 
> Apps using this should handle both cases.
> 
> Jason

MODULE_ALIAS_PF_PROTO is used by the netlink layer once netlink_create
is called by userspace (with request_module), So it is used by dedicated
netlink modules such as inet_diag and nf_netlink. netlink_scsitransport
is a closer example to our design, where there is no use of
MODULE_ALIAS.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH V2 5/5] RDMA CM: Netlink Client
  2010-12-01 15:58                 ` Nir Muchtar
@ 2010-12-01 18:35                   ` Jason Gunthorpe
       [not found]                     ` <20101201183538.GQ16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 27+ messages in thread
From: Jason Gunthorpe @ 2010-12-01 18:35 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Wed, Dec 01, 2010 at 05:58:54PM +0200, Nir Muchtar wrote:
> On Tue, 2010-11-30 at 11:19 -0700, Jason Gunthorpe wrote:
> 
> > > I don't know of an IB device index mapping like the one in netdevice.
> > > Am I missing one? Do you mean we should create one?
> > 
> > Yes, definately. It is very easy to do and goes hand-in-hand with the
> > typical netlink protocol design.
> 
> I agree, but this is a bit out of scope for the current patches and I
> think this kind of change should be given some thought. 

I'd view it as a pre-condition, actually.

> It needs to supply userspace with mapping functions and I don't
> think it will be that easy to complete. The patch in its current
> state uses names but it doesn't perpetuate their use because the
> rdma cm export is separate from the infrastructure. Once we have
> such an ability, it will be very easy to use here.

I think you are overthinking things. For now, just including the
ifindex attribute in sysfs is quite enough. As the netlink interface
is completed a by-index lookup will naturally fall out.

> So we are in agreement that more then one export type is required here.
> I do agree that your suggestion will make sense once we try to export QP
> related data, so maybe we can agree that I will fully support such a
> scheme, so it will be easy to implement later. By that I mean that the
> infrastructure will allow adding arbitrary attributes to messages (in
> type and in size). What do you think?

I'm happy to see things done later, if we can agree on what everything
should look like later so the pieces we have now fit. Maybe you can
outline the sort of schema you are thinking of as I did?

Having a framework where ib_core generates the QP message and calls
out to RDMA_CM, IB_CM, driver, uverbs, etc to fill in attributes seems
best to me for the QP table.

A table for listening objects would be kind of similar with
information provided by rdma_cm and ib_cm, or just ib_cm

> >  6. Kernel copies the data from #3 into userspace
> >  7. netlink_dump calls callback which returns non-zero
> >  8. recv() returns in userspace
 
> Yes that's correct, but inet_diag takes care of the last two steps by
> updating its cb index, and not dump_start. If we use it that way we can
> have problems with changes in data structure on subsequent recv calls,
> so if we want to keep it the same we would still need to employ locking.
> I don't see a way to keep the same data without locking and without a
> session mechanism of some sort.

That is what the netlink_callback structure is for, you can stick
your current position info into args[].

You shouldn't be attempting to dump the structure in one go while
holding a lock, you need to try best-efforts to dump it by keeping
some kind of current position value.

inet_diag seems to use a pretty simple scheme where it just records
the hash bucket and count into the chain. Not sure what happens if
things are erased - looks like you'll get duplicates/misses? You could
do the same by keeping track of the offset into the linked list.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH V2 5/5] RDMA CM: Netlink Client
       [not found]                     ` <20101201183538.GQ16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2010-12-07 15:40                       ` Nir Muchtar
  2010-12-07 18:54                         ` Jason Gunthorpe
  0 siblings, 1 reply; 27+ messages in thread
From: Nir Muchtar @ 2010-12-07 15:40 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Wed, 2010-12-01 at 11:35 -0700, Jason Gunthorpe wrote:
> > I agree, but this is a bit out of scope for the current patches and I
> > think this kind of change should be given some thought. 
> 
> I'd view it as a pre-condition, actually.
> 

I understand. Still, I prefer staying focused on the primary goal and
complete the patches before looking into this, So the solution for now
is to just skip on device name and obtain them from userspace using the
net devices and /sys/class. I might return to this once the first
patches are accepted.

> I'm happy to see things done later, if we can agree on what everything
> should look like later so the pieces we have now fit. Maybe you can
> outline the sort of schema you are thinking of as I did?
> 
> Having a framework where ib_core generates the QP message and calls
> out to RDMA_CM, IB_CM, driver, uverbs, etc to fill in attributes seems
> best to me for the QP table.

What I mean is that I will supply the maximum flexibility by supporting
arbitrary netlink messages and attributes. This will support your
suggested schema as well as any changes we'll agree upon. My current
plans are for RDMA CM exports and not QP table exports but this should
be next in line.

> You shouldn't be attempting to dump the structure in one go while
> holding a lock, you need to try best-efforts to dump it by keeping
> some kind of current position value.
> 
> inet_diag seems to use a pretty simple scheme where it just records
> the hash bucket and count into the chain. Not sure what happens if
> things are erased - looks like you'll get duplicates/misses? You could
> do the same by keeping track of the offset into the linked list.
> 
> Jason

The thing is, there's no easy and clean way to retrieve the export when
using dump_start.
The locking problem could be solved using GFP_ATOMIC when using malloc.
This will prevent possible long locking periods.

Nir

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH V2 5/5] RDMA CM: Netlink Client
  2010-12-07 15:40                       ` Nir Muchtar
@ 2010-12-07 18:54                         ` Jason Gunthorpe
  2010-12-07 20:53                           ` Nir Muchtar
  0 siblings, 1 reply; 27+ messages in thread
From: Jason Gunthorpe @ 2010-12-07 18:54 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-smomgflXvOZWk0Htik3J/w

On Tue, Dec 07, 2010 at 05:40:27PM +0200, Nir Muchtar wrote:

> I understand. still, I prefer staying focused on the primary goal and
> complete the patches before looking into this, So the solution for now
> is to just skip on device name and obtain them from userspace using the
> net devices and /sys/class. I might return to this once the first
> patches are accepted.

This doesn't really work if the IB netdevice is part of a bond, and it
won't work at all once Sean is done adding AF_GID.

Churning the userspace ABI is a really bad idea, if something is hard
to do, then fine. But this isn't..

> What I mean is that I will supply the maximum flexibility by supporting
> arbitrary netlink messages and attributes. This will support your
> suggested schema as well as any changes we'll agree upon. My current
> plans are for RDMA CM exports and not QP table exports but this should
> be next in line.

Do you have an idea what this will look like?

> > You shouldn't be attempting to dump the structure in one go while
> > holding a lock, you need to try best-efforts to dump it by keeping
> > some kind of current position value.
> > 
> > inet_diag seems to use a pretty simple scheme where it just records
> > the hash bucket and count into the chain. Not sure what happens if
> > things are erased - looks like you'll get duplicates/misses? You could
> > do the same by keeping track of the offset into the linked list.
> 
> The thing is, there's no easy and clean way to retrieve the export when
> using dump_start.

I don't follow this comment, can you elaborate?

This really needs to use the dump api, and I can't see any reason why
it can't.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* RE: [PATCH V2 5/5] RDMA CM: Netlink Client
  2010-12-07 18:54                         ` Jason Gunthorpe
@ 2010-12-07 20:53                           ` Nir Muchtar
       [not found]                             ` <7E95F01E94AB484F83061FCFA35B39F8794E3B-QfUkFaTmzUSUvQqKE/ONIwC/G2K4zDHf@public.gmane.org>
  0 siblings, 1 reply; 27+ messages in thread
From: Nir Muchtar @ 2010-12-07 20:53 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Moni Shoua, Or Gerlitz


Jason Gunthorpe wrote:
>  
> On Tue, Dec 07, 2010 at 05:40:27PM +0200, Nir Muchtar wrote:
> 
> > I understand. still, I prefer staying focused on the primary goal and
> > complete the patches before looking into this, So the solution for now
> > is to just skip on device name and obtain them from userspace using the
> > net devices and /sys/class. I might return to this once the first
> > patches are accepted.
> 
> This doesn't really work if the IB netdevice is part of a bond, and it
> won't work at all once Sean is done adding AF_GID.
> 
> Churning the userspace ABI is a really bad idea, if something is hard
> to do, then fine. But this isn't..

I'm just not convinced this can be easily completed/accepted and not divert
us from the primary goal, so I prefer picking this up later after those
patches are accepted. If I discover there's no good way to obtain this 
through userspace then I won't.

> 
> > What I mean is that I will supply the maximum flexibility by supporting
> > arbitrary netlink messages and attributes. This will support your
> > suggested schema as well as any changes we'll agree upon. My current
> > plans are for RDMA CM exports and not QP table exports but this should
> > be next in line.
> 
> Do you have an idea what this will look like?

I'm submitting an RDMA CM extension and not a QP table extension, so I
don't have a complete design, but the infrastructure will support a
more module collaborative design like you suggested as well as a more
per-module design like in the case of RDMA CM. The exact specification
can be agreed upon later.

> > The thing is, there's no easy and clean way to retrieve the export when
> > using dump_start.
> 
> I don't follow this comment, can you elaborate?
> 
> This really needs to use the dump api, and I can't see any reason why
> it can't.
> 
> Jason

As I said, there's just no way (I know of) to use dump_start, divide data
into several packets, and receive a consistent snapshot of the data, and
this is an issue. We can achieve all that by doing something a little 
different so why shouldn't we? 

dump_start is used by some subsystems and not used by others. It's a
convenience function and it doesn't necessarily fit in every case,
so we shouldn't force it.

Nir








--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH V2 5/5] RDMA CM: Netlink Client
       [not found]                             ` <7E95F01E94AB484F83061FCFA35B39F8794E3B-QfUkFaTmzUSUvQqKE/ONIwC/G2K4zDHf@public.gmane.org>
@ 2010-12-07 21:29                               ` Jason Gunthorpe
       [not found]                                 ` <20101207212924.GG16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 27+ messages in thread
From: Jason Gunthorpe @ 2010-12-07 21:29 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Moni Shoua, Or Gerlitz

On Tue, Dec 07, 2010 at 10:53:23PM +0200, Nir Muchtar wrote:

> I'm just not convinced this can be easily completed/accepted and not
> divert us from the primary goal, so I prefer picking this up later
> after those

This part of the kernel is mature, the 'primary goal' is to not add
new half-baked things that need to churn userspace APIs before they
are complete :)

> patches are accepted. If I discover there's no good way to obtain this 
> through userspace then I won't.

There isn't. Read my last message.

> > > The thing is, there's no easy and clean way to retrieve the export when
> > > using dump_start.
> > 
> > I don't follow this comment, can you elaborate?
> > 
> > This really needs to use the dump api, and I can't see any reason why
> > it can't.
 
> As I said, there's just no way (I know of) to use dump_start, divide data
> into several packets, and receive a consistent snapshot of the data, and
> this is an issue. We can achieve all that by doing something a little 
> different so why shouldn't we? 

You have to give up on 100% consistency to use dump_start, which is OK
for diags, and what other dumpers in the kernel do.

What you've done in your v2 patch won't work if the table you are
dumping is too large, once you pass sk_rmem_alloc for the netlink
socket it will deadlock. The purpose of dump_start is to avoid that
deadlock. (review my past messages on the subject)

Your v1 patch wouldn't deadlock, but it would fail to dump with
ENOMEM, and provides an avenue to build an unprivileged kernel OOM
DOS.

The places in the kernel that don't use dump_start have to stay under
sk_rmem_alloc.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH V2 5/5] RDMA CM: Netlink Client
       [not found]                                 ` <20101207212924.GG16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2010-12-08 14:55                                   ` Nir Muchtar
  2010-12-08 18:23                                     ` Jason Gunthorpe
  0 siblings, 1 reply; 27+ messages in thread
From: Nir Muchtar @ 2010-12-08 14:55 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Moni Shoua, Or Gerlitz

On Tue, 2010-12-07 at 14:29 -0700, Jason Gunthorpe wrote:

> What you've done in your v2 patch won't work if the table you are
> dumping is too large, once you pass sk_rmem_alloc for the netlink
> socket it will deadlock. The purpose of dump_start is to avoid that
> deadlock. (review my past messages on the subject)
> 
> Your v1 patch wouldn't deadlock, but it would fail to dump with
> ENOMEM, and provides an avenue to build an unprivileged kernel OOM
> DOS.
> 
> The places in the kernel that don't use dump_start have to stay under
> sk_rmem_alloc.
> 
> Jason

Sorry, I still need some clarifications...
When you say deadlocks, do you mean when calling malloc with a lock or
when overflowing a socket receive buffer?
For the second case, when we use netlink_unicast, the skbuff is sent and
freed. It is transferred to the userspace's socket using netlink_sendskb
and accumulated in its recv buff.

Are you referring to a deadlock there? I still fail to see the issue.
Why would the kernel socket recv buff reach a limit? Could you please
elaborate?

Thanks,
Nir

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH V2 5/5] RDMA CM: Netlink Client
  2010-12-08 14:55                                   ` Nir Muchtar
@ 2010-12-08 18:23                                     ` Jason Gunthorpe
       [not found]                                       ` <20101208182356.GK16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 27+ messages in thread
From: Jason Gunthorpe @ 2010-12-08 18:23 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Moni Shoua, Or Gerlitz

On Wed, Dec 08, 2010 at 04:55:22PM +0200, Nir Muchtar wrote:
> On Tue, 2010-12-07 at 14:29 -0700, Jason Gunthorpe wrote:
> 
> > What you've done in your v2 patch won't work if the table you are
> > dumping is too large, once you pass sk_rmem_alloc for the netlink
> > socket it will deadlock. The purpose of dump_start is to avoid that
> > deadlock. (review my past messages on the subject)
> > 
> > Your v1 patch wouldn't deadlock, but it would fail to dump with
> > ENOMEM, and provides an avenue to build an unprivileged kernel OOM
> > DOS.
> > 
> > The places in the kernel that don't use dump_start have to stay under
> > sk_rmem_alloc.
> > 
> > Jason
> 
> Sorry, I still need some clarifications...
> When you say deadlocks, do you mean when calling malloc with a lock or
> when overflowing a socket receive buffer?
> For the second case, when we use netlink_unicast, the skbuff is sent and
> freed. It is transferred to the userspace's socket using netlink_sendskb
> and accumulated in its recv buff.
> 
> Are you referring to a deadlock there? I still fail to see the issue.
> Why would the kernel socket recv buff reach a limit? Could you please
> elaborate?

Netlink is all driven from user space syscalls.. so it looks like

sendmsg()
[..]
ibnl_rcv_msg
cma_get_stats
[..]
ibnl_unicast
[..]
netlink_attachskb
(now we block on the socket recv queue once it fills)

The deadlock is that userspace is sitting in sendmsg() while the
kernel is sleeping in netlink_attachskb waiting for the recvbuf to
empty.

User space cannot call recvmsg() while it is blocked in sendmsg()
so it all goes boom.

Even if cma_get_stats was executed from a kernel thread and
ibnl_rcv_msg returned back to userspace you still hold the dev_list
mutex while calling ibnl_unicast, which can sleep waiting on
userspace, which creates an easy DOS against the RDMA CM (I can write
a program that causes the kernel to hold the mutex indefinitely).

You can't hold the mutex while sleeping for userspace, so you have to
unlock it. If you unlock it you have to fixup your position when you
re-lock it. If you can fixup your position then you can use
dump_start.

I don't see malloc being a concern anywhere in what you've done...

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH V2 5/5] RDMA CM: Netlink Client
       [not found]                                       ` <20101208182356.GK16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2010-12-09  8:47                                         ` Nir Muchtar
  2010-12-09 17:26                                           ` Jason Gunthorpe
  0 siblings, 1 reply; 27+ messages in thread
From: Nir Muchtar @ 2010-12-09  8:47 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Moni Shoua, Or Gerlitz

On Wed, 2010-12-08 at 11:23 -0700, Jason Gunthorpe wrote:

> > Sorry, I still need some clarifications...
> > When you say deadlocks, do you mean when calling malloc with a lock or
> > when overflowing a socket receive buffer?
> > For the second case, when we use netlink_unicast, the skbuff is sent and
> > freed. It is transferred to the userspace's socket using netlink_sendskb
> > and accumulated in its recv buff.
> > 
> > Are you referring to a deadlock there? I still fail to see the issue.
> > Why would the kernel socket recv buff reach a limit? Could you please
> > elaborate?
> 
> Netlink is all driven from user space syscalls.. so it looks like
> 
> sendmsg()
> [..]
> ibnl_rcv_msg
> cma_get_stats
> [..]
> ibnl_unicast
> [..]
> netlink_attachskb
> (now we block on the socket recv queue once it fills)
> 
> The deadlock is that userspace is sitting in sendmsg() while the
> kernel is sleeping in netlink_attachskb waiting for the recvbuf to
> empty.
> 
> User space cannot call recvmsg() while it is blocked in sendmsg()
> so it all goes boom.
> 

Oh, now I see what you mean. I thought you meant the recv buffer in the
netlink socket... 

But I'm using MSG_DONTWAIT when calling netlink_unicast, so attachskb
shouldn't block. I also tested that.
I do agree that freeing the skb and simply giving up is not the best we
can do, so we can try and send as much as we can instead, but either
way, a deadlock shouldn't occur.

Nir

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH V2 5/5] RDMA CM: Netlink Client
  2010-12-09  8:47                                         ` Nir Muchtar
@ 2010-12-09 17:26                                           ` Jason Gunthorpe
  0 siblings, 0 replies; 27+ messages in thread
From: Jason Gunthorpe @ 2010-12-09 17:26 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Moni Shoua, Or Gerlitz

On Thu, Dec 09, 2010 at 10:47:18AM +0200, Nir Muchtar wrote:

> But I'm using MSG_DONTWAIT when calling netlink_unicast, so attachskb
> shouldn't block. I also tested that.

But then you are guaranteed to have an incomplete dump once you have
enough entries!

The best trade-off is what the other dump_start users do; you might
have an inconsistent dump sometimes, but at least it is complete and
correct most of the time.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2010-12-09 17:26 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-11-29 16:16 [PATCH V2 0/5] IB Netlink Interface and RDMA CM exports Nir Muchtar
     [not found] ` <1291047399-430-1-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
2010-11-29 16:16   ` [PATCH V2 1/5] IB Netlink Infrastructure Nir Muchtar
     [not found]     ` <1291047399-430-2-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
2010-11-29 18:21       ` Jason Gunthorpe
     [not found]         ` <20101129182159.GB16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2010-11-30 12:56           ` Nir Muchtar
2010-11-30 17:51             ` Jason Gunthorpe
     [not found]               ` <20101130175152.GH16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2010-12-01 16:05                 ` Nir Muchtar
2010-11-29 16:16   ` [PATCH V2 2/5] IB Core: Error Handler Nir Muchtar
2010-11-29 16:16   ` [PATCH V2 3/5] IB Core: Run Netlink Nir Muchtar
2010-11-29 16:16   ` [PATCH V2 4/5] RDMA CM: Export State Enum Nir Muchtar
2010-11-29 16:16   ` [PATCH V2 5/5] RDMA CM: Netlink Client Nir Muchtar
     [not found]     ` <1291047399-430-6-git-send-email-nirm-smomgflXvOZWk0Htik3J/w@public.gmane.org>
2010-11-29 19:11       ` Jason Gunthorpe
     [not found]         ` <20101129191136.GC16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2010-11-30 12:34           ` Or Gerlitz
     [not found]             ` <4CF4EF73.6060406-hKgKHo2Ms0FWk0Htik3J/w@public.gmane.org>
2010-11-30 17:50               ` Jason Gunthorpe
2010-11-30 14:09           ` Nir Muchtar
2010-11-30 18:19             ` Jason Gunthorpe
     [not found]               ` <20101130181944.GI16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2010-12-01 15:58                 ` Nir Muchtar
2010-12-01 18:35                   ` Jason Gunthorpe
     [not found]                     ` <20101201183538.GQ16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2010-12-07 15:40                       ` Nir Muchtar
2010-12-07 18:54                         ` Jason Gunthorpe
2010-12-07 20:53                           ` Nir Muchtar
     [not found]                             ` <7E95F01E94AB484F83061FCFA35B39F8794E3B-QfUkFaTmzUSUvQqKE/ONIwC/G2K4zDHf@public.gmane.org>
2010-12-07 21:29                               ` Jason Gunthorpe
     [not found]                                 ` <20101207212924.GG16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2010-12-08 14:55                                   ` Nir Muchtar
2010-12-08 18:23                                     ` Jason Gunthorpe
     [not found]                                       ` <20101208182356.GK16788-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2010-12-09  8:47                                         ` Nir Muchtar
2010-12-09 17:26                                           ` Jason Gunthorpe
2010-11-30 16:13       ` Hefty, Sean
     [not found]         ` <CF9C39F99A89134C9CF9C4CCB68B8DDF25B8924212-osO9UTpF0USkrb+BlOpmy7fspsVTdybXVpNB7YpNyf8@public.gmane.org>
2010-11-30 19:01           ` Jason Gunthorpe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.