All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V8 libmlx4 0/2] Add extension and XRC QP support
@ 2013-07-25  8:40 Yishai Hadas
       [not found] ` <1374741618-890-1-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 5+ messages in thread
From: Yishai Hadas @ 2013-07-25  8:40 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA, roland-BHEL68pLQRGGvPXPguhicg
  Cc: ogerlitz-VPRAkNaXOzVWk0Htik3J/w, tzahio-VPRAkNaXOzVWk0Htik3J/w,
	yishaih-VPRAkNaXOzVWk0Htik3J/w

Extend XRC support to user space. Because XRC requires
new verbs and extensions to existing verbs, we first implement the
infra-structure changes needed to support verbs extensions matching to
libibverbs scheme, later XRC support is built on top of that infrastructure.

Changes from V7:
Adapted code to use the new helper macro verbs_set_ctx_op from V8 of libibverbs.

Changes from V6:
Adapted to latest master branch.
Bug fixes, details in relevant patches.

Sean Hefty (1):
  Add support for XRC QPs

Yishai Hadas (1):
  Infra-structure changes to support verbs extensions

 src/buf.c      |    6 +-
 src/cq.c       |   42 ++++++++---
 src/mlx4-abi.h |    6 ++
 src/mlx4.c     |   79 +++++++++++---------
 src/mlx4.h     |   70 ++++++++++++++++--
 src/qp.c       |   39 ++++++----
 src/srq.c      |  153 ++++++++++++++++++++++++++++++++++++++
 src/verbs.c    |  224 +++++++++++++++++++++++++++++++++++++++++--------------
 8 files changed, 493 insertions(+), 126 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH V8 libmlx4 1/2] Infra-structure changes to support verbs extensions
       [not found] ` <1374741618-890-1-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2013-07-25  8:40   ` Yishai Hadas
       [not found]     ` <1374741618-890-2-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
  2013-07-25  8:40   ` [PATCH V8 libmlx4 2/2] Add support for XRC QPs Yishai Hadas
  1 sibling, 1 reply; 5+ messages in thread
From: Yishai Hadas @ 2013-07-25  8:40 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA, roland-BHEL68pLQRGGvPXPguhicg
  Cc: ogerlitz-VPRAkNaXOzVWk0Htik3J/w, tzahio-VPRAkNaXOzVWk0Htik3J/w,
	yishaih-VPRAkNaXOzVWk0Htik3J/w

This patch implements the infrastructure for working with verbs extension.
It registers with libibverbs with verbs_register_driver function to indicate
supporting verbs extension, supplying its verbs_init_func for further initialization.
Later on worked based on verbs extension scheme as was defined by libibverbs.

Signed-off-by: Yishai Hadas <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Tzahi Oved <tzahio-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
 src/mlx4.c |   77 ++++++++++++++++++++++++++++++++----------------------------
 src/mlx4.h |    8 ++++-
 2 files changed, 47 insertions(+), 38 deletions(-)

diff --git a/src/mlx4.c b/src/mlx4.c
index 4cef2b9..ec7b2f7 100644
--- a/src/mlx4.c
+++ b/src/mlx4.c
@@ -120,7 +120,8 @@ static struct ibv_context_ops mlx4_ctx_ops = {
 	.detach_mcast  = ibv_cmd_detach_mcast
 };
 
-static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_fd)
+static int mlx4_init_context(struct verbs_device *v_device,
+				struct ibv_context *ibv_ctx, int cmd_fd)
 {
 	struct mlx4_context	       *context;
 	struct ibv_get_context		cmd;
@@ -128,26 +129,30 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_
 	int				i;
 	struct mlx4_alloc_ucontext_resp_v3 resp_v3;
 	__u16				bf_reg_size;
-	struct mlx4_device		*dev = to_mdev(ibdev);
+	struct mlx4_device              *dev = to_mdev(&v_device->device);
+	/* verbs_context should be used for new verbs
+	* struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx);
+	*/
 
-	context = calloc(1, sizeof *context);
-	if (!context)
-		return NULL;
 
-	context->ibv_ctx.cmd_fd = cmd_fd;
+	/* memory footprint of mlx4_context and verbs_context share
+	* struct ibv_context.
+	*/
+	context = to_mctx(ibv_ctx);
+	ibv_ctx->cmd_fd = cmd_fd;
 
 	if (dev->abi_version <= MLX4_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
-		if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
+		if (ibv_cmd_get_context(ibv_ctx, &cmd, sizeof cmd,
 					&resp_v3.ibv_resp, sizeof resp_v3))
-			goto err_free;
+			return errno;
 
 		context->num_qps  = resp_v3.qp_tab_size;
 		bf_reg_size	  = resp_v3.bf_reg_size;
 		context->cqe_size = sizeof (struct mlx4_cqe);
 	} else  {
-		if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
+		if (ibv_cmd_get_context(ibv_ctx, &cmd, sizeof cmd,
 					&resp.ibv_resp, sizeof resp))
-			goto err_free;
+			return errno;
 
 		context->num_qps  = resp.qp_tab_size;
 		bf_reg_size	  = resp.bf_reg_size;
@@ -169,15 +174,15 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_
 
 	pthread_mutex_init(&context->db_list_mutex, NULL);
 
-	context->uar = mmap(NULL, to_mdev(ibdev)->page_size, PROT_WRITE,
+	context->uar = mmap(NULL, dev->page_size, PROT_WRITE,
 			    MAP_SHARED, cmd_fd, 0);
 	if (context->uar == MAP_FAILED)
-		goto err_free;
+		return errno;
 
 	if (bf_reg_size) {
-		context->bf_page = mmap(NULL, to_mdev(ibdev)->page_size,
+		context->bf_page = mmap(NULL, dev->page_size,
 					PROT_WRITE, MAP_SHARED, cmd_fd,
-					to_mdev(ibdev)->page_size);
+					dev->page_size);
 		if (context->bf_page == MAP_FAILED) {
 			fprintf(stderr, PFX "Warning: BlueFlame available, "
 				"but failed to mmap() BlueFlame page.\n");
@@ -194,32 +199,26 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_
 	}
 
 	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
+	ibv_ctx->ops = mlx4_ctx_ops;
+	/* New verbs should be added as below
+	* verbs_ctx->drv_new_func1 = mlx4_new_func1;
+	*/
+	return 0;
 
-	context->ibv_ctx.ops = mlx4_ctx_ops;
-
-	return &context->ibv_ctx;
-
-err_free:
-	free(context);
-	return NULL;
 }
 
-static void mlx4_free_context(struct ibv_context *ibctx)
+static void mlx4_uninit_context(struct verbs_device *v_device,
+					struct ibv_context *ibv_ctx)
 {
-	struct mlx4_context *context = to_mctx(ibctx);
+	struct mlx4_context *context = to_mctx(ibv_ctx);
 
-	munmap(context->uar, to_mdev(ibctx->device)->page_size);
+	munmap(context->uar, to_mdev(&v_device->device)->page_size);
 	if (context->bf_page)
-		munmap(context->bf_page, to_mdev(ibctx->device)->page_size);
-	free(context);
-}
+		munmap(context->bf_page, to_mdev(&v_device->device)->page_size);
 
-static struct ibv_device_ops mlx4_dev_ops = {
-	.alloc_context = mlx4_alloc_context,
-	.free_context  = mlx4_free_context
-};
+}
 
-static struct ibv_device *mlx4_driver_init(const char *uverbs_sys_path, int abi_version)
+static struct verbs_device *mlx4_driver_init(const char *uverbs_sys_path, int abi_version)
 {
 	char			value[8];
 	struct mlx4_device    *dev;
@@ -254,24 +253,30 @@ found:
 		return NULL;
 	}
 
-	dev = malloc(sizeof *dev);
+	dev = calloc(1, sizeof *dev);
 	if (!dev) {
 		fprintf(stderr, PFX "Fatal: couldn't allocate device for %s\n",
 			uverbs_sys_path);
 		return NULL;
 	}
 
-	dev->ibv_dev.ops = mlx4_dev_ops;
 	dev->page_size   = sysconf(_SC_PAGESIZE);
 	dev->abi_version = abi_version;
 
-	return &dev->ibv_dev;
+	dev->verbs_dev.sz = sizeof(*dev);
+	dev->verbs_dev.size_of_context =
+		sizeof(struct mlx4_context) - sizeof(struct ibv_context);
+	/* mlx4_init_context will initialize provider calls */
+	dev->verbs_dev.init_context = mlx4_init_context;
+	dev->verbs_dev.uninit_context = mlx4_uninit_context;
+
+	return &dev->verbs_dev;
 }
 
 #ifdef HAVE_IBV_REGISTER_DRIVER
 static __attribute__((constructor)) void mlx4_register_driver(void)
 {
-	ibv_register_driver("mlx4", mlx4_driver_init);
+	verbs_register_driver("mlx4", mlx4_driver_init);
 }
 #else
 /*
diff --git a/src/mlx4.h b/src/mlx4.h
index be7a632..183e08c 100644
--- a/src/mlx4.h
+++ b/src/mlx4.h
@@ -131,7 +131,7 @@ enum {
 };
 
 struct mlx4_device {
-	struct ibv_device		ibv_dev;
+	struct verbs_device		verbs_dev;
 	int				page_size;
 	int				abi_version;
 };
@@ -276,7 +276,11 @@ static inline unsigned long align(unsigned long val, unsigned long align)
 
 static inline struct mlx4_device *to_mdev(struct ibv_device *ibdev)
 {
-	return to_mxxx(dev, device);
+	/* ibv_device is first field of verbs_device
+	  * see try_driver in libibverbs.
+	*/
+	return ((struct mlx4_device *)
+	 ((void *)ibdev - offsetof(struct mlx4_device, verbs_dev)));
 }
 
 static inline struct mlx4_context *to_mctx(struct ibv_context *ibctx)
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH V8 libmlx4 2/2] Add support for XRC QPs
       [not found] ` <1374741618-890-1-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
  2013-07-25  8:40   ` [PATCH V8 libmlx4 1/2] Infra-structure changes to support verbs extensions Yishai Hadas
@ 2013-07-25  8:40   ` Yishai Hadas
  1 sibling, 0 replies; 5+ messages in thread
From: Yishai Hadas @ 2013-07-25  8:40 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA, roland-BHEL68pLQRGGvPXPguhicg
  Cc: ogerlitz-VPRAkNaXOzVWk0Htik3J/w, tzahio-VPRAkNaXOzVWk0Htik3J/w,
	yishaih-VPRAkNaXOzVWk0Htik3J/w, Sean Hefty

From: Sean Hefty <sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

This patch implements the XRC APIs based on verbs extension scheme.
It hooks its XRC functions as part of mlx4_init_context, then makes
relevant changes on both control and data path to work properly with
XRC.
Main changes include using verbs_qp, verbs_srq, which are extendable based as part of calling
libibverbs command API (e.g. cmd.c), managing an XRC SRQ table for mapping
between mlx4_srq to srqn, differentiate between IBV_QPT_XRC_SEND & IBV_QPT_XRC_RECV as part of
its APIs, etc

Signed-off-by: Sean Hefty <sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Yishai Hadas <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---

Changes from V7:
Adapted code to use the new helper macro verbs_set_ctx_op.

 src/buf.c      |    6 +-
 src/cq.c       |   42 ++++++++---
 src/mlx4-abi.h |    6 ++
 src/mlx4.c     |   18 +++--
 src/mlx4.h     |   62 ++++++++++++++-
 src/qp.c       |   39 ++++++----
 src/srq.c      |  153 ++++++++++++++++++++++++++++++++++++++
 src/verbs.c    |  224 +++++++++++++++++++++++++++++++++++++++++--------------
 8 files changed, 454 insertions(+), 96 deletions(-)

diff --git a/src/buf.c b/src/buf.c
index a80bcb1..50957bb 100644
--- a/src/buf.c
+++ b/src/buf.c
@@ -78,6 +78,8 @@ int mlx4_alloc_buf(struct mlx4_buf *buf, size_t size, int page_size)
 
 void mlx4_free_buf(struct mlx4_buf *buf)
 {
-	ibv_dofork_range(buf->buf, buf->length);
-	munmap(buf->buf, buf->length);
+	if (buf->length) {
+		ibv_dofork_range(buf->buf, buf->length);
+		munmap(buf->buf, buf->length);
+	}
 }
diff --git a/src/cq.c b/src/cq.c
index 18447c4..ebb68fa 100644
--- a/src/cq.c
+++ b/src/cq.c
@@ -210,33 +210,43 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
 	rmb();
 
 	qpn = ntohl(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK;
+	wc->qp_num = qpn;
 
 	is_send  = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
 	is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
 		MLX4_CQE_OPCODE_ERROR;
 
-	if (!*cur_qp ||
-	    (qpn != (*cur_qp)->ibv_qp.qp_num)) {
+	if ((qpn & MLX4_XRC_QPN_BIT) && !is_send) {
 		/*
-		 * We do not have to take the QP table lock here,
-		 * because CQs will be locked while QPs are removed
+		 * We do not have to take the XSRQ table lock here,
+		 * because CQs will be locked while SRQs are removed
 		 * from the table.
 		 */
-		*cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context), qpn);
-		if (!*cur_qp)
+		srq = mlx4_find_xsrq(&to_mctx(cq->ibv_cq.context)->xsrq_table,
+				     ntohl(cqe->g_mlpath_rqpn) & MLX4_CQE_QPN_MASK);
+		if (!srq)
 			return CQ_POLL_ERR;
+	} else {
+		if (!*cur_qp || (qpn != (*cur_qp)->verbs_qp.qp.qp_num)) {
+			/*
+			 * We do not have to take the QP table lock here,
+			 * because CQs will be locked while QPs are removed
+			 * from the table.
+			 */
+			*cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context), qpn);
+			if (!*cur_qp)
+				return CQ_POLL_ERR;
+		}
+		srq = ((*cur_qp)->verbs_qp.qp.srq) ? to_msrq((*cur_qp)->verbs_qp.qp.srq) : NULL;
 	}
 
-	wc->qp_num = (*cur_qp)->ibv_qp.qp_num;
-
 	if (is_send) {
 		wq = &(*cur_qp)->sq;
 		wqe_index = ntohs(cqe->wqe_index);
 		wq->tail += (uint16_t) (wqe_index - (uint16_t) wq->tail);
 		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
 		++wq->tail;
-	} else if ((*cur_qp)->ibv_qp.srq) {
-		srq = to_msrq((*cur_qp)->ibv_qp.srq);
+	} else if (srq) {
 		wqe_index = htons(cqe->wqe_index);
 		wc->wr_id = srq->wrid[wqe_index];
 		mlx4_free_srq_wqe(srq, wqe_index);
@@ -312,7 +322,10 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
 		wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
 		wc->wc_flags	  |= g_mlpath_rqpn & 0x80000000 ? IBV_WC_GRH : 0;
 		wc->pkey_index     = ntohl(cqe->immed_rss_invalid) & 0x7f;
-		if ((*cur_qp)->link_layer == IBV_LINK_LAYER_ETHERNET)
+		/* When working with xrc srqs, don't have qp to check link layer.
+		  * Using IB SL, should consider Roce. (TBD)
+		*/
+		if ((*cur_qp) && (*cur_qp)->link_layer == IBV_LINK_LAYER_ETHERNET)
 			wc->sl	   = ntohs(cqe->sl_vid) >> 13;
 		else
 			wc->sl	   = ntohs(cqe->sl_vid) >> 12;
@@ -403,7 +416,12 @@ void __mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
 	while ((int) --prod_index - (int) cq->cons_index >= 0) {
 		cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe);
 		cqe += cqe_inc;
-		if ((ntohl(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
+		if (srq && srq->ext_srq &&
+		    ntohl(cqe->g_mlpath_rqpn & MLX4_CQE_QPN_MASK) == srq->verbs_srq.srq_num &&
+		    !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) {
+			mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index));
+			++nfreed;
+		} else if ((ntohl(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
 			if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
 				mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index));
 			++nfreed;
diff --git a/src/mlx4-abi.h b/src/mlx4-abi.h
index a1328af..b48f6fc 100644
--- a/src/mlx4-abi.h
+++ b/src/mlx4-abi.h
@@ -89,6 +89,12 @@ struct mlx4_create_srq {
 	__u64				db_addr;
 };
 
+struct mlx4_create_xsrq {
+	struct ibv_create_xsrq		ibv_cmd;
+	__u64				buf_addr;
+	__u64				db_addr;
+};
+
 struct mlx4_create_srq_resp {
 	struct ibv_create_srq_resp	ibv_resp;
 	__u32				srqn;
diff --git a/src/mlx4.c b/src/mlx4.c
index ec7b2f7..bca621a 100644
--- a/src/mlx4.c
+++ b/src/mlx4.c
@@ -130,9 +130,7 @@ static int mlx4_init_context(struct verbs_device *v_device,
 	struct mlx4_alloc_ucontext_resp_v3 resp_v3;
 	__u16				bf_reg_size;
 	struct mlx4_device              *dev = to_mdev(&v_device->device);
-	/* verbs_context should be used for new verbs
-	* struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx);
-	*/
+	struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx);
 
 
 	/* memory footprint of mlx4_context and verbs_context share
@@ -172,6 +170,7 @@ static int mlx4_init_context(struct verbs_device *v_device,
 	for (i = 0; i < MLX4_NUM_DB_TYPE; ++i)
 		context->db_list[i] = NULL;
 
+	mlx4_init_xsrq_table(&context->xsrq_table, context->num_qps);
 	pthread_mutex_init(&context->db_list_mutex, NULL);
 
 	context->uar = mmap(NULL, dev->page_size, PROT_WRITE,
@@ -200,9 +199,16 @@ static int mlx4_init_context(struct verbs_device *v_device,
 
 	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
 	ibv_ctx->ops = mlx4_ctx_ops;
-	/* New verbs should be added as below
-	* verbs_ctx->drv_new_func1 = mlx4_new_func1;
-	*/
+
+	verbs_ctx->has_comp_mask = VERBS_CONTEXT_XRCD | VERBS_CONTEXT_SRQ |
+					VERBS_CONTEXT_QP;
+	verbs_set_ctx_op(verbs_ctx, close_xrcd, mlx4_close_xrcd);
+	verbs_set_ctx_op(verbs_ctx, open_xrcd, mlx4_open_xrcd);
+	verbs_set_ctx_op(verbs_ctx, create_srq_ex, mlx4_create_srq_ex);
+	verbs_set_ctx_op(verbs_ctx, get_srq_num, verbs_get_srq_num);
+	verbs_set_ctx_op(verbs_ctx, create_qp_ex, mlx4_create_qp_ex);
+	verbs_set_ctx_op(verbs_ctx, open_qp, mlx4_open_qp);
+
 	return 0;
 
 }
diff --git a/src/mlx4.h b/src/mlx4.h
index 183e08c..4d4542e 100644
--- a/src/mlx4.h
+++ b/src/mlx4.h
@@ -38,6 +38,7 @@
 
 #include <infiniband/driver.h>
 #include <infiniband/arch.h>
+#include <infiniband/verbs.h>
 
 #ifdef HAVE_VALGRIND_MEMCHECK_H
 
@@ -97,6 +98,36 @@ enum {
 	MLX4_QP_TABLE_MASK		= MLX4_QP_TABLE_SIZE - 1
 };
 
+#define MLX4_REMOTE_SRQN_FLAGS(wr) htonl(wr->qp_type.xrc.remote_srqn << 8)
+
+enum {
+	MLX4_XSRQ_TABLE_BITS = 8,
+	MLX4_XSRQ_TABLE_SIZE = 1 << MLX4_XSRQ_TABLE_BITS,
+	MLX4_XSRQ_TABLE_MASK = MLX4_XSRQ_TABLE_SIZE - 1
+};
+
+struct mlx4_xsrq_table {
+	struct {
+		struct mlx4_srq **table;
+		int		  refcnt;
+	} xsrq_table[MLX4_XSRQ_TABLE_SIZE];
+
+	pthread_mutex_t		  mutex;
+	int			  num_xsrq;
+	int			  shift;
+	int			  mask;
+};
+
+void mlx4_init_xsrq_table(struct mlx4_xsrq_table *xsrq_table, int size);
+struct mlx4_srq *mlx4_find_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn);
+int mlx4_store_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn,
+		    struct mlx4_srq *srq);
+void mlx4_clear_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn);
+
+enum {
+	MLX4_XRC_QPN_BIT     = (1 << 23)
+};
+
 enum mlx4_db_type {
 	MLX4_DB_TYPE_CQ,
 	MLX4_DB_TYPE_RQ,
@@ -161,6 +192,7 @@ struct mlx4_context {
 	struct mlx4_db_page	       *db_list[MLX4_NUM_DB_TYPE];
 	pthread_mutex_t			db_list_mutex;
 	int				cqe_size;
+	struct mlx4_xsrq_table		xsrq_table;
 };
 
 struct mlx4_buf {
@@ -187,7 +219,7 @@ struct mlx4_cq {
 };
 
 struct mlx4_srq {
-	struct ibv_srq			ibv_srq;
+	struct verbs_srq		verbs_srq;
 	struct mlx4_buf			buf;
 	pthread_spinlock_t		lock;
 	uint64_t		       *wrid;
@@ -199,6 +231,7 @@ struct mlx4_srq {
 	int				tail;
 	uint32_t		       *db;
 	uint16_t			counter;
+	uint8_t				ext_srq;
 };
 
 struct mlx4_wq {
@@ -214,7 +247,7 @@ struct mlx4_wq {
 };
 
 struct mlx4_qp {
-	struct ibv_qp			ibv_qp;
+	struct verbs_qp			verbs_qp;
 	struct mlx4_buf			buf;
 	int				max_inline_data;
 	int				buf_size;
@@ -269,6 +302,7 @@ static inline unsigned long align(unsigned long val, unsigned long align)
 {
 	return (val + align - 1) & ~(align - 1);
 }
+int align_queue_size(int req);
 
 #define to_mxxx(xxx, type)						\
 	((struct mlx4_##type *)					\
@@ -300,12 +334,14 @@ static inline struct mlx4_cq *to_mcq(struct ibv_cq *ibcq)
 
 static inline struct mlx4_srq *to_msrq(struct ibv_srq *ibsrq)
 {
-	return to_mxxx(srq, srq);
+	return container_of(container_of(ibsrq, struct verbs_srq, srq),
+			    struct mlx4_srq, verbs_srq);
 }
 
 static inline struct mlx4_qp *to_mqp(struct ibv_qp *ibqp)
 {
-	return to_mxxx(qp, qp);
+	return container_of(container_of(ibqp, struct verbs_qp, qp),
+			    struct mlx4_qp, verbs_qp);
 }
 
 static inline struct mlx4_ah *to_mah(struct ibv_ah *ibah)
@@ -326,6 +362,9 @@ int mlx4_query_port(struct ibv_context *context, uint8_t port,
 
 struct ibv_pd *mlx4_alloc_pd(struct ibv_context *context);
 int mlx4_free_pd(struct ibv_pd *pd);
+struct ibv_xrcd *mlx4_open_xrcd(struct ibv_context *context,
+				struct ibv_xrcd_init_attr *attr);
+int mlx4_close_xrcd(struct ibv_xrcd *xrcd);
 
 struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr,
 			    size_t length, int access);
@@ -348,20 +387,33 @@ void mlx4_cq_resize_copy_cqes(struct mlx4_cq *cq, void *buf, int new_cqe);
 
 struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
 				 struct ibv_srq_init_attr *attr);
+struct ibv_srq *mlx4_create_srq_ex(struct ibv_context *context,
+				   struct ibv_srq_init_attr_ex *attr_ex);
+struct ibv_srq *mlx4_create_xrc_srq(struct ibv_context *context,
+				    struct ibv_srq_init_attr_ex *attr_ex);
 int mlx4_modify_srq(struct ibv_srq *srq,
 		     struct ibv_srq_attr *attr,
 		     int mask);
 int mlx4_query_srq(struct ibv_srq *srq,
 			   struct ibv_srq_attr *attr);
 int mlx4_destroy_srq(struct ibv_srq *srq);
+int mlx4_destroy_xrc_srq(struct ibv_srq *srq);
 int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
 			struct mlx4_srq *srq);
+void mlx4_init_xsrq_table(struct mlx4_xsrq_table *xsrq_table, int size);
+struct mlx4_srq *mlx4_find_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn);
+int mlx4_store_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn,
+		    struct mlx4_srq *srq);
+void mlx4_clear_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn);
 void mlx4_free_srq_wqe(struct mlx4_srq *srq, int ind);
 int mlx4_post_srq_recv(struct ibv_srq *ibsrq,
 		       struct ibv_recv_wr *wr,
 		       struct ibv_recv_wr **bad_wr);
 
 struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr);
+struct ibv_qp *mlx4_create_qp_ex(struct ibv_context *context,
+				 struct ibv_qp_init_attr_ex *attr);
+struct ibv_qp *mlx4_open_qp(struct ibv_context *context, struct ibv_qp_open_attr *attr);
 int mlx4_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 		   int attr_mask,
 		   struct ibv_qp_init_attr *init_attr);
@@ -376,7 +428,7 @@ int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
 			  struct ibv_recv_wr **bad_wr);
 void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
 			   struct mlx4_qp *qp);
-int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
+int mlx4_alloc_qp_buf(struct ibv_context *context, struct ibv_qp_cap *cap,
 		       enum ibv_qp_type type, struct mlx4_qp *qp);
 void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
 		       enum ibv_qp_type type);
diff --git a/src/qp.c b/src/qp.c
index 11c750b..721bed4 100644
--- a/src/qp.c
+++ b/src/qp.c
@@ -208,7 +208,7 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
 	ind = qp->sq.head;
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
-		if (wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
+		if (wq_overflow(&qp->sq, nreq, to_mcq(ibqp->send_cq))) {
 			ret = ENOMEM;
 			*bad_wr = wr;
 			goto out;
@@ -246,6 +246,9 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
 		size = sizeof *ctrl / 16;
 
 		switch (ibqp->qp_type) {
+		case IBV_QPT_XRC_SEND:
+			ctrl->srcrb_flags |= MLX4_REMOTE_SRQN_FLAGS(wr);
+			/* fall through */
 		case IBV_QPT_RC:
 		case IBV_QPT_UC:
 			switch (wr->opcode) {
@@ -460,7 +463,7 @@ int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
 	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
-		if (wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
+		if (wq_overflow(&qp->rq, nreq, to_mcq(ibqp->recv_cq))) {
 			ret = ENOMEM;
 			*bad_wr = wr;
 			goto out;
@@ -554,6 +557,7 @@ void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
 		size += sizeof (struct mlx4_wqe_raddr_seg);
 		break;
 
+	case IBV_QPT_XRC_SEND:
 	case IBV_QPT_RC:
 		size += sizeof (struct mlx4_wqe_raddr_seg);
 		/*
@@ -583,14 +587,16 @@ void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
 		; /* nothing */
 }
 
-int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
+int mlx4_alloc_qp_buf(struct ibv_context *context, struct ibv_qp_cap *cap,
 		       enum ibv_qp_type type, struct mlx4_qp *qp)
 {
 	qp->rq.max_gs	 = cap->max_recv_sge;
 
-	qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t));
-	if (!qp->sq.wrid)
-		return -1;
+	if (qp->sq.wqe_cnt) {
+		qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t));
+		if (!qp->sq.wrid)
+			return -1;
+	}
 
 	if (qp->rq.wqe_cnt) {
 		qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof (uint64_t));
@@ -615,15 +621,19 @@ int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
 		qp->sq.offset = 0;
 	}
 
-	if (mlx4_alloc_buf(&qp->buf,
-			    align(qp->buf_size, to_mdev(pd->context->device)->page_size),
-			    to_mdev(pd->context->device)->page_size)) {
-		free(qp->sq.wrid);
-		free(qp->rq.wrid);
-		return -1;
-	}
+	if (qp->buf_size) {
+		if (mlx4_alloc_buf(&qp->buf,
+				   align(qp->buf_size, to_mdev(context->device)->page_size),
+				   to_mdev(context->device)->page_size)) {
+			free(qp->sq.wrid);
+			free(qp->rq.wrid);
+			return -1;
+		}
 
-	memset(qp->buf.buf, 0, qp->buf_size);
+		memset(qp->buf.buf, 0, qp->buf_size);
+	} else {
+		qp->buf.buf = NULL;
+	}
 
 	return 0;
 }
@@ -639,6 +649,7 @@ void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
 		wqe_size -= sizeof (struct mlx4_wqe_datagram_seg);
 		break;
 
+	case IBV_QPT_XRC_SEND:
 	case IBV_QPT_UC:
 	case IBV_QPT_RC:
 		wqe_size -= sizeof (struct mlx4_wqe_raddr_seg);
diff --git a/src/srq.c b/src/srq.c
index f1d1240..28bc2d4 100644
--- a/src/srq.c
+++ b/src/srq.c
@@ -42,6 +42,7 @@
 #include "mlx4.h"
 #include "doorbell.h"
 #include "wqe.h"
+#include "mlx4-abi.h"
 
 static void *get_wqe(struct mlx4_srq *srq, int n)
 {
@@ -173,3 +174,155 @@ int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
 
 	return 0;
 }
+
+void mlx4_init_xsrq_table(struct mlx4_xsrq_table *xsrq_table, int size)
+{
+	memset(xsrq_table, 0, sizeof *xsrq_table);
+	xsrq_table->num_xsrq = size;
+	xsrq_table->shift = ffs(size) - 1 - MLX4_XSRQ_TABLE_BITS;
+	xsrq_table->mask = (1 << xsrq_table->shift) - 1;
+
+	pthread_mutex_init(&xsrq_table->mutex, NULL);
+}
+
+struct mlx4_srq *mlx4_find_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn)
+{
+	int index;
+
+	index = (srqn & (xsrq_table->num_xsrq - 1)) >> xsrq_table->shift;
+	if (xsrq_table->xsrq_table[index].refcnt)
+		return xsrq_table->xsrq_table[index].table[srqn & xsrq_table->mask];
+
+	return NULL;
+}
+
+int mlx4_store_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn,
+		    struct mlx4_srq *srq)
+{
+	int index, ret = 0;
+
+	index = (srqn & (xsrq_table->num_xsrq - 1)) >> xsrq_table->shift;
+	pthread_mutex_lock(&xsrq_table->mutex);
+	if (!xsrq_table->xsrq_table[index].refcnt) {
+		xsrq_table->xsrq_table[index].table = calloc(xsrq_table->mask + 1,
+							     sizeof(struct mlx4_srq *));
+		if (!xsrq_table->xsrq_table[index].table) {
+			ret = -1;
+			goto out;
+		}
+	}
+
+	xsrq_table->xsrq_table[index].refcnt++;
+	xsrq_table->xsrq_table[index].table[srqn & xsrq_table->mask] = srq;
+
+out:
+	pthread_mutex_unlock(&xsrq_table->mutex);
+	return ret;
+}
+
+void mlx4_clear_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn)
+{
+	int index;
+
+	index = (srqn & (xsrq_table->num_xsrq - 1)) >> xsrq_table->shift;
+	pthread_mutex_lock(&xsrq_table->mutex);
+
+	if (--xsrq_table->xsrq_table[index].refcnt)
+		xsrq_table->xsrq_table[index].table[srqn & xsrq_table->mask] = NULL;
+	else
+		free(xsrq_table->xsrq_table[index].table);
+
+	pthread_mutex_unlock(&xsrq_table->mutex);
+}
+
+struct ibv_srq *mlx4_create_xrc_srq(struct ibv_context *context,
+				    struct ibv_srq_init_attr_ex *attr_ex)
+{
+	struct mlx4_create_xsrq cmd;
+	struct mlx4_create_srq_resp resp;
+	struct mlx4_srq *srq;
+	int ret;
+
+	/* Sanity check SRQ size before proceeding */
+	if (attr_ex->attr.max_wr > 1 << 16 || attr_ex->attr.max_sge > 64)
+		return NULL;
+
+	srq = calloc(1, sizeof *srq);
+	if (!srq)
+		return NULL;
+
+	if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
+		goto err;
+
+	srq->max     = align_queue_size(attr_ex->attr.max_wr + 1);
+	srq->max_gs  = attr_ex->attr.max_sge;
+	srq->counter = 0;
+	srq->ext_srq = 1;
+
+	if (mlx4_alloc_srq_buf(attr_ex->pd, &attr_ex->attr, srq))
+		goto err;
+
+	srq->db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_RQ);
+	if (!srq->db)
+		goto err_free;
+
+	*srq->db = 0;
+
+	cmd.buf_addr = (uintptr_t) srq->buf.buf;
+	cmd.db_addr  = (uintptr_t) srq->db;
+
+	ret = ibv_cmd_create_srq_ex(context, &srq->verbs_srq,
+				    sizeof(srq->verbs_srq),
+				    attr_ex,
+				    &cmd.ibv_cmd, sizeof cmd,
+				    &resp.ibv_resp, sizeof resp);
+	if (ret)
+		goto err_db;
+
+	ret = mlx4_store_xsrq(&to_mctx(context)->xsrq_table,
+			      srq->verbs_srq.srq_num, srq);
+	if (ret)
+		goto err_destroy;
+
+	return &srq->verbs_srq.srq;
+
+err_destroy:
+	ibv_cmd_destroy_srq(&srq->verbs_srq.srq);
+err_db:
+	mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_RQ, srq->db);
+err_free:
+	free(srq->wrid);
+	mlx4_free_buf(&srq->buf);
+err:
+	free(srq);
+	return NULL;
+}
+
+int mlx4_destroy_xrc_srq(struct ibv_srq *srq)
+{
+	struct mlx4_context *mctx = to_mctx(srq->context);
+	struct mlx4_srq *msrq = to_msrq(srq);
+	struct mlx4_cq *mcq;
+	int ret;
+
+	mcq = to_mcq(msrq->verbs_srq.cq);
+	mlx4_cq_clean(mcq, 0, msrq);
+	pthread_spin_lock(&mcq->lock);
+	mlx4_clear_xsrq(&mctx->xsrq_table, msrq->verbs_srq.srq_num);
+	pthread_spin_unlock(&mcq->lock);
+
+	ret = ibv_cmd_destroy_srq(srq);
+	if (ret) {
+		pthread_spin_lock(&mcq->lock);
+		mlx4_store_xsrq(&mctx->xsrq_table, msrq->verbs_srq.srq_num, msrq);
+		pthread_spin_unlock(&mcq->lock);
+		return ret;
+	}
+
+	mlx4_free_db(mctx, MLX4_DB_TYPE_RQ, msrq->db);
+	mlx4_free_buf(&msrq->buf);
+	free(msrq->wrid);
+	free(msrq);
+
+	return 0;
+}
diff --git a/src/verbs.c b/src/verbs.c
index 7c5ee53..69e9753 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -107,6 +107,42 @@ int mlx4_free_pd(struct ibv_pd *pd)
 	return 0;
 }
 
+struct ibv_xrcd *mlx4_open_xrcd(struct ibv_context *context,
+				struct ibv_xrcd_init_attr *attr)
+{
+	struct ibv_open_xrcd cmd;
+	struct ibv_open_xrcd_resp resp;
+	struct verbs_xrcd *xrcd;
+	int ret;
+
+	xrcd = calloc(1, sizeof *xrcd);
+	if (!xrcd)
+		return NULL;
+
+	ret = ibv_cmd_open_xrcd(context, xrcd, sizeof(*xrcd), attr,
+				&cmd, sizeof cmd, &resp, sizeof resp);
+	if (ret)
+		goto err;
+
+	return &xrcd->xrcd;
+
+err:
+	free(xrcd);
+	return NULL;
+}
+
+int mlx4_close_xrcd(struct ibv_xrcd *ib_xrcd)
+{
+	struct verbs_xrcd *xrcd = container_of(ib_xrcd, struct verbs_xrcd, xrcd);
+	int ret;
+
+	ret = ibv_cmd_close_xrcd(xrcd);
+	if (!ret)
+		free(xrcd);
+
+	return ret;
+}
+
 struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 			   int access)
 {
@@ -150,7 +186,7 @@ int mlx4_dereg_mr(struct ibv_mr *mr)
 	return 0;
 }
 
-static int align_queue_size(int req)
+int align_queue_size(int req)
 {
 	int nent;
 
@@ -296,7 +332,7 @@ int mlx4_destroy_cq(struct ibv_cq *cq)
 }
 
 struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
-				 struct ibv_srq_init_attr *attr)
+				struct ibv_srq_init_attr *attr)
 {
 	struct mlx4_create_srq      cmd;
 	struct mlx4_create_srq_resp resp;
@@ -317,6 +353,7 @@ struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
 	srq->max     = align_queue_size(attr->attr.max_wr + 1);
 	srq->max_gs  = attr->attr.max_sge;
 	srq->counter = 0;
+	srq->ext_srq = 0;
 
 	if (mlx4_alloc_srq_buf(pd, &attr->attr, srq))
 		goto err;
@@ -330,15 +367,13 @@ struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
 	cmd.buf_addr = (uintptr_t) srq->buf.buf;
 	cmd.db_addr  = (uintptr_t) srq->db;
 
-	ret = ibv_cmd_create_srq(pd, &srq->ibv_srq, attr,
+	ret = ibv_cmd_create_srq(pd, &srq->verbs_srq.srq, attr,
 				 &cmd.ibv_cmd, sizeof cmd,
 				 &resp.ibv_resp, sizeof resp);
 	if (ret)
 		goto err_db;
 
-	srq->srqn = resp.srqn;
-
-	return &srq->ibv_srq;
+	return &srq->verbs_srq.srq;
 
 err_db:
 	mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, srq->db);
@@ -353,6 +388,18 @@ err:
 	return NULL;
 }
 
+struct ibv_srq *mlx4_create_srq_ex(struct ibv_context *context,
+				   struct ibv_srq_init_attr_ex *attr_ex)
+{
+	if (!(attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) ||
+	    (attr_ex->srq_type == IBV_SRQT_BASIC))
+		return mlx4_create_srq(attr_ex->pd, (struct ibv_srq_init_attr *) attr_ex);
+	else if (attr_ex->srq_type == IBV_SRQT_XRC)
+		return mlx4_create_xrc_srq(context, attr_ex);
+
+	return NULL;
+}
+
 int mlx4_modify_srq(struct ibv_srq *srq,
 		     struct ibv_srq_attr *attr,
 		     int attr_mask)
@@ -374,6 +421,9 @@ int mlx4_destroy_srq(struct ibv_srq *srq)
 {
 	int ret;
 
+	if (to_msrq(srq)->ext_srq)
+		return mlx4_destroy_xrc_srq(srq);
+
 	ret = ibv_cmd_destroy_srq(srq);
 	if (ret)
 		return ret;
@@ -386,7 +436,8 @@ int mlx4_destroy_srq(struct ibv_srq *srq)
 	return 0;
 }
 
-struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
+struct ibv_qp *mlx4_create_qp_ex(struct ibv_context *context,
+				 struct ibv_qp_init_attr_ex *attr)
 {
 	struct mlx4_create_qp     cmd;
 	struct ibv_create_qp_resp resp;
@@ -401,30 +452,34 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
 	    attr->cap.max_inline_data > 1024)
 		return NULL;
 
-	qp = malloc(sizeof *qp);
+	qp = calloc(1, sizeof *qp);
 	if (!qp)
 		return NULL;
 
-	mlx4_calc_sq_wqe_size(&attr->cap, attr->qp_type, qp);
-
-	/*
-	 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
-	 * allow HW to prefetch.
-	 */
-	qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
-	qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes);
-	qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr);
+	if (attr->qp_type == IBV_QPT_XRC_RECV) {
+		attr->cap.max_send_wr = qp->sq.wqe_cnt = 0;
+	} else {
+		mlx4_calc_sq_wqe_size(&attr->cap, attr->qp_type, qp);
+		/*
+		 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
+		 * allow HW to prefetch.
+		 */
+		qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
+		qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes);
+	}
 
-	if (attr->srq)
-		attr->cap.max_recv_wr = qp->rq.wqe_cnt = 0;
-	else {
+	if (attr->srq || attr->qp_type == IBV_QPT_XRC_SEND ||
+	    attr->qp_type == IBV_QPT_XRC_RECV) {
+		attr->cap.max_recv_wr = qp->rq.wqe_cnt = attr->cap.max_recv_sge = 0;
+	} else {
+		qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr);
 		if (attr->cap.max_recv_sge < 1)
 			attr->cap.max_recv_sge = 1;
 		if (attr->cap.max_recv_wr < 1)
 			attr->cap.max_recv_wr = 1;
 	}
 
-	if (mlx4_alloc_qp_buf(pd, &attr->cap, attr->qp_type, qp))
+	if (mlx4_alloc_qp_buf(context, &attr->cap, attr->qp_type, qp))
 		goto err;
 
 	mlx4_init_qp_indices(qp);
@@ -433,19 +488,18 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
 	    pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
 		goto err_free;
 
-	if (!attr->srq) {
-		qp->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
+	if (attr->cap.max_recv_sge) {
+		qp->db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_RQ);
 		if (!qp->db)
 			goto err_free;
 
 		*qp->db = 0;
+		cmd.db_addr = (uintptr_t) qp->db;
+	} else {
+		cmd.db_addr = 0;
 	}
 
 	cmd.buf_addr	    = (uintptr_t) qp->buf.buf;
-	if (attr->srq)
-		cmd.db_addr = 0;
-	else
-		cmd.db_addr = (uintptr_t) qp->db;
 	cmd.log_sq_stride   = qp->sq.wqe_shift;
 	for (cmd.log_sq_bb_count = 0;
 	     qp->sq.wqe_cnt > 1 << cmd.log_sq_bb_count;
@@ -454,37 +508,41 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
 	cmd.sq_no_prefetch = 0;	/* OK for ABI 2: just a reserved field */
 	memset(cmd.reserved, 0, sizeof cmd.reserved);
 
-	pthread_mutex_lock(&to_mctx(pd->context)->qp_table_mutex);
+	pthread_mutex_lock(&to_mctx(context)->qp_table_mutex);
 
-	ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd.ibv_cmd, sizeof cmd,
-				&resp, sizeof resp);
+	ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp,
+				   sizeof(qp->verbs_qp), attr,
+				   &cmd.ibv_cmd, sizeof cmd, &resp, sizeof resp);
 	if (ret)
 		goto err_rq_db;
 
-	ret = mlx4_store_qp(to_mctx(pd->context), qp->ibv_qp.qp_num, qp);
-	if (ret)
-		goto err_destroy;
-	pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
+	if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) {
+		ret = mlx4_store_qp(to_mctx(context), qp->verbs_qp.qp.qp_num, qp);
+		if (ret)
+			goto err_destroy;
+	}
+	pthread_mutex_unlock(&to_mctx(context)->qp_table_mutex);
 
 	qp->rq.wqe_cnt = qp->rq.max_post = attr->cap.max_recv_wr;
 	qp->rq.max_gs  = attr->cap.max_recv_sge;
-	mlx4_set_sq_sizes(qp, &attr->cap, attr->qp_type);
+	if (attr->qp_type != IBV_QPT_XRC_RECV)
+		mlx4_set_sq_sizes(qp, &attr->cap, attr->qp_type);
 
-	qp->doorbell_qpn    = htonl(qp->ibv_qp.qp_num << 8);
+	qp->doorbell_qpn    = htonl(qp->verbs_qp.qp.qp_num << 8);
 	if (attr->sq_sig_all)
 		qp->sq_signal_bits = htonl(MLX4_WQE_CTRL_CQ_UPDATE);
 	else
 		qp->sq_signal_bits = 0;
 
-	return &qp->ibv_qp;
+	return &qp->verbs_qp.qp;
 
 err_destroy:
-	ibv_cmd_destroy_qp(&qp->ibv_qp);
+	ibv_cmd_destroy_qp(&qp->verbs_qp.qp);
 
 err_rq_db:
-	pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
-	if (!attr->srq)
-		mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, qp->db);
+	pthread_mutex_unlock(&to_mctx(context)->qp_table_mutex);
+	if (attr->cap.max_recv_sge)
+		mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_RQ, qp->db);
 
 err_free:
 	free(qp->sq.wrid);
@@ -498,6 +556,43 @@ err:
 	return NULL;
 }
 
+struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
+{
+	struct ibv_qp_init_attr_ex attr_ex;
+	struct ibv_qp *qp;
+
+	memcpy(&attr_ex, attr, sizeof *attr);
+	attr_ex.comp_mask = IBV_QP_INIT_ATTR_PD;
+	attr_ex.pd = pd;
+	qp = mlx4_create_qp_ex(pd->context, &attr_ex);
+	if (qp)
+		memcpy(attr, &attr_ex, sizeof *attr);
+	return qp;
+}
+
+struct ibv_qp *mlx4_open_qp(struct ibv_context *context, struct ibv_qp_open_attr *attr)
+{
+	struct ibv_open_qp cmd;
+	struct ibv_create_qp_resp resp;
+	struct mlx4_qp *qp;
+	int ret;
+
+	qp = calloc(1, sizeof *qp);
+	if (!qp)
+		return NULL;
+
+	ret = ibv_cmd_open_qp(context, &qp->verbs_qp, sizeof(qp->verbs_qp), attr,
+			      &cmd, sizeof cmd, &resp, sizeof resp);
+	if (ret)
+		goto err;
+
+	return &qp->verbs_qp.qp;
+
+err:
+	free(qp);
+	return NULL;
+}
+
 int mlx4_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
 		   int attr_mask,
 		   struct ibv_qp_init_attr *init_attr)
@@ -528,7 +623,7 @@ int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 	int ret;
 
 	if (attr_mask & IBV_QP_PORT) {
-		ret = ibv_query_port(qp->pd->context, attr->port_num,
+		ret = ibv_query_port(qp->context, attr->port_num,
 				     &port_attr);
 		if (ret)
 			return ret;
@@ -546,13 +641,14 @@ int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 	if (!ret		       &&
 	    (attr_mask & IBV_QP_STATE) &&
 	    attr->qp_state == IBV_QPS_RESET) {
-		mlx4_cq_clean(to_mcq(qp->recv_cq), qp->qp_num,
-			       qp->srq ? to_msrq(qp->srq) : NULL);
-		if (qp->send_cq != qp->recv_cq)
+		if (qp->recv_cq)
+			mlx4_cq_clean(to_mcq(qp->recv_cq), qp->qp_num,
+				      qp->srq ? to_msrq(qp->srq) : NULL);
+		if (qp->send_cq && qp->send_cq != qp->recv_cq)
 			mlx4_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL);
 
 		mlx4_init_qp_indices(to_mqp(qp));
-		if (!qp->srq)
+		if (to_mqp(qp)->rq.wqe_cnt)
 			*to_mqp(qp)->db = 0;
 	}
 
@@ -564,9 +660,14 @@ static void mlx4_lock_cqs(struct ibv_qp *qp)
 	struct mlx4_cq *send_cq = to_mcq(qp->send_cq);
 	struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq);
 
-	if (send_cq == recv_cq)
+	if (!qp->send_cq || !qp->recv_cq) {
+		if (qp->send_cq)
+			pthread_spin_lock(&send_cq->lock);
+		else if (qp->recv_cq)
+			pthread_spin_lock(&recv_cq->lock);
+	} else if (send_cq == recv_cq) {
 		pthread_spin_lock(&send_cq->lock);
-	else if (send_cq->cqn < recv_cq->cqn) {
+	} else if (send_cq->cqn < recv_cq->cqn) {
 		pthread_spin_lock(&send_cq->lock);
 		pthread_spin_lock(&recv_cq->lock);
 	} else {
@@ -580,9 +681,15 @@ static void mlx4_unlock_cqs(struct ibv_qp *qp)
 	struct mlx4_cq *send_cq = to_mcq(qp->send_cq);
 	struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq);
 
-	if (send_cq == recv_cq)
+
+	if (!qp->send_cq || !qp->recv_cq) {
+		if (qp->send_cq)
+			pthread_spin_unlock(&send_cq->lock);
+		else if (qp->recv_cq)
+			pthread_spin_unlock(&recv_cq->lock);
+	} else if (send_cq == recv_cq) {
 		pthread_spin_unlock(&send_cq->lock);
-	else if (send_cq->cqn < recv_cq->cqn) {
+	} else if (send_cq->cqn < recv_cq->cqn) {
 		pthread_spin_unlock(&recv_cq->lock);
 		pthread_spin_unlock(&send_cq->lock);
 	} else {
@@ -605,21 +712,24 @@ int mlx4_destroy_qp(struct ibv_qp *ibqp)
 
 	mlx4_lock_cqs(ibqp);
 
-	__mlx4_cq_clean(to_mcq(ibqp->recv_cq), ibqp->qp_num,
-			ibqp->srq ? to_msrq(ibqp->srq) : NULL);
-	if (ibqp->send_cq != ibqp->recv_cq)
+	if (ibqp->recv_cq)
+		__mlx4_cq_clean(to_mcq(ibqp->recv_cq), ibqp->qp_num,
+				ibqp->srq ? to_msrq(ibqp->srq) : NULL);
+	if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq)
 		__mlx4_cq_clean(to_mcq(ibqp->send_cq), ibqp->qp_num, NULL);
 
-	mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num);
+	if (qp->sq.wqe_cnt || qp->rq.wqe_cnt)
+		mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num);
 
 	mlx4_unlock_cqs(ibqp);
 	pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex);
 
-	if (!ibqp->srq)
+	if (qp->rq.wqe_cnt) {
 		mlx4_free_db(to_mctx(ibqp->context), MLX4_DB_TYPE_RQ, qp->db);
-	free(qp->sq.wrid);
-	if (qp->rq.wqe_cnt)
 		free(qp->rq.wrid);
+	}
+	if (qp->sq.wqe_cnt)
+		free(qp->sq.wrid);
 	mlx4_free_buf(&qp->buf);
 	free(qp);
 
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH V8 libmlx4 1/2] Infra-structure changes to support verbs extensions
       [not found]     ` <1374741618-890-2-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2013-07-25 17:19       ` Jason Gunthorpe
       [not found]         ` <20130725171956.GB17616-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 5+ messages in thread
From: Jason Gunthorpe @ 2013-07-25 17:19 UTC (permalink / raw)
  To: Yishai Hadas
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, roland-BHEL68pLQRGGvPXPguhicg,
	ogerlitz-VPRAkNaXOzVWk0Htik3J/w, tzahio-VPRAkNaXOzVWk0Htik3J/w

On Thu, Jul 25, 2013 at 11:40:17AM +0300, Yishai Hadas wrote:
>  	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
> +	ibv_ctx->ops = mlx4_ctx_ops;
> +	/* New verbs should be added as below
> +	* verbs_ctx->drv_new_func1 = mlx4_new_func1;
> +	*/

The comment needs revising to refer to verbs_set_ctx_op

>  static inline struct mlx4_device *to_mdev(struct ibv_device *ibdev)
>  {
> -	return to_mxxx(dev, device);
> +	/* ibv_device is first field of verbs_device
> +	  * see try_driver in libibverbs.
> +	*/
> +	return ((struct mlx4_device *)
> +	 ((void *)ibdev - offsetof(struct mlx4_device, verbs_dev)));
         ^^^^^^^^^^^^^^

container_of please

Also this can have my ack after these changes.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH V8 libmlx4 1/2] Infra-structure changes to support verbs extensions
       [not found]         ` <20130725171956.GB17616-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2013-07-31  7:44           ` Yishai Hadas
  0 siblings, 0 replies; 5+ messages in thread
From: Yishai Hadas @ 2013-07-31  7:44 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Yishai Hadas, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	roland-BHEL68pLQRGGvPXPguhicg, ogerlitz-VPRAkNaXOzVWk0Htik3J/w,
	tzahio-VPRAkNaXOzVWk0Htik3J/w

On 7/25/2013 8:19 PM, Jason Gunthorpe wrote:
> On Thu, Jul 25, 2013 at 11:40:17AM +0300, Yishai Hadas wrote:
>>   	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
>> +	ibv_ctx->ops = mlx4_ctx_ops;
>> +	/* New verbs should be added as below
>> +	* verbs_ctx->drv_new_func1 = mlx4_new_func1;
>> +	*/
> The comment needs revising to refer to verbs_set_ctx_op
>
>>   static inline struct mlx4_device *to_mdev(struct ibv_device *ibdev)
>>   {
>> -	return to_mxxx(dev, device);
>> +	/* ibv_device is first field of verbs_device
>> +	  * see try_driver in libibverbs.
>> +	*/
>> +	return ((struct mlx4_device *)
>> +	 ((void *)ibdev - offsetof(struct mlx4_device, verbs_dev)));
>           ^^^^^^^^^^^^^^
>
> container_of please
>
> Also this can have my ack after these changes.
     Thanks, I accept both comments, will handle as part of V9.
     Once we agree on V8 comments of libibverbs will re-send both V9.
>
> Jason
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2013-07-31  7:44 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-25  8:40 [PATCH V8 libmlx4 0/2] Add extension and XRC QP support Yishai Hadas
     [not found] ` <1374741618-890-1-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2013-07-25  8:40   ` [PATCH V8 libmlx4 1/2] Infra-structure changes to support verbs extensions Yishai Hadas
     [not found]     ` <1374741618-890-2-git-send-email-yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2013-07-25 17:19       ` Jason Gunthorpe
     [not found]         ` <20130725171956.GB17616-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2013-07-31  7:44           ` Yishai Hadas
2013-07-25  8:40   ` [PATCH V8 libmlx4 2/2] Add support for XRC QPs Yishai Hadas

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.