All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 1/2] libmlx4: Infra-structure changes to support verbs extensions
@ 2012-09-28 22:53 Hefty, Sean
       [not found] ` <1828884A29C6694DAF28B7E6B8A8237346A981D8-Q3cL8pyY+6ukrb+BlOpmy7fspsVTdybXVpNB7YpNyf8@public.gmane.org>
  0 siblings, 1 reply; 7+ messages in thread
From: Hefty, Sean @ 2012-09-28 22:53 UTC (permalink / raw)
  To: linux-rdma (linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org),
	miked-VPRAkNaXOzVWk0Htik3J/w,
	Tzahi Oved (tzahio-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org),
	Roland Dreier, Jason Gunthorpe, yishaih-VPRAkNaXOzVWk0Htik3J/w

From: Yishai Hadas <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

Signed-off-by: Yishai Hadas <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Tzahi Oved <tzahio-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Sean Hefty <sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 src/mlx4.c |  103 ++++++++++++++++++++++++++++++++++++++++++------------------
 src/mlx4.h |   16 +++++++++
 2 files changed, 88 insertions(+), 31 deletions(-)

diff --git a/src/mlx4.c b/src/mlx4.c
index 8cf249a..f6c12f9 100644
--- a/src/mlx4.c
+++ b/src/mlx4.c
@@ -120,22 +120,26 @@ static struct ibv_context_ops mlx4_ctx_ops = {
 	.detach_mcast  = ibv_cmd_detach_mcast
 };
 
-static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_fd)
+static int mlx4_init_context(struct verbs_device *device,
+			struct ibv_context *ibv_ctx, int cmd_fd)
 {
-	struct mlx4_context	       *context;
+	struct mlx4_context		*context;
 	struct ibv_get_context		cmd;
 	struct mlx4_alloc_ucontext_resp resp;
 	int				i;
+	/* verbs_context should be used for new verbs
+	  *struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx);
+	 */
 
-	context = calloc(1, sizeof *context);
-	if (!context)
-		return NULL;
-
-	context->ibv_ctx.cmd_fd = cmd_fd;
+	/* memory footprint of mlx4_context and verbs_context share
+	  * struct ibv_context.
+	*/
+	context = to_mctx(ibv_ctx);
+	ibv_ctx->cmd_fd = cmd_fd;
 
-	if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
+	if (ibv_cmd_get_context(ibv_ctx, &cmd, sizeof(cmd),
 				&resp.ibv_resp, sizeof resp))
-		goto err_free;
+		return errno;
 
 	context->num_qps	= resp.qp_tab_size;
 	context->qp_table_shift = ffs(context->num_qps) - 1 - MLX4_QP_TABLE_BITS;
@@ -150,15 +154,15 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_
 
 	pthread_mutex_init(&context->db_list_mutex, NULL);
 
-	context->uar = mmap(NULL, to_mdev(ibdev)->page_size, PROT_WRITE,
+	context->uar = mmap(NULL, to_mdev_ex(device)->page_size, PROT_WRITE,
 			    MAP_SHARED, cmd_fd, 0);
 	if (context->uar == MAP_FAILED)
-		goto err_free;
+		return errno;
 
 	if (resp.bf_reg_size) {
-		context->bf_page = mmap(NULL, to_mdev(ibdev)->page_size,
+		context->bf_page = mmap(NULL, to_mdev_ex(device)->page_size,
 					PROT_WRITE, MAP_SHARED, cmd_fd,
-					to_mdev(ibdev)->page_size);
+					to_mdev_ex(device)->page_size);
 		if (context->bf_page == MAP_FAILED) {
 			fprintf(stderr, PFX "Warning: BlueFlame available, "
 				"but failed to mmap() BlueFlame page.\n");
@@ -176,23 +180,52 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_
 
 	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
 
-	context->ibv_ctx.ops = mlx4_ctx_ops;
+	ibv_ctx->ops = mlx4_ctx_ops;
+	/* New verbs should be added as below
+	  * verbs_ctx->drv_new_func1 = mlx4_new_func1;
+	  */
+	return 0;
 
-	return &context->ibv_ctx;
+}
 
-err_free:
-	free(context);
-	return NULL;
+static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_fd)
+{
+	struct verbs_device *vdev;
+	struct verbs_context *context_ex;
+	int ret;
+
+	vdev = container_of(ibdev, struct verbs_device, device);
+	context_ex = calloc(1, sizeof(*context_ex) + vdev->size_of_context);
+	if (!context_ex) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	context_ex->sz = sizeof(*context_ex);
+	ret = mlx4_init_context(vdev, &context_ex->context, cmd_fd);
+	if (ret) {
+		free(context_ex);
+		return NULL;
+	}
+
+	return &context_ex->context;
 }
 
-static void mlx4_free_context(struct ibv_context *ibctx)
+static void mlx4_uninit_context(struct verbs_device *device,
+					struct ibv_context *ibv_ctx)
 {
-	struct mlx4_context *context = to_mctx(ibctx);
+	struct mlx4_context *context = to_mctx(ibv_ctx);
 
-	munmap(context->uar, to_mdev(ibctx->device)->page_size);
+	munmap(context->uar, to_mdev_ex(device)->page_size);
 	if (context->bf_page)
-		munmap(context->bf_page, to_mdev(ibctx->device)->page_size);
-	free(context);
+		munmap(context->bf_page, to_mdev_ex(device)->page_size);
+}
+
+static void mlx4_free_context(struct ibv_context *ibctx)
+{
+	mlx4_uninit_context(container_of(ibctx->device, struct verbs_device, device),
+			    ibctx);
+	free(container_of(ibctx, struct verbs_context, context));
 }
 
 static struct ibv_device_ops mlx4_dev_ops = {
@@ -200,11 +233,11 @@ static struct ibv_device_ops mlx4_dev_ops = {
 	.free_context  = mlx4_free_context
 };
 
-static struct ibv_device *mlx4_driver_init(const char *uverbs_sys_path,
-					    int abi_version)
+static struct verbs_device *mlx4_driver_init(const char *uverbs_sys_path,
+					     int abi_version)
 {
 	char			value[8];
-	struct mlx4_device    *dev;
+	struct mlx4_device_ex	*dev;
 	unsigned		vendor, device;
 	int			i;
 
@@ -226,7 +259,7 @@ static struct ibv_device *mlx4_driver_init(const char *uverbs_sys_path,
 	return NULL;
 
 found:
-	if (abi_version < MLX4_UVERBS_MIN_ABI_VERSION ||
+	if (abi_version <= MLX4_UVERBS_MIN_ABI_VERSION ||
 	    abi_version > MLX4_UVERBS_MAX_ABI_VERSION) {
 		fprintf(stderr, PFX "Fatal: ABI version %d of %s is not supported "
 			"(min supported %d, max supported %d)\n",
@@ -243,16 +276,24 @@ found:
 		return NULL;
 	}
 
-	dev->ibv_dev.ops = mlx4_dev_ops;
+	dev->verbs_dev.device.ops = mlx4_dev_ops;
 	dev->page_size   = sysconf(_SC_PAGESIZE);
-
-	return &dev->ibv_dev;
+	dev->verbs_dev.sz = sizeof(*dev);
+	dev->verbs_dev.size_of_context =
+		sizeof(struct mlx4_context) - sizeof(struct ibv_context);
+	 /* mlx4_init_context will initialize provider calls */
+	dev->verbs_dev.init_context = mlx4_init_context;
+	dev->verbs_dev.uninit_context = mlx4_uninit_context;
+
+	return &dev->verbs_dev;
 }
 
+
 #ifdef HAVE_IBV_REGISTER_DRIVER
 static __attribute__((constructor)) void mlx4_register_driver(void)
 {
-	ibv_register_driver("mlx4", mlx4_driver_init);
+	verbs_register_driver("mlx4", mlx4_driver_init);
+
 }
 #else
 /*
diff --git a/src/mlx4.h b/src/mlx4.h
index 13c13d8..c06dbd5 100644
--- a/src/mlx4.h
+++ b/src/mlx4.h
@@ -135,6 +135,11 @@ struct mlx4_device {
 	int				page_size;
 };
 
+struct mlx4_device_ex {
+	struct verbs_device	verbs_dev;
+	int			page_size;
+};
+
 struct mlx4_db_page;
 
 struct mlx4_context {
@@ -261,6 +266,17 @@ static inline struct mlx4_device *to_mdev(struct ibv_device *ibdev)
 	return to_mxxx(dev, device);
 }
 
+#define to_mxxx_ex(xxx, type)						\
+	((struct mlx4_##type##_ex *)					\
+	 ((void *) verbs##xxx - offsetof(struct mlx4_##type##_ex, verbs_##xxx)))
+
+
+static inline struct mlx4_device_ex *to_mdev_ex(const struct verbs_device
+						*verbsdev)
+{
+	return to_mxxx_ex(dev, device);
+}
+
 static inline struct mlx4_context *to_mctx(struct ibv_context *ibctx)
 {
 	return to_mxxx(ctx, context);


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v3 1/2] libmlx4: Infra-structure changes to support verbs extensions
       [not found] ` <1828884A29C6694DAF28B7E6B8A8237346A981D8-Q3cL8pyY+6ukrb+BlOpmy7fspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2012-09-30 21:14   ` Jason Gunthorpe
       [not found]     ` <20120930211414.GA26575-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  2013-03-16  0:39   ` [PATCH v4 " sean.hefty-ral2JQCrhuEAvxtiuMwx3w
  2013-03-16  0:39   ` [PATCH v4 2/2] libmlx4: " sean.hefty-ral2JQCrhuEAvxtiuMwx3w
  2 siblings, 1 reply; 7+ messages in thread
From: Jason Gunthorpe @ 2012-09-30 21:14 UTC (permalink / raw)
  To: Hefty, Sean
  Cc: linux-rdma (linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org),
	miked-VPRAkNaXOzVWk0Htik3J/w,
	Tzahi Oved (tzahio-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org),
	Roland Dreier, yishaih-VPRAkNaXOzVWk0Htik3J/w

> -static struct ibv_device *mlx4_driver_init(const char *uverbs_sys_path,
> -					    int abi_version)
> +static struct verbs_device *mlx4_driver_init(const char *uverbs_sys_path,
> +					     int abi_version)
>  {
>  	char			value[8];
> -	struct mlx4_device    *dev;
> +	struct mlx4_device_ex	*dev;
>  	unsigned		vendor, device;
>  	int			i;

[..]

The allocation of 'dev' needs to be via zero'ing calloc so new
unsupported members of verbs_device are zero'd.

> +++ b/src/mlx4.h
> @@ -135,6 +135,11 @@ struct mlx4_device {
>  	int				page_size;
>  };
>  
> +struct mlx4_device_ex {
> +	struct verbs_device	verbs_dev;
> +	int			page_size;
> +};
> +

This looks wrong to me. offsetof(page_size) will be different on
mlx4_device_ex vs mlx4_device, and mlx4_alloc_cq_buf has no test to
correct for that.

mlx4_device_ex should be removed, and mlx4_device changed to always
use verbs_device, as a provider-allocated structure the provider
assumes the ibv_device * pointers it gets were created by driver_init.

> +#define to_mxxx_ex(xxx, type)						\
> +	((struct mlx4_##type##_ex *)					\
> +	 ((void *) verbs##xxx - offsetof(struct mlx4_##type##_ex, verbs_##xxx)))
> +
> +
> +static inline struct mlx4_device_ex *to_mdev_ex(const struct verbs_device
> +						*verbsdev)
> +{
> +	return to_mxxx_ex(dev, device);
> +}

Dump too..

Maybe make your containerof available to the providers and ditch all
these converters?

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3 1/2] libmlx4: Infra-structure changes to support verbs extensions
       [not found]     ` <20120930211414.GA26575-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2012-10-11 16:26       ` Yishai Hadas
  0 siblings, 0 replies; 7+ messages in thread
From: Yishai Hadas @ 2012-10-11 16:26 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Hefty, Sean,
	linux-rdma (linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org),
	miked-VPRAkNaXOzVWk0Htik3J/w,
	Tzahi Oved (tzahio-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org),
	Roland Dreier, yishaih-VPRAkNaXOzVWk0Htik3J/w

Jason,
I just went over your comments and I tend to agree.
Regarding the second point, we could consider another solution, such as adapting the "to_mdev" macro to the new verbs mode, but your direction seems cleaner and simpler.

I will supply a new candidate patch for libmlx4 in the coming days.
Yishai


On 9/30/2012 11:14 PM, Jason Gunthorpe wrote:
>> -static struct ibv_device *mlx4_driver_init(const char *uverbs_sys_path,
>> -					    int abi_version)
>> +static struct verbs_device *mlx4_driver_init(const char *uverbs_sys_path,
>> +					     int abi_version)
>>   {
>>   	char			value[8];
>> -	struct mlx4_device    *dev;
>> +	struct mlx4_device_ex	*dev;
>>   	unsigned		vendor, device;
>>   	int			i;
> [..]
>
> The allocation of 'dev' needs to be via zero'ing calloc so new
> unsupported members of verbs_device are zero'd.
>> +++ b/src/mlx4.h
>> @@ -135,6 +135,11 @@ struct mlx4_device {
>>   	int				page_size;
>>   };
>>   
>> +struct mlx4_device_ex {
>> +	struct verbs_device	verbs_dev;
>> +	int			page_size;
>> +};
>> +
> This looks wrong to me. offsetof(page_size) will be different on
> mlx4_device_ex vs mlx4_device, and mlx4_alloc_cq_buf has no test to
> correct for that.
>
> mlx4_device_ex should be removed, and mlx4_device changed to always
> use verbs_device, as a provider-allocated structure the provider
> assumes the ibv_device * pointers it gets were created by driver_init.
>
>> +#define to_mxxx_ex(xxx, type)						\
>> +	((struct mlx4_##type##_ex *)					\
>> +	 ((void *) verbs##xxx - offsetof(struct mlx4_##type##_ex, verbs_##xxx)))
>> +
>> +
>> +static inline struct mlx4_device_ex *to_mdev_ex(const struct verbs_device
>> +						*verbsdev)
>> +{
>> +	return to_mxxx_ex(dev, device);
>> +}
> Dump too..
>
> Maybe make your containerof available to the providers and ditch all
> these converters?
>
> Jason
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v4 1/2] libmlx4: Infra-structure changes to support verbs extensions
       [not found] ` <1828884A29C6694DAF28B7E6B8A8237346A981D8-Q3cL8pyY+6ukrb+BlOpmy7fspsVTdybXVpNB7YpNyf8@public.gmane.org>
  2012-09-30 21:14   ` Jason Gunthorpe
@ 2013-03-16  0:39   ` sean.hefty-ral2JQCrhuEAvxtiuMwx3w
       [not found]     ` <1363394396-951-1-git-send-email-sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
  2013-03-16  0:39   ` [PATCH v4 2/2] libmlx4: " sean.hefty-ral2JQCrhuEAvxtiuMwx3w
  2 siblings, 1 reply; 7+ messages in thread
From: sean.hefty-ral2JQCrhuEAvxtiuMwx3w @ 2013-03-16  0:39 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA, roland-BHEL68pLQRGGvPXPguhicg
  Cc: Yishai Hadas, Tzahi Oved

From: Yishai Hadas <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

Signed-off-by: Yishai Hadas <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Tzahi Oved <tzahio-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
 src/mlx4.c |  103 ++++++++++++++++++++++++++++++++++++++++++------------------
 src/mlx4.h |   16 +++++++++
 2 files changed, 88 insertions(+), 31 deletions(-)

diff --git a/src/mlx4.c b/src/mlx4.c
index 8cf249a..f6c12f9 100644
--- a/src/mlx4.c
+++ b/src/mlx4.c
@@ -120,22 +120,26 @@ static struct ibv_context_ops mlx4_ctx_ops = {
 	.detach_mcast  = ibv_cmd_detach_mcast
 };
 
-static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_fd)
+static int mlx4_init_context(struct verbs_device *device,
+			struct ibv_context *ibv_ctx, int cmd_fd)
 {
-	struct mlx4_context	       *context;
+	struct mlx4_context		*context;
 	struct ibv_get_context		cmd;
 	struct mlx4_alloc_ucontext_resp resp;
 	int				i;
+	/* verbs_context should be used for new verbs
+	  *struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx);
+	 */
 
-	context = calloc(1, sizeof *context);
-	if (!context)
-		return NULL;
-
-	context->ibv_ctx.cmd_fd = cmd_fd;
+	/* memory footprint of mlx4_context and verbs_context share
+	  * struct ibv_context.
+	*/
+	context = to_mctx(ibv_ctx);
+	ibv_ctx->cmd_fd = cmd_fd;
 
-	if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
+	if (ibv_cmd_get_context(ibv_ctx, &cmd, sizeof(cmd),
 				&resp.ibv_resp, sizeof resp))
-		goto err_free;
+		return errno;
 
 	context->num_qps	= resp.qp_tab_size;
 	context->qp_table_shift = ffs(context->num_qps) - 1 - MLX4_QP_TABLE_BITS;
@@ -150,15 +154,15 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_
 
 	pthread_mutex_init(&context->db_list_mutex, NULL);
 
-	context->uar = mmap(NULL, to_mdev(ibdev)->page_size, PROT_WRITE,
+	context->uar = mmap(NULL, to_mdev_ex(device)->page_size, PROT_WRITE,
 			    MAP_SHARED, cmd_fd, 0);
 	if (context->uar == MAP_FAILED)
-		goto err_free;
+		return errno;
 
 	if (resp.bf_reg_size) {
-		context->bf_page = mmap(NULL, to_mdev(ibdev)->page_size,
+		context->bf_page = mmap(NULL, to_mdev_ex(device)->page_size,
 					PROT_WRITE, MAP_SHARED, cmd_fd,
-					to_mdev(ibdev)->page_size);
+					to_mdev_ex(device)->page_size);
 		if (context->bf_page == MAP_FAILED) {
 			fprintf(stderr, PFX "Warning: BlueFlame available, "
 				"but failed to mmap() BlueFlame page.\n");
@@ -176,23 +180,52 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_
 
 	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
 
-	context->ibv_ctx.ops = mlx4_ctx_ops;
+	ibv_ctx->ops = mlx4_ctx_ops;
+	/* New verbs should be added as below
+	  * verbs_ctx->drv_new_func1 = mlx4_new_func1;
+	  */
+	return 0;
 
-	return &context->ibv_ctx;
+}
 
-err_free:
-	free(context);
-	return NULL;
+static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_fd)
+{
+	struct verbs_device *vdev;
+	struct verbs_context *context_ex;
+	int ret;
+
+	vdev = container_of(ibdev, struct verbs_device, device);
+	context_ex = calloc(1, sizeof(*context_ex) + vdev->size_of_context);
+	if (!context_ex) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	context_ex->sz = sizeof(*context_ex);
+	ret = mlx4_init_context(vdev, &context_ex->context, cmd_fd);
+	if (ret) {
+		free(context_ex);
+		return NULL;
+	}
+
+	return &context_ex->context;
 }
 
-static void mlx4_free_context(struct ibv_context *ibctx)
+static void mlx4_uninit_context(struct verbs_device *device,
+					struct ibv_context *ibv_ctx)
 {
-	struct mlx4_context *context = to_mctx(ibctx);
+	struct mlx4_context *context = to_mctx(ibv_ctx);
 
-	munmap(context->uar, to_mdev(ibctx->device)->page_size);
+	munmap(context->uar, to_mdev_ex(device)->page_size);
 	if (context->bf_page)
-		munmap(context->bf_page, to_mdev(ibctx->device)->page_size);
-	free(context);
+		munmap(context->bf_page, to_mdev_ex(device)->page_size);
+}
+
+static void mlx4_free_context(struct ibv_context *ibctx)
+{
+	mlx4_uninit_context(container_of(ibctx->device, struct verbs_device, device),
+			    ibctx);
+	free(container_of(ibctx, struct verbs_context, context));
 }
 
 static struct ibv_device_ops mlx4_dev_ops = {
@@ -200,11 +233,11 @@ static struct ibv_device_ops mlx4_dev_ops = {
 	.free_context  = mlx4_free_context
 };
 
-static struct ibv_device *mlx4_driver_init(const char *uverbs_sys_path,
-					    int abi_version)
+static struct verbs_device *mlx4_driver_init(const char *uverbs_sys_path,
+					     int abi_version)
 {
 	char			value[8];
-	struct mlx4_device    *dev;
+	struct mlx4_device_ex	*dev;
 	unsigned		vendor, device;
 	int			i;
 
@@ -226,7 +259,7 @@ static struct ibv_device *mlx4_driver_init(const char *uverbs_sys_path,
 	return NULL;
 
 found:
-	if (abi_version < MLX4_UVERBS_MIN_ABI_VERSION ||
+	if (abi_version <= MLX4_UVERBS_MIN_ABI_VERSION ||
 	    abi_version > MLX4_UVERBS_MAX_ABI_VERSION) {
 		fprintf(stderr, PFX "Fatal: ABI version %d of %s is not supported "
 			"(min supported %d, max supported %d)\n",
@@ -243,16 +276,24 @@ found:
 		return NULL;
 	}
 
-	dev->ibv_dev.ops = mlx4_dev_ops;
+	dev->verbs_dev.device.ops = mlx4_dev_ops;
 	dev->page_size   = sysconf(_SC_PAGESIZE);
-
-	return &dev->ibv_dev;
+	dev->verbs_dev.sz = sizeof(*dev);
+	dev->verbs_dev.size_of_context =
+		sizeof(struct mlx4_context) - sizeof(struct ibv_context);
+	 /* mlx4_init_context will initialize provider calls */
+	dev->verbs_dev.init_context = mlx4_init_context;
+	dev->verbs_dev.uninit_context = mlx4_uninit_context;
+
+	return &dev->verbs_dev;
 }
 
+
 #ifdef HAVE_IBV_REGISTER_DRIVER
 static __attribute__((constructor)) void mlx4_register_driver(void)
 {
-	ibv_register_driver("mlx4", mlx4_driver_init);
+	verbs_register_driver("mlx4", mlx4_driver_init);
+
 }
 #else
 /*
diff --git a/src/mlx4.h b/src/mlx4.h
index 13c13d8..c06dbd5 100644
--- a/src/mlx4.h
+++ b/src/mlx4.h
@@ -135,6 +135,11 @@ struct mlx4_device {
 	int				page_size;
 };
 
+struct mlx4_device_ex {
+	struct verbs_device	verbs_dev;
+	int			page_size;
+};
+
 struct mlx4_db_page;
 
 struct mlx4_context {
@@ -261,6 +266,17 @@ static inline struct mlx4_device *to_mdev(struct ibv_device *ibdev)
 	return to_mxxx(dev, device);
 }
 
+#define to_mxxx_ex(xxx, type)						\
+	((struct mlx4_##type##_ex *)					\
+	 ((void *) verbs##xxx - offsetof(struct mlx4_##type##_ex, verbs_##xxx)))
+
+
+static inline struct mlx4_device_ex *to_mdev_ex(const struct verbs_device
+						*verbsdev)
+{
+	return to_mxxx_ex(dev, device);
+}
+
 static inline struct mlx4_context *to_mctx(struct ibv_context *ibctx)
 {
 	return to_mxxx(ctx, context);
-- 
1.7.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v4 2/2] libmlx4: Add support for XRC QPs
       [not found] ` <1828884A29C6694DAF28B7E6B8A8237346A981D8-Q3cL8pyY+6ukrb+BlOpmy7fspsVTdybXVpNB7YpNyf8@public.gmane.org>
  2012-09-30 21:14   ` Jason Gunthorpe
  2013-03-16  0:39   ` [PATCH v4 " sean.hefty-ral2JQCrhuEAvxtiuMwx3w
@ 2013-03-16  0:39   ` sean.hefty-ral2JQCrhuEAvxtiuMwx3w
  2 siblings, 0 replies; 7+ messages in thread
From: sean.hefty-ral2JQCrhuEAvxtiuMwx3w @ 2013-03-16  0:39 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA, roland-BHEL68pLQRGGvPXPguhicg
  Cc: Sean Hefty

From: Sean Hefty <sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

Signed-off-by: Sean Hefty <sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 src/buf.c      |    6 +-
 src/cq.c       |   40 +++++++---
 src/mlx4-abi.h |    6 ++
 src/mlx4.c     |   27 +++++---
 src/mlx4.h     |   64 +++++++++++++++--
 src/qp.c       |   39 +++++++----
 src/srq.c      |  151 ++++++++++++++++++++++++++++++++++++++
 src/verbs.c    |  220 +++++++++++++++++++++++++++++++++++++++++--------------
 8 files changed, 454 insertions(+), 99 deletions(-)

diff --git a/src/buf.c b/src/buf.c
index a80bcb1..50957bb 100644
--- a/src/buf.c
+++ b/src/buf.c
@@ -78,6 +78,8 @@ int mlx4_alloc_buf(struct mlx4_buf *buf, size_t size, int page_size)
 
 void mlx4_free_buf(struct mlx4_buf *buf)
 {
-	ibv_dofork_range(buf->buf, buf->length);
-	munmap(buf->buf, buf->length);
+	if (buf->length) {
+		ibv_dofork_range(buf->buf, buf->length);
+		munmap(buf->buf, buf->length);
+	}
 }
diff --git a/src/cq.c b/src/cq.c
index 8f7a8cc..20ce1f1 100644
--- a/src/cq.c
+++ b/src/cq.c
@@ -220,33 +220,43 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
 	rmb();
 
 	qpn = ntohl(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK;
+	wc->qp_num = qpn;
 
 	is_send  = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
 	is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
 		MLX4_CQE_OPCODE_ERROR;
 
-	if (!*cur_qp ||
-	    (qpn != (*cur_qp)->ibv_qp.qp_num)) {
+	if ((qpn & MLX4_XRC_QPN_BIT) && !is_send) {
 		/*
-		 * We do not have to take the QP table lock here,
-		 * because CQs will be locked while QPs are removed
+		 * We do not have to take the XSRQ table lock here,
+		 * because CQs will be locked while SRQs are removed
 		 * from the table.
 		 */
-		*cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context), qpn);
-		if (!*cur_qp)
+		srq = mlx4_find_xsrq(&to_mctx(cq->ibv_cq.context)->xsrq_table,
+				     ntohl(cqe->g_mlpath_rqpn) & MLX4_CQE_QPN_MASK);
+		if (!srq)
 			return CQ_POLL_ERR;
+	} else {
+		if (!*cur_qp || (qpn != (*cur_qp)->verbs_qp.qp.qp_num)) {
+			/*
+		 	 * We do not have to take the QP table lock here,
+			 * because CQs will be locked while QPs are removed
+		 	 * from the table.
+			 */
+			*cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context), qpn);
+			if (!*cur_qp)
+				return CQ_POLL_ERR;
+		}
+		srq = ((*cur_qp)->verbs_qp.qp.srq) ? to_msrq((*cur_qp)->verbs_qp.qp.srq) : NULL;
 	}
 
-	wc->qp_num = (*cur_qp)->ibv_qp.qp_num;
-
 	if (is_send) {
 		wq = &(*cur_qp)->sq;
 		wqe_index = ntohs(cqe->wqe_index);
 		wq->tail += (uint16_t) (wqe_index - (uint16_t) wq->tail);
 		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
 		++wq->tail;
-	} else if ((*cur_qp)->ibv_qp.srq) {
-		srq = to_msrq((*cur_qp)->ibv_qp.srq);
+	} else if (srq) {
 		wqe_index = htons(cqe->wqe_index);
 		wc->wr_id = srq->wrid[wqe_index];
 		mlx4_free_srq_wqe(srq, wqe_index);
@@ -322,7 +332,8 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
 		wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
 		wc->wc_flags	  |= g_mlpath_rqpn & 0x80000000 ? IBV_WC_GRH : 0;
 		wc->pkey_index     = ntohl(cqe->immed_rss_invalid) & 0x7f;
-		if ((*cur_qp)->link_layer == IBV_LINK_LAYER_ETHERNET)
+		/* HACK */
+		if ((*cur_qp) && (*cur_qp)->link_layer == IBV_LINK_LAYER_ETHERNET)
 			wc->sl	   = ntohs(cqe->sl_vid) >> 13;
 		else
 			wc->sl	   = ntohs(cqe->sl_vid) >> 12;
@@ -411,7 +422,12 @@ void __mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
 	 */
 	while ((int) --prod_index - (int) cq->cons_index >= 0) {
 		cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe);
-		if ((ntohl(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
+		if (srq && srq->ext_srq &&
+		    ntohl(cqe->g_mlpath_rqpn & MLX4_CQE_QPN_MASK) == srq->verbs_srq.srq_num &&
+		    !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) {
+			mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index));
+			++nfreed;
+		} else if ((ntohl(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
 			if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
 				mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index));
 			++nfreed;
diff --git a/src/mlx4-abi.h b/src/mlx4-abi.h
index 20a40c9..40d0d9a 100644
--- a/src/mlx4-abi.h
+++ b/src/mlx4-abi.h
@@ -74,6 +74,12 @@ struct mlx4_create_srq {
 	__u64				db_addr;
 };
 
+struct mlx4_create_xsrq {
+	struct ibv_create_xsrq		ibv_cmd;
+	__u64				buf_addr;
+	__u64				db_addr;
+};
+
 struct mlx4_create_srq_resp {
 	struct ibv_create_srq_resp	ibv_resp;
 	__u32				srqn;
diff --git a/src/mlx4.c b/src/mlx4.c
index f6c12f9..0720405 100644
--- a/src/mlx4.c
+++ b/src/mlx4.c
@@ -127,13 +127,14 @@ static int mlx4_init_context(struct verbs_device *device,
 	struct ibv_get_context		cmd;
 	struct mlx4_alloc_ucontext_resp resp;
 	int				i;
-	/* verbs_context should be used for new verbs
-	  *struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx);
-	 */
+	struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx);
 
 	/* memory footprint of mlx4_context and verbs_context share
-	  * struct ibv_context.
-	*/
+	 * struct ibv_context.
+	 */
+	if (sizeof(*verbs_ctx) > *(((size_t *) ibv_ctx) - 1))
+		return ENOSYS;
+
 	context = to_mctx(ibv_ctx);
 	ibv_ctx->cmd_fd = cmd_fd;
 
@@ -152,6 +153,7 @@ static int mlx4_init_context(struct verbs_device *device,
 	for (i = 0; i < MLX4_NUM_DB_TYPE; ++i)
 		context->db_list[i] = NULL;
 
+	mlx4_init_xsrq_table(&context->xsrq_table, resp.qp_tab_size);
 	pthread_mutex_init(&context->db_list_mutex, NULL);
 
 	context->uar = mmap(NULL, to_mdev_ex(device)->page_size, PROT_WRITE,
@@ -181,11 +183,16 @@ static int mlx4_init_context(struct verbs_device *device,
 	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
 
 	ibv_ctx->ops = mlx4_ctx_ops;
-	/* New verbs should be added as below
-	  * verbs_ctx->drv_new_func1 = mlx4_new_func1;
-	  */
-	return 0;
+	verbs_ctx->has_comp_mask = VERBS_CONTEXT_XRCD | VERBS_CONTEXT_SRQ |
+				   VERBS_CONTEXT_QP;
+	verbs_ctx->close_xrcd = mlx4_close_xrcd;
+	verbs_ctx->open_xrcd = mlx4_open_xrcd;
+	verbs_ctx->create_srq_ex = mlx4_create_srq_ex;
+	verbs_ctx->get_srq_num = verbs_get_srq_num;
+	verbs_ctx->create_qp_ex = mlx4_create_qp_ex;
+	verbs_ctx->open_qp = mlx4_open_qp;
 
+	return 0;
 }
 
 static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_fd)
@@ -212,7 +219,7 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_
 }
 
 static void mlx4_uninit_context(struct verbs_device *device,
-					struct ibv_context *ibv_ctx)
+				struct ibv_context *ibv_ctx)
 {
 	struct mlx4_context *context = to_mctx(ibv_ctx);
 
diff --git a/src/mlx4.h b/src/mlx4.h
index c06dbd5..462b9ad 100644
--- a/src/mlx4.h
+++ b/src/mlx4.h
@@ -38,6 +38,7 @@
 
 #include <infiniband/driver.h>
 #include <infiniband/arch.h>
+#include <infiniband/verbs.h>
 
 #ifdef HAVE_VALGRIND_MEMCHECK_H
 
@@ -97,6 +98,37 @@ enum {
 	MLX4_QP_TABLE_MASK		= MLX4_QP_TABLE_SIZE - 1
 };
 
+#define MLX4_REMOTE_SRQN_FLAGS(wr) htonl((wr)->wr.xrc.remote_srqn << 8)
+#define MLX4_GET_SRQN(srq) (srq)->ibv_srq.srq_num
+
+enum {
+	MLX4_XSRQ_TABLE_BITS = 8,
+	MLX4_XSRQ_TABLE_SIZE = 1 << MLX4_XSRQ_TABLE_BITS,
+	MLX4_XSRQ_TABLE_MASK = MLX4_XSRQ_TABLE_SIZE - 1
+};
+
+struct mlx4_xsrq_table {
+	struct {
+		struct mlx4_srq **table;
+		int		  refcnt;
+	} xsrq_table[MLX4_XSRQ_TABLE_SIZE];
+
+	pthread_mutex_t		  mutex;
+	int			  num_xsrq;
+	int			  shift;
+	int			  mask;
+};
+
+void mlx4_init_xsrq_table(struct mlx4_xsrq_table *xsrq_table, int size);
+struct mlx4_srq *mlx4_find_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn);
+int mlx4_store_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn,
+		    struct mlx4_srq *srq);
+void mlx4_clear_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn);
+
+enum {
+	MLX4_XRC_QPN_BIT     = (1 << 23)
+};
+
 enum mlx4_db_type {
 	MLX4_DB_TYPE_CQ,
 	MLX4_DB_TYPE_RQ,
@@ -162,6 +194,8 @@ struct mlx4_context {
 	int				qp_table_shift;
 	int				qp_table_mask;
 
+	struct mlx4_xsrq_table		xsrq_table;
+
 	struct mlx4_db_page	       *db_list[MLX4_NUM_DB_TYPE];
 	pthread_mutex_t			db_list_mutex;
 };
@@ -189,7 +223,7 @@ struct mlx4_cq {
 };
 
 struct mlx4_srq {
-	struct ibv_srq			ibv_srq;
+	struct verbs_srq		verbs_srq;
 	struct mlx4_buf			buf;
 	pthread_spinlock_t		lock;
 	uint64_t		       *wrid;
@@ -201,6 +235,7 @@ struct mlx4_srq {
 	int				tail;
 	uint32_t		       *db;
 	uint16_t			counter;
+	uint8_t				ext_srq;
 };
 
 struct mlx4_wq {
@@ -216,7 +251,7 @@ struct mlx4_wq {
 };
 
 struct mlx4_qp {
-	struct ibv_qp			ibv_qp;
+	struct verbs_qp			verbs_qp;
 	struct mlx4_buf			buf;
 	int				max_inline_data;
 	int				buf_size;
@@ -256,6 +291,7 @@ static inline unsigned long align(unsigned long val, unsigned long align)
 {
 	return (val + align - 1) & ~(align - 1);
 }
+int align_queue_size(int req);
 
 #define to_mxxx(xxx, type)						\
 	((struct mlx4_##type *)					\
@@ -294,12 +330,14 @@ static inline struct mlx4_cq *to_mcq(struct ibv_cq *ibcq)
 
 static inline struct mlx4_srq *to_msrq(struct ibv_srq *ibsrq)
 {
-	return to_mxxx(srq, srq);
+	return container_of(container_of(ibsrq, struct verbs_srq, srq),
+			    struct mlx4_srq, verbs_srq);
 }
 
 static inline struct mlx4_qp *to_mqp(struct ibv_qp *ibqp)
 {
-	return to_mxxx(qp, qp);
+	return container_of(container_of(ibqp, struct verbs_qp, qp),
+			    struct mlx4_qp, verbs_qp);
 }
 
 static inline struct mlx4_ah *to_mah(struct ibv_ah *ibah)
@@ -320,6 +358,9 @@ int mlx4_query_port(struct ibv_context *context, uint8_t port,
 
 struct ibv_pd *mlx4_alloc_pd(struct ibv_context *context);
 int mlx4_free_pd(struct ibv_pd *pd);
+struct ibv_xrcd *mlx4_open_xrcd(struct ibv_context *context,
+				struct ibv_xrcd_init_attr *attr);
+int mlx4_close_xrcd(struct ibv_xrcd *xrcd);
 
 struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr,
 			    size_t length, int access);
@@ -341,20 +382,33 @@ void mlx4_cq_resize_copy_cqes(struct mlx4_cq *cq, void *buf, int new_cqe);
 
 struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
 				 struct ibv_srq_init_attr *attr);
+struct ibv_srq *mlx4_create_srq_ex(struct ibv_context *context,
+				   struct ibv_srq_init_attr_ex *attr_ex);
+struct ibv_srq *mlx4_create_xrc_srq(struct ibv_context *context,
+				    struct ibv_srq_init_attr_ex *attr_ex);
 int mlx4_modify_srq(struct ibv_srq *srq,
 		     struct ibv_srq_attr *attr,
 		     int mask);
 int mlx4_query_srq(struct ibv_srq *srq,
 			   struct ibv_srq_attr *attr);
 int mlx4_destroy_srq(struct ibv_srq *srq);
+int mlx4_destroy_xrc_srq(struct ibv_srq *srq);
 int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
 			struct mlx4_srq *srq);
+void mlx4_init_xsrq_table(struct mlx4_xsrq_table *xsrq_table, int size);
+struct mlx4_srq *mlx4_find_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn);
+int mlx4_store_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn,
+		    struct mlx4_srq *srq);
+void mlx4_clear_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn);
 void mlx4_free_srq_wqe(struct mlx4_srq *srq, int ind);
 int mlx4_post_srq_recv(struct ibv_srq *ibsrq,
 		       struct ibv_recv_wr *wr,
 		       struct ibv_recv_wr **bad_wr);
 
 struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr);
+struct ibv_qp *mlx4_create_qp_ex(struct ibv_context *context,
+				 struct ibv_qp_init_attr_ex *attr);
+struct ibv_qp *mlx4_open_qp(struct ibv_context *context, struct ibv_qp_open_attr *attr);
 int mlx4_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 		   int attr_mask,
 		   struct ibv_qp_init_attr *init_attr);
@@ -369,7 +423,7 @@ int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
 			  struct ibv_recv_wr **bad_wr);
 void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
 			   struct mlx4_qp *qp);
-int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
+int mlx4_alloc_qp_buf(struct ibv_context *context, struct ibv_qp_cap *cap,
 		       enum ibv_qp_type type, struct mlx4_qp *qp);
 void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
 		       enum ibv_qp_type type);
diff --git a/src/qp.c b/src/qp.c
index 40a6689..132660f 100644
--- a/src/qp.c
+++ b/src/qp.c
@@ -208,7 +208,7 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
 	ind = qp->sq.head;
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
-		if (wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
+		if (wq_overflow(&qp->sq, nreq, to_mcq(ibqp->send_cq))) {
 			ret = ENOMEM;
 			*bad_wr = wr;
 			goto out;
@@ -246,6 +246,9 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
 		size = sizeof *ctrl / 16;
 
 		switch (ibqp->qp_type) {
+		case IBV_QPT_XRC_SEND:
+			ctrl->srcrb_flags |= MLX4_REMOTE_SRQN_FLAGS(wr);
+			/* fall through */
 		case IBV_QPT_RC:
 		case IBV_QPT_UC:
 			switch (wr->opcode) {
@@ -452,7 +455,7 @@ int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
 	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
-		if (wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
+		if (wq_overflow(&qp->rq, nreq, to_mcq(ibqp->recv_cq))) {
 			ret = ENOMEM;
 			*bad_wr = wr;
 			goto out;
@@ -546,6 +549,7 @@ void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
 		size += sizeof (struct mlx4_wqe_raddr_seg);
 		break;
 
+	case IBV_QPT_XRC_SEND:
 	case IBV_QPT_RC:
 		size += sizeof (struct mlx4_wqe_raddr_seg);
 		/*
@@ -575,14 +579,16 @@ void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
 		; /* nothing */
 }
 
-int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
+int mlx4_alloc_qp_buf(struct ibv_context *context, struct ibv_qp_cap *cap,
 		       enum ibv_qp_type type, struct mlx4_qp *qp)
 {
 	qp->rq.max_gs	 = cap->max_recv_sge;
 
-	qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t));
-	if (!qp->sq.wrid)
-		return -1;
+	if (qp->sq.wqe_cnt) {
+		qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t));
+		if (!qp->sq.wrid)
+			return -1;
+	}
 
 	if (qp->rq.wqe_cnt) {
 		qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof (uint64_t));
@@ -607,15 +613,19 @@ int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
 		qp->sq.offset = 0;
 	}
 
-	if (mlx4_alloc_buf(&qp->buf,
-			    align(qp->buf_size, to_mdev(pd->context->device)->page_size),
-			    to_mdev(pd->context->device)->page_size)) {
-		free(qp->sq.wrid);
-		free(qp->rq.wrid);
-		return -1;
-	}
+	if (qp->buf_size) {
+		if (mlx4_alloc_buf(&qp->buf,
+				   align(qp->buf_size, to_mdev(context->device)->page_size),
+				   to_mdev(context->device)->page_size)) {
+			free(qp->sq.wrid);
+			free(qp->rq.wrid);
+			return -1;
+		}
 
-	memset(qp->buf.buf, 0, qp->buf_size);
+		memset(qp->buf.buf, 0, qp->buf_size);
+	} else {
+		qp->buf.buf = NULL;
+	}
 
 	return 0;
 }
@@ -631,6 +641,7 @@ void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
 		wqe_size -= sizeof (struct mlx4_wqe_datagram_seg);
 		break;
 
+	case IBV_QPT_XRC_SEND:
 	case IBV_QPT_UC:
 	case IBV_QPT_RC:
 		wqe_size -= sizeof (struct mlx4_wqe_raddr_seg);
diff --git a/src/srq.c b/src/srq.c
index f1d1240..bc19c51 100644
--- a/src/srq.c
+++ b/src/srq.c
@@ -42,6 +42,7 @@
 #include "mlx4.h"
 #include "doorbell.h"
 #include "wqe.h"
+#include "mlx4-abi.h"
 
 static void *get_wqe(struct mlx4_srq *srq, int n)
 {
@@ -173,3 +174,153 @@ int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
 
 	return 0;
 }
+
+void mlx4_init_xsrq_table(struct mlx4_xsrq_table *xsrq_table, int size)
+{
+	memset(xsrq_table, 0, sizeof *xsrq_table);
+	xsrq_table->num_xsrq = size;
+	xsrq_table->shift = ffs(size) - 1 - MLX4_XSRQ_TABLE_BITS;
+	xsrq_table->mask = (1 << xsrq_table->shift) - 1;
+
+	pthread_mutex_init(&xsrq_table->mutex, NULL);
+}
+
+struct mlx4_srq *mlx4_find_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn)
+{
+	int index;
+
+	index = (srqn & (xsrq_table->num_xsrq - 1)) >> xsrq_table->shift;
+	if (xsrq_table->xsrq_table[index].refcnt)
+		return xsrq_table->xsrq_table[index].table[srqn & xsrq_table->mask];
+
+	return NULL;
+}
+
+int mlx4_store_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn,
+		    struct mlx4_srq *srq)
+{
+	int index, ret = 0;
+
+	index = (srqn & (xsrq_table->num_xsrq - 1)) >> xsrq_table->shift;
+	pthread_mutex_lock(&xsrq_table->mutex);
+	if (!xsrq_table->xsrq_table[index].refcnt) {
+		xsrq_table->xsrq_table[index].table = calloc(xsrq_table->mask + 1,
+							     sizeof(struct mlx4_srq *));
+		if (!xsrq_table->xsrq_table[index].table) {
+			ret = -1;
+			goto out;
+		}
+	}
+
+	xsrq_table->xsrq_table[index].refcnt++;
+	xsrq_table->xsrq_table[index].table[srqn & xsrq_table->mask] = srq;
+
+out:
+	pthread_mutex_unlock(&xsrq_table->mutex);
+	return ret;
+}
+
+void mlx4_clear_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn)
+{
+	int index;
+
+	index = (srqn & (xsrq_table->num_xsrq - 1)) >> xsrq_table->shift;
+	pthread_mutex_lock(&xsrq_table->mutex);
+
+	if (--xsrq_table->xsrq_table[index].refcnt)
+		xsrq_table->xsrq_table[index].table[srqn & xsrq_table->mask] = NULL;
+	else
+		free(xsrq_table->xsrq_table[index].table);
+
+	pthread_mutex_unlock(&xsrq_table->mutex);
+}
+
+struct ibv_srq *mlx4_create_xrc_srq(struct ibv_context *context,
+				    struct ibv_srq_init_attr_ex *attr_ex)
+{
+	struct mlx4_create_xsrq cmd;
+	struct mlx4_create_srq_resp resp;
+	struct mlx4_srq *srq;
+	int ret;
+
+	/* Sanity check SRQ size before proceeding */
+	if (attr_ex->attr.max_wr > 1 << 16 || attr_ex->attr.max_sge > 64)
+		return NULL;
+
+	srq = calloc(1, sizeof *srq);
+	if (!srq)
+		return NULL;
+
+	if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
+		goto err;
+
+	srq->max     = align_queue_size(attr_ex->attr.max_wr + 1);
+	srq->max_gs  = attr_ex->attr.max_sge;
+	srq->counter = 0;
+	srq->ext_srq = 1;
+
+	if (mlx4_alloc_srq_buf(attr_ex->pd, &attr_ex->attr, srq))
+		goto err;
+
+	srq->db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_RQ);
+	if (!srq->db)
+		goto err_free;
+
+	*srq->db = 0;
+
+	cmd.buf_addr = (uintptr_t) srq->buf.buf;
+	cmd.db_addr  = (uintptr_t) srq->db;
+
+	ret = ibv_cmd_create_srq_ex(context, &srq->verbs_srq, attr_ex,
+				    &cmd.ibv_cmd, sizeof cmd,
+				    &resp.ibv_resp, sizeof resp);
+	if (ret)
+		goto err_db;
+
+	ret = mlx4_store_xsrq(&to_mctx(context)->xsrq_table,
+			      srq->verbs_srq.srq_num, srq);
+	if (ret)
+		goto err_destroy;
+
+	return &srq->verbs_srq.srq;
+
+err_destroy:
+	ibv_cmd_destroy_srq(&srq->verbs_srq.srq);
+err_db:
+	mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_RQ, srq->db);
+err_free:
+	free(srq->wrid);
+	mlx4_free_buf(&srq->buf);
+err:
+	free(srq);
+	return NULL;
+}
+
+int mlx4_destroy_xrc_srq(struct ibv_srq *srq)
+{
+	struct mlx4_context *mctx = to_mctx(srq->context);
+	struct mlx4_srq *msrq = to_msrq(srq);
+	struct mlx4_cq *mcq;
+	int ret;
+
+	mcq = to_mcq(msrq->verbs_srq.cq);
+	mlx4_cq_clean(mcq, 0, msrq);
+	pthread_spin_lock(&mcq->lock);
+	mlx4_clear_xsrq(&mctx->xsrq_table, msrq->verbs_srq.srq_num);
+	pthread_spin_unlock(&mcq->lock);
+
+	ret = ibv_cmd_destroy_srq(srq);
+	if (ret) {
+		pthread_spin_lock(&mcq->lock);
+		mlx4_store_xsrq(&mctx->xsrq_table, msrq->verbs_srq.srq_num, msrq);
+		pthread_spin_unlock(&mcq->lock);
+		return ret;
+	}
+
+	mlx4_free_db(mctx, MLX4_DB_TYPE_RQ, msrq->db);
+	mlx4_free_buf(&msrq->buf);
+	free(msrq->wrid);
+	free(msrq);
+
+	return 0;
+}
diff --git a/src/verbs.c b/src/verbs.c
index 408fc6d..1ebf766 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -107,6 +107,42 @@ int mlx4_free_pd(struct ibv_pd *pd)
 	return 0;
 }
 
+struct ibv_xrcd *mlx4_open_xrcd(struct ibv_context *context,
+				struct ibv_xrcd_init_attr *attr)
+{
+	struct ibv_open_xrcd cmd;
+	struct ibv_open_xrcd_resp resp;
+	struct verbs_xrcd *xrcd;
+	int ret;
+
+	xrcd = calloc(1, sizeof *xrcd);
+	if (!xrcd)
+		return NULL;
+
+	ret = ibv_cmd_open_xrcd(context, xrcd, attr,
+				&cmd, sizeof cmd, &resp, sizeof resp);
+	if (ret)
+		goto err;
+
+	return &xrcd->xrcd;
+
+err:
+	free(xrcd);
+	return NULL;
+}
+
+int mlx4_close_xrcd(struct ibv_xrcd *ib_xrcd)
+{
+	struct verbs_xrcd *xrcd = container_of(ib_xrcd, struct verbs_xrcd, xrcd);
+	int ret;
+
+	ret = ibv_cmd_close_xrcd(xrcd);
+	if (!ret)
+		free(xrcd);
+
+	return ret;
+}
+
 struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 			   int access)
 {
@@ -150,7 +186,7 @@ int mlx4_dereg_mr(struct ibv_mr *mr)
 	return 0;
 }
 
-static int align_queue_size(int req)
+int align_queue_size(int req)
 {
 	int nent;
 
@@ -294,7 +330,7 @@ int mlx4_destroy_cq(struct ibv_cq *cq)
 }
 
 struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
-				 struct ibv_srq_init_attr *attr)
+				struct ibv_srq_init_attr *attr)
 {
 	struct mlx4_create_srq      cmd;
 	struct mlx4_create_srq_resp resp;
@@ -315,6 +351,7 @@ struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
 	srq->max     = align_queue_size(attr->attr.max_wr + 1);
 	srq->max_gs  = attr->attr.max_sge;
 	srq->counter = 0;
+	srq->ext_srq = 0;
 
 	if (mlx4_alloc_srq_buf(pd, &attr->attr, srq))
 		goto err;
@@ -328,15 +365,13 @@ struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
 	cmd.buf_addr = (uintptr_t) srq->buf.buf;
 	cmd.db_addr  = (uintptr_t) srq->db;
 
-	ret = ibv_cmd_create_srq(pd, &srq->ibv_srq, attr,
+	ret = ibv_cmd_create_srq(pd, &srq->verbs_srq.srq, attr,
 				 &cmd.ibv_cmd, sizeof cmd,
 				 &resp.ibv_resp, sizeof resp);
 	if (ret)
 		goto err_db;
 
-	srq->srqn = resp.srqn;
-
-	return &srq->ibv_srq;
+	return &srq->verbs_srq.srq;
 
 err_db:
 	mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, srq->db);
@@ -351,6 +386,18 @@ err:
 	return NULL;
 }
 
+struct ibv_srq *mlx4_create_srq_ex(struct ibv_context *context,
+				   struct ibv_srq_init_attr_ex *attr_ex)
+{
+	if (!(attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) ||
+	    (attr_ex->srq_type == IBV_SRQT_BASIC))
+		return mlx4_create_srq(attr_ex->pd, (struct ibv_srq_init_attr *) attr_ex);
+	else if (attr_ex->srq_type == IBV_SRQT_XRC)
+		return mlx4_create_xrc_srq(context, attr_ex);
+
+	return NULL;
+}
+
 int mlx4_modify_srq(struct ibv_srq *srq,
 		     struct ibv_srq_attr *attr,
 		     int attr_mask)
@@ -372,6 +419,9 @@ int mlx4_destroy_srq(struct ibv_srq *srq)
 {
 	int ret;
 
+	if (to_msrq(srq)->ext_srq)
+		return mlx4_destroy_xrc_srq(srq);
+
 	ret = ibv_cmd_destroy_srq(srq);
 	if (ret)
 		return ret;
@@ -384,7 +434,8 @@ int mlx4_destroy_srq(struct ibv_srq *srq)
 	return 0;
 }
 
-struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
+struct ibv_qp *mlx4_create_qp_ex(struct ibv_context *context,
+				 struct ibv_qp_init_attr_ex *attr)
 {
 	struct mlx4_create_qp     cmd;
 	struct ibv_create_qp_resp resp;
@@ -399,30 +450,34 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
 	    attr->cap.max_inline_data > 1024)
 		return NULL;
 
-	qp = malloc(sizeof *qp);
+	qp = calloc(1, sizeof *qp);
 	if (!qp)
 		return NULL;
 
-	mlx4_calc_sq_wqe_size(&attr->cap, attr->qp_type, qp);
-
-	/*
-	 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
-	 * allow HW to prefetch.
-	 */
-	qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
-	qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes);
-	qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr);
+	if (attr->qp_type == IBV_QPT_XRC_RECV) {
+		attr->cap.max_send_wr = qp->sq.wqe_cnt = 0;
+	} else {
+		mlx4_calc_sq_wqe_size(&attr->cap, attr->qp_type, qp);
+		/*
+		 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
+		 * allow HW to prefetch.
+		 */
+		qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
+		qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes);
+	}
 
-	if (attr->srq)
-		attr->cap.max_recv_wr = qp->rq.wqe_cnt = 0;
-	else {
+	if (attr->srq || attr->qp_type == IBV_QPT_XRC_SEND ||
+	    attr->qp_type == IBV_QPT_XRC_RECV) {
+		attr->cap.max_recv_wr = qp->rq.wqe_cnt = attr->cap.max_recv_sge = 0;
+	} else {
+		qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr);
 		if (attr->cap.max_recv_sge < 1)
 			attr->cap.max_recv_sge = 1;
 		if (attr->cap.max_recv_wr < 1)
 			attr->cap.max_recv_wr = 1;
 	}
 
-	if (mlx4_alloc_qp_buf(pd, &attr->cap, attr->qp_type, qp))
+	if (mlx4_alloc_qp_buf(context, &attr->cap, attr->qp_type, qp))
 		goto err;
 
 	mlx4_init_qp_indices(qp);
@@ -431,19 +486,18 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
 	    pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
 		goto err_free;
 
-	if (!attr->srq) {
-		qp->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
+	if (attr->cap.max_recv_sge) {
+		qp->db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_RQ);
 		if (!qp->db)
 			goto err_free;
 
 		*qp->db = 0;
+		cmd.db_addr = (uintptr_t) qp->db;
+	} else {
+		cmd.db_addr = 0;
 	}
 
 	cmd.buf_addr	    = (uintptr_t) qp->buf.buf;
-	if (attr->srq)
-		cmd.db_addr = 0;
-	else
-		cmd.db_addr = (uintptr_t) qp->db;
 	cmd.log_sq_stride   = qp->sq.wqe_shift;
 	for (cmd.log_sq_bb_count = 0;
 	     qp->sq.wqe_cnt > 1 << cmd.log_sq_bb_count;
@@ -452,37 +506,39 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
 	cmd.sq_no_prefetch = 0;	/* OK for ABI 2: just a reserved field */
 	memset(cmd.reserved, 0, sizeof cmd.reserved);
 
-	pthread_mutex_lock(&to_mctx(pd->context)->qp_table_mutex);
+	pthread_mutex_lock(&to_mctx(context)->qp_table_mutex);
 
-	ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd.ibv_cmd, sizeof cmd,
-				&resp, sizeof resp);
+	ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp, attr,
+				   &cmd.ibv_cmd, sizeof cmd, &resp, sizeof resp);
 	if (ret)
 		goto err_rq_db;
 
-	ret = mlx4_store_qp(to_mctx(pd->context), qp->ibv_qp.qp_num, qp);
-	if (ret)
-		goto err_destroy;
-	pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
+	if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) {
+		ret = mlx4_store_qp(to_mctx(context), qp->verbs_qp.qp.qp_num, qp);
+		if (ret)
+			goto err_destroy;
+	}
+	pthread_mutex_unlock(&to_mctx(context)->qp_table_mutex);
 
 	qp->rq.wqe_cnt = qp->rq.max_post = attr->cap.max_recv_wr;
 	qp->rq.max_gs  = attr->cap.max_recv_sge;
 	mlx4_set_sq_sizes(qp, &attr->cap, attr->qp_type);
 
-	qp->doorbell_qpn    = htonl(qp->ibv_qp.qp_num << 8);
+	qp->doorbell_qpn    = htonl(qp->verbs_qp.qp.qp_num << 8);
 	if (attr->sq_sig_all)
 		qp->sq_signal_bits = htonl(MLX4_WQE_CTRL_CQ_UPDATE);
 	else
 		qp->sq_signal_bits = 0;
 
-	return &qp->ibv_qp;
+	return &qp->verbs_qp.qp;
 
 err_destroy:
-	ibv_cmd_destroy_qp(&qp->ibv_qp);
+	ibv_cmd_destroy_qp(&qp->verbs_qp.qp);
 
 err_rq_db:
-	pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
-	if (!attr->srq)
-		mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, qp->db);
+	pthread_mutex_unlock(&to_mctx(context)->qp_table_mutex);
+	if (attr->cap.max_recv_sge)
+		mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_RQ, qp->db);
 
 err_free:
 	free(qp->sq.wrid);
@@ -496,6 +552,43 @@ err:
 	return NULL;
 }
 
+struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
+{
+	struct ibv_qp_init_attr_ex attr_ex;
+	struct ibv_qp *qp;
+
+	memcpy(&attr_ex, attr, sizeof *attr);
+	attr_ex.comp_mask = IBV_QP_INIT_ATTR_PD;
+	attr_ex.pd = pd;
+	qp = mlx4_create_qp_ex(pd->context, &attr_ex);
+	if (qp)
+		memcpy(attr, &attr_ex, sizeof *attr);
+	return qp;
+}
+
+struct ibv_qp *mlx4_open_qp(struct ibv_context *context, struct ibv_qp_open_attr *attr)
+{
+	struct ibv_open_qp cmd;
+	struct ibv_create_qp_resp resp;
+	struct mlx4_qp *qp;
+	int ret;
+
+	qp = calloc(1, sizeof *qp);
+	if (!qp)
+		return NULL;
+
+	ret = ibv_cmd_open_qp(context, &qp->verbs_qp, attr,
+			      &cmd, sizeof cmd, &resp, sizeof resp);
+	if (ret)
+		goto err;
+
+	return &qp->verbs_qp.qp;
+
+err:
+	free(qp);
+	return NULL;
+}
+
 int mlx4_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
 		   int attr_mask,
 		   struct ibv_qp_init_attr *init_attr)
@@ -526,7 +619,7 @@ int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 	int ret;
 
 	if (attr_mask & IBV_QP_PORT) {
-		if (ibv_query_port(qp->pd->context, attr->port_num, &port_attr))
+		if (ibv_query_port(qp->context, attr->port_num, &port_attr))
 			return -1;
 		mqp->link_layer = port_attr.link_layer;
 	}
@@ -542,13 +635,14 @@ int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 	if (!ret		       &&
 	    (attr_mask & IBV_QP_STATE) &&
 	    attr->qp_state == IBV_QPS_RESET) {
-		mlx4_cq_clean(to_mcq(qp->recv_cq), qp->qp_num,
-			       qp->srq ? to_msrq(qp->srq) : NULL);
-		if (qp->send_cq != qp->recv_cq)
+		if (qp->recv_cq)
+			mlx4_cq_clean(to_mcq(qp->recv_cq), qp->qp_num,
+				      qp->srq ? to_msrq(qp->srq) : NULL);
+		if (qp->send_cq && qp->send_cq != qp->recv_cq)
 			mlx4_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL);
 
 		mlx4_init_qp_indices(to_mqp(qp));
-		if (!qp->srq)
+		if (to_mqp(qp)->rq.wqe_cnt)
 			*to_mqp(qp)->db = 0;
 	}
 
@@ -560,9 +654,14 @@ static void mlx4_lock_cqs(struct ibv_qp *qp)
 	struct mlx4_cq *send_cq = to_mcq(qp->send_cq);
 	struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq);
 
-	if (send_cq == recv_cq)
+	if (!qp->send_cq || !qp->recv_cq) {
+		if (qp->send_cq)
+			pthread_spin_lock(&send_cq->lock);
+		else if (qp->recv_cq)
+			pthread_spin_lock(&recv_cq->lock);
+	} else if (send_cq == recv_cq) {
 		pthread_spin_lock(&send_cq->lock);
-	else if (send_cq->cqn < recv_cq->cqn) {
+	} else if (send_cq->cqn < recv_cq->cqn) {
 		pthread_spin_lock(&send_cq->lock);
 		pthread_spin_lock(&recv_cq->lock);
 	} else {
@@ -576,9 +675,15 @@ static void mlx4_unlock_cqs(struct ibv_qp *qp)
 	struct mlx4_cq *send_cq = to_mcq(qp->send_cq);
 	struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq);
 
-	if (send_cq == recv_cq)
+
+	if (!qp->send_cq || !qp->recv_cq) {
+		if (qp->send_cq)
+			pthread_spin_unlock(&send_cq->lock);
+		else if (qp->recv_cq)
+			pthread_spin_unlock(&recv_cq->lock);
+	} else if (send_cq == recv_cq) {
 		pthread_spin_unlock(&send_cq->lock);
-	else if (send_cq->cqn < recv_cq->cqn) {
+	} else if (send_cq->cqn < recv_cq->cqn) {
 		pthread_spin_unlock(&recv_cq->lock);
 		pthread_spin_unlock(&send_cq->lock);
 	} else {
@@ -601,21 +706,24 @@ int mlx4_destroy_qp(struct ibv_qp *ibqp)
 
 	mlx4_lock_cqs(ibqp);
 
-	__mlx4_cq_clean(to_mcq(ibqp->recv_cq), ibqp->qp_num,
-			ibqp->srq ? to_msrq(ibqp->srq) : NULL);
-	if (ibqp->send_cq != ibqp->recv_cq)
+	if (ibqp->recv_cq)
+		__mlx4_cq_clean(to_mcq(ibqp->recv_cq), ibqp->qp_num,
+				ibqp->srq ? to_msrq(ibqp->srq) : NULL);
+	if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq)
 		__mlx4_cq_clean(to_mcq(ibqp->send_cq), ibqp->qp_num, NULL);
 
-	mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num);
+	if (qp->sq.wqe_cnt || qp->rq.wqe_cnt)
+		mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num);
 
 	mlx4_unlock_cqs(ibqp);
 	pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex);
 
-	if (!ibqp->srq)
+	if (qp->rq.wqe_cnt) {
 		mlx4_free_db(to_mctx(ibqp->context), MLX4_DB_TYPE_RQ, qp->db);
-	free(qp->sq.wrid);
-	if (qp->rq.wqe_cnt)
 		free(qp->rq.wrid);
+	}
+	if (qp->sq.wqe_cnt)
+		free(qp->sq.wrid);
 	mlx4_free_buf(&qp->buf);
 	free(qp);
 
-- 
1.7.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH libmlx4 v5 1/2] Infra-structure changes to support verbs extensions
       [not found]     ` <1363394396-951-1-git-send-email-sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
@ 2013-03-18 19:10       ` sean.hefty-ral2JQCrhuEAvxtiuMwx3w
       [not found]       ` <1363633827-27885-1-git-send-email-sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
  1 sibling, 0 replies; 7+ messages in thread
From: sean.hefty-ral2JQCrhuEAvxtiuMwx3w @ 2013-03-18 19:10 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA, roland-BHEL68pLQRGGvPXPguhicg
  Cc: Yishai Hadas, Tzahi Oved

From: Yishai Hadas <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

Signed-off-by: Yishai Hadas <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Tzahi Oved <tzahio-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
I used an older version (v4) of the patch series.  This is the latest
version that Yishai had provided.

Changes from v4:
- Increase MLX4_UVERBS_MIN_ABI_VERSION to 3; ABI version 2 is no longer
  supported.
- Fix the condition under which the error message in mlx4_driver_init is
  displayed.

 src/mlx4-abi.h |    2 +-
 src/mlx4.c     |   82 ++++++++++++++++++++++++++++++--------------------------
 src/mlx4.h     |    8 ++++-
 3 files changed, 51 insertions(+), 41 deletions(-)

diff --git a/src/mlx4-abi.h b/src/mlx4-abi.h
index 20a40c9..7cf68b4 100644
--- a/src/mlx4-abi.h
+++ b/src/mlx4-abi.h
@@ -35,7 +35,7 @@
 
 #include <infiniband/kern-abi.h>
 
-#define MLX4_UVERBS_MIN_ABI_VERSION	2
+#define MLX4_UVERBS_MIN_ABI_VERSION	3
 #define MLX4_UVERBS_MAX_ABI_VERSION	3
 
 struct mlx4_alloc_ucontext_resp {
diff --git a/src/mlx4.c b/src/mlx4.c
index 8cf249a..dcea026 100644
--- a/src/mlx4.c
+++ b/src/mlx4.c
@@ -120,22 +120,26 @@ static struct ibv_context_ops mlx4_ctx_ops = {
 	.detach_mcast  = ibv_cmd_detach_mcast
 };
 
-static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_fd)
+static int mlx4_init_context(struct verbs_device *v_device,
+			struct ibv_context *ibv_ctx, int cmd_fd)
 {
-	struct mlx4_context	       *context;
+	struct mlx4_context		*context;
 	struct ibv_get_context		cmd;
 	struct mlx4_alloc_ucontext_resp resp;
 	int				i;
+	/* verbs_context should be used for new verbs
+	  *struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx);
+	 */
 
-	context = calloc(1, sizeof *context);
-	if (!context)
-		return NULL;
-
-	context->ibv_ctx.cmd_fd = cmd_fd;
+	/* memory footprint of mlx4_context and verbs_context share
+	  * struct ibv_context.
+	*/
+	context = to_mctx(ibv_ctx);
+	ibv_ctx->cmd_fd = cmd_fd;
 
-	if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
+	if (ibv_cmd_get_context(ibv_ctx, &cmd, sizeof(cmd),
 				&resp.ibv_resp, sizeof resp))
-		goto err_free;
+		return errno;
 
 	context->num_qps	= resp.qp_tab_size;
 	context->qp_table_shift = ffs(context->num_qps) - 1 - MLX4_QP_TABLE_BITS;
@@ -150,15 +154,16 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_
 
 	pthread_mutex_init(&context->db_list_mutex, NULL);
 
-	context->uar = mmap(NULL, to_mdev(ibdev)->page_size, PROT_WRITE,
-			    MAP_SHARED, cmd_fd, 0);
+	context->uar = mmap(NULL, to_mdev(&v_device->device)->page_size,
+			    PROT_WRITE, MAP_SHARED, cmd_fd, 0);
 	if (context->uar == MAP_FAILED)
-		goto err_free;
+		return errno;
 
 	if (resp.bf_reg_size) {
-		context->bf_page = mmap(NULL, to_mdev(ibdev)->page_size,
+		context->bf_page = mmap(NULL,
+					to_mdev(&v_device->device)->page_size,
 					PROT_WRITE, MAP_SHARED, cmd_fd,
-					to_mdev(ibdev)->page_size);
+					to_mdev(&v_device->device)->page_size);
 		if (context->bf_page == MAP_FAILED) {
 			fprintf(stderr, PFX "Warning: BlueFlame available, "
 				"but failed to mmap() BlueFlame page.\n");
@@ -176,35 +181,29 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_
 
 	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
 
-	context->ibv_ctx.ops = mlx4_ctx_ops;
-
-	return &context->ibv_ctx;
+	ibv_ctx->ops = mlx4_ctx_ops;
+	/* New verbs should be added as below
+	  * verbs_ctx->drv_new_func1 = mlx4_new_func1;
+	  */
+	return 0;
 
-err_free:
-	free(context);
-	return NULL;
 }
 
-static void mlx4_free_context(struct ibv_context *ibctx)
+static void mlx4_uninit_context(struct verbs_device *v_device,
+					struct ibv_context *ibv_ctx)
 {
-	struct mlx4_context *context = to_mctx(ibctx);
+	struct mlx4_context *context = to_mctx(ibv_ctx);
 
-	munmap(context->uar, to_mdev(ibctx->device)->page_size);
+	munmap(context->uar, to_mdev(&v_device->device)->page_size);
 	if (context->bf_page)
-		munmap(context->bf_page, to_mdev(ibctx->device)->page_size);
-	free(context);
+		munmap(context->bf_page, to_mdev(&v_device->device)->page_size);
 }
 
-static struct ibv_device_ops mlx4_dev_ops = {
-	.alloc_context = mlx4_alloc_context,
-	.free_context  = mlx4_free_context
-};
-
-static struct ibv_device *mlx4_driver_init(const char *uverbs_sys_path,
-					    int abi_version)
+static struct verbs_device *mlx4_driver_init(const char *uverbs_sys_path,
+					     int abi_version)
 {
 	char			value[8];
-	struct mlx4_device    *dev;
+	struct mlx4_device	*dev;
 	unsigned		vendor, device;
 	int			i;
 
@@ -236,23 +235,30 @@ found:
 		return NULL;
 	}
 
-	dev = malloc(sizeof *dev);
+	dev = calloc(1, sizeof(*dev));
 	if (!dev) {
 		fprintf(stderr, PFX "Fatal: couldn't allocate device for %s\n",
 			uverbs_sys_path);
 		return NULL;
 	}
 
-	dev->ibv_dev.ops = mlx4_dev_ops;
 	dev->page_size   = sysconf(_SC_PAGESIZE);
-
-	return &dev->ibv_dev;
+	dev->verbs_dev.sz = sizeof(*dev);
+	dev->verbs_dev.size_of_context =
+		sizeof(struct mlx4_context) - sizeof(struct ibv_context);
+	 /* mlx4_init_context will initialize provider calls */
+	dev->verbs_dev.init_context = mlx4_init_context;
+	dev->verbs_dev.uninit_context = mlx4_uninit_context;
+
+	return &dev->verbs_dev;
 }
 
+
 #ifdef HAVE_IBV_REGISTER_DRIVER
 static __attribute__((constructor)) void mlx4_register_driver(void)
 {
-	ibv_register_driver("mlx4", mlx4_driver_init);
+	verbs_register_driver("mlx4", mlx4_driver_init);
+
 }
 #else
 /*
diff --git a/src/mlx4.h b/src/mlx4.h
index 13c13d8..5028fea 100644
--- a/src/mlx4.h
+++ b/src/mlx4.h
@@ -131,7 +131,7 @@ enum {
 };
 
 struct mlx4_device {
-	struct ibv_device		ibv_dev;
+	struct verbs_device		verbs_dev;
 	int				page_size;
 };
 
@@ -258,7 +258,11 @@ static inline unsigned long align(unsigned long val, unsigned long align)
 
 static inline struct mlx4_device *to_mdev(struct ibv_device *ibdev)
 {
-	return to_mxxx(dev, device);
+	/* ibv_device is first field of verbs_device
+	 * see try_driver in libibverbs
+	 */
+	return ((struct mlx4_device *)
+		((void *) ibdev - offsetof(struct mlx4_device, verbs_dev)));
 }
 
 static inline struct mlx4_context *to_mctx(struct ibv_context *ibctx)
-- 
1.7.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH libmlx4 v5 2/2] Add support for XRC QPs
       [not found]       ` <1363633827-27885-1-git-send-email-sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
@ 2013-03-18 19:10         ` sean.hefty-ral2JQCrhuEAvxtiuMwx3w
  0 siblings, 0 replies; 7+ messages in thread
From: sean.hefty-ral2JQCrhuEAvxtiuMwx3w @ 2013-03-18 19:10 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA, roland-BHEL68pLQRGGvPXPguhicg
  Cc: Sean Hefty

From: Sean Hefty <sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

Signed-off-by: Sean Hefty <sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
Changes from v4:
- Updated to match the changes made to patch 1 of this series.

 src/buf.c      |    6 +-
 src/cq.c       |   40 +++++++---
 src/mlx4-abi.h |    6 ++
 src/mlx4.c     |   27 +++++---
 src/mlx4.h     |   64 +++++++++++++++--
 src/qp.c       |   39 +++++++----
 src/srq.c      |  151 ++++++++++++++++++++++++++++++++++++++
 src/verbs.c    |  220 +++++++++++++++++++++++++++++++++++++++++--------------
 8 files changed, 454 insertions(+), 99 deletions(-)

diff --git a/src/buf.c b/src/buf.c
index a80bcb1..50957bb 100644
--- a/src/buf.c
+++ b/src/buf.c
@@ -78,6 +78,8 @@ int mlx4_alloc_buf(struct mlx4_buf *buf, size_t size, int page_size)
 
 void mlx4_free_buf(struct mlx4_buf *buf)
 {
-	ibv_dofork_range(buf->buf, buf->length);
-	munmap(buf->buf, buf->length);
+	if (buf->length) {
+		ibv_dofork_range(buf->buf, buf->length);
+		munmap(buf->buf, buf->length);
+	}
 }
diff --git a/src/cq.c b/src/cq.c
index 8f7a8cc..20ce1f1 100644
--- a/src/cq.c
+++ b/src/cq.c
@@ -220,33 +220,43 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
 	rmb();
 
 	qpn = ntohl(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK;
+	wc->qp_num = qpn;
 
 	is_send  = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
 	is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
 		MLX4_CQE_OPCODE_ERROR;
 
-	if (!*cur_qp ||
-	    (qpn != (*cur_qp)->ibv_qp.qp_num)) {
+	if ((qpn & MLX4_XRC_QPN_BIT) && !is_send) {
 		/*
-		 * We do not have to take the QP table lock here,
-		 * because CQs will be locked while QPs are removed
+		 * We do not have to take the XSRQ table lock here,
+		 * because CQs will be locked while SRQs are removed
 		 * from the table.
 		 */
-		*cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context), qpn);
-		if (!*cur_qp)
+		srq = mlx4_find_xsrq(&to_mctx(cq->ibv_cq.context)->xsrq_table,
+				     ntohl(cqe->g_mlpath_rqpn) & MLX4_CQE_QPN_MASK);
+		if (!srq)
 			return CQ_POLL_ERR;
+	} else {
+		if (!*cur_qp || (qpn != (*cur_qp)->verbs_qp.qp.qp_num)) {
+			/*
+		 	 * We do not have to take the QP table lock here,
+			 * because CQs will be locked while QPs are removed
+		 	 * from the table.
+			 */
+			*cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context), qpn);
+			if (!*cur_qp)
+				return CQ_POLL_ERR;
+		}
+		srq = ((*cur_qp)->verbs_qp.qp.srq) ? to_msrq((*cur_qp)->verbs_qp.qp.srq) : NULL;
 	}
 
-	wc->qp_num = (*cur_qp)->ibv_qp.qp_num;
-
 	if (is_send) {
 		wq = &(*cur_qp)->sq;
 		wqe_index = ntohs(cqe->wqe_index);
 		wq->tail += (uint16_t) (wqe_index - (uint16_t) wq->tail);
 		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
 		++wq->tail;
-	} else if ((*cur_qp)->ibv_qp.srq) {
-		srq = to_msrq((*cur_qp)->ibv_qp.srq);
+	} else if (srq) {
 		wqe_index = htons(cqe->wqe_index);
 		wc->wr_id = srq->wrid[wqe_index];
 		mlx4_free_srq_wqe(srq, wqe_index);
@@ -322,7 +332,8 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
 		wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
 		wc->wc_flags	  |= g_mlpath_rqpn & 0x80000000 ? IBV_WC_GRH : 0;
 		wc->pkey_index     = ntohl(cqe->immed_rss_invalid) & 0x7f;
-		if ((*cur_qp)->link_layer == IBV_LINK_LAYER_ETHERNET)
+		/* HACK */
+		if ((*cur_qp) && (*cur_qp)->link_layer == IBV_LINK_LAYER_ETHERNET)
 			wc->sl	   = ntohs(cqe->sl_vid) >> 13;
 		else
 			wc->sl	   = ntohs(cqe->sl_vid) >> 12;
@@ -411,7 +422,12 @@ void __mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
 	 */
 	while ((int) --prod_index - (int) cq->cons_index >= 0) {
 		cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe);
-		if ((ntohl(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
+		if (srq && srq->ext_srq &&
+		    ntohl(cqe->g_mlpath_rqpn & MLX4_CQE_QPN_MASK) == srq->verbs_srq.srq_num &&
+		    !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) {
+			mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index));
+			++nfreed;
+		} else if ((ntohl(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
 			if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
 				mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index));
 			++nfreed;
diff --git a/src/mlx4-abi.h b/src/mlx4-abi.h
index 7cf68b4..53202f0 100644
--- a/src/mlx4-abi.h
+++ b/src/mlx4-abi.h
@@ -74,6 +74,12 @@ struct mlx4_create_srq {
 	__u64				db_addr;
 };
 
+struct mlx4_create_xsrq {
+	struct ibv_create_xsrq		ibv_cmd;
+	__u64				buf_addr;
+	__u64				db_addr;
+};
+
 struct mlx4_create_srq_resp {
 	struct ibv_create_srq_resp	ibv_resp;
 	__u32				srqn;
diff --git a/src/mlx4.c b/src/mlx4.c
index dcea026..801f4f0 100644
--- a/src/mlx4.c
+++ b/src/mlx4.c
@@ -127,13 +127,14 @@ static int mlx4_init_context(struct verbs_device *v_device,
 	struct ibv_get_context		cmd;
 	struct mlx4_alloc_ucontext_resp resp;
 	int				i;
-	/* verbs_context should be used for new verbs
-	  *struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx);
-	 */
+	struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx);
 
 	/* memory footprint of mlx4_context and verbs_context share
-	  * struct ibv_context.
-	*/
+	 * struct ibv_context.
+	 */
+	if (sizeof(*verbs_ctx) > *(((size_t *) ibv_ctx) - 1))
+		return ENOSYS;
+
 	context = to_mctx(ibv_ctx);
 	ibv_ctx->cmd_fd = cmd_fd;
 
@@ -152,6 +153,7 @@ static int mlx4_init_context(struct verbs_device *v_device,
 	for (i = 0; i < MLX4_NUM_DB_TYPE; ++i)
 		context->db_list[i] = NULL;
 
+	mlx4_init_xsrq_table(&context->xsrq_table, resp.qp_tab_size);
 	pthread_mutex_init(&context->db_list_mutex, NULL);
 
 	context->uar = mmap(NULL, to_mdev(&v_device->device)->page_size,
@@ -182,15 +184,20 @@ static int mlx4_init_context(struct verbs_device *v_device,
 	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
 
 	ibv_ctx->ops = mlx4_ctx_ops;
-	/* New verbs should be added as below
-	  * verbs_ctx->drv_new_func1 = mlx4_new_func1;
-	  */
-	return 0;
+	verbs_ctx->has_comp_mask = VERBS_CONTEXT_XRCD | VERBS_CONTEXT_SRQ |
+				   VERBS_CONTEXT_QP;
+	verbs_ctx->close_xrcd = mlx4_close_xrcd;
+	verbs_ctx->open_xrcd = mlx4_open_xrcd;
+	verbs_ctx->create_srq_ex = mlx4_create_srq_ex;
+	verbs_ctx->get_srq_num = verbs_get_srq_num;
+	verbs_ctx->create_qp_ex = mlx4_create_qp_ex;
+	verbs_ctx->open_qp = mlx4_open_qp;
 
+	return 0;
 }
 
 static void mlx4_uninit_context(struct verbs_device *v_device,
-					struct ibv_context *ibv_ctx)
+				struct ibv_context *ibv_ctx)
 {
 	struct mlx4_context *context = to_mctx(ibv_ctx);
 
diff --git a/src/mlx4.h b/src/mlx4.h
index 5028fea..6c627e7 100644
--- a/src/mlx4.h
+++ b/src/mlx4.h
@@ -38,6 +38,7 @@
 
 #include <infiniband/driver.h>
 #include <infiniband/arch.h>
+#include <infiniband/verbs.h>
 
 #ifdef HAVE_VALGRIND_MEMCHECK_H
 
@@ -97,6 +98,37 @@ enum {
 	MLX4_QP_TABLE_MASK		= MLX4_QP_TABLE_SIZE - 1
 };
 
+#define MLX4_REMOTE_SRQN_FLAGS(wr) htonl((wr)->wr.xrc.remote_srqn << 8)
+#define MLX4_GET_SRQN(srq) ((srq)->verbs_srq.srq_num)
+
+enum {
+	MLX4_XSRQ_TABLE_BITS = 8,
+	MLX4_XSRQ_TABLE_SIZE = 1 << MLX4_XSRQ_TABLE_BITS,
+	MLX4_XSRQ_TABLE_MASK = MLX4_XSRQ_TABLE_SIZE - 1
+};
+
+struct mlx4_xsrq_table {
+	struct {
+		struct mlx4_srq **table;
+		int		  refcnt;
+	} xsrq_table[MLX4_XSRQ_TABLE_SIZE];
+
+	pthread_mutex_t		  mutex;
+	int			  num_xsrq;
+	int			  shift;
+	int			  mask;
+};
+
+void mlx4_init_xsrq_table(struct mlx4_xsrq_table *xsrq_table, int size);
+struct mlx4_srq *mlx4_find_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn);
+int mlx4_store_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn,
+		    struct mlx4_srq *srq);
+void mlx4_clear_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn);
+
+enum {
+	MLX4_XRC_QPN_BIT     = (1 << 23)
+};
+
 enum mlx4_db_type {
 	MLX4_DB_TYPE_CQ,
 	MLX4_DB_TYPE_RQ,
@@ -157,6 +189,8 @@ struct mlx4_context {
 	int				qp_table_shift;
 	int				qp_table_mask;
 
+	struct mlx4_xsrq_table		xsrq_table;
+
 	struct mlx4_db_page	       *db_list[MLX4_NUM_DB_TYPE];
 	pthread_mutex_t			db_list_mutex;
 };
@@ -184,7 +218,7 @@ struct mlx4_cq {
 };
 
 struct mlx4_srq {
-	struct ibv_srq			ibv_srq;
+	struct verbs_srq		verbs_srq;
 	struct mlx4_buf			buf;
 	pthread_spinlock_t		lock;
 	uint64_t		       *wrid;
@@ -196,6 +230,7 @@ struct mlx4_srq {
 	int				tail;
 	uint32_t		       *db;
 	uint16_t			counter;
+	uint8_t				ext_srq;
 };
 
 struct mlx4_wq {
@@ -211,7 +246,7 @@ struct mlx4_wq {
 };
 
 struct mlx4_qp {
-	struct ibv_qp			ibv_qp;
+	struct verbs_qp			verbs_qp;
 	struct mlx4_buf			buf;
 	int				max_inline_data;
 	int				buf_size;
@@ -251,6 +286,7 @@ static inline unsigned long align(unsigned long val, unsigned long align)
 {
 	return (val + align - 1) & ~(align - 1);
 }
+int align_queue_size(int req);
 
 #define to_mxxx(xxx, type)						\
 	((struct mlx4_##type *)					\
@@ -282,12 +318,14 @@ static inline struct mlx4_cq *to_mcq(struct ibv_cq *ibcq)
 
 static inline struct mlx4_srq *to_msrq(struct ibv_srq *ibsrq)
 {
-	return to_mxxx(srq, srq);
+	return container_of(container_of(ibsrq, struct verbs_srq, srq),
+			    struct mlx4_srq, verbs_srq);
 }
 
 static inline struct mlx4_qp *to_mqp(struct ibv_qp *ibqp)
 {
-	return to_mxxx(qp, qp);
+	return container_of(container_of(ibqp, struct verbs_qp, qp),
+			    struct mlx4_qp, verbs_qp);
 }
 
 static inline struct mlx4_ah *to_mah(struct ibv_ah *ibah)
@@ -308,6 +346,9 @@ int mlx4_query_port(struct ibv_context *context, uint8_t port,
 
 struct ibv_pd *mlx4_alloc_pd(struct ibv_context *context);
 int mlx4_free_pd(struct ibv_pd *pd);
+struct ibv_xrcd *mlx4_open_xrcd(struct ibv_context *context,
+				struct ibv_xrcd_init_attr *attr);
+int mlx4_close_xrcd(struct ibv_xrcd *xrcd);
 
 struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr,
 			    size_t length, int access);
@@ -329,20 +370,33 @@ void mlx4_cq_resize_copy_cqes(struct mlx4_cq *cq, void *buf, int new_cqe);
 
 struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
 				 struct ibv_srq_init_attr *attr);
+struct ibv_srq *mlx4_create_srq_ex(struct ibv_context *context,
+				   struct ibv_srq_init_attr_ex *attr_ex);
+struct ibv_srq *mlx4_create_xrc_srq(struct ibv_context *context,
+				    struct ibv_srq_init_attr_ex *attr_ex);
 int mlx4_modify_srq(struct ibv_srq *srq,
 		     struct ibv_srq_attr *attr,
 		     int mask);
 int mlx4_query_srq(struct ibv_srq *srq,
 			   struct ibv_srq_attr *attr);
 int mlx4_destroy_srq(struct ibv_srq *srq);
+int mlx4_destroy_xrc_srq(struct ibv_srq *srq);
 int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
 			struct mlx4_srq *srq);
+void mlx4_init_xsrq_table(struct mlx4_xsrq_table *xsrq_table, int size);
+struct mlx4_srq *mlx4_find_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn);
+int mlx4_store_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn,
+		    struct mlx4_srq *srq);
+void mlx4_clear_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn);
 void mlx4_free_srq_wqe(struct mlx4_srq *srq, int ind);
 int mlx4_post_srq_recv(struct ibv_srq *ibsrq,
 		       struct ibv_recv_wr *wr,
 		       struct ibv_recv_wr **bad_wr);
 
 struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr);
+struct ibv_qp *mlx4_create_qp_ex(struct ibv_context *context,
+				 struct ibv_qp_init_attr_ex *attr);
+struct ibv_qp *mlx4_open_qp(struct ibv_context *context, struct ibv_qp_open_attr *attr);
 int mlx4_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 		   int attr_mask,
 		   struct ibv_qp_init_attr *init_attr);
@@ -357,7 +411,7 @@ int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
 			  struct ibv_recv_wr **bad_wr);
 void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
 			   struct mlx4_qp *qp);
-int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
+int mlx4_alloc_qp_buf(struct ibv_context *context, struct ibv_qp_cap *cap,
 		       enum ibv_qp_type type, struct mlx4_qp *qp);
 void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
 		       enum ibv_qp_type type);
diff --git a/src/qp.c b/src/qp.c
index 40a6689..132660f 100644
--- a/src/qp.c
+++ b/src/qp.c
@@ -208,7 +208,7 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
 	ind = qp->sq.head;
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
-		if (wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
+		if (wq_overflow(&qp->sq, nreq, to_mcq(ibqp->send_cq))) {
 			ret = ENOMEM;
 			*bad_wr = wr;
 			goto out;
@@ -246,6 +246,9 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
 		size = sizeof *ctrl / 16;
 
 		switch (ibqp->qp_type) {
+		case IBV_QPT_XRC_SEND:
+			ctrl->srcrb_flags |= MLX4_REMOTE_SRQN_FLAGS(wr);
+			/* fall through */
 		case IBV_QPT_RC:
 		case IBV_QPT_UC:
 			switch (wr->opcode) {
@@ -452,7 +455,7 @@ int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
 	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
-		if (wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
+		if (wq_overflow(&qp->rq, nreq, to_mcq(ibqp->recv_cq))) {
 			ret = ENOMEM;
 			*bad_wr = wr;
 			goto out;
@@ -546,6 +549,7 @@ void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
 		size += sizeof (struct mlx4_wqe_raddr_seg);
 		break;
 
+	case IBV_QPT_XRC_SEND:
 	case IBV_QPT_RC:
 		size += sizeof (struct mlx4_wqe_raddr_seg);
 		/*
@@ -575,14 +579,16 @@ void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
 		; /* nothing */
 }
 
-int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
+int mlx4_alloc_qp_buf(struct ibv_context *context, struct ibv_qp_cap *cap,
 		       enum ibv_qp_type type, struct mlx4_qp *qp)
 {
 	qp->rq.max_gs	 = cap->max_recv_sge;
 
-	qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t));
-	if (!qp->sq.wrid)
-		return -1;
+	if (qp->sq.wqe_cnt) {
+		qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t));
+		if (!qp->sq.wrid)
+			return -1;
+	}
 
 	if (qp->rq.wqe_cnt) {
 		qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof (uint64_t));
@@ -607,15 +613,19 @@ int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
 		qp->sq.offset = 0;
 	}
 
-	if (mlx4_alloc_buf(&qp->buf,
-			    align(qp->buf_size, to_mdev(pd->context->device)->page_size),
-			    to_mdev(pd->context->device)->page_size)) {
-		free(qp->sq.wrid);
-		free(qp->rq.wrid);
-		return -1;
-	}
+	if (qp->buf_size) {
+		if (mlx4_alloc_buf(&qp->buf,
+				   align(qp->buf_size, to_mdev(context->device)->page_size),
+				   to_mdev(context->device)->page_size)) {
+			free(qp->sq.wrid);
+			free(qp->rq.wrid);
+			return -1;
+		}
 
-	memset(qp->buf.buf, 0, qp->buf_size);
+		memset(qp->buf.buf, 0, qp->buf_size);
+	} else {
+		qp->buf.buf = NULL;
+	}
 
 	return 0;
 }
@@ -631,6 +641,7 @@ void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
 		wqe_size -= sizeof (struct mlx4_wqe_datagram_seg);
 		break;
 
+	case IBV_QPT_XRC_SEND:
 	case IBV_QPT_UC:
 	case IBV_QPT_RC:
 		wqe_size -= sizeof (struct mlx4_wqe_raddr_seg);
diff --git a/src/srq.c b/src/srq.c
index f1d1240..bc19c51 100644
--- a/src/srq.c
+++ b/src/srq.c
@@ -42,6 +42,7 @@
 #include "mlx4.h"
 #include "doorbell.h"
 #include "wqe.h"
+#include "mlx4-abi.h"
 
 static void *get_wqe(struct mlx4_srq *srq, int n)
 {
@@ -173,3 +174,153 @@ int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
 
 	return 0;
 }
+
+void mlx4_init_xsrq_table(struct mlx4_xsrq_table *xsrq_table, int size)
+{
+	memset(xsrq_table, 0, sizeof *xsrq_table);
+	xsrq_table->num_xsrq = size;
+	xsrq_table->shift = ffs(size) - 1 - MLX4_XSRQ_TABLE_BITS;
+	xsrq_table->mask = (1 << xsrq_table->shift) - 1;
+
+	pthread_mutex_init(&xsrq_table->mutex, NULL);
+}
+
+struct mlx4_srq *mlx4_find_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn)
+{
+	int index;
+
+	index = (srqn & (xsrq_table->num_xsrq - 1)) >> xsrq_table->shift;
+	if (xsrq_table->xsrq_table[index].refcnt)
+		return xsrq_table->xsrq_table[index].table[srqn & xsrq_table->mask];
+
+	return NULL;
+}
+
+int mlx4_store_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn,
+		    struct mlx4_srq *srq)
+{
+	int index, ret = 0;
+
+	index = (srqn & (xsrq_table->num_xsrq - 1)) >> xsrq_table->shift;
+	pthread_mutex_lock(&xsrq_table->mutex);
+	if (!xsrq_table->xsrq_table[index].refcnt) {
+		xsrq_table->xsrq_table[index].table = calloc(xsrq_table->mask + 1,
+							     sizeof(struct mlx4_srq *));
+		if (!xsrq_table->xsrq_table[index].table) {
+			ret = -1;
+			goto out;
+		}
+	}
+
+	xsrq_table->xsrq_table[index].refcnt++;
+	xsrq_table->xsrq_table[index].table[srqn & xsrq_table->mask] = srq;
+
+out:
+	pthread_mutex_unlock(&xsrq_table->mutex);
+	return ret;
+}
+
+void mlx4_clear_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn)
+{
+	int index;
+
+	index = (srqn & (xsrq_table->num_xsrq - 1)) >> xsrq_table->shift;
+	pthread_mutex_lock(&xsrq_table->mutex);
+
+	if (--xsrq_table->xsrq_table[index].refcnt)
+		xsrq_table->xsrq_table[index].table[srqn & xsrq_table->mask] = NULL;
+	else
+		free(xsrq_table->xsrq_table[index].table);
+
+	pthread_mutex_unlock(&xsrq_table->mutex);
+}
+
+struct ibv_srq *mlx4_create_xrc_srq(struct ibv_context *context,
+				    struct ibv_srq_init_attr_ex *attr_ex)
+{
+	struct mlx4_create_xsrq cmd;
+	struct mlx4_create_srq_resp resp;
+	struct mlx4_srq *srq;
+	int ret;
+
+	/* Sanity check SRQ size before proceeding */
+	if (attr_ex->attr.max_wr > 1 << 16 || attr_ex->attr.max_sge > 64)
+		return NULL;
+
+	srq = calloc(1, sizeof *srq);
+	if (!srq)
+		return NULL;
+
+	if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
+		goto err;
+
+	srq->max     = align_queue_size(attr_ex->attr.max_wr + 1);
+	srq->max_gs  = attr_ex->attr.max_sge;
+	srq->counter = 0;
+	srq->ext_srq = 1;
+
+	if (mlx4_alloc_srq_buf(attr_ex->pd, &attr_ex->attr, srq))
+		goto err;
+
+	srq->db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_RQ);
+	if (!srq->db)
+		goto err_free;
+
+	*srq->db = 0;
+
+	cmd.buf_addr = (uintptr_t) srq->buf.buf;
+	cmd.db_addr  = (uintptr_t) srq->db;
+
+	ret = ibv_cmd_create_srq_ex(context, &srq->verbs_srq, attr_ex,
+				    &cmd.ibv_cmd, sizeof cmd,
+				    &resp.ibv_resp, sizeof resp);
+	if (ret)
+		goto err_db;
+
+	ret = mlx4_store_xsrq(&to_mctx(context)->xsrq_table,
+			      srq->verbs_srq.srq_num, srq);
+	if (ret)
+		goto err_destroy;
+
+	return &srq->verbs_srq.srq;
+
+err_destroy:
+	ibv_cmd_destroy_srq(&srq->verbs_srq.srq);
+err_db:
+	mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_RQ, srq->db);
+err_free:
+	free(srq->wrid);
+	mlx4_free_buf(&srq->buf);
+err:
+	free(srq);
+	return NULL;
+}
+
+int mlx4_destroy_xrc_srq(struct ibv_srq *srq)
+{
+	struct mlx4_context *mctx = to_mctx(srq->context);
+	struct mlx4_srq *msrq = to_msrq(srq);
+	struct mlx4_cq *mcq;
+	int ret;
+
+	mcq = to_mcq(msrq->verbs_srq.cq);
+	mlx4_cq_clean(mcq, 0, msrq);
+	pthread_spin_lock(&mcq->lock);
+	mlx4_clear_xsrq(&mctx->xsrq_table, msrq->verbs_srq.srq_num);
+	pthread_spin_unlock(&mcq->lock);
+
+	ret = ibv_cmd_destroy_srq(srq);
+	if (ret) {
+		pthread_spin_lock(&mcq->lock);
+		mlx4_store_xsrq(&mctx->xsrq_table, msrq->verbs_srq.srq_num, msrq);
+		pthread_spin_unlock(&mcq->lock);
+		return ret;
+	}
+
+	mlx4_free_db(mctx, MLX4_DB_TYPE_RQ, msrq->db);
+	mlx4_free_buf(&msrq->buf);
+	free(msrq->wrid);
+	free(msrq);
+
+	return 0;
+}
diff --git a/src/verbs.c b/src/verbs.c
index 408fc6d..1ebf766 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -107,6 +107,42 @@ int mlx4_free_pd(struct ibv_pd *pd)
 	return 0;
 }
 
+struct ibv_xrcd *mlx4_open_xrcd(struct ibv_context *context,
+				struct ibv_xrcd_init_attr *attr)
+{
+	struct ibv_open_xrcd cmd;
+	struct ibv_open_xrcd_resp resp;
+	struct verbs_xrcd *xrcd;
+	int ret;
+
+	xrcd = calloc(1, sizeof *xrcd);
+	if (!xrcd)
+		return NULL;
+
+	ret = ibv_cmd_open_xrcd(context, xrcd, attr,
+				&cmd, sizeof cmd, &resp, sizeof resp);
+	if (ret)
+		goto err;
+
+	return &xrcd->xrcd;
+
+err:
+	free(xrcd);
+	return NULL;
+}
+
+int mlx4_close_xrcd(struct ibv_xrcd *ib_xrcd)
+{
+	struct verbs_xrcd *xrcd = container_of(ib_xrcd, struct verbs_xrcd, xrcd);
+	int ret;
+
+	ret = ibv_cmd_close_xrcd(xrcd);
+	if (!ret)
+		free(xrcd);
+
+	return ret;
+}
+
 struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 			   int access)
 {
@@ -150,7 +186,7 @@ int mlx4_dereg_mr(struct ibv_mr *mr)
 	return 0;
 }
 
-static int align_queue_size(int req)
+int align_queue_size(int req)
 {
 	int nent;
 
@@ -294,7 +330,7 @@ int mlx4_destroy_cq(struct ibv_cq *cq)
 }
 
 struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
-				 struct ibv_srq_init_attr *attr)
+				struct ibv_srq_init_attr *attr)
 {
 	struct mlx4_create_srq      cmd;
 	struct mlx4_create_srq_resp resp;
@@ -315,6 +351,7 @@ struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
 	srq->max     = align_queue_size(attr->attr.max_wr + 1);
 	srq->max_gs  = attr->attr.max_sge;
 	srq->counter = 0;
+	srq->ext_srq = 0;
 
 	if (mlx4_alloc_srq_buf(pd, &attr->attr, srq))
 		goto err;
@@ -328,15 +365,13 @@ struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
 	cmd.buf_addr = (uintptr_t) srq->buf.buf;
 	cmd.db_addr  = (uintptr_t) srq->db;
 
-	ret = ibv_cmd_create_srq(pd, &srq->ibv_srq, attr,
+	ret = ibv_cmd_create_srq(pd, &srq->verbs_srq.srq, attr,
 				 &cmd.ibv_cmd, sizeof cmd,
 				 &resp.ibv_resp, sizeof resp);
 	if (ret)
 		goto err_db;
 
-	srq->srqn = resp.srqn;
-
-	return &srq->ibv_srq;
+	return &srq->verbs_srq.srq;
 
 err_db:
 	mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, srq->db);
@@ -351,6 +386,18 @@ err:
 	return NULL;
 }
 
+struct ibv_srq *mlx4_create_srq_ex(struct ibv_context *context,
+				   struct ibv_srq_init_attr_ex *attr_ex)
+{
+	if (!(attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) ||
+	    (attr_ex->srq_type == IBV_SRQT_BASIC))
+		return mlx4_create_srq(attr_ex->pd, (struct ibv_srq_init_attr *) attr_ex);
+	else if (attr_ex->srq_type == IBV_SRQT_XRC)
+		return mlx4_create_xrc_srq(context, attr_ex);
+
+	return NULL;
+}
+
 int mlx4_modify_srq(struct ibv_srq *srq,
 		     struct ibv_srq_attr *attr,
 		     int attr_mask)
@@ -372,6 +419,9 @@ int mlx4_destroy_srq(struct ibv_srq *srq)
 {
 	int ret;
 
+	if (to_msrq(srq)->ext_srq)
+		return mlx4_destroy_xrc_srq(srq);
+
 	ret = ibv_cmd_destroy_srq(srq);
 	if (ret)
 		return ret;
@@ -384,7 +434,8 @@ int mlx4_destroy_srq(struct ibv_srq *srq)
 	return 0;
 }
 
-struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
+struct ibv_qp *mlx4_create_qp_ex(struct ibv_context *context,
+				 struct ibv_qp_init_attr_ex *attr)
 {
 	struct mlx4_create_qp     cmd;
 	struct ibv_create_qp_resp resp;
@@ -399,30 +450,34 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
 	    attr->cap.max_inline_data > 1024)
 		return NULL;
 
-	qp = malloc(sizeof *qp);
+	qp = calloc(1, sizeof *qp);
 	if (!qp)
 		return NULL;
 
-	mlx4_calc_sq_wqe_size(&attr->cap, attr->qp_type, qp);
-
-	/*
-	 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
-	 * allow HW to prefetch.
-	 */
-	qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
-	qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes);
-	qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr);
+	if (attr->qp_type == IBV_QPT_XRC_RECV) {
+		attr->cap.max_send_wr = qp->sq.wqe_cnt = 0;
+	} else {
+		mlx4_calc_sq_wqe_size(&attr->cap, attr->qp_type, qp);
+		/*
+		 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
+		 * allow HW to prefetch.
+		 */
+		qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
+		qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes);
+	}
 
-	if (attr->srq)
-		attr->cap.max_recv_wr = qp->rq.wqe_cnt = 0;
-	else {
+	if (attr->srq || attr->qp_type == IBV_QPT_XRC_SEND ||
+	    attr->qp_type == IBV_QPT_XRC_RECV) {
+		attr->cap.max_recv_wr = qp->rq.wqe_cnt = attr->cap.max_recv_sge = 0;
+	} else {
+		qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr);
 		if (attr->cap.max_recv_sge < 1)
 			attr->cap.max_recv_sge = 1;
 		if (attr->cap.max_recv_wr < 1)
 			attr->cap.max_recv_wr = 1;
 	}
 
-	if (mlx4_alloc_qp_buf(pd, &attr->cap, attr->qp_type, qp))
+	if (mlx4_alloc_qp_buf(context, &attr->cap, attr->qp_type, qp))
 		goto err;
 
 	mlx4_init_qp_indices(qp);
@@ -431,19 +486,18 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
 	    pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
 		goto err_free;
 
-	if (!attr->srq) {
-		qp->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
+	if (attr->cap.max_recv_sge) {
+		qp->db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_RQ);
 		if (!qp->db)
 			goto err_free;
 
 		*qp->db = 0;
+		cmd.db_addr = (uintptr_t) qp->db;
+	} else {
+		cmd.db_addr = 0;
 	}
 
 	cmd.buf_addr	    = (uintptr_t) qp->buf.buf;
-	if (attr->srq)
-		cmd.db_addr = 0;
-	else
-		cmd.db_addr = (uintptr_t) qp->db;
 	cmd.log_sq_stride   = qp->sq.wqe_shift;
 	for (cmd.log_sq_bb_count = 0;
 	     qp->sq.wqe_cnt > 1 << cmd.log_sq_bb_count;
@@ -452,37 +506,39 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
 	cmd.sq_no_prefetch = 0;	/* OK for ABI 2: just a reserved field */
 	memset(cmd.reserved, 0, sizeof cmd.reserved);
 
-	pthread_mutex_lock(&to_mctx(pd->context)->qp_table_mutex);
+	pthread_mutex_lock(&to_mctx(context)->qp_table_mutex);
 
-	ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd.ibv_cmd, sizeof cmd,
-				&resp, sizeof resp);
+	ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp, attr,
+				   &cmd.ibv_cmd, sizeof cmd, &resp, sizeof resp);
 	if (ret)
 		goto err_rq_db;
 
-	ret = mlx4_store_qp(to_mctx(pd->context), qp->ibv_qp.qp_num, qp);
-	if (ret)
-		goto err_destroy;
-	pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
+	if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) {
+		ret = mlx4_store_qp(to_mctx(context), qp->verbs_qp.qp.qp_num, qp);
+		if (ret)
+			goto err_destroy;
+	}
+	pthread_mutex_unlock(&to_mctx(context)->qp_table_mutex);
 
 	qp->rq.wqe_cnt = qp->rq.max_post = attr->cap.max_recv_wr;
 	qp->rq.max_gs  = attr->cap.max_recv_sge;
 	mlx4_set_sq_sizes(qp, &attr->cap, attr->qp_type);
 
-	qp->doorbell_qpn    = htonl(qp->ibv_qp.qp_num << 8);
+	qp->doorbell_qpn    = htonl(qp->verbs_qp.qp.qp_num << 8);
 	if (attr->sq_sig_all)
 		qp->sq_signal_bits = htonl(MLX4_WQE_CTRL_CQ_UPDATE);
 	else
 		qp->sq_signal_bits = 0;
 
-	return &qp->ibv_qp;
+	return &qp->verbs_qp.qp;
 
 err_destroy:
-	ibv_cmd_destroy_qp(&qp->ibv_qp);
+	ibv_cmd_destroy_qp(&qp->verbs_qp.qp);
 
 err_rq_db:
-	pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
-	if (!attr->srq)
-		mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, qp->db);
+	pthread_mutex_unlock(&to_mctx(context)->qp_table_mutex);
+	if (attr->cap.max_recv_sge)
+		mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_RQ, qp->db);
 
 err_free:
 	free(qp->sq.wrid);
@@ -496,6 +552,43 @@ err:
 	return NULL;
 }
 
+struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
+{
+	struct ibv_qp_init_attr_ex attr_ex;
+	struct ibv_qp *qp;
+
+	memcpy(&attr_ex, attr, sizeof *attr);
+	attr_ex.comp_mask = IBV_QP_INIT_ATTR_PD;
+	attr_ex.pd = pd;
+	qp = mlx4_create_qp_ex(pd->context, &attr_ex);
+	if (qp)
+		memcpy(attr, &attr_ex, sizeof *attr);
+	return qp;
+}
+
+struct ibv_qp *mlx4_open_qp(struct ibv_context *context, struct ibv_qp_open_attr *attr)
+{
+	struct ibv_open_qp cmd;
+	struct ibv_create_qp_resp resp;
+	struct mlx4_qp *qp;
+	int ret;
+
+	qp = calloc(1, sizeof *qp);
+	if (!qp)
+		return NULL;
+
+	ret = ibv_cmd_open_qp(context, &qp->verbs_qp, attr,
+			      &cmd, sizeof cmd, &resp, sizeof resp);
+	if (ret)
+		goto err;
+
+	return &qp->verbs_qp.qp;
+
+err:
+	free(qp);
+	return NULL;
+}
+
 int mlx4_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
 		   int attr_mask,
 		   struct ibv_qp_init_attr *init_attr)
@@ -526,7 +619,7 @@ int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 	int ret;
 
 	if (attr_mask & IBV_QP_PORT) {
-		if (ibv_query_port(qp->pd->context, attr->port_num, &port_attr))
+		if (ibv_query_port(qp->context, attr->port_num, &port_attr))
 			return -1;
 		mqp->link_layer = port_attr.link_layer;
 	}
@@ -542,13 +635,14 @@ int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 	if (!ret		       &&
 	    (attr_mask & IBV_QP_STATE) &&
 	    attr->qp_state == IBV_QPS_RESET) {
-		mlx4_cq_clean(to_mcq(qp->recv_cq), qp->qp_num,
-			       qp->srq ? to_msrq(qp->srq) : NULL);
-		if (qp->send_cq != qp->recv_cq)
+		if (qp->recv_cq)
+			mlx4_cq_clean(to_mcq(qp->recv_cq), qp->qp_num,
+				      qp->srq ? to_msrq(qp->srq) : NULL);
+		if (qp->send_cq && qp->send_cq != qp->recv_cq)
 			mlx4_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL);
 
 		mlx4_init_qp_indices(to_mqp(qp));
-		if (!qp->srq)
+		if (to_mqp(qp)->rq.wqe_cnt)
 			*to_mqp(qp)->db = 0;
 	}
 
@@ -560,9 +654,14 @@ static void mlx4_lock_cqs(struct ibv_qp *qp)
 	struct mlx4_cq *send_cq = to_mcq(qp->send_cq);
 	struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq);
 
-	if (send_cq == recv_cq)
+	if (!qp->send_cq || !qp->recv_cq) {
+		if (qp->send_cq)
+			pthread_spin_lock(&send_cq->lock);
+		else if (qp->recv_cq)
+			pthread_spin_lock(&recv_cq->lock);
+	} else if (send_cq == recv_cq) {
 		pthread_spin_lock(&send_cq->lock);
-	else if (send_cq->cqn < recv_cq->cqn) {
+	} else if (send_cq->cqn < recv_cq->cqn) {
 		pthread_spin_lock(&send_cq->lock);
 		pthread_spin_lock(&recv_cq->lock);
 	} else {
@@ -576,9 +675,15 @@ static void mlx4_unlock_cqs(struct ibv_qp *qp)
 	struct mlx4_cq *send_cq = to_mcq(qp->send_cq);
 	struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq);
 
-	if (send_cq == recv_cq)
+
+	if (!qp->send_cq || !qp->recv_cq) {
+		if (qp->send_cq)
+			pthread_spin_unlock(&send_cq->lock);
+		else if (qp->recv_cq)
+			pthread_spin_unlock(&recv_cq->lock);
+	} else if (send_cq == recv_cq) {
 		pthread_spin_unlock(&send_cq->lock);
-	else if (send_cq->cqn < recv_cq->cqn) {
+	} else if (send_cq->cqn < recv_cq->cqn) {
 		pthread_spin_unlock(&recv_cq->lock);
 		pthread_spin_unlock(&send_cq->lock);
 	} else {
@@ -601,21 +706,24 @@ int mlx4_destroy_qp(struct ibv_qp *ibqp)
 
 	mlx4_lock_cqs(ibqp);
 
-	__mlx4_cq_clean(to_mcq(ibqp->recv_cq), ibqp->qp_num,
-			ibqp->srq ? to_msrq(ibqp->srq) : NULL);
-	if (ibqp->send_cq != ibqp->recv_cq)
+	if (ibqp->recv_cq)
+		__mlx4_cq_clean(to_mcq(ibqp->recv_cq), ibqp->qp_num,
+				ibqp->srq ? to_msrq(ibqp->srq) : NULL);
+	if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq)
 		__mlx4_cq_clean(to_mcq(ibqp->send_cq), ibqp->qp_num, NULL);
 
-	mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num);
+	if (qp->sq.wqe_cnt || qp->rq.wqe_cnt)
+		mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num);
 
 	mlx4_unlock_cqs(ibqp);
 	pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex);
 
-	if (!ibqp->srq)
+	if (qp->rq.wqe_cnt) {
 		mlx4_free_db(to_mctx(ibqp->context), MLX4_DB_TYPE_RQ, qp->db);
-	free(qp->sq.wrid);
-	if (qp->rq.wqe_cnt)
 		free(qp->rq.wrid);
+	}
+	if (qp->sq.wqe_cnt)
+		free(qp->sq.wrid);
 	mlx4_free_buf(&qp->buf);
 	free(qp);
 
-- 
1.7.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2013-03-18 19:10 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-09-28 22:53 [PATCH v3 1/2] libmlx4: Infra-structure changes to support verbs extensions Hefty, Sean
     [not found] ` <1828884A29C6694DAF28B7E6B8A8237346A981D8-Q3cL8pyY+6ukrb+BlOpmy7fspsVTdybXVpNB7YpNyf8@public.gmane.org>
2012-09-30 21:14   ` Jason Gunthorpe
     [not found]     ` <20120930211414.GA26575-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2012-10-11 16:26       ` Yishai Hadas
2013-03-16  0:39   ` [PATCH v4 " sean.hefty-ral2JQCrhuEAvxtiuMwx3w
     [not found]     ` <1363394396-951-1-git-send-email-sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2013-03-18 19:10       ` [PATCH libmlx4 v5 1/2] " sean.hefty-ral2JQCrhuEAvxtiuMwx3w
     [not found]       ` <1363633827-27885-1-git-send-email-sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2013-03-18 19:10         ` [PATCH libmlx4 v5 2/2] Add support for XRC QPs sean.hefty-ral2JQCrhuEAvxtiuMwx3w
2013-03-16  0:39   ` [PATCH v4 2/2] libmlx4: " sean.hefty-ral2JQCrhuEAvxtiuMwx3w

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.