linux-rdma.vger.kernel.org archive mirror
* [PATCH for-next v5 0/4] rxe: API extensions
@ 2020-09-18 21:25 Bob Pearson
  2020-09-18 21:25 ` [PATCH for-next v5 1/4] rxe: Implement MW commands Bob Pearson
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Bob Pearson @ 2020-09-18 21:25 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

This patch series implements the user-space changes that match the v5
kernel extensions.

It is an extension of an earlier patch set (v4) that implemented memory
windows.

The current set (v5) also implements:
	ibv_query_device_ex
	ibv_create_cq_ex
	ibv_create_qp_ex
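
For reference, a minimal sketch of how the new entry points are wired
together from user space (not part of the patches; ctx and pd are
assumed to have been obtained with the usual verbs calls, and error
checks are omitted):

	struct ibv_cq_init_attr_ex cq_attr = { .cqe = 64 };
	struct ibv_qp_init_attr_ex qp_attr = {
		.qp_type   = IBV_QPT_RC,
		.comp_mask = IBV_QP_INIT_ATTR_PD,
		.pd        = pd,
		.cap       = { .max_send_wr = 16, .max_recv_wr = 16,
			       .max_send_sge = 1, .max_recv_sge = 1 },
	};
	struct ibv_device_attr_ex dev_attr;
	struct ibv_cq_ex *cq;
	struct ibv_qp *qp;

	ibv_query_device_ex(ctx, NULL, &dev_attr);

	cq = ibv_create_cq_ex(ctx, &cq_attr);
	qp_attr.send_cq = ibv_cq_ex_to_cq(cq);
	qp_attr.recv_cq = ibv_cq_ex_to_cq(cq);
	qp = ibv_create_qp_ex(ctx, &qp_attr);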

Bob Pearson (4):
  rxe: Implement MW commands
  rxe: add extended query device verb
  rxe: add support for extended CQ operations
  rxe: add support for extended QP operations

 kernel-headers/rdma/rdma_user_rxe.h |  68 ++-
 providers/rxe/CMakeLists.txt        |   5 +
 providers/rxe/rxe-abi.h             |  16 +-
 providers/rxe/rxe.c                 | 624 ++++-----------------
 providers/rxe/rxe.h                 |  98 +++-
 providers/rxe/rxe_cq.c              | 449 +++++++++++++++
 providers/rxe/rxe_dev.c             | 146 +++++
 providers/rxe/rxe_mw.c              | 149 +++++
 providers/rxe/rxe_qp.c              | 810 ++++++++++++++++++++++++++++
 providers/rxe/rxe_queue.h           |  42 +-
 providers/rxe/rxe_sq.c              | 319 +++++++++++
 11 files changed, 2198 insertions(+), 528 deletions(-)
 create mode 100644 providers/rxe/rxe_cq.c
 create mode 100644 providers/rxe/rxe_dev.c
 create mode 100644 providers/rxe/rxe_mw.c
 create mode 100644 providers/rxe/rxe_qp.c
 create mode 100644 providers/rxe/rxe_sq.c

-- 
2.25.1



* [PATCH for-next v5 1/4] rxe: Implement MW commands
  2020-09-18 21:25 [PATCH for-next v5 0/4] rxe: API extensions Bob Pearson
@ 2020-09-18 21:25 ` Bob Pearson
  2020-09-18 21:25 ` [PATCH for-next v5 2/4] rxe: add extended query device verb Bob Pearson
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Bob Pearson @ 2020-09-18 21:25 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Implemented:
        ibv_alloc_mw
        ibv_dealloc_mw
        ibv_bind_mw
        posting bind MW work requests
        posting invalidate rkey work requests
        use of MW and MR rkeys for RDMA write, read, and atomic operations

Depends on the matching kernel patch set.
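
For example, binding a type 2 window through the send queue looks
roughly like this (a sketch only; pd, qp, and an mr registered with
IBV_ACCESS_MW_BIND covering buf/len are assumed, error checks omitted):

	struct ibv_mw *mw = ibv_alloc_mw(pd, IBV_MW_TYPE_2);
	struct ibv_send_wr wr = {}, *bad_wr;

	wr.opcode = IBV_WR_BIND_MW;
	wr.send_flags = IBV_SEND_SIGNALED;
	wr.bind_mw.mw = mw;
	wr.bind_mw.rkey = ibv_inc_rkey(mw->rkey);
	wr.bind_mw.bind_info.mr = mr;
	wr.bind_mw.bind_info.addr = (uintptr_t)buf;
	wr.bind_mw.bind_info.length = len;
	wr.bind_mw.bind_info.mw_access_flags = IBV_ACCESS_REMOTE_WRITE;

	if (!ibv_post_send(qp, &wr, &bad_wr))
		mw->rkey = wr.bind_mw.rkey;	/* valid once the bind completes */

	/* type 1 windows are bound with ibv_bind_mw() and the same bind_info */
	ibv_dealloc_mw(mw);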

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 kernel-headers/rdma/rdma_user_rxe.h |  50 ++++-
 providers/rxe/CMakeLists.txt        |   2 +
 providers/rxe/rxe-abi.h             |   4 +
 providers/rxe/rxe.c                 | 293 +++++--------------------
 providers/rxe/rxe.h                 |  44 ++++
 providers/rxe/rxe_mw.c              | 149 +++++++++++++
 providers/rxe/rxe_sq.c              | 319 ++++++++++++++++++++++++++++
 7 files changed, 622 insertions(+), 239 deletions(-)
 create mode 100644 providers/rxe/rxe_mw.c
 create mode 100644 providers/rxe/rxe_sq.c

diff --git a/kernel-headers/rdma/rdma_user_rxe.h b/kernel-headers/rdma/rdma_user_rxe.h
index aae2e696..d4912568 100644
--- a/kernel-headers/rdma/rdma_user_rxe.h
+++ b/kernel-headers/rdma/rdma_user_rxe.h
@@ -93,14 +93,46 @@ struct rxe_send_wr {
 			__u32	remote_qkey;
 			__u16	pkey_index;
 		} ud;
-		/* reg is only used by the kernel and is not part of the uapi */
+		struct {
+			__aligned_u64	addr;
+			__aligned_u64	length;
+			union {
+				__u32		mr_index;
+				__aligned_u64	reserved1;
+			};
+			union {
+				__u32		mw_index;
+				__aligned_u64	reserved2;
+			};
+			__u32	rkey;
+			__u32	access;
+			__u32	flags;
+		} umw;
+		/* The following are only used by the kernel
+		 * and are not part of the uapi
+		 */
+		struct {
+			__aligned_u64	addr;
+			__aligned_u64	length;
+			union {
+				struct ib_mr	*mr;
+				__aligned_u64	reserved1;
+			};
+			union {
+				struct ib_mw	*mw;
+				__aligned_u64	reserved2;
+			};
+			__u32	rkey;
+			__u32	access;
+			__u32	flags;
+		} kmw;
 		struct {
 			union {
 				struct ib_mr *mr;
 				__aligned_u64 reserved;
 			};
-			__u32        key;
-			__u32        access;
+			__u32	     key;
+			__u32	     access;
 		} reg;
 	} wr;
 };
@@ -112,7 +144,7 @@ struct rxe_sge {
 };
 
 struct mminfo {
-	__aligned_u64  		offset;
+	__aligned_u64		offset;
 	__u32			size;
 	__u32			pad;
 };
@@ -175,4 +207,14 @@ struct rxe_modify_srq_cmd {
 	__aligned_u64 mmap_info_addr;
 };
 
+struct rxe_reg_mr_resp {
+	__u32 index;
+	__u32 reserved;
+};
+
+struct rxe_alloc_mw_resp {
+	__u32 index;
+	__u32 reserved;
+};
+
 #endif /* RDMA_USER_RXE_H */
diff --git a/providers/rxe/CMakeLists.txt b/providers/rxe/CMakeLists.txt
index d8f32651..ec4f005d 100644
--- a/providers/rxe/CMakeLists.txt
+++ b/providers/rxe/CMakeLists.txt
@@ -1,3 +1,5 @@
 rdma_provider(rxe
   rxe.c
+  rxe_sq.c
+  rxe_mw.c
   )
diff --git a/providers/rxe/rxe-abi.h b/providers/rxe/rxe-abi.h
index b4680a24..2fc09483 100644
--- a/providers/rxe/rxe-abi.h
+++ b/providers/rxe/rxe-abi.h
@@ -49,5 +49,9 @@ DECLARE_DRV_CMD(urxe_modify_srq, IB_USER_VERBS_CMD_MODIFY_SRQ,
 		rxe_modify_srq_cmd, empty);
 DECLARE_DRV_CMD(urxe_resize_cq, IB_USER_VERBS_CMD_RESIZE_CQ,
 		empty, rxe_resize_cq_resp);
+DECLARE_DRV_CMD(urxe_reg_mr, IB_USER_VERBS_CMD_REG_MR,
+		empty, rxe_reg_mr_resp);
+DECLARE_DRV_CMD(urxe_alloc_mw, IB_USER_VERBS_CMD_ALLOC_MW,
+		empty, rxe_alloc_mw_resp);
 
 #endif /* RXE_ABI_H */
diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
index 3af58bfb..ff4285f2 100644
--- a/providers/rxe/rxe.c
+++ b/providers/rxe/rxe.c
@@ -69,11 +69,11 @@ static int rxe_query_device(struct ibv_context *context,
 {
 	struct ibv_query_device cmd;
 	uint64_t raw_fw_ver;
-	unsigned major, minor, sub_minor;
+	unsigned int major, minor, sub_minor;
 	int ret;
 
 	ret = ibv_cmd_query_device(context, attr, &raw_fw_ver,
-				   &cmd, sizeof cmd);
+				   &cmd, sizeof(cmd));
 	if (ret)
 		return ret;
 
@@ -81,7 +81,7 @@ static int rxe_query_device(struct ibv_context *context,
 	minor = (raw_fw_ver >> 16) & 0xffff;
 	sub_minor = raw_fw_ver & 0xffff;
 
-	snprintf(attr->fw_ver, sizeof attr->fw_ver,
+	snprintf(attr->fw_ver, sizeof(attr->fw_ver),
 		 "%d.%d.%d", major, minor, sub_minor);
 
 	return 0;
@@ -92,7 +92,7 @@ static int rxe_query_port(struct ibv_context *context, uint8_t port,
 {
 	struct ibv_query_port cmd;
 
-	return ibv_cmd_query_port(context, port, attr, &cmd, sizeof cmd);
+	return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
 }
 
 static struct ibv_pd *rxe_alloc_pd(struct ibv_context *context)
@@ -101,11 +101,12 @@ static struct ibv_pd *rxe_alloc_pd(struct ibv_context *context)
 	struct ib_uverbs_alloc_pd_resp resp;
 	struct ibv_pd *pd;
 
-	pd = malloc(sizeof *pd);
+	pd = malloc(sizeof(*pd));
 	if (!pd)
 		return NULL;
 
-	if (ibv_cmd_alloc_pd(context, pd, &cmd, sizeof cmd, &resp, sizeof resp)) {
+	if (ibv_cmd_alloc_pd(context, pd, &cmd, sizeof(cmd), &resp,
+				sizeof(resp))) {
 		free(pd);
 		return NULL;
 	}
@@ -127,34 +128,38 @@ static int rxe_dealloc_pd(struct ibv_pd *pd)
 static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 				 uint64_t hca_va, int access)
 {
-	struct verbs_mr *vmr;
-	struct ibv_reg_mr cmd;
-	struct ib_uverbs_reg_mr_resp resp;
+	struct rxe_mr *mr;
+	struct ibv_reg_mr cmd = {};
+	struct urxe_reg_mr_resp resp = {};
 	int ret;
 
-	vmr = malloc(sizeof(*vmr));
-	if (!vmr)
+	mr = calloc(1, sizeof(*mr));
+	if (!mr)
 		return NULL;
 
-	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd,
-			     sizeof(cmd), &resp, sizeof(resp));
+	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access,
+			     &mr->verbs_mr, &cmd, sizeof(cmd),
+			     &resp.ibv_resp, sizeof(resp));
 	if (ret) {
-		free(vmr);
+		free(mr);
 		return NULL;
 	}
 
-	return &vmr->ibv_mr;
+	mr->index = resp.index;
+
+	return &mr->verbs_mr.ibv_mr;
 }
 
 static int rxe_dereg_mr(struct verbs_mr *vmr)
 {
 	int ret;
+	struct rxe_mr *mr = to_rmr(&vmr->ibv_mr);
 
-	ret = ibv_cmd_dereg_mr(vmr);
+	ret = ibv_cmd_dereg_mr(&mr->verbs_mr);
 	if (ret)
 		return ret;
 
-	free(vmr);
+	free(mr);
 	return 0;
 }
 
@@ -166,14 +171,13 @@ static struct ibv_cq *rxe_create_cq(struct ibv_context *context, int cqe,
 	struct urxe_create_cq_resp resp;
 	int ret;
 
-	cq = malloc(sizeof *cq);
-	if (!cq) {
+	cq = malloc(sizeof(*cq));
+	if (!cq)
 		return NULL;
-	}
 
 	ret = ibv_cmd_create_cq(context, cqe, channel, comp_vector,
 				&cq->ibv_cq, NULL, 0,
-				&resp.ibv_resp, sizeof resp);
+				&resp.ibv_resp, sizeof(resp));
 	if (ret) {
 		free(cq);
 		return NULL;
@@ -202,8 +206,8 @@ static int rxe_resize_cq(struct ibv_cq *ibcq, int cqe)
 
 	pthread_spin_lock(&cq->lock);
 
-	ret = ibv_cmd_resize_cq(ibcq, cqe, &cmd, sizeof cmd,
-				&resp.ibv_resp, sizeof resp);
+	ret = ibv_cmd_resize_cq(ibcq, cqe, &cmd, sizeof(cmd),
+				&resp.ibv_resp, sizeof(resp));
 	if (ret) {
 		pthread_spin_unlock(&cq->lock);
 		return ret;
@@ -277,13 +281,12 @@ static struct ibv_srq *rxe_create_srq(struct ibv_pd *pd,
 	struct urxe_create_srq_resp resp;
 	int ret;
 
-	srq = malloc(sizeof *srq);
-	if (srq == NULL) {
+	srq = malloc(sizeof(*srq));
+	if (srq == NULL)
 		return NULL;
-	}
 
-	ret = ibv_cmd_create_srq(pd, &srq->ibv_srq, attr, &cmd, sizeof cmd,
-				 &resp.ibv_resp, sizeof resp);
+	ret = ibv_cmd_create_srq(pd, &srq->ibv_srq, attr, &cmd, sizeof(cmd),
+				 &resp.ibv_resp, sizeof(resp));
 	if (ret) {
 		free(srq);
 		return NULL;
@@ -298,6 +301,7 @@ static struct ibv_srq *rxe_create_srq(struct ibv_pd *pd,
 		return NULL;
 	}
 
+	srq->srq_num = resp.srq_num;
 	srq->mmap_info = resp.mi;
 	srq->rq.max_sge = attr->attr.max_sge;
 	pthread_spin_init(&srq->rq.lock, PTHREAD_PROCESS_PRIVATE);
@@ -305,6 +309,15 @@ static struct ibv_srq *rxe_create_srq(struct ibv_pd *pd,
 	return &srq->ibv_srq;
 }
 
+static int rxe_get_srq_num(struct ibv_srq *ibsrq, uint32_t *srq_num)
+{
+	struct rxe_srq *srq = to_rsrq(ibsrq);
+
+	*srq_num = srq->srq_num;
+
+	return 0;
+}
+
 static int rxe_modify_srq(struct ibv_srq *ibsrq,
 		   struct ibv_srq_attr *attr, int attr_mask)
 {
@@ -319,9 +332,9 @@ static int rxe_modify_srq(struct ibv_srq *ibsrq,
 	if (attr_mask & IBV_SRQ_MAX_WR)
 		pthread_spin_lock(&srq->rq.lock);
 
-	cmd.mmap_info_addr = (__u64)(uintptr_t) & mi;
+	cmd.mmap_info_addr = (__u64)(uintptr_t) &mi;
 	rc = ibv_cmd_modify_srq(ibsrq, attr, attr_mask,
-				&cmd.ibv_cmd, sizeof cmd);
+				&cmd.ibv_cmd, sizeof(cmd));
 	if (rc)
 		goto out;
 
@@ -351,7 +364,7 @@ static int rxe_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr)
 {
 	struct ibv_query_srq cmd;
 
-	return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd);
+	return ibv_cmd_query_srq(srq, attr, &cmd, sizeof(cmd));
 }
 
 static int rxe_destroy_srq(struct ibv_srq *ibvsrq)
@@ -396,9 +409,8 @@ static int rxe_post_one_recv(struct rxe_wq *rq, struct ibv_recv_wr *recv_wr)
 	memcpy(wqe->dma.sge, recv_wr->sg_list,
 	       wqe->num_sge*sizeof(*wqe->dma.sge));
 
-	for (i = 0; i < wqe->num_sge; i++) {
+	for (i = 0; i < wqe->num_sge; i++)
 		length += wqe->dma.sge[i].length;
-	}
 
 	wqe->dma.length = length;
 	wqe->dma.resid = length;
@@ -444,13 +456,12 @@ static struct ibv_qp *rxe_create_qp(struct ibv_pd *pd,
 	struct rxe_qp *qp;
 	int ret;
 
-	qp = malloc(sizeof *qp);
-	if (!qp) {
+	qp = malloc(sizeof(*qp));
+	if (!qp)
 		return NULL;
-	}
 
-	ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd, sizeof cmd,
-				&resp.ibv_resp, sizeof resp);
+	ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd, sizeof(cmd),
+				&resp.ibv_resp, sizeof(resp));
 	if (ret) {
 		free(qp);
 		return NULL;
@@ -501,7 +512,7 @@ static int rxe_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 	struct ibv_query_qp cmd;
 
 	return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr,
-				&cmd, sizeof cmd);
+				&cmd, sizeof(cmd));
 }
 
 static int rxe_modify_qp(struct ibv_qp *ibvqp,
@@ -510,7 +521,7 @@ static int rxe_modify_qp(struct ibv_qp *ibvqp,
 {
 	struct ibv_modify_qp cmd = {};
 
-	return ibv_cmd_modify_qp(ibvqp, attr, attr_mask, &cmd, sizeof cmd);
+	return ibv_cmd_modify_qp(ibvqp, attr, attr_mask, &cmd, sizeof(cmd));
 }
 
 static int rxe_destroy_qp(struct ibv_qp *ibv_qp)
@@ -531,199 +542,6 @@ static int rxe_destroy_qp(struct ibv_qp *ibv_qp)
 	return ret;
 }
 
-/* basic sanity checks for send work request */
-static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
-			    unsigned int length)
-{
-	enum ibv_wr_opcode opcode = ibwr->opcode;
-
-	if (ibwr->num_sge > sq->max_sge)
-		return -EINVAL;
-
-	if ((opcode == IBV_WR_ATOMIC_CMP_AND_SWP)
-	    || (opcode == IBV_WR_ATOMIC_FETCH_AND_ADD))
-		if (length < 8 || ibwr->wr.atomic.remote_addr & 0x7)
-			return -EINVAL;
-
-	if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline))
-		return -EINVAL;
-
-	return 0;
-}
-
-static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
-{
-	memset(kwr, 0, sizeof(*kwr));
-
-	kwr->wr_id		= uwr->wr_id;
-	kwr->num_sge		= uwr->num_sge;
-	kwr->opcode		= uwr->opcode;
-	kwr->send_flags		= uwr->send_flags;
-	kwr->ex.imm_data	= uwr->imm_data;
-
-	switch(uwr->opcode) {
-	case IBV_WR_RDMA_WRITE:
-	case IBV_WR_RDMA_WRITE_WITH_IMM:
-	case IBV_WR_RDMA_READ:
-		kwr->wr.rdma.remote_addr	= uwr->wr.rdma.remote_addr;
-		kwr->wr.rdma.rkey		= uwr->wr.rdma.rkey;
-		break;
-
-	case IBV_WR_SEND:
-	case IBV_WR_SEND_WITH_IMM:
-		kwr->wr.ud.remote_qpn		= uwr->wr.ud.remote_qpn;
-		kwr->wr.ud.remote_qkey		= uwr->wr.ud.remote_qkey;
-		break;
-
-	case IBV_WR_ATOMIC_CMP_AND_SWP:
-	case IBV_WR_ATOMIC_FETCH_AND_ADD:
-		kwr->wr.atomic.remote_addr	= uwr->wr.atomic.remote_addr;
-		kwr->wr.atomic.compare_add	= uwr->wr.atomic.compare_add;
-		kwr->wr.atomic.swap		= uwr->wr.atomic.swap;
-		kwr->wr.atomic.rkey		= uwr->wr.atomic.rkey;
-		break;
-
-	case IBV_WR_LOCAL_INV:
-	case IBV_WR_BIND_MW:
-	case IBV_WR_SEND_WITH_INV:
-	case IBV_WR_TSO:
-	case IBV_WR_DRIVER1:
-		break;
-	}
-}
-
-static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
-		  struct ibv_send_wr *ibwr, unsigned int length,
-		  struct rxe_send_wqe *wqe)
-{
-	int num_sge = ibwr->num_sge;
-	int i;
-	unsigned int opcode = ibwr->opcode;
-
-	convert_send_wr(&wqe->wr, ibwr);
-
-	if (qp_type(qp) == IBV_QPT_UD)
-		memcpy(&wqe->av, &to_rah(ibwr->wr.ud.ah)->av,
-		       sizeof(struct rxe_av));
-
-	if (ibwr->send_flags & IBV_SEND_INLINE) {
-		uint8_t *inline_data = wqe->dma.inline_data;
-
-		for (i = 0; i < num_sge; i++) {
-			memcpy(inline_data,
-			       (uint8_t *)(long)ibwr->sg_list[i].addr,
-			       ibwr->sg_list[i].length);
-			inline_data += ibwr->sg_list[i].length;
-		}
-	} else
-		memcpy(wqe->dma.sge, ibwr->sg_list,
-		       num_sge*sizeof(struct ibv_sge));
-
-	if ((opcode == IBV_WR_ATOMIC_CMP_AND_SWP)
-	    || (opcode == IBV_WR_ATOMIC_FETCH_AND_ADD))
-		wqe->iova	= ibwr->wr.atomic.remote_addr;
-	else
-		wqe->iova	= ibwr->wr.rdma.remote_addr;
-	wqe->dma.length		= length;
-	wqe->dma.resid		= length;
-	wqe->dma.num_sge	= num_sge;
-	wqe->dma.cur_sge	= 0;
-	wqe->dma.sge_offset	= 0;
-	wqe->state		= 0;
-	wqe->ssn		= qp->ssn++;
-
-	return 0;
-}
-
-static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq,
-			 struct ibv_send_wr *ibwr)
-{
-	int err;
-	struct rxe_send_wqe *wqe;
-	unsigned int length = 0;
-	int i;
-
-	for (i = 0; i < ibwr->num_sge; i++)
-		length += ibwr->sg_list[i].length;
-
-	err = validate_send_wr(sq, ibwr, length);
-	if (err) {
-		printf("validate send failed\n");
-		return err;
-	}
-
-	wqe = (struct rxe_send_wqe *)producer_addr(sq->queue);
-
-	err = init_send_wqe(qp, sq, ibwr, length, wqe);
-	if (err)
-		return err;
-
-	if (queue_full(sq->queue))
-		return -ENOMEM;
-
-	advance_producer(sq->queue);
-
-	return 0;
-}
-
-/* send a null post send as a doorbell */
-static int post_send_db(struct ibv_qp *ibqp)
-{
-	struct ibv_post_send cmd;
-	struct ib_uverbs_post_send_resp resp;
-
-	cmd.hdr.command	= IB_USER_VERBS_CMD_POST_SEND;
-	cmd.hdr.in_words = sizeof(cmd) / 4;
-	cmd.hdr.out_words = sizeof(resp) / 4;
-	cmd.response	= (uintptr_t)&resp;
-	cmd.qp_handle	= ibqp->handle;
-	cmd.wr_count	= 0;
-	cmd.sge_count	= 0;
-	cmd.wqe_size	= sizeof(struct ibv_send_wr);
-
-	if (write(ibqp->context->cmd_fd, &cmd, sizeof(cmd)) != sizeof(cmd))
-		return errno;
-
-	return 0;
-}
-
-/* this API does not make a distinction between
-   restartable and non-restartable errors */
-static int rxe_post_send(struct ibv_qp *ibqp,
-			 struct ibv_send_wr *wr_list,
-			 struct ibv_send_wr **bad_wr)
-{
-	int rc = 0;
-	int err;
-	struct rxe_qp *qp = to_rqp(ibqp);
-	struct rxe_wq *sq = &qp->sq;
-
-	if (!bad_wr)
-		return EINVAL;
-
-	*bad_wr = NULL;
-
-	if (!sq || !wr_list || !sq->queue)
-	 	return EINVAL;
-
-	pthread_spin_lock(&sq->lock);
-
-	while (wr_list) {
-		rc = post_one_send(qp, sq, wr_list);
-		if (rc) {
-			*bad_wr = wr_list;
-			break;
-		}
-
-		wr_list = wr_list->next;
-	}
-
-	pthread_spin_unlock(&sq->lock);
-
-	err =  post_send_db(ibqp);
-	return err ? err : rc;
-}
-
 static int rxe_post_recv(struct ibv_qp *ibqp,
 			 struct ibv_recv_wr *recv_wr,
 			 struct ibv_recv_wr **bad_wr)
@@ -792,7 +610,7 @@ static struct ibv_ah *rxe_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
 		return NULL;
 	}
 
-	ah = malloc(sizeof *ah);
+	ah = malloc(sizeof(*ah));
 	if (ah == NULL)
 		return NULL;
 
@@ -860,6 +678,10 @@ static const struct verbs_context_ops rxe_ctx_ops = {
 	.attach_mcast = ibv_cmd_attach_mcast,
 	.detach_mcast = ibv_cmd_detach_mcast,
 	.free_context = rxe_free_context,
+	.alloc_mw = rxe_alloc_mw,
+	.bind_mw = rxe_bind_mw,
+	.dealloc_mw = rxe_dealloc_mw,
+	.get_srq_num = rxe_get_srq_num,
 };
 
 static struct verbs_context *rxe_alloc_context(struct ibv_device *ibdev,
@@ -876,7 +698,7 @@ static struct verbs_context *rxe_alloc_context(struct ibv_device *ibdev,
 		return NULL;
 
 	if (ibv_cmd_get_context(&context->ibv_ctx, &cmd,
-				sizeof cmd, &resp, sizeof resp))
+				sizeof(cmd), &resp, sizeof(resp)))
 		goto out;
 
 	verbs_set_ops(&context->ibv_ctx, &rxe_ctx_ops);
@@ -907,6 +729,7 @@ static void rxe_uninit_device(struct verbs_device *verbs_device)
 static struct verbs_device *rxe_device_alloc(struct verbs_sysfs_dev *sysfs_dev)
 {
 	struct rxe_device *dev;
+
 	dev = calloc(1, sizeof(*dev));
 	if (!dev)
 		return NULL;
diff --git a/providers/rxe/rxe.h b/providers/rxe/rxe.h
index 96f4ee9c..6dfca0ab 100644
--- a/providers/rxe/rxe.h
+++ b/providers/rxe/rxe.h
@@ -94,6 +94,25 @@ struct rxe_srq {
 	uint32_t		srq_num;
 };
 
+struct rxe_mr {
+	struct verbs_mr		verbs_mr;
+	uint32_t		index;
+};
+
+/* private flags to rxe_post_one_send */
+enum rxe_send_flags {
+	/* used to tell bind calls that
+	 * used the verbs API from user
+	 * posted send wr
+	 */
+	RXE_BIND_MW		= (1 << 0),
+};
+
+struct rxe_mw {
+	struct ibv_mw		ibv_mw;
+	uint32_t		index;
+};
+
 #define to_rxxx(xxx, type) container_of(ib##xxx, struct rxe_##type, ibv_##xxx)
 
 static inline struct rxe_context *to_rctx(struct ibv_context *ibctx)
@@ -126,4 +145,29 @@ static inline struct rxe_ah *to_rah(struct ibv_ah *ibah)
 	return to_rxxx(ah, ah);
 }
 
+static inline struct rxe_mr *to_rmr(struct ibv_mr *ibmr)
+{
+	return container_of(ibmr, struct rxe_mr, verbs_mr.ibv_mr);
+}
+
+static inline struct rxe_mw *to_rmw(struct ibv_mw *ibmw)
+{
+	return to_rxxx(mw, mw);
+}
+
+/* rxe_mw.c */
+struct ibv_mw *rxe_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type);
+int rxe_dealloc_mw(struct ibv_mw *mw);
+int rxe_bind_mw(struct ibv_qp *qp, struct ibv_mw *mw,
+		struct ibv_mw_bind *mw_bind);
+
+/* rxe_sq.c */
+int rxe_post_one_send(struct rxe_qp *qp, struct ibv_send_wr *ibwr,
+		      unsigned int length, enum rxe_send_flags flags);
+
+int rxe_post_send_db(struct ibv_qp *ibqp);
+
+int rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list,
+		  struct ibv_send_wr **bad_wr);
+
 #endif /* RXE_H */
diff --git a/providers/rxe/rxe_mw.c b/providers/rxe/rxe_mw.c
new file mode 100644
index 00000000..8520a0fa
--- /dev/null
+++ b/providers/rxe/rxe_mw.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2020 Hewlett Packard Enterprise, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *	- Redistributions of source code must retain the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer.
+ *
+ *	- Redistributions in binary form must reproduce the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer in the documentation and/or other materials
+ *	  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <config.h>
+
+#include <endian.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <pthread.h>
+#include <netinet/in.h>
+#include <sys/mman.h>
+#include <errno.h>
+
+#include <endian.h>
+#include <pthread.h>
+#include <stddef.h>
+
+#include "rxe.h"
+#include "rxe_queue.h"
+#include <rdma/rdma_user_rxe.h>
+#include "rxe-abi.h"
+
+struct ibv_mw *rxe_alloc_mw(struct ibv_pd *ibpd, enum ibv_mw_type type)
+{
+	int ret;
+	struct rxe_mw *mw;
+	struct ibv_alloc_mw cmd = {};
+	struct urxe_alloc_mw_resp resp = {};
+
+	mw = calloc(1, sizeof(*mw));
+	if (!mw)
+		return NULL;
+
+	ret = ibv_cmd_alloc_mw(ibpd, type, &mw->ibv_mw, &cmd, sizeof(cmd),
+			       &resp.ibv_resp, sizeof(resp));
+	if (ret) {
+		free(mw);
+		return NULL;
+	}
+
+	mw->index = resp.index;
+
+	return &mw->ibv_mw;
+}
+
+static int next_rkey(int rkey)
+{
+	return (rkey & 0xffffff00) | ((rkey + 1) & 0x000000ff);
+}
+
+/* private version of rxe_post_one_send to set flags field in wqe */
+int rxe_bind_mw(struct ibv_qp *ibqp, struct ibv_mw *ibmw,
+		struct ibv_mw_bind *mw_bind)
+{
+	int ret;
+	struct rxe_qp *qp = to_rqp(ibqp);
+	struct ibv_mw_bind_info	*bind_info = &mw_bind->bind_info;
+	struct ibv_send_wr ibwr;
+
+	if (!bind_info->mr && (bind_info->addr || bind_info->length)) {
+		ret = EINVAL;
+		goto err;
+	}
+
+	if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) {
+		ret = EINVAL;
+		goto err;
+	}
+
+	if (bind_info->mr) {
+		/* more to do here see mlx5 */
+		if (ibmw->pd != bind_info->mr->pd) {
+			ret = EPERM;
+			goto err;
+		}
+	}
+
+	memset(&ibwr, 0, sizeof(ibwr));
+
+	ibwr.opcode		= IBV_WR_BIND_MW;
+	ibwr.next		= NULL;
+	ibwr.wr_id		= mw_bind->wr_id;
+	ibwr.send_flags		= mw_bind->send_flags;
+	ibwr.bind_mw.bind_info	= mw_bind->bind_info;
+	ibwr.bind_mw.mw		= ibmw;
+	ibwr.bind_mw.rkey	= next_rkey(ibmw->rkey);
+
+	pthread_spin_lock(&qp->sq.lock);
+	ret = rxe_post_one_send(qp, &ibwr, 0, RXE_BIND_MW);
+	pthread_spin_unlock(&qp->sq.lock);
+
+	if (!ret)
+		ret =  rxe_post_send_db(ibqp);
+
+	if (ret)
+		goto err;
+
+	/* user has to undo this if he gets an error wc */
+	ibmw->rkey = ibwr.bind_mw.rkey;
+
+	return 0;
+err:
+	errno = ret;
+	return errno;
+}
+
+int rxe_dealloc_mw(struct ibv_mw *ibmw)
+{
+	struct rxe_mw *mw = to_rmw(ibmw);
+	int ret;
+
+	ret = ibv_cmd_dealloc_mw(ibmw);
+	if (ret)
+		return ret;
+
+	free(mw);
+	return 0;
+}
diff --git a/providers/rxe/rxe_sq.c b/providers/rxe/rxe_sq.c
new file mode 100644
index 00000000..7232891b
--- /dev/null
+++ b/providers/rxe/rxe_sq.c
@@ -0,0 +1,319 @@
+/*
+ * Copyright (c) 2020 Hewlett Packard Enterprise, Inc. All rights reserved.
+ * Copyright (c) 2009 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2009 System Fabric Works, Inc. All rights reserved.
+ * Copyright (C) 2006-2007 QLogic Corporation, All rights reserved.
+ * Copyright (c) 2005. PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *	- Redistributions of source code must retain the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer.
+ *
+ *	- Redistributions in binary form must reproduce the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer in the documentation and/or other materials
+ *	  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <config.h>
+
+#include <endian.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <pthread.h>
+#include <netinet/in.h>
+#include <sys/mman.h>
+#include <errno.h>
+
+#include <endian.h>
+#include <pthread.h>
+#include <stddef.h>
+
+#include "rxe.h"
+#include "rxe_queue.h"
+#include <rdma/rdma_user_rxe.h>
+#include "rxe-abi.h"
+
+/* basic sanity checks for send work request */
+static int validate_send_wr(struct rxe_qp *qp,
+			    struct ibv_send_wr *ibwr,
+			    unsigned int length)
+{
+	struct rxe_wq *sq = &qp->sq;
+	enum ibv_wr_opcode opcode = ibwr->opcode;
+
+	if (ibwr->num_sge > sq->max_sge)
+		goto err;
+
+	if ((opcode == IBV_WR_ATOMIC_CMP_AND_SWP)
+	    || (opcode == IBV_WR_ATOMIC_FETCH_AND_ADD))
+		if (length < 8 || ibwr->wr.atomic.remote_addr & 0x7)
+			goto err;
+
+	if ((ibwr->send_flags & IBV_SEND_INLINE) &&
+	    (length > sq->max_inline))
+		goto err;
+
+	if (ibwr->opcode == IBV_WR_BIND_MW) {
+		if (length)
+			goto err;
+		if (ibwr->num_sge)
+			goto err;
+		if (ibwr->imm_data)
+			goto err;
+		if ((qp_type(qp) != IBV_QPT_RC) &&
+		    (qp_type(qp) != IBV_QPT_UC) &&
+		    (qp_type(qp) != IBV_QPT_XRC_SEND))
+			goto err;
+		/* only type 2 MWs may use post send bind
+		 * we skip this test for ibv_bind_mw by
+		 * calling post_one_send
+		 */
+		if (ibwr->bind_mw.mw->type == 1)
+			goto err;
+	}
+
+	return 0;
+err:
+	errno = EINVAL;
+	return errno;
+}
+
+static int convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
+{
+	struct rxe_mr *mr;
+	struct rxe_mw *mw;
+
+	memset(kwr, 0, sizeof(*kwr));
+
+	kwr->wr_id		= uwr->wr_id;
+	kwr->num_sge		= uwr->num_sge;
+	kwr->opcode		= uwr->opcode;
+	kwr->send_flags		= uwr->send_flags;
+	kwr->ex.imm_data	= uwr->imm_data;
+
+	switch (uwr->opcode) {
+	case IBV_WR_RDMA_WRITE:
+	case IBV_WR_RDMA_WRITE_WITH_IMM:
+	case IBV_WR_RDMA_READ:
+		kwr->wr.rdma.remote_addr	= uwr->wr.rdma.remote_addr;
+		kwr->wr.rdma.rkey		= uwr->wr.rdma.rkey;
+		break;
+
+	case IBV_WR_SEND:
+	case IBV_WR_SEND_WITH_IMM:
+		kwr->wr.ud.remote_qpn		= uwr->wr.ud.remote_qpn;
+		kwr->wr.ud.remote_qkey		= uwr->wr.ud.remote_qkey;
+		break;
+
+	case IBV_WR_ATOMIC_CMP_AND_SWP:
+	case IBV_WR_ATOMIC_FETCH_AND_ADD:
+		kwr->wr.atomic.remote_addr	= uwr->wr.atomic.remote_addr;
+		kwr->wr.atomic.compare_add	= uwr->wr.atomic.compare_add;
+		kwr->wr.atomic.swap		= uwr->wr.atomic.swap;
+		kwr->wr.atomic.rkey		= uwr->wr.atomic.rkey;
+		break;
+
+	case IBV_WR_BIND_MW:
+		mr = to_rmr(uwr->bind_mw.bind_info.mr);
+		mw = to_rmw(uwr->bind_mw.mw);
+
+		kwr->wr.umw.addr		= uwr->bind_mw.bind_info.addr;
+		kwr->wr.umw.length		= uwr->bind_mw.bind_info.length;
+		kwr->wr.umw.mr_index		= mr->index;
+		kwr->wr.umw.mw_index		= mw->index;
+		kwr->wr.umw.rkey		= uwr->bind_mw.rkey;
+		kwr->wr.umw.access		= uwr->bind_mw.bind_info.mw_access_flags;
+		break;
+
+	case IBV_WR_LOCAL_INV:
+	case IBV_WR_SEND_WITH_INV:
+	case IBV_WR_TSO:
+	case IBV_WR_DRIVER1:
+		break;
+	default:
+		errno = EINVAL;
+		return errno;
+	}
+
+	return 0;
+}
+
+static int init_send_wqe(struct rxe_qp *qp, struct ibv_send_wr *ibwr,
+			 unsigned int length, struct rxe_send_wqe *wqe,
+			 enum rxe_send_flags flags)
+{
+	int i;
+	int ret;
+	int num_sge = ibwr->num_sge;
+	unsigned int opcode = ibwr->opcode;
+
+	ret = convert_send_wr(&wqe->wr, ibwr);
+	if (ret)
+		return ret;
+
+	wqe->dma.length		= length;
+	wqe->dma.resid		= length;
+	wqe->dma.num_sge	= num_sge;
+	wqe->dma.cur_sge	= 0;
+	wqe->dma.sge_offset	= 0;
+	wqe->state		= 0;
+	wqe->ssn		= qp->ssn++;
+
+	if (qp_type(qp) == IBV_QPT_UD)
+		memcpy(&wqe->av, &to_rah(ibwr->wr.ud.ah)->av,
+		       sizeof(struct rxe_av));
+
+	if (ibwr->send_flags & IBV_SEND_INLINE) {
+		uint8_t *inline_data = wqe->dma.inline_data;
+
+		wqe->dma.resid = 0;
+
+		for (i = 0; i < num_sge; i++) {
+			memcpy(inline_data,
+			       (uint8_t *)(long)ibwr->sg_list[i].addr,
+			       ibwr->sg_list[i].length);
+			inline_data += ibwr->sg_list[i].length;
+		}
+	} else {
+		memcpy(wqe->dma.sge, ibwr->sg_list,
+		       num_sge*sizeof(struct ibv_sge));
+	}
+
+	if ((opcode == IBV_WR_ATOMIC_CMP_AND_SWP) ||
+	    (opcode == IBV_WR_ATOMIC_FETCH_AND_ADD))
+		wqe->iova	= ibwr->wr.atomic.remote_addr;
+	else
+		wqe->iova	= ibwr->wr.rdma.remote_addr;
+
+	/* let the kernel know we came through the verbs API */
+	if (flags & RXE_BIND_MW)
+		wqe->wr.wr.umw.flags = RXE_BIND_MW;
+
+	return 0;
+}
+
+/* call with qp->sq->lock held */
+int rxe_post_one_send(struct rxe_qp *qp, struct ibv_send_wr *ibwr,
+		      unsigned int length, enum rxe_send_flags flags)
+{
+	int err;
+	struct rxe_send_wqe *wqe;
+	struct rxe_wq *sq = &qp->sq;
+
+	if (queue_full(sq->queue)) {
+		err = ENOMEM;
+		goto err;
+	}
+
+	wqe = (struct rxe_send_wqe *)producer_addr(sq->queue);
+
+	err = init_send_wqe(qp, ibwr, length, wqe, flags);
+	if (err)
+		goto err;
+
+	advance_producer(qp->sq.queue);
+
+	return 0;
+err:
+	errno = err;
+	return errno;
+}
+
+/* send a null post send as a doorbell */
+int rxe_post_send_db(struct ibv_qp *ibqp)
+{
+	int ret;
+
+	ret = ibv_cmd_post_send(ibqp, NULL, NULL);
+	if (ret)
+		goto err;
+	return 0;
+err:
+	errno = ret;
+	return errno;
+}
+
+/* this API does not make a distinction between
+ * restartable and non-restartable errors
+ */
+int rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list,
+		  struct ibv_send_wr **bad_wr)
+{
+	int i;
+	int ret = 0;
+	int err = 0;
+	unsigned int length = 0;
+	int work_to_do = 0;
+	struct rxe_qp *qp = to_rqp(ibqp);
+	struct ibv_send_wr *ibwr;
+
+	if (!wr_list || !bad_wr) {
+		err = EINVAL;
+		goto err;
+	}
+
+	*bad_wr = NULL;
+
+	pthread_spin_lock(&qp->sq.lock);
+
+	while (wr_list) {
+		ibwr = wr_list;
+
+		for (i = 0; i < ibwr->num_sge; i++)
+			length += ibwr->sg_list[i].length;
+
+		/* moved here from post_one_send to allow
+		 * calling post_one_send without checking
+		 */
+		ret = validate_send_wr(qp, ibwr, length);
+		if (ret) {
+			*bad_wr = ibwr;
+			break;
+		}
+
+		ret = rxe_post_one_send(qp, ibwr, length, 0);
+		if (ret) {
+			*bad_wr = ibwr;
+			break;
+		}
+
+		work_to_do++;
+		wr_list = wr_list->next;
+	}
+
+	pthread_spin_unlock(&qp->sq.lock);
+
+	if (work_to_do)
+		err =  rxe_post_send_db(ibqp);
+
+	err = err ? err : ret;
+	if (err)
+		goto err;
+
+	return 0;
+err:
+	errno = err;
+	return errno;
+}
-- 
2.25.1



* [PATCH for-next v5 2/4] rxe: add extended query device verb
  2020-09-18 21:25 [PATCH for-next v5 0/4] rxe: API extensions Bob Pearson
  2020-09-18 21:25 ` [PATCH for-next v5 1/4] rxe: Implement MW commands Bob Pearson
@ 2020-09-18 21:25 ` Bob Pearson
  2020-09-18 21:25 ` [PATCH for-next v5 3/4] rxe: add support for extended CQ operations Bob Pearson
  2020-09-18 21:25 ` [PATCH for-next v5 4/4] rxe: add support for extended QP operations Bob Pearson
  3 siblings, 0 replies; 5+ messages in thread
From: Bob Pearson @ 2020-09-18 21:25 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Added ibv_query_device_ex verb.
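
The verb is called the same way as on other providers, e.g. (a sketch,
assuming ctx is an open device context):

	struct ibv_device_attr_ex attr;

	if (!ibv_query_device_ex(ctx, NULL, &attr))
		printf("max_qp %d fw %s\n",
		       attr.orig_attr.max_qp, attr.orig_attr.fw_ver);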

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 providers/rxe/CMakeLists.txt |   1 +
 providers/rxe/rxe.c          |  62 +--------------
 providers/rxe/rxe.h          |  12 +++
 providers/rxe/rxe_dev.c      | 146 +++++++++++++++++++++++++++++++++++
 4 files changed, 160 insertions(+), 61 deletions(-)
 create mode 100644 providers/rxe/rxe_dev.c

diff --git a/providers/rxe/CMakeLists.txt b/providers/rxe/CMakeLists.txt
index ec4f005d..96052555 100644
--- a/providers/rxe/CMakeLists.txt
+++ b/providers/rxe/CMakeLists.txt
@@ -1,5 +1,6 @@
 rdma_provider(rxe
   rxe.c
+  rxe_dev.c
   rxe_sq.c
   rxe_mw.c
   )
diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
index ff4285f2..79863985 100644
--- a/providers/rxe/rxe.c
+++ b/providers/rxe/rxe.c
@@ -64,67 +64,6 @@ static const struct verbs_match_ent hca_table[] = {
 	{},
 };
 
-static int rxe_query_device(struct ibv_context *context,
-			    struct ibv_device_attr *attr)
-{
-	struct ibv_query_device cmd;
-	uint64_t raw_fw_ver;
-	unsigned int major, minor, sub_minor;
-	int ret;
-
-	ret = ibv_cmd_query_device(context, attr, &raw_fw_ver,
-				   &cmd, sizeof(cmd));
-	if (ret)
-		return ret;
-
-	major = (raw_fw_ver >> 32) & 0xffff;
-	minor = (raw_fw_ver >> 16) & 0xffff;
-	sub_minor = raw_fw_ver & 0xffff;
-
-	snprintf(attr->fw_ver, sizeof(attr->fw_ver),
-		 "%d.%d.%d", major, minor, sub_minor);
-
-	return 0;
-}
-
-static int rxe_query_port(struct ibv_context *context, uint8_t port,
-			  struct ibv_port_attr *attr)
-{
-	struct ibv_query_port cmd;
-
-	return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
-}
-
-static struct ibv_pd *rxe_alloc_pd(struct ibv_context *context)
-{
-	struct ibv_alloc_pd cmd;
-	struct ib_uverbs_alloc_pd_resp resp;
-	struct ibv_pd *pd;
-
-	pd = malloc(sizeof(*pd));
-	if (!pd)
-		return NULL;
-
-	if (ibv_cmd_alloc_pd(context, pd, &cmd, sizeof(cmd), &resp,
-				sizeof(resp))) {
-		free(pd);
-		return NULL;
-	}
-
-	return pd;
-}
-
-static int rxe_dealloc_pd(struct ibv_pd *pd)
-{
-	int ret;
-
-	ret = ibv_cmd_dealloc_pd(pd);
-	if (!ret)
-		free(pd);
-
-	return ret;
-}
-
 static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 				 uint64_t hca_va, int access)
 {
@@ -652,6 +591,7 @@ static int rxe_destroy_ah(struct ibv_ah *ibah)
 
 static const struct verbs_context_ops rxe_ctx_ops = {
 	.query_device = rxe_query_device,
+	.query_device_ex = rxe_query_device_ex,
 	.query_port = rxe_query_port,
 	.alloc_pd = rxe_alloc_pd,
 	.dealloc_pd = rxe_dealloc_pd,
diff --git a/providers/rxe/rxe.h b/providers/rxe/rxe.h
index 6dfca0ab..11f337ee 100644
--- a/providers/rxe/rxe.h
+++ b/providers/rxe/rxe.h
@@ -155,6 +155,18 @@ static inline struct rxe_mw *to_rmw(struct ibv_mw *ibmw)
 	return to_rxxx(mw, mw);
 }
 
+/* rxe_dev.c */
+int rxe_query_device(struct ibv_context *context,
+		     struct ibv_device_attr *attr);
+int rxe_query_device_ex(struct ibv_context *context,
+			const struct ibv_query_device_ex_input *input,
+			struct ibv_device_attr_ex *attr,
+			size_t attr_size);
+int rxe_query_port(struct ibv_context *context, uint8_t port,
+		   struct ibv_port_attr *attr);
+struct ibv_pd *rxe_alloc_pd(struct ibv_context *context);
+int rxe_dealloc_pd(struct ibv_pd *pd);
+
 /* rxe_mw.c */
 struct ibv_mw *rxe_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type);
 int rxe_dealloc_mw(struct ibv_mw *mw);
diff --git a/providers/rxe/rxe_dev.c b/providers/rxe/rxe_dev.c
new file mode 100644
index 00000000..2156bebf
--- /dev/null
+++ b/providers/rxe/rxe_dev.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2020 Hewlett Packard Enterprise, Inc. All rights reserved.
+ * Copyright (c) 2009 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2009 System Fabric Works, Inc. All rights reserved.
+ * Copyright (C) 2006-2007 QLogic Corporation, All rights reserved.
+ * Copyright (c) 2005. PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *	- Redistributions of source code must retain the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer.
+ *
+ *	- Redistributions in binary form must reproduce the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer in the documentation and/or other materials
+ *	  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <config.h>
+
+#include <endian.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <pthread.h>
+#include <netinet/in.h>
+#include <sys/mman.h>
+#include <errno.h>
+
+#include <endian.h>
+#include <pthread.h>
+#include <stddef.h>
+
+#include <infiniband/driver.h>
+#include <infiniband/verbs.h>
+
+#include "rxe_queue.h"
+#include "rxe-abi.h"
+#include "rxe.h"
+
+int rxe_query_device(struct ibv_context *context,
+		     struct ibv_device_attr *attr)
+{
+	struct ibv_query_device cmd;
+	uint64_t raw_fw_ver;
+	unsigned int major, minor, sub_minor;
+	int ret;
+
+	ret = ibv_cmd_query_device(context, attr, &raw_fw_ver,
+				   &cmd, sizeof(cmd));
+	if (ret)
+		return ret;
+
+	major = (raw_fw_ver >> 32) & 0xffff;
+	minor = (raw_fw_ver >> 16) & 0xffff;
+	sub_minor = raw_fw_ver & 0xffff;
+
+	snprintf(attr->fw_ver, sizeof(attr->fw_ver),
+		 "%d.%d.%d", major, minor, sub_minor);
+
+	return 0;
+}
+
+int rxe_query_device_ex(struct ibv_context *context,
+			const struct ibv_query_device_ex_input *input,
+			struct ibv_device_attr_ex *attr,
+			size_t attr_size)
+{
+	int ret;
+	uint64_t raw_fw_ver;
+	unsigned int major, minor, sub_minor;
+	struct ibv_query_device_ex cmd = {};
+	struct ib_uverbs_ex_query_device_resp resp = {};
+
+	ret = ibv_cmd_query_device_ex(context, input, attr, sizeof(*attr),
+				      &raw_fw_ver, &cmd, sizeof(cmd),
+				      &resp, sizeof(resp));
+	if (ret)
+		return ret;
+
+	major = (raw_fw_ver >> 32) & 0xffff;
+	minor = (raw_fw_ver >> 16) & 0xffff;
+	sub_minor = raw_fw_ver & 0xffff;
+
+	snprintf(attr->orig_attr.fw_ver, sizeof(attr->orig_attr.fw_ver),
+		 "%d.%d.%d", major, minor, sub_minor);
+
+	return 0;
+}
+
+int rxe_query_port(struct ibv_context *context, uint8_t port,
+		   struct ibv_port_attr *attr)
+{
+	struct ibv_query_port cmd;
+
+	return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
+}
+
+struct ibv_pd *rxe_alloc_pd(struct ibv_context *context)
+{
+	struct ibv_alloc_pd cmd;
+	struct ib_uverbs_alloc_pd_resp resp;
+	struct ibv_pd *pd;
+
+	pd = malloc(sizeof(*pd));
+	if (!pd)
+		return NULL;
+
+	if (ibv_cmd_alloc_pd(context, pd, &cmd, sizeof(cmd), &resp,
+				sizeof(resp))) {
+		free(pd);
+		return NULL;
+	}
+
+	return pd;
+}
+
+int rxe_dealloc_pd(struct ibv_pd *pd)
+{
+	int ret;
+
+	ret = ibv_cmd_dealloc_pd(pd);
+	if (!ret)
+		free(pd);
+
+	return ret;
+}
-- 
2.25.1



* [PATCH for-next v5 3/4] rxe: add support for extended CQ operations
  2020-09-18 21:25 [PATCH for-next v5 0/4] rxe: API extensions Bob Pearson
  2020-09-18 21:25 ` [PATCH for-next v5 1/4] rxe: Implement MW commands Bob Pearson
  2020-09-18 21:25 ` [PATCH for-next v5 2/4] rxe: add extended query device verb Bob Pearson
@ 2020-09-18 21:25 ` Bob Pearson
  2020-09-18 21:25 ` [PATCH for-next v5 4/4] rxe: add support for extended QP operations Bob Pearson
  3 siblings, 0 replies; 5+ messages in thread
From: Bob Pearson @ 2020-09-18 21:25 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Implemented the ibv_create_cq_ex verb and the completion-reading
operations in the ibv_cq_ex struct.
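
A poll loop over the extended CQ looks roughly like this (a sketch
only; ctx is assumed, only flags from the supported set are requested,
and the per-completion status check is left out):

	struct ibv_cq_init_attr_ex attr = {
		.cqe = 64,
		.wc_flags = IBV_WC_EX_WITH_BYTE_LEN |
			    IBV_WC_EX_WITH_COMPLETION_TIMESTAMP,
	};
	struct ibv_poll_cq_attr poll_attr = {};
	struct ibv_cq_ex *cq = ibv_create_cq_ex(ctx, &attr);

	if (cq && !ibv_start_poll(cq, &poll_attr)) {
		do {
			printf("wr_id %lu len %u ts %lu\n",
			       (unsigned long)cq->wr_id,
			       ibv_wc_read_byte_len(cq),
			       (unsigned long)ibv_wc_read_completion_ts(cq));
		} while (!ibv_next_poll(cq));
		ibv_end_poll(cq);
	}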

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 kernel-headers/rdma/rdma_user_rxe.h |  44 ++-
 providers/rxe/CMakeLists.txt        |   1 +
 providers/rxe/rxe-abi.h             |  10 +-
 providers/rxe/rxe.c                 | 215 +++++--------
 providers/rxe/rxe.h                 |  21 +-
 providers/rxe/rxe_cq.c              | 449 ++++++++++++++++++++++++++++
 providers/rxe/rxe_queue.h           |  42 ++-
 7 files changed, 617 insertions(+), 165 deletions(-)
 create mode 100644 providers/rxe/rxe_cq.c

diff --git a/kernel-headers/rdma/rdma_user_rxe.h b/kernel-headers/rdma/rdma_user_rxe.h
index d4912568..9de469d7 100644
--- a/kernel-headers/rdma/rdma_user_rxe.h
+++ b/kernel-headers/rdma/rdma_user_rxe.h
@@ -98,29 +98,27 @@ struct rxe_send_wr {
 			__aligned_u64	length;
 			union {
 				__u32		mr_index;
-				__aligned_u64	reserved1;
+				__aligned_u64   pad1;
 			};
 			union {
 				__u32		mw_index;
-				__aligned_u64	reserved2;
+				__aligned_u64   pad2;
 			};
 			__u32	rkey;
 			__u32	access;
 			__u32	flags;
 		} umw;
-		/* The following are only used by the kernel
-		 * and are not part of the uapi
-		 */
+		/* below are only used by the kernel */
 		struct {
 			__aligned_u64	addr;
 			__aligned_u64	length;
 			union {
 				struct ib_mr	*mr;
-				__aligned_u64	reserved1;
+				__aligned_u64   reserved1;
 			};
 			union {
 				struct ib_mw	*mw;
-				__aligned_u64	reserved2;
+				__aligned_u64   reserved2;
 			};
 			__u32	rkey;
 			__u32	access;
@@ -131,8 +129,8 @@ struct rxe_send_wr {
 				struct ib_mr *mr;
 				__aligned_u64 reserved;
 			};
-			__u32	     key;
-			__u32	     access;
+			__u32        key;
+			__u32        access;
 		} reg;
 	} wr;
 };
@@ -144,7 +142,7 @@ struct rxe_sge {
 };
 
 struct mminfo {
-	__aligned_u64		offset;
+	__aligned_u64  		offset;
 	__u32			size;
 	__u32			pad;
 };
@@ -184,6 +182,32 @@ struct rxe_recv_wqe {
 	struct rxe_dma_info	dma;
 };
 
+struct rxe_uverbs_wc {
+	/* keep these the same as ib_uverbs_wc */
+	__aligned_u64		wr_id;
+	__u32			status;
+	__u32			opcode;
+	__u32			vendor_err;
+	__u32			byte_len;
+	union {
+		__be32		imm_data;
+		__u32		invalidate_rkey;
+	} ex;
+	__u32			qp_num;
+	__u32			src_qp;
+	__u32			wc_flags;
+	__u16			pkey_index;
+	__u16			slid;
+	__u8			sl;
+	__u8			dlid_path_bits;
+	__u8			port_num;
+	__u8			reserved;
+
+	/* any extras go here */
+	__aligned_u64		timestamp;
+	__aligned_u64		realtime;
+};
+
 struct rxe_create_cq_resp {
 	struct mminfo mi;
 };
diff --git a/providers/rxe/CMakeLists.txt b/providers/rxe/CMakeLists.txt
index 96052555..0e62aae7 100644
--- a/providers/rxe/CMakeLists.txt
+++ b/providers/rxe/CMakeLists.txt
@@ -1,6 +1,7 @@
 rdma_provider(rxe
   rxe.c
   rxe_dev.c
+  rxe_cq.c
   rxe_sq.c
   rxe_mw.c
   )
diff --git a/providers/rxe/rxe-abi.h b/providers/rxe/rxe-abi.h
index 2fc09483..14d0c038 100644
--- a/providers/rxe/rxe-abi.h
+++ b/providers/rxe/rxe-abi.h
@@ -39,16 +39,18 @@
 #include <rdma/rdma_user_rxe.h>
 #include <kernel-abi/rdma_user_rxe.h>
 
-DECLARE_DRV_CMD(urxe_create_cq, IB_USER_VERBS_CMD_CREATE_CQ,
-		empty, rxe_create_cq_resp);
 DECLARE_DRV_CMD(urxe_create_qp, IB_USER_VERBS_CMD_CREATE_QP,
 		empty, rxe_create_qp_resp);
+DECLARE_DRV_CMD(urxe_create_cq, IB_USER_VERBS_CMD_CREATE_CQ,
+		empty, rxe_create_cq_resp);
+DECLARE_DRV_CMD(urxe_create_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ,
+		empty, rxe_create_cq_resp);
+DECLARE_DRV_CMD(urxe_resize_cq, IB_USER_VERBS_CMD_RESIZE_CQ,
+		empty, rxe_resize_cq_resp);
 DECLARE_DRV_CMD(urxe_create_srq, IB_USER_VERBS_CMD_CREATE_SRQ,
 		empty, rxe_create_srq_resp);
 DECLARE_DRV_CMD(urxe_modify_srq, IB_USER_VERBS_CMD_MODIFY_SRQ,
 		rxe_modify_srq_cmd, empty);
-DECLARE_DRV_CMD(urxe_resize_cq, IB_USER_VERBS_CMD_RESIZE_CQ,
-		empty, rxe_resize_cq_resp);
 DECLARE_DRV_CMD(urxe_reg_mr, IB_USER_VERBS_CMD_REG_MR,
 		empty, rxe_reg_mr_resp);
 DECLARE_DRV_CMD(urxe_alloc_mw, IB_USER_VERBS_CMD_ALLOC_MW,
diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
index 79863985..308d7a78 100644
--- a/providers/rxe/rxe.c
+++ b/providers/rxe/rxe.c
@@ -102,116 +102,6 @@ static int rxe_dereg_mr(struct verbs_mr *vmr)
 	return 0;
 }
 
-static struct ibv_cq *rxe_create_cq(struct ibv_context *context, int cqe,
-				    struct ibv_comp_channel *channel,
-				    int comp_vector)
-{
-	struct rxe_cq *cq;
-	struct urxe_create_cq_resp resp;
-	int ret;
-
-	cq = malloc(sizeof(*cq));
-	if (!cq)
-		return NULL;
-
-	ret = ibv_cmd_create_cq(context, cqe, channel, comp_vector,
-				&cq->ibv_cq, NULL, 0,
-				&resp.ibv_resp, sizeof(resp));
-	if (ret) {
-		free(cq);
-		return NULL;
-	}
-
-	cq->queue = mmap(NULL, resp.mi.size, PROT_READ | PROT_WRITE, MAP_SHARED,
-			 context->cmd_fd, resp.mi.offset);
-	if ((void *)cq->queue == MAP_FAILED) {
-		ibv_cmd_destroy_cq(&cq->ibv_cq);
-		free(cq);
-		return NULL;
-	}
-
-	cq->mmap_info = resp.mi;
-	pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE);
-
-	return &cq->ibv_cq;
-}
-
-static int rxe_resize_cq(struct ibv_cq *ibcq, int cqe)
-{
-	struct rxe_cq *cq = to_rcq(ibcq);
-	struct ibv_resize_cq cmd;
-	struct urxe_resize_cq_resp resp;
-	int ret;
-
-	pthread_spin_lock(&cq->lock);
-
-	ret = ibv_cmd_resize_cq(ibcq, cqe, &cmd, sizeof(cmd),
-				&resp.ibv_resp, sizeof(resp));
-	if (ret) {
-		pthread_spin_unlock(&cq->lock);
-		return ret;
-	}
-
-	munmap(cq->queue, cq->mmap_info.size);
-
-	cq->queue = mmap(NULL, resp.mi.size,
-			 PROT_READ | PROT_WRITE, MAP_SHARED,
-			 ibcq->context->cmd_fd, resp.mi.offset);
-
-	ret = errno;
-	pthread_spin_unlock(&cq->lock);
-
-	if ((void *)cq->queue == MAP_FAILED) {
-		cq->queue = NULL;
-		cq->mmap_info.size = 0;
-		return ret;
-	}
-
-	cq->mmap_info = resp.mi;
-
-	return 0;
-}
-
-static int rxe_destroy_cq(struct ibv_cq *ibcq)
-{
-	struct rxe_cq *cq = to_rcq(ibcq);
-	int ret;
-
-	ret = ibv_cmd_destroy_cq(ibcq);
-	if (ret)
-		return ret;
-
-	if (cq->mmap_info.size)
-		munmap(cq->queue, cq->mmap_info.size);
-	free(cq);
-
-	return 0;
-}
-
-static int rxe_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
-{
-	struct rxe_cq *cq = to_rcq(ibcq);
-	struct rxe_queue *q;
-	int npolled;
-	uint8_t *src;
-
-	pthread_spin_lock(&cq->lock);
-	q = cq->queue;
-
-	for (npolled = 0; npolled < ne; ++npolled, ++wc) {
-		if (queue_empty(q))
-			break;
-
-		atomic_thread_fence(memory_order_acquire);
-		src = consumer_addr(q);
-		memcpy(wc, src, sizeof(*wc));
-		advance_consumer(q);
-	}
-
-	pthread_spin_unlock(&cq->lock);
-	return npolled;
-}
-
 static struct ibv_srq *rxe_create_srq(struct ibv_pd *pd,
 				      struct ibv_srq_init_attr *attr)
 {
@@ -590,38 +480,79 @@ static int rxe_destroy_ah(struct ibv_ah *ibah)
 }
 
 static const struct verbs_context_ops rxe_ctx_ops = {
-	.query_device = rxe_query_device,
-	.query_device_ex = rxe_query_device_ex,
-	.query_port = rxe_query_port,
-	.alloc_pd = rxe_alloc_pd,
-	.dealloc_pd = rxe_dealloc_pd,
-	.reg_mr = rxe_reg_mr,
-	.dereg_mr = rxe_dereg_mr,
-	.create_cq = rxe_create_cq,
-	.poll_cq = rxe_poll_cq,
-	.req_notify_cq = ibv_cmd_req_notify_cq,
-	.resize_cq = rxe_resize_cq,
-	.destroy_cq = rxe_destroy_cq,
-	.create_srq = rxe_create_srq,
-	.modify_srq = rxe_modify_srq,
-	.query_srq = rxe_query_srq,
-	.destroy_srq = rxe_destroy_srq,
-	.post_srq_recv = rxe_post_srq_recv,
-	.create_qp = rxe_create_qp,
-	.query_qp = rxe_query_qp,
-	.modify_qp = rxe_modify_qp,
-	.destroy_qp = rxe_destroy_qp,
-	.post_send = rxe_post_send,
-	.post_recv = rxe_post_recv,
-	.create_ah = rxe_create_ah,
-	.destroy_ah = rxe_destroy_ah,
-	.attach_mcast = ibv_cmd_attach_mcast,
-	.detach_mcast = ibv_cmd_detach_mcast,
-	.free_context = rxe_free_context,
-	.alloc_mw = rxe_alloc_mw,
-	.bind_mw = rxe_bind_mw,
-	.dealloc_mw = rxe_dealloc_mw,
-	.get_srq_num = rxe_get_srq_num,
+	.advise_mr		= NULL,
+	.alloc_dm		= NULL,
+	.alloc_mw		= rxe_alloc_mw,
+	.alloc_null_mr		= NULL,
+	.alloc_parent_domain	= NULL,
+	.alloc_pd		= rxe_alloc_pd,
+	.alloc_td		= NULL,
+	.async_event		= NULL,
+	.attach_counters_point_flow	= NULL,
+	.attach_mcast		= ibv_cmd_attach_mcast,
+	.bind_mw		= rxe_bind_mw,
+	.close_xrcd		= NULL,
+	.cq_event		= NULL,
+	.create_ah		= rxe_create_ah,
+	.create_counters	= NULL,
+	.create_cq_ex		= rxe_create_cq_ex,
+	.create_cq		= rxe_create_cq,
+	.create_flow_action_esp	= NULL,
+	.create_flow		= NULL,
+	.create_qp_ex		= NULL,
+	.create_qp		= rxe_create_qp,
+	.create_rwq_ind_table	= NULL,
+	.create_srq_ex		= NULL,
+	.create_srq		= rxe_create_srq,
+	.create_wq		= NULL,
+	.dealloc_mw		= rxe_dealloc_mw,
+	.dealloc_pd		= rxe_dealloc_pd,
+	.dealloc_td		= NULL,
+	.dereg_mr		= rxe_dereg_mr,
+	.destroy_ah		= rxe_destroy_ah,
+	.destroy_counters	= NULL,
+	.destroy_cq		= rxe_destroy_cq,
+	.destroy_flow_action	= NULL,
+	.destroy_flow		= NULL,
+	.destroy_qp		= rxe_destroy_qp,
+	.destroy_rwq_ind_table	= NULL,
+	.destroy_srq		= rxe_destroy_srq,
+	.destroy_wq		= NULL,
+	.detach_mcast		= ibv_cmd_detach_mcast,
+	.free_context		= rxe_free_context,
+	.free_dm		= NULL,
+	.get_srq_num		= rxe_get_srq_num,
+	.import_mr		= NULL,
+	.import_pd		= NULL,
+	.modify_cq		= NULL,
+	.modify_flow_action_esp	= NULL,
+	.modify_qp_rate_limit	= NULL,
+	.modify_qp		= rxe_modify_qp,
+	.modify_srq		= rxe_modify_srq,
+	.modify_wq		= NULL,
+	.open_qp		= NULL,
+	.open_xrcd		= NULL,
+	.poll_cq		= rxe_poll_cq,
+	.post_recv		= rxe_post_recv,
+	.post_send		= rxe_post_send,
+	.post_srq_ops		= NULL,
+	.post_srq_recv		= rxe_post_srq_recv,
+	.query_device_ex	= rxe_query_device_ex,
+	.query_device		= rxe_query_device,
+	.query_ece		= NULL,
+	.query_port		= rxe_query_port,
+	.query_qp		= rxe_query_qp,
+	.query_rt_values	= NULL,
+	.query_srq		= rxe_query_srq,
+	.read_counters		= NULL,
+	.reg_dm_mr		= NULL,
+	.reg_mr			= rxe_reg_mr,
+	.req_notify_cq		= ibv_cmd_req_notify_cq,
+	.rereg_mr		= NULL,
+	.resize_cq		= rxe_resize_cq,
+	.set_ece		= NULL,
+	.unimport_mr		= NULL,
+	.unimport_pd		= NULL,
 };
 
 static struct verbs_context *rxe_alloc_context(struct ibv_device *ibdev,
diff --git a/providers/rxe/rxe.h b/providers/rxe/rxe.h
index 11f337ee..69ddba55 100644
--- a/providers/rxe/rxe.h
+++ b/providers/rxe/rxe.h
@@ -57,11 +57,16 @@ struct rxe_context {
 	struct verbs_context	ibv_ctx;
 };
 
+/* common between cq and cq_ex */
 struct rxe_cq {
-	struct ibv_cq		ibv_cq;
+	struct verbs_cq		vcq;
 	struct mminfo		mmap_info;
-	struct rxe_queue		*queue;
+	struct rxe_queue	*queue;
 	pthread_spinlock_t	lock;
+
+	/* new API support */
+	struct rxe_uverbs_wc	*wc;
+	uint32_t		cur_index;
 };
 
 struct rxe_ah {
@@ -127,7 +132,7 @@ static inline struct rxe_device *to_rdev(struct ibv_device *ibdev)
 
 static inline struct rxe_cq *to_rcq(struct ibv_cq *ibcq)
 {
-	return to_rxxx(cq, cq);
+	return container_of(ibcq, struct rxe_cq, vcq.cq);
 }
 
 static inline struct rxe_qp *to_rqp(struct ibv_qp *ibqp)
@@ -167,6 +172,16 @@ int rxe_query_port(struct ibv_context *context, uint8_t port,
 struct ibv_pd *rxe_alloc_pd(struct ibv_context *context);
 int rxe_dealloc_pd(struct ibv_pd *pd);
 
+/* rxe_cq.c */
+struct ibv_cq *rxe_create_cq(struct ibv_context *context, int cqe,
+			     struct ibv_comp_channel *channel,
+			     int comp_vector);
+struct ibv_cq_ex *rxe_create_cq_ex(struct ibv_context *context,
+				   struct ibv_cq_init_attr_ex *attr);
+int rxe_resize_cq(struct ibv_cq *ibcq, int cqe);
+int rxe_destroy_cq(struct ibv_cq *ibcq);
+int rxe_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc);
+
 /* rxe_mw.c */
 struct ibv_mw *rxe_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type);
 int rxe_dealloc_mw(struct ibv_mw *mw);
diff --git a/providers/rxe/rxe_cq.c b/providers/rxe/rxe_cq.c
new file mode 100644
index 00000000..3debb1e8
--- /dev/null
+++ b/providers/rxe/rxe_cq.c
@@ -0,0 +1,449 @@
+/*
+ * Copyright (c) 2020 Hewlett Packard Enterprise, Inc. All rights reserved.
+ * Copyright (c) 2009 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2009 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *	- Redistributions of source code must retain the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer.
+ *
+ *	- Redistributions in binary form must reproduce the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer in the documentation and/or other materials
+ *	  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <config.h>
+
+#include <endian.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <pthread.h>
+#include <netinet/in.h>
+#include <sys/mman.h>
+#include <errno.h>
+
+#include <endian.h>
+#include <pthread.h>
+#include <stddef.h>
+
+#include "rxe.h"
+#include "rxe_queue.h"
+#include <rdma/rdma_user_rxe.h>
+#include "rxe-abi.h"
+
+static void advance_cur_index(struct rxe_cq *cq)
+{
+	struct rxe_queue *q = cq->queue;
+
+	cq->cur_index = (cq->cur_index + 1) & q->index_mask;
+}
+
+static int check_queue_empty(struct rxe_cq *cq)
+{
+	struct rxe_queue *q = cq->queue;
+	uint32_t producer_index = atomic_load(&q->producer_index);
+
+	return (cq->cur_index == producer_index);
+}
+
+static int cq_start_poll(struct ibv_cq_ex *current,
+			 struct ibv_poll_cq_attr *attr)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	pthread_spin_lock(&cq->lock);
+
+	atomic_thread_fence(memory_order_acquire);
+	cq->cur_index = load_consumer_index(cq->queue);
+
+	if (check_queue_empty(cq)) {
+		pthread_spin_unlock(&cq->lock);
+		errno = ENOENT;
+		return errno;
+	}
+
+	cq->wc = addr_from_index(cq->queue, cq->cur_index);
+	cq->vcq.cq_ex.status = cq->wc->status;
+	cq->vcq.cq_ex.wr_id = cq->wc->wr_id;
+
+	return 0;
+}
+
+static int cq_next_poll(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	advance_cur_index(cq);
+
+	if (check_queue_empty(cq)) {
+		store_consumer_index(cq->queue, cq->cur_index);
+		pthread_spin_unlock(&cq->lock);
+		errno = ENOENT;
+		return errno;
+	}
+
+	cq->wc = addr_from_index(cq->queue, cq->cur_index);
+	cq->vcq.cq_ex.status = cq->wc->status;
+	cq->vcq.cq_ex.wr_id = cq->wc->wr_id;
+
+	return 0;
+}
+
+static void cq_end_poll(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	advance_cur_index(cq);
+	store_consumer_index(cq->queue, cq->cur_index);
+	pthread_spin_unlock(&cq->lock);
+
+	return;
+}
+
+static enum ibv_wc_opcode cq_read_opcode(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->opcode;
+}
+
+static uint32_t cq_read_vendor_err(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->vendor_err;
+}
+
+static uint32_t cq_read_byte_len(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->byte_len;
+}
+
+static __be32 cq_read_imm_data(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->ex.imm_data;
+}
+
+static uint32_t cq_read_qp_num(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->qp_num;
+}
+
+static uint32_t cq_read_src_qp(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->src_qp;
+}
+
+static unsigned int cq_read_wc_flags(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->wc_flags;
+}
+
+/* will always be zero for RoCE */
+static uint32_t cq_read_slid(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->slid;
+}
+
+/* will always be zero for RoCE */
+static uint8_t cq_read_sl(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->sl;
+}
+
+/* will always be zero for RoCE */
+static uint8_t cq_read_dlid_path_bits(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->dlid_path_bits;
+}
+
+static uint64_t cq_read_completion_ts(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->timestamp;
+}
+
+static uint16_t cq_read_cvlan(struct ibv_cq_ex *current)
+{
+	//struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	fprintf(stderr, "%s: TODO\n", __func__);
+
+	return 0;
+}
+
+static uint32_t cq_read_flow_tag(struct ibv_cq_ex *current)
+{
+	//struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	fprintf(stderr, "%s: TODO\n", __func__);
+
+	return 0;
+}
+
+static void cq_read_tm_info(struct ibv_cq_ex *current,
+			    struct ibv_wc_tm_info *tm_info)
+{
+	//struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+	fprintf(stderr, "%s: TODO\n", __func__);
+}
+
+static uint64_t cq_read_completion_wallclock_ns(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->realtime;
+}
+
+struct ibv_cq *rxe_create_cq(struct ibv_context *context, int cqe,
+			     struct ibv_comp_channel *channel,
+			     int comp_vector)
+{
+	struct rxe_cq *cq;
+	struct urxe_create_cq_resp resp;
+	int ret;
+
+	cq = malloc(sizeof(*cq));
+	if (!cq)
+		return NULL;
+
+	ret = ibv_cmd_create_cq(context, cqe, channel, comp_vector,
+				&cq->vcq.cq, NULL, 0,
+				&resp.ibv_resp, sizeof(resp));
+	if (ret) {
+		free(cq);
+		return NULL;
+	}
+
+	cq->queue = mmap(NULL, resp.mi.size, PROT_READ | PROT_WRITE, MAP_SHARED,
+			 context->cmd_fd, resp.mi.offset);
+	if ((void *)cq->queue == MAP_FAILED) {
+		ibv_cmd_destroy_cq(&cq->vcq.cq);
+		free(cq);
+		return NULL;
+	}
+
+	cq->mmap_info = resp.mi;
+	pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE);
+
+	return &cq->vcq.cq;
+}
+
+enum rxe_sup_wc_flags {
+	RXE_SUP_WC_FLAGS = IBV_WC_EX_WITH_BYTE_LEN
+			 | IBV_WC_EX_WITH_IMM
+			 | IBV_WC_EX_WITH_QP_NUM
+			 | IBV_WC_EX_WITH_SRC_QP
+			 | IBV_WC_EX_WITH_SLID
+			 | IBV_WC_EX_WITH_SL
+			 | IBV_WC_EX_WITH_DLID_PATH_BITS
+			 | IBV_WC_EX_WITH_COMPLETION_TIMESTAMP
+			 | IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK,
+};
+
+struct ibv_cq_ex *rxe_create_cq_ex(struct ibv_context *context,
+				   struct ibv_cq_init_attr_ex *attr)
+{
+	int ret;
+	struct rxe_cq *cq;
+	struct urxe_create_cq_ex_resp resp;
+
+	if (attr->wc_flags & ~RXE_SUP_WC_FLAGS) {
+		errno = EOPNOTSUPP;
+		return NULL;
+	}
+
+	cq = calloc(1, sizeof(*cq));
+	if (!cq)
+		return NULL;
+
+	ret = ibv_cmd_create_cq_ex(context, attr, &cq->vcq, NULL, 0,
+				   &resp.ibv_resp, sizeof(resp));
+	if (ret) {
+		free(cq);
+		return NULL;
+	}
+
+	cq->queue = mmap(NULL, resp.mi.size, PROT_READ | PROT_WRITE, MAP_SHARED,
+			 context->cmd_fd, resp.mi.offset);
+	if ((void *)cq->queue == MAP_FAILED) {
+		ibv_cmd_destroy_cq(&cq->vcq.cq);
+		free(cq);
+		return NULL;
+	}
+
+	cq->mmap_info = resp.mi;
+	pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE);
+
+	cq->vcq.cq_ex.start_poll	= cq_start_poll;
+	cq->vcq.cq_ex.next_poll		= cq_next_poll;
+	cq->vcq.cq_ex.end_poll		= cq_end_poll;
+	cq->vcq.cq_ex.read_opcode	= cq_read_opcode;
+	cq->vcq.cq_ex.read_vendor_err	= cq_read_vendor_err;
+	cq->vcq.cq_ex.read_wc_flags	= cq_read_wc_flags;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_BYTE_LEN)
+		cq->vcq.cq_ex.read_byte_len
+			= cq_read_byte_len;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_IMM)
+		cq->vcq.cq_ex.read_imm_data
+			= cq_read_imm_data;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_QP_NUM)
+		cq->vcq.cq_ex.read_qp_num
+			= cq_read_qp_num;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_SRC_QP)
+		cq->vcq.cq_ex.read_src_qp
+			= cq_read_src_qp;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_SLID)
+		cq->vcq.cq_ex.read_slid
+			= cq_read_slid;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_SL)
+		cq->vcq.cq_ex.read_sl
+			= cq_read_sl;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS)
+		cq->vcq.cq_ex.read_dlid_path_bits
+			= cq_read_dlid_path_bits;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP)
+		cq->vcq.cq_ex.read_completion_ts
+			= cq_read_completion_ts;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_CVLAN)
+		cq->vcq.cq_ex.read_cvlan
+			= cq_read_cvlan;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_FLOW_TAG)
+		cq->vcq.cq_ex.read_flow_tag
+			= cq_read_flow_tag;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_TM_INFO)
+		cq->vcq.cq_ex.read_tm_info
+			= cq_read_tm_info;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK)
+		cq->vcq.cq_ex.read_completion_wallclock_ns
+			= cq_read_completion_wallclock_ns;
+
+	return &cq->vcq.cq_ex;
+}
+
+int rxe_resize_cq(struct ibv_cq *ibcq, int cqe)
+{
+	struct rxe_cq *cq = to_rcq(ibcq);
+	struct ibv_resize_cq cmd;
+	struct urxe_resize_cq_resp resp;
+	int ret;
+
+	pthread_spin_lock(&cq->lock);
+
+	ret = ibv_cmd_resize_cq(ibcq, cqe, &cmd, sizeof(cmd),
+				&resp.ibv_resp, sizeof(resp));
+	if (ret) {
+		pthread_spin_unlock(&cq->lock);
+		return ret;
+	}
+
+	munmap(cq->queue, cq->mmap_info.size);
+
+	cq->queue = mmap(NULL, resp.mi.size,
+			 PROT_READ | PROT_WRITE, MAP_SHARED,
+			 ibcq->context->cmd_fd, resp.mi.offset);
+
+	ret = errno;
+	pthread_spin_unlock(&cq->lock);
+
+	if ((void *)cq->queue == MAP_FAILED) {
+		cq->queue = NULL;
+		cq->mmap_info.size = 0;
+		return ret;
+	}
+
+	cq->mmap_info = resp.mi;
+
+	return 0;
+}
+
+int rxe_destroy_cq(struct ibv_cq *ibcq)
+{
+	struct rxe_cq *cq = to_rcq(ibcq);
+	int ret;
+
+	ret = ibv_cmd_destroy_cq(ibcq);
+	if (ret)
+		return ret;
+
+	if (cq->mmap_info.size)
+		munmap(cq->queue, cq->mmap_info.size);
+	free(cq);
+
+	return 0;
+}
+
+int rxe_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
+{
+	struct rxe_cq *cq = to_rcq(ibcq);
+	struct rxe_queue *q;
+	int npolled;
+	uint8_t *src;
+
+	pthread_spin_lock(&cq->lock);
+	q = cq->queue;
+
+	for (npolled = 0; npolled < ne; ++npolled, ++wc) {
+		if (queue_empty(q))
+			break;
+
+		atomic_thread_fence(memory_order_acquire);
+		src = consumer_addr(q);
+		memcpy(wc, src, sizeof(*wc));
+		advance_consumer(q);
+	}
+
+	pthread_spin_unlock(&cq->lock);
+	return npolled;
+}
diff --git a/providers/rxe/rxe_queue.h b/providers/rxe/rxe_queue.h
index 5c57b3e3..5ba04a7e 100644
--- a/providers/rxe/rxe_queue.h
+++ b/providers/rxe/rxe_queue.h
@@ -57,27 +57,27 @@ static inline int next_index(struct rxe_queue *q, int index)
 	return (index + 1) & q->index_mask;
 }
 
+/* Must hold consumer_index lock */
 static inline int queue_empty(struct rxe_queue *q)
 {
-	/* Must hold consumer_index lock */
 	return ((atomic_load(&q->producer_index) -
 		 atomic_load_explicit(&q->consumer_index,
 				      memory_order_relaxed)) &
 		q->index_mask) == 0;
 }
 
+/* Must hold producer_index lock */
 static inline int queue_full(struct rxe_queue *q)
 {
-	/* Must hold producer_index lock */
 	return ((atomic_load_explicit(&q->producer_index,
 				      memory_order_relaxed) +
 		 1 - atomic_load(&q->consumer_index)) &
 		q->index_mask) == 0;
 }
 
+/* Must hold producer_index lock */
 static inline void advance_producer(struct rxe_queue *q)
 {
-	/* Must hold producer_index lock */
 	atomic_thread_fence(memory_order_release);
 	atomic_store(
 	    &q->producer_index,
@@ -86,9 +86,9 @@ static inline void advance_producer(struct rxe_queue *q)
 		q->index_mask);
 }
 
+/* Must hold consumer_index lock */
 static inline void advance_consumer(struct rxe_queue *q)
 {
-	/* Must hold consumer_index lock */
 	atomic_store(
 	    &q->consumer_index,
 	    (atomic_load_explicit(&q->consumer_index, memory_order_relaxed) +
@@ -96,18 +96,48 @@ static inline void advance_consumer(struct rxe_queue *q)
 		q->index_mask);
 }
 
+/* Must hold producer_index lock */
+static inline uint32_t load_producer_index(struct rxe_queue *q)
+{
+	return atomic_load_explicit(&q->producer_index,
+				    memory_order_relaxed);
+}
+
+/* Must hold producer_index lock */
+static inline void store_producer_index(struct rxe_queue *q, uint32_t index)
+{
+	/* flush writes to work queue before moving index */
+	atomic_thread_fence(memory_order_release);
+	atomic_store(&q->producer_index, index);
+}
+
+/* Must hold consumer_index lock */
+static inline uint32_t load_consumer_index(struct rxe_queue *q)
+{
+	return atomic_load_explicit(&q->consumer_index,
+				    memory_order_relaxed);
+}
+
+/* Must hold consumer_index lock */
+static inline void store_consumer_index(struct rxe_queue *q, uint32_t index)
+{
+	/* flush writes to work queue before moving index */
+	atomic_thread_fence(memory_order_release);
+	atomic_store(&q->consumer_index, index);
+}
+
+/* Must hold producer_index lock */
 static inline void *producer_addr(struct rxe_queue *q)
 {
-	/* Must hold producer_index lock */
 	return q->data + ((atomic_load_explicit(&q->producer_index,
 						memory_order_relaxed) &
 			   q->index_mask)
 			  << q->log2_elem_size);
 }
 
+/* Must hold consumer_index lock */
 static inline void *consumer_addr(struct rxe_queue *q)
 {
-	/* Must hold consumer_index lock */
 	return q->data + ((atomic_load_explicit(&q->consumer_index,
 						memory_order_relaxed) &
 			   q->index_mask)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH for-next v5 4/4] rxe: add support for extended QP operations
  2020-09-18 21:25 [PATCH for-next v5 0/4] rxe: API extensions Bob Pearson
                   ` (2 preceding siblings ...)
  2020-09-18 21:25 ` [PATCH for-next v5 3/4] rxe: add support for extended CQ operations Bob Pearson
@ 2020-09-18 21:25 ` Bob Pearson
  3 siblings, 0 replies; 5+ messages in thread
From: Bob Pearson @ 2020-09-18 21:25 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

Added ibv_create_qp_ex verb.
Added WR send operations to the verbs_qp struct.
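
A minimal caller-side sketch of the new send path (post_one_write, ctx,
pd, cq, mr, buf, len, rkey and raddr are placeholder names; QP
connection, error handling and teardown are omitted):

	#include <errno.h>
	#include <stdint.h>
	#include <infiniband/verbs.h>

	/* hypothetical helper: post one signalled RDMA write */
	static int post_one_write(struct ibv_context *ctx, struct ibv_pd *pd,
				  struct ibv_cq *cq, struct ibv_mr *mr,
				  void *buf, uint32_t len, uint32_t rkey,
				  uint64_t raddr)
	{
		struct ibv_qp_init_attr_ex attr = {
			.qp_type	= IBV_QPT_RC,
			.send_cq	= cq,
			.recv_cq	= cq,
			.cap		= { .max_send_wr = 16, .max_recv_wr = 16,
					    .max_send_sge = 1, .max_recv_sge = 1 },
			.pd		= pd,
			.comp_mask	= IBV_QP_INIT_ATTR_PD |
					  IBV_QP_INIT_ATTR_SEND_OPS_FLAGS,
			.send_ops_flags	= IBV_QP_EX_WITH_RDMA_WRITE,
		};
		struct ibv_qp *qp = ibv_create_qp_ex(ctx, &attr);
		struct ibv_qp_ex *qpx;

		if (!qp)
			return errno;
		qpx = ibv_qp_to_qp_ex(qp);

		/* ... move the QP to RTS as usual, then post ... */

		ibv_wr_start(qpx);		/* wr_start: take the sq lock */
		qpx->wr_id = 1;
		qpx->wr_flags = IBV_SEND_SIGNALED;
		ibv_wr_rdma_write(qpx, rkey, raddr);
		ibv_wr_set_sge(qpx, mr->lkey, (uint64_t)(uintptr_t)buf, len);
		return ibv_wr_complete(qpx);	/* store producer index, doorbell */
	}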

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 providers/rxe/CMakeLists.txt |   1 +
 providers/rxe/rxe-abi.h      |   2 +
 providers/rxe/rxe.c          | 104 +----
 providers/rxe/rxe.h          |  21 +-
 providers/rxe/rxe_qp.c       | 810 +++++++++++++++++++++++++++++++++++
 5 files changed, 837 insertions(+), 101 deletions(-)
 create mode 100644 providers/rxe/rxe_qp.c

diff --git a/providers/rxe/CMakeLists.txt b/providers/rxe/CMakeLists.txt
index 0e62aae7..9d357320 100644
--- a/providers/rxe/CMakeLists.txt
+++ b/providers/rxe/CMakeLists.txt
@@ -1,6 +1,7 @@
 rdma_provider(rxe
   rxe.c
   rxe_dev.c
+  rxe_qp.c
   rxe_cq.c
   rxe_sq.c
   rxe_mw.c
diff --git a/providers/rxe/rxe-abi.h b/providers/rxe/rxe-abi.h
index 14d0c038..c9dec140 100644
--- a/providers/rxe/rxe-abi.h
+++ b/providers/rxe/rxe-abi.h
@@ -41,6 +41,8 @@
 
 DECLARE_DRV_CMD(urxe_create_qp, IB_USER_VERBS_CMD_CREATE_QP,
 		empty, rxe_create_qp_resp);
+DECLARE_DRV_CMD(urxe_create_qp_ex, IB_USER_VERBS_EX_CMD_CREATE_QP,
+		empty, rxe_create_qp_resp);
 DECLARE_DRV_CMD(urxe_create_cq, IB_USER_VERBS_CMD_CREATE_CQ,
 		empty, rxe_create_cq_resp);
 DECLARE_DRV_CMD(urxe_create_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ,
diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
index 308d7a78..ba5db6cb 100644
--- a/providers/rxe/rxe.c
+++ b/providers/rxe/rxe.c
@@ -277,100 +277,6 @@ static int rxe_post_srq_recv(struct ibv_srq *ibvsrq,
 	return rc;
 }
 
-static struct ibv_qp *rxe_create_qp(struct ibv_pd *pd,
-				    struct ibv_qp_init_attr *attr)
-{
-	struct ibv_create_qp cmd;
-	struct urxe_create_qp_resp resp;
-	struct rxe_qp *qp;
-	int ret;
-
-	qp = malloc(sizeof(*qp));
-	if (!qp)
-		return NULL;
-
-	ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd, sizeof(cmd),
-				&resp.ibv_resp, sizeof(resp));
-	if (ret) {
-		free(qp);
-		return NULL;
-	}
-
-	if (attr->srq) {
-		qp->rq.max_sge = 0;
-		qp->rq.queue = NULL;
-		qp->rq_mmap_info.size = 0;
-	} else {
-		qp->rq.max_sge = attr->cap.max_recv_sge;
-		qp->rq.queue = mmap(NULL, resp.rq_mi.size, PROT_READ | PROT_WRITE,
-				    MAP_SHARED,
-				    pd->context->cmd_fd, resp.rq_mi.offset);
-		if ((void *)qp->rq.queue == MAP_FAILED) {
-			ibv_cmd_destroy_qp(&qp->ibv_qp);
-			free(qp);
-			return NULL;
-		}
-
-		qp->rq_mmap_info = resp.rq_mi;
-		pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE);
-	}
-
-	qp->sq.max_sge = attr->cap.max_send_sge;
-	qp->sq.max_inline = attr->cap.max_inline_data;
-	qp->sq.queue = mmap(NULL, resp.sq_mi.size, PROT_READ | PROT_WRITE,
-			    MAP_SHARED,
-			    pd->context->cmd_fd, resp.sq_mi.offset);
-	if ((void *)qp->sq.queue == MAP_FAILED) {
-		if (qp->rq_mmap_info.size)
-			munmap(qp->rq.queue, qp->rq_mmap_info.size);
-		ibv_cmd_destroy_qp(&qp->ibv_qp);
-		free(qp);
-		return NULL;
-	}
-
-	qp->sq_mmap_info = resp.sq_mi;
-	pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE);
-
-	return &qp->ibv_qp;
-}
-
-static int rxe_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
-			int attr_mask,
-			struct ibv_qp_init_attr *init_attr)
-{
-	struct ibv_query_qp cmd;
-
-	return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr,
-				&cmd, sizeof(cmd));
-}
-
-static int rxe_modify_qp(struct ibv_qp *ibvqp,
-			 struct ibv_qp_attr *attr,
-			 int attr_mask)
-{
-	struct ibv_modify_qp cmd = {};
-
-	return ibv_cmd_modify_qp(ibvqp, attr, attr_mask, &cmd, sizeof(cmd));
-}
-
-static int rxe_destroy_qp(struct ibv_qp *ibv_qp)
-{
-	int ret;
-	struct rxe_qp *qp = to_rqp(ibv_qp);
-
-	ret = ibv_cmd_destroy_qp(ibv_qp);
-	if (!ret) {
-		if (qp->rq_mmap_info.size)
-			munmap(qp->rq.queue, qp->rq_mmap_info.size);
-		if (qp->sq_mmap_info.size)
-			munmap(qp->sq.queue, qp->sq_mmap_info.size);
-
-		free(qp);
-	}
-
-	return ret;
-}
-
 static int rxe_post_recv(struct ibv_qp *ibqp,
 			 struct ibv_recv_wr *recv_wr,
 			 struct ibv_recv_wr **bad_wr)
@@ -440,8 +346,9 @@ static struct ibv_ah *rxe_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
 	}
 
 	ah = malloc(sizeof(*ah));
-	if (ah == NULL)
+	if (ah == NULL) {
 		return NULL;
+	}
 
 	av = &ah->av;
 	av->port_num = attr->port_num;
@@ -472,8 +379,9 @@ static int rxe_destroy_ah(struct ibv_ah *ibah)
 	struct rxe_ah *ah = to_rah(ibah);
 
 	ret = ibv_cmd_destroy_ah(&ah->ibv_ah);
-	if (ret)
+	if (ret) {
 		return ret;
+	}
 
 	free(ah);
 	return 0;
@@ -499,7 +407,7 @@ static const struct verbs_context_ops rxe_ctx_ops = {
 	.create_cq		= rxe_create_cq,
 	.create_flow_action_esp	= NULL,
 	.create_flow		= NULL,
-	.create_qp_ex		= NULL,
+	.create_qp_ex		= rxe_create_qp_ex,
 	.create_qp		= rxe_create_qp,
 	.create_rwq_ind_table	= NULL,
 	.create_srq_ex		= NULL,
@@ -537,7 +445,7 @@ static const struct verbs_context_ops rxe_ctx_ops = {
 	.post_send		= rxe_post_send,
 	.post_srq_ops		= NULL,
 	.post_srq_recv		= rxe_post_srq_recv,
-	.query_device_ex	= rxe_query_device_ex,
+	.query_device_ex	= NULL,
 	.query_device		= rxe_query_device,
 	.query_ece		= NULL,
 	.query_port		= rxe_query_port,
diff --git a/providers/rxe/rxe.h b/providers/rxe/rxe.h
index 69ddba55..632edf5b 100644
--- a/providers/rxe/rxe.h
+++ b/providers/rxe/rxe.h
@@ -82,15 +82,19 @@ struct rxe_wq {
 };
 
 struct rxe_qp {
-	struct ibv_qp		ibv_qp;
+	struct verbs_qp		vqp;
 	struct mminfo		rq_mmap_info;
 	struct rxe_wq		rq;
 	struct mminfo		sq_mmap_info;
 	struct rxe_wq		sq;
 	unsigned int		ssn;
+
+	/* new API support */
+	uint32_t		cur_index;
+	int			err;
 };
 
-#define qp_type(qp)		((qp)->ibv_qp.qp_type)
+#define qp_type(qp)		((qp)->vqp.qp.qp_type)
 
 struct rxe_srq {
 	struct ibv_srq		ibv_srq;
@@ -137,7 +141,7 @@ static inline struct rxe_cq *to_rcq(struct ibv_cq *ibcq)
 
 static inline struct rxe_qp *to_rqp(struct ibv_qp *ibqp)
 {
-	return to_rxxx(qp, qp);
+	return container_of(ibqp, struct rxe_qp, vqp.qp);
 }
 
 static inline struct rxe_srq *to_rsrq(struct ibv_srq *ibsrq)
@@ -172,6 +176,17 @@ int rxe_query_port(struct ibv_context *context, uint8_t port,
 struct ibv_pd *rxe_alloc_pd(struct ibv_context *context);
 int rxe_dealloc_pd(struct ibv_pd *pd);
 
+/* rxe_qp.c */
+struct ibv_qp *rxe_create_qp(struct ibv_pd *pd,
+			     struct ibv_qp_init_attr *attr);
+struct ibv_qp *rxe_create_qp_ex(struct ibv_context *context,
+				struct ibv_qp_init_attr_ex *attr);
+int rxe_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+		 int attr_mask, struct ibv_qp_init_attr *init_attr);
+int rxe_modify_qp(struct ibv_qp *ibvqp, struct ibv_qp_attr *attr,
+		  int attr_mask);
+int rxe_destroy_qp(struct ibv_qp *ibv_qp);
+
 /* rxe_cq.c */
 struct ibv_cq *rxe_create_cq(struct ibv_context *context, int cqe,
 			     struct ibv_comp_channel *channel,
diff --git a/providers/rxe/rxe_qp.c b/providers/rxe/rxe_qp.c
new file mode 100644
index 00000000..6f0fec65
--- /dev/null
+++ b/providers/rxe/rxe_qp.c
@@ -0,0 +1,810 @@
+/*
+ * Copyright (c) 2020 Hewlett Packard Enterprise, Inc. All rights reserved.
+ * Copyright (c) 2009 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2009 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *	- Redistributions of source code must retain the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer.
+ *
+ *	- Redistributions in binary form must reproduce the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer in the documentation and/or other materials
+ *	  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <config.h>
+
+#include <endian.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <pthread.h>
+#include <netinet/in.h>
+#include <sys/mman.h>
+#include <errno.h>
+
+#include <stddef.h>
+
+#include "rxe.h"
+#include "rxe_queue.h"
+#include <rdma/rdma_user_rxe.h>
+#include "rxe-abi.h"
+
+static void advance_cur_index(struct rxe_qp *qp)
+{
+	struct rxe_queue *q = qp->sq.queue;
+
+	qp->cur_index = (qp->cur_index + 1) & q->index_mask;
+}
+
+static int check_queue_full(struct rxe_qp *qp)
+{
+	struct rxe_queue *q = qp->sq.queue;
+	uint32_t consumer_index = atomic_load(&q->consumer_index);
+
+	if (qp->err)
+		return qp->err;
+
+	/* full when one more producer step would catch up to the consumer */
+	if (((qp->cur_index + 1 - consumer_index) & q->index_mask) == 0)
+		qp->err = ENOSPC;
+
+	return qp->err;
+}
+
+/*
+ * builders always consume one send queue slot
+ * setters (below) reach back and adjust previous build
+ */
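+/*
+ * e.g. one RDMA write built through the generic ibv_wr_* wrappers:
+ *
+ *	ibv_wr_rdma_write(qpx, rkey, remote_addr);	(builder, new slot)
+ *	ibv_wr_set_sge(qpx, lkey, local_addr, length);	(setter, same slot)
+ *
+ * where qpx, rkey, remote_addr, lkey, local_addr and length are
+ * caller-supplied values.
+ */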
+static void wr_atomic_cmp_swp(struct ibv_qp_ex *ibqp, uint32_t rkey,
+			      uint64_t remote_addr, uint64_t compare,
+			      uint64_t swap)
+{
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+	struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
+
+	if (check_queue_full(qp))
+		return;
+
+	memset(wqe, 0, sizeof(*wqe));
+
+	wqe->wr.wr_id = ibqp->wr_id;
+	wqe->wr.send_flags = ibqp->wr_flags;
+	wqe->wr.opcode = IBV_WR_ATOMIC_CMP_AND_SWP;
+
+	wqe->wr.wr.atomic.remote_addr = remote_addr;
+	wqe->wr.wr.atomic.compare_add = compare;
+	wqe->wr.wr.atomic.swap = swap;
+	wqe->wr.wr.atomic.rkey = rkey;
+	wqe->iova = remote_addr;
+	wqe->ssn = qp->ssn++;
+
+	advance_cur_index(qp);
+
+	return;
+}
+
+static void wr_atomic_fetch_add(struct ibv_qp_ex *ibqp, uint32_t rkey,
+				uint64_t remote_addr, uint64_t add)
+{
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+	struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
+
+	if (check_queue_full(qp))
+		return;
+
+	memset(wqe, 0, sizeof(*wqe));
+
+	wqe->wr.wr_id = qp->vqp.qp_ex.wr_id;
+	wqe->wr.opcode = IBV_WR_ATOMIC_FETCH_AND_ADD;
+	wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
+	wqe->wr.wr.atomic.remote_addr = remote_addr;
+	wqe->wr.wr.atomic.compare_add = add;
+	wqe->wr.wr.atomic.rkey = rkey;
+	wqe->iova = remote_addr;
+	wqe->ssn = qp->ssn++;
+
+	advance_cur_index(qp);
+
+	return;
+}
+
+static void wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw,
+		       uint32_t rkey,
+		       const struct ibv_mw_bind_info *bind_info)
+{
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+	struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
+	struct rxe_mw *mw = to_rmw(ibmw);
+	struct rxe_mr *mr = to_rmr(bind_info->mr);
+
+	if (check_queue_full(qp))
+		return;
+
+	memset(wqe, 0, sizeof(*wqe));
+
+	wqe->wr.wr_id = qp->vqp.qp_ex.wr_id;
+	wqe->wr.opcode = IBV_WR_BIND_MW;
+	wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
+	wqe->wr.wr.umw.addr = bind_info->addr;
+	wqe->wr.wr.umw.length = bind_info->length;
+	wqe->wr.wr.umw.mr_index = mr->index;
+	wqe->wr.wr.umw.mw_index = mw->index;
+	wqe->wr.wr.umw.rkey = rkey;
+	wqe->wr.wr.umw.access = bind_info->mw_access_flags;
+	wqe->ssn = qp->ssn++;
+
+	advance_cur_index(qp);
+
+	return;
+}
+
+static void wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey)
+{
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+	struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
+
+	if (check_queue_full(qp))
+		return;
+
+	memset(wqe, 0, sizeof(*wqe));
+
+	wqe->wr.wr_id = qp->vqp.qp_ex.wr_id;
+	wqe->wr.opcode = IBV_WR_LOCAL_INV;
+	wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
+	wqe->wr.ex.invalidate_rkey = invalidate_rkey;
+	wqe->ssn = qp->ssn++;
+
+	advance_cur_index(qp);
+
+	return;
+}
+
+static void wr_rdma_read(struct ibv_qp_ex *ibqp, uint32_t rkey,
+			 uint64_t remote_addr)
+{
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+	struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
+
+	if (check_queue_full(qp))
+		return;
+
+	memset(wqe, 0, sizeof(*wqe));
+
+	wqe->wr.wr_id = qp->vqp.qp_ex.wr_id;
+	wqe->wr.opcode = IBV_WR_RDMA_READ;
+	wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
+	wqe->wr.wr.rdma.remote_addr = remote_addr;
+	wqe->wr.wr.rdma.rkey = rkey;
+	wqe->iova = remote_addr;
+	wqe->ssn = qp->ssn++;
+
+	advance_cur_index(qp);
+
+	return;
+}
+
+static void wr_rdma_write(struct ibv_qp_ex *ibqp, uint32_t rkey,
+			  uint64_t remote_addr)
+{
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+	struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
+
+	if (check_queue_full(qp))
+		return;
+
+	memset(wqe, 0, sizeof(*wqe));
+
+	wqe->wr.wr_id = qp->vqp.qp_ex.wr_id;
+	wqe->wr.opcode = IBV_WR_RDMA_WRITE;
+	wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
+	wqe->wr.wr.rdma.remote_addr = remote_addr;
+	wqe->wr.wr.rdma.rkey = rkey;
+	wqe->iova = remote_addr;
+	wqe->ssn = qp->ssn++;
+
+	advance_cur_index(qp);
+
+	return;
+}
+
+static void wr_rdma_write_imm(struct ibv_qp_ex *ibqp, uint32_t rkey,
+			      uint64_t remote_addr, __be32 imm_data)
+{
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+	struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
+
+	if (check_queue_full(qp))
+		return;
+
+	memset(wqe, 0, sizeof(*wqe));
+
+	wqe->wr.wr_id = qp->vqp.qp_ex.wr_id;
+	wqe->wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
+	wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
+	wqe->wr.wr.rdma.remote_addr = remote_addr;
+	wqe->wr.wr.rdma.rkey = rkey;
+	wqe->wr.ex.imm_data = (uint32_t)imm_data;
+	wqe->iova = remote_addr;
+	wqe->ssn = qp->ssn++;
+
+	advance_cur_index(qp);
+
+	return;
+}
+
+static void wr_send(struct ibv_qp_ex *ibqp)
+{
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+	struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
+
+	if (check_queue_full(qp))
+		return;
+
+	memset(wqe, 0, sizeof(*wqe));
+
+	wqe->wr.wr_id = qp->vqp.qp_ex.wr_id;
+	wqe->wr.opcode = IBV_WR_SEND;
+	wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
+	wqe->ssn = qp->ssn++;
+
+	advance_cur_index(qp);
+
+	return;
+}
+
+static void wr_send_imm(struct ibv_qp_ex *ibqp, __be32 imm_data)
+{
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+	struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
+
+	if (check_queue_full(qp))
+		return;
+
+	memset(wqe, 0, sizeof(*wqe));
+
+	wqe->wr.wr_id = qp->vqp.qp_ex.wr_id;
+	wqe->wr.opcode = IBV_WR_SEND_WITH_IMM;
+	wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
+	wqe->wr.ex.imm_data = (uint32_t)imm_data;
+	wqe->ssn = qp->ssn++;
+
+	advance_cur_index(qp);
+
+	return;
+}
+
+static void wr_send_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey)
+{
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+	struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
+
+	if (check_queue_full(qp))
+		return;
+
+	memset(wqe, 0, sizeof(*wqe));
+
+	wqe->wr.wr_id = qp->vqp.qp_ex.wr_id;
+	wqe->wr.opcode = IBV_WR_SEND_WITH_INV;
+	wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
+	wqe->wr.ex.invalidate_rkey = invalidate_rkey;
+	wqe->ssn = qp->ssn++;
+
+	advance_cur_index(qp);
+
+	return;
+}
+
+static void wr_send_tso(struct ibv_qp_ex *ibqp, void *hdr, uint16_t hdr_sz,
+			uint16_t mss)
+{
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+	struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
+
+	if (check_queue_full(qp))
+		return;
+
+	memset(wqe, 0, sizeof(*wqe));
+
+	wqe->wr.wr_id = qp->vqp.qp_ex.wr_id;
+	wqe->wr.opcode = IBV_WR_TSO;
+	wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
+	wqe->ssn = qp->ssn++;
+
+	advance_cur_index(qp);
+
+	return;
+}
+
+static void wr_set_ud_addr(struct ibv_qp_ex *ibqp, struct ibv_ah *ibah,
+			   uint32_t remote_qpn, uint32_t remote_qkey)
+{
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+	struct rxe_ah *ah = container_of(ibah, struct rxe_ah, ibv_ah);
+	struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue,
+						   qp->cur_index - 1);
+
+	if (qp->err)
+		return;
+
+	memcpy(&wqe->av, &ah->av, sizeof(ah->av));
+	wqe->wr.wr.ud.remote_qpn = remote_qpn;
+	wqe->wr.wr.ud.remote_qkey = remote_qkey;
+
+	return;
+}
+
+static void wr_set_xrc_srqn(struct ibv_qp_ex *ibqp, uint32_t remote_srqn)
+{
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+
+	if (qp->err)
+		return;
+
+	/* TODO when we add xrc */
+
+	return;
+}
+
+static void wr_set_inline_data(struct ibv_qp_ex *ibqp, void *addr,
+			       size_t length)
+{
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+	struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue,
+						   qp->cur_index - 1);
+
+	if (qp->err)
+		return;
+
+	if (length > qp->sq.max_inline) {
+		qp->err = ENOSPC;
+		return;
+	}
+
+	memcpy(wqe->dma.inline_data, addr, length);
+	wqe->dma.length = length;
+	wqe->dma.resid = 0;
+
+	return;
+}
+
+static void wr_set_inline_data_list(struct ibv_qp_ex *ibqp, size_t num_buf,
+				    const struct ibv_data_buf *buf_list)
+{
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+	struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue,
+						   qp->cur_index - 1);
+	uint8_t *data = wqe->dma.inline_data;
+	size_t length;
+	size_t tot_length = 0;
+
+	if (qp->err)
+		return;
+
+	while (num_buf--) {
+		length = buf_list->length;
+
+		if (tot_length + length > qp->sq.max_inline) {
+			qp->err = ENOSPC;
+			return;
+		}
+
+		memcpy(data, buf_list->addr, length);
+
+		tot_length += length;
+		buf_list++;
+		data += length;
+	}
+
+	wqe->dma.length = tot_length;
+
+	return;
+}
+
+static void wr_set_sge(struct ibv_qp_ex *ibqp, uint32_t lkey, uint64_t addr,
+		       uint32_t length)
+{
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+	struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue,
+						   qp->cur_index - 1);
+
+	if (qp->err)
+		return;
+
+	if (length) {
+		wqe->dma.length = length;
+		wqe->dma.resid = length;
+		wqe->dma.num_sge = 1;
+
+		wqe->dma.sge[0].addr = addr;
+		wqe->dma.sge[0].length = length;
+		wqe->dma.sge[0].lkey = lkey;
+	}
+
+	return;
+}
+
+static void wr_set_sge_list(struct ibv_qp_ex *ibqp, size_t num_sge,
+			    const struct ibv_sge *sg_list)
+{
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+	struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue,
+						   qp->cur_index - 1);
+	size_t tot_length = 0;
+
+	if (qp->err)
+		return;
+
+	if (num_sge > qp->sq.max_sge) {
+		qp->err = ENOSPC;
+		return;
+	}
+
+	wqe->dma.num_sge = num_sge;
+	memcpy(wqe->dma.sge, sg_list, num_sge * sizeof(*sg_list));
+
+	while (num_sge--)
+		tot_length += sg_list[num_sge].length;
+
+	wqe->dma.length = tot_length;
+	wqe->dma.resid = tot_length;
+
+	return;
+}
+
+static void wr_start(struct ibv_qp_ex *ibqp)
+{
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+
+	pthread_spin_lock(&qp->sq.lock);
+
+	qp->err = 0;
+	qp->cur_index = load_producer_index(qp->sq.queue);
+
+	return;
+}
+
+static int wr_complete(struct ibv_qp_ex *ibqp)
+{
+	int ret;
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+
+	if (qp->err) {
+		pthread_spin_unlock(&qp->sq.lock);
+		return qp->err;
+	}
+
+	store_producer_index(qp->sq.queue, qp->cur_index);
+	ret = rxe_post_send_db(&qp->vqp.qp);
+
+	pthread_spin_unlock(&qp->sq.lock);
+	return ret;
+}
+
+static void wr_abort(struct ibv_qp_ex *ibqp)
+{
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+
+	pthread_spin_unlock(&qp->sq.lock);
+	return;
+}
+
+struct ibv_qp *rxe_create_qp(struct ibv_pd *ibpd,
+			     struct ibv_qp_init_attr *attr)
+{
+	struct ibv_create_qp cmd;
+	struct urxe_create_qp_resp resp;
+	struct rxe_qp *qp;
+	int ret;
+
+	qp = malloc(sizeof(*qp));
+	if (!qp)
+		return NULL;
+
+	ret = ibv_cmd_create_qp(ibpd, &qp->vqp.qp, attr, &cmd, sizeof(cmd),
+				&resp.ibv_resp, sizeof(resp));
+	if (ret) {
+		free(qp);
+		return NULL;
+	}
+
+	if (attr->srq) {
+		qp->rq.max_sge = 0;
+		qp->rq.queue = NULL;
+		qp->rq_mmap_info.size = 0;
+	} else {
+		qp->rq.max_sge = attr->cap.max_recv_sge;
+		qp->rq.queue = mmap(NULL, resp.rq_mi.size, PROT_READ | PROT_WRITE,
+				    MAP_SHARED,
+				    ibpd->context->cmd_fd, resp.rq_mi.offset);
+		if ((void *)qp->rq.queue == MAP_FAILED) {
+			ibv_cmd_destroy_qp(&qp->vqp.qp);
+			free(qp);
+			return NULL;
+		}
+
+		qp->rq_mmap_info = resp.rq_mi;
+		pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE);
+	}
+
+	qp->sq.max_sge = attr->cap.max_send_sge;
+	qp->sq.max_inline = attr->cap.max_inline_data;
+	qp->sq.queue = mmap(NULL, resp.sq_mi.size, PROT_READ | PROT_WRITE,
+			    MAP_SHARED,
+			    ibpd->context->cmd_fd, resp.sq_mi.offset);
+	if ((void *)qp->sq.queue == MAP_FAILED) {
+		if (qp->rq_mmap_info.size)
+			munmap(qp->rq.queue, qp->rq_mmap_info.size);
+		ibv_cmd_destroy_qp(&qp->vqp.qp);
+		free(qp);
+		return NULL;
+	}
+
+	qp->sq_mmap_info = resp.sq_mi;
+	pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE);
+
+	return &qp->vqp.qp;
+}
+
+enum {
+	RXE_QP_CREATE_FLAGS_SUP = 0
+	//	| IBV_QP_CREATE_BLOCK_SELF_MCAST_LB
+	//	| IBV_QP_CREATE_SCATTER_FCS
+	//	| IBV_QP_CREATE_CVLAN_STRIPPING
+	//	| IBV_QP_CREATE_SOURCE_QPN
+	//	| IBV_QP_CREATE_PCI_WRITE_END_PADDING
+		,
+
+	RXE_QP_COMP_MASK_SUP =
+		  IBV_QP_INIT_ATTR_PD
+		| IBV_QP_INIT_ATTR_XRCD
+		| IBV_QP_INIT_ATTR_CREATE_FLAGS
+	//	| IBV_QP_INIT_ATTR_MAX_TSO_HEADER
+	//	| IBV_QP_INIT_ATTR_IND_TABLE
+	//	| IBV_QP_INIT_ATTR_RX_HASH
+		| IBV_QP_INIT_ATTR_SEND_OPS_FLAGS,
+
+	RXE_SUP_RC_QP_SEND_OPS_FLAGS =
+		  IBV_QP_EX_WITH_RDMA_WRITE
+		| IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM
+		| IBV_QP_EX_WITH_SEND
+		| IBV_QP_EX_WITH_SEND_WITH_IMM
+		| IBV_QP_EX_WITH_RDMA_READ
+		| IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP
+		| IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD
+		| IBV_QP_EX_WITH_LOCAL_INV
+		| IBV_QP_EX_WITH_BIND_MW
+		| IBV_QP_EX_WITH_SEND_WITH_INV,
+
+	RXE_SUP_UC_QP_SEND_OPS_FLAGS =
+		  IBV_QP_EX_WITH_RDMA_WRITE
+		| IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM
+		| IBV_QP_EX_WITH_SEND
+		| IBV_QP_EX_WITH_SEND_WITH_IMM
+		| IBV_QP_EX_WITH_BIND_MW
+		| IBV_QP_EX_WITH_SEND_WITH_INV,
+
+	RXE_SUP_UD_QP_SEND_OPS_FLAGS =
+		  IBV_QP_EX_WITH_SEND
+		| IBV_QP_EX_WITH_SEND_WITH_IMM,
+
+	RXE_SUP_XRC_QP_SEND_OPS_FLAGS =
+		RXE_SUP_RC_QP_SEND_OPS_FLAGS,
+};
+
+static int check_qp_init_attr(struct ibv_context *context,
+			      struct ibv_qp_init_attr_ex *attr)
+{
+	if (attr->comp_mask & ~RXE_QP_COMP_MASK_SUP)
+		return EOPNOTSUPP;
+
+	if ((attr->comp_mask & IBV_QP_INIT_ATTR_CREATE_FLAGS) &&
+	    (attr->create_flags & ~RXE_QP_CREATE_FLAGS_SUP))
+		return EOPNOTSUPP;
+
+	if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) {
+		switch (attr->qp_type) {
+		case IBV_QPT_RC:
+			if (attr->send_ops_flags & ~RXE_SUP_RC_QP_SEND_OPS_FLAGS)
+				return EOPNOTSUPP;
+			break;
+		case IBV_QPT_UC:
+			if (attr->send_ops_flags & ~RXE_SUP_UC_QP_SEND_OPS_FLAGS)
+				return EOPNOTSUPP;
+			break;
+		case IBV_QPT_UD:
+			if (attr->send_ops_flags & ~RXE_SUP_UD_QP_SEND_OPS_FLAGS)
+				return EOPNOTSUPP;
+			break;
+		case IBV_QPT_RAW_PACKET:
+			return EOPNOTSUPP;
+		case IBV_QPT_XRC_SEND:
+			if (attr->send_ops_flags & ~RXE_SUP_XRC_QP_SEND_OPS_FLAGS)
+				return EOPNOTSUPP;
+			break;
+		case IBV_QPT_XRC_RECV:
+			return EOPNOTSUPP;
+		case IBV_QPT_DRIVER:
+			return EOPNOTSUPP;
+		default:
+			return EOPNOTSUPP;
+		}
+	}
+
+	return 0;
+}
+
+static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags)
+{
+	if (flags & IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP)
+		qp->vqp.qp_ex.wr_atomic_cmp_swp = wr_atomic_cmp_swp;
+
+	if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD)
+		qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add;
+
+	if (flags & IBV_QP_EX_WITH_BIND_MW)
+		qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw;
+
+	if (flags & IBV_QP_EX_WITH_LOCAL_INV)
+		qp->vqp.qp_ex.wr_local_inv = wr_local_inv;
+
+	if (flags & IBV_QP_EX_WITH_RDMA_READ)
+		qp->vqp.qp_ex.wr_rdma_read = wr_rdma_read;
+
+	if (flags & IBV_QP_EX_WITH_RDMA_WRITE)
+		qp->vqp.qp_ex.wr_rdma_write = wr_rdma_write;
+
+	if (flags & IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM)
+		qp->vqp.qp_ex.wr_rdma_write_imm = wr_rdma_write_imm;
+
+	if (flags & IBV_QP_EX_WITH_SEND)
+		qp->vqp.qp_ex.wr_send = wr_send;
+
+	if (flags & IBV_QP_EX_WITH_SEND_WITH_IMM)
+		qp->vqp.qp_ex.wr_send_imm = wr_send_imm;
+
+	if (flags & IBV_QP_EX_WITH_SEND_WITH_INV)
+		qp->vqp.qp_ex.wr_send_inv = wr_send_inv;
+
+	if (flags & IBV_QP_EX_WITH_TSO)
+		qp->vqp.qp_ex.wr_send_tso = wr_send_tso;
+
+	qp->vqp.qp_ex.wr_set_ud_addr = wr_set_ud_addr;
+	qp->vqp.qp_ex.wr_set_xrc_srqn = wr_set_xrc_srqn;
+	qp->vqp.qp_ex.wr_set_inline_data = wr_set_inline_data;
+	qp->vqp.qp_ex.wr_set_inline_data_list = wr_set_inline_data_list;
+	qp->vqp.qp_ex.wr_set_sge = wr_set_sge;
+	qp->vqp.qp_ex.wr_set_sge_list = wr_set_sge_list;
+
+	qp->vqp.qp_ex.wr_start = wr_start;
+	qp->vqp.qp_ex.wr_complete = wr_complete;
+	qp->vqp.qp_ex.wr_abort = wr_abort;
+}
+
+struct ibv_qp *rxe_create_qp_ex(struct ibv_context *context,
+				struct ibv_qp_init_attr_ex *attr)
+{
+	int ret;
+	struct rxe_qp *qp;
+	struct ibv_create_qp_ex cmd = {};
+	struct urxe_create_qp_ex_resp resp = {};
+	size_t cmd_size = sizeof(cmd);
+	size_t resp_size = sizeof(resp);
+
+	ret = check_qp_init_attr(context, attr);
+	if (ret) {
+		errno = ret;
+		return NULL;
+	}
+
+	qp = calloc(1, sizeof(*qp));
+	if (!qp)
+		return NULL;
+
+	if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS)
+		set_qp_send_ops(qp, attr->send_ops_flags);
+
+	ret = ibv_cmd_create_qp_ex2(context, &qp->vqp, attr,
+				    &cmd, cmd_size,
+				    &resp.ibv_resp, resp_size);
+	if (ret) {
+		free(qp);
+		return NULL;
+	}
+
+	qp->vqp.comp_mask |= VERBS_QP_EX;
+
+	if (attr->srq) {
+		qp->rq.max_sge = 0;
+		qp->rq.queue = NULL;
+		qp->rq_mmap_info.size = 0;
+	} else {
+		qp->rq.max_sge = attr->cap.max_recv_sge;
+		qp->rq.queue = mmap(NULL, resp.rq_mi.size, PROT_READ | PROT_WRITE,
+				    MAP_SHARED, context->cmd_fd, resp.rq_mi.offset);
+		if ((void *)qp->rq.queue == MAP_FAILED) {
+			ibv_cmd_destroy_qp(&qp->vqp.qp);
+			free(qp);
+			return NULL;
+		}
+
+		qp->rq_mmap_info = resp.rq_mi;
+		pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE);
+	}
+
+	qp->sq.max_sge = attr->cap.max_send_sge;
+	qp->sq.max_inline = attr->cap.max_inline_data;
+	qp->sq.queue = mmap(NULL, resp.sq_mi.size, PROT_READ | PROT_WRITE,
+			    MAP_SHARED, context->cmd_fd, resp.sq_mi.offset);
+	if ((void *)qp->sq.queue == MAP_FAILED) {
+		if (qp->rq_mmap_info.size)
+			munmap(qp->rq.queue, qp->rq_mmap_info.size);
+		ibv_cmd_destroy_qp(&qp->vqp.qp);
+		free(qp);
+		return NULL;
+	}
+
+	qp->sq_mmap_info = resp.sq_mi;
+	pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE);
+
+	return &qp->vqp.qp;
+}
+
+int rxe_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, int attr_mask,
+		 struct ibv_qp_init_attr *init_attr)
+{
+	struct ibv_query_qp cmd;
+
+	return ibv_cmd_query_qp(ibqp, attr, attr_mask, init_attr,
+				&cmd, sizeof(cmd));
+}
+
+int rxe_modify_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
+		  int attr_mask)
+{
+	struct ibv_modify_qp cmd = {};
+
+	return ibv_cmd_modify_qp(ibqp, attr, attr_mask, &cmd, sizeof(cmd));
+}
+
+int rxe_destroy_qp(struct ibv_qp *ibqp)
+{
+	int ret;
+	struct rxe_qp *qp = to_rqp(ibqp);
+
+	ret = ibv_cmd_destroy_qp(ibqp);
+	if (!ret) {
+		if (qp->rq_mmap_info.size)
+			munmap(qp->rq.queue, qp->rq_mmap_info.size);
+		if (qp->sq_mmap_info.size)
+			munmap(qp->sq.queue, qp->sq_mmap_info.size);
+
+		free(qp);
+	}
+
+	return ret;
+}
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-09-18 21:26 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-18 21:25 [PATCH for-next v5 0/4] rxe: API extensions Bob Pearson
2020-09-18 21:25 ` [PATCH for-next v5 1/4] rxe: Implement MW commands Bob Pearson
2020-09-18 21:25 ` [PATCH for-next v5 2/4] rxe: add extended query device verb Bob Pearson
2020-09-18 21:25 ` [PATCH for-next v5 3/4] rxe: add support for extended CQ operations Bob Pearson
2020-09-18 21:25 ` [PATCH for-next v5 4/4] rxe: add support for extended QP operations Bob Pearson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).