* [PATCH v2 0/2] Implement memory windows support for rxe
@ 2021-06-08  4:28 Bob Pearson
  2021-06-08  4:28 ` [PATCH v2 1/2] Update kernel headers Bob Pearson
  2021-06-08  4:28 ` [PATCH v2 2/2] Providers/rxe: Implement memory windows Bob Pearson
  0 siblings, 2 replies; 10+ messages in thread
From: Bob Pearson @ 2021-06-08  4:28 UTC (permalink / raw)
  To: jgg, zyjzyj2000, monis, linux-rdma; +Cc: Bob Pearson

These patches make the required changes to the rxe provider and the
rxe ABI to support the kernel memory windows patches to the rxe driver.
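
As a rough illustration (not part of these patches), an application can
check whether a device advertises memory window support before relying
on it.  This is a minimal sketch under the assumption that "ctx" is an
already-opened device context; the helper name is made up for the
example:

#include <infiniband/verbs.h>

/* Illustrative sketch only: check whether the device reports memory
 * window support and a non-zero MW limit. */
static int device_supports_mw(struct ibv_context *ctx)
{
	struct ibv_device_attr attr;

	if (ibv_query_device(ctx, &attr))
		return 0;

	return (attr.device_cap_flags & IBV_DEVICE_MEM_WINDOW) &&
	       attr.max_mw > 0;
}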

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---

Bob Pearson (2):
  Update kernel headers
  Providers/rxe: Implement memory windows

 kernel-headers/rdma/rdma_user_rxe.h |  10 ++
 providers/rxe/rxe.c                 | 157 +++++++++++++++++++++++++++-
 2 files changed, 165 insertions(+), 2 deletions(-)

-- 
2.30.2



* [PATCH v2 1/2] Update kernel headers
  2021-06-08  4:28 [PATCH v2 0/2] Implement memory windows support for rxe Bob Pearson
@ 2021-06-08  4:28 ` Bob Pearson
  2021-06-08  4:28 ` [PATCH v2 2/2] Providers/rxe: Implement memory windows Bob Pearson
  1 sibling, 0 replies; 10+ messages in thread
From: Bob Pearson @ 2021-06-08  4:28 UTC (permalink / raw)
  To: jgg, zyjzyj2000, monis, linux-rdma; +Cc: Bob Pearson

To commit ?? ("RDMA/rxe: Disallow MR dereg and invalidate when bound").

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 kernel-headers/rdma/rdma_user_rxe.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/kernel-headers/rdma/rdma_user_rxe.h b/kernel-headers/rdma/rdma_user_rxe.h
index 068433e2..e283c222 100644
--- a/kernel-headers/rdma/rdma_user_rxe.h
+++ b/kernel-headers/rdma/rdma_user_rxe.h
@@ -99,7 +99,16 @@ struct rxe_send_wr {
 			__u32	remote_qkey;
 			__u16	pkey_index;
 		} ud;
+		struct {
+			__aligned_u64	addr;
+			__aligned_u64	length;
+			__u32		mr_lkey;
+			__u32		mw_rkey;
+			__u32		rkey;
+			__u32		access;
+		} mw;
 		/* reg is only used by the kernel and is not part of the uapi */
+#ifdef __KERNEL__
 		struct {
 			union {
 				struct ib_mr *mr;
@@ -108,6 +117,7 @@ struct rxe_send_wr {
 			__u32	     key;
 			__u32	     access;
 		} reg;
+#endif
 	} wr;
 };
 
-- 
2.30.2



* [PATCH v2 2/2] Providers/rxe: Implement memory windows
  2021-06-08  4:28 [PATCH v2 0/2] Implement memory windows support for rxe Bob Pearson
  2021-06-08  4:28 ` [PATCH v2 1/2] Update kernel headers Bob Pearson
@ 2021-06-08  4:28 ` Bob Pearson
  2021-06-08  6:54   ` Zhu Yanjun
  1 sibling, 1 reply; 10+ messages in thread
From: Bob Pearson @ 2021-06-08  4:28 UTC (permalink / raw)
  To: jgg, zyjzyj2000, monis, linux-rdma; +Cc: Bob Pearson

This patch makes the required changes to the rxe provider to support the
kernel memory windows patches to the rxe driver.

The following changes are made:
  - Add ibv_alloc_mw verb
  - Add ibv_dealloc_mw verb
  - Add ibv_bind_mw verb for type 1 MWs
  - Add support for bind MW send work requests through the traditional
    QP API and the extended QP API.
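
As a rough usage sketch of the first three items (illustrative only,
not part of this patch), the classic verbs path could look like the
following.  pd, qp, mr, buf and len are assumed to exist, and the MR
is assumed to have been registered with IBV_ACCESS_MW_BIND:

#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <infiniband/verbs.h>

/* Illustrative sketch only: allocate a type 1 MW, bind it over an
 * existing MR with ibv_bind_mw(), and return the rkey to advertise
 * to the remote peer. */
static int bind_type1_mw(struct ibv_pd *pd, struct ibv_qp *qp,
			 struct ibv_mr *mr, void *buf, size_t len,
			 uint32_t *rkey_out)
{
	int ret;
	struct ibv_mw *mw;
	struct ibv_mw_bind bind = {
		.wr_id = 1,
		.send_flags = IBV_SEND_SIGNALED,
		.bind_info = {
			.mr = mr,
			.addr = (uint64_t)(uintptr_t)buf,
			.length = len,
			.mw_access_flags = IBV_ACCESS_REMOTE_READ |
					   IBV_ACCESS_REMOTE_WRITE,
		},
	};

	mw = ibv_alloc_mw(pd, IBV_MW_TYPE_1);
	if (!mw)
		return -errno;

	ret = ibv_bind_mw(qp, mw, &bind);
	if (ret) {
		ibv_dealloc_mw(mw);
		return -ret;
	}

	/* On success ibv_bind_mw() updates mw->rkey; the bind completion
	 * is reaped from the send CQ like any other signaled WR. */
	*rkey_out = mw->rkey;
	return 0;
}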

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
v2:
  Added support for extended QP bind MW work requests.
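
A rough sketch of the extended QP path added in this revision
(illustrative only, not part of the patch): qpx is assumed to come
from ibv_create_qp_ex() with IBV_QP_EX_WITH_BIND_MW set in
send_ops_flags, and mw, mr, buf and len are assumed placeholders:

#include <stddef.h>
#include <stdint.h>
#include <infiniband/verbs.h>

/* Illustrative sketch only: post a bind MW work request through the
 * extended QP API. */
static int bind_mw_extended(struct ibv_qp_ex *qpx, struct ibv_mw *mw,
			    struct ibv_mr *mr, void *buf, size_t len)
{
	struct ibv_mw_bind_info info = {
		.mr = mr,
		.addr = (uint64_t)(uintptr_t)buf,
		.length = len,
		.mw_access_flags = IBV_ACCESS_REMOTE_WRITE,
	};

	ibv_wr_start(qpx);
	qpx->wr_id = 2;
	qpx->wr_flags = IBV_SEND_SIGNALED;
	/* ibv_inc_rkey() advances the consumer-owned low byte of the
	 * rkey, much as next_rkey() does in the provider code below. */
	ibv_wr_bind_mw(qpx, mw, ibv_inc_rkey(mw->rkey), &info);

	return ibv_wr_complete(qpx);
}
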
---
 providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 155 insertions(+), 2 deletions(-)

diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
index a68656ae..bb39ef04 100644
--- a/providers/rxe/rxe.c
+++ b/providers/rxe/rxe.c
@@ -128,6 +128,95 @@ static int rxe_dealloc_pd(struct ibv_pd *pd)
 	return ret;
 }
 
+static struct ibv_mw *rxe_alloc_mw(struct ibv_pd *ibpd, enum ibv_mw_type type)
+{
+	int ret;
+	struct ibv_mw *ibmw;
+	struct ibv_alloc_mw cmd = {};
+	struct ib_uverbs_alloc_mw_resp resp = {};
+
+	ibmw = calloc(1, sizeof(*ibmw));
+	if (!ibmw)
+		return NULL;
+
+	ret = ibv_cmd_alloc_mw(ibpd, type, ibmw, &cmd, sizeof(cmd),
+						&resp, sizeof(resp));
+	if (ret) {
+		free(ibmw);
+		return NULL;
+	}
+
+	return ibmw;
+}
+
+static int rxe_dealloc_mw(struct ibv_mw *ibmw)
+{
+	int ret;
+
+	ret = ibv_cmd_dealloc_mw(ibmw);
+	if (ret)
+		return ret;
+
+	free(ibmw);
+	return 0;
+}
+
+static int next_rkey(int rkey)
+{
+	return (rkey & 0xffffff00) | ((rkey + 1) & 0x000000ff);
+}
+
+static int rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list,
+			 struct ibv_send_wr **bad_wr);
+
+static int rxe_bind_mw(struct ibv_qp *ibqp, struct ibv_mw *ibmw,
+			struct ibv_mw_bind *mw_bind)
+{
+	int ret;
+	struct ibv_mw_bind_info	*bind_info = &mw_bind->bind_info;
+	struct ibv_send_wr ibwr;
+	struct ibv_send_wr *bad_wr;
+
+	if (!bind_info->mr && (bind_info->addr || bind_info->length)) {
+		ret = EINVAL;
+		goto err;
+	}
+
+	if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) {
+		ret = EINVAL;
+		goto err;
+	}
+
+	if (bind_info->mr) {
+		if (ibmw->pd != bind_info->mr->pd) {
+			ret = EPERM;
+			goto err;
+		}
+	}
+
+	memset(&ibwr, 0, sizeof(ibwr));
+
+	ibwr.opcode		= IBV_WR_BIND_MW;
+	ibwr.next		= NULL;
+	ibwr.wr_id		= mw_bind->wr_id;
+	ibwr.send_flags		= mw_bind->send_flags;
+	ibwr.bind_mw.bind_info	= mw_bind->bind_info;
+	ibwr.bind_mw.mw		= ibmw;
+	ibwr.bind_mw.rkey	= next_rkey(ibmw->rkey);
+
+	ret = rxe_post_send(ibqp, &ibwr, &bad_wr);
+	if (ret)
+		goto err;
+
+	/* user has to undo this if he gets an error wc */
+	ibmw->rkey = ibwr.bind_mw.rkey;
+
+	return 0;
+err:
+	errno = ret;
+	return errno;
+}
+
 static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 				 uint64_t hca_va, int access)
 {
@@ -715,6 +804,31 @@ static void wr_atomic_fetch_add(struct ibv_qp_ex *ibqp, uint32_t rkey,
 	advance_qp_cur_index(qp);
 }
 
+static void wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw,
+		       uint32_t rkey, const struct ibv_mw_bind_info *info)
+{
+	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+	struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
+
+	if (check_qp_queue_full(qp))
+		return;
+
+	memset(wqe, 0, sizeof(*wqe));
+
+	wqe->wr.wr_id = ibqp->wr_id;
+	wqe->wr.opcode = IBV_WR_BIND_MW;
+	wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
+	wqe->wr.wr.mw.addr = info->addr;
+	wqe->wr.wr.mw.length = info->length;
+	wqe->wr.wr.mw.mr_lkey = info->mr->lkey;
+	wqe->wr.wr.mw.mw_rkey = ibmw->rkey;
+	wqe->wr.wr.mw.rkey = rkey;
+	wqe->wr.wr.mw.access = info->mw_access_flags;
+	wqe->ssn = qp->ssn++;
+
+	advance_qp_cur_index(qp);
+}
+
 static void wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey)
 {
 	struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
@@ -1106,6 +1220,7 @@ enum {
 		| IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP
 		| IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD
 		| IBV_QP_EX_WITH_LOCAL_INV
+		| IBV_QP_EX_WITH_BIND_MW
 		| IBV_QP_EX_WITH_SEND_WITH_INV,
 
 	RXE_SUP_UC_QP_SEND_OPS_FLAGS =
@@ -1113,6 +1228,7 @@ enum {
 		| IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM
 		| IBV_QP_EX_WITH_SEND
 		| IBV_QP_EX_WITH_SEND_WITH_IMM
+		| IBV_QP_EX_WITH_BIND_MW
 		| IBV_QP_EX_WITH_SEND_WITH_INV,
 
 	RXE_SUP_UD_QP_SEND_OPS_FLAGS =
@@ -1162,6 +1278,9 @@ static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags)
 	if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD)
 		qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add;
 
+	if (flags & IBV_QP_EX_WITH_BIND_MW)
+		qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw;
+
 	if (flags & IBV_QP_EX_WITH_LOCAL_INV)
 		qp->vqp.qp_ex.wr_local_inv = wr_local_inv;
 
@@ -1275,9 +1394,10 @@ static int rxe_destroy_qp(struct ibv_qp *ibqp)
 }
 
 /* basic sanity checks for send work request */
-static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
+static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr,
 			    unsigned int length)
 {
+	struct rxe_wq *sq = &qp->sq;
 	enum ibv_wr_opcode opcode = ibwr->opcode;
 
 	if (ibwr->num_sge > sq->max_sge)
@@ -1291,11 +1411,26 @@ static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
 	if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline))
 		return -EINVAL;
 
+	if (ibwr->opcode == IBV_WR_BIND_MW) {
+		if (length)
+			return -EINVAL;
+		if (ibwr->num_sge)
+			return -EINVAL;
+		if (ibwr->imm_data)
+			return -EINVAL;
+		if ((qp_type(qp) != IBV_QPT_RC) &&
+		    (qp_type(qp) != IBV_QPT_UC))
+			return -EINVAL;
+	}
+
 	return 0;
 }
 
 static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
 {
+	struct ibv_mw *ibmw;
+	struct ibv_mr *ibmr;
+
 	memset(kwr, 0, sizeof(*kwr));
 
 	kwr->wr_id		= uwr->wr_id;
@@ -1326,6 +1461,18 @@ static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
 		kwr->wr.atomic.rkey		= uwr->wr.atomic.rkey;
 		break;
 
+	case IBV_WR_BIND_MW:
+		ibmr = uwr->bind_mw.bind_info.mr;
+		ibmw = uwr->bind_mw.mw;
+
+		kwr->wr.mw.addr = uwr->bind_mw.bind_info.addr;
+		kwr->wr.mw.length = uwr->bind_mw.bind_info.length;
+		kwr->wr.mw.mr_lkey = ibmr->lkey;
+		kwr->wr.mw.mw_rkey = ibmw->rkey;
+		kwr->wr.mw.rkey = uwr->bind_mw.rkey;
+		kwr->wr.mw.access = uwr->bind_mw.bind_info.mw_access_flags;
+		break;
+
 	default:
 		break;
 	}
@@ -1348,6 +1495,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
 	if (ibwr->send_flags & IBV_SEND_INLINE) {
 		uint8_t *inline_data = wqe->dma.inline_data;
 
+		wqe->dma.resid = 0;
+
 		for (i = 0; i < num_sge; i++) {
 			memcpy(inline_data,
 			       (uint8_t *)(long)ibwr->sg_list[i].addr,
@@ -1363,6 +1512,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
 		wqe->iova	= ibwr->wr.atomic.remote_addr;
 	else
 		wqe->iova	= ibwr->wr.rdma.remote_addr;
+
 	wqe->dma.length		= length;
 	wqe->dma.resid		= length;
 	wqe->dma.num_sge	= num_sge;
@@ -1385,7 +1535,7 @@ static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq,
 	for (i = 0; i < ibwr->num_sge; i++)
 		length += ibwr->sg_list[i].length;
 
-	err = validate_send_wr(sq, ibwr, length);
+	err = validate_send_wr(qp, ibwr, length);
 	if (err) {
 		printf("validate send failed\n");
 		return err;
@@ -1579,6 +1729,9 @@ static const struct verbs_context_ops rxe_ctx_ops = {
 	.dealloc_pd = rxe_dealloc_pd,
 	.reg_mr = rxe_reg_mr,
 	.dereg_mr = rxe_dereg_mr,
+	.alloc_mw = rxe_alloc_mw,
+	.dealloc_mw = rxe_dealloc_mw,
+	.bind_mw = rxe_bind_mw,
 	.create_cq = rxe_create_cq,
 	.create_cq_ex = rxe_create_cq_ex,
 	.poll_cq = rxe_poll_cq,
-- 
2.30.2



* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows
  2021-06-08  4:28 ` [PATCH v2 2/2] Providers/rxe: Implement memory windows Bob Pearson
@ 2021-06-08  6:54   ` Zhu Yanjun
  2021-06-08  7:01     ` Bob Pearson
  0 siblings, 1 reply; 10+ messages in thread
From: Zhu Yanjun @ 2021-06-08  6:54 UTC (permalink / raw)
  To: Bob Pearson; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list

On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>
> This patch makes the required changes to the rxe provider to support the
> kernel memory windows patches to the rxe driver.
>
> The following changes are made:
>   - Add ibv_alloc_mw verb
>   - Add ibv_dealloc_mw verb
>   - Add ibv_bind_mw verb for type 1 MWs
>   - Add support for bind MW send work requests through the traditional
>     QP API and the extended QP API.
>
> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> ---
> v2:
>   Added support for extended QP bind MW work requests.

I got the following errors.
Is it a known issue?

# ./bin/run_tests.py --dev rxe0
.............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss
======================================================================
ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase)
Verify bind memory window operation using the new post_send API.
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw
    u.poll_cq(server.cq)
  File "/root/rdma-core/tests/utils.py", line 538, in poll_cq
    raise PyverbsRDMAError('Completion status is {s}'.
pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory
window bind error. Errno: 6, No such device or address

----------------------------------------------------------------------
Ran 193 tests in 2.544s

FAILED (errors=1, skipped=128)

Zhu Yanjun



* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows
  2021-06-08  6:54   ` Zhu Yanjun
@ 2021-06-08  7:01     ` Bob Pearson
  2021-06-08  7:17       ` Zhu Yanjun
  0 siblings, 1 reply; 10+ messages in thread
From: Bob Pearson @ 2021-06-08  7:01 UTC (permalink / raw)
  To: Zhu Yanjun; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list

On 6/8/21 1:54 AM, Zhu Yanjun wrote:
> On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>>
>> This patch makes the required changes to the rxe provider to support the
>> kernel memory windows patches to the rxe driver.
>>
>> The following changes are made:
>>   - Add ibv_alloc_mw verb
>>   - Add ibv_dealloc_mw verb
>>   - Add ibv_bind_mw verb for type 1 MWs
>>   - Add support for bind MW send work requests through the traditional
>>     QP API and the extended QP API.
>>
>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
>> ---
>> v2:
>>   Added support for extended QP bind MW work requests.
> 
> I got the following errors.
> Is it a known issue?
> 
> # ./bin/run_tests.py --dev rxe0
> .............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss
> ======================================================================
> ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase)
> Verify bind memory window operation using the new post_send API.
> ----------------------------------------------------------------------
> Traceback (most recent call last):
>   File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw
>     u.poll_cq(server.cq)
>   File "/root/rdma-core/tests/utils.py", line 538, in poll_cq
>     raise PyverbsRDMAError('Completion status is {s}'.
> pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory
> window bind error. Errno: 6, No such device or address
> 
> ----------------------------------------------------------------------
> Ran 193 tests in 2.544s
> 
> FAILED (errors=1, skipped=128)
> 
> Zhu Yanjun
Yes. It is because the test is not setting the BIND_MW access for the MR. It is not permitted to bind an MW to a MR that does not have the BIND_MW access flag set. But this test mistakenly thinks it is going to work. The other MW tests in test_mr.py do set the BIND_MW access flag.
Bob
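
For reference, a minimal sketch of the kind of registration being
described here (pd, buf and len are placeholder names, and the actual
pyverbs test fix may look different):

	/* The MR backing a memory window must be registered with
	 * IBV_ACCESS_MW_BIND in addition to the remote access rights
	 * the window will grant; without it the bind WR completes
	 * with a memory window bind error, as seen in the test run. */
	struct ibv_mr *mr = ibv_reg_mr(pd, buf, len,
				       IBV_ACCESS_LOCAL_WRITE |
				       IBV_ACCESS_REMOTE_READ |
				       IBV_ACCESS_REMOTE_WRITE |
				       IBV_ACCESS_MW_BIND);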



* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows
  2021-06-08  7:01     ` Bob Pearson
@ 2021-06-08  7:17       ` Zhu Yanjun
  2021-06-08 23:18         ` Pearson, Robert B
  0 siblings, 1 reply; 10+ messages in thread
From: Zhu Yanjun @ 2021-06-08  7:17 UTC (permalink / raw)
  To: Bob Pearson; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list

On Tue, Jun 8, 2021 at 3:01 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>
> On 6/8/21 1:54 AM, Zhu Yanjun wrote:
> > On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
> >>
> >> This patch makes the required changes to the rxe provider to support the
> >> kernel memory windows patches to the rxe driver.
> >>
> >> The following changes are made:
> >>   - Add ibv_alloc_mw verb
> >>   - Add ibv_dealloc_mw verb
> >>   - Add ibv_bind_mw verb for type 1 MWs
> >>   - Add support for bind MW send work requests through the traditional
> >>     QP API and the extended QP API.
> >>
> >> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> >> ---
> >> v2:
> >>   Added support for extended QP bind MW work requests.
> >
> > I got the following errors.
> > Is it a known issue?
> >
> > # ./bin/run_tests.py --dev rxe0
> > .............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss
> > ======================================================================
> > ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase)
> > Verify bind memory window operation using the new post_send API.
> > ----------------------------------------------------------------------
> > Traceback (most recent call last):
> >   File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw
> >     u.poll_cq(server.cq)
> >   File "/root/rdma-core/tests/utils.py", line 538, in poll_cq
> >     raise PyverbsRDMAError('Completion status is {s}'.
> > pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory
> > window bind error. Errno: 6, No such device or address
> >
> > ----------------------------------------------------------------------
> > Ran 193 tests in 2.544s
> >
> > FAILED (errors=1, skipped=128)
> >
> > Zhu Yanjun
> Yes. It is because the test is not setting the BIND_MW access for the MR. It is not permitted to bind an MW to a MR that does not have the BIND_MW access flag set. But this test mistakenly thinks it is going to work. The other MW tests in test_mr.py do set the BIND_MW access flag.

Thanks a lot.
If this root cause is correct and setting the BIND_MW access flag on
the MR fixes the problem, please send a commit with the fix.

I am fine with this MW patch series once all the problems (in both
rdma-core and the kernel rxe driver) are fixed.

Thanks for your effort.

Zhu Yanjun



* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows
  2021-06-08  7:17       ` Zhu Yanjun
@ 2021-06-08 23:18         ` Pearson, Robert B
  2021-06-09  1:51           ` Zhu Yanjun
  0 siblings, 1 reply; 10+ messages in thread
From: Pearson, Robert B @ 2021-06-08 23:18 UTC (permalink / raw)
  To: Zhu Yanjun; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list


On 6/8/2021 2:17 AM, Zhu Yanjun wrote:
> On Tue, Jun 8, 2021 at 3:01 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>> On 6/8/21 1:54 AM, Zhu Yanjun wrote:
>>> On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>>>> This patch makes the required changes to the rxe provider to support the
>>>> kernel memory windows patches to the rxe driver.
>>>>
>>>> The following changes are made:
>>>>    - Add ibv_alloc_mw verb
>>>>    - Add ibv_dealloc_mw verb
>>>>    - Add ibv_bind_mw verb for type 1 MWs
>>>>    - Add support for bind MW send work requests through the traditional
>>>>      QP API and the extended QP API.
>>>>
>>>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
>>>> ---
>>>> v2:
>>>>    Added support for extended QP bind MW work requests.
>>> I got the following errors.
>>> Is it a known issue?
>>>
>>> # ./bin/run_tests.py --dev rxe0
>>> .............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss
>>> ======================================================================
>>> ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase)
>>> Verify bind memory window operation using the new post_send API.
>>> ----------------------------------------------------------------------
>>> Traceback (most recent call last):
>>>    File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw
>>>      u.poll_cq(server.cq)
>>>    File "/root/rdma-core/tests/utils.py", line 538, in poll_cq
>>>      raise PyverbsRDMAError('Completion status is {s}'.
>>> pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory
>>> window bind error. Errno: 6, No such device or address
>>>
>>> ----------------------------------------------------------------------
>>> Ran 193 tests in 2.544s
>>>
>>> FAILED (errors=1, skipped=128)
>>>
>>> Zhu Yanjun
>> Yes. It is because the test is not setting the BIND_MW access for the MR. It is not permitted to bind an MW to a MR that does not have the BIND_MW access flag set. But this test mistakenly thinks it is going to work. The other MW tests in test_mr.py do set the BIND_MW access flag.
> Thanks a lot.
> If the root cause to this problem is correct, and setting the BIND_MW
> access for the MR can fix this problem,
>
> Please file a commit to fix this problem.
>
> I am fine with this MW patch series if all the problems (including
> rdma-core and rxe in kernel) are fixed.
>
> Thanks for your effort.
>
> Zhu Yanjun

Zhu,

There was a second test bug, now fixed, and all of the Python tests
pass or are skipped with no errors. There is a message to the
linux-rdma list from Edward Srouji indicating that the patch is
upstream on GitHub, or you can use the one in my email to him. I would
guess it will be a while until his PR goes upstream.

I sent a fresh set of patches last night for both the kernel and user 
space provider.

Bob

>> Bob
>>>> ---
>>>>   providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++++++++++++++++++-
>>>>   1 file changed, 155 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
>>>> index a68656ae..bb39ef04 100644
>>>> --- a/providers/rxe/rxe.c
>>>> +++ b/providers/rxe/rxe.c
>>>> @@ -128,6 +128,95 @@ static int rxe_dealloc_pd(struct ibv_pd *pd)
>>>>          return ret;
>>>>   }
>>>>
>>>> +static struct ibv_mw *rxe_alloc_mw(struct ibv_pd *ibpd, enum ibv_mw_type type)
>>>> +{
>>>> +       int ret;
>>>> +       struct ibv_mw *ibmw;
>>>> +       struct ibv_alloc_mw cmd = {};
>>>> +       struct ib_uverbs_alloc_mw_resp resp = {};
>>>> +
>>>> +       ibmw = calloc(1, sizeof(*ibmw));
>>>> +       if (!ibmw)
>>>> +               return NULL;
>>>> +
>>>> +       ret = ibv_cmd_alloc_mw(ibpd, type, ibmw, &cmd, sizeof(cmd),
>>>> +                                               &resp, sizeof(resp));
>>>> +       if (ret) {
>>>> +               free(ibmw);
>>>> +               return NULL;
>>>> +       }
>>>> +
>>>> +       return ibmw;
>>>> +}
>>>> +
>>>> +static int rxe_dealloc_mw(struct ibv_mw *ibmw)
>>>> +{
>>>> +       int ret;
>>>> +
>>>> +       ret = ibv_cmd_dealloc_mw(ibmw);
>>>> +       if (ret)
>>>> +               return ret;
>>>> +
>>>> +       free(ibmw);
>>>> +       return 0;
>>>> +}
>>>> +
>>>> +static int next_rkey(int rkey)
>>>> +{
>>>> +       return (rkey & 0xffffff00) | ((rkey + 1) & 0x000000ff);
>>>> +}
>>>> +
>>>> +static int rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list,
>>>> +                        struct ibv_send_wr **bad_wr);
>>>> +
>>>> +static int rxe_bind_mw(struct ibv_qp *ibqp, struct ibv_mw *ibmw,
>>>> +                       struct ibv_mw_bind *mw_bind)
>>>> +{
>>>> +       int ret;
>>>> +       struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info;
>>>> +       struct ibv_send_wr ibwr;
>>>> +       struct ibv_send_wr *bad_wr;
>>>> +
>>>> +       if (!bind_info->mr && (bind_info->addr || bind_info->length)) {
>>>> +               ret = EINVAL;
>>>> +               goto err;
>>>> +       }
>>>> +
>>>> +       if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) {
>>>> +               ret = EINVAL;
>>>> +               goto err;
>>>> +       }
>>>> +
>>>> +       if (bind_info->mr) {
>>>> +               if (ibmw->pd != bind_info->mr->pd) {
>>>> +                       ret = EPERM;
>>>> +                       goto err;
>>>> +               }
>>>> +       }
>>>> +
>>>> +       memset(&ibwr, 0, sizeof(ibwr));
>>>> +
>>>> +       ibwr.opcode             = IBV_WR_BIND_MW;
>>>> +       ibwr.next               = NULL;
>>>> +       ibwr.wr_id              = mw_bind->wr_id;
>>>> +       ibwr.send_flags         = mw_bind->send_flags;
>>>> +       ibwr.bind_mw.bind_info  = mw_bind->bind_info;
>>>> +       ibwr.bind_mw.mw         = ibmw;
>>>> +       ibwr.bind_mw.rkey       = next_rkey(ibmw->rkey);
>>>> +
>>>> +       ret = rxe_post_send(ibqp, &ibwr, &bad_wr);
>>>> +       if (ret)
>>>> +               goto err;
>>>> +
>>>> +       /* user has to undo this if he gets an error wc */
>>>> +       ibmw->rkey = ibwr.bind_mw.rkey;
>>>> +
>>>> +       return 0;
>>>> +err:
>>>> +       errno = ret;
>>>> +       return errno;
>>>> +}
>>>> +
>>>>   static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
>>>>                                   uint64_t hca_va, int access)
>>>>   {
>>>> @@ -715,6 +804,31 @@ static void wr_atomic_fetch_add(struct ibv_qp_ex *ibqp, uint32_t rkey,
>>>>          advance_qp_cur_index(qp);
>>>>   }
>>>>
>>>> +static void wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw,
>>>> +                      uint32_t rkey, const struct ibv_mw_bind_info *info)
>>>> +{
>>>> +       struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
>>>> +       struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
>>>> +
>>>> +       if (check_qp_queue_full(qp))
>>>> +               return;
>>>> +
>>>> +       memset(wqe, 0, sizeof(*wqe));
>>>> +
>>>> +       wqe->wr.wr_id = ibqp->wr_id;
>>>> +       wqe->wr.opcode = IBV_WR_BIND_MW;
>>>> +       wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
>>>> +       wqe->wr.wr.mw.addr = info->addr;
>>>> +       wqe->wr.wr.mw.length = info->length;
>>>> +       wqe->wr.wr.mw.mr_lkey = info->mr->lkey;
>>>> +       wqe->wr.wr.mw.mw_rkey = ibmw->rkey;
>>>> +       wqe->wr.wr.mw.rkey = rkey;
>>>> +       wqe->wr.wr.mw.access = info->mw_access_flags;
>>>> +       wqe->ssn = qp->ssn++;
>>>> +
>>>> +       advance_qp_cur_index(qp);
>>>> +}
>>>> +
>>>>   static void wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey)
>>>>   {
>>>>          struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
>>>> @@ -1106,6 +1220,7 @@ enum {
>>>>                  | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP
>>>>                  | IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD
>>>>                  | IBV_QP_EX_WITH_LOCAL_INV
>>>> +               | IBV_QP_EX_WITH_BIND_MW
>>>>                  | IBV_QP_EX_WITH_SEND_WITH_INV,
>>>>
>>>>          RXE_SUP_UC_QP_SEND_OPS_FLAGS =
>>>> @@ -1113,6 +1228,7 @@ enum {
>>>>                  | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM
>>>>                  | IBV_QP_EX_WITH_SEND
>>>>                  | IBV_QP_EX_WITH_SEND_WITH_IMM
>>>> +               | IBV_QP_EX_WITH_BIND_MW
>>>>                  | IBV_QP_EX_WITH_SEND_WITH_INV,
>>>>
>>>>          RXE_SUP_UD_QP_SEND_OPS_FLAGS =
>>>> @@ -1162,6 +1278,9 @@ static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags)
>>>>          if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD)
>>>>                  qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add;
>>>>
>>>> +       if (flags & IBV_QP_EX_WITH_BIND_MW)
>>>> +               qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw;
>>>> +
>>>>          if (flags & IBV_QP_EX_WITH_LOCAL_INV)
>>>>                  qp->vqp.qp_ex.wr_local_inv = wr_local_inv;
>>>>
>>>> @@ -1275,9 +1394,10 @@ static int rxe_destroy_qp(struct ibv_qp *ibqp)
>>>>   }
>>>>
>>>>   /* basic sanity checks for send work request */
>>>> -static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
>>>> +static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr,
>>>>                              unsigned int length)
>>>>   {
>>>> +       struct rxe_wq *sq = &qp->sq;
>>>>          enum ibv_wr_opcode opcode = ibwr->opcode;
>>>>
>>>>          if (ibwr->num_sge > sq->max_sge)
>>>> @@ -1291,11 +1411,26 @@ static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
>>>>          if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline))
>>>>                  return -EINVAL;
>>>>
>>>> +       if (ibwr->opcode == IBV_WR_BIND_MW) {
>>>> +               if (length)
>>>> +                       return -EINVAL;
>>>> +               if (ibwr->num_sge)
>>>> +                       return -EINVAL;
>>>> +               if (ibwr->imm_data)
>>>> +                       return -EINVAL;
>>>> +               if ((qp_type(qp) != IBV_QPT_RC) &&
>>>> +                   (qp_type(qp) != IBV_QPT_UC))
>>>> +                       return -EINVAL;
>>>> +       }
>>>> +
>>>>          return 0;
>>>>   }
>>>>
>>>>   static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
>>>>   {
>>>> +       struct ibv_mw *ibmw;
>>>> +       struct ibv_mr *ibmr;
>>>> +
>>>>          memset(kwr, 0, sizeof(*kwr));
>>>>
>>>>          kwr->wr_id              = uwr->wr_id;
>>>> @@ -1326,6 +1461,18 @@ static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
>>>>                  kwr->wr.atomic.rkey             = uwr->wr.atomic.rkey;
>>>>                  break;
>>>>
>>>> +       case IBV_WR_BIND_MW:
>>>> +               ibmr = uwr->bind_mw.bind_info.mr;
>>>> +               ibmw = uwr->bind_mw.mw;
>>>> +
>>>> +               kwr->wr.mw.addr = uwr->bind_mw.bind_info.addr;
>>>> +               kwr->wr.mw.length = uwr->bind_mw.bind_info.length;
>>>> +               kwr->wr.mw.mr_lkey = ibmr->lkey;
>>>> +               kwr->wr.mw.mw_rkey = ibmw->rkey;
>>>> +               kwr->wr.mw.rkey = uwr->bind_mw.rkey;
>>>> +               kwr->wr.mw.access = uwr->bind_mw.bind_info.mw_access_flags;
>>>> +               break;
>>>> +
>>>>          default:
>>>>                  break;
>>>>          }
>>>> @@ -1348,6 +1495,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
>>>>          if (ibwr->send_flags & IBV_SEND_INLINE) {
>>>>                  uint8_t *inline_data = wqe->dma.inline_data;
>>>>
>>>> +               wqe->dma.resid = 0;
>>>> +
>>>>                  for (i = 0; i < num_sge; i++) {
>>>>                          memcpy(inline_data,
>>>>                                 (uint8_t *)(long)ibwr->sg_list[i].addr,
>>>> @@ -1363,6 +1512,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
>>>>                  wqe->iova       = ibwr->wr.atomic.remote_addr;
>>>>          else
>>>>                  wqe->iova       = ibwr->wr.rdma.remote_addr;
>>>> +
>>>>          wqe->dma.length         = length;
>>>>          wqe->dma.resid          = length;
>>>>          wqe->dma.num_sge        = num_sge;
>>>> @@ -1385,7 +1535,7 @@ static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq,
>>>>          for (i = 0; i < ibwr->num_sge; i++)
>>>>                  length += ibwr->sg_list[i].length;
>>>>
>>>> -       err = validate_send_wr(sq, ibwr, length);
>>>> +       err = validate_send_wr(qp, ibwr, length);
>>>>          if (err) {
>>>>                  printf("validate send failed\n");
>>>>                  return err;
>>>> @@ -1579,6 +1729,9 @@ static const struct verbs_context_ops rxe_ctx_ops = {
>>>>          .dealloc_pd = rxe_dealloc_pd,
>>>>          .reg_mr = rxe_reg_mr,
>>>>          .dereg_mr = rxe_dereg_mr,
>>>> +       .alloc_mw = rxe_alloc_mw,
>>>> +       .dealloc_mw = rxe_dealloc_mw,
>>>> +       .bind_mw = rxe_bind_mw,
>>>>          .create_cq = rxe_create_cq,
>>>>          .create_cq_ex = rxe_create_cq_ex,
>>>>          .poll_cq = rxe_poll_cq,
>>>> --
>>>> 2.30.2
>>>>
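For context: the three context ops added at the end of the patch (.alloc_mw, .dealloc_mw, .bind_mw) are reached through the standard libibverbs entry points. A rough sketch of the type 1 bind path as an application would drive it -- illustrative only; pd, qp, mr, buf and buf_len are assumed to exist already, and the MR must have been registered with IBV_ACCESS_MW_BIND:

        /* allocate a type 1 window on the same PD as the MR */
        struct ibv_mw *mw = ibv_alloc_mw(pd, IBV_MW_TYPE_1);
        if (!mw)
                return -1;              /* or handle the error */

        struct ibv_mw_bind mw_bind = {
                .wr_id = 1,
                .send_flags = IBV_SEND_SIGNALED,
                .bind_info = {
                        .mr = mr,
                        .addr = (uint64_t)(uintptr_t)buf,
                        .length = buf_len,
                        .mw_access_flags = IBV_ACCESS_REMOTE_WRITE,
                },
        };

        /* internally posts an IBV_WR_BIND_MW WR, i.e. goes through rxe_bind_mw() */
        if (ibv_bind_mw(qp, mw, &mw_bind))
                return -1;

        /* on success the provider has advanced mw->rkey; advertise it to the peer */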

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows
  2021-06-08 23:18         ` Pearson, Robert B
@ 2021-06-09  1:51           ` Zhu Yanjun
  2021-06-09  1:59             ` Pearson, Robert B
  0 siblings, 1 reply; 10+ messages in thread
From: Zhu Yanjun @ 2021-06-09  1:51 UTC (permalink / raw)
  To: Pearson, Robert B; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list

On Wed, Jun 9, 2021 at 7:18 AM Pearson, Robert B <rpearsonhpe@gmail.com> wrote:
>
>
> On 6/8/2021 2:17 AM, Zhu Yanjun wrote:
> > On Tue, Jun 8, 2021 at 3:01 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
> >> On 6/8/21 1:54 AM, Zhu Yanjun wrote:
> >>> On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
> >>>> This patch makes the required changes to the rxe provider to support the
> >>>> kernel memory windows patches to the rxe driver.
> >>>>
> >>>> The following changes are made:
> >>>>    - Add ibv_alloc_mw verb
> >>>>    - Add ibv_dealloc_mw verb
> >>>>    - Add ibv_bind_mw verb for type 1 MWs
> >>>>    - Add support for bind MW send work requests through the traditional
> >>>>      QP API and the extended QP API.
> >>>>
> >>>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> >>>> ---
> >>>> v2:
> >>>>    Added support for extended QP bind MW work requests.
> >>> I got the following errors.
> >>> Is it a known issue?
> >>>
> >>> # ./bin/run_tests.py --dev rxe0
> >>> .............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss
> >>> ======================================================================
> >>> ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase)
> >>> Verify bind memory window operation using the new post_send API.
> >>> ----------------------------------------------------------------------
> >>> Traceback (most recent call last):
> >>>    File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw
> >>>      u.poll_cq(server.cq)
> >>>    File "/root/rdma-core/tests/utils.py", line 538, in poll_cq
> >>>      raise PyverbsRDMAError('Completion status is {s}'.
> >>> pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory
> >>> window bind error. Errno: 6, No such device or address
> >>>
> >>> ----------------------------------------------------------------------
> >>> Ran 193 tests in 2.544s
> >>>
> >>> FAILED (errors=1, skipped=128)
> >>>
> >>> Zhu Yanjun
> >> Yes. It is because the test is not setting the BIND_MW access flag for the MR. It is not permitted to bind an MW to an MR that does not have the BIND_MW access flag set, but this test mistakenly expects it to work. The other MW tests in test_mr.py do set the BIND_MW access flag.
> > Thanks a lot.
> > If the root cause of this problem is correct, and setting the BIND_MW
> > access for the MR can fix this problem,
> >
> > Please file a commit to fix this problem.
> >
> > I am fine with this MW patch series if all the problems (including
> > rdma-core and rxe in kernel) are fixed.
> >
> > Thanks for your effort.
> >
> > Zhu Yanjun
>
> Zhu,
>
> There was a second test bug, now fixed, and all the Python tests are
> passing or skipped. No errors. There is a message to the linux-rdma list
> from Edward Srouji indicating that the patch is upstream on GitHub, or
> you can use the one in my email to him. I would guess it will be a while
> until his PR goes upstream.
>
> I sent a fresh set of patches last night for both the kernel and user
> space provider.

Thanks a lot for your effort.

I applied your commits for rdma-core and rxe in kernel, along with the following diff.

 All the Python tests in rdma-core pass or are skipped. No errors.

Thanks again.

"
diff --git a/tests/test_qpex.py b/tests/test_qpex.py
index 20288d45..41028e2d 100644
--- a/tests/test_qpex.py
+++ b/tests/test_qpex.py
@@ -146,10 +146,10 @@ class QpExRCAtomicFetchAdd(RCResources):

 class QpExRCBindMw(RCResources):
     def create_qps(self):
-        create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_BIND_MW)
+        create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_BIND_MW|e.IBV_QP_EX_WITH_RDMA_WRITE)

     def create_mr(self):
-        self.mr = u.create_custom_mr(self, e.IBV_ACCESS_REMOTE_WRITE)
+        self.mr = u.create_custom_mr(self, e.IBV_ACCESS_REMOTE_WRITE|e.IBV_ACCESS_MW_BIND)
"

Zhu Yanjun

>
> Bob
>
> >> Bob
> >>>> ---
> >>>>   providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++++++++++++++++++-
> >>>>   1 file changed, 155 insertions(+), 2 deletions(-)
> >>>>
> >>>> diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
> >>>> index a68656ae..bb39ef04 100644
> >>>> --- a/providers/rxe/rxe.c
> >>>> +++ b/providers/rxe/rxe.c
> >>>> @@ -128,6 +128,95 @@ static int rxe_dealloc_pd(struct ibv_pd *pd)
> >>>>          return ret;
> >>>>   }
> >>>>
> >>>> +static struct ibv_mw *rxe_alloc_mw(struct ibv_pd *ibpd, enum ibv_mw_type type)
> >>>> +{
> >>>> +       int ret;
> >>>> +       struct ibv_mw *ibmw;
> >>>> +       struct ibv_alloc_mw cmd = {};
> >>>> +       struct ib_uverbs_alloc_mw_resp resp = {};
> >>>> +
> >>>> +       ibmw = calloc(1, sizeof(*ibmw));
> >>>> +       if (!ibmw)
> >>>> +               return NULL;
> >>>> +
> >>>> +       ret = ibv_cmd_alloc_mw(ibpd, type, ibmw, &cmd, sizeof(cmd),
> >>>> +                                               &resp, sizeof(resp));
> >>>> +       if (ret) {
> >>>> +               free(ibmw);
> >>>> +               return NULL;
> >>>> +       }
> >>>> +
> >>>> +       return ibmw;
> >>>> +}
> >>>> +
> >>>> +static int rxe_dealloc_mw(struct ibv_mw *ibmw)
> >>>> +{
> >>>> +       int ret;
> >>>> +
> >>>> +       ret = ibv_cmd_dealloc_mw(ibmw);
> >>>> +       if (ret)
> >>>> +               return ret;
> >>>> +
> >>>> +       free(ibmw);
> >>>> +       return 0;
> >>>> +}
> >>>> +
> >>>> +static int next_rkey(int rkey)
> >>>> +{
> >>>> +       return (rkey & 0xffffff00) | ((rkey + 1) & 0x000000ff);
> >>>> +}
> >>>> +
> >>>> +static int rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list,
> >>>> +                        struct ibv_send_wr **bad_wr);
> >>>> +
> >>>> +static int rxe_bind_mw(struct ibv_qp *ibqp, struct ibv_mw *ibmw,
> >>>> +                       struct ibv_mw_bind *mw_bind)
> >>>> +{
> >>>> +       int ret;
> >>>> +       struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info;
> >>>> +       struct ibv_send_wr ibwr;
> >>>> +       struct ibv_send_wr *bad_wr;
> >>>> +
> >>>> +       if (!bind_info->mr && (bind_info->addr || bind_info->length)) {
> >>>> +               ret = EINVAL;
> >>>> +               goto err;
> >>>> +       }
> >>>> +
> >>>> +       if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) {
> >>>> +               ret = EINVAL;
> >>>> +               goto err;
> >>>> +       }
> >>>> +
> >>>> +       if (bind_info->mr) {
> >>>> +               if (ibmw->pd != bind_info->mr->pd) {
> >>>> +                       ret = EPERM;
> >>>> +                       goto err;
> >>>> +               }
> >>>> +       }
> >>>> +
> >>>> +       memset(&ibwr, 0, sizeof(ibwr));
> >>>> +
> >>>> +       ibwr.opcode             = IBV_WR_BIND_MW;
> >>>> +       ibwr.next               = NULL;
> >>>> +       ibwr.wr_id              = mw_bind->wr_id;
> >>>> +       ibwr.send_flags         = mw_bind->send_flags;
> >>>> +       ibwr.bind_mw.bind_info  = mw_bind->bind_info;
> >>>> +       ibwr.bind_mw.mw         = ibmw;
> >>>> +       ibwr.bind_mw.rkey       = next_rkey(ibmw->rkey);
> >>>> +
> >>>> +       ret = rxe_post_send(ibqp, &ibwr, &bad_wr);
> >>>> +       if (ret)
> >>>> +               goto err;
> >>>> +
> >>>> +       /* user has to undo this if he gets an error wc */
> >>>> +       ibmw->rkey = ibwr.bind_mw.rkey;
> >>>> +
> >>>> +       return 0;
> >>>> +err:
> >>>> +       errno = ret;
> >>>> +       return errno;
> >>>> +}
> >>>> +
> >>>>   static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> >>>>                                   uint64_t hca_va, int access)
> >>>>   {
> >>>> @@ -715,6 +804,31 @@ static void wr_atomic_fetch_add(struct ibv_qp_ex *ibqp, uint32_t rkey,
> >>>>          advance_qp_cur_index(qp);
> >>>>   }
> >>>>
> >>>> +static void wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw,
> >>>> +                      uint32_t rkey, const struct ibv_mw_bind_info *info)
> >>>> +{
> >>>> +       struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
> >>>> +       struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
> >>>> +
> >>>> +       if (check_qp_queue_full(qp))
> >>>> +               return;
> >>>> +
> >>>> +       memset(wqe, 0, sizeof(*wqe));
> >>>> +
> >>>> +       wqe->wr.wr_id = ibqp->wr_id;
> >>>> +       wqe->wr.opcode = IBV_WR_BIND_MW;
> >>>> +       wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
> >>>> +       wqe->wr.wr.mw.addr = info->addr;
> >>>> +       wqe->wr.wr.mw.length = info->length;
> >>>> +       wqe->wr.wr.mw.mr_lkey = info->mr->lkey;
> >>>> +       wqe->wr.wr.mw.mw_rkey = ibmw->rkey;
> >>>> +       wqe->wr.wr.mw.rkey = rkey;
> >>>> +       wqe->wr.wr.mw.access = info->mw_access_flags;
> >>>> +       wqe->ssn = qp->ssn++;
> >>>> +
> >>>> +       advance_qp_cur_index(qp);
> >>>> +}
> >>>> +
> >>>>   static void wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey)
> >>>>   {
> >>>>          struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
> >>>> @@ -1106,6 +1220,7 @@ enum {
> >>>>                  | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP
> >>>>                  | IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD
> >>>>                  | IBV_QP_EX_WITH_LOCAL_INV
> >>>> +               | IBV_QP_EX_WITH_BIND_MW
> >>>>                  | IBV_QP_EX_WITH_SEND_WITH_INV,
> >>>>
> >>>>          RXE_SUP_UC_QP_SEND_OPS_FLAGS =
> >>>> @@ -1113,6 +1228,7 @@ enum {
> >>>>                  | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM
> >>>>                  | IBV_QP_EX_WITH_SEND
> >>>>                  | IBV_QP_EX_WITH_SEND_WITH_IMM
> >>>> +               | IBV_QP_EX_WITH_BIND_MW
> >>>>                  | IBV_QP_EX_WITH_SEND_WITH_INV,
> >>>>
> >>>>          RXE_SUP_UD_QP_SEND_OPS_FLAGS =
> >>>> @@ -1162,6 +1278,9 @@ static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags)
> >>>>          if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD)
> >>>>                  qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add;
> >>>>
> >>>> +       if (flags & IBV_QP_EX_WITH_BIND_MW)
> >>>> +               qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw;
> >>>> +
> >>>>          if (flags & IBV_QP_EX_WITH_LOCAL_INV)
> >>>>                  qp->vqp.qp_ex.wr_local_inv = wr_local_inv;
> >>>>
> >>>> @@ -1275,9 +1394,10 @@ static int rxe_destroy_qp(struct ibv_qp *ibqp)
> >>>>   }
> >>>>
> >>>>   /* basic sanity checks for send work request */
> >>>> -static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
> >>>> +static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr,
> >>>>                              unsigned int length)
> >>>>   {
> >>>> +       struct rxe_wq *sq = &qp->sq;
> >>>>          enum ibv_wr_opcode opcode = ibwr->opcode;
> >>>>
> >>>>          if (ibwr->num_sge > sq->max_sge)
> >>>> @@ -1291,11 +1411,26 @@ static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
> >>>>          if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline))
> >>>>                  return -EINVAL;
> >>>>
> >>>> +       if (ibwr->opcode == IBV_WR_BIND_MW) {
> >>>> +               if (length)
> >>>> +                       return -EINVAL;
> >>>> +               if (ibwr->num_sge)
> >>>> +                       return -EINVAL;
> >>>> +               if (ibwr->imm_data)
> >>>> +                       return -EINVAL;
> >>>> +               if ((qp_type(qp) != IBV_QPT_RC) &&
> >>>> +                   (qp_type(qp) != IBV_QPT_UC))
> >>>> +                       return -EINVAL;
> >>>> +       }
> >>>> +
> >>>>          return 0;
> >>>>   }
> >>>>
> >>>>   static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
> >>>>   {
> >>>> +       struct ibv_mw *ibmw;
> >>>> +       struct ibv_mr *ibmr;
> >>>> +
> >>>>          memset(kwr, 0, sizeof(*kwr));
> >>>>
> >>>>          kwr->wr_id              = uwr->wr_id;
> >>>> @@ -1326,6 +1461,18 @@ static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
> >>>>                  kwr->wr.atomic.rkey             = uwr->wr.atomic.rkey;
> >>>>                  break;
> >>>>
> >>>> +       case IBV_WR_BIND_MW:
> >>>> +               ibmr = uwr->bind_mw.bind_info.mr;
> >>>> +               ibmw = uwr->bind_mw.mw;
> >>>> +
> >>>> +               kwr->wr.mw.addr = uwr->bind_mw.bind_info.addr;
> >>>> +               kwr->wr.mw.length = uwr->bind_mw.bind_info.length;
> >>>> +               kwr->wr.mw.mr_lkey = ibmr->lkey;
> >>>> +               kwr->wr.mw.mw_rkey = ibmw->rkey;
> >>>> +               kwr->wr.mw.rkey = uwr->bind_mw.rkey;
> >>>> +               kwr->wr.mw.access = uwr->bind_mw.bind_info.mw_access_flags;
> >>>> +               break;
> >>>> +
> >>>>          default:
> >>>>                  break;
> >>>>          }
> >>>> @@ -1348,6 +1495,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
> >>>>          if (ibwr->send_flags & IBV_SEND_INLINE) {
> >>>>                  uint8_t *inline_data = wqe->dma.inline_data;
> >>>>
> >>>> +               wqe->dma.resid = 0;
> >>>> +
> >>>>                  for (i = 0; i < num_sge; i++) {
> >>>>                          memcpy(inline_data,
> >>>>                                 (uint8_t *)(long)ibwr->sg_list[i].addr,
> >>>> @@ -1363,6 +1512,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
> >>>>                  wqe->iova       = ibwr->wr.atomic.remote_addr;
> >>>>          else
> >>>>                  wqe->iova       = ibwr->wr.rdma.remote_addr;
> >>>> +
> >>>>          wqe->dma.length         = length;
> >>>>          wqe->dma.resid          = length;
> >>>>          wqe->dma.num_sge        = num_sge;
> >>>> @@ -1385,7 +1535,7 @@ static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq,
> >>>>          for (i = 0; i < ibwr->num_sge; i++)
> >>>>                  length += ibwr->sg_list[i].length;
> >>>>
> >>>> -       err = validate_send_wr(sq, ibwr, length);
> >>>> +       err = validate_send_wr(qp, ibwr, length);
> >>>>          if (err) {
> >>>>                  printf("validate send failed\n");
> >>>>                  return err;
> >>>> @@ -1579,6 +1729,9 @@ static const struct verbs_context_ops rxe_ctx_ops = {
> >>>>          .dealloc_pd = rxe_dealloc_pd,
> >>>>          .reg_mr = rxe_reg_mr,
> >>>>          .dereg_mr = rxe_dereg_mr,
> >>>> +       .alloc_mw = rxe_alloc_mw,
> >>>> +       .dealloc_mw = rxe_dealloc_mw,
> >>>> +       .bind_mw = rxe_bind_mw,
> >>>>          .create_cq = rxe_create_cq,
> >>>>          .create_cq_ex = rxe_create_cq_ex,
> >>>>          .poll_cq = rxe_poll_cq,
> >>>> --
> >>>> 2.30.2
> >>>>

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows
  2021-06-09  1:51           ` Zhu Yanjun
@ 2021-06-09  1:59             ` Pearson, Robert B
  0 siblings, 0 replies; 10+ messages in thread
From: Pearson, Robert B @ 2021-06-09  1:59 UTC (permalink / raw)
  To: Zhu Yanjun; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list


On 6/8/2021 8:51 PM, Zhu Yanjun wrote:
> On Wed, Jun 9, 2021 at 7:18 AM Pearson, Robert B <rpearsonhpe@gmail.com> wrote:
>>
>> On 6/8/2021 2:17 AM, Zhu Yanjun wrote:
>>> On Tue, Jun 8, 2021 at 3:01 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>>>> On 6/8/21 1:54 AM, Zhu Yanjun wrote:
>>>>> On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>>>>>> This patch makes the required changes to the rxe provider to support the
>>>>>> kernel memory windows patches to the rxe driver.
>>>>>>
>>>>>> The following changes are made:
>>>>>>     - Add ibv_alloc_mw verb
>>>>>>     - Add ibv_dealloc_mw verb
>>>>>>     - Add ibv_bind_mw verb for type 1 MWs
>>>>>>     - Add support for bind MW send work requests through the traditional
>>>>>>       QP API and the extended QP API.
>>>>>>
>>>>>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
>>>>>> ---
>>>>>> v2:
>>>>>>     Added support for extended QP bind MW work requests.
>>>>> I got the following errors.
>>>>> Is it a known issue?
>>>>>
>>>>> # ./bin/run_tests.py --dev rxe0
>>>>> .............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss
>>>>> ======================================================================
>>>>> ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase)
>>>>> Verify bind memory window operation using the new post_send API.
>>>>> ----------------------------------------------------------------------
>>>>> Traceback (most recent call last):
>>>>>     File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw
>>>>>       u.poll_cq(server.cq)
>>>>>     File "/root/rdma-core/tests/utils.py", line 538, in poll_cq
>>>>>       raise PyverbsRDMAError('Completion status is {s}'.
>>>>> pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory
>>>>> window bind error. Errno: 6, No such device or address
>>>>>
>>>>> ----------------------------------------------------------------------
>>>>> Ran 193 tests in 2.544s
>>>>>
>>>>> FAILED (errors=1, skipped=128)
>>>>>
>>>>> Zhu Yanjun
>>>> Yes. It is because the test is not setting the BIND_MW access flag for the MR. It is not permitted to bind an MW to an MR that does not have the BIND_MW access flag set, but this test mistakenly expects it to work. The other MW tests in test_mr.py do set the BIND_MW access flag.
>>> Thanks a lot.
>>> If the root cause of this problem is correct, and setting the BIND_MW
>>> access for the MR can fix this problem,
>>>
>>> Please file a commit to fix this problem.
>>>
>>> I am fine with this MW patch series if all the problems (including
>>> rdma-core and rxe in kernel) are fixed.
>>>
>>> Thanks for your effort.
>>>
>>> Zhu Yanjun
>> Zhu,
>>
>> There was a second test bug, now fixed, and all the Python tests are
>> passing or skipped. No errors. There is a message to the linux-rdma list
>> from Edward Srouji indicating that the patch is upstream on GitHub, or
>> you can use the one in my email to him. I would guess it will be a while
>> until his PR goes upstream.
>>
>> I sent a fresh set of patches last night for both the kernel and user
>> space provider.
> Thanks a lot for your effort.
>
> I applied your commits for rdma-core and rxe in kernel, along with the following diff.
>
>  All the Python tests in rdma-core pass or are skipped. No errors.
>
> Thanks again.

Thanks to you for your patience.

Bob


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2 1/2] Update kernel headers
  2021-07-18 21:28 [PATCH v2 0/2] Providers/rxe: Replace AV by AH for UD sends Bob Pearson
@ 2021-07-18 21:28 ` Bob Pearson
  0 siblings, 0 replies; 10+ messages in thread
From: Bob Pearson @ 2021-07-18 21:28 UTC (permalink / raw)
  To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson

To commit ?? ("RDMA/rxe: Convert kernel UD post send to use ah_num").

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 kernel-headers/rdma/rdma_user_rxe.h | 10 +++++++++-
 providers/rxe/rxe.c                 |  4 ++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/kernel-headers/rdma/rdma_user_rxe.h b/kernel-headers/rdma/rdma_user_rxe.h
index e283c222..ad7da77d 100644
--- a/kernel-headers/rdma/rdma_user_rxe.h
+++ b/kernel-headers/rdma/rdma_user_rxe.h
@@ -98,6 +98,10 @@ struct rxe_send_wr {
 			__u32	remote_qpn;
 			__u32	remote_qkey;
 			__u16	pkey_index;
+			__u16	reserved;
+			__u32	ah_num;
+			__u32	pad[4];
+			struct rxe_av av;	/* deprecated */
 		} ud;
 		struct {
 			__aligned_u64	addr;
@@ -148,7 +152,6 @@ struct rxe_dma_info {
 
 struct rxe_send_wqe {
 	struct rxe_send_wr	wr;
-	struct rxe_av		av;
 	__u32			status;
 	__u32			state;
 	__aligned_u64		iova;
@@ -168,6 +171,11 @@ struct rxe_recv_wqe {
 	struct rxe_dma_info	dma;
 };
 
+struct rxe_create_ah_resp {
+	__u32 ah_num;
+	__u32 reserved;
+};
+
 struct rxe_create_cq_resp {
 	struct mminfo mi;
 };
diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
index 3c3ea8bb..d19aa9df 100644
--- a/providers/rxe/rxe.c
+++ b/providers/rxe/rxe.c
@@ -982,7 +982,7 @@ static void wr_set_ud_addr(struct ibv_qp_ex *ibqp, struct ibv_ah *ibah,
 	if (qp->err)
 		return;
 
-	memcpy(&wqe->av, &ah->av, sizeof(ah->av));
+	memcpy(&wqe->wr.wr.ud.av, &ah->av, sizeof(ah->av));
 	wqe->wr.wr.ud.remote_qpn = remote_qpn;
 	wqe->wr.wr.ud.remote_qkey = remote_qkey;
 }
@@ -1467,7 +1467,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
 	convert_send_wr(&wqe->wr, ibwr);
 
 	if (qp_type(qp) == IBV_QPT_UD)
-		memcpy(&wqe->av, &to_rah(ibwr->wr.ud.ah)->av,
+		memcpy(&wqe->wr.wr.ud.av, &to_rah(ibwr->wr.ud.ah)->av,
 		       sizeof(struct rxe_av));
 
 	if (ibwr->send_flags & IBV_SEND_INLINE) {
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-07-18 21:29 UTC | newest]

Thread overview: 10+ messages
2021-06-08  4:28 [PATCH v2 0/2] Implement memory windows support for rxe Bob Pearson
2021-06-08  4:28 ` [PATCH v2 1/2] Update kernel headers Bob Pearson
2021-06-08  4:28 ` [PATCH v2 2/2] Providers/rxe: Implement memory windows Bob Pearson
2021-06-08  6:54   ` Zhu Yanjun
2021-06-08  7:01     ` Bob Pearson
2021-06-08  7:17       ` Zhu Yanjun
2021-06-08 23:18         ` Pearson, Robert B
2021-06-09  1:51           ` Zhu Yanjun
2021-06-09  1:59             ` Pearson, Robert B
2021-07-18 21:28 [PATCH v2 0/2] Providers/rxe: Replace AV by AH for UD sends Bob Pearson
2021-07-18 21:28 ` [PATCH v2 1/2] Update kernel headers Bob Pearson
