* [PATCH v2 0/2] Implement memory windows support for rxe @ 2021-06-08 4:28 Bob Pearson 2021-06-08 4:28 ` [PATCH v2 1/2] Update kernel headers Bob Pearson 2021-06-08 4:28 ` [PATCH v2 2/2] Providers/rxe: Implement memory windows Bob Pearson 0 siblings, 2 replies; 9+ messages in thread From: Bob Pearson @ 2021-06-08 4:28 UTC (permalink / raw) To: jgg, zyjzyj2000, monis, linux-rdma; +Cc: Bob Pearson These patches makes the required changes to the rxe provider and the rxe ABI to support the kernel memory windows patches to the rxe driver. Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> --- Bob Pearson (2): Update kernel headers Providers/rxe: Implement memory windows kernel-headers/rdma/rdma_user_rxe.h | 10 ++ providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++- 2 files changed, 165 insertions(+), 2 deletions(-) -- 2.30.2 ^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v2 1/2] Update kernel headers 2021-06-08 4:28 [PATCH v2 0/2] Implement memory windows support for rxe Bob Pearson @ 2021-06-08 4:28 ` Bob Pearson 2021-06-08 4:28 ` [PATCH v2 2/2] Providers/rxe: Implement memory windows Bob Pearson 1 sibling, 0 replies; 9+ messages in thread From: Bob Pearson @ 2021-06-08 4:28 UTC (permalink / raw) To: jgg, zyjzyj2000, monis, linux-rdma; +Cc: Bob Pearson To commit ?? ("RDMA/rxe: Disallow MR dereg and invalidate when bound"). Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> --- kernel-headers/rdma/rdma_user_rxe.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel-headers/rdma/rdma_user_rxe.h b/kernel-headers/rdma/rdma_user_rxe.h index 068433e2..e283c222 100644 --- a/kernel-headers/rdma/rdma_user_rxe.h +++ b/kernel-headers/rdma/rdma_user_rxe.h @@ -99,7 +99,16 @@ struct rxe_send_wr { __u32 remote_qkey; __u16 pkey_index; } ud; + struct { + __aligned_u64 addr; + __aligned_u64 length; + __u32 mr_lkey; + __u32 mw_rkey; + __u32 rkey; + __u32 access; + } mw; /* reg is only used by the kernel and is not part of the uapi */ +#ifdef __KERNEL__ struct { union { struct ib_mr *mr; @@ -108,6 +117,7 @@ struct rxe_send_wr { __u32 key; __u32 access; } reg; +#endif } wr; }; -- 2.30.2 ^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v2 2/2] Providers/rxe: Implement memory windows 2021-06-08 4:28 [PATCH v2 0/2] Implement memory windows support for rxe Bob Pearson 2021-06-08 4:28 ` [PATCH v2 1/2] Update kernel headers Bob Pearson @ 2021-06-08 4:28 ` Bob Pearson 2021-06-08 6:54 ` Zhu Yanjun 1 sibling, 1 reply; 9+ messages in thread From: Bob Pearson @ 2021-06-08 4:28 UTC (permalink / raw) To: jgg, zyjzyj2000, monis, linux-rdma; +Cc: Bob Pearson This patch makes the required changes to the rxe provider to support the kernel memory windows patches to the rxe driver. The following changes are made: - Add ibv_alloc_mw verb - Add ibv_dealloc_mw verb - Add ibv_bind_mw verb for type 1 MWs - Add support for bind MW send work requests through the traditional QP API and the extended QP API. Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> --- v2: Added support for extended QP bind MW work requests. --- providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 155 insertions(+), 2 deletions(-) diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c index a68656ae..bb39ef04 100644 --- a/providers/rxe/rxe.c +++ b/providers/rxe/rxe.c @@ -128,6 +128,95 @@ static int rxe_dealloc_pd(struct ibv_pd *pd) return ret; } +static struct ibv_mw *rxe_alloc_mw(struct ibv_pd *ibpd, enum ibv_mw_type type) +{ + int ret; + struct ibv_mw *ibmw; + struct ibv_alloc_mw cmd = {}; + struct ib_uverbs_alloc_mw_resp resp = {}; + + ibmw = calloc(1, sizeof(*ibmw)); + if (!ibmw) + return NULL; + + ret = ibv_cmd_alloc_mw(ibpd, type, ibmw, &cmd, sizeof(cmd), + &resp, sizeof(resp)); + if (ret) { + free(ibmw); + return NULL; + } + + return ibmw; +} + +static int rxe_dealloc_mw(struct ibv_mw *ibmw) +{ + int ret; + + ret = ibv_cmd_dealloc_mw(ibmw); + if (ret) + return ret; + + free(ibmw); + return 0; +} + +static int next_rkey(int rkey) +{ + return (rkey & 0xffffff00) | ((rkey + 1) & 0x000000ff); +} + +static int rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list, + struct ibv_send_wr 
**bad_wr); + +static int rxe_bind_mw(struct ibv_qp *ibqp, struct ibv_mw *ibmw, + struct ibv_mw_bind *mw_bind) +{ + int ret; + struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info; + struct ibv_send_wr ibwr; + struct ibv_send_wr *bad_wr; + + if (!bind_info->mr && (bind_info->addr || bind_info->length)) { + ret = EINVAL; + goto err; + } + + if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) { + ret = EINVAL; + goto err; + } + + if (bind_info->mr) { + if (ibmw->pd != bind_info->mr->pd) { + ret = EPERM; + goto err; + } + } + + memset(&ibwr, 0, sizeof(ibwr)); + + ibwr.opcode = IBV_WR_BIND_MW; + ibwr.next = NULL; + ibwr.wr_id = mw_bind->wr_id; + ibwr.send_flags = mw_bind->send_flags; + ibwr.bind_mw.bind_info = mw_bind->bind_info; + ibwr.bind_mw.mw = ibmw; + ibwr.bind_mw.rkey = next_rkey(ibmw->rkey); + + ret = rxe_post_send(ibqp, &ibwr, &bad_wr); + if (ret) + goto err; + + /* user has to undo this if he gets an error wc */ + ibmw->rkey = ibwr.bind_mw.rkey; + + return 0; +err: + errno = ret; + return errno; +} + static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length, uint64_t hca_va, int access) { @@ -715,6 +804,31 @@ static void wr_atomic_fetch_add(struct ibv_qp_ex *ibqp, uint32_t rkey, advance_qp_cur_index(qp); } +static void wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw, + uint32_t rkey, const struct ibv_mw_bind_info *info) +{ + struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex); + struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index); + + if (check_qp_queue_full(qp)) + return; + + memset(wqe, 0, sizeof(*wqe)); + + wqe->wr.wr_id = ibqp->wr_id; + wqe->wr.opcode = IBV_WR_BIND_MW; + wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags; + wqe->wr.wr.mw.addr = info->addr; + wqe->wr.wr.mw.length = info->length; + wqe->wr.wr.mw.mr_lkey = info->mr->lkey; + wqe->wr.wr.mw.mw_rkey = ibmw->rkey; + wqe->wr.wr.mw.rkey = rkey; + wqe->wr.wr.mw.access = info->mw_access_flags; + wqe->ssn = qp->ssn++; + + 
advance_qp_cur_index(qp); +} + static void wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey) { struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex); @@ -1106,6 +1220,7 @@ enum { | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP | IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD | IBV_QP_EX_WITH_LOCAL_INV + | IBV_QP_EX_WITH_BIND_MW | IBV_QP_EX_WITH_SEND_WITH_INV, RXE_SUP_UC_QP_SEND_OPS_FLAGS = @@ -1113,6 +1228,7 @@ enum { | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM | IBV_QP_EX_WITH_SEND | IBV_QP_EX_WITH_SEND_WITH_IMM + | IBV_QP_EX_WITH_BIND_MW | IBV_QP_EX_WITH_SEND_WITH_INV, RXE_SUP_UD_QP_SEND_OPS_FLAGS = @@ -1162,6 +1278,9 @@ static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags) if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD) qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add; + if (flags & IBV_QP_EX_WITH_BIND_MW) + qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw; + if (flags & IBV_QP_EX_WITH_LOCAL_INV) qp->vqp.qp_ex.wr_local_inv = wr_local_inv; @@ -1275,9 +1394,10 @@ static int rxe_destroy_qp(struct ibv_qp *ibqp) } /* basic sanity checks for send work request */ -static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr, +static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr, unsigned int length) { + struct rxe_wq *sq = &qp->sq; enum ibv_wr_opcode opcode = ibwr->opcode; if (ibwr->num_sge > sq->max_sge) @@ -1291,11 +1411,26 @@ static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr, if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline)) return -EINVAL; + if (ibwr->opcode == IBV_WR_BIND_MW) { + if (length) + return -EINVAL; + if (ibwr->num_sge) + return -EINVAL; + if (ibwr->imm_data) + return -EINVAL; + if ((qp_type(qp) != IBV_QPT_RC) && + (qp_type(qp) != IBV_QPT_UC)) + return -EINVAL; + } + return 0; } static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr) { + struct ibv_mw *ibmw; + struct ibv_mr *ibmr; + memset(kwr, 0, sizeof(*kwr)); kwr->wr_id = uwr->wr_id; @@ -1326,6 +1461,18 @@ 
static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr) kwr->wr.atomic.rkey = uwr->wr.atomic.rkey; break; + case IBV_WR_BIND_MW: + ibmr = uwr->bind_mw.bind_info.mr; + ibmw = uwr->bind_mw.mw; + + kwr->wr.mw.addr = uwr->bind_mw.bind_info.addr; + kwr->wr.mw.length = uwr->bind_mw.bind_info.length; + kwr->wr.mw.mr_lkey = ibmr->lkey; + kwr->wr.mw.mw_rkey = ibmw->rkey; + kwr->wr.mw.rkey = uwr->bind_mw.rkey; + kwr->wr.mw.access = uwr->bind_mw.bind_info.mw_access_flags; + break; + default: break; } @@ -1348,6 +1495,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq, if (ibwr->send_flags & IBV_SEND_INLINE) { uint8_t *inline_data = wqe->dma.inline_data; + wqe->dma.resid = 0; + for (i = 0; i < num_sge; i++) { memcpy(inline_data, (uint8_t *)(long)ibwr->sg_list[i].addr, @@ -1363,6 +1512,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq, wqe->iova = ibwr->wr.atomic.remote_addr; else wqe->iova = ibwr->wr.rdma.remote_addr; + wqe->dma.length = length; wqe->dma.resid = length; wqe->dma.num_sge = num_sge; @@ -1385,7 +1535,7 @@ static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq, for (i = 0; i < ibwr->num_sge; i++) length += ibwr->sg_list[i].length; - err = validate_send_wr(sq, ibwr, length); + err = validate_send_wr(qp, ibwr, length); if (err) { printf("validate send failed\n"); return err; @@ -1579,6 +1729,9 @@ static const struct verbs_context_ops rxe_ctx_ops = { .dealloc_pd = rxe_dealloc_pd, .reg_mr = rxe_reg_mr, .dereg_mr = rxe_dereg_mr, + .alloc_mw = rxe_alloc_mw, + .dealloc_mw = rxe_dealloc_mw, + .bind_mw = rxe_bind_mw, .create_cq = rxe_create_cq, .create_cq_ex = rxe_create_cq_ex, .poll_cq = rxe_poll_cq, -- 2.30.2 ^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows 2021-06-08 4:28 ` [PATCH v2 2/2] Providers/rxe: Implement memory windows Bob Pearson @ 2021-06-08 6:54 ` Zhu Yanjun 2021-06-08 7:01 ` Bob Pearson 0 siblings, 1 reply; 9+ messages in thread From: Zhu Yanjun @ 2021-06-08 6:54 UTC (permalink / raw) To: Bob Pearson; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote: > > This patch makes the required changes to the rxe provider to support the > kernel memory windows patches to the rxe driver. > > The following changes are made: > - Add ibv_alloc_mw verb > - Add ibv_dealloc_mw verb > - Add ibv_bind_mw verb for type 1 MWs > - Add support for bind MW send work requests through the traditional > QP API and the extended QP API. > > Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> > --- > v2: > Added support for extended QP bind MW work requests. I got the following errors. Is it a known issue? # ./bin/run_tests.py --dev rxe0 .............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss ====================================================================== ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase) Verify bind memory window operation using the new post_send API. ---------------------------------------------------------------------- Traceback (most recent call last): File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw u.poll_cq(server.cq) File "/root/rdma-core/tests/utils.py", line 538, in poll_cq raise PyverbsRDMAError('Completion status is {s}'. pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory window bind error. 
Errno: 6, No such device or address ---------------------------------------------------------------------- Ran 193 tests in 2.544s FAILED (errors=1, skipped=128) Zhu Yanjun > --- > providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 155 insertions(+), 2 deletions(-) > > diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c > index a68656ae..bb39ef04 100644 > --- a/providers/rxe/rxe.c > +++ b/providers/rxe/rxe.c > @@ -128,6 +128,95 @@ static int rxe_dealloc_pd(struct ibv_pd *pd) > return ret; > } > > +static struct ibv_mw *rxe_alloc_mw(struct ibv_pd *ibpd, enum ibv_mw_type type) > +{ > + int ret; > + struct ibv_mw *ibmw; > + struct ibv_alloc_mw cmd = {}; > + struct ib_uverbs_alloc_mw_resp resp = {}; > + > + ibmw = calloc(1, sizeof(*ibmw)); > + if (!ibmw) > + return NULL; > + > + ret = ibv_cmd_alloc_mw(ibpd, type, ibmw, &cmd, sizeof(cmd), > + &resp, sizeof(resp)); > + if (ret) { > + free(ibmw); > + return NULL; > + } > + > + return ibmw; > +} > + > +static int rxe_dealloc_mw(struct ibv_mw *ibmw) > +{ > + int ret; > + > + ret = ibv_cmd_dealloc_mw(ibmw); > + if (ret) > + return ret; > + > + free(ibmw); > + return 0; > +} > + > +static int next_rkey(int rkey) > +{ > + return (rkey & 0xffffff00) | ((rkey + 1) & 0x000000ff); > +} > + > +static int rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list, > + struct ibv_send_wr **bad_wr); > + > +static int rxe_bind_mw(struct ibv_qp *ibqp, struct ibv_mw *ibmw, > + struct ibv_mw_bind *mw_bind) > +{ > + int ret; > + struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info; > + struct ibv_send_wr ibwr; > + struct ibv_send_wr *bad_wr; > + > + if (!bind_info->mr && (bind_info->addr || bind_info->length)) { > + ret = EINVAL; > + goto err; > + } > + > + if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) { > + ret = EINVAL; > + goto err; > + } > + > + if (bind_info->mr) { > + if (ibmw->pd != bind_info->mr->pd) { > + ret = EPERM; > + goto err; > + } > + } > + > + memset(&ibwr, 0, 
sizeof(ibwr)); > + > + ibwr.opcode = IBV_WR_BIND_MW; > + ibwr.next = NULL; > + ibwr.wr_id = mw_bind->wr_id; > + ibwr.send_flags = mw_bind->send_flags; > + ibwr.bind_mw.bind_info = mw_bind->bind_info; > + ibwr.bind_mw.mw = ibmw; > + ibwr.bind_mw.rkey = next_rkey(ibmw->rkey); > + > + ret = rxe_post_send(ibqp, &ibwr, &bad_wr); > + if (ret) > + goto err; > + > + /* user has to undo this if he gets an error wc */ > + ibmw->rkey = ibwr.bind_mw.rkey; > + > + return 0; > +err: > + errno = ret; > + return errno; > +} > + > static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length, > uint64_t hca_va, int access) > { > @@ -715,6 +804,31 @@ static void wr_atomic_fetch_add(struct ibv_qp_ex *ibqp, uint32_t rkey, > advance_qp_cur_index(qp); > } > > +static void wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw, > + uint32_t rkey, const struct ibv_mw_bind_info *info) > +{ > + struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex); > + struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index); > + > + if (check_qp_queue_full(qp)) > + return; > + > + memset(wqe, 0, sizeof(*wqe)); > + > + wqe->wr.wr_id = ibqp->wr_id; > + wqe->wr.opcode = IBV_WR_BIND_MW; > + wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags; > + wqe->wr.wr.mw.addr = info->addr; > + wqe->wr.wr.mw.length = info->length; > + wqe->wr.wr.mw.mr_lkey = info->mr->lkey; > + wqe->wr.wr.mw.mw_rkey = ibmw->rkey; > + wqe->wr.wr.mw.rkey = rkey; > + wqe->wr.wr.mw.access = info->mw_access_flags; > + wqe->ssn = qp->ssn++; > + > + advance_qp_cur_index(qp); > +} > + > static void wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey) > { > struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex); > @@ -1106,6 +1220,7 @@ enum { > | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP > | IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD > | IBV_QP_EX_WITH_LOCAL_INV > + | IBV_QP_EX_WITH_BIND_MW > | IBV_QP_EX_WITH_SEND_WITH_INV, > > RXE_SUP_UC_QP_SEND_OPS_FLAGS = > @@ -1113,6 +1228,7 @@ enum { > | 
IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM > | IBV_QP_EX_WITH_SEND > | IBV_QP_EX_WITH_SEND_WITH_IMM > + | IBV_QP_EX_WITH_BIND_MW > | IBV_QP_EX_WITH_SEND_WITH_INV, > > RXE_SUP_UD_QP_SEND_OPS_FLAGS = > @@ -1162,6 +1278,9 @@ static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags) > if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD) > qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add; > > + if (flags & IBV_QP_EX_WITH_BIND_MW) > + qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw; > + > if (flags & IBV_QP_EX_WITH_LOCAL_INV) > qp->vqp.qp_ex.wr_local_inv = wr_local_inv; > > @@ -1275,9 +1394,10 @@ static int rxe_destroy_qp(struct ibv_qp *ibqp) > } > > /* basic sanity checks for send work request */ > -static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr, > +static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr, > unsigned int length) > { > + struct rxe_wq *sq = &qp->sq; > enum ibv_wr_opcode opcode = ibwr->opcode; > > if (ibwr->num_sge > sq->max_sge) > @@ -1291,11 +1411,26 @@ static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr, > if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline)) > return -EINVAL; > > + if (ibwr->opcode == IBV_WR_BIND_MW) { > + if (length) > + return -EINVAL; > + if (ibwr->num_sge) > + return -EINVAL; > + if (ibwr->imm_data) > + return -EINVAL; > + if ((qp_type(qp) != IBV_QPT_RC) && > + (qp_type(qp) != IBV_QPT_UC)) > + return -EINVAL; > + } > + > return 0; > } > > static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr) > { > + struct ibv_mw *ibmw; > + struct ibv_mr *ibmr; > + > memset(kwr, 0, sizeof(*kwr)); > > kwr->wr_id = uwr->wr_id; > @@ -1326,6 +1461,18 @@ static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr) > kwr->wr.atomic.rkey = uwr->wr.atomic.rkey; > break; > > + case IBV_WR_BIND_MW: > + ibmr = uwr->bind_mw.bind_info.mr; > + ibmw = uwr->bind_mw.mw; > + > + kwr->wr.mw.addr = uwr->bind_mw.bind_info.addr; > + kwr->wr.mw.length = 
uwr->bind_mw.bind_info.length; > + kwr->wr.mw.mr_lkey = ibmr->lkey; > + kwr->wr.mw.mw_rkey = ibmw->rkey; > + kwr->wr.mw.rkey = uwr->bind_mw.rkey; > + kwr->wr.mw.access = uwr->bind_mw.bind_info.mw_access_flags; > + break; > + > default: > break; > } > @@ -1348,6 +1495,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq, > if (ibwr->send_flags & IBV_SEND_INLINE) { > uint8_t *inline_data = wqe->dma.inline_data; > > + wqe->dma.resid = 0; > + > for (i = 0; i < num_sge; i++) { > memcpy(inline_data, > (uint8_t *)(long)ibwr->sg_list[i].addr, > @@ -1363,6 +1512,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq, > wqe->iova = ibwr->wr.atomic.remote_addr; > else > wqe->iova = ibwr->wr.rdma.remote_addr; > + > wqe->dma.length = length; > wqe->dma.resid = length; > wqe->dma.num_sge = num_sge; > @@ -1385,7 +1535,7 @@ static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq, > for (i = 0; i < ibwr->num_sge; i++) > length += ibwr->sg_list[i].length; > > - err = validate_send_wr(sq, ibwr, length); > + err = validate_send_wr(qp, ibwr, length); > if (err) { > printf("validate send failed\n"); > return err; > @@ -1579,6 +1729,9 @@ static const struct verbs_context_ops rxe_ctx_ops = { > .dealloc_pd = rxe_dealloc_pd, > .reg_mr = rxe_reg_mr, > .dereg_mr = rxe_dereg_mr, > + .alloc_mw = rxe_alloc_mw, > + .dealloc_mw = rxe_dealloc_mw, > + .bind_mw = rxe_bind_mw, > .create_cq = rxe_create_cq, > .create_cq_ex = rxe_create_cq_ex, > .poll_cq = rxe_poll_cq, > -- > 2.30.2 > ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows 2021-06-08 6:54 ` Zhu Yanjun @ 2021-06-08 7:01 ` Bob Pearson 2021-06-08 7:17 ` Zhu Yanjun 0 siblings, 1 reply; 9+ messages in thread From: Bob Pearson @ 2021-06-08 7:01 UTC (permalink / raw) To: Zhu Yanjun; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list On 6/8/21 1:54 AM, Zhu Yanjun wrote: > On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote: >> >> This patch makes the required changes to the rxe provider to support the >> kernel memory windows patches to the rxe driver. >> >> The following changes are made: >> - Add ibv_alloc_mw verb >> - Add ibv_dealloc_mw verb >> - Add ibv_bind_mw verb for type 1 MWs >> - Add support for bind MW send work requests through the traditional >> QP API and the extended QP API. >> >> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> >> --- >> v2: >> Added support for extended QP bind MW work requests. > > I got the following errors. > Is it a known issue? > > # ./bin/run_tests.py --dev rxe0 > .............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss > ====================================================================== > ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase) > Verify bind memory window operation using the new post_send API. > ---------------------------------------------------------------------- > Traceback (most recent call last): > File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw > u.poll_cq(server.cq) > File "/root/rdma-core/tests/utils.py", line 538, in poll_cq > raise PyverbsRDMAError('Completion status is {s}'. > pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory > window bind error. 
Errno: 6, No such device or address > > ---------------------------------------------------------------------- > Ran 193 tests in 2.544s > > FAILED (errors=1, skipped=128) > > Zhu Yanjun Yes. It is because the test is not setting the BIND_MW access for the MR. It is not permitted to bind an MW to a MR that does not have the BIND_MW access flag set. But this test mistakenly thinks it is going to work. The other MW tests in test_mr.py do set the BIND_MW access flag. Bob > >> --- >> providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++++++++++++++++++- >> 1 file changed, 155 insertions(+), 2 deletions(-) >> >> diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c >> index a68656ae..bb39ef04 100644 >> --- a/providers/rxe/rxe.c >> +++ b/providers/rxe/rxe.c >> @@ -128,6 +128,95 @@ static int rxe_dealloc_pd(struct ibv_pd *pd) >> return ret; >> } >> >> +static struct ibv_mw *rxe_alloc_mw(struct ibv_pd *ibpd, enum ibv_mw_type type) >> +{ >> + int ret; >> + struct ibv_mw *ibmw; >> + struct ibv_alloc_mw cmd = {}; >> + struct ib_uverbs_alloc_mw_resp resp = {}; >> + >> + ibmw = calloc(1, sizeof(*ibmw)); >> + if (!ibmw) >> + return NULL; >> + >> + ret = ibv_cmd_alloc_mw(ibpd, type, ibmw, &cmd, sizeof(cmd), >> + &resp, sizeof(resp)); >> + if (ret) { >> + free(ibmw); >> + return NULL; >> + } >> + >> + return ibmw; >> +} >> + >> +static int rxe_dealloc_mw(struct ibv_mw *ibmw) >> +{ >> + int ret; >> + >> + ret = ibv_cmd_dealloc_mw(ibmw); >> + if (ret) >> + return ret; >> + >> + free(ibmw); >> + return 0; >> +} >> + >> +static int next_rkey(int rkey) >> +{ >> + return (rkey & 0xffffff00) | ((rkey + 1) & 0x000000ff); >> +} >> + >> +static int rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list, >> + struct ibv_send_wr **bad_wr); >> + >> +static int rxe_bind_mw(struct ibv_qp *ibqp, struct ibv_mw *ibmw, >> + struct ibv_mw_bind *mw_bind) >> +{ >> + int ret; >> + struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info; >> + struct ibv_send_wr ibwr; >> + struct ibv_send_wr 
*bad_wr; >> + >> + if (!bind_info->mr && (bind_info->addr || bind_info->length)) { >> + ret = EINVAL; >> + goto err; >> + } >> + >> + if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) { >> + ret = EINVAL; >> + goto err; >> + } >> + >> + if (bind_info->mr) { >> + if (ibmw->pd != bind_info->mr->pd) { >> + ret = EPERM; >> + goto err; >> + } >> + } >> + >> + memset(&ibwr, 0, sizeof(ibwr)); >> + >> + ibwr.opcode = IBV_WR_BIND_MW; >> + ibwr.next = NULL; >> + ibwr.wr_id = mw_bind->wr_id; >> + ibwr.send_flags = mw_bind->send_flags; >> + ibwr.bind_mw.bind_info = mw_bind->bind_info; >> + ibwr.bind_mw.mw = ibmw; >> + ibwr.bind_mw.rkey = next_rkey(ibmw->rkey); >> + >> + ret = rxe_post_send(ibqp, &ibwr, &bad_wr); >> + if (ret) >> + goto err; >> + >> + /* user has to undo this if he gets an error wc */ >> + ibmw->rkey = ibwr.bind_mw.rkey; >> + >> + return 0; >> +err: >> + errno = ret; >> + return errno; >> +} >> + >> static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length, >> uint64_t hca_va, int access) >> { >> @@ -715,6 +804,31 @@ static void wr_atomic_fetch_add(struct ibv_qp_ex *ibqp, uint32_t rkey, >> advance_qp_cur_index(qp); >> } >> >> +static void wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw, >> + uint32_t rkey, const struct ibv_mw_bind_info *info) >> +{ >> + struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex); >> + struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index); >> + >> + if (check_qp_queue_full(qp)) >> + return; >> + >> + memset(wqe, 0, sizeof(*wqe)); >> + >> + wqe->wr.wr_id = ibqp->wr_id; >> + wqe->wr.opcode = IBV_WR_BIND_MW; >> + wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags; >> + wqe->wr.wr.mw.addr = info->addr; >> + wqe->wr.wr.mw.length = info->length; >> + wqe->wr.wr.mw.mr_lkey = info->mr->lkey; >> + wqe->wr.wr.mw.mw_rkey = ibmw->rkey; >> + wqe->wr.wr.mw.rkey = rkey; >> + wqe->wr.wr.mw.access = info->mw_access_flags; >> + wqe->ssn = qp->ssn++; >> + >> + advance_qp_cur_index(qp); >> +} >> + >> 
static void wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey) >> { >> struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex); >> @@ -1106,6 +1220,7 @@ enum { >> | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP >> | IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD >> | IBV_QP_EX_WITH_LOCAL_INV >> + | IBV_QP_EX_WITH_BIND_MW >> | IBV_QP_EX_WITH_SEND_WITH_INV, >> >> RXE_SUP_UC_QP_SEND_OPS_FLAGS = >> @@ -1113,6 +1228,7 @@ enum { >> | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM >> | IBV_QP_EX_WITH_SEND >> | IBV_QP_EX_WITH_SEND_WITH_IMM >> + | IBV_QP_EX_WITH_BIND_MW >> | IBV_QP_EX_WITH_SEND_WITH_INV, >> >> RXE_SUP_UD_QP_SEND_OPS_FLAGS = >> @@ -1162,6 +1278,9 @@ static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags) >> if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD) >> qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add; >> >> + if (flags & IBV_QP_EX_WITH_BIND_MW) >> + qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw; >> + >> if (flags & IBV_QP_EX_WITH_LOCAL_INV) >> qp->vqp.qp_ex.wr_local_inv = wr_local_inv; >> >> @@ -1275,9 +1394,10 @@ static int rxe_destroy_qp(struct ibv_qp *ibqp) >> } >> >> /* basic sanity checks for send work request */ >> -static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr, >> +static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr, >> unsigned int length) >> { >> + struct rxe_wq *sq = &qp->sq; >> enum ibv_wr_opcode opcode = ibwr->opcode; >> >> if (ibwr->num_sge > sq->max_sge) >> @@ -1291,11 +1411,26 @@ static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr, >> if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline)) >> return -EINVAL; >> >> + if (ibwr->opcode == IBV_WR_BIND_MW) { >> + if (length) >> + return -EINVAL; >> + if (ibwr->num_sge) >> + return -EINVAL; >> + if (ibwr->imm_data) >> + return -EINVAL; >> + if ((qp_type(qp) != IBV_QPT_RC) && >> + (qp_type(qp) != IBV_QPT_UC)) >> + return -EINVAL; >> + } >> + >> return 0; >> } >> >> static void convert_send_wr(struct rxe_send_wr *kwr, 
struct ibv_send_wr *uwr) >> { >> + struct ibv_mw *ibmw; >> + struct ibv_mr *ibmr; >> + >> memset(kwr, 0, sizeof(*kwr)); >> >> kwr->wr_id = uwr->wr_id; >> @@ -1326,6 +1461,18 @@ static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr) >> kwr->wr.atomic.rkey = uwr->wr.atomic.rkey; >> break; >> >> + case IBV_WR_BIND_MW: >> + ibmr = uwr->bind_mw.bind_info.mr; >> + ibmw = uwr->bind_mw.mw; >> + >> + kwr->wr.mw.addr = uwr->bind_mw.bind_info.addr; >> + kwr->wr.mw.length = uwr->bind_mw.bind_info.length; >> + kwr->wr.mw.mr_lkey = ibmr->lkey; >> + kwr->wr.mw.mw_rkey = ibmw->rkey; >> + kwr->wr.mw.rkey = uwr->bind_mw.rkey; >> + kwr->wr.mw.access = uwr->bind_mw.bind_info.mw_access_flags; >> + break; >> + >> default: >> break; >> } >> @@ -1348,6 +1495,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq, >> if (ibwr->send_flags & IBV_SEND_INLINE) { >> uint8_t *inline_data = wqe->dma.inline_data; >> >> + wqe->dma.resid = 0; >> + >> for (i = 0; i < num_sge; i++) { >> memcpy(inline_data, >> (uint8_t *)(long)ibwr->sg_list[i].addr, >> @@ -1363,6 +1512,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq, >> wqe->iova = ibwr->wr.atomic.remote_addr; >> else >> wqe->iova = ibwr->wr.rdma.remote_addr; >> + >> wqe->dma.length = length; >> wqe->dma.resid = length; >> wqe->dma.num_sge = num_sge; >> @@ -1385,7 +1535,7 @@ static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq, >> for (i = 0; i < ibwr->num_sge; i++) >> length += ibwr->sg_list[i].length; >> >> - err = validate_send_wr(sq, ibwr, length); >> + err = validate_send_wr(qp, ibwr, length); >> if (err) { >> printf("validate send failed\n"); >> return err; >> @@ -1579,6 +1729,9 @@ static const struct verbs_context_ops rxe_ctx_ops = { >> .dealloc_pd = rxe_dealloc_pd, >> .reg_mr = rxe_reg_mr, >> .dereg_mr = rxe_dereg_mr, >> + .alloc_mw = rxe_alloc_mw, >> + .dealloc_mw = rxe_dealloc_mw, >> + .bind_mw = rxe_bind_mw, >> .create_cq = rxe_create_cq, >> .create_cq_ex = rxe_create_cq_ex, >> 
.poll_cq = rxe_poll_cq, >> -- >> 2.30.2 >> ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows 2021-06-08 7:01 ` Bob Pearson @ 2021-06-08 7:17 ` Zhu Yanjun 2021-06-08 23:18 ` Pearson, Robert B 0 siblings, 1 reply; 9+ messages in thread From: Zhu Yanjun @ 2021-06-08 7:17 UTC (permalink / raw) To: Bob Pearson; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list On Tue, Jun 8, 2021 at 3:01 PM Bob Pearson <rpearsonhpe@gmail.com> wrote: > > On 6/8/21 1:54 AM, Zhu Yanjun wrote: > > On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote: > >> > >> This patch makes the required changes to the rxe provider to support the > >> kernel memory windows patches to the rxe driver. > >> > >> The following changes are made: > >> - Add ibv_alloc_mw verb > >> - Add ibv_dealloc_mw verb > >> - Add ibv_bind_mw verb for type 1 MWs > >> - Add support for bind MW send work requests through the traditional > >> QP API and the extended QP API. > >> > >> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> > >> --- > >> v2: > >> Added support for extended QP bind MW work requests. > > > > I got the following errors. > > Is it a known issue? > > > > # ./bin/run_tests.py --dev rxe0 > > .............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss > > ====================================================================== > > ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase) > > Verify bind memory window operation using the new post_send API. > > ---------------------------------------------------------------------- > > Traceback (most recent call last): > > File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw > > u.poll_cq(server.cq) > > File "/root/rdma-core/tests/utils.py", line 538, in poll_cq > > raise PyverbsRDMAError('Completion status is {s}'. > > pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory > > window bind error. 
Errno: 6, No such device or address > > > > ---------------------------------------------------------------------- > > Ran 193 tests in 2.544s > > > > FAILED (errors=1, skipped=128) > > > > Zhu Yanjun > Yes. It is because the test is not setting the BIND_MW access for the MR. It is not permitted to bind an MW to a MR that does not have the BIND_MW access flag set. But this test mistakenly thinks it is going to work. The other MW tests in test_mr.py do set the BIND_MW access flag. Thanks a lot. If the root cause to this problem is correct, and setting the BIND_MW access for the MR can fix this problem, Please file a commit to fix this problem. I am fine with this MW patch series if all the problems (including rdma-core and rxe in kernel) are fixed. Thanks for your effort. Zhu Yanjun > Bob > > > >> --- > >> providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++++++++++++++++++- > >> 1 file changed, 155 insertions(+), 2 deletions(-) > >> > >> diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c > >> index a68656ae..bb39ef04 100644 > >> --- a/providers/rxe/rxe.c > >> +++ b/providers/rxe/rxe.c > >> @@ -128,6 +128,95 @@ static int rxe_dealloc_pd(struct ibv_pd *pd) > >> return ret; > >> } > >> > >> +static struct ibv_mw *rxe_alloc_mw(struct ibv_pd *ibpd, enum ibv_mw_type type) > >> +{ > >> + int ret; > >> + struct ibv_mw *ibmw; > >> + struct ibv_alloc_mw cmd = {}; > >> + struct ib_uverbs_alloc_mw_resp resp = {}; > >> + > >> + ibmw = calloc(1, sizeof(*ibmw)); > >> + if (!ibmw) > >> + return NULL; > >> + > >> + ret = ibv_cmd_alloc_mw(ibpd, type, ibmw, &cmd, sizeof(cmd), > >> + &resp, sizeof(resp)); > >> + if (ret) { > >> + free(ibmw); > >> + return NULL; > >> + } > >> + > >> + return ibmw; > >> +} > >> + > >> +static int rxe_dealloc_mw(struct ibv_mw *ibmw) > >> +{ > >> + int ret; > >> + > >> + ret = ibv_cmd_dealloc_mw(ibmw); > >> + if (ret) > >> + return ret; > >> + > >> + free(ibmw); > >> + return 0; > >> +} > >> + > >> +static int next_rkey(int rkey) > >> +{ > >> + 
return (rkey & 0xffffff00) | ((rkey + 1) & 0x000000ff); > >> +} > >> + > >> +static int rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list, > >> + struct ibv_send_wr **bad_wr); > >> + > >> +static int rxe_bind_mw(struct ibv_qp *ibqp, struct ibv_mw *ibmw, > >> + struct ibv_mw_bind *mw_bind) > >> +{ > >> + int ret; > >> + struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info; > >> + struct ibv_send_wr ibwr; > >> + struct ibv_send_wr *bad_wr; > >> + > >> + if (!bind_info->mr && (bind_info->addr || bind_info->length)) { > >> + ret = EINVAL; > >> + goto err; > >> + } > >> + > >> + if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) { > >> + ret = EINVAL; > >> + goto err; > >> + } > >> + > >> + if (bind_info->mr) { > >> + if (ibmw->pd != bind_info->mr->pd) { > >> + ret = EPERM; > >> + goto err; > >> + } > >> + } > >> + > >> + memset(&ibwr, 0, sizeof(ibwr)); > >> + > >> + ibwr.opcode = IBV_WR_BIND_MW; > >> + ibwr.next = NULL; > >> + ibwr.wr_id = mw_bind->wr_id; > >> + ibwr.send_flags = mw_bind->send_flags; > >> + ibwr.bind_mw.bind_info = mw_bind->bind_info; > >> + ibwr.bind_mw.mw = ibmw; > >> + ibwr.bind_mw.rkey = next_rkey(ibmw->rkey); > >> + > >> + ret = rxe_post_send(ibqp, &ibwr, &bad_wr); > >> + if (ret) > >> + goto err; > >> + > >> + /* user has to undo this if he gets an error wc */ > >> + ibmw->rkey = ibwr.bind_mw.rkey; > >> + > >> + return 0; > >> +err: > >> + errno = ret; > >> + return errno; > >> +} > >> + > >> static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length, > >> uint64_t hca_va, int access) > >> { > >> @@ -715,6 +804,31 @@ static void wr_atomic_fetch_add(struct ibv_qp_ex *ibqp, uint32_t rkey, > >> advance_qp_cur_index(qp); > >> } > >> > >> +static void wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw, > >> + uint32_t rkey, const struct ibv_mw_bind_info *info) > >> +{ > >> + struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex); > >> + struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, 
qp->cur_index); > >> + > >> + if (check_qp_queue_full(qp)) > >> + return; > >> + > >> + memset(wqe, 0, sizeof(*wqe)); > >> + > >> + wqe->wr.wr_id = ibqp->wr_id; > >> + wqe->wr.opcode = IBV_WR_BIND_MW; > >> + wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags; > >> + wqe->wr.wr.mw.addr = info->addr; > >> + wqe->wr.wr.mw.length = info->length; > >> + wqe->wr.wr.mw.mr_lkey = info->mr->lkey; > >> + wqe->wr.wr.mw.mw_rkey = ibmw->rkey; > >> + wqe->wr.wr.mw.rkey = rkey; > >> + wqe->wr.wr.mw.access = info->mw_access_flags; > >> + wqe->ssn = qp->ssn++; > >> + > >> + advance_qp_cur_index(qp); > >> +} > >> + > >> static void wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey) > >> { > >> struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex); > >> @@ -1106,6 +1220,7 @@ enum { > >> | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP > >> | IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD > >> | IBV_QP_EX_WITH_LOCAL_INV > >> + | IBV_QP_EX_WITH_BIND_MW > >> | IBV_QP_EX_WITH_SEND_WITH_INV, > >> > >> RXE_SUP_UC_QP_SEND_OPS_FLAGS = > >> @@ -1113,6 +1228,7 @@ enum { > >> | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM > >> | IBV_QP_EX_WITH_SEND > >> | IBV_QP_EX_WITH_SEND_WITH_IMM > >> + | IBV_QP_EX_WITH_BIND_MW > >> | IBV_QP_EX_WITH_SEND_WITH_INV, > >> > >> RXE_SUP_UD_QP_SEND_OPS_FLAGS = > >> @@ -1162,6 +1278,9 @@ static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags) > >> if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD) > >> qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add; > >> > >> + if (flags & IBV_QP_EX_WITH_BIND_MW) > >> + qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw; > >> + > >> if (flags & IBV_QP_EX_WITH_LOCAL_INV) > >> qp->vqp.qp_ex.wr_local_inv = wr_local_inv; > >> > >> @@ -1275,9 +1394,10 @@ static int rxe_destroy_qp(struct ibv_qp *ibqp) > >> } > >> > >> /* basic sanity checks for send work request */ > >> -static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr, > >> +static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr, > >> unsigned int length) > >> { 
> >> + struct rxe_wq *sq = &qp->sq; > >> enum ibv_wr_opcode opcode = ibwr->opcode; > >> > >> if (ibwr->num_sge > sq->max_sge) > >> @@ -1291,11 +1411,26 @@ static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr, > >> if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline)) > >> return -EINVAL; > >> > >> + if (ibwr->opcode == IBV_WR_BIND_MW) { > >> + if (length) > >> + return -EINVAL; > >> + if (ibwr->num_sge) > >> + return -EINVAL; > >> + if (ibwr->imm_data) > >> + return -EINVAL; > >> + if ((qp_type(qp) != IBV_QPT_RC) && > >> + (qp_type(qp) != IBV_QPT_UC)) > >> + return -EINVAL; > >> + } > >> + > >> return 0; > >> } > >> > >> static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr) > >> { > >> + struct ibv_mw *ibmw; > >> + struct ibv_mr *ibmr; > >> + > >> memset(kwr, 0, sizeof(*kwr)); > >> > >> kwr->wr_id = uwr->wr_id; > >> @@ -1326,6 +1461,18 @@ static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr) > >> kwr->wr.atomic.rkey = uwr->wr.atomic.rkey; > >> break; > >> > >> + case IBV_WR_BIND_MW: > >> + ibmr = uwr->bind_mw.bind_info.mr; > >> + ibmw = uwr->bind_mw.mw; > >> + > >> + kwr->wr.mw.addr = uwr->bind_mw.bind_info.addr; > >> + kwr->wr.mw.length = uwr->bind_mw.bind_info.length; > >> + kwr->wr.mw.mr_lkey = ibmr->lkey; > >> + kwr->wr.mw.mw_rkey = ibmw->rkey; > >> + kwr->wr.mw.rkey = uwr->bind_mw.rkey; > >> + kwr->wr.mw.access = uwr->bind_mw.bind_info.mw_access_flags; > >> + break; > >> + > >> default: > >> break; > >> } > >> @@ -1348,6 +1495,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq, > >> if (ibwr->send_flags & IBV_SEND_INLINE) { > >> uint8_t *inline_data = wqe->dma.inline_data; > >> > >> + wqe->dma.resid = 0; > >> + > >> for (i = 0; i < num_sge; i++) { > >> memcpy(inline_data, > >> (uint8_t *)(long)ibwr->sg_list[i].addr, > >> @@ -1363,6 +1512,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq, > >> wqe->iova = ibwr->wr.atomic.remote_addr; > >> else > 
>> wqe->iova = ibwr->wr.rdma.remote_addr; > >> + > >> wqe->dma.length = length; > >> wqe->dma.resid = length; > >> wqe->dma.num_sge = num_sge; > >> @@ -1385,7 +1535,7 @@ static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq, > >> for (i = 0; i < ibwr->num_sge; i++) > >> length += ibwr->sg_list[i].length; > >> > >> - err = validate_send_wr(sq, ibwr, length); > >> + err = validate_send_wr(qp, ibwr, length); > >> if (err) { > >> printf("validate send failed\n"); > >> return err; > >> @@ -1579,6 +1729,9 @@ static const struct verbs_context_ops rxe_ctx_ops = { > >> .dealloc_pd = rxe_dealloc_pd, > >> .reg_mr = rxe_reg_mr, > >> .dereg_mr = rxe_dereg_mr, > >> + .alloc_mw = rxe_alloc_mw, > >> + .dealloc_mw = rxe_dealloc_mw, > >> + .bind_mw = rxe_bind_mw, > >> .create_cq = rxe_create_cq, > >> .create_cq_ex = rxe_create_cq_ex, > >> .poll_cq = rxe_poll_cq, > >> -- > >> 2.30.2 > >> > ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows 2021-06-08 7:17 ` Zhu Yanjun @ 2021-06-08 23:18 ` Pearson, Robert B 2021-06-09 1:51 ` Zhu Yanjun 0 siblings, 1 reply; 9+ messages in thread From: Pearson, Robert B @ 2021-06-08 23:18 UTC (permalink / raw) To: Zhu Yanjun; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list On 6/8/2021 2:17 AM, Zhu Yanjun wrote: > On Tue, Jun 8, 2021 at 3:01 PM Bob Pearson <rpearsonhpe@gmail.com> wrote: >> On 6/8/21 1:54 AM, Zhu Yanjun wrote: >>> On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote: >>>> This patch makes the required changes to the rxe provider to support the >>>> kernel memory windows patches to the rxe driver. >>>> >>>> The following changes are made: >>>> - Add ibv_alloc_mw verb >>>> - Add ibv_dealloc_mw verb >>>> - Add ibv_bind_mw verb for type 1 MWs >>>> - Add support for bind MW send work requests through the traditional >>>> QP API and the extended QP API. >>>> >>>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> >>>> --- >>>> v2: >>>> Added support for extended QP bind MW work requests. >>> I got the following errors. >>> Is it a known issue? >>> >>> # ./bin/run_tests.py --dev rxe0 >>> .............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss >>> ====================================================================== >>> ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase) >>> Verify bind memory window operation using the new post_send API. >>> ---------------------------------------------------------------------- >>> Traceback (most recent call last): >>> File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw >>> u.poll_cq(server.cq) >>> File "/root/rdma-core/tests/utils.py", line 538, in poll_cq >>> raise PyverbsRDMAError('Completion status is {s}'. 
>>> pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory >>> window bind error. Errno: 6, No such device or address >>> >>> ---------------------------------------------------------------------- >>> Ran 193 tests in 2.544s >>> >>> FAILED (errors=1, skipped=128) >>> >>> Zhu Yanjun >> Yes. It is because the test is not setting the BIND_MW access for the MR. It is not permitted to bind an MW to a MR that does not have the BIND_MW access flag set. But this test mistakenly thinks it is going to work. The other MW tests in test_mr.py do set the BIND_MW access flag. > Thanks a lot. > If the root cause to this problem is correct, and setting the BIND_MW > access for the MR can fix this problem, > > Please file a commit to fix this problem. > > I am fine with this MW patch series if all the problems (including > rdma-core and rxe in kernel) are fixed. > > Thanks for your effort. > > Zhu Yanjun Zhu, There was a second test bug, now fixed. And all the python tests are passing or skipped. No errors. There is a message to the linux-rdma list from Edward Srouji indicating that the patch is upstream at github or you can use the one in my email to him. It will be a while until his PR goes upstream I would guess. I sent a fresh set of patches last night for both the kernel and user space provider. 
Bob >> Bob >>>> --- >>>> providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++++++++++++++++++- >>>> 1 file changed, 155 insertions(+), 2 deletions(-) >>>> >>>> diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c >>>> index a68656ae..bb39ef04 100644 >>>> --- a/providers/rxe/rxe.c >>>> +++ b/providers/rxe/rxe.c >>>> @@ -128,6 +128,95 @@ static int rxe_dealloc_pd(struct ibv_pd *pd) >>>> return ret; >>>> } >>>> >>>> +static struct ibv_mw *rxe_alloc_mw(struct ibv_pd *ibpd, enum ibv_mw_type type) >>>> +{ >>>> + int ret; >>>> + struct ibv_mw *ibmw; >>>> + struct ibv_alloc_mw cmd = {}; >>>> + struct ib_uverbs_alloc_mw_resp resp = {}; >>>> + >>>> + ibmw = calloc(1, sizeof(*ibmw)); >>>> + if (!ibmw) >>>> + return NULL; >>>> + >>>> + ret = ibv_cmd_alloc_mw(ibpd, type, ibmw, &cmd, sizeof(cmd), >>>> + &resp, sizeof(resp)); >>>> + if (ret) { >>>> + free(ibmw); >>>> + return NULL; >>>> + } >>>> + >>>> + return ibmw; >>>> +} >>>> + >>>> +static int rxe_dealloc_mw(struct ibv_mw *ibmw) >>>> +{ >>>> + int ret; >>>> + >>>> + ret = ibv_cmd_dealloc_mw(ibmw); >>>> + if (ret) >>>> + return ret; >>>> + >>>> + free(ibmw); >>>> + return 0; >>>> +} >>>> + >>>> +static int next_rkey(int rkey) >>>> +{ >>>> + return (rkey & 0xffffff00) | ((rkey + 1) & 0x000000ff); >>>> +} >>>> + >>>> +static int rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list, >>>> + struct ibv_send_wr **bad_wr); >>>> + >>>> +static int rxe_bind_mw(struct ibv_qp *ibqp, struct ibv_mw *ibmw, >>>> + struct ibv_mw_bind *mw_bind) >>>> +{ >>>> + int ret; >>>> + struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info; >>>> + struct ibv_send_wr ibwr; >>>> + struct ibv_send_wr *bad_wr; >>>> + >>>> + if (!bind_info->mr && (bind_info->addr || bind_info->length)) { >>>> + ret = EINVAL; >>>> + goto err; >>>> + } >>>> + >>>> + if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) { >>>> + ret = EINVAL; >>>> + goto err; >>>> + } >>>> + >>>> + if (bind_info->mr) { >>>> + if (ibmw->pd != bind_info->mr->pd) { >>>> + ret = 
EPERM; >>>> + goto err; >>>> + } >>>> + } >>>> + >>>> + memset(&ibwr, 0, sizeof(ibwr)); >>>> + >>>> + ibwr.opcode = IBV_WR_BIND_MW; >>>> + ibwr.next = NULL; >>>> + ibwr.wr_id = mw_bind->wr_id; >>>> + ibwr.send_flags = mw_bind->send_flags; >>>> + ibwr.bind_mw.bind_info = mw_bind->bind_info; >>>> + ibwr.bind_mw.mw = ibmw; >>>> + ibwr.bind_mw.rkey = next_rkey(ibmw->rkey); >>>> + >>>> + ret = rxe_post_send(ibqp, &ibwr, &bad_wr); >>>> + if (ret) >>>> + goto err; >>>> + >>>> + /* user has to undo this if he gets an error wc */ >>>> + ibmw->rkey = ibwr.bind_mw.rkey; >>>> + >>>> + return 0; >>>> +err: >>>> + errno = ret; >>>> + return errno; >>>> +} >>>> + >>>> static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length, >>>> uint64_t hca_va, int access) >>>> { >>>> @@ -715,6 +804,31 @@ static void wr_atomic_fetch_add(struct ibv_qp_ex *ibqp, uint32_t rkey, >>>> advance_qp_cur_index(qp); >>>> } >>>> >>>> +static void wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw, >>>> + uint32_t rkey, const struct ibv_mw_bind_info *info) >>>> +{ >>>> + struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex); >>>> + struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index); >>>> + >>>> + if (check_qp_queue_full(qp)) >>>> + return; >>>> + >>>> + memset(wqe, 0, sizeof(*wqe)); >>>> + >>>> + wqe->wr.wr_id = ibqp->wr_id; >>>> + wqe->wr.opcode = IBV_WR_BIND_MW; >>>> + wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags; >>>> + wqe->wr.wr.mw.addr = info->addr; >>>> + wqe->wr.wr.mw.length = info->length; >>>> + wqe->wr.wr.mw.mr_lkey = info->mr->lkey; >>>> + wqe->wr.wr.mw.mw_rkey = ibmw->rkey; >>>> + wqe->wr.wr.mw.rkey = rkey; >>>> + wqe->wr.wr.mw.access = info->mw_access_flags; >>>> + wqe->ssn = qp->ssn++; >>>> + >>>> + advance_qp_cur_index(qp); >>>> +} >>>> + >>>> static void wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey) >>>> { >>>> struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex); >>>> @@ -1106,6 +1220,7 @@ enum { >>>> | 
IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP >>>> | IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD >>>> | IBV_QP_EX_WITH_LOCAL_INV >>>> + | IBV_QP_EX_WITH_BIND_MW >>>> | IBV_QP_EX_WITH_SEND_WITH_INV, >>>> >>>> RXE_SUP_UC_QP_SEND_OPS_FLAGS = >>>> @@ -1113,6 +1228,7 @@ enum { >>>> | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM >>>> | IBV_QP_EX_WITH_SEND >>>> | IBV_QP_EX_WITH_SEND_WITH_IMM >>>> + | IBV_QP_EX_WITH_BIND_MW >>>> | IBV_QP_EX_WITH_SEND_WITH_INV, >>>> >>>> RXE_SUP_UD_QP_SEND_OPS_FLAGS = >>>> @@ -1162,6 +1278,9 @@ static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags) >>>> if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD) >>>> qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add; >>>> >>>> + if (flags & IBV_QP_EX_WITH_BIND_MW) >>>> + qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw; >>>> + >>>> if (flags & IBV_QP_EX_WITH_LOCAL_INV) >>>> qp->vqp.qp_ex.wr_local_inv = wr_local_inv; >>>> >>>> @@ -1275,9 +1394,10 @@ static int rxe_destroy_qp(struct ibv_qp *ibqp) >>>> } >>>> >>>> /* basic sanity checks for send work request */ >>>> -static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr, >>>> +static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr, >>>> unsigned int length) >>>> { >>>> + struct rxe_wq *sq = &qp->sq; >>>> enum ibv_wr_opcode opcode = ibwr->opcode; >>>> >>>> if (ibwr->num_sge > sq->max_sge) >>>> @@ -1291,11 +1411,26 @@ static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr, >>>> if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline)) >>>> return -EINVAL; >>>> >>>> + if (ibwr->opcode == IBV_WR_BIND_MW) { >>>> + if (length) >>>> + return -EINVAL; >>>> + if (ibwr->num_sge) >>>> + return -EINVAL; >>>> + if (ibwr->imm_data) >>>> + return -EINVAL; >>>> + if ((qp_type(qp) != IBV_QPT_RC) && >>>> + (qp_type(qp) != IBV_QPT_UC)) >>>> + return -EINVAL; >>>> + } >>>> + >>>> return 0; >>>> } >>>> >>>> static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr) >>>> { >>>> + struct ibv_mw *ibmw; >>>> + struct 
ibv_mr *ibmr; >>>> + >>>> memset(kwr, 0, sizeof(*kwr)); >>>> >>>> kwr->wr_id = uwr->wr_id; >>>> @@ -1326,6 +1461,18 @@ static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr) >>>> kwr->wr.atomic.rkey = uwr->wr.atomic.rkey; >>>> break; >>>> >>>> + case IBV_WR_BIND_MW: >>>> + ibmr = uwr->bind_mw.bind_info.mr; >>>> + ibmw = uwr->bind_mw.mw; >>>> + >>>> + kwr->wr.mw.addr = uwr->bind_mw.bind_info.addr; >>>> + kwr->wr.mw.length = uwr->bind_mw.bind_info.length; >>>> + kwr->wr.mw.mr_lkey = ibmr->lkey; >>>> + kwr->wr.mw.mw_rkey = ibmw->rkey; >>>> + kwr->wr.mw.rkey = uwr->bind_mw.rkey; >>>> + kwr->wr.mw.access = uwr->bind_mw.bind_info.mw_access_flags; >>>> + break; >>>> + >>>> default: >>>> break; >>>> } >>>> @@ -1348,6 +1495,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq, >>>> if (ibwr->send_flags & IBV_SEND_INLINE) { >>>> uint8_t *inline_data = wqe->dma.inline_data; >>>> >>>> + wqe->dma.resid = 0; >>>> + >>>> for (i = 0; i < num_sge; i++) { >>>> memcpy(inline_data, >>>> (uint8_t *)(long)ibwr->sg_list[i].addr, >>>> @@ -1363,6 +1512,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq, >>>> wqe->iova = ibwr->wr.atomic.remote_addr; >>>> else >>>> wqe->iova = ibwr->wr.rdma.remote_addr; >>>> + >>>> wqe->dma.length = length; >>>> wqe->dma.resid = length; >>>> wqe->dma.num_sge = num_sge; >>>> @@ -1385,7 +1535,7 @@ static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq, >>>> for (i = 0; i < ibwr->num_sge; i++) >>>> length += ibwr->sg_list[i].length; >>>> >>>> - err = validate_send_wr(sq, ibwr, length); >>>> + err = validate_send_wr(qp, ibwr, length); >>>> if (err) { >>>> printf("validate send failed\n"); >>>> return err; >>>> @@ -1579,6 +1729,9 @@ static const struct verbs_context_ops rxe_ctx_ops = { >>>> .dealloc_pd = rxe_dealloc_pd, >>>> .reg_mr = rxe_reg_mr, >>>> .dereg_mr = rxe_dereg_mr, >>>> + .alloc_mw = rxe_alloc_mw, >>>> + .dealloc_mw = rxe_dealloc_mw, >>>> + .bind_mw = rxe_bind_mw, >>>> .create_cq = 
rxe_create_cq, >>>> .create_cq_ex = rxe_create_cq_ex, >>>> .poll_cq = rxe_poll_cq, >>>> -- >>>> 2.30.2 >>>> ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows 2021-06-08 23:18 ` Pearson, Robert B @ 2021-06-09 1:51 ` Zhu Yanjun 2021-06-09 1:59 ` Pearson, Robert B 0 siblings, 1 reply; 9+ messages in thread From: Zhu Yanjun @ 2021-06-09 1:51 UTC (permalink / raw) To: Pearson, Robert B; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list On Wed, Jun 9, 2021 at 7:18 AM Pearson, Robert B <rpearsonhpe@gmail.com> wrote: > > > On 6/8/2021 2:17 AM, Zhu Yanjun wrote: > > On Tue, Jun 8, 2021 at 3:01 PM Bob Pearson <rpearsonhpe@gmail.com> wrote: > >> On 6/8/21 1:54 AM, Zhu Yanjun wrote: > >>> On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote: > >>>> This patch makes the required changes to the rxe provider to support the > >>>> kernel memory windows patches to the rxe driver. > >>>> > >>>> The following changes are made: > >>>> - Add ibv_alloc_mw verb > >>>> - Add ibv_dealloc_mw verb > >>>> - Add ibv_bind_mw verb for type 1 MWs > >>>> - Add support for bind MW send work requests through the traditional > >>>> QP API and the extended QP API. > >>>> > >>>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> > >>>> --- > >>>> v2: > >>>> Added support for extended QP bind MW work requests. > >>> I got the following errors. > >>> Is it a known issue? > >>> > >>> # ./bin/run_tests.py --dev rxe0 > >>> .............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss > >>> ====================================================================== > >>> ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase) > >>> Verify bind memory window operation using the new post_send API. 
> >>> ---------------------------------------------------------------------- > >>> Traceback (most recent call last): > >>> File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw > >>> u.poll_cq(server.cq) > >>> File "/root/rdma-core/tests/utils.py", line 538, in poll_cq > >>> raise PyverbsRDMAError('Completion status is {s}'. > >>> pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory > >>> window bind error. Errno: 6, No such device or address > >>> > >>> ---------------------------------------------------------------------- > >>> Ran 193 tests in 2.544s > >>> > >>> FAILED (errors=1, skipped=128) > >>> > >>> Zhu Yanjun > >> Yes. It is because the test is not setting the BIND_MW access for the MR. It is not permitted to bind an MW to a MR that does not have the BIND_MW access flag set. But this test mistakenly thinks it is going to work. The other MW tests in test_mr.py do set the BIND_MW access flag. > > Thanks a lot. > > If the root cause to this problem is correct, and setting the BIND_MW > > access for the MR can fix this problem, > > > > Please file a commit to fix this problem. > > > > I am fine with this MW patch series if all the problems (including > > rdma-core and rxe in kernel) are fixed. > > > > Thanks for your effort. > > > > Zhu Yanjun > > Zhu, > > There was a second test bug, now fixed. And all the python tests are > passing or skipped. No errors. There is a message to the liux-rdma list > from Edward Srouji indicating that the patch is upstream at github or > you can use the one in my email to him. It will be a while until his PR > goes up stream I would guess. > > I sent a fresh set of patches last night for both the kernel and user > space provider. Thanks a lot for your effort. I applied your commits for rdma-core and rxe in kernel, and the following diff. All the python tests in rdma-core pass or skipped. No errors Thanks again. 
" diff --git a/tests/test_qpex.py b/tests/test_qpex.py index 20288d45..41028e2d 100644 --- a/tests/test_qpex.py +++ b/tests/test_qpex.py @@ -146,10 +146,10 @@ class QpExRCAtomicFetchAdd(RCResources): class QpExRCBindMw(RCResources): def create_qps(self): - create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_BIND_MW) + create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_BIND_MW|e.IBV_QP_EX_WITH_RDMA_WRITE) def create_mr(self): - self.mr = u.create_custom_mr(self, e.IBV_ACCESS_REMOTE_WRITE) + self.mr = u.create_custom_mr(self, e.IBV_ACCESS_REMOTE_WRITE|e.IBV_ACCESS_MW_BIND) " Zhu Yanjun > > Bob > > >> Bob > >>>> --- > >>>> providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++++++++++++++++++- > >>>> 1 file changed, 155 insertions(+), 2 deletions(-) > >>>> > >>>> diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c > >>>> index a68656ae..bb39ef04 100644 > >>>> --- a/providers/rxe/rxe.c > >>>> +++ b/providers/rxe/rxe.c > >>>> @@ -128,6 +128,95 @@ static int rxe_dealloc_pd(struct ibv_pd *pd) > >>>> return ret; > >>>> } > >>>> > >>>> +static struct ibv_mw *rxe_alloc_mw(struct ibv_pd *ibpd, enum ibv_mw_type type) > >>>> +{ > >>>> + int ret; > >>>> + struct ibv_mw *ibmw; > >>>> + struct ibv_alloc_mw cmd = {}; > >>>> + struct ib_uverbs_alloc_mw_resp resp = {}; > >>>> + > >>>> + ibmw = calloc(1, sizeof(*ibmw)); > >>>> + if (!ibmw) > >>>> + return NULL; > >>>> + > >>>> + ret = ibv_cmd_alloc_mw(ibpd, type, ibmw, &cmd, sizeof(cmd), > >>>> + &resp, sizeof(resp)); > >>>> + if (ret) { > >>>> + free(ibmw); > >>>> + return NULL; > >>>> + } > >>>> + > >>>> + return ibmw; > >>>> +} > >>>> + > >>>> +static int rxe_dealloc_mw(struct ibv_mw *ibmw) > >>>> +{ > >>>> + int ret; > >>>> + > >>>> + ret = ibv_cmd_dealloc_mw(ibmw); > >>>> + if (ret) > >>>> + return ret; > >>>> + > >>>> + free(ibmw); > >>>> + return 0; > >>>> +} > >>>> + > >>>> +static int next_rkey(int rkey) > >>>> +{ > >>>> + return (rkey & 0xffffff00) | ((rkey + 1) & 0x000000ff); > >>>> +} > >>>> + > >>>> +static int 
rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list, > >>>> + struct ibv_send_wr **bad_wr); > >>>> + > >>>> +static int rxe_bind_mw(struct ibv_qp *ibqp, struct ibv_mw *ibmw, > >>>> + struct ibv_mw_bind *mw_bind) > >>>> +{ > >>>> + int ret; > >>>> + struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info; > >>>> + struct ibv_send_wr ibwr; > >>>> + struct ibv_send_wr *bad_wr; > >>>> + > >>>> + if (!bind_info->mr && (bind_info->addr || bind_info->length)) { > >>>> + ret = EINVAL; > >>>> + goto err; > >>>> + } > >>>> + > >>>> + if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) { > >>>> + ret = EINVAL; > >>>> + goto err; > >>>> + } > >>>> + > >>>> + if (bind_info->mr) { > >>>> + if (ibmw->pd != bind_info->mr->pd) { > >>>> + ret = EPERM; > >>>> + goto err; > >>>> + } > >>>> + } > >>>> + > >>>> + memset(&ibwr, 0, sizeof(ibwr)); > >>>> + > >>>> + ibwr.opcode = IBV_WR_BIND_MW; > >>>> + ibwr.next = NULL; > >>>> + ibwr.wr_id = mw_bind->wr_id; > >>>> + ibwr.send_flags = mw_bind->send_flags; > >>>> + ibwr.bind_mw.bind_info = mw_bind->bind_info; > >>>> + ibwr.bind_mw.mw = ibmw; > >>>> + ibwr.bind_mw.rkey = next_rkey(ibmw->rkey); > >>>> + > >>>> + ret = rxe_post_send(ibqp, &ibwr, &bad_wr); > >>>> + if (ret) > >>>> + goto err; > >>>> + > >>>> + /* user has to undo this if he gets an error wc */ > >>>> + ibmw->rkey = ibwr.bind_mw.rkey; > >>>> + > >>>> + return 0; > >>>> +err: > >>>> + errno = ret; > >>>> + return errno; > >>>> +} > >>>> + > >>>> static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length, > >>>> uint64_t hca_va, int access) > >>>> { > >>>> @@ -715,6 +804,31 @@ static void wr_atomic_fetch_add(struct ibv_qp_ex *ibqp, uint32_t rkey, > >>>> advance_qp_cur_index(qp); > >>>> } > >>>> > >>>> +static void wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw, > >>>> + uint32_t rkey, const struct ibv_mw_bind_info *info) > >>>> +{ > >>>> + struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex); > >>>> + struct rxe_send_wqe *wqe 
= addr_from_index(qp->sq.queue, qp->cur_index); > >>>> + > >>>> + if (check_qp_queue_full(qp)) > >>>> + return; > >>>> + > >>>> + memset(wqe, 0, sizeof(*wqe)); > >>>> + > >>>> + wqe->wr.wr_id = ibqp->wr_id; > >>>> + wqe->wr.opcode = IBV_WR_BIND_MW; > >>>> + wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags; > >>>> + wqe->wr.wr.mw.addr = info->addr; > >>>> + wqe->wr.wr.mw.length = info->length; > >>>> + wqe->wr.wr.mw.mr_lkey = info->mr->lkey; > >>>> + wqe->wr.wr.mw.mw_rkey = ibmw->rkey; > >>>> + wqe->wr.wr.mw.rkey = rkey; > >>>> + wqe->wr.wr.mw.access = info->mw_access_flags; > >>>> + wqe->ssn = qp->ssn++; > >>>> + > >>>> + advance_qp_cur_index(qp); > >>>> +} > >>>> + > >>>> static void wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey) > >>>> { > >>>> struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex); > >>>> @@ -1106,6 +1220,7 @@ enum { > >>>> | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP > >>>> | IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD > >>>> | IBV_QP_EX_WITH_LOCAL_INV > >>>> + | IBV_QP_EX_WITH_BIND_MW > >>>> | IBV_QP_EX_WITH_SEND_WITH_INV, > >>>> > >>>> RXE_SUP_UC_QP_SEND_OPS_FLAGS = > >>>> @@ -1113,6 +1228,7 @@ enum { > >>>> | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM > >>>> | IBV_QP_EX_WITH_SEND > >>>> | IBV_QP_EX_WITH_SEND_WITH_IMM > >>>> + | IBV_QP_EX_WITH_BIND_MW > >>>> | IBV_QP_EX_WITH_SEND_WITH_INV, > >>>> > >>>> RXE_SUP_UD_QP_SEND_OPS_FLAGS = > >>>> @@ -1162,6 +1278,9 @@ static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags) > >>>> if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD) > >>>> qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add; > >>>> > >>>> + if (flags & IBV_QP_EX_WITH_BIND_MW) > >>>> + qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw; > >>>> + > >>>> if (flags & IBV_QP_EX_WITH_LOCAL_INV) > >>>> qp->vqp.qp_ex.wr_local_inv = wr_local_inv; > >>>> > >>>> @@ -1275,9 +1394,10 @@ static int rxe_destroy_qp(struct ibv_qp *ibqp) > >>>> } > >>>> > >>>> /* basic sanity checks for send work request */ > >>>> -static int validate_send_wr(struct rxe_wq 
*sq, struct ibv_send_wr *ibwr, > >>>> +static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr, > >>>> unsigned int length) > >>>> { > >>>> + struct rxe_wq *sq = &qp->sq; > >>>> enum ibv_wr_opcode opcode = ibwr->opcode; > >>>> > >>>> if (ibwr->num_sge > sq->max_sge) > >>>> @@ -1291,11 +1411,26 @@ static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr, > >>>> if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline)) > >>>> return -EINVAL; > >>>> > >>>> + if (ibwr->opcode == IBV_WR_BIND_MW) { > >>>> + if (length) > >>>> + return -EINVAL; > >>>> + if (ibwr->num_sge) > >>>> + return -EINVAL; > >>>> + if (ibwr->imm_data) > >>>> + return -EINVAL; > >>>> + if ((qp_type(qp) != IBV_QPT_RC) && > >>>> + (qp_type(qp) != IBV_QPT_UC)) > >>>> + return -EINVAL; > >>>> + } > >>>> + > >>>> return 0; > >>>> } > >>>> > >>>> static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr) > >>>> { > >>>> + struct ibv_mw *ibmw; > >>>> + struct ibv_mr *ibmr; > >>>> + > >>>> memset(kwr, 0, sizeof(*kwr)); > >>>> > >>>> kwr->wr_id = uwr->wr_id; > >>>> @@ -1326,6 +1461,18 @@ static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr) > >>>> kwr->wr.atomic.rkey = uwr->wr.atomic.rkey; > >>>> break; > >>>> > >>>> + case IBV_WR_BIND_MW: > >>>> + ibmr = uwr->bind_mw.bind_info.mr; > >>>> + ibmw = uwr->bind_mw.mw; > >>>> + > >>>> + kwr->wr.mw.addr = uwr->bind_mw.bind_info.addr; > >>>> + kwr->wr.mw.length = uwr->bind_mw.bind_info.length; > >>>> + kwr->wr.mw.mr_lkey = ibmr->lkey; > >>>> + kwr->wr.mw.mw_rkey = ibmw->rkey; > >>>> + kwr->wr.mw.rkey = uwr->bind_mw.rkey; > >>>> + kwr->wr.mw.access = uwr->bind_mw.bind_info.mw_access_flags; > >>>> + break; > >>>> + > >>>> default: > >>>> break; > >>>> } > >>>> @@ -1348,6 +1495,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq, > >>>> if (ibwr->send_flags & IBV_SEND_INLINE) { > >>>> uint8_t *inline_data = wqe->dma.inline_data; > >>>> > >>>> + wqe->dma.resid = 0; > 
>>>> + > >>>> for (i = 0; i < num_sge; i++) { > >>>> memcpy(inline_data, > >>>> (uint8_t *)(long)ibwr->sg_list[i].addr, > >>>> @@ -1363,6 +1512,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq, > >>>> wqe->iova = ibwr->wr.atomic.remote_addr; > >>>> else > >>>> wqe->iova = ibwr->wr.rdma.remote_addr; > >>>> + > >>>> wqe->dma.length = length; > >>>> wqe->dma.resid = length; > >>>> wqe->dma.num_sge = num_sge; > >>>> @@ -1385,7 +1535,7 @@ static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq, > >>>> for (i = 0; i < ibwr->num_sge; i++) > >>>> length += ibwr->sg_list[i].length; > >>>> > >>>> - err = validate_send_wr(sq, ibwr, length); > >>>> + err = validate_send_wr(qp, ibwr, length); > >>>> if (err) { > >>>> printf("validate send failed\n"); > >>>> return err; > >>>> @@ -1579,6 +1729,9 @@ static const struct verbs_context_ops rxe_ctx_ops = { > >>>> .dealloc_pd = rxe_dealloc_pd, > >>>> .reg_mr = rxe_reg_mr, > >>>> .dereg_mr = rxe_dereg_mr, > >>>> + .alloc_mw = rxe_alloc_mw, > >>>> + .dealloc_mw = rxe_dealloc_mw, > >>>> + .bind_mw = rxe_bind_mw, > >>>> .create_cq = rxe_create_cq, > >>>> .create_cq_ex = rxe_create_cq_ex, > >>>> .poll_cq = rxe_poll_cq, > >>>> -- > >>>> 2.30.2 > >>>> ^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows 2021-06-09 1:51 ` Zhu Yanjun @ 2021-06-09 1:59 ` Pearson, Robert B 0 siblings, 0 replies; 9+ messages in thread From: Pearson, Robert B @ 2021-06-09 1:59 UTC (permalink / raw) To: Zhu Yanjun; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list On 6/8/2021 8:51 PM, Zhu Yanjun wrote: > On Wed, Jun 9, 2021 at 7:18 AM Pearson, Robert B <rpearsonhpe@gmail.com> wrote: >> >> On 6/8/2021 2:17 AM, Zhu Yanjun wrote: >>> On Tue, Jun 8, 2021 at 3:01 PM Bob Pearson <rpearsonhpe@gmail.com> wrote: >>>> On 6/8/21 1:54 AM, Zhu Yanjun wrote: >>>>> On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote: >>>>>> This patch makes the required changes to the rxe provider to support the >>>>>> kernel memory windows patches to the rxe driver. >>>>>> >>>>>> The following changes are made: >>>>>> - Add ibv_alloc_mw verb >>>>>> - Add ibv_dealloc_mw verb >>>>>> - Add ibv_bind_mw verb for type 1 MWs >>>>>> - Add support for bind MW send work requests through the traditional >>>>>> QP API and the extended QP API. >>>>>> >>>>>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> >>>>>> --- >>>>>> v2: >>>>>> Added support for extended QP bind MW work requests. >>>>> I got the following errors. >>>>> Is it a known issue? >>>>> >>>>> # ./bin/run_tests.py --dev rxe0 >>>>> .............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss >>>>> ====================================================================== >>>>> ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase) >>>>> Verify bind memory window operation using the new post_send API. 
>>>>> ---------------------------------------------------------------------- >>>>> Traceback (most recent call last): >>>>> File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw >>>>> u.poll_cq(server.cq) >>>>> File "/root/rdma-core/tests/utils.py", line 538, in poll_cq >>>>> raise PyverbsRDMAError('Completion status is {s}'. >>>>> pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory >>>>> window bind error. Errno: 6, No such device or address >>>>> >>>>> ---------------------------------------------------------------------- >>>>> Ran 193 tests in 2.544s >>>>> >>>>> FAILED (errors=1, skipped=128) >>>>> >>>>> Zhu Yanjun >>>> Yes. It is because the test is not setting the BIND_MW access for the MR. It is not permitted to bind an MW to a MR that does not have the BIND_MW access flag set. But this test mistakenly thinks it is going to work. The other MW tests in test_mr.py do set the BIND_MW access flag. >>> Thanks a lot. >>> If the root cause to this problem is correct, and setting the BIND_MW >>> access for the MR can fix this problem, >>> >>> Please file a commit to fix this problem. >>> >>> I am fine with this MW patch series if all the problems (including >>> rdma-core and rxe in kernel) are fixed. >>> >>> Thanks for your effort. >>> >>> Zhu Yanjun >> Zhu, >> >> There was a second test bug, now fixed. And all the python tests are >> passing or skipped. No errors. There is a message to the linux-rdma list >> from Edward Srouji indicating that the patch is upstream at github or >> you can use the one in my email to him. It will be a while until his PR >> goes upstream I would guess. >> >> I sent a fresh set of patches last night for both the kernel and user >> space provider. > Thanks a lot for your effort. > > I applied your commits for rdma-core and rxe in kernel, and the following diff. > > All the python tests in rdma-core pass or skipped. No errors > > Thanks again. Thanks to you for your patience. 
Bob > > " > diff --git a/tests/test_qpex.py b/tests/test_qpex.py > index 20288d45..41028e2d 100644 > --- a/tests/test_qpex.py > +++ b/tests/test_qpex.py > @@ -146,10 +146,10 @@ class QpExRCAtomicFetchAdd(RCResources): > > class QpExRCBindMw(RCResources): > def create_qps(self): > - create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_BIND_MW) > + create_qp_ex(self, e.IBV_QPT_RC, > e.IBV_QP_EX_WITH_BIND_MW|e.IBV_QP_EX_WITH_RDMA_WRITE) > > def create_mr(self): > - self.mr = u.create_custom_mr(self, e.IBV_ACCESS_REMOTE_WRITE) > + self.mr = u.create_custom_mr(self, > e.IBV_ACCESS_REMOTE_WRITE|e.IBV_ACCESS_MW_BIND) > " > > Zhu Yanjun > >> Bob >> >>>> Bob >>>>>> --- >>>>>> providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++++++++++++++++++- >>>>>> 1 file changed, 155 insertions(+), 2 deletions(-) >>>>>> >>>>>> diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c >>>>>> index a68656ae..bb39ef04 100644 >>>>>> --- a/providers/rxe/rxe.c >>>>>> +++ b/providers/rxe/rxe.c >>>>>> @@ -128,6 +128,95 @@ static int rxe_dealloc_pd(struct ibv_pd *pd) >>>>>> return ret; >>>>>> } >>>>>> >>>>>> +static struct ibv_mw *rxe_alloc_mw(struct ibv_pd *ibpd, enum ibv_mw_type type) >>>>>> +{ >>>>>> + int ret; >>>>>> + struct ibv_mw *ibmw; >>>>>> + struct ibv_alloc_mw cmd = {}; >>>>>> + struct ib_uverbs_alloc_mw_resp resp = {}; >>>>>> + >>>>>> + ibmw = calloc(1, sizeof(*ibmw)); >>>>>> + if (!ibmw) >>>>>> + return NULL; >>>>>> + >>>>>> + ret = ibv_cmd_alloc_mw(ibpd, type, ibmw, &cmd, sizeof(cmd), >>>>>> + &resp, sizeof(resp)); >>>>>> + if (ret) { >>>>>> + free(ibmw); >>>>>> + return NULL; >>>>>> + } >>>>>> + >>>>>> + return ibmw; >>>>>> +} >>>>>> + >>>>>> +static int rxe_dealloc_mw(struct ibv_mw *ibmw) >>>>>> +{ >>>>>> + int ret; >>>>>> + >>>>>> + ret = ibv_cmd_dealloc_mw(ibmw); >>>>>> + if (ret) >>>>>> + return ret; >>>>>> + >>>>>> + free(ibmw); >>>>>> + return 0; >>>>>> +} >>>>>> + >>>>>> +static int next_rkey(int rkey) >>>>>> +{ >>>>>> + return (rkey & 0xffffff00) | ((rkey + 1) & 0x000000ff); 
>>>>>> +} >>>>>> + >>>>>> +static int rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list, >>>>>> + struct ibv_send_wr **bad_wr); >>>>>> + >>>>>> +static int rxe_bind_mw(struct ibv_qp *ibqp, struct ibv_mw *ibmw, >>>>>> + struct ibv_mw_bind *mw_bind) >>>>>> +{ >>>>>> + int ret; >>>>>> + struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info; >>>>>> + struct ibv_send_wr ibwr; >>>>>> + struct ibv_send_wr *bad_wr; >>>>>> + >>>>>> + if (!bind_info->mr && (bind_info->addr || bind_info->length)) { >>>>>> + ret = EINVAL; >>>>>> + goto err; >>>>>> + } >>>>>> + >>>>>> + if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) { >>>>>> + ret = EINVAL; >>>>>> + goto err; >>>>>> + } >>>>>> + >>>>>> + if (bind_info->mr) { >>>>>> + if (ibmw->pd != bind_info->mr->pd) { >>>>>> + ret = EPERM; >>>>>> + goto err; >>>>>> + } >>>>>> + } >>>>>> + >>>>>> + memset(&ibwr, 0, sizeof(ibwr)); >>>>>> + >>>>>> + ibwr.opcode = IBV_WR_BIND_MW; >>>>>> + ibwr.next = NULL; >>>>>> + ibwr.wr_id = mw_bind->wr_id; >>>>>> + ibwr.send_flags = mw_bind->send_flags; >>>>>> + ibwr.bind_mw.bind_info = mw_bind->bind_info; >>>>>> + ibwr.bind_mw.mw = ibmw; >>>>>> + ibwr.bind_mw.rkey = next_rkey(ibmw->rkey); >>>>>> + >>>>>> + ret = rxe_post_send(ibqp, &ibwr, &bad_wr); >>>>>> + if (ret) >>>>>> + goto err; >>>>>> + >>>>>> + /* user has to undo this if he gets an error wc */ >>>>>> + ibmw->rkey = ibwr.bind_mw.rkey; >>>>>> + >>>>>> + return 0; >>>>>> +err: >>>>>> + errno = ret; >>>>>> + return errno; >>>>>> +} >>>>>> + >>>>>> static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length, >>>>>> uint64_t hca_va, int access) >>>>>> { >>>>>> @@ -715,6 +804,31 @@ static void wr_atomic_fetch_add(struct ibv_qp_ex *ibqp, uint32_t rkey, >>>>>> advance_qp_cur_index(qp); >>>>>> } >>>>>> >>>>>> +static void wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw, >>>>>> + uint32_t rkey, const struct ibv_mw_bind_info *info) >>>>>> +{ >>>>>> + struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, 
vqp.qp_ex); >>>>>> + struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index); >>>>>> + >>>>>> + if (check_qp_queue_full(qp)) >>>>>> + return; >>>>>> + >>>>>> + memset(wqe, 0, sizeof(*wqe)); >>>>>> + >>>>>> + wqe->wr.wr_id = ibqp->wr_id; >>>>>> + wqe->wr.opcode = IBV_WR_BIND_MW; >>>>>> + wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags; >>>>>> + wqe->wr.wr.mw.addr = info->addr; >>>>>> + wqe->wr.wr.mw.length = info->length; >>>>>> + wqe->wr.wr.mw.mr_lkey = info->mr->lkey; >>>>>> + wqe->wr.wr.mw.mw_rkey = ibmw->rkey; >>>>>> + wqe->wr.wr.mw.rkey = rkey; >>>>>> + wqe->wr.wr.mw.access = info->mw_access_flags; >>>>>> + wqe->ssn = qp->ssn++; >>>>>> + >>>>>> + advance_qp_cur_index(qp); >>>>>> +} >>>>>> + >>>>>> static void wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey) >>>>>> { >>>>>> struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex); >>>>>> @@ -1106,6 +1220,7 @@ enum { >>>>>> | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP >>>>>> | IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD >>>>>> | IBV_QP_EX_WITH_LOCAL_INV >>>>>> + | IBV_QP_EX_WITH_BIND_MW >>>>>> | IBV_QP_EX_WITH_SEND_WITH_INV, >>>>>> >>>>>> RXE_SUP_UC_QP_SEND_OPS_FLAGS = >>>>>> @@ -1113,6 +1228,7 @@ enum { >>>>>> | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM >>>>>> | IBV_QP_EX_WITH_SEND >>>>>> | IBV_QP_EX_WITH_SEND_WITH_IMM >>>>>> + | IBV_QP_EX_WITH_BIND_MW >>>>>> | IBV_QP_EX_WITH_SEND_WITH_INV, >>>>>> >>>>>> RXE_SUP_UD_QP_SEND_OPS_FLAGS = >>>>>> @@ -1162,6 +1278,9 @@ static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags) >>>>>> if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD) >>>>>> qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add; >>>>>> >>>>>> + if (flags & IBV_QP_EX_WITH_BIND_MW) >>>>>> + qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw; >>>>>> + >>>>>> if (flags & IBV_QP_EX_WITH_LOCAL_INV) >>>>>> qp->vqp.qp_ex.wr_local_inv = wr_local_inv; >>>>>> >>>>>> @@ -1275,9 +1394,10 @@ static int rxe_destroy_qp(struct ibv_qp *ibqp) >>>>>> } >>>>>> >>>>>> /* basic sanity checks for send work request */ >>>>>> 
-static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr, >>>>>> +static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr, >>>>>> unsigned int length) >>>>>> { >>>>>> + struct rxe_wq *sq = &qp->sq; >>>>>> enum ibv_wr_opcode opcode = ibwr->opcode; >>>>>> >>>>>> if (ibwr->num_sge > sq->max_sge) >>>>>> @@ -1291,11 +1411,26 @@ static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr, >>>>>> if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline)) >>>>>> return -EINVAL; >>>>>> >>>>>> + if (ibwr->opcode == IBV_WR_BIND_MW) { >>>>>> + if (length) >>>>>> + return -EINVAL; >>>>>> + if (ibwr->num_sge) >>>>>> + return -EINVAL; >>>>>> + if (ibwr->imm_data) >>>>>> + return -EINVAL; >>>>>> + if ((qp_type(qp) != IBV_QPT_RC) && >>>>>> + (qp_type(qp) != IBV_QPT_UC)) >>>>>> + return -EINVAL; >>>>>> + } >>>>>> + >>>>>> return 0; >>>>>> } >>>>>> >>>>>> static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr) >>>>>> { >>>>>> + struct ibv_mw *ibmw; >>>>>> + struct ibv_mr *ibmr; >>>>>> + >>>>>> memset(kwr, 0, sizeof(*kwr)); >>>>>> >>>>>> kwr->wr_id = uwr->wr_id; >>>>>> @@ -1326,6 +1461,18 @@ static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr) >>>>>> kwr->wr.atomic.rkey = uwr->wr.atomic.rkey; >>>>>> break; >>>>>> >>>>>> + case IBV_WR_BIND_MW: >>>>>> + ibmr = uwr->bind_mw.bind_info.mr; >>>>>> + ibmw = uwr->bind_mw.mw; >>>>>> + >>>>>> + kwr->wr.mw.addr = uwr->bind_mw.bind_info.addr; >>>>>> + kwr->wr.mw.length = uwr->bind_mw.bind_info.length; >>>>>> + kwr->wr.mw.mr_lkey = ibmr->lkey; >>>>>> + kwr->wr.mw.mw_rkey = ibmw->rkey; >>>>>> + kwr->wr.mw.rkey = uwr->bind_mw.rkey; >>>>>> + kwr->wr.mw.access = uwr->bind_mw.bind_info.mw_access_flags; >>>>>> + break; >>>>>> + >>>>>> default: >>>>>> break; >>>>>> } >>>>>> @@ -1348,6 +1495,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq, >>>>>> if (ibwr->send_flags & IBV_SEND_INLINE) { >>>>>> uint8_t *inline_data = 
wqe->dma.inline_data; >>>>>> >>>>>> + wqe->dma.resid = 0; >>>>>> + >>>>>> for (i = 0; i < num_sge; i++) { >>>>>> memcpy(inline_data, >>>>>> (uint8_t *)(long)ibwr->sg_list[i].addr, >>>>>> @@ -1363,6 +1512,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq, >>>>>> wqe->iova = ibwr->wr.atomic.remote_addr; >>>>>> else >>>>>> wqe->iova = ibwr->wr.rdma.remote_addr; >>>>>> + >>>>>> wqe->dma.length = length; >>>>>> wqe->dma.resid = length; >>>>>> wqe->dma.num_sge = num_sge; >>>>>> @@ -1385,7 +1535,7 @@ static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq, >>>>>> for (i = 0; i < ibwr->num_sge; i++) >>>>>> length += ibwr->sg_list[i].length; >>>>>> >>>>>> - err = validate_send_wr(sq, ibwr, length); >>>>>> + err = validate_send_wr(qp, ibwr, length); >>>>>> if (err) { >>>>>> printf("validate send failed\n"); >>>>>> return err; >>>>>> @@ -1579,6 +1729,9 @@ static const struct verbs_context_ops rxe_ctx_ops = { >>>>>> .dealloc_pd = rxe_dealloc_pd, >>>>>> .reg_mr = rxe_reg_mr, >>>>>> .dereg_mr = rxe_dereg_mr, >>>>>> + .alloc_mw = rxe_alloc_mw, >>>>>> + .dealloc_mw = rxe_dealloc_mw, >>>>>> + .bind_mw = rxe_bind_mw, >>>>>> .create_cq = rxe_create_cq, >>>>>> .create_cq_ex = rxe_create_cq_ex, >>>>>> .poll_cq = rxe_poll_cq, >>>>>> -- >>>>>> 2.30.2 >>>>>> ^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2021-06-09 1:59 UTC | newest] Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2021-06-08 4:28 [PATCH v2 0/2] Implement memory windows support for rxe Bob Pearson 2021-06-08 4:28 ` [PATCH v2 1/2] Update kernel headers Bob Pearson 2021-06-08 4:28 ` [PATCH v2 2/2] Providers/rxe: Implement memory windows Bob Pearson 2021-06-08 6:54 ` Zhu Yanjun 2021-06-08 7:01 ` Bob Pearson 2021-06-08 7:17 ` Zhu Yanjun 2021-06-08 23:18 ` Pearson, Robert B 2021-06-09 1:51 ` Zhu Yanjun 2021-06-09 1:59 ` Pearson, Robert B
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.