* [PATCH v2 0/2] Implement memory windows support for rxe
From: Bob Pearson @ 2021-06-08 4:28 UTC
To: jgg, zyjzyj2000, monis, linux-rdma; +Cc: Bob Pearson
These patches make the required changes to the rxe provider and the
rxe ABI to support the kernel memory windows patches to the rxe driver.
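For context, the user-space flow these verbs enable looks roughly like the
sketch below. This is illustrative only: QP/CQ setup and CQ polling are
omitted, and the function name and buffer handling are hypothetical.

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <infiniband/verbs.h>

/* Sketch: allocate a type 1 MW, bind it over an MR, then tear it down.
 * The MR must be registered with IBV_ACCESS_MW_BIND or the bind fails. */
static int mw_example(struct ibv_pd *pd, struct ibv_qp *qp)
{
	size_t len = 4096;
	void *buf = malloc(len);
	struct ibv_mw_bind mw_bind;
	struct ibv_mr *mr;
	struct ibv_mw *mw;

	if (!buf)
		return -1;

	mr = ibv_reg_mr(pd, buf, len,
			IBV_ACCESS_LOCAL_WRITE |
			IBV_ACCESS_REMOTE_WRITE |
			IBV_ACCESS_MW_BIND);
	mw = ibv_alloc_mw(pd, IBV_MW_TYPE_1);
	if (!mr || !mw)
		return -1;

	memset(&mw_bind, 0, sizeof(mw_bind));
	mw_bind.wr_id = 1;
	mw_bind.send_flags = IBV_SEND_SIGNALED;
	mw_bind.bind_info.mr = mr;
	mw_bind.bind_info.addr = (uint64_t)(uintptr_t)buf;
	mw_bind.bind_info.length = len;
	mw_bind.bind_info.mw_access_flags = IBV_ACCESS_REMOTE_WRITE;

	/* Posts a bind WR on the QP send queue; poll the CQ for the bind
	 * completion before advertising mw->rkey to the remote peer. */
	if (ibv_bind_mw(qp, mw, &mw_bind))
		return -1;

	/* ... the remote peer issues RDMA writes using mw->rkey ... */

	ibv_dealloc_mw(mw);
	ibv_dereg_mr(mr);
	free(buf);
	return 0;
}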
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
Bob Pearson (2):
Update kernel headers
Providers/rxe: Implement memory windows
kernel-headers/rdma/rdma_user_rxe.h | 10 ++
providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++-
2 files changed, 165 insertions(+), 2 deletions(-)
--
2.30.2
* [PATCH v2 1/2] Update kernel headers
From: Bob Pearson @ 2021-06-08 4:28 UTC
To: jgg, zyjzyj2000, monis, linux-rdma; +Cc: Bob Pearson
To commit ?? ("RDMA/rxe: Disallow MR dereg and invalidate when bound").
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
kernel-headers/rdma/rdma_user_rxe.h | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/kernel-headers/rdma/rdma_user_rxe.h b/kernel-headers/rdma/rdma_user_rxe.h
index 068433e2..e283c222 100644
--- a/kernel-headers/rdma/rdma_user_rxe.h
+++ b/kernel-headers/rdma/rdma_user_rxe.h
@@ -99,7 +99,16 @@ struct rxe_send_wr {
__u32 remote_qkey;
__u16 pkey_index;
} ud;
+ struct {
+ __aligned_u64 addr;
+ __aligned_u64 length;
+ __u32 mr_lkey;
+ __u32 mw_rkey;
+ __u32 rkey;
+ __u32 access;
+ } mw;
/* reg is only used by the kernel and is not part of the uapi */
+#ifdef __KERNEL__
struct {
union {
struct ib_mr *mr;
@@ -108,6 +117,7 @@ struct rxe_send_wr {
__u32 key;
__u32 access;
} reg;
+#endif
} wr;
};
--
2.30.2
* [PATCH v2 2/2] Providers/rxe: Implement memory windows
From: Bob Pearson @ 2021-06-08 4:28 UTC
To: jgg, zyjzyj2000, monis, linux-rdma; +Cc: Bob Pearson
This patch makes the required changes to the rxe provider to support the
kernel memory windows patches to the rxe driver.
The following changes are made:
- Add ibv_alloc_mw verb
- Add ibv_dealloc_mw verb
- Add ibv_bind_mw verb for type 1 MWs
- Add support for bind MW send work requests through the traditional
QP API and the extended QP API.
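A bind through the extended QP API might then look roughly like the
following sketch (illustrative only; it assumes the QP was created with
IBV_QP_EX_WITH_BIND_MW in its send-ops flags, and the function name is
hypothetical):

#include <stdint.h>
#include <infiniband/verbs.h>

/* Sketch: post a bind MW work request through the extended QP API.
 * ibv_inc_rkey() bumps the 8-bit variant field of the rkey, which is
 * also what the provider's next_rkey() helper does on the traditional
 * ibv_bind_mw() path. */
static int post_bind(struct ibv_qp_ex *qpx, struct ibv_mw *mw,
		     struct ibv_mr *mr, void *buf, size_t len)
{
	struct ibv_mw_bind_info info = {
		.mr = mr,
		.addr = (uint64_t)(uintptr_t)buf,
		.length = len,
		.mw_access_flags = IBV_ACCESS_REMOTE_WRITE,
	};

	ibv_wr_start(qpx);
	qpx->wr_id = 1;
	qpx->wr_flags = IBV_SEND_SIGNALED;
	ibv_wr_bind_mw(qpx, mw, ibv_inc_rkey(mw->rkey), &info);
	return ibv_wr_complete(qpx);
}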
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
v2:
Added support for extended QP bind MW work requests.
---
providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 155 insertions(+), 2 deletions(-)
diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
index a68656ae..bb39ef04 100644
--- a/providers/rxe/rxe.c
+++ b/providers/rxe/rxe.c
@@ -128,6 +128,95 @@ static int rxe_dealloc_pd(struct ibv_pd *pd)
return ret;
}
+static struct ibv_mw *rxe_alloc_mw(struct ibv_pd *ibpd, enum ibv_mw_type type)
+{
+ int ret;
+ struct ibv_mw *ibmw;
+ struct ibv_alloc_mw cmd = {};
+ struct ib_uverbs_alloc_mw_resp resp = {};
+
+ ibmw = calloc(1, sizeof(*ibmw));
+ if (!ibmw)
+ return NULL;
+
+ ret = ibv_cmd_alloc_mw(ibpd, type, ibmw, &cmd, sizeof(cmd),
+ &resp, sizeof(resp));
+ if (ret) {
+ free(ibmw);
+ return NULL;
+ }
+
+ return ibmw;
+}
+
+static int rxe_dealloc_mw(struct ibv_mw *ibmw)
+{
+ int ret;
+
+ ret = ibv_cmd_dealloc_mw(ibmw);
+ if (ret)
+ return ret;
+
+ free(ibmw);
+ return 0;
+}
+
+static int next_rkey(int rkey)
+{
+ return (rkey & 0xffffff00) | ((rkey + 1) & 0x000000ff);
+}
+
+static int rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list,
+ struct ibv_send_wr **bad_wr);
+
+static int rxe_bind_mw(struct ibv_qp *ibqp, struct ibv_mw *ibmw,
+ struct ibv_mw_bind *mw_bind)
+{
+ int ret;
+ struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info;
+ struct ibv_send_wr ibwr;
+ struct ibv_send_wr *bad_wr;
+
+ if (!bind_info->mr && (bind_info->addr || bind_info->length)) {
+ ret = EINVAL;
+ goto err;
+ }
+
+ if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) {
+ ret = EINVAL;
+ goto err;
+ }
+
+ if (bind_info->mr) {
+ if (ibmw->pd != bind_info->mr->pd) {
+ ret = EPERM;
+ goto err;
+ }
+ }
+
+ memset(&ibwr, 0, sizeof(ibwr));
+
+ ibwr.opcode = IBV_WR_BIND_MW;
+ ibwr.next = NULL;
+ ibwr.wr_id = mw_bind->wr_id;
+ ibwr.send_flags = mw_bind->send_flags;
+ ibwr.bind_mw.bind_info = mw_bind->bind_info;
+ ibwr.bind_mw.mw = ibmw;
+ ibwr.bind_mw.rkey = next_rkey(ibmw->rkey);
+
+ ret = rxe_post_send(ibqp, &ibwr, &bad_wr);
+ if (ret)
+ goto err;
+
+ /* user has to undo this if he gets an error wc */
+ ibmw->rkey = ibwr.bind_mw.rkey;
+
+ return 0;
+err:
+ errno = ret;
+ return errno;
+}
+
static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
uint64_t hca_va, int access)
{
@@ -715,6 +804,31 @@ static void wr_atomic_fetch_add(struct ibv_qp_ex *ibqp, uint32_t rkey,
advance_qp_cur_index(qp);
}
+static void wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw,
+ uint32_t rkey, const struct ibv_mw_bind_info *info)
+{
+ struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+ struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
+
+ if (check_qp_queue_full(qp))
+ return;
+
+ memset(wqe, 0, sizeof(*wqe));
+
+ wqe->wr.wr_id = ibqp->wr_id;
+ wqe->wr.opcode = IBV_WR_BIND_MW;
+ wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
+ wqe->wr.wr.mw.addr = info->addr;
+ wqe->wr.wr.mw.length = info->length;
+ wqe->wr.wr.mw.mr_lkey = info->mr->lkey;
+ wqe->wr.wr.mw.mw_rkey = ibmw->rkey;
+ wqe->wr.wr.mw.rkey = rkey;
+ wqe->wr.wr.mw.access = info->mw_access_flags;
+ wqe->ssn = qp->ssn++;
+
+ advance_qp_cur_index(qp);
+}
+
static void wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey)
{
struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
@@ -1106,6 +1220,7 @@ enum {
| IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP
| IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD
| IBV_QP_EX_WITH_LOCAL_INV
+ | IBV_QP_EX_WITH_BIND_MW
| IBV_QP_EX_WITH_SEND_WITH_INV,
RXE_SUP_UC_QP_SEND_OPS_FLAGS =
@@ -1113,6 +1228,7 @@ enum {
| IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM
| IBV_QP_EX_WITH_SEND
| IBV_QP_EX_WITH_SEND_WITH_IMM
+ | IBV_QP_EX_WITH_BIND_MW
| IBV_QP_EX_WITH_SEND_WITH_INV,
RXE_SUP_UD_QP_SEND_OPS_FLAGS =
@@ -1162,6 +1278,9 @@ static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags)
if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD)
qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add;
+ if (flags & IBV_QP_EX_WITH_BIND_MW)
+ qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw;
+
if (flags & IBV_QP_EX_WITH_LOCAL_INV)
qp->vqp.qp_ex.wr_local_inv = wr_local_inv;
@@ -1275,9 +1394,10 @@ static int rxe_destroy_qp(struct ibv_qp *ibqp)
}
/* basic sanity checks for send work request */
-static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
+static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr,
unsigned int length)
{
+ struct rxe_wq *sq = &qp->sq;
enum ibv_wr_opcode opcode = ibwr->opcode;
if (ibwr->num_sge > sq->max_sge)
@@ -1291,11 +1411,26 @@ static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline))
return -EINVAL;
+ if (ibwr->opcode == IBV_WR_BIND_MW) {
+ if (length)
+ return -EINVAL;
+ if (ibwr->num_sge)
+ return -EINVAL;
+ if (ibwr->imm_data)
+ return -EINVAL;
+ if ((qp_type(qp) != IBV_QPT_RC) &&
+ (qp_type(qp) != IBV_QPT_UC))
+ return -EINVAL;
+ }
+
return 0;
}
static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
{
+ struct ibv_mw *ibmw;
+ struct ibv_mr *ibmr;
+
memset(kwr, 0, sizeof(*kwr));
kwr->wr_id = uwr->wr_id;
@@ -1326,6 +1461,18 @@ static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
kwr->wr.atomic.rkey = uwr->wr.atomic.rkey;
break;
+ case IBV_WR_BIND_MW:
+ ibmr = uwr->bind_mw.bind_info.mr;
+ ibmw = uwr->bind_mw.mw;
+
+ kwr->wr.mw.addr = uwr->bind_mw.bind_info.addr;
+ kwr->wr.mw.length = uwr->bind_mw.bind_info.length;
+ kwr->wr.mw.mr_lkey = ibmr->lkey;
+ kwr->wr.mw.mw_rkey = ibmw->rkey;
+ kwr->wr.mw.rkey = uwr->bind_mw.rkey;
+ kwr->wr.mw.access = uwr->bind_mw.bind_info.mw_access_flags;
+ break;
+
default:
break;
}
@@ -1348,6 +1495,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
if (ibwr->send_flags & IBV_SEND_INLINE) {
uint8_t *inline_data = wqe->dma.inline_data;
+ wqe->dma.resid = 0;
+
for (i = 0; i < num_sge; i++) {
memcpy(inline_data,
(uint8_t *)(long)ibwr->sg_list[i].addr,
@@ -1363,6 +1512,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
wqe->iova = ibwr->wr.atomic.remote_addr;
else
wqe->iova = ibwr->wr.rdma.remote_addr;
+
wqe->dma.length = length;
wqe->dma.resid = length;
wqe->dma.num_sge = num_sge;
@@ -1385,7 +1535,7 @@ static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq,
for (i = 0; i < ibwr->num_sge; i++)
length += ibwr->sg_list[i].length;
- err = validate_send_wr(sq, ibwr, length);
+ err = validate_send_wr(qp, ibwr, length);
if (err) {
printf("validate send failed\n");
return err;
@@ -1579,6 +1729,9 @@ static const struct verbs_context_ops rxe_ctx_ops = {
.dealloc_pd = rxe_dealloc_pd,
.reg_mr = rxe_reg_mr,
.dereg_mr = rxe_dereg_mr,
+ .alloc_mw = rxe_alloc_mw,
+ .dealloc_mw = rxe_dealloc_mw,
+ .bind_mw = rxe_bind_mw,
.create_cq = rxe_create_cq,
.create_cq_ex = rxe_create_cq_ex,
.poll_cq = rxe_poll_cq,
--
2.30.2
* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows
From: Zhu Yanjun @ 2021-06-08 6:54 UTC
To: Bob Pearson; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list
On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>
> This patch makes the required changes to the rxe provider to support the
> kernel memory windows patches to the rxe driver.
>
> The following changes are made:
> - Add ibv_alloc_mw verb
> - Add ibv_dealloc_mw verb
> - Add ibv_bind_mw verb for type 1 MWs
> - Add support for bind MW send work requests through the traditional
> QP API and the extended QP API.
>
> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> ---
> v2:
> Added support for extended QP bind MW work requests.
I got the following errors.
Is it a known issue?
# ./bin/run_tests.py --dev rxe0
.............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss
======================================================================
ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase)
Verify bind memory window operation using the new post_send API.
----------------------------------------------------------------------
Traceback (most recent call last):
File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw
u.poll_cq(server.cq)
File "/root/rdma-core/tests/utils.py", line 538, in poll_cq
raise PyverbsRDMAError('Completion status is {s}'.
pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory window bind error. Errno: 6, No such device or address
----------------------------------------------------------------------
Ran 193 tests in 2.544s
FAILED (errors=1, skipped=128)
Zhu Yanjun
> [...]
* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows
From: Bob Pearson @ 2021-06-08 7:01 UTC
To: Zhu Yanjun; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list
On 6/8/21 1:54 AM, Zhu Yanjun wrote:
> On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>> [...]
>
> I got the following errors.
> Is it a known issue?
>
> # ./bin/run_tests.py --dev rxe0
> .............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss
> ======================================================================
> ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase)
> Verify bind memory window operation using the new post_send API.
> ----------------------------------------------------------------------
> Traceback (most recent call last):
> File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw
> u.poll_cq(server.cq)
> File "/root/rdma-core/tests/utils.py", line 538, in poll_cq
> raise PyverbsRDMAError('Completion status is {s}'.
> pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory window bind error. Errno: 6, No such device or address
>
> ----------------------------------------------------------------------
> Ran 193 tests in 2.544s
>
> FAILED (errors=1, skipped=128)
>
> Zhu Yanjun
Yes. The test does not set the BIND_MW access flag on the MR. It is not permitted to bind an MW to an MR that does not have the BIND_MW access flag set, but this test mistakenly expects it to work. The other MW tests in test_mr.py do set the BIND_MW access flag.
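In C verbs terms, the fix on the test side amounts to registering the MR
with the bind permission, roughly as below (a sketch; the helper name is
hypothetical):

#include <infiniband/verbs.h>

/* Sketch: an MR used as a bind target must carry IBV_ACCESS_MW_BIND;
 * without it, rxe completes the bind WR with a memory window bind
 * error, as in the test run above. */
static struct ibv_mr *reg_bindable_mr(struct ibv_pd *pd, void *buf,
				      size_t len)
{
	return ibv_reg_mr(pd, buf, len,
			  IBV_ACCESS_LOCAL_WRITE |
			  IBV_ACCESS_REMOTE_WRITE |
			  IBV_ACCESS_MW_BIND);
}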
Bob
>> [...]
* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows
From: Zhu Yanjun @ 2021-06-08 7:17 UTC
To: Bob Pearson; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list
On Tue, Jun 8, 2021 at 3:01 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
> [...]
> Yes. The test does not set the BIND_MW access flag on the MR. It is not permitted to bind an MW to an MR that does not have the BIND_MW access flag set, but this test mistakenly expects it to work. The other MW tests in test_mr.py do set the BIND_MW access flag.
Thanks a lot.
If the root cause is correct and setting the BIND_MW access flag on the
MR fixes the problem, please file a commit with the fix.
I am fine with this MW patch series once all the problems (in both
rdma-core and the kernel rxe driver) are fixed.
Thanks for your effort.
Zhu Yanjun
> Bob
> >> [...]
* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows
From: Pearson, Robert B @ 2021-06-08 23:18 UTC
To: Zhu Yanjun; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list
On 6/8/2021 2:17 AM, Zhu Yanjun wrote:
> On Tue, Jun 8, 2021 at 3:01 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>>> [...]
>> Yes. The test does not set the BIND_MW access flag on the MR. It is not permitted to bind an MW to an MR that does not have the BIND_MW access flag set, but this test mistakenly expects it to work. The other MW tests in test_mr.py do set the BIND_MW access flag.
> Thanks a lot.
> If the root cause is correct and setting the BIND_MW access flag on the
> MR fixes the problem, please file a commit with the fix.
>
> I am fine with this MW patch series once all the problems (in both
> rdma-core and the kernel rxe driver) are fixed.
>
> Thanks for your effort.
>
> Zhu Yanjun
Zhu,
There was a second test bug, now fixed, and all the Python tests now pass
or are skipped, with no errors. There is a message to the linux-rdma list
from Edward Srouji indicating that the test patch is upstream at GitHub,
or you can use the one in my email to him. I would guess it will be a
while until his PR goes upstream.
I sent a fresh set of patches last night for both the kernel and the
user-space provider.
Bob
>>>> [...]
* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows
From: Zhu Yanjun @ 2021-06-09 1:51 UTC
To: Pearson, Robert B; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list
On Wed, Jun 9, 2021 at 7:18 AM Pearson, Robert B <rpearsonhpe@gmail.com> wrote:
> [...]
> Zhu,
>
> There was a second test bug, now fixed, and all the Python tests now pass
> or are skipped, with no errors. There is a message to the linux-rdma list
> from Edward Srouji indicating that the test patch is upstream at GitHub,
> or you can use the one in my email to him. I would guess it will be a
> while until his PR goes upstream.
>
> I sent a fresh set of patches last night for both the kernel and the
> user-space provider.
Thanks a lot for your effort.
I applied your commits for rdma-core and the kernel rxe driver, plus the
following diff. All the Python tests in rdma-core now pass or are skipped,
with no errors.
Thanks again.
"
diff --git a/tests/test_qpex.py b/tests/test_qpex.py
index 20288d45..41028e2d 100644
--- a/tests/test_qpex.py
+++ b/tests/test_qpex.py
@@ -146,10 +146,10 @@ class QpExRCAtomicFetchAdd(RCResources):
 class QpExRCBindMw(RCResources):
     def create_qps(self):
-        create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_BIND_MW)
+        create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_BIND_MW|e.IBV_QP_EX_WITH_RDMA_WRITE)
     def create_mr(self):
-        self.mr = u.create_custom_mr(self, e.IBV_ACCESS_REMOTE_WRITE)
+        self.mr = u.create_custom_mr(self, e.IBV_ACCESS_REMOTE_WRITE|e.IBV_ACCESS_MW_BIND)
"
Zhu Yanjun
> >>>> [...]
> >>>>
> >>>> RXE_SUP_UD_QP_SEND_OPS_FLAGS =
> >>>> @@ -1162,6 +1278,9 @@ static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags)
> >>>> if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD)
> >>>> qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add;
> >>>>
> >>>> + if (flags & IBV_QP_EX_WITH_BIND_MW)
> >>>> + qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw;
> >>>> +
> >>>> if (flags & IBV_QP_EX_WITH_LOCAL_INV)
> >>>> qp->vqp.qp_ex.wr_local_inv = wr_local_inv;
> >>>>
> >>>> @@ -1275,9 +1394,10 @@ static int rxe_destroy_qp(struct ibv_qp *ibqp)
> >>>> }
> >>>>
> >>>> /* basic sanity checks for send work request */
> >>>> -static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
> >>>> +static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr,
> >>>> unsigned int length)
> >>>> {
> >>>> + struct rxe_wq *sq = &qp->sq;
> >>>> enum ibv_wr_opcode opcode = ibwr->opcode;
> >>>>
> >>>> if (ibwr->num_sge > sq->max_sge)
> >>>> @@ -1291,11 +1411,26 @@ static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
> >>>> if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline))
> >>>> return -EINVAL;
> >>>>
> >>>> + if (ibwr->opcode == IBV_WR_BIND_MW) {
> >>>> + if (length)
> >>>> + return -EINVAL;
> >>>> + if (ibwr->num_sge)
> >>>> + return -EINVAL;
> >>>> + if (ibwr->imm_data)
> >>>> + return -EINVAL;
> >>>> + if ((qp_type(qp) != IBV_QPT_RC) &&
> >>>> + (qp_type(qp) != IBV_QPT_UC))
> >>>> + return -EINVAL;
> >>>> + }
> >>>> +
> >>>> return 0;
> >>>> }
> >>>>
> >>>> static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
> >>>> {
> >>>> + struct ibv_mw *ibmw;
> >>>> + struct ibv_mr *ibmr;
> >>>> +
> >>>> memset(kwr, 0, sizeof(*kwr));
> >>>>
> >>>> kwr->wr_id = uwr->wr_id;
> >>>> @@ -1326,6 +1461,18 @@ static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
> >>>> kwr->wr.atomic.rkey = uwr->wr.atomic.rkey;
> >>>> break;
> >>>>
> >>>> + case IBV_WR_BIND_MW:
> >>>> + ibmr = uwr->bind_mw.bind_info.mr;
> >>>> + ibmw = uwr->bind_mw.mw;
> >>>> +
> >>>> + kwr->wr.mw.addr = uwr->bind_mw.bind_info.addr;
> >>>> + kwr->wr.mw.length = uwr->bind_mw.bind_info.length;
> >>>> + kwr->wr.mw.mr_lkey = ibmr->lkey;
> >>>> + kwr->wr.mw.mw_rkey = ibmw->rkey;
> >>>> + kwr->wr.mw.rkey = uwr->bind_mw.rkey;
> >>>> + kwr->wr.mw.access = uwr->bind_mw.bind_info.mw_access_flags;
> >>>> + break;
> >>>> +
> >>>> default:
> >>>> break;
> >>>> }
> >>>> @@ -1348,6 +1495,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
> >>>> if (ibwr->send_flags & IBV_SEND_INLINE) {
> >>>> uint8_t *inline_data = wqe->dma.inline_data;
> >>>>
> >>>> + wqe->dma.resid = 0;
> >>>> +
> >>>> for (i = 0; i < num_sge; i++) {
> >>>> memcpy(inline_data,
> >>>> (uint8_t *)(long)ibwr->sg_list[i].addr,
> >>>> @@ -1363,6 +1512,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
> >>>> wqe->iova = ibwr->wr.atomic.remote_addr;
> >>>> else
> >>>> wqe->iova = ibwr->wr.rdma.remote_addr;
> >>>> +
> >>>> wqe->dma.length = length;
> >>>> wqe->dma.resid = length;
> >>>> wqe->dma.num_sge = num_sge;
> >>>> @@ -1385,7 +1535,7 @@ static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq,
> >>>> for (i = 0; i < ibwr->num_sge; i++)
> >>>> length += ibwr->sg_list[i].length;
> >>>>
> >>>> - err = validate_send_wr(sq, ibwr, length);
> >>>> + err = validate_send_wr(qp, ibwr, length);
> >>>> if (err) {
> >>>> printf("validate send failed\n");
> >>>> return err;
> >>>> @@ -1579,6 +1729,9 @@ static const struct verbs_context_ops rxe_ctx_ops = {
> >>>> .dealloc_pd = rxe_dealloc_pd,
> >>>> .reg_mr = rxe_reg_mr,
> >>>> .dereg_mr = rxe_dereg_mr,
> >>>> + .alloc_mw = rxe_alloc_mw,
> >>>> + .dealloc_mw = rxe_dealloc_mw,
> >>>> + .bind_mw = rxe_bind_mw,
> >>>> .create_cq = rxe_create_cq,
> >>>> .create_cq_ex = rxe_create_cq_ex,
> >>>> .poll_cq = rxe_poll_cq,
> >>>> --
> >>>> 2.30.2
> >>>>
^ permalink raw reply related [flat|nested] 9+ messages in thread
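For completeness, the user-facing type 1 MW flow that this series
enables on rxe looks roughly like the following sketch (plain
libibverbs API; pd, qp, buf, len and an MR registered with
IBV_ACCESS_MW_BIND are assumed, and send_rkey_to_peer() is a
hypothetical helper):

	struct ibv_mw *mw = ibv_alloc_mw(pd, IBV_MW_TYPE_1);
	struct ibv_mw_bind bind = {
		.wr_id = 1,
		.send_flags = IBV_SEND_SIGNALED,
		.bind_info = {
			.mr = mr,
			.addr = (uint64_t)(uintptr_t)buf,
			.length = len,
			.mw_access_flags = IBV_ACCESS_REMOTE_WRITE,
		},
	};

	if (mw && !ibv_bind_mw(qp, mw, &bind)) {
		/* on success the provider updates mw->rkey (see
		 * rxe_bind_mw() above); the peer uses this rkey for
		 * RDMA writes into the window */
		send_rkey_to_peer(mw->rkey); /* hypothetical */
	}

	/* ... remote traffic against mw->rkey ... */

	ibv_dealloc_mw(mw);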
* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows
2021-06-09 1:51 ` Zhu Yanjun
@ 2021-06-09 1:59 ` Pearson, Robert B
0 siblings, 0 replies; 9+ messages in thread
From: Pearson, Robert B @ 2021-06-09 1:59 UTC (permalink / raw)
To: Zhu Yanjun; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list
On 6/8/2021 8:51 PM, Zhu Yanjun wrote:
> On Wed, Jun 9, 2021 at 7:18 AM Pearson, Robert B <rpearsonhpe@gmail.com> wrote:
>>
>> On 6/8/2021 2:17 AM, Zhu Yanjun wrote:
>>> On Tue, Jun 8, 2021 at 3:01 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>>>> On 6/8/21 1:54 AM, Zhu Yanjun wrote:
>>>>> On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>>>>>> This patch makes the required changes to the rxe provider to support the
>>>>>> kernel memory windows patches to the rxe driver.
>>>>>>
>>>>>> The following changes are made:
>>>>>> - Add ibv_alloc_mw verb
>>>>>> - Add ibv_dealloc_mw verb
>>>>>> - Add ibv_bind_mw verb for type 1 MWs
>>>>>> - Add support for bind MW send work requests through the traditional
>>>>>> QP API and the extended QP API.
>>>>>>
>>>>>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
>>>>>> ---
>>>>>> v2:
>>>>>> Added support for extended QP bind MW work requests.
>>>>> I got the following errors.
>>>>> Is it a known issue?
>>>>>
>>>>> # ./bin/run_tests.py --dev rxe0
>>>>> .............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss
>>>>> ======================================================================
>>>>> ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase)
>>>>> Verify bind memory window operation using the new post_send API.
>>>>> ----------------------------------------------------------------------
>>>>> Traceback (most recent call last):
>>>>> File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw
>>>>> u.poll_cq(server.cq)
>>>>> File "/root/rdma-core/tests/utils.py", line 538, in poll_cq
>>>>> raise PyverbsRDMAError('Completion status is {s}'.
>>>>> pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory
>>>>> window bind error. Errno: 6, No such device or address
>>>>>
>>>>> ----------------------------------------------------------------------
>>>>> Ran 193 tests in 2.544s
>>>>>
>>>>> FAILED (errors=1, skipped=128)
>>>>>
>>>>> Zhu Yanjun
>>>> Yes. It is because the test does not set the BIND_MW access flag for the MR. It is not permitted to bind an MW to an MR that does not have the BIND_MW access flag set, but this test mistakenly assumes it will work. The other MW tests in test_mr.py do set the BIND_MW access flag.
>>> Thanks a lot.
>>> If the root cause of this problem is correct, and setting the BIND_MW
>>> access flag for the MR fixes it,
>>>
>>> Please send a patch to fix this problem.
>>>
>>> I am fine with this MW patch series once all the problems (in both
>>> rdma-core and the kernel rxe driver) are fixed.
>>>
>>> Thanks for your effort.
>>>
>>> Zhu Yanjun
>> Zhu,
>>
>> There was a second test bug, now fixed, and all the Python tests are
>> passing or skipped, with no errors. There is a message to the
>> linux-rdma list from Edward Srouji indicating that the patch is
>> upstream on GitHub, or you can use the one in my email to him. I would
>> guess it will be a while until his PR goes upstream.
>>
>> I sent a fresh set of patches last night for both the kernel and user
>> space provider.
> Thanks a lot for your effort.
>
> I applied your commits for rdma-core and the kernel rxe driver, together with the following diff.
>
> All the Python tests in rdma-core pass or are skipped. No errors.
>
> Thanks again.
Thanks to you for your patience.
Bob
>
> "
> diff --git a/tests/test_qpex.py b/tests/test_qpex.py
> index 20288d45..41028e2d 100644
> --- a/tests/test_qpex.py
> +++ b/tests/test_qpex.py
> @@ -146,10 +146,10 @@ class QpExRCAtomicFetchAdd(RCResources):
>
>  class QpExRCBindMw(RCResources):
>      def create_qps(self):
> -        create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_BIND_MW)
> +        create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_BIND_MW|e.IBV_QP_EX_WITH_RDMA_WRITE)
>
>      def create_mr(self):
> -        self.mr = u.create_custom_mr(self, e.IBV_ACCESS_REMOTE_WRITE)
> +        self.mr = u.create_custom_mr(self, e.IBV_ACCESS_REMOTE_WRITE|e.IBV_ACCESS_MW_BIND)
> "
>
> Zhu Yanjun
>
>> Bob
>>
>>>> Bob
>>>>>> ---
>>>>>> providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++++++++++++++++++-
>>>>>> 1 file changed, 155 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
>>>>>> index a68656ae..bb39ef04 100644
>>>>>> --- a/providers/rxe/rxe.c
>>>>>> +++ b/providers/rxe/rxe.c
>>>>>> @@ -128,6 +128,95 @@ static int rxe_dealloc_pd(struct ibv_pd *pd)
>>>>>> return ret;
>>>>>> }
>>>>>>
>>>>>> +static struct ibv_mw *rxe_alloc_mw(struct ibv_pd *ibpd, enum ibv_mw_type type)
>>>>>> +{
>>>>>> + int ret;
>>>>>> + struct ibv_mw *ibmw;
>>>>>> + struct ibv_alloc_mw cmd = {};
>>>>>> + struct ib_uverbs_alloc_mw_resp resp = {};
>>>>>> +
>>>>>> + ibmw = calloc(1, sizeof(*ibmw));
>>>>>> + if (!ibmw)
>>>>>> + return NULL;
>>>>>> +
>>>>>> + ret = ibv_cmd_alloc_mw(ibpd, type, ibmw, &cmd, sizeof(cmd),
>>>>>> + &resp, sizeof(resp));
>>>>>> + if (ret) {
>>>>>> + free(ibmw);
>>>>>> + return NULL;
>>>>>> + }
>>>>>> +
>>>>>> + return ibmw;
>>>>>> +}
>>>>>> +
>>>>>> +static int rxe_dealloc_mw(struct ibv_mw *ibmw)
>>>>>> +{
>>>>>> + int ret;
>>>>>> +
>>>>>> + ret = ibv_cmd_dealloc_mw(ibmw);
>>>>>> + if (ret)
>>>>>> + return ret;
>>>>>> +
>>>>>> + free(ibmw);
>>>>>> + return 0;
>>>>>> +}
>>>>>> +
>>>>>> +static int next_rkey(int rkey)
>>>>>> +{
>>>>>> + return (rkey & 0xffffff00) | ((rkey + 1) & 0x000000ff);
>>>>>> +}
>>>>>> +
>>>>>> +static int rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list,
>>>>>> + struct ibv_send_wr **bad_wr);
>>>>>> +
>>>>>> +static int rxe_bind_mw(struct ibv_qp *ibqp, struct ibv_mw *ibmw,
>>>>>> + struct ibv_mw_bind *mw_bind)
>>>>>> +{
>>>>>> + int ret;
>>>>>> + struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info;
>>>>>> + struct ibv_send_wr ibwr;
>>>>>> + struct ibv_send_wr *bad_wr;
>>>>>> +
>>>>>> + if (!bind_info->mr && (bind_info->addr || bind_info->length)) {
>>>>>> + ret = EINVAL;
>>>>>> + goto err;
>>>>>> + }
>>>>>> +
>>>>>> + if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) {
>>>>>> + ret = EINVAL;
>>>>>> + goto err;
>>>>>> + }
>>>>>> +
>>>>>> + if (bind_info->mr) {
>>>>>> + if (ibmw->pd != bind_info->mr->pd) {
>>>>>> + ret = EPERM;
>>>>>> + goto err;
>>>>>> + }
>>>>>> + }
>>>>>> +
>>>>>> + memset(&ibwr, 0, sizeof(ibwr));
>>>>>> +
>>>>>> + ibwr.opcode = IBV_WR_BIND_MW;
>>>>>> + ibwr.next = NULL;
>>>>>> + ibwr.wr_id = mw_bind->wr_id;
>>>>>> + ibwr.send_flags = mw_bind->send_flags;
>>>>>> + ibwr.bind_mw.bind_info = mw_bind->bind_info;
>>>>>> + ibwr.bind_mw.mw = ibmw;
>>>>>> + ibwr.bind_mw.rkey = next_rkey(ibmw->rkey);
>>>>>> +
>>>>>> + ret = rxe_post_send(ibqp, &ibwr, &bad_wr);
>>>>>> + if (ret)
>>>>>> + goto err;
>>>>>> +
>>>>>> + /* user has to undo this if he gets an error wc */
>>>>>> + ibmw->rkey = ibwr.bind_mw.rkey;
>>>>>> +
>>>>>> + return 0;
>>>>>> +err:
>>>>>> + errno = ret;
>>>>>> + return errno;
>>>>>> +}
>>>>>> +
>>>>>> static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
>>>>>> uint64_t hca_va, int access)
>>>>>> {
>>>>>> @@ -715,6 +804,31 @@ static void wr_atomic_fetch_add(struct ibv_qp_ex *ibqp, uint32_t rkey,
>>>>>> advance_qp_cur_index(qp);
>>>>>> }
>>>>>>
>>>>>> +static void wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw,
>>>>>> + uint32_t rkey, const struct ibv_mw_bind_info *info)
>>>>>> +{
>>>>>> + struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
>>>>>> + struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
>>>>>> +
>>>>>> + if (check_qp_queue_full(qp))
>>>>>> + return;
>>>>>> +
>>>>>> + memset(wqe, 0, sizeof(*wqe));
>>>>>> +
>>>>>> + wqe->wr.wr_id = ibqp->wr_id;
>>>>>> + wqe->wr.opcode = IBV_WR_BIND_MW;
>>>>>> + wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
>>>>>> + wqe->wr.wr.mw.addr = info->addr;
>>>>>> + wqe->wr.wr.mw.length = info->length;
>>>>>> + wqe->wr.wr.mw.mr_lkey = info->mr->lkey;
>>>>>> + wqe->wr.wr.mw.mw_rkey = ibmw->rkey;
>>>>>> + wqe->wr.wr.mw.rkey = rkey;
>>>>>> + wqe->wr.wr.mw.access = info->mw_access_flags;
>>>>>> + wqe->ssn = qp->ssn++;
>>>>>> +
>>>>>> + advance_qp_cur_index(qp);
>>>>>> +}
>>>>>> +
>>>>>> static void wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey)
>>>>>> {
>>>>>> struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
>>>>>> @@ -1106,6 +1220,7 @@ enum {
>>>>>> | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP
>>>>>> | IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD
>>>>>> | IBV_QP_EX_WITH_LOCAL_INV
>>>>>> + | IBV_QP_EX_WITH_BIND_MW
>>>>>> | IBV_QP_EX_WITH_SEND_WITH_INV,
>>>>>>
>>>>>> RXE_SUP_UC_QP_SEND_OPS_FLAGS =
>>>>>> @@ -1113,6 +1228,7 @@ enum {
>>>>>> | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM
>>>>>> | IBV_QP_EX_WITH_SEND
>>>>>> | IBV_QP_EX_WITH_SEND_WITH_IMM
>>>>>> + | IBV_QP_EX_WITH_BIND_MW
>>>>>> | IBV_QP_EX_WITH_SEND_WITH_INV,
>>>>>>
>>>>>> RXE_SUP_UD_QP_SEND_OPS_FLAGS =
>>>>>> @@ -1162,6 +1278,9 @@ static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags)
>>>>>> if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD)
>>>>>> qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add;
>>>>>>
>>>>>> + if (flags & IBV_QP_EX_WITH_BIND_MW)
>>>>>> + qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw;
>>>>>> +
>>>>>> if (flags & IBV_QP_EX_WITH_LOCAL_INV)
>>>>>> qp->vqp.qp_ex.wr_local_inv = wr_local_inv;
>>>>>>
>>>>>> @@ -1275,9 +1394,10 @@ static int rxe_destroy_qp(struct ibv_qp *ibqp)
>>>>>> }
>>>>>>
>>>>>> /* basic sanity checks for send work request */
>>>>>> -static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
>>>>>> +static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr,
>>>>>> unsigned int length)
>>>>>> {
>>>>>> + struct rxe_wq *sq = &qp->sq;
>>>>>> enum ibv_wr_opcode opcode = ibwr->opcode;
>>>>>>
>>>>>> if (ibwr->num_sge > sq->max_sge)
>>>>>> @@ -1291,11 +1411,26 @@ static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
>>>>>> if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline))
>>>>>> return -EINVAL;
>>>>>>
>>>>>> + if (ibwr->opcode == IBV_WR_BIND_MW) {
>>>>>> + if (length)
>>>>>> + return -EINVAL;
>>>>>> + if (ibwr->num_sge)
>>>>>> + return -EINVAL;
>>>>>> + if (ibwr->imm_data)
>>>>>> + return -EINVAL;
>>>>>> + if ((qp_type(qp) != IBV_QPT_RC) &&
>>>>>> + (qp_type(qp) != IBV_QPT_UC))
>>>>>> + return -EINVAL;
>>>>>> + }
>>>>>> +
>>>>>> return 0;
>>>>>> }
>>>>>>
>>>>>> static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
>>>>>> {
>>>>>> + struct ibv_mw *ibmw;
>>>>>> + struct ibv_mr *ibmr;
>>>>>> +
>>>>>> memset(kwr, 0, sizeof(*kwr));
>>>>>>
>>>>>> kwr->wr_id = uwr->wr_id;
>>>>>> @@ -1326,6 +1461,18 @@ static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
>>>>>> kwr->wr.atomic.rkey = uwr->wr.atomic.rkey;
>>>>>> break;
>>>>>>
>>>>>> + case IBV_WR_BIND_MW:
>>>>>> + ibmr = uwr->bind_mw.bind_info.mr;
>>>>>> + ibmw = uwr->bind_mw.mw;
>>>>>> +
>>>>>> + kwr->wr.mw.addr = uwr->bind_mw.bind_info.addr;
>>>>>> + kwr->wr.mw.length = uwr->bind_mw.bind_info.length;
>>>>>> + kwr->wr.mw.mr_lkey = ibmr->lkey;
>>>>>> + kwr->wr.mw.mw_rkey = ibmw->rkey;
>>>>>> + kwr->wr.mw.rkey = uwr->bind_mw.rkey;
>>>>>> + kwr->wr.mw.access = uwr->bind_mw.bind_info.mw_access_flags;
>>>>>> + break;
>>>>>> +
>>>>>> default:
>>>>>> break;
>>>>>> }
>>>>>> @@ -1348,6 +1495,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
>>>>>> if (ibwr->send_flags & IBV_SEND_INLINE) {
>>>>>> uint8_t *inline_data = wqe->dma.inline_data;
>>>>>>
>>>>>> + wqe->dma.resid = 0;
>>>>>> +
>>>>>> for (i = 0; i < num_sge; i++) {
>>>>>> memcpy(inline_data,
>>>>>> (uint8_t *)(long)ibwr->sg_list[i].addr,
>>>>>> @@ -1363,6 +1512,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
>>>>>> wqe->iova = ibwr->wr.atomic.remote_addr;
>>>>>> else
>>>>>> wqe->iova = ibwr->wr.rdma.remote_addr;
>>>>>> +
>>>>>> wqe->dma.length = length;
>>>>>> wqe->dma.resid = length;
>>>>>> wqe->dma.num_sge = num_sge;
>>>>>> @@ -1385,7 +1535,7 @@ static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq,
>>>>>> for (i = 0; i < ibwr->num_sge; i++)
>>>>>> length += ibwr->sg_list[i].length;
>>>>>>
>>>>>> - err = validate_send_wr(sq, ibwr, length);
>>>>>> + err = validate_send_wr(qp, ibwr, length);
>>>>>> if (err) {
>>>>>> printf("validate send failed\n");
>>>>>> return err;
>>>>>> @@ -1579,6 +1729,9 @@ static const struct verbs_context_ops rxe_ctx_ops = {
>>>>>> .dealloc_pd = rxe_dealloc_pd,
>>>>>> .reg_mr = rxe_reg_mr,
>>>>>> .dereg_mr = rxe_dereg_mr,
>>>>>> + .alloc_mw = rxe_alloc_mw,
>>>>>> + .dealloc_mw = rxe_dealloc_mw,
>>>>>> + .bind_mw = rxe_bind_mw,
>>>>>> .create_cq = rxe_create_cq,
>>>>>> .create_cq_ex = rxe_create_cq_ex,
>>>>>> .poll_cq = rxe_poll_cq,
>>>>>> --
>>>>>> 2.30.2
>>>>>>
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2021-06-09 1:59 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-08 4:28 [PATCH v2 0/2] Implement memory windows support for rxe Bob Pearson
2021-06-08 4:28 ` [PATCH v2 1/2] Update kernel headers Bob Pearson
2021-06-08 4:28 ` [PATCH v2 2/2] Providers/rxe: Implement memory windows Bob Pearson
2021-06-08 6:54 ` Zhu Yanjun
2021-06-08 7:01 ` Bob Pearson
2021-06-08 7:17 ` Zhu Yanjun
2021-06-08 23:18 ` Pearson, Robert B
2021-06-09 1:51 ` Zhu Yanjun
2021-06-09 1:59 ` Pearson, Robert B