* [PATCH v2 0/2] Implement memory windows support for rxe
@ 2021-06-08 4:28 Bob Pearson
2021-06-08 4:28 ` [PATCH v2 1/2] Update kernel headers Bob Pearson
2021-06-08 4:28 ` [PATCH v2 2/2] Providers/rxe: Implement memory windows Bob Pearson
0 siblings, 2 replies; 10+ messages in thread
From: Bob Pearson @ 2021-06-08 4:28 UTC (permalink / raw)
To: jgg, zyjzyj2000, monis, linux-rdma; +Cc: Bob Pearson
These patches make the required changes to the rxe provider and the
rxe ABI to support the kernel memory windows patches to the rxe driver.
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
Bob Pearson (2):
Update kernel headers
Providers/rxe: Implement memory windows
kernel-headers/rdma/rdma_user_rxe.h | 10 ++
providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++-
2 files changed, 165 insertions(+), 2 deletions(-)
--
2.30.2
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v2 1/2] Update kernel headers
2021-06-08 4:28 [PATCH v2 0/2] Implement memory windows support for rxe Bob Pearson
@ 2021-06-08 4:28 ` Bob Pearson
2021-06-08 4:28 ` [PATCH v2 2/2] Providers/rxe: Implement memory windows Bob Pearson
1 sibling, 0 replies; 10+ messages in thread
From: Bob Pearson @ 2021-06-08 4:28 UTC (permalink / raw)
To: jgg, zyjzyj2000, monis, linux-rdma; +Cc: Bob Pearson
To commit ?? ("RDMA/rxe: Disallow MR dereg and invalidate when bound").
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
kernel-headers/rdma/rdma_user_rxe.h | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/kernel-headers/rdma/rdma_user_rxe.h b/kernel-headers/rdma/rdma_user_rxe.h
index 068433e2..e283c222 100644
--- a/kernel-headers/rdma/rdma_user_rxe.h
+++ b/kernel-headers/rdma/rdma_user_rxe.h
@@ -99,7 +99,16 @@ struct rxe_send_wr {
__u32 remote_qkey;
__u16 pkey_index;
} ud;
+ struct {
+ __aligned_u64 addr;
+ __aligned_u64 length;
+ __u32 mr_lkey;
+ __u32 mw_rkey;
+ __u32 rkey;
+ __u32 access;
+ } mw;
/* reg is only used by the kernel and is not part of the uapi */
+#ifdef __KERNEL__
struct {
union {
struct ib_mr *mr;
@@ -108,6 +117,7 @@ struct rxe_send_wr {
__u32 key;
__u32 access;
} reg;
+#endif
} wr;
};
--
2.30.2
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v2 2/2] Providers/rxe: Implement memory windows
2021-06-08 4:28 [PATCH v2 0/2] Implement memory windows support for rxe Bob Pearson
2021-06-08 4:28 ` [PATCH v2 1/2] Update kernel headers Bob Pearson
@ 2021-06-08 4:28 ` Bob Pearson
2021-06-08 6:54 ` Zhu Yanjun
1 sibling, 1 reply; 10+ messages in thread
From: Bob Pearson @ 2021-06-08 4:28 UTC (permalink / raw)
To: jgg, zyjzyj2000, monis, linux-rdma; +Cc: Bob Pearson
This patch makes the required changes to the rxe provider to support the
kernel memory windows patches to the rxe driver.
The following changes are made:
- Add ibv_alloc_mw verb
- Add ibv_dealloc_mw verb
- Add ibv_bind_mw verb for type 1 MWs
- Add support for bind MW send work requests through the traditional
QP API and the extended QP API.
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
v2:
Added support for extended QP bind MW work requests.
---
providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 155 insertions(+), 2 deletions(-)
diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
index a68656ae..bb39ef04 100644
--- a/providers/rxe/rxe.c
+++ b/providers/rxe/rxe.c
@@ -128,6 +128,95 @@ static int rxe_dealloc_pd(struct ibv_pd *pd)
return ret;
}
+static struct ibv_mw *rxe_alloc_mw(struct ibv_pd *ibpd, enum ibv_mw_type type)
+{
+ int ret;
+ struct ibv_mw *ibmw;
+ struct ibv_alloc_mw cmd = {};
+ struct ib_uverbs_alloc_mw_resp resp = {};
+
+ ibmw = calloc(1, sizeof(*ibmw));
+ if (!ibmw)
+ return NULL;
+
+ ret = ibv_cmd_alloc_mw(ibpd, type, ibmw, &cmd, sizeof(cmd),
+ &resp, sizeof(resp));
+ if (ret) {
+ free(ibmw);
+ return NULL;
+ }
+
+ return ibmw;
+}
+
+static int rxe_dealloc_mw(struct ibv_mw *ibmw)
+{
+ int ret;
+
+ ret = ibv_cmd_dealloc_mw(ibmw);
+ if (ret)
+ return ret;
+
+ free(ibmw);
+ return 0;
+}
+
+static int next_rkey(int rkey)
+{
+ return (rkey & 0xffffff00) | ((rkey + 1) & 0x000000ff);
+}
+
+static int rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list,
+ struct ibv_send_wr **bad_wr);
+
+static int rxe_bind_mw(struct ibv_qp *ibqp, struct ibv_mw *ibmw,
+ struct ibv_mw_bind *mw_bind)
+{
+ int ret;
+ struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info;
+ struct ibv_send_wr ibwr;
+ struct ibv_send_wr *bad_wr;
+
+ if (!bind_info->mr && (bind_info->addr || bind_info->length)) {
+ ret = EINVAL;
+ goto err;
+ }
+
+ if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) {
+ ret = EINVAL;
+ goto err;
+ }
+
+ if (bind_info->mr) {
+ if (ibmw->pd != bind_info->mr->pd) {
+ ret = EPERM;
+ goto err;
+ }
+ }
+
+ memset(&ibwr, 0, sizeof(ibwr));
+
+ ibwr.opcode = IBV_WR_BIND_MW;
+ ibwr.next = NULL;
+ ibwr.wr_id = mw_bind->wr_id;
+ ibwr.send_flags = mw_bind->send_flags;
+ ibwr.bind_mw.bind_info = mw_bind->bind_info;
+ ibwr.bind_mw.mw = ibmw;
+ ibwr.bind_mw.rkey = next_rkey(ibmw->rkey);
+
+ ret = rxe_post_send(ibqp, &ibwr, &bad_wr);
+ if (ret)
+ goto err;
+
+ /* user has to undo this if he gets an error wc */
+ ibmw->rkey = ibwr.bind_mw.rkey;
+
+ return 0;
+err:
+ errno = ret;
+ return errno;
+}
+
static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
uint64_t hca_va, int access)
{
@@ -715,6 +804,31 @@ static void wr_atomic_fetch_add(struct ibv_qp_ex *ibqp, uint32_t rkey,
advance_qp_cur_index(qp);
}
+static void wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw,
+ uint32_t rkey, const struct ibv_mw_bind_info *info)
+{
+ struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
+ struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
+
+ if (check_qp_queue_full(qp))
+ return;
+
+ memset(wqe, 0, sizeof(*wqe));
+
+ wqe->wr.wr_id = ibqp->wr_id;
+ wqe->wr.opcode = IBV_WR_BIND_MW;
+ wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
+ wqe->wr.wr.mw.addr = info->addr;
+ wqe->wr.wr.mw.length = info->length;
+ wqe->wr.wr.mw.mr_lkey = info->mr->lkey;
+ wqe->wr.wr.mw.mw_rkey = ibmw->rkey;
+ wqe->wr.wr.mw.rkey = rkey;
+ wqe->wr.wr.mw.access = info->mw_access_flags;
+ wqe->ssn = qp->ssn++;
+
+ advance_qp_cur_index(qp);
+}
+
static void wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey)
{
struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
@@ -1106,6 +1220,7 @@ enum {
| IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP
| IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD
| IBV_QP_EX_WITH_LOCAL_INV
+ | IBV_QP_EX_WITH_BIND_MW
| IBV_QP_EX_WITH_SEND_WITH_INV,
RXE_SUP_UC_QP_SEND_OPS_FLAGS =
@@ -1113,6 +1228,7 @@ enum {
| IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM
| IBV_QP_EX_WITH_SEND
| IBV_QP_EX_WITH_SEND_WITH_IMM
+ | IBV_QP_EX_WITH_BIND_MW
| IBV_QP_EX_WITH_SEND_WITH_INV,
RXE_SUP_UD_QP_SEND_OPS_FLAGS =
@@ -1162,6 +1278,9 @@ static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags)
if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD)
qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add;
+ if (flags & IBV_QP_EX_WITH_BIND_MW)
+ qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw;
+
if (flags & IBV_QP_EX_WITH_LOCAL_INV)
qp->vqp.qp_ex.wr_local_inv = wr_local_inv;
@@ -1275,9 +1394,10 @@ static int rxe_destroy_qp(struct ibv_qp *ibqp)
}
/* basic sanity checks for send work request */
-static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
+static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr,
unsigned int length)
{
+ struct rxe_wq *sq = &qp->sq;
enum ibv_wr_opcode opcode = ibwr->opcode;
if (ibwr->num_sge > sq->max_sge)
@@ -1291,11 +1411,26 @@ static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline))
return -EINVAL;
+ if (ibwr->opcode == IBV_WR_BIND_MW) {
+ if (length)
+ return -EINVAL;
+ if (ibwr->num_sge)
+ return -EINVAL;
+ if (ibwr->imm_data)
+ return -EINVAL;
+ if ((qp_type(qp) != IBV_QPT_RC) &&
+ (qp_type(qp) != IBV_QPT_UC))
+ return -EINVAL;
+ }
+
return 0;
}
static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
{
+ struct ibv_mw *ibmw;
+ struct ibv_mr *ibmr;
+
memset(kwr, 0, sizeof(*kwr));
kwr->wr_id = uwr->wr_id;
@@ -1326,6 +1461,18 @@ static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
kwr->wr.atomic.rkey = uwr->wr.atomic.rkey;
break;
+ case IBV_WR_BIND_MW:
+ ibmr = uwr->bind_mw.bind_info.mr;
+ ibmw = uwr->bind_mw.mw;
+
+ kwr->wr.mw.addr = uwr->bind_mw.bind_info.addr;
+ kwr->wr.mw.length = uwr->bind_mw.bind_info.length;
+ kwr->wr.mw.mr_lkey = ibmr->lkey;
+ kwr->wr.mw.mw_rkey = ibmw->rkey;
+ kwr->wr.mw.rkey = uwr->bind_mw.rkey;
+ kwr->wr.mw.access = uwr->bind_mw.bind_info.mw_access_flags;
+ break;
+
default:
break;
}
@@ -1348,6 +1495,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
if (ibwr->send_flags & IBV_SEND_INLINE) {
uint8_t *inline_data = wqe->dma.inline_data;
+ wqe->dma.resid = 0;
+
for (i = 0; i < num_sge; i++) {
memcpy(inline_data,
(uint8_t *)(long)ibwr->sg_list[i].addr,
@@ -1363,6 +1512,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
wqe->iova = ibwr->wr.atomic.remote_addr;
else
wqe->iova = ibwr->wr.rdma.remote_addr;
+
wqe->dma.length = length;
wqe->dma.resid = length;
wqe->dma.num_sge = num_sge;
@@ -1385,7 +1535,7 @@ static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq,
for (i = 0; i < ibwr->num_sge; i++)
length += ibwr->sg_list[i].length;
- err = validate_send_wr(sq, ibwr, length);
+ err = validate_send_wr(qp, ibwr, length);
if (err) {
printf("validate send failed\n");
return err;
@@ -1579,6 +1729,9 @@ static const struct verbs_context_ops rxe_ctx_ops = {
.dealloc_pd = rxe_dealloc_pd,
.reg_mr = rxe_reg_mr,
.dereg_mr = rxe_dereg_mr,
+ .alloc_mw = rxe_alloc_mw,
+ .dealloc_mw = rxe_dealloc_mw,
+ .bind_mw = rxe_bind_mw,
.create_cq = rxe_create_cq,
.create_cq_ex = rxe_create_cq_ex,
.poll_cq = rxe_poll_cq,
--
2.30.2
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows
2021-06-08 4:28 ` [PATCH v2 2/2] Providers/rxe: Implement memory windows Bob Pearson
@ 2021-06-08 6:54 ` Zhu Yanjun
2021-06-08 7:01 ` Bob Pearson
0 siblings, 1 reply; 10+ messages in thread
From: Zhu Yanjun @ 2021-06-08 6:54 UTC (permalink / raw)
To: Bob Pearson; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list
On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>
> This patch makes the required changes to the rxe provider to support the
> kernel memory windows patches to the rxe driver.
>
> The following changes are made:
> - Add ibv_alloc_mw verb
> - Add ibv_dealloc_mw verb
> - Add ibv_bind_mw verb for type 1 MWs
> - Add support for bind MW send work requests through the traditional
> QP API and the extended QP API.
>
> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> ---
> v2:
> Added support for extended QP bind MW work requests.
I got the following errors.
Is it a known issue?
# ./bin/run_tests.py --dev rxe0
.............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss
======================================================================
ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase)
Verify bind memory window operation using the new post_send API.
----------------------------------------------------------------------
Traceback (most recent call last):
File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw
u.poll_cq(server.cq)
File "/root/rdma-core/tests/utils.py", line 538, in poll_cq
raise PyverbsRDMAError('Completion status is {s}'.
pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory
window bind error. Errno: 6, No such device or address
----------------------------------------------------------------------
Ran 193 tests in 2.544s
FAILED (errors=1, skipped=128)
Zhu Yanjun
> ---
> providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 155 insertions(+), 2 deletions(-)
>
> diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
> index a68656ae..bb39ef04 100644
> --- a/providers/rxe/rxe.c
> +++ b/providers/rxe/rxe.c
> @@ -128,6 +128,95 @@ static int rxe_dealloc_pd(struct ibv_pd *pd)
> return ret;
> }
>
> +static struct ibv_mw *rxe_alloc_mw(struct ibv_pd *ibpd, enum ibv_mw_type type)
> +{
> + int ret;
> + struct ibv_mw *ibmw;
> + struct ibv_alloc_mw cmd = {};
> + struct ib_uverbs_alloc_mw_resp resp = {};
> +
> + ibmw = calloc(1, sizeof(*ibmw));
> + if (!ibmw)
> + return NULL;
> +
> + ret = ibv_cmd_alloc_mw(ibpd, type, ibmw, &cmd, sizeof(cmd),
> + &resp, sizeof(resp));
> + if (ret) {
> + free(ibmw);
> + return NULL;
> + }
> +
> + return ibmw;
> +}
> +
> +static int rxe_dealloc_mw(struct ibv_mw *ibmw)
> +{
> + int ret;
> +
> + ret = ibv_cmd_dealloc_mw(ibmw);
> + if (ret)
> + return ret;
> +
> + free(ibmw);
> + return 0;
> +}
> +
> +static int next_rkey(int rkey)
> +{
> + return (rkey & 0xffffff00) | ((rkey + 1) & 0x000000ff);
> +}
> +
> +static int rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list,
> + struct ibv_send_wr **bad_wr);
> +
> +static int rxe_bind_mw(struct ibv_qp *ibqp, struct ibv_mw *ibmw,
> + struct ibv_mw_bind *mw_bind)
> +{
> + int ret;
> + struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info;
> + struct ibv_send_wr ibwr;
> + struct ibv_send_wr *bad_wr;
> +
> + if (!bind_info->mr && (bind_info->addr || bind_info->length)) {
> + ret = EINVAL;
> + goto err;
> + }
> +
> + if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) {
> + ret = EINVAL;
> + goto err;
> + }
> +
> + if (bind_info->mr) {
> + if (ibmw->pd != bind_info->mr->pd) {
> + ret = EPERM;
> + goto err;
> + }
> + }
> +
> + memset(&ibwr, 0, sizeof(ibwr));
> +
> + ibwr.opcode = IBV_WR_BIND_MW;
> + ibwr.next = NULL;
> + ibwr.wr_id = mw_bind->wr_id;
> + ibwr.send_flags = mw_bind->send_flags;
> + ibwr.bind_mw.bind_info = mw_bind->bind_info;
> + ibwr.bind_mw.mw = ibmw;
> + ibwr.bind_mw.rkey = next_rkey(ibmw->rkey);
> +
> + ret = rxe_post_send(ibqp, &ibwr, &bad_wr);
> + if (ret)
> + goto err;
> +
> + /* user has to undo this if he gets an error wc */
> + ibmw->rkey = ibwr.bind_mw.rkey;
> +
> + return 0;
> +err:
> + errno = ret;
> + return errno;
> +}
> +
> static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> uint64_t hca_va, int access)
> {
> @@ -715,6 +804,31 @@ static void wr_atomic_fetch_add(struct ibv_qp_ex *ibqp, uint32_t rkey,
> advance_qp_cur_index(qp);
> }
>
> +static void wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw,
> + uint32_t rkey, const struct ibv_mw_bind_info *info)
> +{
> + struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
> + struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
> +
> + if (check_qp_queue_full(qp))
> + return;
> +
> + memset(wqe, 0, sizeof(*wqe));
> +
> + wqe->wr.wr_id = ibqp->wr_id;
> + wqe->wr.opcode = IBV_WR_BIND_MW;
> + wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
> + wqe->wr.wr.mw.addr = info->addr;
> + wqe->wr.wr.mw.length = info->length;
> + wqe->wr.wr.mw.mr_lkey = info->mr->lkey;
> + wqe->wr.wr.mw.mw_rkey = ibmw->rkey;
> + wqe->wr.wr.mw.rkey = rkey;
> + wqe->wr.wr.mw.access = info->mw_access_flags;
> + wqe->ssn = qp->ssn++;
> +
> + advance_qp_cur_index(qp);
> +}
> +
> static void wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey)
> {
> struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
> @@ -1106,6 +1220,7 @@ enum {
> | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP
> | IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD
> | IBV_QP_EX_WITH_LOCAL_INV
> + | IBV_QP_EX_WITH_BIND_MW
> | IBV_QP_EX_WITH_SEND_WITH_INV,
>
> RXE_SUP_UC_QP_SEND_OPS_FLAGS =
> @@ -1113,6 +1228,7 @@ enum {
> | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM
> | IBV_QP_EX_WITH_SEND
> | IBV_QP_EX_WITH_SEND_WITH_IMM
> + | IBV_QP_EX_WITH_BIND_MW
> | IBV_QP_EX_WITH_SEND_WITH_INV,
>
> RXE_SUP_UD_QP_SEND_OPS_FLAGS =
> @@ -1162,6 +1278,9 @@ static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags)
> if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD)
> qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add;
>
> + if (flags & IBV_QP_EX_WITH_BIND_MW)
> + qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw;
> +
> if (flags & IBV_QP_EX_WITH_LOCAL_INV)
> qp->vqp.qp_ex.wr_local_inv = wr_local_inv;
>
> @@ -1275,9 +1394,10 @@ static int rxe_destroy_qp(struct ibv_qp *ibqp)
> }
>
> /* basic sanity checks for send work request */
> -static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
> +static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr,
> unsigned int length)
> {
> + struct rxe_wq *sq = &qp->sq;
> enum ibv_wr_opcode opcode = ibwr->opcode;
>
> if (ibwr->num_sge > sq->max_sge)
> @@ -1291,11 +1411,26 @@ static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
> if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline))
> return -EINVAL;
>
> + if (ibwr->opcode == IBV_WR_BIND_MW) {
> + if (length)
> + return -EINVAL;
> + if (ibwr->num_sge)
> + return -EINVAL;
> + if (ibwr->imm_data)
> + return -EINVAL;
> + if ((qp_type(qp) != IBV_QPT_RC) &&
> + (qp_type(qp) != IBV_QPT_UC))
> + return -EINVAL;
> + }
> +
> return 0;
> }
>
> static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
> {
> + struct ibv_mw *ibmw;
> + struct ibv_mr *ibmr;
> +
> memset(kwr, 0, sizeof(*kwr));
>
> kwr->wr_id = uwr->wr_id;
> @@ -1326,6 +1461,18 @@ static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
> kwr->wr.atomic.rkey = uwr->wr.atomic.rkey;
> break;
>
> + case IBV_WR_BIND_MW:
> + ibmr = uwr->bind_mw.bind_info.mr;
> + ibmw = uwr->bind_mw.mw;
> +
> + kwr->wr.mw.addr = uwr->bind_mw.bind_info.addr;
> + kwr->wr.mw.length = uwr->bind_mw.bind_info.length;
> + kwr->wr.mw.mr_lkey = ibmr->lkey;
> + kwr->wr.mw.mw_rkey = ibmw->rkey;
> + kwr->wr.mw.rkey = uwr->bind_mw.rkey;
> + kwr->wr.mw.access = uwr->bind_mw.bind_info.mw_access_flags;
> + break;
> +
> default:
> break;
> }
> @@ -1348,6 +1495,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
> if (ibwr->send_flags & IBV_SEND_INLINE) {
> uint8_t *inline_data = wqe->dma.inline_data;
>
> + wqe->dma.resid = 0;
> +
> for (i = 0; i < num_sge; i++) {
> memcpy(inline_data,
> (uint8_t *)(long)ibwr->sg_list[i].addr,
> @@ -1363,6 +1512,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
> wqe->iova = ibwr->wr.atomic.remote_addr;
> else
> wqe->iova = ibwr->wr.rdma.remote_addr;
> +
> wqe->dma.length = length;
> wqe->dma.resid = length;
> wqe->dma.num_sge = num_sge;
> @@ -1385,7 +1535,7 @@ static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq,
> for (i = 0; i < ibwr->num_sge; i++)
> length += ibwr->sg_list[i].length;
>
> - err = validate_send_wr(sq, ibwr, length);
> + err = validate_send_wr(qp, ibwr, length);
> if (err) {
> printf("validate send failed\n");
> return err;
> @@ -1579,6 +1729,9 @@ static const struct verbs_context_ops rxe_ctx_ops = {
> .dealloc_pd = rxe_dealloc_pd,
> .reg_mr = rxe_reg_mr,
> .dereg_mr = rxe_dereg_mr,
> + .alloc_mw = rxe_alloc_mw,
> + .dealloc_mw = rxe_dealloc_mw,
> + .bind_mw = rxe_bind_mw,
> .create_cq = rxe_create_cq,
> .create_cq_ex = rxe_create_cq_ex,
> .poll_cq = rxe_poll_cq,
> --
> 2.30.2
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows
2021-06-08 6:54 ` Zhu Yanjun
@ 2021-06-08 7:01 ` Bob Pearson
2021-06-08 7:17 ` Zhu Yanjun
0 siblings, 1 reply; 10+ messages in thread
From: Bob Pearson @ 2021-06-08 7:01 UTC (permalink / raw)
To: Zhu Yanjun; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list
On 6/8/21 1:54 AM, Zhu Yanjun wrote:
> On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>>
>> This patch makes the required changes to the rxe provider to support the
>> kernel memory windows patches to the rxe driver.
>>
>> The following changes are made:
>> - Add ibv_alloc_mw verb
>> - Add ibv_dealloc_mw verb
>> - Add ibv_bind_mw verb for type 1 MWs
>> - Add support for bind MW send work requests through the traditional
>> QP API and the extended QP API.
>>
>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
>> ---
>> v2:
>> Added support for extended QP bind MW work requests.
>
> I got the following errors.
> Is it a known issue?
>
> # ./bin/run_tests.py --dev rxe0
> .............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss
> ======================================================================
> ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase)
> Verify bind memory window operation using the new post_send API.
> ----------------------------------------------------------------------
> Traceback (most recent call last):
> File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw
> u.poll_cq(server.cq)
> File "/root/rdma-core/tests/utils.py", line 538, in poll_cq
> raise PyverbsRDMAError('Completion status is {s}'.
> pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory
> window bind error. Errno: 6, No such device or address
>
> ----------------------------------------------------------------------
> Ran 193 tests in 2.544s
>
> FAILED (errors=1, skipped=128)
>
> Zhu Yanjun
Yes. It is because the test is not setting the BIND_MW access for the MR. It is not permitted to bind an MW to a MR that does not have the BIND_MW access flag set. But this test mistakenly thinks it is going to work. The other MW tests in test_mr.py do set the BIND_MW access flag.
Bob
>
>> ---
>> providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++++++++++++++++++-
>> 1 file changed, 155 insertions(+), 2 deletions(-)
>>
>> diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
>> index a68656ae..bb39ef04 100644
>> --- a/providers/rxe/rxe.c
>> +++ b/providers/rxe/rxe.c
>> @@ -128,6 +128,95 @@ static int rxe_dealloc_pd(struct ibv_pd *pd)
>> return ret;
>> }
>>
>> +static struct ibv_mw *rxe_alloc_mw(struct ibv_pd *ibpd, enum ibv_mw_type type)
>> +{
>> + int ret;
>> + struct ibv_mw *ibmw;
>> + struct ibv_alloc_mw cmd = {};
>> + struct ib_uverbs_alloc_mw_resp resp = {};
>> +
>> + ibmw = calloc(1, sizeof(*ibmw));
>> + if (!ibmw)
>> + return NULL;
>> +
>> + ret = ibv_cmd_alloc_mw(ibpd, type, ibmw, &cmd, sizeof(cmd),
>> + &resp, sizeof(resp));
>> + if (ret) {
>> + free(ibmw);
>> + return NULL;
>> + }
>> +
>> + return ibmw;
>> +}
>> +
>> +static int rxe_dealloc_mw(struct ibv_mw *ibmw)
>> +{
>> + int ret;
>> +
>> + ret = ibv_cmd_dealloc_mw(ibmw);
>> + if (ret)
>> + return ret;
>> +
>> + free(ibmw);
>> + return 0;
>> +}
>> +
>> +static int next_rkey(int rkey)
>> +{
>> + return (rkey & 0xffffff00) | ((rkey + 1) & 0x000000ff);
>> +}
>> +
>> +static int rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list,
>> + struct ibv_send_wr **bad_wr);
>> +
>> +static int rxe_bind_mw(struct ibv_qp *ibqp, struct ibv_mw *ibmw,
>> + struct ibv_mw_bind *mw_bind)
>> +{
>> + int ret;
>> + struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info;
>> + struct ibv_send_wr ibwr;
>> + struct ibv_send_wr *bad_wr;
>> +
>> + if (!bind_info->mr && (bind_info->addr || bind_info->length)) {
>> + ret = EINVAL;
>> + goto err;
>> + }
>> +
>> + if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) {
>> + ret = EINVAL;
>> + goto err;
>> + }
>> +
>> + if (bind_info->mr) {
>> + if (ibmw->pd != bind_info->mr->pd) {
>> + ret = EPERM;
>> + goto err;
>> + }
>> + }
>> +
>> + memset(&ibwr, 0, sizeof(ibwr));
>> +
>> + ibwr.opcode = IBV_WR_BIND_MW;
>> + ibwr.next = NULL;
>> + ibwr.wr_id = mw_bind->wr_id;
>> + ibwr.send_flags = mw_bind->send_flags;
>> + ibwr.bind_mw.bind_info = mw_bind->bind_info;
>> + ibwr.bind_mw.mw = ibmw;
>> + ibwr.bind_mw.rkey = next_rkey(ibmw->rkey);
>> +
>> + ret = rxe_post_send(ibqp, &ibwr, &bad_wr);
>> + if (ret)
>> + goto err;
>> +
>> + /* user has to undo this if he gets an error wc */
>> + ibmw->rkey = ibwr.bind_mw.rkey;
>> +
>> + return 0;
>> +err:
>> + errno = ret;
>> + return errno;
>> +}
>> +
>> static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
>> uint64_t hca_va, int access)
>> {
>> @@ -715,6 +804,31 @@ static void wr_atomic_fetch_add(struct ibv_qp_ex *ibqp, uint32_t rkey,
>> advance_qp_cur_index(qp);
>> }
>>
>> +static void wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw,
>> + uint32_t rkey, const struct ibv_mw_bind_info *info)
>> +{
>> + struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
>> + struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
>> +
>> + if (check_qp_queue_full(qp))
>> + return;
>> +
>> + memset(wqe, 0, sizeof(*wqe));
>> +
>> + wqe->wr.wr_id = ibqp->wr_id;
>> + wqe->wr.opcode = IBV_WR_BIND_MW;
>> + wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
>> + wqe->wr.wr.mw.addr = info->addr;
>> + wqe->wr.wr.mw.length = info->length;
>> + wqe->wr.wr.mw.mr_lkey = info->mr->lkey;
>> + wqe->wr.wr.mw.mw_rkey = ibmw->rkey;
>> + wqe->wr.wr.mw.rkey = rkey;
>> + wqe->wr.wr.mw.access = info->mw_access_flags;
>> + wqe->ssn = qp->ssn++;
>> +
>> + advance_qp_cur_index(qp);
>> +}
>> +
>> static void wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey)
>> {
>> struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
>> @@ -1106,6 +1220,7 @@ enum {
>> | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP
>> | IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD
>> | IBV_QP_EX_WITH_LOCAL_INV
>> + | IBV_QP_EX_WITH_BIND_MW
>> | IBV_QP_EX_WITH_SEND_WITH_INV,
>>
>> RXE_SUP_UC_QP_SEND_OPS_FLAGS =
>> @@ -1113,6 +1228,7 @@ enum {
>> | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM
>> | IBV_QP_EX_WITH_SEND
>> | IBV_QP_EX_WITH_SEND_WITH_IMM
>> + | IBV_QP_EX_WITH_BIND_MW
>> | IBV_QP_EX_WITH_SEND_WITH_INV,
>>
>> RXE_SUP_UD_QP_SEND_OPS_FLAGS =
>> @@ -1162,6 +1278,9 @@ static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags)
>> if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD)
>> qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add;
>>
>> + if (flags & IBV_QP_EX_WITH_BIND_MW)
>> + qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw;
>> +
>> if (flags & IBV_QP_EX_WITH_LOCAL_INV)
>> qp->vqp.qp_ex.wr_local_inv = wr_local_inv;
>>
>> @@ -1275,9 +1394,10 @@ static int rxe_destroy_qp(struct ibv_qp *ibqp)
>> }
>>
>> /* basic sanity checks for send work request */
>> -static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
>> +static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr,
>> unsigned int length)
>> {
>> + struct rxe_wq *sq = &qp->sq;
>> enum ibv_wr_opcode opcode = ibwr->opcode;
>>
>> if (ibwr->num_sge > sq->max_sge)
>> @@ -1291,11 +1411,26 @@ static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
>> if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline))
>> return -EINVAL;
>>
>> + if (ibwr->opcode == IBV_WR_BIND_MW) {
>> + if (length)
>> + return -EINVAL;
>> + if (ibwr->num_sge)
>> + return -EINVAL;
>> + if (ibwr->imm_data)
>> + return -EINVAL;
>> + if ((qp_type(qp) != IBV_QPT_RC) &&
>> + (qp_type(qp) != IBV_QPT_UC))
>> + return -EINVAL;
>> + }
>> +
>> return 0;
>> }
>>
>> static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
>> {
>> + struct ibv_mw *ibmw;
>> + struct ibv_mr *ibmr;
>> +
>> memset(kwr, 0, sizeof(*kwr));
>>
>> kwr->wr_id = uwr->wr_id;
>> @@ -1326,6 +1461,18 @@ static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
>> kwr->wr.atomic.rkey = uwr->wr.atomic.rkey;
>> break;
>>
>> + case IBV_WR_BIND_MW:
>> + ibmr = uwr->bind_mw.bind_info.mr;
>> + ibmw = uwr->bind_mw.mw;
>> +
>> + kwr->wr.mw.addr = uwr->bind_mw.bind_info.addr;
>> + kwr->wr.mw.length = uwr->bind_mw.bind_info.length;
>> + kwr->wr.mw.mr_lkey = ibmr->lkey;
>> + kwr->wr.mw.mw_rkey = ibmw->rkey;
>> + kwr->wr.mw.rkey = uwr->bind_mw.rkey;
>> + kwr->wr.mw.access = uwr->bind_mw.bind_info.mw_access_flags;
>> + break;
>> +
>> default:
>> break;
>> }
>> @@ -1348,6 +1495,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
>> if (ibwr->send_flags & IBV_SEND_INLINE) {
>> uint8_t *inline_data = wqe->dma.inline_data;
>>
>> + wqe->dma.resid = 0;
>> +
>> for (i = 0; i < num_sge; i++) {
>> memcpy(inline_data,
>> (uint8_t *)(long)ibwr->sg_list[i].addr,
>> @@ -1363,6 +1512,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
>> wqe->iova = ibwr->wr.atomic.remote_addr;
>> else
>> wqe->iova = ibwr->wr.rdma.remote_addr;
>> +
>> wqe->dma.length = length;
>> wqe->dma.resid = length;
>> wqe->dma.num_sge = num_sge;
>> @@ -1385,7 +1535,7 @@ static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq,
>> for (i = 0; i < ibwr->num_sge; i++)
>> length += ibwr->sg_list[i].length;
>>
>> - err = validate_send_wr(sq, ibwr, length);
>> + err = validate_send_wr(qp, ibwr, length);
>> if (err) {
>> printf("validate send failed\n");
>> return err;
>> @@ -1579,6 +1729,9 @@ static const struct verbs_context_ops rxe_ctx_ops = {
>> .dealloc_pd = rxe_dealloc_pd,
>> .reg_mr = rxe_reg_mr,
>> .dereg_mr = rxe_dereg_mr,
>> + .alloc_mw = rxe_alloc_mw,
>> + .dealloc_mw = rxe_dealloc_mw,
>> + .bind_mw = rxe_bind_mw,
>> .create_cq = rxe_create_cq,
>> .create_cq_ex = rxe_create_cq_ex,
>> .poll_cq = rxe_poll_cq,
>> --
>> 2.30.2
>>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows
2021-06-08 7:01 ` Bob Pearson
@ 2021-06-08 7:17 ` Zhu Yanjun
2021-06-08 23:18 ` Pearson, Robert B
0 siblings, 1 reply; 10+ messages in thread
From: Zhu Yanjun @ 2021-06-08 7:17 UTC (permalink / raw)
To: Bob Pearson; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list
On Tue, Jun 8, 2021 at 3:01 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>
> On 6/8/21 1:54 AM, Zhu Yanjun wrote:
> > On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
> >>
> >> This patch makes the required changes to the rxe provider to support the
> >> kernel memory windows patches to the rxe driver.
> >>
> >> The following changes are made:
> >> - Add ibv_alloc_mw verb
> >> - Add ibv_dealloc_mw verb
> >> - Add ibv_bind_mw verb for type 1 MWs
> >> - Add support for bind MW send work requests through the traditional
> >> QP API and the extended QP API.
> >>
> >> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> >> ---
> >> v2:
> >> Added support for extended QP bind MW work requests.
> >
> > I got the following errors.
> > Is it a known issue?
> >
> > # ./bin/run_tests.py --dev rxe0
> > .............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss
> > ======================================================================
> > ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase)
> > Verify bind memory window operation using the new post_send API.
> > ----------------------------------------------------------------------
> > Traceback (most recent call last):
> > File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw
> > u.poll_cq(server.cq)
> > File "/root/rdma-core/tests/utils.py", line 538, in poll_cq
> > raise PyverbsRDMAError('Completion status is {s}'.
> > pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory
> > window bind error. Errno: 6, No such device or address
> >
> > ----------------------------------------------------------------------
> > Ran 193 tests in 2.544s
> >
> > FAILED (errors=1, skipped=128)
> >
> > Zhu Yanjun
> Yes. It is because the test is not setting the BIND_MW access for the MR. It is not permitted to bind an MW to a MR that does not have the BIND_MW access flag set. But this test mistakenly thinks it is going to work. The other MW tests in test_mr.py do set the BIND_MW access flag.
Thanks a lot.
If the root cause of this problem is correct, and setting the BIND_MW
access for the MR can fix this problem,
Please file a commit to fix this problem.
I am fine with this MW patch series if all the problems (including
rdma-core and rxe in kernel) are fixed.
Thanks for your effort.
Zhu Yanjun
> Bob
> >
> >> ---
> >> providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++++++++++++++++++-
> >> 1 file changed, 155 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
> >> index a68656ae..bb39ef04 100644
> >> --- a/providers/rxe/rxe.c
> >> +++ b/providers/rxe/rxe.c
> >> @@ -128,6 +128,95 @@ static int rxe_dealloc_pd(struct ibv_pd *pd)
> >> return ret;
> >> }
> >>
> >> +static struct ibv_mw *rxe_alloc_mw(struct ibv_pd *ibpd, enum ibv_mw_type type)
> >> +{
> >> + int ret;
> >> + struct ibv_mw *ibmw;
> >> + struct ibv_alloc_mw cmd = {};
> >> + struct ib_uverbs_alloc_mw_resp resp = {};
> >> +
> >> + ibmw = calloc(1, sizeof(*ibmw));
> >> + if (!ibmw)
> >> + return NULL;
> >> +
> >> + ret = ibv_cmd_alloc_mw(ibpd, type, ibmw, &cmd, sizeof(cmd),
> >> + &resp, sizeof(resp));
> >> + if (ret) {
> >> + free(ibmw);
> >> + return NULL;
> >> + }
> >> +
> >> + return ibmw;
> >> +}
> >> +
> >> +static int rxe_dealloc_mw(struct ibv_mw *ibmw)
> >> +{
> >> + int ret;
> >> +
> >> + ret = ibv_cmd_dealloc_mw(ibmw);
> >> + if (ret)
> >> + return ret;
> >> +
> >> + free(ibmw);
> >> + return 0;
> >> +}
> >> +
> >> +static int next_rkey(int rkey)
> >> +{
> >> + return (rkey & 0xffffff00) | ((rkey + 1) & 0x000000ff);
> >> +}
> >> +
> >> +static int rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list,
> >> + struct ibv_send_wr **bad_wr);
> >> +
> >> +static int rxe_bind_mw(struct ibv_qp *ibqp, struct ibv_mw *ibmw,
> >> + struct ibv_mw_bind *mw_bind)
> >> +{
> >> + int ret;
> >> + struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info;
> >> + struct ibv_send_wr ibwr;
> >> + struct ibv_send_wr *bad_wr;
> >> +
> >> + if (!bind_info->mr && (bind_info->addr || bind_info->length)) {
> >> + ret = EINVAL;
> >> + goto err;
> >> + }
> >> +
> >> + if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) {
> >> + ret = EINVAL;
> >> + goto err;
> >> + }
> >> +
> >> + if (bind_info->mr) {
> >> + if (ibmw->pd != bind_info->mr->pd) {
> >> + ret = EPERM;
> >> + goto err;
> >> + }
> >> + }
> >> +
> >> + memset(&ibwr, 0, sizeof(ibwr));
> >> +
> >> + ibwr.opcode = IBV_WR_BIND_MW;
> >> + ibwr.next = NULL;
> >> + ibwr.wr_id = mw_bind->wr_id;
> >> + ibwr.send_flags = mw_bind->send_flags;
> >> + ibwr.bind_mw.bind_info = mw_bind->bind_info;
> >> + ibwr.bind_mw.mw = ibmw;
> >> + ibwr.bind_mw.rkey = next_rkey(ibmw->rkey);
> >> +
> >> + ret = rxe_post_send(ibqp, &ibwr, &bad_wr);
> >> + if (ret)
> >> + goto err;
> >> +
> >> + /* user has to undo this if he gets an error wc */
> >> + ibmw->rkey = ibwr.bind_mw.rkey;
> >> +
> >> + return 0;
> >> +err:
> >> + errno = ret;
> >> + return errno;
> >> +}
> >> +
> >> static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> >> uint64_t hca_va, int access)
> >> {
> >> @@ -715,6 +804,31 @@ static void wr_atomic_fetch_add(struct ibv_qp_ex *ibqp, uint32_t rkey,
> >> advance_qp_cur_index(qp);
> >> }
> >>
> >> +static void wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw,
> >> + uint32_t rkey, const struct ibv_mw_bind_info *info)
> >> +{
> >> + struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
> >> + struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
> >> +
> >> + if (check_qp_queue_full(qp))
> >> + return;
> >> +
> >> + memset(wqe, 0, sizeof(*wqe));
> >> +
> >> + wqe->wr.wr_id = ibqp->wr_id;
> >> + wqe->wr.opcode = IBV_WR_BIND_MW;
> >> + wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
> >> + wqe->wr.wr.mw.addr = info->addr;
> >> + wqe->wr.wr.mw.length = info->length;
> >> + wqe->wr.wr.mw.mr_lkey = info->mr->lkey;
> >> + wqe->wr.wr.mw.mw_rkey = ibmw->rkey;
> >> + wqe->wr.wr.mw.rkey = rkey;
> >> + wqe->wr.wr.mw.access = info->mw_access_flags;
> >> + wqe->ssn = qp->ssn++;
> >> +
> >> + advance_qp_cur_index(qp);
> >> +}
> >> +
> >> static void wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey)
> >> {
> >> struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
> >> @@ -1106,6 +1220,7 @@ enum {
> >> | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP
> >> | IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD
> >> | IBV_QP_EX_WITH_LOCAL_INV
> >> + | IBV_QP_EX_WITH_BIND_MW
> >> | IBV_QP_EX_WITH_SEND_WITH_INV,
> >>
> >> RXE_SUP_UC_QP_SEND_OPS_FLAGS =
> >> @@ -1113,6 +1228,7 @@ enum {
> >> | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM
> >> | IBV_QP_EX_WITH_SEND
> >> | IBV_QP_EX_WITH_SEND_WITH_IMM
> >> + | IBV_QP_EX_WITH_BIND_MW
> >> | IBV_QP_EX_WITH_SEND_WITH_INV,
> >>
> >> RXE_SUP_UD_QP_SEND_OPS_FLAGS =
> >> @@ -1162,6 +1278,9 @@ static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags)
> >> if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD)
> >> qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add;
> >>
> >> + if (flags & IBV_QP_EX_WITH_BIND_MW)
> >> + qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw;
> >> +
> >> if (flags & IBV_QP_EX_WITH_LOCAL_INV)
> >> qp->vqp.qp_ex.wr_local_inv = wr_local_inv;
> >>
> >> @@ -1275,9 +1394,10 @@ static int rxe_destroy_qp(struct ibv_qp *ibqp)
> >> }
> >>
> >> /* basic sanity checks for send work request */
> >> -static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
> >> +static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr,
> >> unsigned int length)
> >> {
> >> + struct rxe_wq *sq = &qp->sq;
> >> enum ibv_wr_opcode opcode = ibwr->opcode;
> >>
> >> if (ibwr->num_sge > sq->max_sge)
> >> @@ -1291,11 +1411,26 @@ static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
> >> if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline))
> >> return -EINVAL;
> >>
> >> + if (ibwr->opcode == IBV_WR_BIND_MW) {
> >> + if (length)
> >> + return -EINVAL;
> >> + if (ibwr->num_sge)
> >> + return -EINVAL;
> >> + if (ibwr->imm_data)
> >> + return -EINVAL;
> >> + if ((qp_type(qp) != IBV_QPT_RC) &&
> >> + (qp_type(qp) != IBV_QPT_UC))
> >> + return -EINVAL;
> >> + }
> >> +
> >> return 0;
> >> }
> >>
> >> static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
> >> {
> >> + struct ibv_mw *ibmw;
> >> + struct ibv_mr *ibmr;
> >> +
> >> memset(kwr, 0, sizeof(*kwr));
> >>
> >> kwr->wr_id = uwr->wr_id;
> >> @@ -1326,6 +1461,18 @@ static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
> >> kwr->wr.atomic.rkey = uwr->wr.atomic.rkey;
> >> break;
> >>
> >> + case IBV_WR_BIND_MW:
> >> + ibmr = uwr->bind_mw.bind_info.mr;
> >> + ibmw = uwr->bind_mw.mw;
> >> +
> >> + kwr->wr.mw.addr = uwr->bind_mw.bind_info.addr;
> >> + kwr->wr.mw.length = uwr->bind_mw.bind_info.length;
> >> + kwr->wr.mw.mr_lkey = ibmr->lkey;
> >> + kwr->wr.mw.mw_rkey = ibmw->rkey;
> >> + kwr->wr.mw.rkey = uwr->bind_mw.rkey;
> >> + kwr->wr.mw.access = uwr->bind_mw.bind_info.mw_access_flags;
> >> + break;
> >> +
> >> default:
> >> break;
> >> }
> >> @@ -1348,6 +1495,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
> >> if (ibwr->send_flags & IBV_SEND_INLINE) {
> >> uint8_t *inline_data = wqe->dma.inline_data;
> >>
> >> + wqe->dma.resid = 0;
> >> +
> >> for (i = 0; i < num_sge; i++) {
> >> memcpy(inline_data,
> >> (uint8_t *)(long)ibwr->sg_list[i].addr,
> >> @@ -1363,6 +1512,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
> >> wqe->iova = ibwr->wr.atomic.remote_addr;
> >> else
> >> wqe->iova = ibwr->wr.rdma.remote_addr;
> >> +
> >> wqe->dma.length = length;
> >> wqe->dma.resid = length;
> >> wqe->dma.num_sge = num_sge;
> >> @@ -1385,7 +1535,7 @@ static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq,
> >> for (i = 0; i < ibwr->num_sge; i++)
> >> length += ibwr->sg_list[i].length;
> >>
> >> - err = validate_send_wr(sq, ibwr, length);
> >> + err = validate_send_wr(qp, ibwr, length);
> >> if (err) {
> >> printf("validate send failed\n");
> >> return err;
> >> @@ -1579,6 +1729,9 @@ static const struct verbs_context_ops rxe_ctx_ops = {
> >> .dealloc_pd = rxe_dealloc_pd,
> >> .reg_mr = rxe_reg_mr,
> >> .dereg_mr = rxe_dereg_mr,
> >> + .alloc_mw = rxe_alloc_mw,
> >> + .dealloc_mw = rxe_dealloc_mw,
> >> + .bind_mw = rxe_bind_mw,
> >> .create_cq = rxe_create_cq,
> >> .create_cq_ex = rxe_create_cq_ex,
> >> .poll_cq = rxe_poll_cq,
> >> --
> >> 2.30.2
> >>
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows
2021-06-08 7:17 ` Zhu Yanjun
@ 2021-06-08 23:18 ` Pearson, Robert B
2021-06-09 1:51 ` Zhu Yanjun
0 siblings, 1 reply; 10+ messages in thread
From: Pearson, Robert B @ 2021-06-08 23:18 UTC (permalink / raw)
To: Zhu Yanjun; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list
On 6/8/2021 2:17 AM, Zhu Yanjun wrote:
> On Tue, Jun 8, 2021 at 3:01 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>> On 6/8/21 1:54 AM, Zhu Yanjun wrote:
>>> On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>>>> This patch makes the required changes to the rxe provider to support the
>>>> kernel memory windows patches to the rxe driver.
>>>>
>>>> The following changes are made:
>>>> - Add ibv_alloc_mw verb
>>>> - Add ibv_dealloc_mw verb
>>>> - Add ibv_bind_mw verb for type 1 MWs
>>>> - Add support for bind MW send work requests through the traditional
>>>> QP API and the extended QP API.
>>>>
>>>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
>>>> ---
>>>> v2:
>>>> Added support for extended QP bind MW work requests.
>>> I got the following errors.
>>> Is it a known issue?
>>>
>>> # ./bin/run_tests.py --dev rxe0
>>> .............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss
>>> ======================================================================
>>> ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase)
>>> Verify bind memory window operation using the new post_send API.
>>> ----------------------------------------------------------------------
>>> Traceback (most recent call last):
>>> File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw
>>> u.poll_cq(server.cq)
>>> File "/root/rdma-core/tests/utils.py", line 538, in poll_cq
>>> raise PyverbsRDMAError('Completion status is {s}'.
>>> pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory
>>> window bind error. Errno: 6, No such device or address
>>>
>>> ----------------------------------------------------------------------
>>> Ran 193 tests in 2.544s
>>>
>>> FAILED (errors=1, skipped=128)
>>>
>>> Zhu Yanjun
>> Yes. It is because the test is not setting the BIND_MW access for the MR. It is not permitted to bind an MW to a MR that does not have the BIND_MW access flag set. But this test mistakenly thinks it is going to work. The other MW tests in test_mr.py do set the BIND_MW access flag.
> Thanks a lot.
> If the root cause to this problem is correct, and setting the BIND_MW
> access for the MR can fix this problem,
>
> Please file a commit to fix this problem.
>
> I am fine with this MW patch series if all the problems (including
> rdma-core and rxe in kernel) are fixed.
>
> Thanks for your effort.
>
> Zhu Yanjun
Zhu,
There was a second test bug, now fixed. And all the python tests are
passing or skipped. No errors. There is a message to the linux-rdma list
from Edward Srouji indicating that the patch is upstream at github or
you can use the one in my email to him. It will be a while until his PR
goes upstream, I would guess.
I sent a fresh set of patches last night for both the kernel and user
space provider.
Bob
>> Bob
>>>> ---
>>>> providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++++++++++++++++++-
>>>> 1 file changed, 155 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
>>>> index a68656ae..bb39ef04 100644
>>>> --- a/providers/rxe/rxe.c
>>>> +++ b/providers/rxe/rxe.c
>>>> @@ -128,6 +128,95 @@ static int rxe_dealloc_pd(struct ibv_pd *pd)
>>>> return ret;
>>>> }
>>>>
>>>> +static struct ibv_mw *rxe_alloc_mw(struct ibv_pd *ibpd, enum ibv_mw_type type)
>>>> +{
>>>> + int ret;
>>>> + struct ibv_mw *ibmw;
>>>> + struct ibv_alloc_mw cmd = {};
>>>> + struct ib_uverbs_alloc_mw_resp resp = {};
>>>> +
>>>> + ibmw = calloc(1, sizeof(*ibmw));
>>>> + if (!ibmw)
>>>> + return NULL;
>>>> +
>>>> + ret = ibv_cmd_alloc_mw(ibpd, type, ibmw, &cmd, sizeof(cmd),
>>>> + &resp, sizeof(resp));
>>>> + if (ret) {
>>>> + free(ibmw);
>>>> + return NULL;
>>>> + }
>>>> +
>>>> + return ibmw;
>>>> +}
>>>> +
>>>> +static int rxe_dealloc_mw(struct ibv_mw *ibmw)
>>>> +{
>>>> + int ret;
>>>> +
>>>> + ret = ibv_cmd_dealloc_mw(ibmw);
>>>> + if (ret)
>>>> + return ret;
>>>> +
>>>> + free(ibmw);
>>>> + return 0;
>>>> +}
>>>> +
>>>> +static int next_rkey(int rkey)
>>>> +{
>>>> + return (rkey & 0xffffff00) | ((rkey + 1) & 0x000000ff);
>>>> +}
>>>> +
>>>> +static int rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list,
>>>> + struct ibv_send_wr **bad_wr);
>>>> +
>>>> +static int rxe_bind_mw(struct ibv_qp *ibqp, struct ibv_mw *ibmw,
>>>> + struct ibv_mw_bind *mw_bind)
>>>> +{
>>>> + int ret;
>>>> + struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info;
>>>> + struct ibv_send_wr ibwr;
>>>> + struct ibv_send_wr *bad_wr;
>>>> +
>>>> + if (!bind_info->mr && (bind_info->addr || bind_info->length)) {
>>>> + ret = EINVAL;
>>>> + goto err;
>>>> + }
>>>> +
>>>> + if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) {
>>>> + ret = EINVAL;
>>>> + goto err;
>>>> + }
>>>> +
>>>> + if (bind_info->mr) {
>>>> + if (ibmw->pd != bind_info->mr->pd) {
>>>> + ret = EPERM;
>>>> + goto err;
>>>> + }
>>>> + }
>>>> +
>>>> + memset(&ibwr, 0, sizeof(ibwr));
>>>> +
>>>> + ibwr.opcode = IBV_WR_BIND_MW;
>>>> + ibwr.next = NULL;
>>>> + ibwr.wr_id = mw_bind->wr_id;
>>>> + ibwr.send_flags = mw_bind->send_flags;
>>>> + ibwr.bind_mw.bind_info = mw_bind->bind_info;
>>>> + ibwr.bind_mw.mw = ibmw;
>>>> + ibwr.bind_mw.rkey = next_rkey(ibmw->rkey);
>>>> +
>>>> + ret = rxe_post_send(ibqp, &ibwr, &bad_wr);
>>>> + if (ret)
>>>> + goto err;
>>>> +
>>>> + /* user has to undo this if he gets an error wc */
>>>> + ibmw->rkey = ibwr.bind_mw.rkey;
>>>> +
>>>> + return 0;
>>>> +err:
>>>> + errno = ret;
>>>> + return errno;
>>>> +}
>>>> +
>>>> static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
>>>> uint64_t hca_va, int access)
>>>> {
>>>> @@ -715,6 +804,31 @@ static void wr_atomic_fetch_add(struct ibv_qp_ex *ibqp, uint32_t rkey,
>>>> advance_qp_cur_index(qp);
>>>> }
>>>>
>>>> +static void wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw,
>>>> + uint32_t rkey, const struct ibv_mw_bind_info *info)
>>>> +{
>>>> + struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
>>>> + struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
>>>> +
>>>> + if (check_qp_queue_full(qp))
>>>> + return;
>>>> +
>>>> + memset(wqe, 0, sizeof(*wqe));
>>>> +
>>>> + wqe->wr.wr_id = ibqp->wr_id;
>>>> + wqe->wr.opcode = IBV_WR_BIND_MW;
>>>> + wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
>>>> + wqe->wr.wr.mw.addr = info->addr;
>>>> + wqe->wr.wr.mw.length = info->length;
>>>> + wqe->wr.wr.mw.mr_lkey = info->mr->lkey;
>>>> + wqe->wr.wr.mw.mw_rkey = ibmw->rkey;
>>>> + wqe->wr.wr.mw.rkey = rkey;
>>>> + wqe->wr.wr.mw.access = info->mw_access_flags;
>>>> + wqe->ssn = qp->ssn++;
>>>> +
>>>> + advance_qp_cur_index(qp);
>>>> +}
>>>> +
>>>> static void wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey)
>>>> {
>>>> struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
>>>> @@ -1106,6 +1220,7 @@ enum {
>>>> | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP
>>>> | IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD
>>>> | IBV_QP_EX_WITH_LOCAL_INV
>>>> + | IBV_QP_EX_WITH_BIND_MW
>>>> | IBV_QP_EX_WITH_SEND_WITH_INV,
>>>>
>>>> RXE_SUP_UC_QP_SEND_OPS_FLAGS =
>>>> @@ -1113,6 +1228,7 @@ enum {
>>>> | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM
>>>> | IBV_QP_EX_WITH_SEND
>>>> | IBV_QP_EX_WITH_SEND_WITH_IMM
>>>> + | IBV_QP_EX_WITH_BIND_MW
>>>> | IBV_QP_EX_WITH_SEND_WITH_INV,
>>>>
>>>> RXE_SUP_UD_QP_SEND_OPS_FLAGS =
>>>> @@ -1162,6 +1278,9 @@ static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags)
>>>> if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD)
>>>> qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add;
>>>>
>>>> + if (flags & IBV_QP_EX_WITH_BIND_MW)
>>>> + qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw;
>>>> +
>>>> if (flags & IBV_QP_EX_WITH_LOCAL_INV)
>>>> qp->vqp.qp_ex.wr_local_inv = wr_local_inv;
>>>>
>>>> @@ -1275,9 +1394,10 @@ static int rxe_destroy_qp(struct ibv_qp *ibqp)
>>>> }
>>>>
>>>> /* basic sanity checks for send work request */
>>>> -static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
>>>> +static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr,
>>>> unsigned int length)
>>>> {
>>>> + struct rxe_wq *sq = &qp->sq;
>>>> enum ibv_wr_opcode opcode = ibwr->opcode;
>>>>
>>>> if (ibwr->num_sge > sq->max_sge)
>>>> @@ -1291,11 +1411,26 @@ static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
>>>> if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline))
>>>> return -EINVAL;
>>>>
>>>> + if (ibwr->opcode == IBV_WR_BIND_MW) {
>>>> + if (length)
>>>> + return -EINVAL;
>>>> + if (ibwr->num_sge)
>>>> + return -EINVAL;
>>>> + if (ibwr->imm_data)
>>>> + return -EINVAL;
>>>> + if ((qp_type(qp) != IBV_QPT_RC) &&
>>>> + (qp_type(qp) != IBV_QPT_UC))
>>>> + return -EINVAL;
>>>> + }
>>>> +
>>>> return 0;
>>>> }
>>>>
>>>> static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
>>>> {
>>>> + struct ibv_mw *ibmw;
>>>> + struct ibv_mr *ibmr;
>>>> +
>>>> memset(kwr, 0, sizeof(*kwr));
>>>>
>>>> kwr->wr_id = uwr->wr_id;
>>>> @@ -1326,6 +1461,18 @@ static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
>>>> kwr->wr.atomic.rkey = uwr->wr.atomic.rkey;
>>>> break;
>>>>
>>>> + case IBV_WR_BIND_MW:
>>>> + ibmr = uwr->bind_mw.bind_info.mr;
>>>> + ibmw = uwr->bind_mw.mw;
>>>> +
>>>> + kwr->wr.mw.addr = uwr->bind_mw.bind_info.addr;
>>>> + kwr->wr.mw.length = uwr->bind_mw.bind_info.length;
>>>> + kwr->wr.mw.mr_lkey = ibmr->lkey;
>>>> + kwr->wr.mw.mw_rkey = ibmw->rkey;
>>>> + kwr->wr.mw.rkey = uwr->bind_mw.rkey;
>>>> + kwr->wr.mw.access = uwr->bind_mw.bind_info.mw_access_flags;
>>>> + break;
>>>> +
>>>> default:
>>>> break;
>>>> }
>>>> @@ -1348,6 +1495,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
>>>> if (ibwr->send_flags & IBV_SEND_INLINE) {
>>>> uint8_t *inline_data = wqe->dma.inline_data;
>>>>
>>>> + wqe->dma.resid = 0;
>>>> +
>>>> for (i = 0; i < num_sge; i++) {
>>>> memcpy(inline_data,
>>>> (uint8_t *)(long)ibwr->sg_list[i].addr,
>>>> @@ -1363,6 +1512,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
>>>> wqe->iova = ibwr->wr.atomic.remote_addr;
>>>> else
>>>> wqe->iova = ibwr->wr.rdma.remote_addr;
>>>> +
>>>> wqe->dma.length = length;
>>>> wqe->dma.resid = length;
>>>> wqe->dma.num_sge = num_sge;
>>>> @@ -1385,7 +1535,7 @@ static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq,
>>>> for (i = 0; i < ibwr->num_sge; i++)
>>>> length += ibwr->sg_list[i].length;
>>>>
>>>> - err = validate_send_wr(sq, ibwr, length);
>>>> + err = validate_send_wr(qp, ibwr, length);
>>>> if (err) {
>>>> printf("validate send failed\n");
>>>> return err;
>>>> @@ -1579,6 +1729,9 @@ static const struct verbs_context_ops rxe_ctx_ops = {
>>>> .dealloc_pd = rxe_dealloc_pd,
>>>> .reg_mr = rxe_reg_mr,
>>>> .dereg_mr = rxe_dereg_mr,
>>>> + .alloc_mw = rxe_alloc_mw,
>>>> + .dealloc_mw = rxe_dealloc_mw,
>>>> + .bind_mw = rxe_bind_mw,
>>>> .create_cq = rxe_create_cq,
>>>> .create_cq_ex = rxe_create_cq_ex,
>>>> .poll_cq = rxe_poll_cq,
>>>> --
>>>> 2.30.2
>>>>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows
2021-06-08 23:18 ` Pearson, Robert B
@ 2021-06-09 1:51 ` Zhu Yanjun
2021-06-09 1:59 ` Pearson, Robert B
0 siblings, 1 reply; 10+ messages in thread
From: Zhu Yanjun @ 2021-06-09 1:51 UTC (permalink / raw)
To: Pearson, Robert B; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list
On Wed, Jun 9, 2021 at 7:18 AM Pearson, Robert B <rpearsonhpe@gmail.com> wrote:
>
>
> On 6/8/2021 2:17 AM, Zhu Yanjun wrote:
> > On Tue, Jun 8, 2021 at 3:01 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
> >> On 6/8/21 1:54 AM, Zhu Yanjun wrote:
> >>> On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
> >>>> This patch makes the required changes to the rxe provider to support the
> >>>> kernel memory windows patches to the rxe driver.
> >>>>
> >>>> The following changes are made:
> >>>> - Add ibv_alloc_mw verb
> >>>> - Add ibv_dealloc_mw verb
> >>>> - Add ibv_bind_mw verb for type 1 MWs
> >>>> - Add support for bind MW send work requests through the traditional
> >>>> QP API and the extended QP API.
> >>>>
> >>>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> >>>> ---
> >>>> v2:
> >>>> Added support for extended QP bind MW work requests.
> >>> I got the following errors.
> >>> Is it a known issue?
> >>>
> >>> # ./bin/run_tests.py --dev rxe0
> >>> .............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss
> >>> ======================================================================
> >>> ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase)
> >>> Verify bind memory window operation using the new post_send API.
> >>> ----------------------------------------------------------------------
> >>> Traceback (most recent call last):
> >>> File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw
> >>> u.poll_cq(server.cq)
> >>> File "/root/rdma-core/tests/utils.py", line 538, in poll_cq
> >>> raise PyverbsRDMAError('Completion status is {s}'.
> >>> pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory
> >>> window bind error. Errno: 6, No such device or address
> >>>
> >>> ----------------------------------------------------------------------
> >>> Ran 193 tests in 2.544s
> >>>
> >>> FAILED (errors=1, skipped=128)
> >>>
> >>> Zhu Yanjun
> >> Yes. It is because the test is not setting the BIND_MW access for the MR. It is not permitted to bind an MW to a MR that does not have the BIND_MW access flag set. But this test mistakenly thinks it is going to work. The other MW tests in test_mr.py do set the BIND_MW access flag.
> > Thanks a lot.
> > If the root cause to this problem is correct, and setting the BIND_MW
> > access for the MR can fix this problem,
> >
> > Please file a commit to fix this problem.
> >
> > I am fine with this MW patch series if all the problems (including
> > rdma-core and rxe in kernel) are fixed.
> >
> > Thanks for your effort.
> >
> > Zhu Yanjun
>
> Zhu,
>
> There was a second test bug, now fixed. And all the python tests are
> passing or skipped. No errors. There is a message to the linux-rdma list
> from Edward Srouji indicating that the patch is upstream at github or
> you can use the one in my email to him. It will be a while until his PR
> goes upstream, I would guess.
>
> I sent a fresh set of patches last night for both the kernel and user
> space provider.
Thanks a lot for your effort.
I applied your commits for rdma-core and rxe in kernel, and the following diff.
All the python tests in rdma-core pass or are skipped. No errors.
Thanks again.
"
diff --git a/tests/test_qpex.py b/tests/test_qpex.py
index 20288d45..41028e2d 100644
--- a/tests/test_qpex.py
+++ b/tests/test_qpex.py
@@ -146,10 +146,10 @@ class QpExRCAtomicFetchAdd(RCResources):
class QpExRCBindMw(RCResources):
def create_qps(self):
- create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_BIND_MW)
+ create_qp_ex(self, e.IBV_QPT_RC,
e.IBV_QP_EX_WITH_BIND_MW|e.IBV_QP_EX_WITH_RDMA_WRITE)
def create_mr(self):
- self.mr = u.create_custom_mr(self, e.IBV_ACCESS_REMOTE_WRITE)
+ self.mr = u.create_custom_mr(self,
e.IBV_ACCESS_REMOTE_WRITE|e.IBV_ACCESS_MW_BIND)
"
Zhu Yanjun
>
> Bob
>
> >> Bob
> >>>> ---
> >>>> providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++++++++++++++++++-
> >>>> 1 file changed, 155 insertions(+), 2 deletions(-)
> >>>>
> >>>> diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
> >>>> index a68656ae..bb39ef04 100644
> >>>> --- a/providers/rxe/rxe.c
> >>>> +++ b/providers/rxe/rxe.c
> >>>> @@ -128,6 +128,95 @@ static int rxe_dealloc_pd(struct ibv_pd *pd)
> >>>> return ret;
> >>>> }
> >>>>
> >>>> +static struct ibv_mw *rxe_alloc_mw(struct ibv_pd *ibpd, enum ibv_mw_type type)
> >>>> +{
> >>>> + int ret;
> >>>> + struct ibv_mw *ibmw;
> >>>> + struct ibv_alloc_mw cmd = {};
> >>>> + struct ib_uverbs_alloc_mw_resp resp = {};
> >>>> +
> >>>> + ibmw = calloc(1, sizeof(*ibmw));
> >>>> + if (!ibmw)
> >>>> + return NULL;
> >>>> +
> >>>> + ret = ibv_cmd_alloc_mw(ibpd, type, ibmw, &cmd, sizeof(cmd),
> >>>> + &resp, sizeof(resp));
> >>>> + if (ret) {
> >>>> + free(ibmw);
> >>>> + return NULL;
> >>>> + }
> >>>> +
> >>>> + return ibmw;
> >>>> +}
> >>>> +
> >>>> +static int rxe_dealloc_mw(struct ibv_mw *ibmw)
> >>>> +{
> >>>> + int ret;
> >>>> +
> >>>> + ret = ibv_cmd_dealloc_mw(ibmw);
> >>>> + if (ret)
> >>>> + return ret;
> >>>> +
> >>>> + free(ibmw);
> >>>> + return 0;
> >>>> +}
> >>>> +
> >>>> +static int next_rkey(int rkey)
> >>>> +{
> >>>> + return (rkey & 0xffffff00) | ((rkey + 1) & 0x000000ff);
> >>>> +}
> >>>> +
> >>>> +static int rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list,
> >>>> + struct ibv_send_wr **bad_wr);
> >>>> +
> >>>> +static int rxe_bind_mw(struct ibv_qp *ibqp, struct ibv_mw *ibmw,
> >>>> + struct ibv_mw_bind *mw_bind)
> >>>> +{
> >>>> + int ret;
> >>>> + struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info;
> >>>> + struct ibv_send_wr ibwr;
> >>>> + struct ibv_send_wr *bad_wr;
> >>>> +
> >>>> + if (!bind_info->mr && (bind_info->addr || bind_info->length)) {
> >>>> + ret = EINVAL;
> >>>> + goto err;
> >>>> + }
> >>>> +
> >>>> + if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) {
> >>>> + ret = EINVAL;
> >>>> + goto err;
> >>>> + }
> >>>> +
> >>>> + if (bind_info->mr) {
> >>>> + if (ibmw->pd != bind_info->mr->pd) {
> >>>> + ret = EPERM;
> >>>> + goto err;
> >>>> + }
> >>>> + }
> >>>> +
> >>>> + memset(&ibwr, 0, sizeof(ibwr));
> >>>> +
> >>>> + ibwr.opcode = IBV_WR_BIND_MW;
> >>>> + ibwr.next = NULL;
> >>>> + ibwr.wr_id = mw_bind->wr_id;
> >>>> + ibwr.send_flags = mw_bind->send_flags;
> >>>> + ibwr.bind_mw.bind_info = mw_bind->bind_info;
> >>>> + ibwr.bind_mw.mw = ibmw;
> >>>> + ibwr.bind_mw.rkey = next_rkey(ibmw->rkey);
> >>>> +
> >>>> + ret = rxe_post_send(ibqp, &ibwr, &bad_wr);
> >>>> + if (ret)
> >>>> + goto err;
> >>>> +
> >>>> + /* user has to undo this if he gets an error wc */
> >>>> + ibmw->rkey = ibwr.bind_mw.rkey;
> >>>> +
> >>>> + return 0;
> >>>> +err:
> >>>> + errno = ret;
> >>>> + return errno;
> >>>> +}
> >>>> +
> >>>> static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> >>>> uint64_t hca_va, int access)
> >>>> {
> >>>> @@ -715,6 +804,31 @@ static void wr_atomic_fetch_add(struct ibv_qp_ex *ibqp, uint32_t rkey,
> >>>> advance_qp_cur_index(qp);
> >>>> }
> >>>>
> >>>> +static void wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw,
> >>>> + uint32_t rkey, const struct ibv_mw_bind_info *info)
> >>>> +{
> >>>> + struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
> >>>> + struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
> >>>> +
> >>>> + if (check_qp_queue_full(qp))
> >>>> + return;
> >>>> +
> >>>> + memset(wqe, 0, sizeof(*wqe));
> >>>> +
> >>>> + wqe->wr.wr_id = ibqp->wr_id;
> >>>> + wqe->wr.opcode = IBV_WR_BIND_MW;
> >>>> + wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
> >>>> + wqe->wr.wr.mw.addr = info->addr;
> >>>> + wqe->wr.wr.mw.length = info->length;
> >>>> + wqe->wr.wr.mw.mr_lkey = info->mr->lkey;
> >>>> + wqe->wr.wr.mw.mw_rkey = ibmw->rkey;
> >>>> + wqe->wr.wr.mw.rkey = rkey;
> >>>> + wqe->wr.wr.mw.access = info->mw_access_flags;
> >>>> + wqe->ssn = qp->ssn++;
> >>>> +
> >>>> + advance_qp_cur_index(qp);
> >>>> +}
> >>>> +
> >>>> static void wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey)
> >>>> {
> >>>> struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
> >>>> @@ -1106,6 +1220,7 @@ enum {
> >>>> | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP
> >>>> | IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD
> >>>> | IBV_QP_EX_WITH_LOCAL_INV
> >>>> + | IBV_QP_EX_WITH_BIND_MW
> >>>> | IBV_QP_EX_WITH_SEND_WITH_INV,
> >>>>
> >>>> RXE_SUP_UC_QP_SEND_OPS_FLAGS =
> >>>> @@ -1113,6 +1228,7 @@ enum {
> >>>> | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM
> >>>> | IBV_QP_EX_WITH_SEND
> >>>> | IBV_QP_EX_WITH_SEND_WITH_IMM
> >>>> + | IBV_QP_EX_WITH_BIND_MW
> >>>> | IBV_QP_EX_WITH_SEND_WITH_INV,
> >>>>
> >>>> RXE_SUP_UD_QP_SEND_OPS_FLAGS =
> >>>> @@ -1162,6 +1278,9 @@ static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags)
> >>>> if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD)
> >>>> qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add;
> >>>>
> >>>> + if (flags & IBV_QP_EX_WITH_BIND_MW)
> >>>> + qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw;
> >>>> +
> >>>> if (flags & IBV_QP_EX_WITH_LOCAL_INV)
> >>>> qp->vqp.qp_ex.wr_local_inv = wr_local_inv;
> >>>>
> >>>> @@ -1275,9 +1394,10 @@ static int rxe_destroy_qp(struct ibv_qp *ibqp)
> >>>> }
> >>>>
> >>>> /* basic sanity checks for send work request */
> >>>> -static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
> >>>> +static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr,
> >>>> unsigned int length)
> >>>> {
> >>>> + struct rxe_wq *sq = &qp->sq;
> >>>> enum ibv_wr_opcode opcode = ibwr->opcode;
> >>>>
> >>>> if (ibwr->num_sge > sq->max_sge)
> >>>> @@ -1291,11 +1411,26 @@ static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
> >>>> if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline))
> >>>> return -EINVAL;
> >>>>
> >>>> + if (ibwr->opcode == IBV_WR_BIND_MW) {
> >>>> + if (length)
> >>>> + return -EINVAL;
> >>>> + if (ibwr->num_sge)
> >>>> + return -EINVAL;
> >>>> + if (ibwr->imm_data)
> >>>> + return -EINVAL;
> >>>> + if ((qp_type(qp) != IBV_QPT_RC) &&
> >>>> + (qp_type(qp) != IBV_QPT_UC))
> >>>> + return -EINVAL;
> >>>> + }
> >>>> +
> >>>> return 0;
> >>>> }
> >>>>
> >>>> static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
> >>>> {
> >>>> + struct ibv_mw *ibmw;
> >>>> + struct ibv_mr *ibmr;
> >>>> +
> >>>> memset(kwr, 0, sizeof(*kwr));
> >>>>
> >>>> kwr->wr_id = uwr->wr_id;
> >>>> @@ -1326,6 +1461,18 @@ static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
> >>>> kwr->wr.atomic.rkey = uwr->wr.atomic.rkey;
> >>>> break;
> >>>>
> >>>> + case IBV_WR_BIND_MW:
> >>>> + ibmr = uwr->bind_mw.bind_info.mr;
> >>>> + ibmw = uwr->bind_mw.mw;
> >>>> +
> >>>> + kwr->wr.mw.addr = uwr->bind_mw.bind_info.addr;
> >>>> + kwr->wr.mw.length = uwr->bind_mw.bind_info.length;
> >>>> + kwr->wr.mw.mr_lkey = ibmr->lkey;
> >>>> + kwr->wr.mw.mw_rkey = ibmw->rkey;
> >>>> + kwr->wr.mw.rkey = uwr->bind_mw.rkey;
> >>>> + kwr->wr.mw.access = uwr->bind_mw.bind_info.mw_access_flags;
> >>>> + break;
> >>>> +
> >>>> default:
> >>>> break;
> >>>> }
> >>>> @@ -1348,6 +1495,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
> >>>> if (ibwr->send_flags & IBV_SEND_INLINE) {
> >>>> uint8_t *inline_data = wqe->dma.inline_data;
> >>>>
> >>>> + wqe->dma.resid = 0;
> >>>> +
> >>>> for (i = 0; i < num_sge; i++) {
> >>>> memcpy(inline_data,
> >>>> (uint8_t *)(long)ibwr->sg_list[i].addr,
> >>>> @@ -1363,6 +1512,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
> >>>> wqe->iova = ibwr->wr.atomic.remote_addr;
> >>>> else
> >>>> wqe->iova = ibwr->wr.rdma.remote_addr;
> >>>> +
> >>>> wqe->dma.length = length;
> >>>> wqe->dma.resid = length;
> >>>> wqe->dma.num_sge = num_sge;
> >>>> @@ -1385,7 +1535,7 @@ static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq,
> >>>> for (i = 0; i < ibwr->num_sge; i++)
> >>>> length += ibwr->sg_list[i].length;
> >>>>
> >>>> - err = validate_send_wr(sq, ibwr, length);
> >>>> + err = validate_send_wr(qp, ibwr, length);
> >>>> if (err) {
> >>>> printf("validate send failed\n");
> >>>> return err;
> >>>> @@ -1579,6 +1729,9 @@ static const struct verbs_context_ops rxe_ctx_ops = {
> >>>> .dealloc_pd = rxe_dealloc_pd,
> >>>> .reg_mr = rxe_reg_mr,
> >>>> .dereg_mr = rxe_dereg_mr,
> >>>> + .alloc_mw = rxe_alloc_mw,
> >>>> + .dealloc_mw = rxe_dealloc_mw,
> >>>> + .bind_mw = rxe_bind_mw,
> >>>> .create_cq = rxe_create_cq,
> >>>> .create_cq_ex = rxe_create_cq_ex,
> >>>> .poll_cq = rxe_poll_cq,
> >>>> --
> >>>> 2.30.2
> >>>>
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH v2 2/2] Providers/rxe: Implement memory windows
2021-06-09 1:51 ` Zhu Yanjun
@ 2021-06-09 1:59 ` Pearson, Robert B
0 siblings, 0 replies; 10+ messages in thread
From: Pearson, Robert B @ 2021-06-09 1:59 UTC (permalink / raw)
To: Zhu Yanjun; +Cc: Jason Gunthorpe, Moni Shoua, RDMA mailing list
On 6/8/2021 8:51 PM, Zhu Yanjun wrote:
> On Wed, Jun 9, 2021 at 7:18 AM Pearson, Robert B <rpearsonhpe@gmail.com> wrote:
>>
>> On 6/8/2021 2:17 AM, Zhu Yanjun wrote:
>>> On Tue, Jun 8, 2021 at 3:01 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>>>> On 6/8/21 1:54 AM, Zhu Yanjun wrote:
>>>>> On Tue, Jun 8, 2021 at 12:28 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>>>>>> This patch makes the required changes to the rxe provider to support the
>>>>>> kernel memory windows patches to the rxe driver.
>>>>>>
>>>>>> The following changes are made:
>>>>>> - Add ibv_alloc_mw verb
>>>>>> - Add ibv_dealloc_mw verb
>>>>>> - Add ibv_bind_mw verb for type 1 MWs
>>>>>> - Add support for bind MW send work requests through the traditional
>>>>>> QP API and the extended QP API.
>>>>>>
>>>>>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
>>>>>> ---
>>>>>> v2:
>>>>>> Added support for extended QP bind MW work requests.
>>>>> I got the following errors.
>>>>> Is it a known issue?
>>>>>
>>>>> # ./bin/run_tests.py --dev rxe0
>>>>> .............sssssssss.............ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss........sssssssssssssssssss....ssss.........s...s.....E.......ssssssssss..ss
>>>>> ======================================================================
>>>>> ERROR: test_qp_ex_rc_bind_mw (tests.test_qpex.QpExTestCase)
>>>>> Verify bind memory window operation using the new post_send API.
>>>>> ----------------------------------------------------------------------
>>>>> Traceback (most recent call last):
>>>>> File "/root/rdma-core/tests/test_qpex.py", line 292, in test_qp_ex_rc_bind_mw
>>>>> u.poll_cq(server.cq)
>>>>> File "/root/rdma-core/tests/utils.py", line 538, in poll_cq
>>>>> raise PyverbsRDMAError('Completion status is {s}'.
>>>>> pyverbs.pyverbs_error.PyverbsRDMAError: Completion status is Memory
>>>>> window bind error. Errno: 6, No such device or address
>>>>>
>>>>> ----------------------------------------------------------------------
>>>>> Ran 193 tests in 2.544s
>>>>>
>>>>> FAILED (errors=1, skipped=128)
>>>>>
>>>>> Zhu Yanjun
>>>> Yes. It is because the test is not setting the BIND_MW access for the MR. It is not permitted to bind an MW to a MR that does not have the BIND_MW access flag set. But this test mistakenly thinks it is going to work. The other MW tests in test_mr.py do set the BIND_MW access flag.
>>> Thanks a lot.
>>> If the root cause to this problem is correct, and setting the BIND_MW
>>> access for the MR can fix this problem,
>>>
>>> Please file a commit to fix this problem.
>>>
>>> I am fine with this MW patch series if all the problems (including
>>> rdma-core and rxe in kernel) are fixed.
>>>
>>> Thanks for your effort.
>>>
>>> Zhu Yanjun
>> Zhu,
>>
>> There was a second test bug, now fixed. And all the python tests are
>> passing or skipped. No errors. There is a message to the linux-rdma list
>> from Edward Srouji indicating that the patch is upstream at github or
>> you can use the one in my email to him. It will be a while until his PR
>> goes upstream, I would guess.
>>
>> I sent a fresh set of patches last night for both the kernel and user
>> space provider.
> Thanks a lot for your effort.
>
> I applied your commits for rdma-core and rxe in kernel, and the following diff.
>
> All the python tests in rdma-core pass or are skipped. No errors
>
> Thanks again.
Thanks to you for your patience.
Bob
>
> "
> diff --git a/tests/test_qpex.py b/tests/test_qpex.py
> index 20288d45..41028e2d 100644
> --- a/tests/test_qpex.py
> +++ b/tests/test_qpex.py
> @@ -146,10 +146,10 @@ class QpExRCAtomicFetchAdd(RCResources):
>
> class QpExRCBindMw(RCResources):
> def create_qps(self):
> - create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_BIND_MW)
> + create_qp_ex(self, e.IBV_QPT_RC,
> e.IBV_QP_EX_WITH_BIND_MW|e.IBV_QP_EX_WITH_RDMA_WRITE)
>
> def create_mr(self):
> - self.mr = u.create_custom_mr(self, e.IBV_ACCESS_REMOTE_WRITE)
> + self.mr = u.create_custom_mr(self,
> e.IBV_ACCESS_REMOTE_WRITE|e.IBV_ACCESS_MW_BIND)
> "
>
> Zhu Yanjun
>
>> Bob
>>
>>>> Bob
>>>>>> ---
>>>>>> providers/rxe/rxe.c | 157 +++++++++++++++++++++++++++++++++++++++++++-
>>>>>> 1 file changed, 155 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
>>>>>> index a68656ae..bb39ef04 100644
>>>>>> --- a/providers/rxe/rxe.c
>>>>>> +++ b/providers/rxe/rxe.c
>>>>>> @@ -128,6 +128,95 @@ static int rxe_dealloc_pd(struct ibv_pd *pd)
>>>>>> return ret;
>>>>>> }
>>>>>>
>>>>>> +static struct ibv_mw *rxe_alloc_mw(struct ibv_pd *ibpd, enum ibv_mw_type type)
>>>>>> +{
>>>>>> + int ret;
>>>>>> + struct ibv_mw *ibmw;
>>>>>> + struct ibv_alloc_mw cmd = {};
>>>>>> + struct ib_uverbs_alloc_mw_resp resp = {};
>>>>>> +
>>>>>> + ibmw = calloc(1, sizeof(*ibmw));
>>>>>> + if (!ibmw)
>>>>>> + return NULL;
>>>>>> +
>>>>>> + ret = ibv_cmd_alloc_mw(ibpd, type, ibmw, &cmd, sizeof(cmd),
>>>>>> + &resp, sizeof(resp));
>>>>>> + if (ret) {
>>>>>> + free(ibmw);
>>>>>> + return NULL;
>>>>>> + }
>>>>>> +
>>>>>> + return ibmw;
>>>>>> +}
>>>>>> +
>>>>>> +static int rxe_dealloc_mw(struct ibv_mw *ibmw)
>>>>>> +{
>>>>>> + int ret;
>>>>>> +
>>>>>> + ret = ibv_cmd_dealloc_mw(ibmw);
>>>>>> + if (ret)
>>>>>> + return ret;
>>>>>> +
>>>>>> + free(ibmw);
>>>>>> + return 0;
>>>>>> +}
>>>>>> +
>>>>>> +static int next_rkey(int rkey)
>>>>>> +{
>>>>>> + return (rkey & 0xffffff00) | ((rkey + 1) & 0x000000ff);
>>>>>> +}
>>>>>> +
>>>>>> +static int rxe_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr_list,
>>>>>> + struct ibv_send_wr **bad_wr);
>>>>>> +
>>>>>> +static int rxe_bind_mw(struct ibv_qp *ibqp, struct ibv_mw *ibmw,
>>>>>> + struct ibv_mw_bind *mw_bind)
>>>>>> +{
>>>>>> + int ret;
>>>>>> + struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info;
>>>>>> + struct ibv_send_wr ibwr;
>>>>>> + struct ibv_send_wr *bad_wr;
>>>>>> +
>>>>>> + if (!bind_info->mr && (bind_info->addr || bind_info->length)) {
>>>>>> + ret = EINVAL;
>>>>>> + goto err;
>>>>>> + }
>>>>>> +
>>>>>> + if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) {
>>>>>> + ret = EINVAL;
>>>>>> + goto err;
>>>>>> + }
>>>>>> +
>>>>>> + if (bind_info->mr) {
>>>>>> + if (ibmw->pd != bind_info->mr->pd) {
>>>>>> + ret = EPERM;
>>>>>> + goto err;
>>>>>> + }
>>>>>> + }
>>>>>> +
>>>>>> + memset(&ibwr, 0, sizeof(ibwr));
>>>>>> +
>>>>>> + ibwr.opcode = IBV_WR_BIND_MW;
>>>>>> + ibwr.next = NULL;
>>>>>> + ibwr.wr_id = mw_bind->wr_id;
>>>>>> + ibwr.send_flags = mw_bind->send_flags;
>>>>>> + ibwr.bind_mw.bind_info = mw_bind->bind_info;
>>>>>> + ibwr.bind_mw.mw = ibmw;
>>>>>> + ibwr.bind_mw.rkey = next_rkey(ibmw->rkey);
>>>>>> +
>>>>>> + ret = rxe_post_send(ibqp, &ibwr, &bad_wr);
>>>>>> + if (ret)
>>>>>> + goto err;
>>>>>> +
>>>>>> + /* user has to undo this if he gets an error wc */
>>>>>> + ibmw->rkey = ibwr.bind_mw.rkey;
>>>>>> +
>>>>>> + return 0;
>>>>>> +err:
>>>>>> + errno = ret;
>>>>>> + return errno;
>>>>>> +}
>>>>>> +
>>>>>> static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
>>>>>> uint64_t hca_va, int access)
>>>>>> {
>>>>>> @@ -715,6 +804,31 @@ static void wr_atomic_fetch_add(struct ibv_qp_ex *ibqp, uint32_t rkey,
>>>>>> advance_qp_cur_index(qp);
>>>>>> }
>>>>>>
>>>>>> +static void wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw,
>>>>>> + uint32_t rkey, const struct ibv_mw_bind_info *info)
>>>>>> +{
>>>>>> + struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
>>>>>> + struct rxe_send_wqe *wqe = addr_from_index(qp->sq.queue, qp->cur_index);
>>>>>> +
>>>>>> + if (check_qp_queue_full(qp))
>>>>>> + return;
>>>>>> +
>>>>>> + memset(wqe, 0, sizeof(*wqe));
>>>>>> +
>>>>>> + wqe->wr.wr_id = ibqp->wr_id;
>>>>>> + wqe->wr.opcode = IBV_WR_BIND_MW;
>>>>>> + wqe->wr.send_flags = qp->vqp.qp_ex.wr_flags;
>>>>>> + wqe->wr.wr.mw.addr = info->addr;
>>>>>> + wqe->wr.wr.mw.length = info->length;
>>>>>> + wqe->wr.wr.mw.mr_lkey = info->mr->lkey;
>>>>>> + wqe->wr.wr.mw.mw_rkey = ibmw->rkey;
>>>>>> + wqe->wr.wr.mw.rkey = rkey;
>>>>>> + wqe->wr.wr.mw.access = info->mw_access_flags;
>>>>>> + wqe->ssn = qp->ssn++;
>>>>>> +
>>>>>> + advance_qp_cur_index(qp);
>>>>>> +}
>>>>>> +
>>>>>> static void wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey)
>>>>>> {
>>>>>> struct rxe_qp *qp = container_of(ibqp, struct rxe_qp, vqp.qp_ex);
>>>>>> @@ -1106,6 +1220,7 @@ enum {
>>>>>> | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP
>>>>>> | IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD
>>>>>> | IBV_QP_EX_WITH_LOCAL_INV
>>>>>> + | IBV_QP_EX_WITH_BIND_MW
>>>>>> | IBV_QP_EX_WITH_SEND_WITH_INV,
>>>>>>
>>>>>> RXE_SUP_UC_QP_SEND_OPS_FLAGS =
>>>>>> @@ -1113,6 +1228,7 @@ enum {
>>>>>> | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM
>>>>>> | IBV_QP_EX_WITH_SEND
>>>>>> | IBV_QP_EX_WITH_SEND_WITH_IMM
>>>>>> + | IBV_QP_EX_WITH_BIND_MW
>>>>>> | IBV_QP_EX_WITH_SEND_WITH_INV,
>>>>>>
>>>>>> RXE_SUP_UD_QP_SEND_OPS_FLAGS =
>>>>>> @@ -1162,6 +1278,9 @@ static void set_qp_send_ops(struct rxe_qp *qp, uint64_t flags)
>>>>>> if (flags & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD)
>>>>>> qp->vqp.qp_ex.wr_atomic_fetch_add = wr_atomic_fetch_add;
>>>>>>
>>>>>> + if (flags & IBV_QP_EX_WITH_BIND_MW)
>>>>>> + qp->vqp.qp_ex.wr_bind_mw = wr_bind_mw;
>>>>>> +
>>>>>> if (flags & IBV_QP_EX_WITH_LOCAL_INV)
>>>>>> qp->vqp.qp_ex.wr_local_inv = wr_local_inv;
>>>>>>
>>>>>> @@ -1275,9 +1394,10 @@ static int rxe_destroy_qp(struct ibv_qp *ibqp)
>>>>>> }
>>>>>>
>>>>>> /* basic sanity checks for send work request */
>>>>>> -static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
>>>>>> +static int validate_send_wr(struct rxe_qp *qp, struct ibv_send_wr *ibwr,
>>>>>> unsigned int length)
>>>>>> {
>>>>>> + struct rxe_wq *sq = &qp->sq;
>>>>>> enum ibv_wr_opcode opcode = ibwr->opcode;
>>>>>>
>>>>>> if (ibwr->num_sge > sq->max_sge)
>>>>>> @@ -1291,11 +1411,26 @@ static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
>>>>>> if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline))
>>>>>> return -EINVAL;
>>>>>>
>>>>>> + if (ibwr->opcode == IBV_WR_BIND_MW) {
>>>>>> + if (length)
>>>>>> + return -EINVAL;
>>>>>> + if (ibwr->num_sge)
>>>>>> + return -EINVAL;
>>>>>> + if (ibwr->imm_data)
>>>>>> + return -EINVAL;
>>>>>> + if ((qp_type(qp) != IBV_QPT_RC) &&
>>>>>> + (qp_type(qp) != IBV_QPT_UC))
>>>>>> + return -EINVAL;
>>>>>> + }
>>>>>> +
>>>>>> return 0;
>>>>>> }
>>>>>>
>>>>>> static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
>>>>>> {
>>>>>> + struct ibv_mw *ibmw;
>>>>>> + struct ibv_mr *ibmr;
>>>>>> +
>>>>>> memset(kwr, 0, sizeof(*kwr));
>>>>>>
>>>>>> kwr->wr_id = uwr->wr_id;
>>>>>> @@ -1326,6 +1461,18 @@ static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
>>>>>> kwr->wr.atomic.rkey = uwr->wr.atomic.rkey;
>>>>>> break;
>>>>>>
>>>>>> + case IBV_WR_BIND_MW:
>>>>>> + ibmr = uwr->bind_mw.bind_info.mr;
>>>>>> + ibmw = uwr->bind_mw.mw;
>>>>>> +
>>>>>> + kwr->wr.mw.addr = uwr->bind_mw.bind_info.addr;
>>>>>> + kwr->wr.mw.length = uwr->bind_mw.bind_info.length;
>>>>>> + kwr->wr.mw.mr_lkey = ibmr->lkey;
>>>>>> + kwr->wr.mw.mw_rkey = ibmw->rkey;
>>>>>> + kwr->wr.mw.rkey = uwr->bind_mw.rkey;
>>>>>> + kwr->wr.mw.access = uwr->bind_mw.bind_info.mw_access_flags;
>>>>>> + break;
>>>>>> +
>>>>>> default:
>>>>>> break;
>>>>>> }
>>>>>> @@ -1348,6 +1495,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
>>>>>> if (ibwr->send_flags & IBV_SEND_INLINE) {
>>>>>> uint8_t *inline_data = wqe->dma.inline_data;
>>>>>>
>>>>>> + wqe->dma.resid = 0;
>>>>>> +
>>>>>> for (i = 0; i < num_sge; i++) {
>>>>>> memcpy(inline_data,
>>>>>> (uint8_t *)(long)ibwr->sg_list[i].addr,
>>>>>> @@ -1363,6 +1512,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
>>>>>> wqe->iova = ibwr->wr.atomic.remote_addr;
>>>>>> else
>>>>>> wqe->iova = ibwr->wr.rdma.remote_addr;
>>>>>> +
>>>>>> wqe->dma.length = length;
>>>>>> wqe->dma.resid = length;
>>>>>> wqe->dma.num_sge = num_sge;
>>>>>> @@ -1385,7 +1535,7 @@ static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq,
>>>>>> for (i = 0; i < ibwr->num_sge; i++)
>>>>>> length += ibwr->sg_list[i].length;
>>>>>>
>>>>>> - err = validate_send_wr(sq, ibwr, length);
>>>>>> + err = validate_send_wr(qp, ibwr, length);
>>>>>> if (err) {
>>>>>> printf("validate send failed\n");
>>>>>> return err;
>>>>>> @@ -1579,6 +1729,9 @@ static const struct verbs_context_ops rxe_ctx_ops = {
>>>>>> .dealloc_pd = rxe_dealloc_pd,
>>>>>> .reg_mr = rxe_reg_mr,
>>>>>> .dereg_mr = rxe_dereg_mr,
>>>>>> + .alloc_mw = rxe_alloc_mw,
>>>>>> + .dealloc_mw = rxe_dealloc_mw,
>>>>>> + .bind_mw = rxe_bind_mw,
>>>>>> .create_cq = rxe_create_cq,
>>>>>> .create_cq_ex = rxe_create_cq_ex,
>>>>>> .poll_cq = rxe_poll_cq,
>>>>>> --
>>>>>> 2.30.2
>>>>>>
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v2 1/2] Update kernel headers
2021-07-18 21:28 [PATCH v2 0/2] Providers/rxe: Replace AV by AH for UD sends Bob Pearson
@ 2021-07-18 21:28 ` Bob Pearson
0 siblings, 0 replies; 10+ messages in thread
From: Bob Pearson @ 2021-07-18 21:28 UTC (permalink / raw)
To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson
To commit ?? ("RDMA/rxe: Convert kernel UD post send to use ah_num").
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
| 10 +++++++++-
providers/rxe/rxe.c | 4 ++--
2 files changed, 11 insertions(+), 3 deletions(-)
--git a/kernel-headers/rdma/rdma_user_rxe.h b/kernel-headers/rdma/rdma_user_rxe.h
index e283c222..ad7da77d 100644
--- a/kernel-headers/rdma/rdma_user_rxe.h
+++ b/kernel-headers/rdma/rdma_user_rxe.h
@@ -98,6 +98,10 @@ struct rxe_send_wr {
__u32 remote_qpn;
__u32 remote_qkey;
__u16 pkey_index;
+ __u16 reserved;
+ __u32 ah_num;
+ __u32 pad[4];
+ struct rxe_av av; /* deprecated */
} ud;
struct {
__aligned_u64 addr;
@@ -148,7 +152,6 @@ struct rxe_dma_info {
struct rxe_send_wqe {
struct rxe_send_wr wr;
- struct rxe_av av;
__u32 status;
__u32 state;
__aligned_u64 iova;
@@ -168,6 +171,11 @@ struct rxe_recv_wqe {
struct rxe_dma_info dma;
};
+struct rxe_create_ah_resp {
+ __u32 ah_num;
+ __u32 reserved;
+};
+
struct rxe_create_cq_resp {
struct mminfo mi;
};
diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
index 3c3ea8bb..d19aa9df 100644
--- a/providers/rxe/rxe.c
+++ b/providers/rxe/rxe.c
@@ -982,7 +982,7 @@ static void wr_set_ud_addr(struct ibv_qp_ex *ibqp, struct ibv_ah *ibah,
if (qp->err)
return;
- memcpy(&wqe->av, &ah->av, sizeof(ah->av));
+ memcpy(&wqe->wr.wr.ud.av, &ah->av, sizeof(ah->av));
wqe->wr.wr.ud.remote_qpn = remote_qpn;
wqe->wr.wr.ud.remote_qkey = remote_qkey;
}
@@ -1467,7 +1467,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
convert_send_wr(&wqe->wr, ibwr);
if (qp_type(qp) == IBV_QPT_UD)
- memcpy(&wqe->av, &to_rah(ibwr->wr.ud.ah)->av,
+ memcpy(&wqe->wr.wr.ud.av, &to_rah(ibwr->wr.ud.ah)->av,
sizeof(struct rxe_av));
if (ibwr->send_flags & IBV_SEND_INLINE) {
--
2.30.2
^ permalink raw reply related [flat|nested] 10+ messages in thread
end of thread, other threads:[~2021-07-18 21:29 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-08 4:28 [PATCH v2 0/2] Implement memory windows support for rxe Bob Pearson
2021-06-08 4:28 ` [PATCH v2 1/2] Update kernel headers Bob Pearson
2021-06-08 4:28 ` [PATCH v2 2/2] Providers/rxe: Implement memory windows Bob Pearson
2021-06-08 6:54 ` Zhu Yanjun
2021-06-08 7:01 ` Bob Pearson
2021-06-08 7:17 ` Zhu Yanjun
2021-06-08 23:18 ` Pearson, Robert B
2021-06-09 1:51 ` Zhu Yanjun
2021-06-09 1:59 ` Pearson, Robert B
2021-07-18 21:28 [PATCH v2 0/2] Providers/rxe: Replace AV by AH for UD sends Bob Pearson
2021-07-18 21:28 ` [PATCH v2 1/2] Update kernel headers Bob Pearson
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.