* [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer
2019-04-03 11:33 [Qemu-devel] [PATCH v3 0/4] pvrdma: Add support for SRQ Kamal Heib
@ 2019-04-03 11:33 ` Kamal Heib
2019-04-03 18:05 ` Yuval Shaia
2019-04-03 11:33 ` [Qemu-devel] [PATCH 2/4] hw/rdma: Add support for managing SRQ resource Kamal Heib
` (3 subsequent siblings)
4 siblings, 1 reply; 13+ messages in thread
From: Kamal Heib @ 2019-04-03 11:33 UTC (permalink / raw)
To: qemu-devel; +Cc: Marcel Apfelbaum, Yuval Shaia, Kamal Heib
Add the required functions and definitions to support shared receive
queues (SRQs) in the backend layer.
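For reference, a minimal sketch (illustrative only, not part of the patch) of how a caller is expected to drive the new backend SRQ API; backend_dev, pd, sge and ctx are assumed to already exist and error handling is trimmed:

    RdmaBackendSRQ srq;
    int rc;

    rc = rdma_backend_create_srq(&srq, &pd, 64 /* max_wr */, 1 /* max_sge */,
                                 0 /* srq_limit */);
    if (rc) {
        return rc;
    }

    /* Replenish shared receive buffers; completions are matched back
     * through the SRQ's cqe_ctx_list in rdma_poll_cq() */
    rdma_backend_post_srq_recv(backend_dev, &srq, &sge, 1, ctx);

    /* ... QPs attached to this SRQ consume the posted buffers ... */

    rdma_backend_destroy_srq(&srq, backend_dev->rdma_dev_res);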
Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
---
hw/rdma/rdma_backend.c | 116 +++++++++++++++++++++++++++++++++++-
hw/rdma/rdma_backend.h | 12 ++++
hw/rdma/rdma_backend_defs.h | 5 ++
hw/rdma/rdma_rm.c | 2 +
hw/rdma/rdma_rm_defs.h | 1 +
5 files changed, 134 insertions(+), 2 deletions(-)
diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
index d1660b6474fa..04dfd63a573b 100644
--- a/hw/rdma/rdma_backend.c
+++ b/hw/rdma/rdma_backend.c
@@ -40,6 +40,7 @@ typedef struct BackendCtx {
void *up_ctx;
struct ibv_sge sge; /* Used to save MAD recv buffer */
RdmaBackendQP *backend_qp; /* To maintain recv buffers */
+ RdmaBackendSRQ *backend_srq;
} BackendCtx;
struct backend_umad {
@@ -99,6 +100,7 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
int i, ne, total_ne = 0;
BackendCtx *bctx;
struct ibv_wc wc[2];
+ RdmaProtectedGSList *cqe_ctx_list;
qemu_mutex_lock(&rdma_dev_res->lock);
do {
@@ -116,8 +118,13 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
comp_handler(bctx->up_ctx, &wc[i]);
- rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list,
- wc[i].wr_id);
+ if (bctx->backend_qp) {
+ cqe_ctx_list = &bctx->backend_qp->cqe_ctx_list;
+ } else {
+ cqe_ctx_list = &bctx->backend_srq->cqe_ctx_list;
+ }
+
+ rdma_protected_gslist_remove_int32(cqe_ctx_list, wc[i].wr_id);
rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id);
g_free(bctx);
}
@@ -662,6 +669,60 @@ err_free_bctx:
g_free(bctx);
}
+void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
+ RdmaBackendSRQ *srq, struct ibv_sge *sge,
+ uint32_t num_sge, void *ctx)
+{
+ BackendCtx *bctx;
+ struct ibv_sge new_sge[MAX_SGE];
+ uint32_t bctx_id;
+ int rc;
+ struct ibv_recv_wr wr = {}, *bad_wr;
+
+ bctx = g_malloc0(sizeof(*bctx));
+ bctx->up_ctx = ctx;
+ bctx->backend_srq = srq;
+
+ rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
+ if (unlikely(rc)) {
+ complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
+ goto err_free_bctx;
+ }
+
+ rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id);
+
+ rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
+ &backend_dev->rdma_dev_res->stats.rx_bufs_len);
+ if (rc) {
+ complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
+ goto err_dealloc_cqe_ctx;
+ }
+
+ wr.num_sge = num_sge;
+ wr.sg_list = new_sge;
+ wr.wr_id = bctx_id;
+ rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr);
+ if (rc) {
+ rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, errno=%d",
+ srq->ibsrq->handle, rc, errno);
+ complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
+ goto err_dealloc_cqe_ctx;
+ }
+
+ atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe);
+ backend_dev->rdma_dev_res->stats.rx_bufs++;
+ backend_dev->rdma_dev_res->stats.rx_srq++;
+
+ return;
+
+err_dealloc_cqe_ctx:
+ backend_dev->rdma_dev_res->stats.rx_bufs_err++;
+ rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id);
+
+err_free_bctx:
+ g_free(bctx);
+}
+
int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd)
{
pd->ibpd = ibv_alloc_pd(backend_dev->context);
@@ -938,6 +999,55 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp, RdmaDeviceResources *dev_res)
rdma_protected_gslist_destroy(&qp->cqe_ctx_list);
}
+int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
+ uint32_t max_wr, uint32_t max_sge,
+ uint32_t srq_limit)
+{
+ struct ibv_srq_init_attr srq_init_attr = {};
+
+ srq_init_attr.attr.max_wr = max_wr;
+ srq_init_attr.attr.max_sge = max_sge;
+ srq_init_attr.attr.srq_limit = srq_limit;
+
+ srq->ibsrq = ibv_create_srq(pd->ibpd, &srq_init_attr);
+ if (!srq->ibsrq) {
+ rdma_error_report("ibv_create_srq failed, errno=%d", errno);
+ return -EIO;
+ }
+
+ rdma_protected_gslist_init(&srq->cqe_ctx_list);
+
+ return 0;
+}
+
+int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr)
+{
+ if (!srq->ibsrq) {
+ return -EINVAL;
+ }
+
+ return ibv_query_srq(srq->ibsrq, srq_attr);
+}
+
+int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
+ int srq_attr_mask)
+{
+ if (!srq->ibsrq) {
+ return -EINVAL;
+ }
+
+ return ibv_modify_srq(srq->ibsrq, srq_attr, srq_attr_mask);
+}
+
+void rdma_backend_destroy_srq(RdmaBackendSRQ *srq, RdmaDeviceResources *dev_res)
+{
+ if (srq->ibsrq) {
+ ibv_destroy_srq(srq->ibsrq);
+ }
+ g_slist_foreach(srq->cqe_ctx_list.list, free_cqe_ctx, dev_res);
+ rdma_protected_gslist_destroy(&srq->cqe_ctx_list);
+}
+
#define CHK_ATTR(req, dev, member, fmt) ({ \
trace_rdma_check_dev_attr(#member, dev.member, req->member); \
if (req->member > dev.member) { \
@@ -960,6 +1070,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
}
dev_attr->max_sge = MAX_SGE;
+ dev_attr->max_srq_sge = MAX_SGE;
CHK_ATTR(dev_attr, bk_dev_attr, max_mr_size, "%" PRId64);
CHK_ATTR(dev_attr, bk_dev_attr, max_qp, "%d");
@@ -970,6 +1081,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
CHK_ATTR(dev_attr, bk_dev_attr, max_qp_rd_atom, "%d");
CHK_ATTR(dev_attr, bk_dev_attr, max_qp_init_rd_atom, "%d");
CHK_ATTR(dev_attr, bk_dev_attr, max_ah, "%d");
+ CHK_ATTR(dev_attr, bk_dev_attr, max_srq, "%d");
return 0;
}
diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
index 38056d97c7fc..cad7956d98e8 100644
--- a/hw/rdma/rdma_backend.h
+++ b/hw/rdma/rdma_backend.h
@@ -114,4 +114,16 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
RdmaBackendQP *qp, uint8_t qp_type,
struct ibv_sge *sge, uint32_t num_sge, void *ctx);
+int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
+ uint32_t max_wr, uint32_t max_sge,
+ uint32_t srq_limit);
+int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr);
+int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
+ int srq_attr_mask);
+void rdma_backend_destroy_srq(RdmaBackendSRQ *srq,
+ RdmaDeviceResources *dev_res);
+void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
+ RdmaBackendSRQ *srq, struct ibv_sge *sge,
+ uint32_t num_sge, void *ctx);
+
#endif
diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h
index 817153dc8cf4..0b55be35038d 100644
--- a/hw/rdma/rdma_backend_defs.h
+++ b/hw/rdma/rdma_backend_defs.h
@@ -68,4 +68,9 @@ typedef struct RdmaBackendQP {
RdmaProtectedGSList cqe_ctx_list;
} RdmaBackendQP;
+typedef struct RdmaBackendSRQ {
+ struct ibv_srq *ibsrq;
+ RdmaProtectedGSList cqe_ctx_list;
+} RdmaBackendSRQ;
+
#endif
diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
index bac3b2f4a6c3..b683506b8616 100644
--- a/hw/rdma/rdma_rm.c
+++ b/hw/rdma/rdma_rm.c
@@ -37,6 +37,8 @@ void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res)
dev_res->stats.tx_err);
monitor_printf(mon, "\trx_bufs : %" PRId64 "\n",
dev_res->stats.rx_bufs);
+ monitor_printf(mon, "\trx_srq : %" PRId64 "\n",
+ dev_res->stats.rx_srq);
monitor_printf(mon, "\trx_bufs_len : %" PRId64 "\n",
dev_res->stats.rx_bufs_len);
monitor_printf(mon, "\trx_bufs_err : %" PRId64 "\n",
diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
index c200d311de37..e774af528022 100644
--- a/hw/rdma/rdma_rm_defs.h
+++ b/hw/rdma/rdma_rm_defs.h
@@ -106,6 +106,7 @@ typedef struct RdmaRmStats {
uint64_t rx_bufs;
uint64_t rx_bufs_len;
uint64_t rx_bufs_err;
+ uint64_t rx_srq;
uint64_t completions;
uint64_t mad_tx;
uint64_t mad_tx_err;
--
2.20.1
* Re: [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer
2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer Kamal Heib
@ 2019-04-03 18:05 ` Yuval Shaia
2019-04-07 8:13 ` Kamal Heib
0 siblings, 1 reply; 13+ messages in thread
From: Yuval Shaia @ 2019-04-03 18:05 UTC (permalink / raw)
To: Kamal Heib; +Cc: qemu-devel
On Wed, Apr 03, 2019 at 02:33:40PM +0300, Kamal Heib wrote:
> Add the required functions and definitions to support shared receive
> queues (SRQs) in the backend layer.
>
> Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
> ---
> hw/rdma/rdma_backend.c | 116 +++++++++++++++++++++++++++++++++++-
> hw/rdma/rdma_backend.h | 12 ++++
> hw/rdma/rdma_backend_defs.h | 5 ++
> hw/rdma/rdma_rm.c | 2 +
> hw/rdma/rdma_rm_defs.h | 1 +
> 5 files changed, 134 insertions(+), 2 deletions(-)
>
> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
> index d1660b6474fa..04dfd63a573b 100644
> --- a/hw/rdma/rdma_backend.c
> +++ b/hw/rdma/rdma_backend.c
> @@ -40,6 +40,7 @@ typedef struct BackendCtx {
> void *up_ctx;
> struct ibv_sge sge; /* Used to save MAD recv buffer */
> RdmaBackendQP *backend_qp; /* To maintain recv buffers */
> + RdmaBackendSRQ *backend_srq;
> } BackendCtx;
>
> struct backend_umad {
> @@ -99,6 +100,7 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
> int i, ne, total_ne = 0;
> BackendCtx *bctx;
> struct ibv_wc wc[2];
> + RdmaProtectedGSList *cqe_ctx_list;
>
> qemu_mutex_lock(&rdma_dev_res->lock);
> do {
> @@ -116,8 +118,13 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
>
> comp_handler(bctx->up_ctx, &wc[i]);
>
> - rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list,
> - wc[i].wr_id);
> + if (bctx->backend_qp) {
> + cqe_ctx_list = &bctx->backend_qp->cqe_ctx_list;
> + } else {
> + cqe_ctx_list = &bctx->backend_srq->cqe_ctx_list;
> + }
> +
> + rdma_protected_gslist_remove_int32(cqe_ctx_list, wc[i].wr_id);
> rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id);
> g_free(bctx);
> }
> @@ -662,6 +669,60 @@ err_free_bctx:
> g_free(bctx);
> }
>
> +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
> + RdmaBackendSRQ *srq, struct ibv_sge *sge,
> + uint32_t num_sge, void *ctx)
> +{
> + BackendCtx *bctx;
> + struct ibv_sge new_sge[MAX_SGE];
> + uint32_t bctx_id;
> + int rc;
> + struct ibv_recv_wr wr = {}, *bad_wr;
> +
> + bctx = g_malloc0(sizeof(*bctx));
> + bctx->up_ctx = ctx;
> + bctx->backend_srq = srq;
> +
> + rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
> + if (unlikely(rc)) {
> + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
> + goto err_free_bctx;
> + }
> +
> + rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id);
> +
> + rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
> + &backend_dev->rdma_dev_res->stats.rx_bufs_len);
> + if (rc) {
> + complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
> + goto err_dealloc_cqe_ctx;
> + }
> +
> + wr.num_sge = num_sge;
> + wr.sg_list = new_sge;
> + wr.wr_id = bctx_id;
> + rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr);
> + if (rc) {
> + rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, errno=%d",
> + srq->ibsrq->handle, rc, errno);
> + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
> + goto err_dealloc_cqe_ctx;
> + }
> +
> + atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe);
> + backend_dev->rdma_dev_res->stats.rx_bufs++;
> + backend_dev->rdma_dev_res->stats.rx_srq++;
You should update function rdma_dump_device_counters with this new
counter.
> +
> + return;
> +
> +err_dealloc_cqe_ctx:
> + backend_dev->rdma_dev_res->stats.rx_bufs_err++;
> + rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id);
> +
> +err_free_bctx:
> + g_free(bctx);
> +}
> +
> int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd)
> {
> pd->ibpd = ibv_alloc_pd(backend_dev->context);
> @@ -938,6 +999,55 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp, RdmaDeviceResources *dev_res)
> rdma_protected_gslist_destroy(&qp->cqe_ctx_list);
> }
>
> +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
> + uint32_t max_wr, uint32_t max_sge,
> + uint32_t srq_limit)
> +{
> + struct ibv_srq_init_attr srq_init_attr = {};
> +
> + srq_init_attr.attr.max_wr = max_wr;
> + srq_init_attr.attr.max_sge = max_sge;
> + srq_init_attr.attr.srq_limit = srq_limit;
> +
> + srq->ibsrq = ibv_create_srq(pd->ibpd, &srq_init_attr);
> + if (!srq->ibsrq) {
> + rdma_error_report("ibv_create_srq failed, errno=%d", errno);
> + return -EIO;
> + }
> +
> + rdma_protected_gslist_init(&srq->cqe_ctx_list);
> +
> + return 0;
> +}
> +
> +int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr)
> +{
> + if (!srq->ibsrq) {
> + return -EINVAL;
> + }
> +
> + return ibv_query_srq(srq->ibsrq, srq_attr);
> +}
> +
> +int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
> + int srq_attr_mask)
> +{
> + if (!srq->ibsrq) {
> + return -EINVAL;
> + }
> +
> + return ibv_modify_srq(srq->ibsrq, srq_attr, srq_attr_mask);
> +}
> +
> +void rdma_backend_destroy_srq(RdmaBackendSRQ *srq, RdmaDeviceResources *dev_res)
> +{
> + if (srq->ibsrq) {
> + ibv_destroy_srq(srq->ibsrq);
> + }
> + g_slist_foreach(srq->cqe_ctx_list.list, free_cqe_ctx, dev_res);
> + rdma_protected_gslist_destroy(&srq->cqe_ctx_list);
> +}
> +
> #define CHK_ATTR(req, dev, member, fmt) ({ \
> trace_rdma_check_dev_attr(#member, dev.member, req->member); \
> if (req->member > dev.member) { \
> @@ -960,6 +1070,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
> }
>
> dev_attr->max_sge = MAX_SGE;
> + dev_attr->max_srq_sge = MAX_SGE;
>
> CHK_ATTR(dev_attr, bk_dev_attr, max_mr_size, "%" PRId64);
> CHK_ATTR(dev_attr, bk_dev_attr, max_qp, "%d");
> @@ -970,6 +1081,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
> CHK_ATTR(dev_attr, bk_dev_attr, max_qp_rd_atom, "%d");
> CHK_ATTR(dev_attr, bk_dev_attr, max_qp_init_rd_atom, "%d");
> CHK_ATTR(dev_attr, bk_dev_attr, max_ah, "%d");
> + CHK_ATTR(dev_attr, bk_dev_attr, max_srq, "%d");
>
> return 0;
> }
> diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
> index 38056d97c7fc..cad7956d98e8 100644
> --- a/hw/rdma/rdma_backend.h
> +++ b/hw/rdma/rdma_backend.h
> @@ -114,4 +114,16 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
> RdmaBackendQP *qp, uint8_t qp_type,
> struct ibv_sge *sge, uint32_t num_sge, void *ctx);
>
> +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
> + uint32_t max_wr, uint32_t max_sge,
> + uint32_t srq_limit);
> +int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr);
> +int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
> + int srq_attr_mask);
> +void rdma_backend_destroy_srq(RdmaBackendSRQ *srq,
> + RdmaDeviceResources *dev_res);
> +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
> + RdmaBackendSRQ *srq, struct ibv_sge *sge,
> + uint32_t num_sge, void *ctx);
> +
> #endif
> diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h
> index 817153dc8cf4..0b55be35038d 100644
> --- a/hw/rdma/rdma_backend_defs.h
> +++ b/hw/rdma/rdma_backend_defs.h
> @@ -68,4 +68,9 @@ typedef struct RdmaBackendQP {
> RdmaProtectedGSList cqe_ctx_list;
> } RdmaBackendQP;
>
> +typedef struct RdmaBackendSRQ {
> + struct ibv_srq *ibsrq;
> + RdmaProtectedGSList cqe_ctx_list;
> +} RdmaBackendSRQ;
> +
> #endif
> diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
> index bac3b2f4a6c3..b683506b8616 100644
> --- a/hw/rdma/rdma_rm.c
> +++ b/hw/rdma/rdma_rm.c
> @@ -37,6 +37,8 @@ void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res)
> dev_res->stats.tx_err);
> monitor_printf(mon, "\trx_bufs : %" PRId64 "\n",
> dev_res->stats.rx_bufs);
> + monitor_printf(mon, "\trx_srq : %" PRId64 "\n",
> + dev_res->stats.rx_srq);
> monitor_printf(mon, "\trx_bufs_len : %" PRId64 "\n",
> dev_res->stats.rx_bufs_len);
> monitor_printf(mon, "\trx_bufs_err : %" PRId64 "\n",
> diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
> index c200d311de37..e774af528022 100644
> --- a/hw/rdma/rdma_rm_defs.h
> +++ b/hw/rdma/rdma_rm_defs.h
> @@ -106,6 +106,7 @@ typedef struct RdmaRmStats {
> uint64_t rx_bufs;
> uint64_t rx_bufs_len;
> uint64_t rx_bufs_err;
> + uint64_t rx_srq;
> uint64_t completions;
> uint64_t mad_tx;
> uint64_t mad_tx_err;
Please make a separate patch to update the function
rdma_dump_device_counters.
Besides that patch lgtm.
Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
> --
> 2.20.1
>
>
* Re: [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer
2019-04-03 18:05 ` Yuval Shaia
@ 2019-04-07 8:13 ` Kamal Heib
2019-04-07 9:13 ` Yuval Shaia
0 siblings, 1 reply; 13+ messages in thread
From: Kamal Heib @ 2019-04-07 8:13 UTC (permalink / raw)
To: Yuval Shaia; +Cc: qemu-devel
On 4/3/19 9:05 PM, Yuval Shaia wrote:
> On Wed, Apr 03, 2019 at 02:33:40PM +0300, Kamal Heib wrote:
>> Add the required functions and definitions to support shared receive
>> queues (SRQs) in the backend layer.
>>
>> Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
>> ---
>> hw/rdma/rdma_backend.c | 116 +++++++++++++++++++++++++++++++++++-
>> hw/rdma/rdma_backend.h | 12 ++++
>> hw/rdma/rdma_backend_defs.h | 5 ++
>> hw/rdma/rdma_rm.c | 2 +
>> hw/rdma/rdma_rm_defs.h | 1 +
>> 5 files changed, 134 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
>> index d1660b6474fa..04dfd63a573b 100644
>> --- a/hw/rdma/rdma_backend.c
>> +++ b/hw/rdma/rdma_backend.c
>> @@ -40,6 +40,7 @@ typedef struct BackendCtx {
>> void *up_ctx;
>> struct ibv_sge sge; /* Used to save MAD recv buffer */
>> RdmaBackendQP *backend_qp; /* To maintain recv buffers */
>> + RdmaBackendSRQ *backend_srq;
>> } BackendCtx;
>>
>> struct backend_umad {
>> @@ -99,6 +100,7 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
>> int i, ne, total_ne = 0;
>> BackendCtx *bctx;
>> struct ibv_wc wc[2];
>> + RdmaProtectedGSList *cqe_ctx_list;
>>
>> qemu_mutex_lock(&rdma_dev_res->lock);
>> do {
>> @@ -116,8 +118,13 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
>>
>> comp_handler(bctx->up_ctx, &wc[i]);
>>
>> - rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list,
>> - wc[i].wr_id);
>> + if (bctx->backend_qp) {
>> + cqe_ctx_list = &bctx->backend_qp->cqe_ctx_list;
>> + } else {
>> + cqe_ctx_list = &bctx->backend_srq->cqe_ctx_list;
>> + }
>> +
>> + rdma_protected_gslist_remove_int32(cqe_ctx_list, wc[i].wr_id);
>> rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id);
>> g_free(bctx);
>> }
>> @@ -662,6 +669,60 @@ err_free_bctx:
>> g_free(bctx);
>> }
>>
>> +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
>> + RdmaBackendSRQ *srq, struct ibv_sge *sge,
>> + uint32_t num_sge, void *ctx)
>> +{
>> + BackendCtx *bctx;
>> + struct ibv_sge new_sge[MAX_SGE];
>> + uint32_t bctx_id;
>> + int rc;
>> + struct ibv_recv_wr wr = {}, *bad_wr;
>> +
>> + bctx = g_malloc0(sizeof(*bctx));
>> + bctx->up_ctx = ctx;
>> + bctx->backend_srq = srq;
>> +
>> + rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
>> + if (unlikely(rc)) {
>> + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
>> + goto err_free_bctx;
>> + }
>> +
>> + rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id);
>> +
>> + rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
>> + &backend_dev->rdma_dev_res->stats.rx_bufs_len);
>> + if (rc) {
>> + complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
>> + goto err_dealloc_cqe_ctx;
>> + }
>> +
>> + wr.num_sge = num_sge;
>> + wr.sg_list = new_sge;
>> + wr.wr_id = bctx_id;
>> + rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr);
>> + if (rc) {
>> + rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, errno=%d",
>> + srq->ibsrq->handle, rc, errno);
>> + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
>> + goto err_dealloc_cqe_ctx;
>> + }
>> +
>> + atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe);
>> + backend_dev->rdma_dev_res->stats.rx_bufs++;
>> + backend_dev->rdma_dev_res->stats.rx_srq++;
>
> You should update function rdma_dump_device_counters with this new
> counter.
>
>> +
>> + return;
>> +
>> +err_dealloc_cqe_ctx:
>> + backend_dev->rdma_dev_res->stats.rx_bufs_err++;
>> + rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id);
>> +
>> +err_free_bctx:
>> + g_free(bctx);
>> +}
>> +
>> int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd)
>> {
>> pd->ibpd = ibv_alloc_pd(backend_dev->context);
>> @@ -938,6 +999,55 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp, RdmaDeviceResources *dev_res)
>> rdma_protected_gslist_destroy(&qp->cqe_ctx_list);
>> }
>>
>> +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
>> + uint32_t max_wr, uint32_t max_sge,
>> + uint32_t srq_limit)
>> +{
>> + struct ibv_srq_init_attr srq_init_attr = {};
>> +
>> + srq_init_attr.attr.max_wr = max_wr;
>> + srq_init_attr.attr.max_sge = max_sge;
>> + srq_init_attr.attr.srq_limit = srq_limit;
>> +
>> + srq->ibsrq = ibv_create_srq(pd->ibpd, &srq_init_attr);
>> + if (!srq->ibsrq) {
>> + rdma_error_report("ibv_create_srq failed, errno=%d", errno);
>> + return -EIO;
>> + }
>> +
>> + rdma_protected_gslist_init(&srq->cqe_ctx_list);
>> +
>> + return 0;
>> +}
>> +
>> +int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr)
>> +{
>> + if (!srq->ibsrq) {
>> + return -EINVAL;
>> + }
>> +
>> + return ibv_query_srq(srq->ibsrq, srq_attr);
>> +}
>> +
>> +int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
>> + int srq_attr_mask)
>> +{
>> + if (!srq->ibsrq) {
>> + return -EINVAL;
>> + }
>> +
>> + return ibv_modify_srq(srq->ibsrq, srq_attr, srq_attr_mask);
>> +}
>> +
>> +void rdma_backend_destroy_srq(RdmaBackendSRQ *srq, RdmaDeviceResources *dev_res)
>> +{
>> + if (srq->ibsrq) {
>> + ibv_destroy_srq(srq->ibsrq);
>> + }
>> + g_slist_foreach(srq->cqe_ctx_list.list, free_cqe_ctx, dev_res);
>> + rdma_protected_gslist_destroy(&srq->cqe_ctx_list);
>> +}
>> +
>> #define CHK_ATTR(req, dev, member, fmt) ({ \
>> trace_rdma_check_dev_attr(#member, dev.member, req->member); \
>> if (req->member > dev.member) { \
>> @@ -960,6 +1070,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
>> }
>>
>> dev_attr->max_sge = MAX_SGE;
>> + dev_attr->max_srq_sge = MAX_SGE;
>>
>> CHK_ATTR(dev_attr, bk_dev_attr, max_mr_size, "%" PRId64);
>> CHK_ATTR(dev_attr, bk_dev_attr, max_qp, "%d");
>> @@ -970,6 +1081,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
>> CHK_ATTR(dev_attr, bk_dev_attr, max_qp_rd_atom, "%d");
>> CHK_ATTR(dev_attr, bk_dev_attr, max_qp_init_rd_atom, "%d");
>> CHK_ATTR(dev_attr, bk_dev_attr, max_ah, "%d");
>> + CHK_ATTR(dev_attr, bk_dev_attr, max_srq, "%d");
>>
>> return 0;
>> }
>> diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
>> index 38056d97c7fc..cad7956d98e8 100644
>> --- a/hw/rdma/rdma_backend.h
>> +++ b/hw/rdma/rdma_backend.h
>> @@ -114,4 +114,16 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
>> RdmaBackendQP *qp, uint8_t qp_type,
>> struct ibv_sge *sge, uint32_t num_sge, void *ctx);
>>
>> +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
>> + uint32_t max_wr, uint32_t max_sge,
>> + uint32_t srq_limit);
>> +int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr);
>> +int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
>> + int srq_attr_mask);
>> +void rdma_backend_destroy_srq(RdmaBackendSRQ *srq,
>> + RdmaDeviceResources *dev_res);
>> +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
>> + RdmaBackendSRQ *srq, struct ibv_sge *sge,
>> + uint32_t num_sge, void *ctx);
>> +
>> #endif
>> diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h
>> index 817153dc8cf4..0b55be35038d 100644
>> --- a/hw/rdma/rdma_backend_defs.h
>> +++ b/hw/rdma/rdma_backend_defs.h
>> @@ -68,4 +68,9 @@ typedef struct RdmaBackendQP {
>> RdmaProtectedGSList cqe_ctx_list;
>> } RdmaBackendQP;
>>
>> +typedef struct RdmaBackendSRQ {
>> + struct ibv_srq *ibsrq;
>> + RdmaProtectedGSList cqe_ctx_list;
>> +} RdmaBackendSRQ;
>> +
>> #endif
>> diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
>> index bac3b2f4a6c3..b683506b8616 100644
>> --- a/hw/rdma/rdma_rm.c
>> +++ b/hw/rdma/rdma_rm.c
>> @@ -37,6 +37,8 @@ void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res)
>> dev_res->stats.tx_err);
>> monitor_printf(mon, "\trx_bufs : %" PRId64 "\n",
>> dev_res->stats.rx_bufs);
>> + monitor_printf(mon, "\trx_srq : %" PRId64 "\n",
>> + dev_res->stats.rx_srq);
>> monitor_printf(mon, "\trx_bufs_len : %" PRId64 "\n",
>> dev_res->stats.rx_bufs_len);
>> monitor_printf(mon, "\trx_bufs_err : %" PRId64 "\n",
>> diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
>> index c200d311de37..e774af528022 100644
>> --- a/hw/rdma/rdma_rm_defs.h
>> +++ b/hw/rdma/rdma_rm_defs.h
>> @@ -106,6 +106,7 @@ typedef struct RdmaRmStats {
>> uint64_t rx_bufs;
>> uint64_t rx_bufs_len;
>> uint64_t rx_bufs_err;
>> + uint64_t rx_srq;
>> uint64_t completions;
>> uint64_t mad_tx;
>> uint64_t mad_tx_err;
>
> Please make a separate patch to update the function
> rdma_dump_device_counters.
>
You mean a separate patch for introducing the "rx_srq" counter and updating
the function rdma_dump_device_counters()?
> Besides that patch lgtm.
>
> Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
>
>> --
>> 2.20.1
>>
>>
* Re: [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer
2019-04-07 8:13 ` Kamal Heib
@ 2019-04-07 9:13 ` Yuval Shaia
0 siblings, 0 replies; 13+ messages in thread
From: Yuval Shaia @ 2019-04-07 9:13 UTC (permalink / raw)
To: Kamal Heib; +Cc: qemu-devel
On Sun, Apr 07, 2019 at 11:13:15AM +0300, Kamal Heib wrote:
>
>
> On 4/3/19 9:05 PM, Yuval Shaia wrote:
> > On Wed, Apr 03, 2019 at 02:33:40PM +0300, Kamal Heib wrote:
> >> Add the required functions and definitions to support shared receive
> >> queues (SRQs) in the backend layer.
> >>
> >> Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
> >> ---
> >> hw/rdma/rdma_backend.c | 116 +++++++++++++++++++++++++++++++++++-
> >> hw/rdma/rdma_backend.h | 12 ++++
> >> hw/rdma/rdma_backend_defs.h | 5 ++
> >> hw/rdma/rdma_rm.c | 2 +
> >> hw/rdma/rdma_rm_defs.h | 1 +
> >> 5 files changed, 134 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
> >> index d1660b6474fa..04dfd63a573b 100644
> >> --- a/hw/rdma/rdma_backend.c
> >> +++ b/hw/rdma/rdma_backend.c
> >> @@ -40,6 +40,7 @@ typedef struct BackendCtx {
> >> void *up_ctx;
> >> struct ibv_sge sge; /* Used to save MAD recv buffer */
> >> RdmaBackendQP *backend_qp; /* To maintain recv buffers */
> >> + RdmaBackendSRQ *backend_srq;
> >> } BackendCtx;
> >>
> >> struct backend_umad {
> >> @@ -99,6 +100,7 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
> >> int i, ne, total_ne = 0;
> >> BackendCtx *bctx;
> >> struct ibv_wc wc[2];
> >> + RdmaProtectedGSList *cqe_ctx_list;
> >>
> >> qemu_mutex_lock(&rdma_dev_res->lock);
> >> do {
> >> @@ -116,8 +118,13 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
> >>
> >> comp_handler(bctx->up_ctx, &wc[i]);
> >>
> >> - rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list,
> >> - wc[i].wr_id);
> >> + if (bctx->backend_qp) {
> >> + cqe_ctx_list = &bctx->backend_qp->cqe_ctx_list;
> >> + } else {
> >> + cqe_ctx_list = &bctx->backend_srq->cqe_ctx_list;
> >> + }
> >> +
> >> + rdma_protected_gslist_remove_int32(cqe_ctx_list, wc[i].wr_id);
> >> rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id);
> >> g_free(bctx);
> >> }
> >> @@ -662,6 +669,60 @@ err_free_bctx:
> >> g_free(bctx);
> >> }
> >>
> >> +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
> >> + RdmaBackendSRQ *srq, struct ibv_sge *sge,
> >> + uint32_t num_sge, void *ctx)
> >> +{
> >> + BackendCtx *bctx;
> >> + struct ibv_sge new_sge[MAX_SGE];
> >> + uint32_t bctx_id;
> >> + int rc;
> >> + struct ibv_recv_wr wr = {}, *bad_wr;
> >> +
> >> + bctx = g_malloc0(sizeof(*bctx));
> >> + bctx->up_ctx = ctx;
> >> + bctx->backend_srq = srq;
> >> +
> >> + rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
> >> + if (unlikely(rc)) {
> >> + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
> >> + goto err_free_bctx;
> >> + }
> >> +
> >> + rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id);
> >> +
> >> + rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
> >> + &backend_dev->rdma_dev_res->stats.rx_bufs_len);
> >> + if (rc) {
> >> + complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
> >> + goto err_dealloc_cqe_ctx;
> >> + }
> >> +
> >> + wr.num_sge = num_sge;
> >> + wr.sg_list = new_sge;
> >> + wr.wr_id = bctx_id;
> >> + rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr);
> >> + if (rc) {
> >> + rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, errno=%d",
> >> + srq->ibsrq->handle, rc, errno);
> >> + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
> >> + goto err_dealloc_cqe_ctx;
> >> + }
> >> +
> >> + atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe);
> >> + backend_dev->rdma_dev_res->stats.rx_bufs++;
> >> + backend_dev->rdma_dev_res->stats.rx_srq++;
> >
> > You should update function rdma_dump_device_counters with this new
> > counter.
> >
> >> +
> >> + return;
> >> +
> >> +err_dealloc_cqe_ctx:
> >> + backend_dev->rdma_dev_res->stats.rx_bufs_err++;
> >> + rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id);
> >> +
> >> +err_free_bctx:
> >> + g_free(bctx);
> >> +}
> >> +
> >> int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd)
> >> {
> >> pd->ibpd = ibv_alloc_pd(backend_dev->context);
> >> @@ -938,6 +999,55 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp, RdmaDeviceResources *dev_res)
> >> rdma_protected_gslist_destroy(&qp->cqe_ctx_list);
> >> }
> >>
> >> +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
> >> + uint32_t max_wr, uint32_t max_sge,
> >> + uint32_t srq_limit)
> >> +{
> >> + struct ibv_srq_init_attr srq_init_attr = {};
> >> +
> >> + srq_init_attr.attr.max_wr = max_wr;
> >> + srq_init_attr.attr.max_sge = max_sge;
> >> + srq_init_attr.attr.srq_limit = srq_limit;
> >> +
> >> + srq->ibsrq = ibv_create_srq(pd->ibpd, &srq_init_attr);
> >> + if (!srq->ibsrq) {
> >> + rdma_error_report("ibv_create_srq failed, errno=%d", errno);
> >> + return -EIO;
> >> + }
> >> +
> >> + rdma_protected_gslist_init(&srq->cqe_ctx_list);
> >> +
> >> + return 0;
> >> +}
> >> +
> >> +int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr)
> >> +{
> >> + if (!srq->ibsrq) {
> >> + return -EINVAL;
> >> + }
> >> +
> >> + return ibv_query_srq(srq->ibsrq, srq_attr);
> >> +}
> >> +
> >> +int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
> >> + int srq_attr_mask)
> >> +{
> >> + if (!srq->ibsrq) {
> >> + return -EINVAL;
> >> + }
> >> +
> >> + return ibv_modify_srq(srq->ibsrq, srq_attr, srq_attr_mask);
> >> +}
> >> +
> >> +void rdma_backend_destroy_srq(RdmaBackendSRQ *srq, RdmaDeviceResources *dev_res)
> >> +{
> >> + if (srq->ibsrq) {
> >> + ibv_destroy_srq(srq->ibsrq);
> >> + }
> >> + g_slist_foreach(srq->cqe_ctx_list.list, free_cqe_ctx, dev_res);
> >> + rdma_protected_gslist_destroy(&srq->cqe_ctx_list);
> >> +}
> >> +
> >> #define CHK_ATTR(req, dev, member, fmt) ({ \
> >> trace_rdma_check_dev_attr(#member, dev.member, req->member); \
> >> if (req->member > dev.member) { \
> >> @@ -960,6 +1070,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
> >> }
> >>
> >> dev_attr->max_sge = MAX_SGE;
> >> + dev_attr->max_srq_sge = MAX_SGE;
> >>
> >> CHK_ATTR(dev_attr, bk_dev_attr, max_mr_size, "%" PRId64);
> >> CHK_ATTR(dev_attr, bk_dev_attr, max_qp, "%d");
> >> @@ -970,6 +1081,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
> >> CHK_ATTR(dev_attr, bk_dev_attr, max_qp_rd_atom, "%d");
> >> CHK_ATTR(dev_attr, bk_dev_attr, max_qp_init_rd_atom, "%d");
> >> CHK_ATTR(dev_attr, bk_dev_attr, max_ah, "%d");
> >> + CHK_ATTR(dev_attr, bk_dev_attr, max_srq, "%d");
> >>
> >> return 0;
> >> }
> >> diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
> >> index 38056d97c7fc..cad7956d98e8 100644
> >> --- a/hw/rdma/rdma_backend.h
> >> +++ b/hw/rdma/rdma_backend.h
> >> @@ -114,4 +114,16 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
> >> RdmaBackendQP *qp, uint8_t qp_type,
> >> struct ibv_sge *sge, uint32_t num_sge, void *ctx);
> >>
> >> +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
> >> + uint32_t max_wr, uint32_t max_sge,
> >> + uint32_t srq_limit);
> >> +int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr);
> >> +int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
> >> + int srq_attr_mask);
> >> +void rdma_backend_destroy_srq(RdmaBackendSRQ *srq,
> >> + RdmaDeviceResources *dev_res);
> >> +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
> >> + RdmaBackendSRQ *srq, struct ibv_sge *sge,
> >> + uint32_t num_sge, void *ctx);
> >> +
> >> #endif
> >> diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h
> >> index 817153dc8cf4..0b55be35038d 100644
> >> --- a/hw/rdma/rdma_backend_defs.h
> >> +++ b/hw/rdma/rdma_backend_defs.h
> >> @@ -68,4 +68,9 @@ typedef struct RdmaBackendQP {
> >> RdmaProtectedGSList cqe_ctx_list;
> >> } RdmaBackendQP;
> >>
> >> +typedef struct RdmaBackendSRQ {
> >> + struct ibv_srq *ibsrq;
> >> + RdmaProtectedGSList cqe_ctx_list;
> >> +} RdmaBackendSRQ;
> >> +
> >> #endif
> >> diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
> >> index bac3b2f4a6c3..b683506b8616 100644
> >> --- a/hw/rdma/rdma_rm.c
> >> +++ b/hw/rdma/rdma_rm.c
> >> @@ -37,6 +37,8 @@ void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res)
> >> dev_res->stats.tx_err);
> >> monitor_printf(mon, "\trx_bufs : %" PRId64 "\n",
> >> dev_res->stats.rx_bufs);
> >> + monitor_printf(mon, "\trx_srq : %" PRId64 "\n",
> >> + dev_res->stats.rx_srq);
[1]
> >> monitor_printf(mon, "\trx_bufs_len : %" PRId64 "\n",
> >> dev_res->stats.rx_bufs_len);
> >> monitor_printf(mon, "\trx_bufs_err : %" PRId64 "\n",
> >> diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
> >> index c200d311de37..e774af528022 100644
> >> --- a/hw/rdma/rdma_rm_defs.h
> >> +++ b/hw/rdma/rdma_rm_defs.h
> >> @@ -106,6 +106,7 @@ typedef struct RdmaRmStats {
> >> uint64_t rx_bufs;
> >> uint64_t rx_bufs_len;
> >> uint64_t rx_bufs_err;
> >> + uint64_t rx_srq;
> >> uint64_t completions;
> >> uint64_t mad_tx;
> >> uint64_t mad_tx_err;
> >
> > Please make a separate patch to update the function
> > rdma_dump_device_counters.
> >
>
> You mean a separate patch for introducing the "rx_srq" counter & update the
> function rdma_dump_device_counters()?
My bad, missed that ([1]).
No need for a separate patch.
>
> > Besides that patch lgtm.
> >
> > Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
> >
> >> --
> >> 2.20.1
> >>
> >>
>
* [Qemu-devel] [PATCH 2/4] hw/rdma: Add support for managing SRQ resource
2019-04-03 11:33 [Qemu-devel] [PATCH v3 0/4] pvrdma: Add support for SRQ Kamal Heib
2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer Kamal Heib
@ 2019-04-03 11:33 ` Kamal Heib
2019-04-03 18:10 ` Yuval Shaia
2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 3/4] hw/rdma: Modify create/destroy QP to support SRQ Kamal Heib
` (2 subsequent siblings)
4 siblings, 1 reply; 13+ messages in thread
From: Kamal Heib @ 2019-04-03 11:33 UTC (permalink / raw)
To: qemu-devel; +Cc: Marcel Apfelbaum, Yuval Shaia, Kamal Heib
Add the required functions and definitions to support managing shared
receive queues (SRQs).
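For reference, an illustrative sketch (not part of the patch) of the resource-manager flow these helpers enable; dev_res, a valid pd_handle and the opaque pointer are assumed, and error handling is trimmed:

    uint32_t srq_handle;
    struct ibv_srq_attr attr = {};
    int rc;

    rc = rdma_rm_alloc_srq(dev_res, pd_handle, 64 /* max_wr */, 1 /* max_sge */,
                           0 /* srq_limit */, &srq_handle, opaque);
    if (rc) {
        return rc;
    }

    /* Read back the limits the backend actually granted */
    rdma_rm_query_srq(dev_res, srq_handle, &attr);

    /* Arm the SRQ limit event; srq_limit must be non-zero here */
    attr.srq_limit = 16;
    rdma_rm_modify_srq(dev_res, srq_handle, &attr, IBV_SRQ_LIMIT);

    /* Frees the backend SRQ and releases the table entry */
    rdma_rm_dealloc_srq(dev_res, srq_handle);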
Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
---
hw/rdma/rdma_rm.c | 93 ++++++++++++++++++++++++++++++++++++++++++
hw/rdma/rdma_rm.h | 10 +++++
hw/rdma/rdma_rm_defs.h | 8 ++++
3 files changed, 111 insertions(+)
diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
index b683506b8616..c4fb140dcd96 100644
--- a/hw/rdma/rdma_rm.c
+++ b/hw/rdma/rdma_rm.c
@@ -544,6 +544,96 @@ void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle)
rdma_res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
}
+RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle)
+{
+ return rdma_res_tbl_get(&dev_res->srq_tbl, srq_handle);
+}
+
+int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle,
+ uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit,
+ uint32_t *srq_handle, void *opaque)
+{
+ RdmaRmSRQ *srq;
+ RdmaRmPD *pd;
+ int rc;
+
+ pd = rdma_rm_get_pd(dev_res, pd_handle);
+ if (!pd) {
+ return -EINVAL;
+ }
+
+ srq = rdma_res_tbl_alloc(&dev_res->srq_tbl, srq_handle);
+ if (!srq) {
+ return -ENOMEM;
+ }
+
+ rc = rdma_backend_create_srq(&srq->backend_srq, &pd->backend_pd,
+ max_wr, max_sge, srq_limit);
+ if (rc) {
+ rc = -EIO;
+ goto out_dealloc_srq;
+ }
+
+ srq->opaque = opaque;
+
+ return 0;
+
+out_dealloc_srq:
+ rdma_res_tbl_dealloc(&dev_res->srq_tbl, *srq_handle);
+
+ return rc;
+}
+
+int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
+ struct ibv_srq_attr *srq_attr)
+{
+ RdmaRmSRQ *srq;
+
+ srq = rdma_rm_get_srq(dev_res, srq_handle);
+ if (!srq) {
+ return -EINVAL;
+ }
+
+ return rdma_backend_query_srq(&srq->backend_srq, srq_attr);
+}
+
+int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
+ struct ibv_srq_attr *srq_attr, int srq_attr_mask)
+{
+ RdmaRmSRQ *srq;
+
+ srq = rdma_rm_get_srq(dev_res, srq_handle);
+ if (!srq) {
+ return -EINVAL;
+ }
+
+ if ((srq_attr_mask & IBV_SRQ_LIMIT) &&
+ (srq_attr->srq_limit == 0)) {
+ return -EINVAL;
+ }
+
+ if ((srq_attr_mask & IBV_SRQ_MAX_WR) &&
+ (srq_attr->max_wr == 0)) {
+ return -EINVAL;
+ }
+
+ return rdma_backend_modify_srq(&srq->backend_srq, srq_attr,
+ srq_attr_mask);
+}
+
+void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle)
+{
+ RdmaRmSRQ *srq;
+
+ srq = rdma_rm_get_srq(dev_res, srq_handle);
+ if (!srq) {
+ return;
+ }
+
+ rdma_backend_destroy_srq(&srq->backend_srq, dev_res);
+ rdma_res_tbl_dealloc(&dev_res->srq_tbl, srq_handle);
+}
+
void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
{
void **cqe_ctx;
@@ -673,6 +763,8 @@ int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr)
res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp *
dev_attr->max_qp_wr, sizeof(void *));
res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));
+ res_tbl_init("SRQ", &dev_res->srq_tbl, dev_attr->max_srq,
+ sizeof(RdmaRmSRQ));
init_ports(dev_res);
@@ -691,6 +783,7 @@ void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
fini_ports(dev_res, backend_dev, ifname);
+ res_tbl_free(&dev_res->srq_tbl);
res_tbl_free(&dev_res->uc_tbl);
res_tbl_free(&dev_res->cqe_ctx_tbl);
res_tbl_free(&dev_res->qp_tbl);
diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h
index 4f03f9b8c5f1..e88ab95e264b 100644
--- a/hw/rdma/rdma_rm.h
+++ b/hw/rdma/rdma_rm.h
@@ -65,6 +65,16 @@ int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
int attr_mask, struct ibv_qp_init_attr *init_attr);
void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle);
+RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle);
+int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle,
+ uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit,
+ uint32_t *srq_handle, void *opaque);
+int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
+ struct ibv_srq_attr *srq_attr);
+int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
+ struct ibv_srq_attr *srq_attr, int srq_attr_mask);
+void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle);
+
int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
void *ctx);
void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id);
diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
index e774af528022..7bdd9f291f9f 100644
--- a/hw/rdma/rdma_rm_defs.h
+++ b/hw/rdma/rdma_rm_defs.h
@@ -33,6 +33,7 @@
#define MAX_QP_RD_ATOM 16
#define MAX_QP_INIT_RD_ATOM 16
#define MAX_AH 64
+#define MAX_SRQ 512
#define MAX_RM_TBL_NAME 16
#define MAX_CONSEQ_EMPTY_POLL_CQ 4096 /* considered as error above this */
@@ -89,6 +90,12 @@ typedef struct RdmaRmQP {
enum ibv_qp_state qp_state;
} RdmaRmQP;
+typedef struct RdmaRmSRQ {
+ RdmaBackendSRQ backend_srq;
+ uint32_t recv_cq_handle;
+ void *opaque;
+} RdmaRmSRQ;
+
typedef struct RdmaRmGid {
union ibv_gid gid;
int backend_gid_index;
@@ -129,6 +136,7 @@ struct RdmaDeviceResources {
RdmaRmResTbl qp_tbl;
RdmaRmResTbl cq_tbl;
RdmaRmResTbl cqe_ctx_tbl;
+ RdmaRmResTbl srq_tbl;
GHashTable *qp_hash; /* Keeps mapping between real and emulated */
QemuMutex lock;
RdmaRmStats stats;
--
2.20.1
* Re: [Qemu-devel] [PATCH 2/4] hw/rdma: Add support for managing SRQ resource
2019-04-03 11:33 ` [Qemu-devel] [PATCH 2/4] hw/rdma: Add support for managing SRQ resource Kamal Heib
@ 2019-04-03 18:10 ` Yuval Shaia
0 siblings, 0 replies; 13+ messages in thread
From: Yuval Shaia @ 2019-04-03 18:10 UTC (permalink / raw)
To: Kamal Heib; +Cc: qemu-devel
On Wed, Apr 03, 2019 at 02:33:41PM +0300, Kamal Heib wrote:
> Add the required functions and definitions to support managing shared
> receive queues (SRQs).
>
> Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
> ---
> hw/rdma/rdma_rm.c | 93 ++++++++++++++++++++++++++++++++++++++++++
> hw/rdma/rdma_rm.h | 10 +++++
> hw/rdma/rdma_rm_defs.h | 8 ++++
> 3 files changed, 111 insertions(+)
>
> diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
> index b683506b8616..c4fb140dcd96 100644
> --- a/hw/rdma/rdma_rm.c
> +++ b/hw/rdma/rdma_rm.c
> @@ -544,6 +544,96 @@ void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle)
> rdma_res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
> }
>
> +RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle)
> +{
> + return rdma_res_tbl_get(&dev_res->srq_tbl, srq_handle);
> +}
> +
> +int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle,
> + uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit,
> + uint32_t *srq_handle, void *opaque)
> +{
> + RdmaRmSRQ *srq;
> + RdmaRmPD *pd;
> + int rc;
> +
> + pd = rdma_rm_get_pd(dev_res, pd_handle);
> + if (!pd) {
> + return -EINVAL;
> + }
> +
> + srq = rdma_res_tbl_alloc(&dev_res->srq_tbl, srq_handle);
> + if (!srq) {
> + return -ENOMEM;
> + }
> +
> + rc = rdma_backend_create_srq(&srq->backend_srq, &pd->backend_pd,
> + max_wr, max_sge, srq_limit);
> + if (rc) {
> + rc = -EIO;
> + goto out_dealloc_srq;
> + }
> +
> + srq->opaque = opaque;
> +
> + return 0;
> +
> +out_dealloc_srq:
> + rdma_res_tbl_dealloc(&dev_res->srq_tbl, *srq_handle);
> +
> + return rc;
> +}
> +
> +int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
> + struct ibv_srq_attr *srq_attr)
> +{
> + RdmaRmSRQ *srq;
> +
> + srq = rdma_rm_get_srq(dev_res, srq_handle);
> + if (!srq) {
> + return -EINVAL;
> + }
> +
> + return rdma_backend_query_srq(&srq->backend_srq, srq_attr);
> +}
> +
> +int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
> + struct ibv_srq_attr *srq_attr, int srq_attr_mask)
> +{
> + RdmaRmSRQ *srq;
> +
> + srq = rdma_rm_get_srq(dev_res, srq_handle);
> + if (!srq) {
> + return -EINVAL;
> + }
> +
> + if ((srq_attr_mask & IBV_SRQ_LIMIT) &&
> + (srq_attr->srq_limit == 0)) {
> + return -EINVAL;
> + }
> +
> + if ((srq_attr_mask & IBV_SRQ_MAX_WR) &&
> + (srq_attr->max_wr == 0)) {
> + return -EINVAL;
> + }
> +
> + return rdma_backend_modify_srq(&srq->backend_srq, srq_attr,
> + srq_attr_mask);
> +}
> +
> +void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle)
> +{
> + RdmaRmSRQ *srq;
> +
> + srq = rdma_rm_get_srq(dev_res, srq_handle);
> + if (!srq) {
> + return;
> + }
> +
> + rdma_backend_destroy_srq(&srq->backend_srq, dev_res);
> + rdma_res_tbl_dealloc(&dev_res->srq_tbl, srq_handle);
> +}
> +
> void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
> {
> void **cqe_ctx;
> @@ -673,6 +763,8 @@ int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr)
> res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp *
> dev_attr->max_qp_wr, sizeof(void *));
> res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));
> + res_tbl_init("SRQ", &dev_res->srq_tbl, dev_attr->max_srq,
> + sizeof(RdmaRmSRQ));
>
> init_ports(dev_res);
>
> @@ -691,6 +783,7 @@ void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
>
> fini_ports(dev_res, backend_dev, ifname);
>
> + res_tbl_free(&dev_res->srq_tbl);
> res_tbl_free(&dev_res->uc_tbl);
> res_tbl_free(&dev_res->cqe_ctx_tbl);
> res_tbl_free(&dev_res->qp_tbl);
> diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h
> index 4f03f9b8c5f1..e88ab95e264b 100644
> --- a/hw/rdma/rdma_rm.h
> +++ b/hw/rdma/rdma_rm.h
> @@ -65,6 +65,16 @@ int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
> int attr_mask, struct ibv_qp_init_attr *init_attr);
> void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle);
>
> +RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle);
> +int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle,
> + uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit,
> + uint32_t *srq_handle, void *opaque);
> +int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
> + struct ibv_srq_attr *srq_attr);
> +int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
> + struct ibv_srq_attr *srq_attr, int srq_attr_mask);
> +void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle);
> +
> int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
> void *ctx);
> void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id);
> diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
> index e774af528022..7bdd9f291f9f 100644
> --- a/hw/rdma/rdma_rm_defs.h
> +++ b/hw/rdma/rdma_rm_defs.h
> @@ -33,6 +33,7 @@
> #define MAX_QP_RD_ATOM 16
> #define MAX_QP_INIT_RD_ATOM 16
> #define MAX_AH 64
> +#define MAX_SRQ 512
>
> #define MAX_RM_TBL_NAME 16
> #define MAX_CONSEQ_EMPTY_POLL_CQ 4096 /* considered as error above this */
> @@ -89,6 +90,12 @@ typedef struct RdmaRmQP {
> enum ibv_qp_state qp_state;
> } RdmaRmQP;
>
> +typedef struct RdmaRmSRQ {
> + RdmaBackendSRQ backend_srq;
> + uint32_t recv_cq_handle;
> + void *opaque;
> +} RdmaRmSRQ;
> +
> typedef struct RdmaRmGid {
> union ibv_gid gid;
> int backend_gid_index;
> @@ -129,6 +136,7 @@ struct RdmaDeviceResources {
> RdmaRmResTbl qp_tbl;
> RdmaRmResTbl cq_tbl;
> RdmaRmResTbl cqe_ctx_tbl;
> + RdmaRmResTbl srq_tbl;
> GHashTable *qp_hash; /* Keeps mapping between real and emulated */
> QemuMutex lock;
> RdmaRmStats stats;
For some reason v3 is omitted from the subject, weird.
Anyway, it looks like you took care of the comment raised.
With that - patch lgtm.
Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
> --
> 2.20.1
>
>
* [Qemu-devel] [PATCH v3 3/4] hw/rdma: Modify create/destroy QP to support SRQ
2019-04-03 11:33 [Qemu-devel] [PATCH v3 0/4] pvrdma: Add support for SRQ Kamal Heib
2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer Kamal Heib
2019-04-03 11:33 ` [Qemu-devel] [PATCH 2/4] hw/rdma: Add support for managing SRQ resource Kamal Heib
@ 2019-04-03 11:33 ` Kamal Heib
2019-04-03 18:15 ` Yuval Shaia
2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 4/4] hw/pvrdma: Add support for SRQ Kamal Heib
2019-04-03 18:19 ` [Qemu-devel] [PATCH v3 0/4] pvrdma: " Yuval Shaia
4 siblings, 1 reply; 13+ messages in thread
From: Kamal Heib @ 2019-04-03 11:33 UTC (permalink / raw)
To: qemu-devel; +Cc: Marcel Apfelbaum, Yuval Shaia, Kamal Heib
Modify create/destroy QP to support shared receive queue and rearrange
the destroy_qp() code to avoid touching the QP after calling
rdma_rm_dealloc_qp().
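For reference, an illustrative sketch (not part of the patch) of what the backend change amounts to at the ibverbs level; pd, scq, rcq and srq are assumed to be already-created ibverbs objects, and max_send_wr/max_send_sge are assumed caller-supplied values:

    struct ibv_qp_init_attr attr = {};
    struct ibv_qp *ibqp;

    attr.qp_type = IBV_QPT_RC;
    attr.send_cq = scq;
    attr.recv_cq = rcq;
    attr.cap.max_send_wr = max_send_wr;
    attr.cap.max_send_sge = max_send_sge;
    /* When an SRQ is attached, it replaces the per-QP receive ring,
     * so max_recv_wr/max_recv_sge on the QP itself are not needed */
    attr.srq = srq;

    ibqp = ibv_create_qp(pd, &attr);
    if (!ibqp) {
        return -EIO;
    }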
Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
---
hw/rdma/rdma_backend.c | 9 ++++--
hw/rdma/rdma_backend.h | 6 ++--
hw/rdma/rdma_rm.c | 22 +++++++++++++--
hw/rdma/rdma_rm.h | 3 +-
hw/rdma/rdma_rm_defs.h | 1 +
hw/rdma/vmw/pvrdma_cmd.c | 59 ++++++++++++++++++++++++----------------
6 files changed, 67 insertions(+), 33 deletions(-)
diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
index 04dfd63a573b..cf34874e9d2f 100644
--- a/hw/rdma/rdma_backend.c
+++ b/hw/rdma/rdma_backend.c
@@ -794,9 +794,9 @@ void rdma_backend_destroy_cq(RdmaBackendCQ *cq)
int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
RdmaBackendPD *pd, RdmaBackendCQ *scq,
- RdmaBackendCQ *rcq, uint32_t max_send_wr,
- uint32_t max_recv_wr, uint32_t max_send_sge,
- uint32_t max_recv_sge)
+ RdmaBackendCQ *rcq, RdmaBackendSRQ *srq,
+ uint32_t max_send_wr, uint32_t max_recv_wr,
+ uint32_t max_send_sge, uint32_t max_recv_sge)
{
struct ibv_qp_init_attr attr = {};
@@ -824,6 +824,9 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
attr.cap.max_recv_wr = max_recv_wr;
attr.cap.max_send_sge = max_send_sge;
attr.cap.max_recv_sge = max_recv_sge;
+ if (srq) {
+ attr.srq = srq->ibsrq;
+ }
qp->ibqp = ibv_create_qp(pd->ibpd, &attr);
if (!qp->ibqp) {
diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
index cad7956d98e8..7c1a19a2b5ff 100644
--- a/hw/rdma/rdma_backend.h
+++ b/hw/rdma/rdma_backend.h
@@ -89,9 +89,9 @@ void rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq);
int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
RdmaBackendPD *pd, RdmaBackendCQ *scq,
- RdmaBackendCQ *rcq, uint32_t max_send_wr,
- uint32_t max_recv_wr, uint32_t max_send_sge,
- uint32_t max_recv_sge);
+ RdmaBackendCQ *rcq, RdmaBackendSRQ *srq,
+ uint32_t max_send_wr, uint32_t max_recv_wr,
+ uint32_t max_send_sge, uint32_t max_recv_sge);
int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
uint8_t qp_type, uint32_t qkey);
int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
index c0bb27cb6b90..96279e8d6561 100644
--- a/hw/rdma/rdma_rm.c
+++ b/hw/rdma/rdma_rm.c
@@ -386,12 +386,14 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
uint8_t qp_type, uint32_t max_send_wr,
uint32_t max_send_sge, uint32_t send_cq_handle,
uint32_t max_recv_wr, uint32_t max_recv_sge,
- uint32_t recv_cq_handle, void *opaque, uint32_t *qpn)
+ uint32_t recv_cq_handle, void *opaque, uint32_t *qpn,
+ uint8_t is_srq, uint32_t srq_handle)
{
int rc;
RdmaRmQP *qp;
RdmaRmCQ *scq, *rcq;
RdmaRmPD *pd;
+ RdmaRmSRQ *srq = NULL;
uint32_t rm_qpn;
pd = rdma_rm_get_pd(dev_res, pd_handle);
@@ -408,6 +410,16 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
return -EINVAL;
}
+ if (is_srq) {
+ srq = rdma_rm_get_srq(dev_res, srq_handle);
+ if (!srq) {
+ rdma_error_report("Invalid srqn %d", srq_handle);
+ return -EINVAL;
+ }
+
+ srq->recv_cq_handle = recv_cq_handle;
+ }
+
if (qp_type == IBV_QPT_GSI) {
scq->notify = CNT_SET;
rcq->notify = CNT_SET;
@@ -424,10 +436,14 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
qp->send_cq_handle = send_cq_handle;
qp->recv_cq_handle = recv_cq_handle;
qp->opaque = opaque;
+ qp->is_srq = is_srq;
rc = rdma_backend_create_qp(&qp->backend_qp, qp_type, &pd->backend_pd,
- &scq->backend_cq, &rcq->backend_cq, max_send_wr,
- max_recv_wr, max_send_sge, max_recv_sge);
+ &scq->backend_cq, &rcq->backend_cq,
+ is_srq ? &srq->backend_srq : NULL,
+ max_send_wr, max_recv_wr, max_send_sge,
+ max_recv_sge);
+
if (rc) {
rc = -EIO;
goto out_dealloc_qp;
diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h
index e88ab95e264b..e8639909cd34 100644
--- a/hw/rdma/rdma_rm.h
+++ b/hw/rdma/rdma_rm.h
@@ -53,7 +53,8 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
uint8_t qp_type, uint32_t max_send_wr,
uint32_t max_send_sge, uint32_t send_cq_handle,
uint32_t max_recv_wr, uint32_t max_recv_sge,
- uint32_t recv_cq_handle, void *opaque, uint32_t *qpn);
+ uint32_t recv_cq_handle, void *opaque, uint32_t *qpn,
+ uint8_t is_srq, uint32_t srq_handle);
RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn);
int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
index 7bdd9f291f9f..534f2f74d3d1 100644
--- a/hw/rdma/rdma_rm_defs.h
+++ b/hw/rdma/rdma_rm_defs.h
@@ -88,6 +88,7 @@ typedef struct RdmaRmQP {
uint32_t send_cq_handle;
uint32_t recv_cq_handle;
enum ibv_qp_state qp_state;
+ uint8_t is_srq;
} RdmaRmQP;
typedef struct RdmaRmSRQ {
diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
index 4afcd2037db2..b931bb6dc9d4 100644
--- a/hw/rdma/vmw/pvrdma_cmd.c
+++ b/hw/rdma/vmw/pvrdma_cmd.c
@@ -357,7 +357,7 @@ static int destroy_cq(PVRDMADev *dev, union pvrdma_cmd_req *req,
static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
PvrdmaRing **rings, uint32_t scqe, uint32_t smax_sge,
uint32_t spages, uint32_t rcqe, uint32_t rmax_sge,
- uint32_t rpages)
+ uint32_t rpages, uint8_t is_srq)
{
uint64_t *dir = NULL, *tbl = NULL;
PvrdmaRing *sr, *rr;
@@ -365,9 +365,14 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
char ring_name[MAX_RING_NAME_SZ];
uint32_t wqe_sz;
- if (!spages || spages > PVRDMA_MAX_FAST_REG_PAGES
- || !rpages || rpages > PVRDMA_MAX_FAST_REG_PAGES) {
- rdma_error_report("Got invalid page count for QP ring: %d, %d", spages,
+ if (!spages || spages > PVRDMA_MAX_FAST_REG_PAGES) {
+ rdma_error_report("Got invalid send page count for QP ring: %d",
+ spages);
+ return rc;
+ }
+
+ if (!is_srq && (!rpages || rpages > PVRDMA_MAX_FAST_REG_PAGES)) {
+ rdma_error_report("Got invalid recv page count for QP ring: %d",
rpages);
return rc;
}
@@ -384,8 +389,12 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
goto out;
}
- sr = g_malloc(2 * sizeof(*rr));
- rr = &sr[1];
+ if (!is_srq) {
+ sr = g_malloc(2 * sizeof(*rr));
+ rr = &sr[1];
+ } else {
+ sr = g_malloc(sizeof(*sr));
+ }
*rings = sr;
@@ -407,15 +416,18 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
goto out_unmap_ring_state;
}
- /* Create recv ring */
- rr->ring_state = &sr->ring_state[1];
- wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) +
- sizeof(struct pvrdma_sge) * rmax_sge - 1);
- sprintf(ring_name, "qp_rring_%" PRIx64, pdir_dma);
- rc = pvrdma_ring_init(rr, ring_name, pci_dev, rr->ring_state,
- rcqe, wqe_sz, (dma_addr_t *)&tbl[1 + spages], rpages);
- if (rc) {
- goto out_free_sr;
+ if (!is_srq) {
+ /* Create recv ring */
+ rr->ring_state = &sr->ring_state[1];
+ wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) +
+ sizeof(struct pvrdma_sge) * rmax_sge - 1);
+ sprintf(ring_name, "qp_rring_%" PRIx64, pdir_dma);
+ rc = pvrdma_ring_init(rr, ring_name, pci_dev, rr->ring_state,
+ rcqe, wqe_sz, (dma_addr_t *)&tbl[1 + spages],
+ rpages);
+ if (rc) {
+ goto out_free_sr;
+ }
}
goto out;
@@ -436,10 +448,12 @@ out:
return rc;
}
-static void destroy_qp_rings(PvrdmaRing *ring)
+static void destroy_qp_rings(PvrdmaRing *ring, uint8_t is_srq)
{
pvrdma_ring_free(&ring[0]);
- pvrdma_ring_free(&ring[1]);
+ if (!is_srq) {
+ pvrdma_ring_free(&ring[1]);
+ }
rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE);
g_free(ring);
@@ -458,7 +472,7 @@ static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
rc = create_qp_rings(PCI_DEVICE(dev), cmd->pdir_dma, &rings,
cmd->max_send_wr, cmd->max_send_sge, cmd->send_chunks,
cmd->max_recv_wr, cmd->max_recv_sge,
- cmd->total_chunks - cmd->send_chunks - 1);
+ cmd->total_chunks - cmd->send_chunks - 1, cmd->is_srq);
if (rc) {
return rc;
}
@@ -467,9 +481,9 @@ static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
cmd->max_send_wr, cmd->max_send_sge,
cmd->send_cq_handle, cmd->max_recv_wr,
cmd->max_recv_sge, cmd->recv_cq_handle, rings,
- &resp->qpn);
+ &resp->qpn, cmd->is_srq, cmd->srq_handle);
if (rc) {
- destroy_qp_rings(rings);
+ destroy_qp_rings(rings, cmd->is_srq);
return rc;
}
@@ -531,10 +545,9 @@ static int destroy_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
return -EINVAL;
}
- rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle);
-
ring = (PvrdmaRing *)qp->opaque;
- destroy_qp_rings(ring);
+ destroy_qp_rings(ring, qp->is_srq);
+ rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle);
return 0;
}
--
2.20.1
* Re: [Qemu-devel] [PATCH v3 3/4] hw/rdma: Modify create/destroy QP to support SRQ
2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 3/4] hw/rdma: Modify create/destroy QP to support SRQ Kamal Heib
@ 2019-04-03 18:15 ` Yuval Shaia
0 siblings, 0 replies; 13+ messages in thread
From: Yuval Shaia @ 2019-04-03 18:15 UTC (permalink / raw)
To: Kamal Heib; +Cc: qemu-devel
On Wed, Apr 03, 2019 at 02:33:42PM +0300, Kamal Heib wrote:
> Modify create/destroy QP to support a shared receive queue and rearrange
> the destroy_qp() code to avoid touching the QP after calling
> rdma_rm_dealloc_qp().
>
> Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
> ---
> hw/rdma/rdma_backend.c | 9 ++++--
> hw/rdma/rdma_backend.h | 6 ++--
> hw/rdma/rdma_rm.c | 22 +++++++++++++--
> hw/rdma/rdma_rm.h | 3 +-
> hw/rdma/rdma_rm_defs.h | 1 +
> hw/rdma/vmw/pvrdma_cmd.c | 59 ++++++++++++++++++++++++----------------
> 6 files changed, 67 insertions(+), 33 deletions(-)
>
> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
> index 04dfd63a573b..cf34874e9d2f 100644
> --- a/hw/rdma/rdma_backend.c
> +++ b/hw/rdma/rdma_backend.c
> @@ -794,9 +794,9 @@ void rdma_backend_destroy_cq(RdmaBackendCQ *cq)
>
> int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
> RdmaBackendPD *pd, RdmaBackendCQ *scq,
> - RdmaBackendCQ *rcq, uint32_t max_send_wr,
> - uint32_t max_recv_wr, uint32_t max_send_sge,
> - uint32_t max_recv_sge)
> + RdmaBackendCQ *rcq, RdmaBackendSRQ *srq,
> + uint32_t max_send_wr, uint32_t max_recv_wr,
> + uint32_t max_send_sge, uint32_t max_recv_sge)
> {
> struct ibv_qp_init_attr attr = {};
>
> @@ -824,6 +824,9 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
> attr.cap.max_recv_wr = max_recv_wr;
> attr.cap.max_send_sge = max_send_sge;
> attr.cap.max_recv_sge = max_recv_sge;
> + if (srq) {
> + attr.srq = srq->ibsrq;
> + }
>
> qp->ibqp = ibv_create_qp(pd->ibpd, &attr);
> if (!qp->ibqp) {
> diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
> index cad7956d98e8..7c1a19a2b5ff 100644
> --- a/hw/rdma/rdma_backend.h
> +++ b/hw/rdma/rdma_backend.h
> @@ -89,9 +89,9 @@ void rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq);
>
> int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
> RdmaBackendPD *pd, RdmaBackendCQ *scq,
> - RdmaBackendCQ *rcq, uint32_t max_send_wr,
> - uint32_t max_recv_wr, uint32_t max_send_sge,
> - uint32_t max_recv_sge);
> + RdmaBackendCQ *rcq, RdmaBackendSRQ *srq,
> + uint32_t max_send_wr, uint32_t max_recv_wr,
> + uint32_t max_send_sge, uint32_t max_recv_sge);
> int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
> uint8_t qp_type, uint32_t qkey);
> int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
> diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
> index c0bb27cb6b90..96279e8d6561 100644
> --- a/hw/rdma/rdma_rm.c
> +++ b/hw/rdma/rdma_rm.c
> @@ -386,12 +386,14 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
> uint8_t qp_type, uint32_t max_send_wr,
> uint32_t max_send_sge, uint32_t send_cq_handle,
> uint32_t max_recv_wr, uint32_t max_recv_sge,
> - uint32_t recv_cq_handle, void *opaque, uint32_t *qpn)
> + uint32_t recv_cq_handle, void *opaque, uint32_t *qpn,
> + uint8_t is_srq, uint32_t srq_handle)
> {
> int rc;
> RdmaRmQP *qp;
> RdmaRmCQ *scq, *rcq;
> RdmaRmPD *pd;
> + RdmaRmSRQ *srq = NULL;
> uint32_t rm_qpn;
>
> pd = rdma_rm_get_pd(dev_res, pd_handle);
> @@ -408,6 +410,16 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
> return -EINVAL;
> }
>
> + if (is_srq) {
> + srq = rdma_rm_get_srq(dev_res, srq_handle);
> + if (!srq) {
> + rdma_error_report("Invalid srqn %d", srq_handle);
> + return -EINVAL;
> + }
> +
> + srq->recv_cq_handle = recv_cq_handle;
> + }
> +
> if (qp_type == IBV_QPT_GSI) {
> scq->notify = CNT_SET;
> rcq->notify = CNT_SET;
> @@ -424,10 +436,14 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
> qp->send_cq_handle = send_cq_handle;
> qp->recv_cq_handle = recv_cq_handle;
> qp->opaque = opaque;
> + qp->is_srq = is_srq;
>
> rc = rdma_backend_create_qp(&qp->backend_qp, qp_type, &pd->backend_pd,
> - &scq->backend_cq, &rcq->backend_cq, max_send_wr,
> - max_recv_wr, max_send_sge, max_recv_sge);
> + &scq->backend_cq, &rcq->backend_cq,
> + is_srq ? &srq->backend_srq : NULL,
> + max_send_wr, max_recv_wr, max_send_sge,
> + max_recv_sge);
> +
> if (rc) {
> rc = -EIO;
> goto out_dealloc_qp;
> diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h
> index e88ab95e264b..e8639909cd34 100644
> --- a/hw/rdma/rdma_rm.h
> +++ b/hw/rdma/rdma_rm.h
> @@ -53,7 +53,8 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
> uint8_t qp_type, uint32_t max_send_wr,
> uint32_t max_send_sge, uint32_t send_cq_handle,
> uint32_t max_recv_wr, uint32_t max_recv_sge,
> - uint32_t recv_cq_handle, void *opaque, uint32_t *qpn);
> + uint32_t recv_cq_handle, void *opaque, uint32_t *qpn,
> + uint8_t is_srq, uint32_t srq_handle);
> RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn);
> int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
> uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
> diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
> index 7bdd9f291f9f..534f2f74d3d1 100644
> --- a/hw/rdma/rdma_rm_defs.h
> +++ b/hw/rdma/rdma_rm_defs.h
> @@ -88,6 +88,7 @@ typedef struct RdmaRmQP {
> uint32_t send_cq_handle;
> uint32_t recv_cq_handle;
> enum ibv_qp_state qp_state;
> + uint8_t is_srq;
> } RdmaRmQP;
>
> typedef struct RdmaRmSRQ {
> diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
> index 4afcd2037db2..b931bb6dc9d4 100644
> --- a/hw/rdma/vmw/pvrdma_cmd.c
> +++ b/hw/rdma/vmw/pvrdma_cmd.c
> @@ -357,7 +357,7 @@ static int destroy_cq(PVRDMADev *dev, union pvrdma_cmd_req *req,
> static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
> PvrdmaRing **rings, uint32_t scqe, uint32_t smax_sge,
> uint32_t spages, uint32_t rcqe, uint32_t rmax_sge,
> - uint32_t rpages)
> + uint32_t rpages, uint8_t is_srq)
> {
> uint64_t *dir = NULL, *tbl = NULL;
> PvrdmaRing *sr, *rr;
> @@ -365,9 +365,14 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
> char ring_name[MAX_RING_NAME_SZ];
> uint32_t wqe_sz;
>
> - if (!spages || spages > PVRDMA_MAX_FAST_REG_PAGES
> - || !rpages || rpages > PVRDMA_MAX_FAST_REG_PAGES) {
> - rdma_error_report("Got invalid page count for QP ring: %d, %d", spages,
> + if (!spages || spages > PVRDMA_MAX_FAST_REG_PAGES) {
> + rdma_error_report("Got invalid send page count for QP ring: %d",
> + spages);
> + return rc;
> + }
> +
> + if (!is_srq && (!rpages || rpages > PVRDMA_MAX_FAST_REG_PAGES)) {
> + rdma_error_report("Got invalid recv page count for QP ring: %d",
> rpages);
> return rc;
> }
> @@ -384,8 +389,12 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
> goto out;
> }
>
> - sr = g_malloc(2 * sizeof(*rr));
> - rr = &sr[1];
> + if (!is_srq) {
> + sr = g_malloc(2 * sizeof(*rr));
> + rr = &sr[1];
> + } else {
> + sr = g_malloc(sizeof(*sr));
> + }
>
> *rings = sr;
>
> @@ -407,15 +416,18 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
> goto out_unmap_ring_state;
> }
>
> - /* Create recv ring */
> - rr->ring_state = &sr->ring_state[1];
> - wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) +
> - sizeof(struct pvrdma_sge) * rmax_sge - 1);
> - sprintf(ring_name, "qp_rring_%" PRIx64, pdir_dma);
> - rc = pvrdma_ring_init(rr, ring_name, pci_dev, rr->ring_state,
> - rcqe, wqe_sz, (dma_addr_t *)&tbl[1 + spages], rpages);
> - if (rc) {
> - goto out_free_sr;
> + if (!is_srq) {
> + /* Create recv ring */
> + rr->ring_state = &sr->ring_state[1];
> + wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) +
> + sizeof(struct pvrdma_sge) * rmax_sge - 1);
> + sprintf(ring_name, "qp_rring_%" PRIx64, pdir_dma);
> + rc = pvrdma_ring_init(rr, ring_name, pci_dev, rr->ring_state,
> + rcqe, wqe_sz, (dma_addr_t *)&tbl[1 + spages],
> + rpages);
> + if (rc) {
> + goto out_free_sr;
> + }
> }
>
> goto out;
> @@ -436,10 +448,12 @@ out:
> return rc;
> }
>
> -static void destroy_qp_rings(PvrdmaRing *ring)
> +static void destroy_qp_rings(PvrdmaRing *ring, uint8_t is_srq)
> {
> pvrdma_ring_free(&ring[0]);
> - pvrdma_ring_free(&ring[1]);
> + if (!is_srq) {
> + pvrdma_ring_free(&ring[1]);
> + }
>
> rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE);
> g_free(ring);
> @@ -458,7 +472,7 @@ static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
> rc = create_qp_rings(PCI_DEVICE(dev), cmd->pdir_dma, &rings,
> cmd->max_send_wr, cmd->max_send_sge, cmd->send_chunks,
> cmd->max_recv_wr, cmd->max_recv_sge,
> - cmd->total_chunks - cmd->send_chunks - 1);
> + cmd->total_chunks - cmd->send_chunks - 1, cmd->is_srq);
> if (rc) {
> return rc;
> }
> @@ -467,9 +481,9 @@ static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
> cmd->max_send_wr, cmd->max_send_sge,
> cmd->send_cq_handle, cmd->max_recv_wr,
> cmd->max_recv_sge, cmd->recv_cq_handle, rings,
> - &resp->qpn);
> + &resp->qpn, cmd->is_srq, cmd->srq_handle);
> if (rc) {
> - destroy_qp_rings(rings);
> + destroy_qp_rings(rings, cmd->is_srq);
> return rc;
> }
>
> @@ -531,10 +545,9 @@ static int destroy_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
> return -EINVAL;
> }
>
> - rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle);
> -
> ring = (PvrdmaRing *)qp->opaque;
> - destroy_qp_rings(ring);
> + destroy_qp_rings(ring, qp->is_srq);
> + rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle);
>
> return 0;
> }
Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
> --
> 2.20.1
>
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* [Qemu-devel] [PATCH v3 4/4] hw/pvrdma: Add support for SRQ
2019-04-03 11:33 [Qemu-devel] [PATCH v3 0/4] pvrdma: Add support for SRQ Kamal Heib
` (2 preceding siblings ...)
2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 3/4] hw/rdma: Modify create/destroy QP to support SRQ Kamal Heib
@ 2019-04-03 11:33 ` Kamal Heib
2019-04-03 18:16 ` Yuval Shaia
2019-04-03 18:19 ` [Qemu-devel] [PATCH v3 0/4] pvrdma: " Yuval Shaia
4 siblings, 1 reply; 13+ messages in thread
From: Kamal Heib @ 2019-04-03 11:33 UTC (permalink / raw)
To: qemu-devel; +Cc: Marcel Apfelbaum, Yuval Shaia, Kamal Heib
Implement the pvrdma device commands needed to support SRQ.
Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
---
hw/rdma/vmw/pvrdma_cmd.c | 147 ++++++++++++++++++++++++++++++++++++
hw/rdma/vmw/pvrdma_main.c | 16 ++++
hw/rdma/vmw/pvrdma_qp_ops.c | 46 ++++++++++-
hw/rdma/vmw/pvrdma_qp_ops.h | 1 +
4 files changed, 209 insertions(+), 1 deletion(-)
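Before reading the diff, a brief orientation may help: on the host side, the backend
layer ultimately drives these device commands through standard libibverbs calls. The
sketch below shows that flow in isolation: creating an SRQ, attaching a QP to it, and
posting a receive buffer. Sizes are illustrative and error handling is trimmed; this is
not code from the patch.

/* Illustrative sketch (not from the patch): the verbs-level SRQ flow that
 * the backend layer performs on behalf of the guest. */
#include <infiniband/verbs.h>
#include <stdint.h>
#include <string.h>

static struct ibv_srq *make_srq(struct ibv_pd *pd)
{
    struct ibv_srq_init_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.attr.max_wr  = 64;      /* illustrative queue depth */
    attr.attr.max_sge = 1;

    return ibv_create_srq(pd, &attr);         /* NULL on failure */
}

static struct ibv_qp *make_qp_on_srq(struct ibv_pd *pd, struct ibv_cq *cq,
                                     struct ibv_srq *srq)
{
    struct ibv_qp_init_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.qp_type          = IBV_QPT_RC;
    attr.send_cq          = cq;
    attr.recv_cq          = cq;
    attr.srq              = srq;  /* receive buffers come from the SRQ */
    attr.cap.max_send_wr  = 16;
    attr.cap.max_send_sge = 1;

    return ibv_create_qp(pd, &attr);          /* NULL on failure */
}

static int post_srq_buf(struct ibv_srq *srq, struct ibv_mr *mr, uint32_t len,
                        uint64_t wr_id)
{
    struct ibv_sge sge = {
        .addr   = (uint64_t)(uintptr_t)mr->addr,
        .length = len,
        .lkey   = mr->lkey,
    };
    struct ibv_recv_wr wr = {
        .wr_id   = wr_id,
        .sg_list = &sge,
        .num_sge = 1,
    };
    struct ibv_recv_wr *bad_wr;

    return ibv_post_srq_recv(srq, &wr, &bad_wr);   /* 0 on success */
}

Completions for buffers consumed from the SRQ still arrive on the receive CQ of whichever
QP received the message, which is why patch 3 stores recv_cq_handle in the SRQ and this
patch uses it when building the CQE in pvrdma_srq_recv().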
diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
index b931bb6dc9d4..8d70c0d23de4 100644
--- a/hw/rdma/vmw/pvrdma_cmd.c
+++ b/hw/rdma/vmw/pvrdma_cmd.c
@@ -609,6 +609,149 @@ static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
return 0;
}
+static int create_srq_ring(PCIDevice *pci_dev, PvrdmaRing **ring,
+ uint64_t pdir_dma, uint32_t max_wr,
+ uint32_t max_sge, uint32_t nchunks)
+{
+ uint64_t *dir = NULL, *tbl = NULL;
+ PvrdmaRing *r;
+ int rc = -EINVAL;
+ char ring_name[MAX_RING_NAME_SZ];
+ uint32_t wqe_sz;
+
+ if (!nchunks || nchunks > PVRDMA_MAX_FAST_REG_PAGES) {
+ rdma_error_report("Got invalid page count for SRQ ring: %d",
+ nchunks);
+ return rc;
+ }
+
+ dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE);
+ if (!dir) {
+ rdma_error_report("Failed to map to SRQ page directory");
+ goto out;
+ }
+
+ tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE);
+ if (!tbl) {
+ rdma_error_report("Failed to map to SRQ page table");
+ goto out;
+ }
+
+ r = g_malloc(sizeof(*r));
+ *ring = r;
+
+ r->ring_state = (struct pvrdma_ring *)
+ rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE);
+ if (!r->ring_state) {
+ rdma_error_report("Failed to map tp SRQ ring state");
+ goto out_free_ring_mem;
+ }
+
+ wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) +
+ sizeof(struct pvrdma_sge) * max_sge - 1);
+ sprintf(ring_name, "srq_ring_%" PRIx64, pdir_dma);
+ rc = pvrdma_ring_init(r, ring_name, pci_dev, &r->ring_state[1], max_wr,
+ wqe_sz, (dma_addr_t *)&tbl[1], nchunks - 1);
+ if (rc) {
+ goto out_unmap_ring_state;
+ }
+
+ goto out;
+
+out_unmap_ring_state:
+ rdma_pci_dma_unmap(pci_dev, r->ring_state, TARGET_PAGE_SIZE);
+
+out_free_ring_mem:
+ g_free(r);
+
+out:
+ rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE);
+ rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE);
+
+ return rc;
+}
+
+static void destroy_srq_ring(PvrdmaRing *ring)
+{
+ pvrdma_ring_free(ring);
+ rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE);
+ g_free(ring);
+}
+
+static int create_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
+ union pvrdma_cmd_resp *rsp)
+{
+ struct pvrdma_cmd_create_srq *cmd = &req->create_srq;
+ struct pvrdma_cmd_create_srq_resp *resp = &rsp->create_srq_resp;
+ PvrdmaRing *ring = NULL;
+ int rc;
+
+ memset(resp, 0, sizeof(*resp));
+
+ rc = create_srq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma,
+ cmd->attrs.max_wr, cmd->attrs.max_sge,
+ cmd->nchunks);
+ if (rc) {
+ return rc;
+ }
+
+ rc = rdma_rm_alloc_srq(&dev->rdma_dev_res, cmd->pd_handle,
+ cmd->attrs.max_wr, cmd->attrs.max_sge,
+ cmd->attrs.srq_limit, &resp->srqn, ring);
+ if (rc) {
+ destroy_srq_ring(ring);
+ return rc;
+ }
+
+ return 0;
+}
+
+static int query_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
+ union pvrdma_cmd_resp *rsp)
+{
+ struct pvrdma_cmd_query_srq *cmd = &req->query_srq;
+ struct pvrdma_cmd_query_srq_resp *resp = &rsp->query_srq_resp;
+
+ memset(resp, 0, sizeof(*resp));
+
+ return rdma_rm_query_srq(&dev->rdma_dev_res, cmd->srq_handle,
+ (struct ibv_srq_attr *)&resp->attrs);
+}
+
+static int modify_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
+ union pvrdma_cmd_resp *rsp)
+{
+ struct pvrdma_cmd_modify_srq *cmd = &req->modify_srq;
+
+ /* Only support SRQ limit */
+ if (!(cmd->attr_mask & IBV_SRQ_LIMIT) ||
+ (cmd->attr_mask & IBV_SRQ_MAX_WR))
+ return -EINVAL;
+
+ return rdma_rm_modify_srq(&dev->rdma_dev_res, cmd->srq_handle,
+ (struct ibv_srq_attr *)&cmd->attrs,
+ cmd->attr_mask);
+}
+
+static int destroy_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
+ union pvrdma_cmd_resp *rsp)
+{
+ struct pvrdma_cmd_destroy_srq *cmd = &req->destroy_srq;
+ RdmaRmSRQ *srq;
+ PvrdmaRing *ring;
+
+ srq = rdma_rm_get_srq(&dev->rdma_dev_res, cmd->srq_handle);
+ if (!srq) {
+ return -EINVAL;
+ }
+
+ ring = (PvrdmaRing *)srq->opaque;
+ destroy_srq_ring(ring);
+ rdma_rm_dealloc_srq(&dev->rdma_dev_res, cmd->srq_handle);
+
+ return 0;
+}
+
struct cmd_handler {
uint32_t cmd;
uint32_t ack;
@@ -634,6 +777,10 @@ static struct cmd_handler cmd_handlers[] = {
{PVRDMA_CMD_DESTROY_UC, PVRDMA_CMD_DESTROY_UC_RESP_NOOP, destroy_uc},
{PVRDMA_CMD_CREATE_BIND, PVRDMA_CMD_CREATE_BIND_RESP_NOOP, create_bind},
{PVRDMA_CMD_DESTROY_BIND, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, destroy_bind},
+ {PVRDMA_CMD_CREATE_SRQ, PVRDMA_CMD_CREATE_SRQ_RESP, create_srq},
+ {PVRDMA_CMD_QUERY_SRQ, PVRDMA_CMD_QUERY_SRQ_RESP, query_srq},
+ {PVRDMA_CMD_MODIFY_SRQ, PVRDMA_CMD_MODIFY_SRQ_RESP, modify_srq},
+ {PVRDMA_CMD_DESTROY_SRQ, PVRDMA_CMD_DESTROY_SRQ_RESP, destroy_srq},
};
int pvrdma_exec_cmd(PVRDMADev *dev)
diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
index 0b46561bad11..769f7990f864 100644
--- a/hw/rdma/vmw/pvrdma_main.c
+++ b/hw/rdma/vmw/pvrdma_main.c
@@ -53,6 +53,7 @@ static Property pvrdma_dev_properties[] = {
DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev,
dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM),
DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH),
+ DEFINE_PROP_INT32("dev-caps-max-srq", PVRDMADev, dev_attr.max_srq, MAX_SRQ),
DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr),
DEFINE_PROP_END_OF_LIST(),
};
@@ -261,6 +262,9 @@ static void init_dsr_dev_caps(PVRDMADev *dev)
dsr->caps.max_mr = dev->dev_attr.max_mr;
dsr->caps.max_pd = dev->dev_attr.max_pd;
dsr->caps.max_ah = dev->dev_attr.max_ah;
+ dsr->caps.max_srq = dev->dev_attr.max_srq;
+ dsr->caps.max_srq_wr = dev->dev_attr.max_srq_wr;
+ dsr->caps.max_srq_sge = dev->dev_attr.max_srq_sge;
dsr->caps.gid_tbl_len = MAX_GIDS;
dsr->caps.sys_image_guid = 0;
dsr->caps.node_guid = dev->node_guid;
@@ -485,6 +489,13 @@ static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val,
pvrdma_cq_poll(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK);
}
break;
+ case PVRDMA_UAR_SRQ_OFFSET:
+ if (val & PVRDMA_UAR_SRQ_RECV) {
+ trace_pvrdma_uar_write(addr, val, "QP", "SRQ",
+ val & PVRDMA_UAR_HANDLE_MASK, 0);
+ pvrdma_srq_recv(dev, val & PVRDMA_UAR_HANDLE_MASK);
+ }
+ break;
default:
rdma_error_report("Unsupported command, addr=0x%"PRIx64", val=0x%"PRIx64,
addr, val);
@@ -554,6 +565,11 @@ static void init_dev_caps(PVRDMADev *dev)
dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) -
TARGET_PAGE_SIZE; /* First page is ring state */
+
+ dev->dev_attr.max_srq_wr = pg_tbl_bytes /
+ ((sizeof(struct pvrdma_rq_wqe_hdr) +
+ sizeof(struct pvrdma_sge)) *
+ dev->dev_attr.max_sge) - TARGET_PAGE_SIZE;
}
static int pvrdma_check_ram_shared(Object *obj, void *opaque)
diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c
index 5b9786efbe4b..bd6db858de32 100644
--- a/hw/rdma/vmw/pvrdma_qp_ops.c
+++ b/hw/rdma/vmw/pvrdma_qp_ops.c
@@ -70,7 +70,7 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
memset(cqe1, 0, sizeof(*cqe1));
cqe1->wr_id = cqe->wr_id;
- cqe1->qp = cqe->qp;
+ cqe1->qp = cqe->qp ? cqe->qp : wc->qp_num;
cqe1->opcode = cqe->opcode;
cqe1->status = wc->status;
cqe1->byte_len = wc->byte_len;
@@ -241,6 +241,50 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
}
}
+void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle)
+{
+ RdmaRmSRQ *srq;
+ PvrdmaRqWqe *wqe;
+ PvrdmaRing *ring;
+
+ srq = rdma_rm_get_srq(&dev->rdma_dev_res, srq_handle);
+ if (unlikely(!srq)) {
+ return;
+ }
+
+ ring = (PvrdmaRing *)srq->opaque;
+
+ wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring);
+ while (wqe) {
+ CompHandlerCtx *comp_ctx;
+
+ /* Prepare CQE */
+ comp_ctx = g_malloc(sizeof(CompHandlerCtx));
+ comp_ctx->dev = dev;
+ comp_ctx->cq_handle = srq->recv_cq_handle;
+ comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
+ comp_ctx->cqe.qp = 0;
+ comp_ctx->cqe.opcode = IBV_WC_RECV;
+
+ if (wqe->hdr.num_sge > dev->dev_attr.max_sge) {
+ rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge,
+ dev->dev_attr.max_sge);
+ complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx);
+ continue;
+ }
+
+ rdma_backend_post_srq_recv(&dev->backend_dev, &srq->backend_srq,
+ (struct ibv_sge *)&wqe->sge[0],
+ wqe->hdr.num_sge,
+ comp_ctx);
+
+ pvrdma_ring_read_inc(ring);
+
+ wqe = pvrdma_ring_next_elem_read(ring);
+ }
+
+}
+
void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle)
{
RdmaRmCQ *cq;
diff --git a/hw/rdma/vmw/pvrdma_qp_ops.h b/hw/rdma/vmw/pvrdma_qp_ops.h
index 31cb48ba29f1..82e720a76fb4 100644
--- a/hw/rdma/vmw/pvrdma_qp_ops.h
+++ b/hw/rdma/vmw/pvrdma_qp_ops.h
@@ -22,6 +22,7 @@ int pvrdma_qp_ops_init(void);
void pvrdma_qp_ops_fini(void);
void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle);
void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle);
+void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle);
void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle);
#endif
--
2.20.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
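A note on the SRQ-limit-only check in modify_srq() in the patch above: at the verbs
level, IBV_SRQ_LIMIT arms the SRQ so that the provider raises an
IBV_EVENT_SRQ_LIMIT_REACHED async event once the number of posted receive buffers drops
below srq_limit, while IBV_SRQ_MAX_WR (resizing the queue) is what the handler rejects.
A minimal, illustrative sketch of arming the limit with ibv_modify_srq (the threshold
value is arbitrary):

#include <infiniband/verbs.h>
#include <string.h>

/* Illustrative only: arm the SRQ limit so the consumer gets an
 * IBV_EVENT_SRQ_LIMIT_REACHED event when fewer than 16 receive WRs
 * remain posted, giving it a chance to replenish buffers. */
static int arm_srq_limit(struct ibv_srq *srq)
{
    struct ibv_srq_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.srq_limit = 16;

    return ibv_modify_srq(srq, &attr, IBV_SRQ_LIMIT);   /* 0 on success */
}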
* Re: [Qemu-devel] [PATCH v3 4/4] hw/pvrdma: Add support for SRQ
2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 4/4] hw/pvrdma: Add support for SRQ Kamal Heib
@ 2019-04-03 18:16 ` Yuval Shaia
0 siblings, 0 replies; 13+ messages in thread
From: Yuval Shaia @ 2019-04-03 18:16 UTC (permalink / raw)
To: Kamal Heib; +Cc: qemu-devel
On Wed, Apr 03, 2019 at 02:33:43PM +0300, Kamal Heib wrote:
> Implement the pvrdma device commands needed to support SRQ.
>
> Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
> ---
> hw/rdma/vmw/pvrdma_cmd.c | 147 ++++++++++++++++++++++++++++++++++++
> hw/rdma/vmw/pvrdma_main.c | 16 ++++
> hw/rdma/vmw/pvrdma_qp_ops.c | 46 ++++++++++-
> hw/rdma/vmw/pvrdma_qp_ops.h | 1 +
> 4 files changed, 209 insertions(+), 1 deletion(-)
>
> diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
> index b931bb6dc9d4..8d70c0d23de4 100644
> --- a/hw/rdma/vmw/pvrdma_cmd.c
> +++ b/hw/rdma/vmw/pvrdma_cmd.c
> @@ -609,6 +609,149 @@ static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
> return 0;
> }
>
> +static int create_srq_ring(PCIDevice *pci_dev, PvrdmaRing **ring,
> + uint64_t pdir_dma, uint32_t max_wr,
> + uint32_t max_sge, uint32_t nchunks)
> +{
> + uint64_t *dir = NULL, *tbl = NULL;
> + PvrdmaRing *r;
> + int rc = -EINVAL;
> + char ring_name[MAX_RING_NAME_SZ];
> + uint32_t wqe_sz;
> +
> + if (!nchunks || nchunks > PVRDMA_MAX_FAST_REG_PAGES) {
> + rdma_error_report("Got invalid page count for SRQ ring: %d",
> + nchunks);
> + return rc;
> + }
> +
> + dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE);
> + if (!dir) {
> + rdma_error_report("Failed to map to SRQ page directory");
> + goto out;
> + }
> +
> + tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE);
> + if (!tbl) {
> + rdma_error_report("Failed to map to SRQ page table");
> + goto out;
> + }
> +
> + r = g_malloc(sizeof(*r));
> + *ring = r;
> +
> + r->ring_state = (struct pvrdma_ring *)
> + rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE);
> + if (!r->ring_state) {
> + rdma_error_report("Failed to map tp SRQ ring state");
> + goto out_free_ring_mem;
> + }
> +
> + wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) +
> + sizeof(struct pvrdma_sge) * max_sge - 1);
> + sprintf(ring_name, "srq_ring_%" PRIx64, pdir_dma);
> + rc = pvrdma_ring_init(r, ring_name, pci_dev, &r->ring_state[1], max_wr,
> + wqe_sz, (dma_addr_t *)&tbl[1], nchunks - 1);
> + if (rc) {
> + goto out_unmap_ring_state;
> + }
> +
> + goto out;
> +
> +out_unmap_ring_state:
> + rdma_pci_dma_unmap(pci_dev, r->ring_state, TARGET_PAGE_SIZE);
> +
> +out_free_ring_mem:
> + g_free(r);
> +
> +out:
> + rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE);
> + rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE);
> +
> + return rc;
> +}
> +
> +static void destroy_srq_ring(PvrdmaRing *ring)
> +{
> + pvrdma_ring_free(ring);
> + rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE);
> + g_free(ring);
> +}
> +
> +static int create_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
> + union pvrdma_cmd_resp *rsp)
> +{
> + struct pvrdma_cmd_create_srq *cmd = &req->create_srq;
> + struct pvrdma_cmd_create_srq_resp *resp = &rsp->create_srq_resp;
> + PvrdmaRing *ring = NULL;
> + int rc;
> +
> + memset(resp, 0, sizeof(*resp));
> +
> + rc = create_srq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma,
> + cmd->attrs.max_wr, cmd->attrs.max_sge,
> + cmd->nchunks);
> + if (rc) {
> + return rc;
> + }
> +
> + rc = rdma_rm_alloc_srq(&dev->rdma_dev_res, cmd->pd_handle,
> + cmd->attrs.max_wr, cmd->attrs.max_sge,
> + cmd->attrs.srq_limit, &resp->srqn, ring);
> + if (rc) {
> + destroy_srq_ring(ring);
> + return rc;
> + }
> +
> + return 0;
> +}
> +
> +static int query_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
> + union pvrdma_cmd_resp *rsp)
> +{
> + struct pvrdma_cmd_query_srq *cmd = &req->query_srq;
> + struct pvrdma_cmd_query_srq_resp *resp = &rsp->query_srq_resp;
> +
> + memset(resp, 0, sizeof(*resp));
> +
> + return rdma_rm_query_srq(&dev->rdma_dev_res, cmd->srq_handle,
> + (struct ibv_srq_attr *)&resp->attrs);
> +}
> +
> +static int modify_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
> + union pvrdma_cmd_resp *rsp)
> +{
> + struct pvrdma_cmd_modify_srq *cmd = &req->modify_srq;
> +
> + /* Only support SRQ limit */
> + if (!(cmd->attr_mask & IBV_SRQ_LIMIT) ||
> + (cmd->attr_mask & IBV_SRQ_MAX_WR))
> + return -EINVAL;
> +
> + return rdma_rm_modify_srq(&dev->rdma_dev_res, cmd->srq_handle,
> + (struct ibv_srq_attr *)&cmd->attrs,
> + cmd->attr_mask);
> +}
> +
> +static int destroy_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
> + union pvrdma_cmd_resp *rsp)
> +{
> + struct pvrdma_cmd_destroy_srq *cmd = &req->destroy_srq;
> + RdmaRmSRQ *srq;
> + PvrdmaRing *ring;
> +
> + srq = rdma_rm_get_srq(&dev->rdma_dev_res, cmd->srq_handle);
> + if (!srq) {
> + return -EINVAL;
> + }
> +
> + ring = (PvrdmaRing *)srq->opaque;
> + destroy_srq_ring(ring);
> + rdma_rm_dealloc_srq(&dev->rdma_dev_res, cmd->srq_handle);
> +
> + return 0;
> +}
> +
> struct cmd_handler {
> uint32_t cmd;
> uint32_t ack;
> @@ -634,6 +777,10 @@ static struct cmd_handler cmd_handlers[] = {
> {PVRDMA_CMD_DESTROY_UC, PVRDMA_CMD_DESTROY_UC_RESP_NOOP, destroy_uc},
> {PVRDMA_CMD_CREATE_BIND, PVRDMA_CMD_CREATE_BIND_RESP_NOOP, create_bind},
> {PVRDMA_CMD_DESTROY_BIND, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, destroy_bind},
> + {PVRDMA_CMD_CREATE_SRQ, PVRDMA_CMD_CREATE_SRQ_RESP, create_srq},
> + {PVRDMA_CMD_QUERY_SRQ, PVRDMA_CMD_QUERY_SRQ_RESP, query_srq},
> + {PVRDMA_CMD_MODIFY_SRQ, PVRDMA_CMD_MODIFY_SRQ_RESP, modify_srq},
> + {PVRDMA_CMD_DESTROY_SRQ, PVRDMA_CMD_DESTROY_SRQ_RESP, destroy_srq},
> };
>
> int pvrdma_exec_cmd(PVRDMADev *dev)
> diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
> index 0b46561bad11..769f7990f864 100644
> --- a/hw/rdma/vmw/pvrdma_main.c
> +++ b/hw/rdma/vmw/pvrdma_main.c
> @@ -53,6 +53,7 @@ static Property pvrdma_dev_properties[] = {
> DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev,
> dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM),
> DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH),
> + DEFINE_PROP_INT32("dev-caps-max-srq", PVRDMADev, dev_attr.max_srq, MAX_SRQ),
> DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr),
> DEFINE_PROP_END_OF_LIST(),
> };
> @@ -261,6 +262,9 @@ static void init_dsr_dev_caps(PVRDMADev *dev)
> dsr->caps.max_mr = dev->dev_attr.max_mr;
> dsr->caps.max_pd = dev->dev_attr.max_pd;
> dsr->caps.max_ah = dev->dev_attr.max_ah;
> + dsr->caps.max_srq = dev->dev_attr.max_srq;
> + dsr->caps.max_srq_wr = dev->dev_attr.max_srq_wr;
> + dsr->caps.max_srq_sge = dev->dev_attr.max_srq_sge;
> dsr->caps.gid_tbl_len = MAX_GIDS;
> dsr->caps.sys_image_guid = 0;
> dsr->caps.node_guid = dev->node_guid;
> @@ -485,6 +489,13 @@ static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val,
> pvrdma_cq_poll(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK);
> }
> break;
> + case PVRDMA_UAR_SRQ_OFFSET:
> + if (val & PVRDMA_UAR_SRQ_RECV) {
> + trace_pvrdma_uar_write(addr, val, "QP", "SRQ",
> + val & PVRDMA_UAR_HANDLE_MASK, 0);
> + pvrdma_srq_recv(dev, val & PVRDMA_UAR_HANDLE_MASK);
> + }
> + break;
> default:
> rdma_error_report("Unsupported command, addr=0x%"PRIx64", val=0x%"PRIx64,
> addr, val);
> @@ -554,6 +565,11 @@ static void init_dev_caps(PVRDMADev *dev)
>
> dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) -
> TARGET_PAGE_SIZE; /* First page is ring state */
> +
> + dev->dev_attr.max_srq_wr = pg_tbl_bytes /
> + ((sizeof(struct pvrdma_rq_wqe_hdr) +
> + sizeof(struct pvrdma_sge)) *
> + dev->dev_attr.max_sge) - TARGET_PAGE_SIZE;
> }
>
> static int pvrdma_check_ram_shared(Object *obj, void *opaque)
> diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c
> index 5b9786efbe4b..bd6db858de32 100644
> --- a/hw/rdma/vmw/pvrdma_qp_ops.c
> +++ b/hw/rdma/vmw/pvrdma_qp_ops.c
> @@ -70,7 +70,7 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
>
> memset(cqe1, 0, sizeof(*cqe1));
> cqe1->wr_id = cqe->wr_id;
> - cqe1->qp = cqe->qp;
> + cqe1->qp = cqe->qp ? cqe->qp : wc->qp_num;
> cqe1->opcode = cqe->opcode;
> cqe1->status = wc->status;
> cqe1->byte_len = wc->byte_len;
> @@ -241,6 +241,50 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
> }
> }
>
> +void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle)
> +{
> + RdmaRmSRQ *srq;
> + PvrdmaRqWqe *wqe;
> + PvrdmaRing *ring;
> +
> + srq = rdma_rm_get_srq(&dev->rdma_dev_res, srq_handle);
> + if (unlikely(!srq)) {
> + return;
> + }
> +
> + ring = (PvrdmaRing *)srq->opaque;
> +
> + wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring);
> + while (wqe) {
> + CompHandlerCtx *comp_ctx;
> +
> + /* Prepare CQE */
> + comp_ctx = g_malloc(sizeof(CompHandlerCtx));
> + comp_ctx->dev = dev;
> + comp_ctx->cq_handle = srq->recv_cq_handle;
> + comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
> + comp_ctx->cqe.qp = 0;
> + comp_ctx->cqe.opcode = IBV_WC_RECV;
> +
> + if (wqe->hdr.num_sge > dev->dev_attr.max_sge) {
> + rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge,
> + dev->dev_attr.max_sge);
> + complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx);
> + continue;
> + }
> +
> + rdma_backend_post_srq_recv(&dev->backend_dev, &srq->backend_srq,
> + (struct ibv_sge *)&wqe->sge[0],
> + wqe->hdr.num_sge,
> + comp_ctx);
> +
> + pvrdma_ring_read_inc(ring);
> +
> + wqe = pvrdma_ring_next_elem_read(ring);
> + }
> +
> +}
> +
> void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle)
> {
> RdmaRmCQ *cq;
> diff --git a/hw/rdma/vmw/pvrdma_qp_ops.h b/hw/rdma/vmw/pvrdma_qp_ops.h
> index 31cb48ba29f1..82e720a76fb4 100644
> --- a/hw/rdma/vmw/pvrdma_qp_ops.h
> +++ b/hw/rdma/vmw/pvrdma_qp_ops.h
> @@ -22,6 +22,7 @@ int pvrdma_qp_ops_init(void);
> void pvrdma_qp_ops_fini(void);
> void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle);
> void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle);
> +void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle);
> void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle);
>
> #endif
Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
> --
> 2.20.1
>
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Qemu-devel] [PATCH v3 0/4] pvrdma: Add support for SRQ
2019-04-03 11:33 [Qemu-devel] [PATCH v3 0/4] pvrdma: Add support for SRQ Kamal Heib
` (3 preceding siblings ...)
2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 4/4] hw/pvrdma: Add support for SRQ Kamal Heib
@ 2019-04-03 18:19 ` Yuval Shaia
2019-04-07 9:15 ` Yuval Shaia
4 siblings, 1 reply; 13+ messages in thread
From: Yuval Shaia @ 2019-04-03 18:19 UTC (permalink / raw)
To: Kamal Heib; +Cc: qemu-devel
On Wed, Apr 03, 2019 at 02:33:39PM +0300, Kamal Heib wrote:
> This series implements SRQ (Shared Receive Queue) support for the pvrdma
> device. It also includes all the functions and definitions needed to
> support SRQ in the backend and resource management layers.
>
> Changes from v2->v3:
> - Patch #1:
> -- Fix commit message.
> -- Remove initialization of backend_qp from rdma_backend_post_srq_recv().
> -- Add rx_srq counter.
> - Patch #2:
> -- Add checks for srq attrs.
> - Patch #3:
> -- Move initialization of recv_cq_handle to be under is_srq.
> -- Rearrange destroy_qp() to avoid use after free.
> - Patch #4:
> -- Avoid use after free.
> -- Fix indentation.
>
> Changes from v1->v2:
> - Handle checkpatch.pl warnings.
I'd appreciate it if you could post a v4 with a new patch to display the new
counter (function rdma_dump_device_counters).
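For reference, displaying the counter amounts to one extra line in the counters dump.
A hypothetical sketch follows; the wrapper function and its (Monitor *,
RdmaDeviceResources *) parameters are assumptions made for the sketch, while rx_srq is
the counter added in patch 1.

#include "qemu/osdep.h"
#include "monitor/monitor.h"
#include "rdma_rm_defs.h"

/* Hypothetical helper for illustration only; the actual change would go into
 * the existing rdma_dump_device_counters() rather than a new function. */
static void dump_rx_srq_counter(Monitor *mon, RdmaDeviceResources *dev_res)
{
    monitor_printf(mon, "\trx_srq           : %" PRId64 "\n",
                   dev_res->stats.rx_srq);
}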
>
> Kamal Heib (4):
> hw/rdma: Add SRQ support to backend layer
> hw/rdma: Add support for managing SRQ resource
> hw/rdma: Modify create/destroy QP to support SRQ
> hw/pvrdma: Add support for SRQ
>
> hw/rdma/rdma_backend.c | 125 +++++++++++++++++++++-
> hw/rdma/rdma_backend.h | 18 +++-
> hw/rdma/rdma_backend_defs.h | 5 +
> hw/rdma/rdma_rm.c | 117 +++++++++++++++++++-
> hw/rdma/rdma_rm.h | 13 ++-
> hw/rdma/rdma_rm_defs.h | 10 ++
> hw/rdma/vmw/pvrdma_cmd.c | 206 ++++++++++++++++++++++++++++++++----
> hw/rdma/vmw/pvrdma_main.c | 16 +++
> hw/rdma/vmw/pvrdma_qp_ops.c | 46 +++++++-
> hw/rdma/vmw/pvrdma_qp_ops.h | 1 +
> 10 files changed, 521 insertions(+), 36 deletions(-)
>
> --
> 2.20.1
>
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Qemu-devel] [PATCH v3 0/4] pvrdma: Add support for SRQ
2019-04-03 18:19 ` [Qemu-devel] [PATCH v3 0/4] pvrdma: " Yuval Shaia
@ 2019-04-07 9:15 ` Yuval Shaia
0 siblings, 0 replies; 13+ messages in thread
From: Yuval Shaia @ 2019-04-07 9:15 UTC (permalink / raw)
To: Kamal Heib; +Cc: qemu-devel
On Wed, Apr 03, 2019 at 09:19:38PM +0300, Yuval Shaia wrote:
> On Wed, Apr 03, 2019 at 02:33:39PM +0300, Kamal Heib wrote:
> > This series implements SRQ (Shared Receive Queue) support for the pvrdma
> > device. It also includes all the functions and definitions needed to
> > support SRQ in the backend and resource management layers.
> >
> > Changes from v2->v3:
> > - Patch #1:
> > -- Fix commit message.
> > -- Remove initialization of backend_qp from rdma_backend_post_srq_recv().
> > -- Add rx_srq counter.
> > - Patch #2:
> > -- Add checks for srq attrs.
> > - Patch #3:
> > -- Move initialization of recv_cq_handle to be under is_srq.
> > -- Rearrange destroy_qp() to avoid use after free.
> > - Patch #4:
> > -- Avoid use after free.
> > -- Fix indentation.
> >
> > Changes from v1->v2:
> > - Handle checkpatch.pl warnings.
>
> Appreciate if you can post v4 with a new patch to display the new counter
> (function rdma_dump_device_counters).
Please ignore this comment; the counter is already added - I missed that.
With that, the patchset looks good to me and, from my perspective, is ready to merge.
>
> >
> > Kamal Heib (4):
> > hw/rdma: Add SRQ support to backend layer
> > hw/rdma: Add support for managing SRQ resource
> > hw/rdma: Modify create/destroy QP to support SRQ
> > hw/pvrdma: Add support for SRQ
> >
> > hw/rdma/rdma_backend.c | 125 +++++++++++++++++++++-
> > hw/rdma/rdma_backend.h | 18 +++-
> > hw/rdma/rdma_backend_defs.h | 5 +
> > hw/rdma/rdma_rm.c | 117 +++++++++++++++++++-
> > hw/rdma/rdma_rm.h | 13 ++-
> > hw/rdma/rdma_rm_defs.h | 10 ++
> > hw/rdma/vmw/pvrdma_cmd.c | 206 ++++++++++++++++++++++++++++++++----
> > hw/rdma/vmw/pvrdma_main.c | 16 +++
> > hw/rdma/vmw/pvrdma_qp_ops.c | 46 +++++++-
> > hw/rdma/vmw/pvrdma_qp_ops.h | 1 +
> > 10 files changed, 521 insertions(+), 36 deletions(-)
> >
> > --
> > 2.20.1
> >
> >
>
^ permalink raw reply [flat|nested] 13+ messages in thread