* [Qemu-devel] [PATCH v3 0/4] pvrdma: Add support for SRQ
@ 2019-04-03 11:33 Kamal Heib
  2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer Kamal Heib
                   ` (4 more replies)
  0 siblings, 5 replies; 13+ messages in thread
From: Kamal Heib @ 2019-04-03 11:33 UTC
  To: qemu-devel; +Cc: Marcel Apfelbaum, Yuval Shaia, Kamal Heib

This series implements SRQ (Shared Receive Queue) support for the pvrdma
device. It also includes all the functions and definitions needed to
support SRQs in the backend and resource management layers.
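
For context, an SRQ lets a group of QPs share one pool of receive
buffers instead of each QP posting its own. A minimal guest-side sketch
of how the feature is consumed through libibverbs (not part of this
series; assumes pd, cq, buf, len and mr are set up elsewhere; error
handling omitted):

    struct ibv_srq_init_attr sattr = {
        .attr = { .max_wr = 64, .max_sge = 1, .srq_limit = 0 },
    };
    struct ibv_srq *srq = ibv_create_srq(pd, &sattr);

    struct ibv_qp_init_attr qattr = {
        .send_cq = cq, .recv_cq = cq,
        .srq = srq,            /* both QPs draw receive buffers from here */
        .qp_type = IBV_QPT_RC,
        .cap = { .max_send_wr = 16, .max_send_sge = 1 },
    };
    struct ibv_qp *qp1 = ibv_create_qp(pd, &qattr);
    struct ibv_qp *qp2 = ibv_create_qp(pd, &qattr);

    struct ibv_sge sge = {
        .addr = (uintptr_t)buf, .length = len, .lkey = mr->lkey,
    };
    struct ibv_recv_wr wr = { .wr_id = 1, .sg_list = &sge, .num_sge = 1 };
    struct ibv_recv_wr *bad_wr;
    ibv_post_srq_recv(srq, &wr, &bad_wr);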

Changes from v2->v3:
- Patch #1:
-- Fix commit message.
-- Remove initialization of backend_qp from rdma_backend_post_srq_recv().
-- Add rx_srq counter.
- Patch #2:
-- Add checks for srq attrs.
- Patch #3:
-- Move initialization of recv_cq_handle to be under is_srq.
-- Rearrange destroy_qp() to avoid use after free.
- Patch #4:
-- Avoid use after free.
-- Fix indentation.

Changes from v1->v2:
- Handle checkpatch.pl warnings. 

Kamal Heib (4):
  hw/rdma: Add SRQ support to backend layer
  hw/rdma: Add support for managing SRQ resource
  hw/rdma: Modify create/destroy QP to support SRQ
  hw/pvrdma: Add support for SRQ

 hw/rdma/rdma_backend.c      | 125 +++++++++++++++++++++-
 hw/rdma/rdma_backend.h      |  18 +++-
 hw/rdma/rdma_backend_defs.h |   5 +
 hw/rdma/rdma_rm.c           | 117 +++++++++++++++++++-
 hw/rdma/rdma_rm.h           |  13 ++-
 hw/rdma/rdma_rm_defs.h      |  10 ++
 hw/rdma/vmw/pvrdma_cmd.c    | 206 ++++++++++++++++++++++++++++++++----
 hw/rdma/vmw/pvrdma_main.c   |  16 +++
 hw/rdma/vmw/pvrdma_qp_ops.c |  46 +++++++-
 hw/rdma/vmw/pvrdma_qp_ops.h |   1 +
 10 files changed, 521 insertions(+), 36 deletions(-)

-- 
2.20.1


* [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer
  2019-04-03 11:33 [Qemu-devel] [PATCH v3 0/4] pvrdma: Add support for SRQ Kamal Heib
@ 2019-04-03 11:33 ` Kamal Heib
  2019-04-03 18:05   ` Yuval Shaia
  2019-04-03 11:33 ` [Qemu-devel] [PATCH 2/4] hw/rdma: Add support for managing SRQ resource Kamal Heib
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 13+ messages in thread
From: Kamal Heib @ 2019-04-03 11:33 UTC
  To: qemu-devel; +Cc: Marcel Apfelbaum, Yuval Shaia, Kamal Heib

Add the required functions and definitions to support shared receive
queues (SRQs) in the backend layer.
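
A rough sketch of the intended call flow for the new entry points (names
are from this patch; error handling and the surrounding device glue are
omitted):

    RdmaBackendSRQ bsrq;

    rdma_backend_create_srq(&bsrq, &backend_pd, 64 /* max_wr */,
                            1 /* max_sge */, 0 /* srq_limit */);
    /* the upper layer posts guest receive buffers to the shared queue */
    rdma_backend_post_srq_recv(backend_dev, &bsrq, sge, num_sge, comp_ctx);
    /* rdma_poll_cq() resolves each completion through bctx->backend_srq
     * whenever no backend_qp is attached to the context */
    rdma_backend_destroy_srq(&bsrq, rdma_dev_res);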

Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
---
 hw/rdma/rdma_backend.c      | 116 +++++++++++++++++++++++++++++++++++-
 hw/rdma/rdma_backend.h      |  12 ++++
 hw/rdma/rdma_backend_defs.h |   5 ++
 hw/rdma/rdma_rm.c           |   2 +
 hw/rdma/rdma_rm_defs.h      |   1 +
 5 files changed, 134 insertions(+), 2 deletions(-)

diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
index d1660b6474fa..04dfd63a573b 100644
--- a/hw/rdma/rdma_backend.c
+++ b/hw/rdma/rdma_backend.c
@@ -40,6 +40,7 @@ typedef struct BackendCtx {
     void *up_ctx;
     struct ibv_sge sge; /* Used to save MAD recv buffer */
     RdmaBackendQP *backend_qp; /* To maintain recv buffers */
+    RdmaBackendSRQ *backend_srq;
 } BackendCtx;
 
 struct backend_umad {
@@ -99,6 +100,7 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
     int i, ne, total_ne = 0;
     BackendCtx *bctx;
     struct ibv_wc wc[2];
+    RdmaProtectedGSList *cqe_ctx_list;
 
     qemu_mutex_lock(&rdma_dev_res->lock);
     do {
@@ -116,8 +118,13 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
 
             comp_handler(bctx->up_ctx, &wc[i]);
 
-            rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list,
-                                               wc[i].wr_id);
+            if (bctx->backend_qp) {
+                cqe_ctx_list = &bctx->backend_qp->cqe_ctx_list;
+            } else {
+                cqe_ctx_list = &bctx->backend_srq->cqe_ctx_list;
+            }
+
+            rdma_protected_gslist_remove_int32(cqe_ctx_list, wc[i].wr_id);
             rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id);
             g_free(bctx);
         }
@@ -662,6 +669,60 @@ err_free_bctx:
     g_free(bctx);
 }
 
+void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
+                                RdmaBackendSRQ *srq, struct ibv_sge *sge,
+                                uint32_t num_sge, void *ctx)
+{
+    BackendCtx *bctx;
+    struct ibv_sge new_sge[MAX_SGE];
+    uint32_t bctx_id;
+    int rc;
+    struct ibv_recv_wr wr = {}, *bad_wr;
+
+    bctx = g_malloc0(sizeof(*bctx));
+    bctx->up_ctx = ctx;
+    bctx->backend_srq = srq;
+
+    rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
+    if (unlikely(rc)) {
+        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
+        goto err_free_bctx;
+    }
+
+    rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id);
+
+    rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
+                              &backend_dev->rdma_dev_res->stats.rx_bufs_len);
+    if (rc) {
+        complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
+        goto err_dealloc_cqe_ctx;
+    }
+
+    wr.num_sge = num_sge;
+    wr.sg_list = new_sge;
+    wr.wr_id = bctx_id;
+    rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr);
+    if (rc) {
+        rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, errno=%d",
+                          srq->ibsrq->handle, rc, errno);
+        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
+        goto err_dealloc_cqe_ctx;
+    }
+
+    atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe);
+    backend_dev->rdma_dev_res->stats.rx_bufs++;
+    backend_dev->rdma_dev_res->stats.rx_srq++;
+
+    return;
+
+err_dealloc_cqe_ctx:
+    backend_dev->rdma_dev_res->stats.rx_bufs_err++;
+    rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id);
+
+err_free_bctx:
+    g_free(bctx);
+}
+
 int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd)
 {
     pd->ibpd = ibv_alloc_pd(backend_dev->context);
@@ -938,6 +999,55 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp, RdmaDeviceResources *dev_res)
     rdma_protected_gslist_destroy(&qp->cqe_ctx_list);
 }
 
+int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
+                            uint32_t max_wr, uint32_t max_sge,
+                            uint32_t srq_limit)
+{
+    struct ibv_srq_init_attr srq_init_attr = {};
+
+    srq_init_attr.attr.max_wr = max_wr;
+    srq_init_attr.attr.max_sge = max_sge;
+    srq_init_attr.attr.srq_limit = srq_limit;
+
+    srq->ibsrq = ibv_create_srq(pd->ibpd, &srq_init_attr);
+    if (!srq->ibsrq) {
+        rdma_error_report("ibv_create_srq failed, errno=%d", errno);
+        return -EIO;
+    }
+
+    rdma_protected_gslist_init(&srq->cqe_ctx_list);
+
+    return 0;
+}
+
+int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr)
+{
+    if (!srq->ibsrq) {
+        return -EINVAL;
+    }
+
+    return ibv_query_srq(srq->ibsrq, srq_attr);
+}
+
+int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
+                int srq_attr_mask)
+{
+    if (!srq->ibsrq) {
+        return -EINVAL;
+    }
+
+    return ibv_modify_srq(srq->ibsrq, srq_attr, srq_attr_mask);
+}
+
+void rdma_backend_destroy_srq(RdmaBackendSRQ *srq, RdmaDeviceResources *dev_res)
+{
+    if (srq->ibsrq) {
+        ibv_destroy_srq(srq->ibsrq);
+    }
+    g_slist_foreach(srq->cqe_ctx_list.list, free_cqe_ctx, dev_res);
+    rdma_protected_gslist_destroy(&srq->cqe_ctx_list);
+}
+
 #define CHK_ATTR(req, dev, member, fmt) ({ \
     trace_rdma_check_dev_attr(#member, dev.member, req->member); \
     if (req->member > dev.member) { \
@@ -960,6 +1070,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
     }
 
     dev_attr->max_sge = MAX_SGE;
+    dev_attr->max_srq_sge = MAX_SGE;
 
     CHK_ATTR(dev_attr, bk_dev_attr, max_mr_size, "%" PRId64);
     CHK_ATTR(dev_attr, bk_dev_attr, max_qp, "%d");
@@ -970,6 +1081,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
     CHK_ATTR(dev_attr, bk_dev_attr, max_qp_rd_atom, "%d");
     CHK_ATTR(dev_attr, bk_dev_attr, max_qp_init_rd_atom, "%d");
     CHK_ATTR(dev_attr, bk_dev_attr, max_ah, "%d");
+    CHK_ATTR(dev_attr, bk_dev_attr, max_srq, "%d");
 
     return 0;
 }
diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
index 38056d97c7fc..cad7956d98e8 100644
--- a/hw/rdma/rdma_backend.h
+++ b/hw/rdma/rdma_backend.h
@@ -114,4 +114,16 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
                             RdmaBackendQP *qp, uint8_t qp_type,
                             struct ibv_sge *sge, uint32_t num_sge, void *ctx);
 
+int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
+                            uint32_t max_wr, uint32_t max_sge,
+                            uint32_t srq_limit);
+int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr);
+int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
+                            int srq_attr_mask);
+void rdma_backend_destroy_srq(RdmaBackendSRQ *srq,
+                              RdmaDeviceResources *dev_res);
+void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
+                                RdmaBackendSRQ *srq, struct ibv_sge *sge,
+                                uint32_t num_sge, void *ctx);
+
 #endif
diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h
index 817153dc8cf4..0b55be35038d 100644
--- a/hw/rdma/rdma_backend_defs.h
+++ b/hw/rdma/rdma_backend_defs.h
@@ -68,4 +68,9 @@ typedef struct RdmaBackendQP {
     RdmaProtectedGSList cqe_ctx_list;
 } RdmaBackendQP;
 
+typedef struct RdmaBackendSRQ {
+    struct ibv_srq *ibsrq;
+    RdmaProtectedGSList cqe_ctx_list;
+} RdmaBackendSRQ;
+
 #endif
diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
index bac3b2f4a6c3..b683506b8616 100644
--- a/hw/rdma/rdma_rm.c
+++ b/hw/rdma/rdma_rm.c
@@ -37,6 +37,8 @@ void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res)
                    dev_res->stats.tx_err);
     monitor_printf(mon, "\trx_bufs          : %" PRId64 "\n",
                    dev_res->stats.rx_bufs);
+    monitor_printf(mon, "\trx_srq           : %" PRId64 "\n",
+                   dev_res->stats.rx_srq);
     monitor_printf(mon, "\trx_bufs_len      : %" PRId64 "\n",
                    dev_res->stats.rx_bufs_len);
     monitor_printf(mon, "\trx_bufs_err      : %" PRId64 "\n",
diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
index c200d311de37..e774af528022 100644
--- a/hw/rdma/rdma_rm_defs.h
+++ b/hw/rdma/rdma_rm_defs.h
@@ -106,6 +106,7 @@ typedef struct RdmaRmStats {
     uint64_t rx_bufs;
     uint64_t rx_bufs_len;
     uint64_t rx_bufs_err;
+    uint64_t rx_srq;
     uint64_t completions;
     uint64_t mad_tx;
     uint64_t mad_tx_err;
-- 
2.20.1


* [Qemu-devel] [PATCH 2/4] hw/rdma: Add support for managing SRQ resource
  2019-04-03 11:33 [Qemu-devel] [PATCH v3 0/4] pvrdma: Add support for SRQ Kamal Heib
  2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer Kamal Heib
@ 2019-04-03 11:33 ` Kamal Heib
  2019-04-03 18:10   ` Yuval Shaia
  2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 3/4] hw/rdma: Modify create/destroy QP to support SRQ Kamal Heib
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 13+ messages in thread
From: Kamal Heib @ 2019-04-03 11:33 UTC
  To: qemu-devel; +Cc: Marcel Apfelbaum, Yuval Shaia, Kamal Heib

Add the required functions and definitions for managing shared receive
queues (SRQs).
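
A rough sketch of the lifecycle these helpers implement (handles are
indices into the srq_tbl resource table added below; srq_attr is a
struct ibv_srq_attr; error handling omitted):

    uint32_t srq_handle;

    rdma_rm_alloc_srq(dev_res, pd_handle, max_wr, max_sge, srq_limit,
                      &srq_handle, ring /* device-private opaque */);
    RdmaRmSRQ *srq = rdma_rm_get_srq(dev_res, srq_handle);
    rdma_rm_query_srq(dev_res, srq_handle, &srq_attr);
    rdma_rm_modify_srq(dev_res, srq_handle, &srq_attr, IBV_SRQ_LIMIT);
    rdma_rm_dealloc_srq(dev_res, srq_handle);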

Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
---
 hw/rdma/rdma_rm.c      | 93 ++++++++++++++++++++++++++++++++++++++++++
 hw/rdma/rdma_rm.h      | 10 +++++
 hw/rdma/rdma_rm_defs.h |  8 ++++
 3 files changed, 111 insertions(+)

diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
index b683506b8616..c4fb140dcd96 100644
--- a/hw/rdma/rdma_rm.c
+++ b/hw/rdma/rdma_rm.c
@@ -544,6 +544,96 @@ void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle)
     rdma_res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
 }
 
+RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle)
+{
+    return rdma_res_tbl_get(&dev_res->srq_tbl, srq_handle);
+}
+
+int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle,
+                      uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit,
+                      uint32_t *srq_handle, void *opaque)
+{
+    RdmaRmSRQ *srq;
+    RdmaRmPD *pd;
+    int rc;
+
+    pd = rdma_rm_get_pd(dev_res, pd_handle);
+    if (!pd) {
+        return -EINVAL;
+    }
+
+    srq = rdma_res_tbl_alloc(&dev_res->srq_tbl, srq_handle);
+    if (!srq) {
+        return -ENOMEM;
+    }
+
+    rc = rdma_backend_create_srq(&srq->backend_srq, &pd->backend_pd,
+                                 max_wr, max_sge, srq_limit);
+    if (rc) {
+        rc = -EIO;
+        goto out_dealloc_srq;
+    }
+
+    srq->opaque = opaque;
+
+    return 0;
+
+out_dealloc_srq:
+    rdma_res_tbl_dealloc(&dev_res->srq_tbl, *srq_handle);
+
+    return rc;
+}
+
+int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
+                      struct ibv_srq_attr *srq_attr)
+{
+    RdmaRmSRQ *srq;
+
+    srq = rdma_rm_get_srq(dev_res, srq_handle);
+    if (!srq) {
+        return -EINVAL;
+    }
+
+    return rdma_backend_query_srq(&srq->backend_srq, srq_attr);
+}
+
+int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
+                       struct ibv_srq_attr *srq_attr, int srq_attr_mask)
+{
+    RdmaRmSRQ *srq;
+
+    srq = rdma_rm_get_srq(dev_res, srq_handle);
+    if (!srq) {
+        return -EINVAL;
+    }
+
+    if ((srq_attr_mask & IBV_SRQ_LIMIT) &&
+        (srq_attr->srq_limit == 0)) {
+        return -EINVAL;
+    }
+
+    if ((srq_attr_mask & IBV_SRQ_MAX_WR) &&
+        (srq_attr->max_wr == 0)) {
+        return -EINVAL;
+    }
+
+    return rdma_backend_modify_srq(&srq->backend_srq, srq_attr,
+                                   srq_attr_mask);
+}
+
+void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle)
+{
+    RdmaRmSRQ *srq;
+
+    srq = rdma_rm_get_srq(dev_res, srq_handle);
+    if (!srq) {
+        return;
+    }
+
+    rdma_backend_destroy_srq(&srq->backend_srq, dev_res);
+    rdma_res_tbl_dealloc(&dev_res->srq_tbl, srq_handle);
+}
+
 void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
 {
     void **cqe_ctx;
@@ -673,6 +763,8 @@ int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr)
     res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp *
                        dev_attr->max_qp_wr, sizeof(void *));
     res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));
+    res_tbl_init("SRQ", &dev_res->srq_tbl, dev_attr->max_srq,
+                 sizeof(RdmaRmSRQ));
 
     init_ports(dev_res);
 
@@ -691,6 +783,7 @@ void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
 
     fini_ports(dev_res, backend_dev, ifname);
 
+    res_tbl_free(&dev_res->srq_tbl);
     res_tbl_free(&dev_res->uc_tbl);
     res_tbl_free(&dev_res->cqe_ctx_tbl);
     res_tbl_free(&dev_res->qp_tbl);
diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h
index 4f03f9b8c5f1..e88ab95e264b 100644
--- a/hw/rdma/rdma_rm.h
+++ b/hw/rdma/rdma_rm.h
@@ -65,6 +65,16 @@ int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                      int attr_mask, struct ibv_qp_init_attr *init_attr);
 void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle);
 
+RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle);
+int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle,
+                      uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit,
+                      uint32_t *srq_handle, void *opaque);
+int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
+                      struct ibv_srq_attr *srq_attr);
+int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
+                       struct ibv_srq_attr *srq_attr, int srq_attr_mask);
+void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle);
+
 int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
                           void *ctx);
 void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id);
diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
index e774af528022..7bdd9f291f9f 100644
--- a/hw/rdma/rdma_rm_defs.h
+++ b/hw/rdma/rdma_rm_defs.h
@@ -33,6 +33,7 @@
 #define MAX_QP_RD_ATOM        16
 #define MAX_QP_INIT_RD_ATOM   16
 #define MAX_AH                64
+#define MAX_SRQ               512
 
 #define MAX_RM_TBL_NAME             16
 #define MAX_CONSEQ_EMPTY_POLL_CQ    4096 /* considered as error above this */
@@ -89,6 +90,12 @@ typedef struct RdmaRmQP {
     enum ibv_qp_state qp_state;
 } RdmaRmQP;
 
+typedef struct RdmaRmSRQ {
+    RdmaBackendSRQ backend_srq;
+    uint32_t recv_cq_handle;
+    void *opaque;
+} RdmaRmSRQ;
+
 typedef struct RdmaRmGid {
     union ibv_gid gid;
     int backend_gid_index;
@@ -129,6 +136,7 @@ struct RdmaDeviceResources {
     RdmaRmResTbl qp_tbl;
     RdmaRmResTbl cq_tbl;
     RdmaRmResTbl cqe_ctx_tbl;
+    RdmaRmResTbl srq_tbl;
     GHashTable *qp_hash; /* Keeps mapping between real and emulated */
     QemuMutex lock;
     RdmaRmStats stats;
-- 
2.20.1


* [Qemu-devel] [PATCH v3 3/4] hw/rdma: Modify create/destroy QP to support SRQ
  2019-04-03 11:33 [Qemu-devel] [PATCH v3 0/4] pvrdma: Add support for SRQ Kamal Heib
  2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer Kamal Heib
  2019-04-03 11:33 ` [Qemu-devel] [PATCH 2/4] hw/rdma: Add support for managing SRQ resource Kamal Heib
@ 2019-04-03 11:33 ` Kamal Heib
  2019-04-03 18:15   ` Yuval Shaia
  2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 4/4] hw/pvrdma: Add support for SRQ Kamal Heib
  2019-04-03 18:19 ` [Qemu-devel] [PATCH v3 0/4] pvrdma: " Yuval Shaia
  4 siblings, 1 reply; 13+ messages in thread
From: Kamal Heib @ 2019-04-03 11:33 UTC
  To: qemu-devel; +Cc: Marcel Apfelbaum, Yuval Shaia, Kamal Heib

Modify create/destroy QP to support shared receive queue and rearrange
the destroy_qp() code to avoid touching the QP after calling
rdma_rm_dealloc_qp().
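
A sketch of the hazard being fixed (not verbatim code): the old ordering
released the QP slot before reading the ring pointer back out of it,

    rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle);
    ring = (PvrdmaRing *)qp->opaque;   /* 'qp' now names a released slot */
    destroy_qp_rings(ring);

while the fixed ordering takes everything it needs from 'qp' first:

    ring = (PvrdmaRing *)qp->opaque;
    destroy_qp_rings(ring, qp->is_srq);
    rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle);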

Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
---
 hw/rdma/rdma_backend.c   |  9 ++++--
 hw/rdma/rdma_backend.h   |  6 ++--
 hw/rdma/rdma_rm.c        | 22 +++++++++++++--
 hw/rdma/rdma_rm.h        |  3 +-
 hw/rdma/rdma_rm_defs.h   |  1 +
 hw/rdma/vmw/pvrdma_cmd.c | 59 ++++++++++++++++++++++++----------------
 6 files changed, 67 insertions(+), 33 deletions(-)

diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
index 04dfd63a573b..cf34874e9d2f 100644
--- a/hw/rdma/rdma_backend.c
+++ b/hw/rdma/rdma_backend.c
@@ -794,9 +794,9 @@ void rdma_backend_destroy_cq(RdmaBackendCQ *cq)
 
 int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
                            RdmaBackendPD *pd, RdmaBackendCQ *scq,
-                           RdmaBackendCQ *rcq, uint32_t max_send_wr,
-                           uint32_t max_recv_wr, uint32_t max_send_sge,
-                           uint32_t max_recv_sge)
+                           RdmaBackendCQ *rcq, RdmaBackendSRQ *srq,
+                           uint32_t max_send_wr, uint32_t max_recv_wr,
+                           uint32_t max_send_sge, uint32_t max_recv_sge)
 {
     struct ibv_qp_init_attr attr = {};
 
@@ -824,6 +824,9 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
     attr.cap.max_recv_wr = max_recv_wr;
     attr.cap.max_send_sge = max_send_sge;
     attr.cap.max_recv_sge = max_recv_sge;
+    if (srq) {
+        attr.srq = srq->ibsrq;
+    }
 
     qp->ibqp = ibv_create_qp(pd->ibpd, &attr);
     if (!qp->ibqp) {
diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
index cad7956d98e8..7c1a19a2b5ff 100644
--- a/hw/rdma/rdma_backend.h
+++ b/hw/rdma/rdma_backend.h
@@ -89,9 +89,9 @@ void rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq);
 
 int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
                            RdmaBackendPD *pd, RdmaBackendCQ *scq,
-                           RdmaBackendCQ *rcq, uint32_t max_send_wr,
-                           uint32_t max_recv_wr, uint32_t max_send_sge,
-                           uint32_t max_recv_sge);
+                           RdmaBackendCQ *rcq, RdmaBackendSRQ *srq,
+                           uint32_t max_send_wr, uint32_t max_recv_wr,
+                           uint32_t max_send_sge, uint32_t max_recv_sge);
 int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
                                uint8_t qp_type, uint32_t qkey);
 int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
index c0bb27cb6b90..96279e8d6561 100644
--- a/hw/rdma/rdma_rm.c
+++ b/hw/rdma/rdma_rm.c
@@ -386,12 +386,14 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                      uint8_t qp_type, uint32_t max_send_wr,
                      uint32_t max_send_sge, uint32_t send_cq_handle,
                      uint32_t max_recv_wr, uint32_t max_recv_sge,
-                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn)
+                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn,
+                     uint8_t is_srq, uint32_t srq_handle)
 {
     int rc;
     RdmaRmQP *qp;
     RdmaRmCQ *scq, *rcq;
     RdmaRmPD *pd;
+    RdmaRmSRQ *srq = NULL;
     uint32_t rm_qpn;
 
     pd = rdma_rm_get_pd(dev_res, pd_handle);
@@ -408,6 +410,16 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
         return -EINVAL;
     }
 
+    if (is_srq) {
+        srq = rdma_rm_get_srq(dev_res, srq_handle);
+        if (!srq) {
+            rdma_error_report("Invalid srqn %d", srq_handle);
+            return -EINVAL;
+        }
+
+        srq->recv_cq_handle = recv_cq_handle;
+    }
+
     if (qp_type == IBV_QPT_GSI) {
         scq->notify = CNT_SET;
         rcq->notify = CNT_SET;
@@ -424,10 +436,14 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
     qp->send_cq_handle = send_cq_handle;
     qp->recv_cq_handle = recv_cq_handle;
     qp->opaque = opaque;
+    qp->is_srq = is_srq;
 
     rc = rdma_backend_create_qp(&qp->backend_qp, qp_type, &pd->backend_pd,
-                                &scq->backend_cq, &rcq->backend_cq, max_send_wr,
-                                max_recv_wr, max_send_sge, max_recv_sge);
+                                &scq->backend_cq, &rcq->backend_cq,
+                                is_srq ? &srq->backend_srq : NULL,
+                                max_send_wr, max_recv_wr, max_send_sge,
+                                max_recv_sge);
+
     if (rc) {
         rc = -EIO;
         goto out_dealloc_qp;
diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h
index e88ab95e264b..e8639909cd34 100644
--- a/hw/rdma/rdma_rm.h
+++ b/hw/rdma/rdma_rm.h
@@ -53,7 +53,8 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                      uint8_t qp_type, uint32_t max_send_wr,
                      uint32_t max_send_sge, uint32_t send_cq_handle,
                      uint32_t max_recv_wr, uint32_t max_recv_sge,
-                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn);
+                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn,
+                     uint8_t is_srq, uint32_t srq_handle);
 RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn);
 int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                       uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
index 7bdd9f291f9f..534f2f74d3d1 100644
--- a/hw/rdma/rdma_rm_defs.h
+++ b/hw/rdma/rdma_rm_defs.h
@@ -88,6 +88,7 @@ typedef struct RdmaRmQP {
     uint32_t send_cq_handle;
     uint32_t recv_cq_handle;
     enum ibv_qp_state qp_state;
+    uint8_t is_srq;
 } RdmaRmQP;
 
 typedef struct RdmaRmSRQ {
diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
index 4afcd2037db2..b931bb6dc9d4 100644
--- a/hw/rdma/vmw/pvrdma_cmd.c
+++ b/hw/rdma/vmw/pvrdma_cmd.c
@@ -357,7 +357,7 @@ static int destroy_cq(PVRDMADev *dev, union pvrdma_cmd_req *req,
 static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
                            PvrdmaRing **rings, uint32_t scqe, uint32_t smax_sge,
                            uint32_t spages, uint32_t rcqe, uint32_t rmax_sge,
-                           uint32_t rpages)
+                           uint32_t rpages, uint8_t is_srq)
 {
     uint64_t *dir = NULL, *tbl = NULL;
     PvrdmaRing *sr, *rr;
@@ -365,9 +365,14 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
     char ring_name[MAX_RING_NAME_SZ];
     uint32_t wqe_sz;
 
-    if (!spages || spages > PVRDMA_MAX_FAST_REG_PAGES
-        || !rpages || rpages > PVRDMA_MAX_FAST_REG_PAGES) {
-        rdma_error_report("Got invalid page count for QP ring: %d, %d", spages,
+    if (!spages || spages > PVRDMA_MAX_FAST_REG_PAGES) {
+        rdma_error_report("Got invalid send page count for QP ring: %d",
+                          spages);
+        return rc;
+    }
+
+    if (!is_srq && (!rpages || rpages > PVRDMA_MAX_FAST_REG_PAGES)) {
+        rdma_error_report("Got invalid recv page count for QP ring: %d",
                           rpages);
         return rc;
     }
@@ -384,8 +389,12 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
         goto out;
     }
 
-    sr = g_malloc(2 * sizeof(*rr));
-    rr = &sr[1];
+    if (!is_srq) {
+        sr = g_malloc(2 * sizeof(*rr));
+        rr = &sr[1];
+    } else {
+        sr = g_malloc(sizeof(*sr));
+    }
 
     *rings = sr;
 
@@ -407,15 +416,18 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
         goto out_unmap_ring_state;
     }
 
-    /* Create recv ring */
-    rr->ring_state = &sr->ring_state[1];
-    wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) +
-                      sizeof(struct pvrdma_sge) * rmax_sge - 1);
-    sprintf(ring_name, "qp_rring_%" PRIx64, pdir_dma);
-    rc = pvrdma_ring_init(rr, ring_name, pci_dev, rr->ring_state,
-                          rcqe, wqe_sz, (dma_addr_t *)&tbl[1 + spages], rpages);
-    if (rc) {
-        goto out_free_sr;
+    if (!is_srq) {
+        /* Create recv ring */
+        rr->ring_state = &sr->ring_state[1];
+        wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) +
+                          sizeof(struct pvrdma_sge) * rmax_sge - 1);
+        sprintf(ring_name, "qp_rring_%" PRIx64, pdir_dma);
+        rc = pvrdma_ring_init(rr, ring_name, pci_dev, rr->ring_state,
+                              rcqe, wqe_sz, (dma_addr_t *)&tbl[1 + spages],
+                              rpages);
+        if (rc) {
+            goto out_free_sr;
+        }
     }
 
     goto out;
@@ -436,10 +448,12 @@ out:
     return rc;
 }
 
-static void destroy_qp_rings(PvrdmaRing *ring)
+static void destroy_qp_rings(PvrdmaRing *ring, uint8_t is_srq)
 {
     pvrdma_ring_free(&ring[0]);
-    pvrdma_ring_free(&ring[1]);
+    if (!is_srq) {
+        pvrdma_ring_free(&ring[1]);
+    }
 
     rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE);
     g_free(ring);
@@ -458,7 +472,7 @@ static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
     rc = create_qp_rings(PCI_DEVICE(dev), cmd->pdir_dma, &rings,
                          cmd->max_send_wr, cmd->max_send_sge, cmd->send_chunks,
                          cmd->max_recv_wr, cmd->max_recv_sge,
-                         cmd->total_chunks - cmd->send_chunks - 1);
+                         cmd->total_chunks - cmd->send_chunks - 1, cmd->is_srq);
     if (rc) {
         return rc;
     }
@@ -467,9 +481,9 @@ static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
                           cmd->max_send_wr, cmd->max_send_sge,
                           cmd->send_cq_handle, cmd->max_recv_wr,
                           cmd->max_recv_sge, cmd->recv_cq_handle, rings,
-                          &resp->qpn);
+                          &resp->qpn, cmd->is_srq, cmd->srq_handle);
     if (rc) {
-        destroy_qp_rings(rings);
+        destroy_qp_rings(rings, cmd->is_srq);
         return rc;
     }
 
@@ -531,10 +545,9 @@ static int destroy_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
         return -EINVAL;
     }
 
-    rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle);
-
     ring = (PvrdmaRing *)qp->opaque;
-    destroy_qp_rings(ring);
+    destroy_qp_rings(ring, qp->is_srq);
+    rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle);
 
     return 0;
 }
-- 
2.20.1


* [Qemu-devel] [PATCH v3 4/4] hw/pvrdma: Add support for SRQ
  2019-04-03 11:33 [Qemu-devel] [PATCH v3 0/4] pvrdma: Add support for SRQ Kamal Heib
                   ` (2 preceding siblings ...)
  2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 3/4] hw/rdma: Modify create/destroy QP to support SRQ Kamal Heib
@ 2019-04-03 11:33 ` Kamal Heib
  2019-04-03 18:16   ` Yuval Shaia
  2019-04-03 18:19 ` [Qemu-devel] [PATCH v3 0/4] pvrdma: " Yuval Shaia
  4 siblings, 1 reply; 13+ messages in thread
From: Kamal Heib @ 2019-04-03 11:33 UTC
  To: qemu-devel; +Cc: Marcel Apfelbaum, Yuval Shaia, Kamal Heib

Implement the pvrdma device commands (create, query, modify, and
destroy) and the receive doorbell handling needed to support SRQ.
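
The doorbell-to-backend flow added here mirrors the existing per-QP
receive path; a simplified call-flow sketch (names from this patch):

    /* guest rings the SRQ doorbell page */
    pvrdma_uar_write(addr == PVRDMA_UAR_SRQ_OFFSET, val)
      -> pvrdma_srq_recv(dev, val & PVRDMA_UAR_HANDLE_MASK)
         -> wqe = pvrdma_ring_next_elem_read(ring)   /* pop a PvrdmaRqWqe */
         -> rdma_backend_post_srq_recv(&dev->backend_dev, &srq->backend_srq,
                                       wqe->sge, wqe->hdr.num_sge, comp_ctx)

Completions are then routed back through the CQ recorded in
srq->recv_cq_handle.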

Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
---
 hw/rdma/vmw/pvrdma_cmd.c    | 147 ++++++++++++++++++++++++++++++++++++
 hw/rdma/vmw/pvrdma_main.c   |  16 ++++
 hw/rdma/vmw/pvrdma_qp_ops.c |  46 ++++++++++-
 hw/rdma/vmw/pvrdma_qp_ops.h |   1 +
 4 files changed, 209 insertions(+), 1 deletion(-)

diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
index b931bb6dc9d4..8d70c0d23de4 100644
--- a/hw/rdma/vmw/pvrdma_cmd.c
+++ b/hw/rdma/vmw/pvrdma_cmd.c
@@ -609,6 +609,149 @@ static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
     return 0;
 }
 
+static int create_srq_ring(PCIDevice *pci_dev, PvrdmaRing **ring,
+                           uint64_t pdir_dma, uint32_t max_wr,
+                           uint32_t max_sge, uint32_t nchunks)
+{
+    uint64_t *dir = NULL, *tbl = NULL;
+    PvrdmaRing *r;
+    int rc = -EINVAL;
+    char ring_name[MAX_RING_NAME_SZ];
+    uint32_t wqe_sz;
+
+    if (!nchunks || nchunks > PVRDMA_MAX_FAST_REG_PAGES) {
+        rdma_error_report("Got invalid page count for SRQ ring: %d",
+                          nchunks);
+        return rc;
+    }
+
+    dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE);
+    if (!dir) {
+        rdma_error_report("Failed to map to SRQ page directory");
+        goto out;
+    }
+
+    tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE);
+    if (!tbl) {
+        rdma_error_report("Failed to map to SRQ page table");
+        goto out;
+    }
+
+    r = g_malloc(sizeof(*r));
+    *ring = r;
+
+    r->ring_state = (struct pvrdma_ring *)
+            rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE);
+    if (!r->ring_state) {
+        rdma_error_report("Failed to map tp SRQ ring state");
+        goto out_free_ring_mem;
+    }
+
+    wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) +
+                      sizeof(struct pvrdma_sge) * max_sge - 1);
+    sprintf(ring_name, "srq_ring_%" PRIx64, pdir_dma);
+    rc = pvrdma_ring_init(r, ring_name, pci_dev, &r->ring_state[1], max_wr,
+                          wqe_sz, (dma_addr_t *)&tbl[1], nchunks - 1);
+    if (rc) {
+        goto out_unmap_ring_state;
+    }
+
+    goto out;
+
+out_unmap_ring_state:
+    rdma_pci_dma_unmap(pci_dev, r->ring_state, TARGET_PAGE_SIZE);
+
+out_free_ring_mem:
+    g_free(r);
+
+out:
+    rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE);
+    rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE);
+
+    return rc;
+}
+
+static void destroy_srq_ring(PvrdmaRing *ring)
+{
+    pvrdma_ring_free(ring);
+    rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE);
+    g_free(ring);
+}
+
+static int create_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
+                      union pvrdma_cmd_resp *rsp)
+{
+    struct pvrdma_cmd_create_srq *cmd = &req->create_srq;
+    struct pvrdma_cmd_create_srq_resp *resp = &rsp->create_srq_resp;
+    PvrdmaRing *ring = NULL;
+    int rc;
+
+    memset(resp, 0, sizeof(*resp));
+
+    rc = create_srq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma,
+                         cmd->attrs.max_wr, cmd->attrs.max_sge,
+                         cmd->nchunks);
+    if (rc) {
+        return rc;
+    }
+
+    rc = rdma_rm_alloc_srq(&dev->rdma_dev_res, cmd->pd_handle,
+                           cmd->attrs.max_wr, cmd->attrs.max_sge,
+                           cmd->attrs.srq_limit, &resp->srqn, ring);
+    if (rc) {
+        destroy_srq_ring(ring);
+        return rc;
+    }
+
+    return 0;
+}
+
+static int query_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
+                     union pvrdma_cmd_resp *rsp)
+{
+    struct pvrdma_cmd_query_srq *cmd = &req->query_srq;
+    struct pvrdma_cmd_query_srq_resp *resp = &rsp->query_srq_resp;
+
+    memset(resp, 0, sizeof(*resp));
+
+    return rdma_rm_query_srq(&dev->rdma_dev_res, cmd->srq_handle,
+                             (struct ibv_srq_attr *)&resp->attrs);
+}
+
+static int modify_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
+                      union pvrdma_cmd_resp *rsp)
+{
+    struct pvrdma_cmd_modify_srq *cmd = &req->modify_srq;
+
+    /* Only support SRQ limit */
+    if (!(cmd->attr_mask & IBV_SRQ_LIMIT) ||
+        (cmd->attr_mask & IBV_SRQ_MAX_WR))
+            return -EINVAL;
+
+    return rdma_rm_modify_srq(&dev->rdma_dev_res, cmd->srq_handle,
+                              (struct ibv_srq_attr *)&cmd->attrs,
+                              cmd->attr_mask);
+}
+
+static int destroy_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
+                       union pvrdma_cmd_resp *rsp)
+{
+    struct pvrdma_cmd_destroy_srq *cmd = &req->destroy_srq;
+    RdmaRmSRQ *srq;
+    PvrdmaRing *ring;
+
+    srq = rdma_rm_get_srq(&dev->rdma_dev_res, cmd->srq_handle);
+    if (!srq) {
+        return -EINVAL;
+    }
+
+    ring = (PvrdmaRing *)srq->opaque;
+    destroy_srq_ring(ring);
+    rdma_rm_dealloc_srq(&dev->rdma_dev_res, cmd->srq_handle);
+
+    return 0;
+}
+
 struct cmd_handler {
     uint32_t cmd;
     uint32_t ack;
@@ -634,6 +777,10 @@ static struct cmd_handler cmd_handlers[] = {
     {PVRDMA_CMD_DESTROY_UC,   PVRDMA_CMD_DESTROY_UC_RESP_NOOP,   destroy_uc},
     {PVRDMA_CMD_CREATE_BIND,  PVRDMA_CMD_CREATE_BIND_RESP_NOOP,  create_bind},
     {PVRDMA_CMD_DESTROY_BIND, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, destroy_bind},
+    {PVRDMA_CMD_CREATE_SRQ,   PVRDMA_CMD_CREATE_SRQ_RESP,        create_srq},
+    {PVRDMA_CMD_QUERY_SRQ,    PVRDMA_CMD_QUERY_SRQ_RESP,         query_srq},
+    {PVRDMA_CMD_MODIFY_SRQ,   PVRDMA_CMD_MODIFY_SRQ_RESP,        modify_srq},
+    {PVRDMA_CMD_DESTROY_SRQ,  PVRDMA_CMD_DESTROY_SRQ_RESP,       destroy_srq},
 };
 
 int pvrdma_exec_cmd(PVRDMADev *dev)
diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
index 0b46561bad11..769f7990f864 100644
--- a/hw/rdma/vmw/pvrdma_main.c
+++ b/hw/rdma/vmw/pvrdma_main.c
@@ -53,6 +53,7 @@ static Property pvrdma_dev_properties[] = {
     DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev,
                       dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM),
     DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH),
+    DEFINE_PROP_INT32("dev-caps-max-srq", PVRDMADev, dev_attr.max_srq, MAX_SRQ),
     DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr),
     DEFINE_PROP_END_OF_LIST(),
 };
@@ -261,6 +262,9 @@ static void init_dsr_dev_caps(PVRDMADev *dev)
     dsr->caps.max_mr = dev->dev_attr.max_mr;
     dsr->caps.max_pd = dev->dev_attr.max_pd;
     dsr->caps.max_ah = dev->dev_attr.max_ah;
+    dsr->caps.max_srq = dev->dev_attr.max_srq;
+    dsr->caps.max_srq_wr = dev->dev_attr.max_srq_wr;
+    dsr->caps.max_srq_sge = dev->dev_attr.max_srq_sge;
     dsr->caps.gid_tbl_len = MAX_GIDS;
     dsr->caps.sys_image_guid = 0;
     dsr->caps.node_guid = dev->node_guid;
@@ -485,6 +489,13 @@ static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val,
             pvrdma_cq_poll(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK);
         }
         break;
+    case PVRDMA_UAR_SRQ_OFFSET:
+        if (val & PVRDMA_UAR_SRQ_RECV) {
+            trace_pvrdma_uar_write(addr, val, "QP", "SRQ",
+                                   val & PVRDMA_UAR_HANDLE_MASK, 0);
+            pvrdma_srq_recv(dev, val & PVRDMA_UAR_HANDLE_MASK);
+        }
+        break;
     default:
         rdma_error_report("Unsupported command, addr=0x%"PRIx64", val=0x%"PRIx64,
                           addr, val);
@@ -554,6 +565,11 @@ static void init_dev_caps(PVRDMADev *dev)
 
     dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) -
                             TARGET_PAGE_SIZE; /* First page is ring state */
+
+    dev->dev_attr.max_srq_wr = pg_tbl_bytes /
+                                ((sizeof(struct pvrdma_rq_wqe_hdr) +
+                                sizeof(struct pvrdma_sge)) *
+                                dev->dev_attr.max_sge) - TARGET_PAGE_SIZE;
 }
 
 static int pvrdma_check_ram_shared(Object *obj, void *opaque)
diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c
index 5b9786efbe4b..bd6db858de32 100644
--- a/hw/rdma/vmw/pvrdma_qp_ops.c
+++ b/hw/rdma/vmw/pvrdma_qp_ops.c
@@ -70,7 +70,7 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
 
     memset(cqe1, 0, sizeof(*cqe1));
     cqe1->wr_id = cqe->wr_id;
-    cqe1->qp = cqe->qp;
+    cqe1->qp = cqe->qp ? cqe->qp : wc->qp_num;
     cqe1->opcode = cqe->opcode;
     cqe1->status = wc->status;
     cqe1->byte_len = wc->byte_len;
@@ -241,6 +241,50 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
     }
 }
 
+void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle)
+{
+    RdmaRmSRQ *srq;
+    PvrdmaRqWqe *wqe;
+    PvrdmaRing *ring;
+
+    srq = rdma_rm_get_srq(&dev->rdma_dev_res, srq_handle);
+    if (unlikely(!srq)) {
+        return;
+    }
+
+    ring = (PvrdmaRing *)srq->opaque;
+
+    wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring);
+    while (wqe) {
+        CompHandlerCtx *comp_ctx;
+
+        /* Prepare CQE */
+        comp_ctx = g_malloc(sizeof(CompHandlerCtx));
+        comp_ctx->dev = dev;
+        comp_ctx->cq_handle = srq->recv_cq_handle;
+        comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
+        comp_ctx->cqe.qp = 0;
+        comp_ctx->cqe.opcode = IBV_WC_RECV;
+
+        if (wqe->hdr.num_sge > dev->dev_attr.max_sge) {
+            rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge,
+                              dev->dev_attr.max_sge);
+            complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx);
+            continue;
+        }
+
+        rdma_backend_post_srq_recv(&dev->backend_dev, &srq->backend_srq,
+                                   (struct ibv_sge *)&wqe->sge[0],
+                                   wqe->hdr.num_sge,
+                                   comp_ctx);
+
+        pvrdma_ring_read_inc(ring);
+
+        wqe = pvrdma_ring_next_elem_read(ring);
+    }
+
+}
+
 void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle)
 {
     RdmaRmCQ *cq;
diff --git a/hw/rdma/vmw/pvrdma_qp_ops.h b/hw/rdma/vmw/pvrdma_qp_ops.h
index 31cb48ba29f1..82e720a76fb4 100644
--- a/hw/rdma/vmw/pvrdma_qp_ops.h
+++ b/hw/rdma/vmw/pvrdma_qp_ops.h
@@ -22,6 +22,7 @@ int pvrdma_qp_ops_init(void);
 void pvrdma_qp_ops_fini(void);
 void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle);
 void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle);
+void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle);
 void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle);
 
 #endif
-- 
2.20.1


* Re: [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer
  2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer Kamal Heib
@ 2019-04-03 18:05   ` Yuval Shaia
  2019-04-07  8:13     ` Kamal Heib
  0 siblings, 1 reply; 13+ messages in thread
From: Yuval Shaia @ 2019-04-03 18:05 UTC
  To: Kamal Heib; +Cc: qemu-devel

On Wed, Apr 03, 2019 at 02:33:40PM +0300, Kamal Heib wrote:
> Add the required functions and definitions to support shared receive
> queues (SRQs) in the backend layer.
> 
> Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
> ---
>  hw/rdma/rdma_backend.c      | 116 +++++++++++++++++++++++++++++++++++-
>  hw/rdma/rdma_backend.h      |  12 ++++
>  hw/rdma/rdma_backend_defs.h |   5 ++
>  hw/rdma/rdma_rm.c           |   2 +
>  hw/rdma/rdma_rm_defs.h      |   1 +
>  5 files changed, 134 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
> index d1660b6474fa..04dfd63a573b 100644
> --- a/hw/rdma/rdma_backend.c
> +++ b/hw/rdma/rdma_backend.c
> @@ -40,6 +40,7 @@ typedef struct BackendCtx {
>      void *up_ctx;
>      struct ibv_sge sge; /* Used to save MAD recv buffer */
>      RdmaBackendQP *backend_qp; /* To maintain recv buffers */
> +    RdmaBackendSRQ *backend_srq;
>  } BackendCtx;
>  
>  struct backend_umad {
> @@ -99,6 +100,7 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
>      int i, ne, total_ne = 0;
>      BackendCtx *bctx;
>      struct ibv_wc wc[2];
> +    RdmaProtectedGSList *cqe_ctx_list;
>  
>      qemu_mutex_lock(&rdma_dev_res->lock);
>      do {
> @@ -116,8 +118,13 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
>  
>              comp_handler(bctx->up_ctx, &wc[i]);
>  
> -            rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list,
> -                                               wc[i].wr_id);
> +            if (bctx->backend_qp) {
> +                cqe_ctx_list = &bctx->backend_qp->cqe_ctx_list;
> +            } else {
> +                cqe_ctx_list = &bctx->backend_srq->cqe_ctx_list;
> +            }
> +
> +            rdma_protected_gslist_remove_int32(cqe_ctx_list, wc[i].wr_id);
>              rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id);
>              g_free(bctx);
>          }
> @@ -662,6 +669,60 @@ err_free_bctx:
>      g_free(bctx);
>  }
>  
> +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
> +                                RdmaBackendSRQ *srq, struct ibv_sge *sge,
> +                                uint32_t num_sge, void *ctx)
> +{
> +    BackendCtx *bctx;
> +    struct ibv_sge new_sge[MAX_SGE];
> +    uint32_t bctx_id;
> +    int rc;
> +    struct ibv_recv_wr wr = {}, *bad_wr;
> +
> +    bctx = g_malloc0(sizeof(*bctx));
> +    bctx->up_ctx = ctx;
> +    bctx->backend_srq = srq;
> +
> +    rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
> +    if (unlikely(rc)) {
> +        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
> +        goto err_free_bctx;
> +    }
> +
> +    rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id);
> +
> +    rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
> +                              &backend_dev->rdma_dev_res->stats.rx_bufs_len);
> +    if (rc) {
> +        complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
> +        goto err_dealloc_cqe_ctx;
> +    }
> +
> +    wr.num_sge = num_sge;
> +    wr.sg_list = new_sge;
> +    wr.wr_id = bctx_id;
> +    rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr);
> +    if (rc) {
> +        rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, errno=%d",
> +                          srq->ibsrq->handle, rc, errno);
> +        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
> +        goto err_dealloc_cqe_ctx;
> +    }
> +
> +    atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe);
> +    backend_dev->rdma_dev_res->stats.rx_bufs++;
> +    backend_dev->rdma_dev_res->stats.rx_srq++;

You should update function rdma_dump_device_counters with this new
counter.

> +
> +    return;
> +
> +err_dealloc_cqe_ctx:
> +    backend_dev->rdma_dev_res->stats.rx_bufs_err++;
> +    rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id);
> +
> +err_free_bctx:
> +    g_free(bctx);
> +}
> +
>  int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd)
>  {
>      pd->ibpd = ibv_alloc_pd(backend_dev->context);
> @@ -938,6 +999,55 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp, RdmaDeviceResources *dev_res)
>      rdma_protected_gslist_destroy(&qp->cqe_ctx_list);
>  }
>  
> +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
> +                            uint32_t max_wr, uint32_t max_sge,
> +                            uint32_t srq_limit)
> +{
> +    struct ibv_srq_init_attr srq_init_attr = {};
> +
> +    srq_init_attr.attr.max_wr = max_wr;
> +    srq_init_attr.attr.max_sge = max_sge;
> +    srq_init_attr.attr.srq_limit = srq_limit;
> +
> +    srq->ibsrq = ibv_create_srq(pd->ibpd, &srq_init_attr);
> +    if (!srq->ibsrq) {
> +        rdma_error_report("ibv_create_srq failed, errno=%d", errno);
> +        return -EIO;
> +    }
> +
> +    rdma_protected_gslist_init(&srq->cqe_ctx_list);
> +
> +    return 0;
> +}
> +
> +int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr)
> +{
> +    if (!srq->ibsrq) {
> +        return -EINVAL;
> +    }
> +
> +    return ibv_query_srq(srq->ibsrq, srq_attr);
> +}
> +
> +int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
> +                int srq_attr_mask)
> +{
> +    if (!srq->ibsrq) {
> +        return -EINVAL;
> +    }
> +
> +    return ibv_modify_srq(srq->ibsrq, srq_attr, srq_attr_mask);
> +}
> +
> +void rdma_backend_destroy_srq(RdmaBackendSRQ *srq, RdmaDeviceResources *dev_res)
> +{
> +    if (srq->ibsrq) {
> +        ibv_destroy_srq(srq->ibsrq);
> +    }
> +    g_slist_foreach(srq->cqe_ctx_list.list, free_cqe_ctx, dev_res);
> +    rdma_protected_gslist_destroy(&srq->cqe_ctx_list);
> +}
> +
>  #define CHK_ATTR(req, dev, member, fmt) ({ \
>      trace_rdma_check_dev_attr(#member, dev.member, req->member); \
>      if (req->member > dev.member) { \
> @@ -960,6 +1070,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
>      }
>  
>      dev_attr->max_sge = MAX_SGE;
> +    dev_attr->max_srq_sge = MAX_SGE;
>  
>      CHK_ATTR(dev_attr, bk_dev_attr, max_mr_size, "%" PRId64);
>      CHK_ATTR(dev_attr, bk_dev_attr, max_qp, "%d");
> @@ -970,6 +1081,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
>      CHK_ATTR(dev_attr, bk_dev_attr, max_qp_rd_atom, "%d");
>      CHK_ATTR(dev_attr, bk_dev_attr, max_qp_init_rd_atom, "%d");
>      CHK_ATTR(dev_attr, bk_dev_attr, max_ah, "%d");
> +    CHK_ATTR(dev_attr, bk_dev_attr, max_srq, "%d");
>  
>      return 0;
>  }
> diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
> index 38056d97c7fc..cad7956d98e8 100644
> --- a/hw/rdma/rdma_backend.h
> +++ b/hw/rdma/rdma_backend.h
> @@ -114,4 +114,16 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
>                              RdmaBackendQP *qp, uint8_t qp_type,
>                              struct ibv_sge *sge, uint32_t num_sge, void *ctx);
>  
> +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
> +                            uint32_t max_wr, uint32_t max_sge,
> +                            uint32_t srq_limit);
> +int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr);
> +int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
> +                            int srq_attr_mask);
> +void rdma_backend_destroy_srq(RdmaBackendSRQ *srq,
> +                              RdmaDeviceResources *dev_res);
> +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
> +                                RdmaBackendSRQ *srq, struct ibv_sge *sge,
> +                                uint32_t num_sge, void *ctx);
> +
>  #endif
> diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h
> index 817153dc8cf4..0b55be35038d 100644
> --- a/hw/rdma/rdma_backend_defs.h
> +++ b/hw/rdma/rdma_backend_defs.h
> @@ -68,4 +68,9 @@ typedef struct RdmaBackendQP {
>      RdmaProtectedGSList cqe_ctx_list;
>  } RdmaBackendQP;
>  
> +typedef struct RdmaBackendSRQ {
> +    struct ibv_srq *ibsrq;
> +    RdmaProtectedGSList cqe_ctx_list;
> +} RdmaBackendSRQ;
> +
>  #endif
> diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
> index bac3b2f4a6c3..b683506b8616 100644
> --- a/hw/rdma/rdma_rm.c
> +++ b/hw/rdma/rdma_rm.c
> @@ -37,6 +37,8 @@ void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res)
>                     dev_res->stats.tx_err);
>      monitor_printf(mon, "\trx_bufs          : %" PRId64 "\n",
>                     dev_res->stats.rx_bufs);
> +    monitor_printf(mon, "\trx_srq           : %" PRId64 "\n",
> +                   dev_res->stats.rx_srq);
>      monitor_printf(mon, "\trx_bufs_len      : %" PRId64 "\n",
>                     dev_res->stats.rx_bufs_len);
>      monitor_printf(mon, "\trx_bufs_err      : %" PRId64 "\n",
> diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
> index c200d311de37..e774af528022 100644
> --- a/hw/rdma/rdma_rm_defs.h
> +++ b/hw/rdma/rdma_rm_defs.h
> @@ -106,6 +106,7 @@ typedef struct RdmaRmStats {
>      uint64_t rx_bufs;
>      uint64_t rx_bufs_len;
>      uint64_t rx_bufs_err;
> +    uint64_t rx_srq;
>      uint64_t completions;
>      uint64_t mad_tx;
>      uint64_t mad_tx_err;

Please make a separate patch to update the function
rdma_dump_device_counters.

Besides that patch lgtm.

Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>

> -- 
> 2.20.1
> 
> 


* Re: [Qemu-devel] [PATCH 2/4] hw/rdma: Add support for managing SRQ resource
  2019-04-03 11:33 ` [Qemu-devel] [PATCH 2/4] hw/rdma: Add support for managing SRQ resource Kamal Heib
@ 2019-04-03 18:10   ` Yuval Shaia
  0 siblings, 0 replies; 13+ messages in thread
From: Yuval Shaia @ 2019-04-03 18:10 UTC
  To: Kamal Heib; +Cc: qemu-devel

On Wed, Apr 03, 2019 at 02:33:41PM +0300, Kamal Heib wrote:
> Add the required functions and definitions for managing shared receive
> queues (SRQs).
> 
> Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
> ---
>  hw/rdma/rdma_rm.c      | 93 ++++++++++++++++++++++++++++++++++++++++++
>  hw/rdma/rdma_rm.h      | 10 +++++
>  hw/rdma/rdma_rm_defs.h |  8 ++++
>  3 files changed, 111 insertions(+)
> 
> diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
> index b683506b8616..c4fb140dcd96 100644
> --- a/hw/rdma/rdma_rm.c
> +++ b/hw/rdma/rdma_rm.c
> @@ -544,6 +544,96 @@ void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle)
>      rdma_res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
>  }
>  
> +RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle)
> +{
> +    return rdma_res_tbl_get(&dev_res->srq_tbl, srq_handle);
> +}
> +
> +int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle,
> +                      uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit,
> +                      uint32_t *srq_handle, void *opaque)
> +{
> +    RdmaRmSRQ *srq;
> +    RdmaRmPD *pd;
> +    int rc;
> +
> +    pd = rdma_rm_get_pd(dev_res, pd_handle);
> +    if (!pd) {
> +        return -EINVAL;
> +    }
> +
> +    srq = rdma_res_tbl_alloc(&dev_res->srq_tbl, srq_handle);
> +    if (!srq) {
> +        return -ENOMEM;
> +    }
> +
> +    rc = rdma_backend_create_srq(&srq->backend_srq, &pd->backend_pd,
> +                                 max_wr, max_sge, srq_limit);
> +    if (rc) {
> +        rc = -EIO;
> +        goto out_dealloc_srq;
> +    }
> +
> +    srq->opaque = opaque;
> +
> +    return 0;
> +
> +out_dealloc_srq:
> +    rdma_res_tbl_dealloc(&dev_res->srq_tbl, *srq_handle);
> +
> +    return rc;
> +}
> +
> +int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
> +                      struct ibv_srq_attr *srq_attr)
> +{
> +    RdmaRmSRQ *srq;
> +
> +    srq = rdma_rm_get_srq(dev_res, srq_handle);
> +    if (!srq) {
> +        return -EINVAL;
> +    }
> +
> +    return rdma_backend_query_srq(&srq->backend_srq, srq_attr);
> +}
> +
> +int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
> +                       struct ibv_srq_attr *srq_attr, int srq_attr_mask)
> +{
> +    RdmaRmSRQ *srq;
> +
> +    srq = rdma_rm_get_srq(dev_res, srq_handle);
> +    if (!srq) {
> +        return -EINVAL;
> +    }
> +
> +    if ((srq_attr_mask & IBV_SRQ_LIMIT) &&
> +        (srq_attr->srq_limit == 0)) {
> +        return -EINVAL;
> +    }
> +
> +    if ((srq_attr_mask & IBV_SRQ_MAX_WR) &&
> +        (srq_attr->max_wr == 0)) {
> +        return -EINVAL;
> +    }
> +
> +    return rdma_backend_modify_srq(&srq->backend_srq, srq_attr,
> +                                   srq_attr_mask);
> +}
> +
> +void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle)
> +{
> +    RdmaRmSRQ *srq;
> +
> +    srq = rdma_rm_get_srq(dev_res, srq_handle);
> +    if (!srq) {
> +        return;
> +    }
> +
> +    rdma_backend_destroy_srq(&srq->backend_srq, dev_res);
> +    rdma_res_tbl_dealloc(&dev_res->srq_tbl, srq_handle);
> +}
> +
>  void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
>  {
>      void **cqe_ctx;
> @@ -673,6 +763,8 @@ int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr)
>      res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp *
>                         dev_attr->max_qp_wr, sizeof(void *));
>      res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));
> +    res_tbl_init("SRQ", &dev_res->srq_tbl, dev_attr->max_srq,
> +                 sizeof(RdmaRmSRQ));
>  
>      init_ports(dev_res);
>  
> @@ -691,6 +783,7 @@ void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
>  
>      fini_ports(dev_res, backend_dev, ifname);
>  
> +    res_tbl_free(&dev_res->srq_tbl);
>      res_tbl_free(&dev_res->uc_tbl);
>      res_tbl_free(&dev_res->cqe_ctx_tbl);
>      res_tbl_free(&dev_res->qp_tbl);
> diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h
> index 4f03f9b8c5f1..e88ab95e264b 100644
> --- a/hw/rdma/rdma_rm.h
> +++ b/hw/rdma/rdma_rm.h
> @@ -65,6 +65,16 @@ int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
>                       int attr_mask, struct ibv_qp_init_attr *init_attr);
>  void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle);
>  
> +RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle);
> +int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle,
> +                      uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit,
> +                      uint32_t *srq_handle, void *opaque);
> +int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
> +                      struct ibv_srq_attr *srq_attr);
> +int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
> +                       struct ibv_srq_attr *srq_attr, int srq_attr_mask);
> +void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle);
> +
>  int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
>                            void *ctx);
>  void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id);
> diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
> index e774af528022..7bdd9f291f9f 100644
> --- a/hw/rdma/rdma_rm_defs.h
> +++ b/hw/rdma/rdma_rm_defs.h
> @@ -33,6 +33,7 @@
>  #define MAX_QP_RD_ATOM        16
>  #define MAX_QP_INIT_RD_ATOM   16
>  #define MAX_AH                64
> +#define MAX_SRQ               512
>  
>  #define MAX_RM_TBL_NAME             16
>  #define MAX_CONSEQ_EMPTY_POLL_CQ    4096 /* considered as error above this */
> @@ -89,6 +90,12 @@ typedef struct RdmaRmQP {
>      enum ibv_qp_state qp_state;
>  } RdmaRmQP;
>  
> +typedef struct RdmaRmSRQ {
> +    RdmaBackendSRQ backend_srq;
> +    uint32_t recv_cq_handle;
> +    void *opaque;
> +} RdmaRmSRQ;
> +
>  typedef struct RdmaRmGid {
>      union ibv_gid gid;
>      int backend_gid_index;
> @@ -129,6 +136,7 @@ struct RdmaDeviceResources {
>      RdmaRmResTbl qp_tbl;
>      RdmaRmResTbl cq_tbl;
>      RdmaRmResTbl cqe_ctx_tbl;
> +    RdmaRmResTbl srq_tbl;
>      GHashTable *qp_hash; /* Keeps mapping between real and emulated */
>      QemuMutex lock;
>      RdmaRmStats stats;

For some reason v3 is omitted from the subject, weird.
Anyway, looks like you took care of the comment raised.

With that - patch lgtm.

Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>

> -- 
> 2.20.1
> 
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread
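
A note on the resource-management side shown above: the new SRQ functions
reuse the driver's generic handle-table scheme -- a fixed-size table sized
by dev_attr->max_srq (res_tbl_init), a lookup that validates the handle
before returning the entry (rdma_rm_get_srq), and a dealloc that releases
the slot (rdma_res_tbl_dealloc). A minimal, self-contained sketch of one way
such a table can work (hypothetical names and sizes, not the QEMU
implementation):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    #define TBL_SZ 8

    typedef struct DemoSRQ { int in_use; uint32_t limit; } DemoSRQ;
    static DemoSRQ tbl[TBL_SZ];

    /* Allocate a free slot and hand back its index as the handle. */
    static int tbl_alloc(uint32_t *handle)
    {
        for (uint32_t i = 0; i < TBL_SZ; i++) {
            if (!tbl[i].in_use) {
                tbl[i].in_use = 1;
                *handle = i;
                return 0;
            }
        }
        return -1; /* table full */
    }

    /* Validate the handle before dereferencing, as rdma_rm_get_srq() does. */
    static DemoSRQ *tbl_get(uint32_t handle)
    {
        if (handle >= TBL_SZ || !tbl[handle].in_use) {
            return NULL;
        }
        return &tbl[handle];
    }

    static void tbl_dealloc(uint32_t handle)
    {
        if (handle < TBL_SZ) {
            memset(&tbl[handle], 0, sizeof(tbl[handle]));
        }
    }

    int main(void)
    {
        uint32_t h;
        if (tbl_alloc(&h) == 0) {
            tbl_get(h)->limit = 64;
            printf("srq handle %u, limit %u\n", h, tbl_get(h)->limit);
            tbl_dealloc(h);
        }
        printf("stale lookup -> %s\n", tbl_get(h) ? "valid" : "NULL");
        return 0;
    }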

* Re: [Qemu-devel] [PATCH v3 3/4] hw/rdma: Modify create/destroy QP to support SRQ
  2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 3/4] hw/rdma: Modify create/destroy QP to support SRQ Kamal Heib
@ 2019-04-03 18:15   ` Yuval Shaia
  0 siblings, 0 replies; 13+ messages in thread
From: Yuval Shaia @ 2019-04-03 18:15 UTC (permalink / raw)
  To: Kamal Heib; +Cc: qemu-devel

On Wed, Apr 03, 2019 at 02:33:42PM +0300, Kamal Heib wrote:
> Modify create/destroy QP to support shared receive queues and rearrange
> the destroy_qp() code to avoid touching the QP after calling
> rdma_rm_dealloc_qp().
> 
> Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
> ---
>  hw/rdma/rdma_backend.c   |  9 ++++--
>  hw/rdma/rdma_backend.h   |  6 ++--
>  hw/rdma/rdma_rm.c        | 22 +++++++++++++--
>  hw/rdma/rdma_rm.h        |  3 +-
>  hw/rdma/rdma_rm_defs.h   |  1 +
>  hw/rdma/vmw/pvrdma_cmd.c | 59 ++++++++++++++++++++++++----------------
>  6 files changed, 67 insertions(+), 33 deletions(-)
> 
> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
> index 04dfd63a573b..cf34874e9d2f 100644
> --- a/hw/rdma/rdma_backend.c
> +++ b/hw/rdma/rdma_backend.c
> @@ -794,9 +794,9 @@ void rdma_backend_destroy_cq(RdmaBackendCQ *cq)
>  
>  int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
>                             RdmaBackendPD *pd, RdmaBackendCQ *scq,
> -                           RdmaBackendCQ *rcq, uint32_t max_send_wr,
> -                           uint32_t max_recv_wr, uint32_t max_send_sge,
> -                           uint32_t max_recv_sge)
> +                           RdmaBackendCQ *rcq, RdmaBackendSRQ *srq,
> +                           uint32_t max_send_wr, uint32_t max_recv_wr,
> +                           uint32_t max_send_sge, uint32_t max_recv_sge)
>  {
>      struct ibv_qp_init_attr attr = {};
>  
> @@ -824,6 +824,9 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
>      attr.cap.max_recv_wr = max_recv_wr;
>      attr.cap.max_send_sge = max_send_sge;
>      attr.cap.max_recv_sge = max_recv_sge;
> +    if (srq) {
> +        attr.srq = srq->ibsrq;
> +    }
>  
>      qp->ibqp = ibv_create_qp(pd->ibpd, &attr);
>      if (!qp->ibqp) {
> diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
> index cad7956d98e8..7c1a19a2b5ff 100644
> --- a/hw/rdma/rdma_backend.h
> +++ b/hw/rdma/rdma_backend.h
> @@ -89,9 +89,9 @@ void rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq);
>  
>  int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
>                             RdmaBackendPD *pd, RdmaBackendCQ *scq,
> -                           RdmaBackendCQ *rcq, uint32_t max_send_wr,
> -                           uint32_t max_recv_wr, uint32_t max_send_sge,
> -                           uint32_t max_recv_sge);
> +                           RdmaBackendCQ *rcq, RdmaBackendSRQ *srq,
> +                           uint32_t max_send_wr, uint32_t max_recv_wr,
> +                           uint32_t max_send_sge, uint32_t max_recv_sge);
>  int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
>                                 uint8_t qp_type, uint32_t qkey);
>  int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
> diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
> index c0bb27cb6b90..96279e8d6561 100644
> --- a/hw/rdma/rdma_rm.c
> +++ b/hw/rdma/rdma_rm.c
> @@ -386,12 +386,14 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
>                       uint8_t qp_type, uint32_t max_send_wr,
>                       uint32_t max_send_sge, uint32_t send_cq_handle,
>                       uint32_t max_recv_wr, uint32_t max_recv_sge,
> -                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn)
> +                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn,
> +                     uint8_t is_srq, uint32_t srq_handle)
>  {
>      int rc;
>      RdmaRmQP *qp;
>      RdmaRmCQ *scq, *rcq;
>      RdmaRmPD *pd;
> +    RdmaRmSRQ *srq = NULL;
>      uint32_t rm_qpn;
>  
>      pd = rdma_rm_get_pd(dev_res, pd_handle);
> @@ -408,6 +410,16 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
>          return -EINVAL;
>      }
>  
> +    if (is_srq) {
> +        srq = rdma_rm_get_srq(dev_res, srq_handle);
> +        if (!srq) {
> +            rdma_error_report("Invalid srqn %d", srq_handle);
> +            return -EINVAL;
> +        }
> +
> +        srq->recv_cq_handle = recv_cq_handle;
> +    }
> +
>      if (qp_type == IBV_QPT_GSI) {
>          scq->notify = CNT_SET;
>          rcq->notify = CNT_SET;
> @@ -424,10 +436,14 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
>      qp->send_cq_handle = send_cq_handle;
>      qp->recv_cq_handle = recv_cq_handle;
>      qp->opaque = opaque;
> +    qp->is_srq = is_srq;
>  
>      rc = rdma_backend_create_qp(&qp->backend_qp, qp_type, &pd->backend_pd,
> -                                &scq->backend_cq, &rcq->backend_cq, max_send_wr,
> -                                max_recv_wr, max_send_sge, max_recv_sge);
> +                                &scq->backend_cq, &rcq->backend_cq,
> +                                is_srq ? &srq->backend_srq : NULL,
> +                                max_send_wr, max_recv_wr, max_send_sge,
> +                                max_recv_sge);
> +
>      if (rc) {
>          rc = -EIO;
>          goto out_dealloc_qp;
> diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h
> index e88ab95e264b..e8639909cd34 100644
> --- a/hw/rdma/rdma_rm.h
> +++ b/hw/rdma/rdma_rm.h
> @@ -53,7 +53,8 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
>                       uint8_t qp_type, uint32_t max_send_wr,
>                       uint32_t max_send_sge, uint32_t send_cq_handle,
>                       uint32_t max_recv_wr, uint32_t max_recv_sge,
> -                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn);
> +                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn,
> +                     uint8_t is_srq, uint32_t srq_handle);
>  RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn);
>  int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
>                        uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
> diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
> index 7bdd9f291f9f..534f2f74d3d1 100644
> --- a/hw/rdma/rdma_rm_defs.h
> +++ b/hw/rdma/rdma_rm_defs.h
> @@ -88,6 +88,7 @@ typedef struct RdmaRmQP {
>      uint32_t send_cq_handle;
>      uint32_t recv_cq_handle;
>      enum ibv_qp_state qp_state;
> +    uint8_t is_srq;
>  } RdmaRmQP;
>  
>  typedef struct RdmaRmSRQ {
> diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
> index 4afcd2037db2..b931bb6dc9d4 100644
> --- a/hw/rdma/vmw/pvrdma_cmd.c
> +++ b/hw/rdma/vmw/pvrdma_cmd.c
> @@ -357,7 +357,7 @@ static int destroy_cq(PVRDMADev *dev, union pvrdma_cmd_req *req,
>  static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
>                             PvrdmaRing **rings, uint32_t scqe, uint32_t smax_sge,
>                             uint32_t spages, uint32_t rcqe, uint32_t rmax_sge,
> -                           uint32_t rpages)
> +                           uint32_t rpages, uint8_t is_srq)
>  {
>      uint64_t *dir = NULL, *tbl = NULL;
>      PvrdmaRing *sr, *rr;
> @@ -365,9 +365,14 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
>      char ring_name[MAX_RING_NAME_SZ];
>      uint32_t wqe_sz;
>  
> -    if (!spages || spages > PVRDMA_MAX_FAST_REG_PAGES
> -        || !rpages || rpages > PVRDMA_MAX_FAST_REG_PAGES) {
> -        rdma_error_report("Got invalid page count for QP ring: %d, %d", spages,
> +    if (!spages || spages > PVRDMA_MAX_FAST_REG_PAGES) {
> +        rdma_error_report("Got invalid send page count for QP ring: %d",
> +                          spages);
> +        return rc;
> +    }
> +
> +    if (!is_srq && (!rpages || rpages > PVRDMA_MAX_FAST_REG_PAGES)) {
> +        rdma_error_report("Got invalid recv page count for QP ring: %d",
>                            rpages);
>          return rc;
>      }
> @@ -384,8 +389,12 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
>          goto out;
>      }
>  
> -    sr = g_malloc(2 * sizeof(*rr));
> -    rr = &sr[1];
> +    if (!is_srq) {
> +        sr = g_malloc(2 * sizeof(*rr));
> +        rr = &sr[1];
> +    } else {
> +        sr = g_malloc(sizeof(*sr));
> +    }
>  
>      *rings = sr;
>  
> @@ -407,15 +416,18 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
>          goto out_unmap_ring_state;
>      }
>  
> -    /* Create recv ring */
> -    rr->ring_state = &sr->ring_state[1];
> -    wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) +
> -                      sizeof(struct pvrdma_sge) * rmax_sge - 1);
> -    sprintf(ring_name, "qp_rring_%" PRIx64, pdir_dma);
> -    rc = pvrdma_ring_init(rr, ring_name, pci_dev, rr->ring_state,
> -                          rcqe, wqe_sz, (dma_addr_t *)&tbl[1 + spages], rpages);
> -    if (rc) {
> -        goto out_free_sr;
> +    if (!is_srq) {
> +        /* Create recv ring */
> +        rr->ring_state = &sr->ring_state[1];
> +        wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) +
> +                          sizeof(struct pvrdma_sge) * rmax_sge - 1);
> +        sprintf(ring_name, "qp_rring_%" PRIx64, pdir_dma);
> +        rc = pvrdma_ring_init(rr, ring_name, pci_dev, rr->ring_state,
> +                              rcqe, wqe_sz, (dma_addr_t *)&tbl[1 + spages],
> +                              rpages);
> +        if (rc) {
> +            goto out_free_sr;
> +        }
>      }
>  
>      goto out;
> @@ -436,10 +448,12 @@ out:
>      return rc;
>  }
>  
> -static void destroy_qp_rings(PvrdmaRing *ring)
> +static void destroy_qp_rings(PvrdmaRing *ring, uint8_t is_srq)
>  {
>      pvrdma_ring_free(&ring[0]);
> -    pvrdma_ring_free(&ring[1]);
> +    if (!is_srq) {
> +        pvrdma_ring_free(&ring[1]);
> +    }
>  
>      rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE);
>      g_free(ring);
> @@ -458,7 +472,7 @@ static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
>      rc = create_qp_rings(PCI_DEVICE(dev), cmd->pdir_dma, &rings,
>                           cmd->max_send_wr, cmd->max_send_sge, cmd->send_chunks,
>                           cmd->max_recv_wr, cmd->max_recv_sge,
> -                         cmd->total_chunks - cmd->send_chunks - 1);
> +                         cmd->total_chunks - cmd->send_chunks - 1, cmd->is_srq);
>      if (rc) {
>          return rc;
>      }
> @@ -467,9 +481,9 @@ static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
>                            cmd->max_send_wr, cmd->max_send_sge,
>                            cmd->send_cq_handle, cmd->max_recv_wr,
>                            cmd->max_recv_sge, cmd->recv_cq_handle, rings,
> -                          &resp->qpn);
> +                          &resp->qpn, cmd->is_srq, cmd->srq_handle);
>      if (rc) {
> -        destroy_qp_rings(rings);
> +        destroy_qp_rings(rings, cmd->is_srq);
>          return rc;
>      }
>  
> @@ -531,10 +545,9 @@ static int destroy_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
>          return -EINVAL;
>      }
>  
> -    rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle);
> -
>      ring = (PvrdmaRing *)qp->opaque;
> -    destroy_qp_rings(ring);
> +    destroy_qp_rings(ring, qp->is_srq);
> +    rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle);
>  
>      return 0;
>  }

Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>

> -- 
> 2.20.1
> 
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread
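
Worth spelling out why the destroy_qp() hunk at the end matters: qp->opaque
points at the PvrdmaRing that destroy_qp_rings() still has to free, while
rdma_rm_dealloc_qp() releases the QP entry itself, so v3 reads the ring
pointer out of the QP and tears the rings down before deallocating. A
self-contained illustration of that ordering (hypothetical minimal types,
not the QEMU code):

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct Ring { int id; } Ring;
    typedef struct QP { void *opaque; } QP;

    static void destroy_rings(Ring *ring)
    {
        printf("freeing ring %d\n", ring->id);
        free(ring);
    }

    static void dealloc_qp(QP **qp)
    {
        free(*qp);  /* after this, any qp-> access is a use-after-free */
        *qp = NULL;
    }

    int main(void)
    {
        QP *qp = malloc(sizeof(*qp));
        Ring *ring = malloc(sizeof(*ring));
        ring->id = 7;
        qp->opaque = ring;

        /*
         * v3 order: read everything needed out of the QP first,
         * then release the QP entry.
         */
        Ring *r = qp->opaque;
        destroy_rings(r);
        dealloc_qp(&qp);

        /* The buggy order would be: dealloc_qp(&qp); then qp->opaque. */
        return 0;
    }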

* Re: [Qemu-devel] [PATCH v3 4/4] hw/pvrdma: Add support for SRQ
  2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 4/4] hw/pvrdma: Add support for SRQ Kamal Heib
@ 2019-04-03 18:16   ` Yuval Shaia
  0 siblings, 0 replies; 13+ messages in thread
From: Yuval Shaia @ 2019-04-03 18:16 UTC (permalink / raw)
  To: Kamal Heib; +Cc: qemu-devel

On Wed, Apr 03, 2019 at 02:33:43PM +0300, Kamal Heib wrote:
> Implement the pvrdma device commands for supporting SRQ.
> 
> Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
> ---
>  hw/rdma/vmw/pvrdma_cmd.c    | 147 ++++++++++++++++++++++++++++++++++++
>  hw/rdma/vmw/pvrdma_main.c   |  16 ++++
>  hw/rdma/vmw/pvrdma_qp_ops.c |  46 ++++++++++-
>  hw/rdma/vmw/pvrdma_qp_ops.h |   1 +
>  4 files changed, 209 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
> index b931bb6dc9d4..8d70c0d23de4 100644
> --- a/hw/rdma/vmw/pvrdma_cmd.c
> +++ b/hw/rdma/vmw/pvrdma_cmd.c
> @@ -609,6 +609,149 @@ static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
>      return 0;
>  }
>  
> +static int create_srq_ring(PCIDevice *pci_dev, PvrdmaRing **ring,
> +                           uint64_t pdir_dma, uint32_t max_wr,
> +                           uint32_t max_sge, uint32_t nchunks)
> +{
> +    uint64_t *dir = NULL, *tbl = NULL;
> +    PvrdmaRing *r;
> +    int rc = -EINVAL;
> +    char ring_name[MAX_RING_NAME_SZ];
> +    uint32_t wqe_sz;
> +
> +    if (!nchunks || nchunks > PVRDMA_MAX_FAST_REG_PAGES) {
> +        rdma_error_report("Got invalid page count for SRQ ring: %d",
> +                          nchunks);
> +        return rc;
> +    }
> +
> +    dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE);
> +    if (!dir) {
> +        rdma_error_report("Failed to map to SRQ page directory");
> +        goto out;
> +    }
> +
> +    tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE);
> +    if (!tbl) {
> +        rdma_error_report("Failed to map to SRQ page table");
> +        goto out;
> +    }
> +
> +    r = g_malloc(sizeof(*r));
> +    *ring = r;
> +
> +    r->ring_state = (struct pvrdma_ring *)
> +            rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE);
> +    if (!r->ring_state) {
> +        rdma_error_report("Failed to map to SRQ ring state");
> +        goto out_free_ring_mem;
> +    }
> +
> +    wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) +
> +                      sizeof(struct pvrdma_sge) * max_sge - 1);
> +    sprintf(ring_name, "srq_ring_%" PRIx64, pdir_dma);
> +    rc = pvrdma_ring_init(r, ring_name, pci_dev, &r->ring_state[1], max_wr,
> +                          wqe_sz, (dma_addr_t *)&tbl[1], nchunks - 1);
> +    if (rc) {
> +        goto out_unmap_ring_state;
> +    }
> +
> +    goto out;
> +
> +out_unmap_ring_state:
> +    rdma_pci_dma_unmap(pci_dev, r->ring_state, TARGET_PAGE_SIZE);
> +
> +out_free_ring_mem:
> +    g_free(r);
> +
> +out:
> +    rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE);
> +    rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE);
> +
> +    return rc;
> +}
> +
> +static void destroy_srq_ring(PvrdmaRing *ring)
> +{
> +    pvrdma_ring_free(ring);
> +    rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE);
> +    g_free(ring);
> +}
> +
> +static int create_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
> +                      union pvrdma_cmd_resp *rsp)
> +{
> +    struct pvrdma_cmd_create_srq *cmd = &req->create_srq;
> +    struct pvrdma_cmd_create_srq_resp *resp = &rsp->create_srq_resp;
> +    PvrdmaRing *ring = NULL;
> +    int rc;
> +
> +    memset(resp, 0, sizeof(*resp));
> +
> +    rc = create_srq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma,
> +                         cmd->attrs.max_wr, cmd->attrs.max_sge,
> +                         cmd->nchunks);
> +    if (rc) {
> +        return rc;
> +    }
> +
> +    rc = rdma_rm_alloc_srq(&dev->rdma_dev_res, cmd->pd_handle,
> +                           cmd->attrs.max_wr, cmd->attrs.max_sge,
> +                           cmd->attrs.srq_limit, &resp->srqn, ring);
> +    if (rc) {
> +        destroy_srq_ring(ring);
> +        return rc;
> +    }
> +
> +    return 0;
> +}
> +
> +static int query_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
> +                     union pvrdma_cmd_resp *rsp)
> +{
> +    struct pvrdma_cmd_query_srq *cmd = &req->query_srq;
> +    struct pvrdma_cmd_query_srq_resp *resp = &rsp->query_srq_resp;
> +
> +    memset(resp, 0, sizeof(*resp));
> +
> +    return rdma_rm_query_srq(&dev->rdma_dev_res, cmd->srq_handle,
> +                             (struct ibv_srq_attr *)&resp->attrs);
> +}
> +
> +static int modify_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
> +                      union pvrdma_cmd_resp *rsp)
> +{
> +    struct pvrdma_cmd_modify_srq *cmd = &req->modify_srq;
> +
> +    /* Only support SRQ limit */
> +    if (!(cmd->attr_mask & IBV_SRQ_LIMIT) ||
> +        (cmd->attr_mask & IBV_SRQ_MAX_WR))
> +            return -EINVAL;
> +
> +    return rdma_rm_modify_srq(&dev->rdma_dev_res, cmd->srq_handle,
> +                              (struct ibv_srq_attr *)&cmd->attrs,
> +                              cmd->attr_mask);
> +}
> +
> +static int destroy_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
> +                       union pvrdma_cmd_resp *rsp)
> +{
> +    struct pvrdma_cmd_destroy_srq *cmd = &req->destroy_srq;
> +    RdmaRmSRQ *srq;
> +    PvrdmaRing *ring;
> +
> +    srq = rdma_rm_get_srq(&dev->rdma_dev_res, cmd->srq_handle);
> +    if (!srq) {
> +        return -EINVAL;
> +    }
> +
> +    ring = (PvrdmaRing *)srq->opaque;
> +    destroy_srq_ring(ring);
> +    rdma_rm_dealloc_srq(&dev->rdma_dev_res, cmd->srq_handle);
> +
> +    return 0;
> +}
> +
>  struct cmd_handler {
>      uint32_t cmd;
>      uint32_t ack;
> @@ -634,6 +777,10 @@ static struct cmd_handler cmd_handlers[] = {
>      {PVRDMA_CMD_DESTROY_UC,   PVRDMA_CMD_DESTROY_UC_RESP_NOOP,   destroy_uc},
>      {PVRDMA_CMD_CREATE_BIND,  PVRDMA_CMD_CREATE_BIND_RESP_NOOP,  create_bind},
>      {PVRDMA_CMD_DESTROY_BIND, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, destroy_bind},
> +    {PVRDMA_CMD_CREATE_SRQ,   PVRDMA_CMD_CREATE_SRQ_RESP,        create_srq},
> +    {PVRDMA_CMD_QUERY_SRQ,    PVRDMA_CMD_QUERY_SRQ_RESP,         query_srq},
> +    {PVRDMA_CMD_MODIFY_SRQ,   PVRDMA_CMD_MODIFY_SRQ_RESP,        modify_srq},
> +    {PVRDMA_CMD_DESTROY_SRQ,  PVRDMA_CMD_DESTROY_SRQ_RESP,       destroy_srq},
>  };
>  
>  int pvrdma_exec_cmd(PVRDMADev *dev)
> diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
> index 0b46561bad11..769f7990f864 100644
> --- a/hw/rdma/vmw/pvrdma_main.c
> +++ b/hw/rdma/vmw/pvrdma_main.c
> @@ -53,6 +53,7 @@ static Property pvrdma_dev_properties[] = {
>      DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev,
>                        dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM),
>      DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH),
> +    DEFINE_PROP_INT32("dev-caps-max-srq", PVRDMADev, dev_attr.max_srq, MAX_SRQ),
>      DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr),
>      DEFINE_PROP_END_OF_LIST(),
>  };
> @@ -261,6 +262,9 @@ static void init_dsr_dev_caps(PVRDMADev *dev)
>      dsr->caps.max_mr = dev->dev_attr.max_mr;
>      dsr->caps.max_pd = dev->dev_attr.max_pd;
>      dsr->caps.max_ah = dev->dev_attr.max_ah;
> +    dsr->caps.max_srq = dev->dev_attr.max_srq;
> +    dsr->caps.max_srq_wr = dev->dev_attr.max_srq_wr;
> +    dsr->caps.max_srq_sge = dev->dev_attr.max_srq_sge;
>      dsr->caps.gid_tbl_len = MAX_GIDS;
>      dsr->caps.sys_image_guid = 0;
>      dsr->caps.node_guid = dev->node_guid;
> @@ -485,6 +489,13 @@ static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val,
>              pvrdma_cq_poll(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK);
>          }
>          break;
> +    case PVRDMA_UAR_SRQ_OFFSET:
> +        if (val & PVRDMA_UAR_SRQ_RECV) {
> +            trace_pvrdma_uar_write(addr, val, "QP", "SRQ",
> +                                   val & PVRDMA_UAR_HANDLE_MASK, 0);
> +            pvrdma_srq_recv(dev, val & PVRDMA_UAR_HANDLE_MASK);
> +        }
> +        break;
>      default:
>          rdma_error_report("Unsupported command, addr=0x%"PRIx64", val=0x%"PRIx64,
>                            addr, val);
> @@ -554,6 +565,11 @@ static void init_dev_caps(PVRDMADev *dev)
>  
>      dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) -
>                              TARGET_PAGE_SIZE; /* First page is ring state */
> +
> +    dev->dev_attr.max_srq_wr = pg_tbl_bytes /
> +                                ((sizeof(struct pvrdma_rq_wqe_hdr) +
> +                                sizeof(struct pvrdma_sge)) *
> +                                dev->dev_attr.max_sge) - TARGET_PAGE_SIZE;
>  }
>  
>  static int pvrdma_check_ram_shared(Object *obj, void *opaque)
> diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c
> index 5b9786efbe4b..bd6db858de32 100644
> --- a/hw/rdma/vmw/pvrdma_qp_ops.c
> +++ b/hw/rdma/vmw/pvrdma_qp_ops.c
> @@ -70,7 +70,7 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
>  
>      memset(cqe1, 0, sizeof(*cqe1));
>      cqe1->wr_id = cqe->wr_id;
> -    cqe1->qp = cqe->qp;
> +    cqe1->qp = cqe->qp ? cqe->qp : wc->qp_num;
>      cqe1->opcode = cqe->opcode;
>      cqe1->status = wc->status;
>      cqe1->byte_len = wc->byte_len;
> @@ -241,6 +241,50 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
>      }
>  }
>  
> +void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle)
> +{
> +    RdmaRmSRQ *srq;
> +    PvrdmaRqWqe *wqe;
> +    PvrdmaRing *ring;
> +
> +    srq = rdma_rm_get_srq(&dev->rdma_dev_res, srq_handle);
> +    if (unlikely(!srq)) {
> +        return;
> +    }
> +
> +    ring = (PvrdmaRing *)srq->opaque;
> +
> +    wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring);
> +    while (wqe) {
> +        CompHandlerCtx *comp_ctx;
> +
> +        /* Prepare CQE */
> +        comp_ctx = g_malloc(sizeof(CompHandlerCtx));
> +        comp_ctx->dev = dev;
> +        comp_ctx->cq_handle = srq->recv_cq_handle;
> +        comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
> +        comp_ctx->cqe.qp = 0;
> +        comp_ctx->cqe.opcode = IBV_WC_RECV;
> +
> +        if (wqe->hdr.num_sge > dev->dev_attr.max_sge) {
> +            rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge,
> +                              dev->dev_attr.max_sge);
> +            complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx);
> +            continue;
> +        }
> +
> +        rdma_backend_post_srq_recv(&dev->backend_dev, &srq->backend_srq,
> +                                   (struct ibv_sge *)&wqe->sge[0],
> +                                   wqe->hdr.num_sge,
> +                                   comp_ctx);
> +
> +        pvrdma_ring_read_inc(ring);
> +
> +        wqe = pvrdma_ring_next_elem_read(ring);
> +    }
> +
> +}
> +
>  void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle)
>  {
>      RdmaRmCQ *cq;
> diff --git a/hw/rdma/vmw/pvrdma_qp_ops.h b/hw/rdma/vmw/pvrdma_qp_ops.h
> index 31cb48ba29f1..82e720a76fb4 100644
> --- a/hw/rdma/vmw/pvrdma_qp_ops.h
> +++ b/hw/rdma/vmw/pvrdma_qp_ops.h
> @@ -22,6 +22,7 @@ int pvrdma_qp_ops_init(void);
>  void pvrdma_qp_ops_fini(void);
>  void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle);
>  void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle);
> +void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle);
>  void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle);
>  
>  #endif

Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>

> -- 
> 2.20.1
> 
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread
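
One detail worth unpacking from create_srq_ring(): each ring element is
sized as the receive-WQE header plus room for max_sge scatter/gather
entries, rounded up with QEMU's pow2ceil(). A standalone sketch of that
computation, with a local pow2ceil stand-in and made-up struct sizes (the
real sizes come from the pvrdma headers):

    #include <stdio.h>
    #include <stdint.h>

    /* Smallest power of two >= n, like QEMU's pow2ceil() for 32-bit n. */
    static uint32_t pow2ceil32(uint32_t n)
    {
        n--;
        n |= n >> 1;
        n |= n >> 2;
        n |= n >> 4;
        n |= n >> 8;
        n |= n >> 16;
        return n + 1;
    }

    int main(void)
    {
        const uint32_t hdr_sz = 24; /* stand-in for sizeof(struct pvrdma_rq_wqe_hdr) */
        const uint32_t sge_sz = 16; /* stand-in for sizeof(struct pvrdma_sge) */

        for (uint32_t max_sge = 1; max_sge <= 4; max_sge++) {
            /* Same expression shape as the patch: hdr + sge * max_sge - 1. */
            uint32_t wqe_sz = pow2ceil32(hdr_sz + sge_sz * max_sge - 1);
            printf("max_sge=%u -> wqe_sz=%u\n", max_sge, wqe_sz);
        }
        return 0;
    }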

* Re: [Qemu-devel] [PATCH v3 0/4] pvrdma: Add support for SRQ
  2019-04-03 11:33 [Qemu-devel] [PATCH v3 0/4] pvrdma: Add support for SRQ Kamal Heib
                   ` (3 preceding siblings ...)
  2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 4/4] hw/pvrdma: Add support for SRQ Kamal Heib
@ 2019-04-03 18:19 ` Yuval Shaia
  2019-04-07  9:15   ` Yuval Shaia
  4 siblings, 1 reply; 13+ messages in thread
From: Yuval Shaia @ 2019-04-03 18:19 UTC (permalink / raw)
  To: Kamal Heib; +Cc: qemu-devel

On Wed, Apr 03, 2019 at 02:33:39PM +0300, Kamal Heib wrote:
> This series implements the SRQ (Shared Receive Queue) for the pvrdma
> device. It also includes all the needed functions and definitions to
> support SRQ in the backend and resource management layers.
> 
> Changes from v2->v3:
> - Patch #1:
> -- Fix commit message.
> -- Remove initialization of backend_qp from rdma_backend_post_srq_recv().
> -- Add rx_srq counter.
> - Patch #2:
> -- Add checks for srq attrs.
> - Patch #3:
> -- Move initialization of recv_cq_handle to be under is_srq.
> -- Rearrange destroy_qp() to avoid use after free.
> - Patch #4:
> -- Avoid use after free.
> -- Fix indentation.
> 
> Changes from v1->v2:
> - Handle checkpatch.pl warnings. 

I'd appreciate it if you could post a v4 with a new patch to display the new
counter (function rdma_dump_device_counters).

> 
> Kamal Heib (4):
>   hw/rdma: Add SRQ support to backend layer
>   hw/rdma: Add support for managing SRQ resource
>   hw/rdma: Modify create/destroy QP to support SRQ
>   hw/pvrdma: Add support for SRQ
> 
>  hw/rdma/rdma_backend.c      | 125 +++++++++++++++++++++-
>  hw/rdma/rdma_backend.h      |  18 +++-
>  hw/rdma/rdma_backend_defs.h |   5 +
>  hw/rdma/rdma_rm.c           | 117 +++++++++++++++++++-
>  hw/rdma/rdma_rm.h           |  13 ++-
>  hw/rdma/rdma_rm_defs.h      |  10 ++
>  hw/rdma/vmw/pvrdma_cmd.c    | 206 ++++++++++++++++++++++++++++++++----
>  hw/rdma/vmw/pvrdma_main.c   |  16 +++
>  hw/rdma/vmw/pvrdma_qp_ops.c |  46 +++++++-
>  hw/rdma/vmw/pvrdma_qp_ops.h |   1 +
>  10 files changed, 521 insertions(+), 36 deletions(-)
> 
> -- 
> 2.20.1
> 
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer
  2019-04-03 18:05   ` Yuval Shaia
@ 2019-04-07  8:13     ` Kamal Heib
  2019-04-07  9:13       ` Yuval Shaia
  0 siblings, 1 reply; 13+ messages in thread
From: Kamal Heib @ 2019-04-07  8:13 UTC (permalink / raw)
  To: Yuval Shaia; +Cc: qemu-devel



On 4/3/19 9:05 PM, Yuval Shaia wrote:
> On Wed, Apr 03, 2019 at 02:33:40PM +0300, Kamal Heib wrote:
>> Add the required functions and definitions to support shared receive
>> queues (SRQs) in the backend layer.
>>
>> Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
>> ---
>>  hw/rdma/rdma_backend.c      | 116 +++++++++++++++++++++++++++++++++++-
>>  hw/rdma/rdma_backend.h      |  12 ++++
>>  hw/rdma/rdma_backend_defs.h |   5 ++
>>  hw/rdma/rdma_rm.c           |   2 +
>>  hw/rdma/rdma_rm_defs.h      |   1 +
>>  5 files changed, 134 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
>> index d1660b6474fa..04dfd63a573b 100644
>> --- a/hw/rdma/rdma_backend.c
>> +++ b/hw/rdma/rdma_backend.c
>> @@ -40,6 +40,7 @@ typedef struct BackendCtx {
>>      void *up_ctx;
>>      struct ibv_sge sge; /* Used to save MAD recv buffer */
>>      RdmaBackendQP *backend_qp; /* To maintain recv buffers */
>> +    RdmaBackendSRQ *backend_srq;
>>  } BackendCtx;
>>  
>>  struct backend_umad {
>> @@ -99,6 +100,7 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
>>      int i, ne, total_ne = 0;
>>      BackendCtx *bctx;
>>      struct ibv_wc wc[2];
>> +    RdmaProtectedGSList *cqe_ctx_list;
>>  
>>      qemu_mutex_lock(&rdma_dev_res->lock);
>>      do {
>> @@ -116,8 +118,13 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
>>  
>>              comp_handler(bctx->up_ctx, &wc[i]);
>>  
>> -            rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list,
>> -                                               wc[i].wr_id);
>> +            if (bctx->backend_qp) {
>> +                cqe_ctx_list = &bctx->backend_qp->cqe_ctx_list;
>> +            } else {
>> +                cqe_ctx_list = &bctx->backend_srq->cqe_ctx_list;
>> +            }
>> +
>> +            rdma_protected_gslist_remove_int32(cqe_ctx_list, wc[i].wr_id);
>>              rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id);
>>              g_free(bctx);
>>          }
>> @@ -662,6 +669,60 @@ err_free_bctx:
>>      g_free(bctx);
>>  }
>>  
>> +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
>> +                                RdmaBackendSRQ *srq, struct ibv_sge *sge,
>> +                                uint32_t num_sge, void *ctx)
>> +{
>> +    BackendCtx *bctx;
>> +    struct ibv_sge new_sge[MAX_SGE];
>> +    uint32_t bctx_id;
>> +    int rc;
>> +    struct ibv_recv_wr wr = {}, *bad_wr;
>> +
>> +    bctx = g_malloc0(sizeof(*bctx));
>> +    bctx->up_ctx = ctx;
>> +    bctx->backend_srq = srq;
>> +
>> +    rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
>> +    if (unlikely(rc)) {
>> +        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
>> +        goto err_free_bctx;
>> +    }
>> +
>> +    rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id);
>> +
>> +    rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
>> +                              &backend_dev->rdma_dev_res->stats.rx_bufs_len);
>> +    if (rc) {
>> +        complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
>> +        goto err_dealloc_cqe_ctx;
>> +    }
>> +
>> +    wr.num_sge = num_sge;
>> +    wr.sg_list = new_sge;
>> +    wr.wr_id = bctx_id;
>> +    rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr);
>> +    if (rc) {
>> +        rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, errno=%d",
>> +                          srq->ibsrq->handle, rc, errno);
>> +        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
>> +        goto err_dealloc_cqe_ctx;
>> +    }
>> +
>> +    atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe);
>> +    backend_dev->rdma_dev_res->stats.rx_bufs++;
>> +    backend_dev->rdma_dev_res->stats.rx_srq++;
> 
> You should update the function rdma_dump_device_counters with this new
> counter.
> 
>> +
>> +    return;
>> +
>> +err_dealloc_cqe_ctx:
>> +    backend_dev->rdma_dev_res->stats.rx_bufs_err++;
>> +    rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id);
>> +
>> +err_free_bctx:
>> +    g_free(bctx);
>> +}
>> +
>>  int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd)
>>  {
>>      pd->ibpd = ibv_alloc_pd(backend_dev->context);
>> @@ -938,6 +999,55 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp, RdmaDeviceResources *dev_res)
>>      rdma_protected_gslist_destroy(&qp->cqe_ctx_list);
>>  }
>>  
>> +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
>> +                            uint32_t max_wr, uint32_t max_sge,
>> +                            uint32_t srq_limit)
>> +{
>> +    struct ibv_srq_init_attr srq_init_attr = {};
>> +
>> +    srq_init_attr.attr.max_wr = max_wr;
>> +    srq_init_attr.attr.max_sge = max_sge;
>> +    srq_init_attr.attr.srq_limit = srq_limit;
>> +
>> +    srq->ibsrq = ibv_create_srq(pd->ibpd, &srq_init_attr);
>> +    if (!srq->ibsrq) {
>> +        rdma_error_report("ibv_create_srq failed, errno=%d", errno);
>> +        return -EIO;
>> +    }
>> +
>> +    rdma_protected_gslist_init(&srq->cqe_ctx_list);
>> +
>> +    return 0;
>> +}
>> +
>> +int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr)
>> +{
>> +    if (!srq->ibsrq) {
>> +        return -EINVAL;
>> +    }
>> +
>> +    return ibv_query_srq(srq->ibsrq, srq_attr);
>> +}
>> +
>> +int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
>> +                int srq_attr_mask)
>> +{
>> +    if (!srq->ibsrq) {
>> +        return -EINVAL;
>> +    }
>> +
>> +    return ibv_modify_srq(srq->ibsrq, srq_attr, srq_attr_mask);
>> +}
>> +
>> +void rdma_backend_destroy_srq(RdmaBackendSRQ *srq, RdmaDeviceResources *dev_res)
>> +{
>> +    if (srq->ibsrq) {
>> +        ibv_destroy_srq(srq->ibsrq);
>> +    }
>> +    g_slist_foreach(srq->cqe_ctx_list.list, free_cqe_ctx, dev_res);
>> +    rdma_protected_gslist_destroy(&srq->cqe_ctx_list);
>> +}
>> +
>>  #define CHK_ATTR(req, dev, member, fmt) ({ \
>>      trace_rdma_check_dev_attr(#member, dev.member, req->member); \
>>      if (req->member > dev.member) { \
>> @@ -960,6 +1070,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
>>      }
>>  
>>      dev_attr->max_sge = MAX_SGE;
>> +    dev_attr->max_srq_sge = MAX_SGE;
>>  
>>      CHK_ATTR(dev_attr, bk_dev_attr, max_mr_size, "%" PRId64);
>>      CHK_ATTR(dev_attr, bk_dev_attr, max_qp, "%d");
>> @@ -970,6 +1081,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
>>      CHK_ATTR(dev_attr, bk_dev_attr, max_qp_rd_atom, "%d");
>>      CHK_ATTR(dev_attr, bk_dev_attr, max_qp_init_rd_atom, "%d");
>>      CHK_ATTR(dev_attr, bk_dev_attr, max_ah, "%d");
>> +    CHK_ATTR(dev_attr, bk_dev_attr, max_srq, "%d");
>>  
>>      return 0;
>>  }
>> diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
>> index 38056d97c7fc..cad7956d98e8 100644
>> --- a/hw/rdma/rdma_backend.h
>> +++ b/hw/rdma/rdma_backend.h
>> @@ -114,4 +114,16 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
>>                              RdmaBackendQP *qp, uint8_t qp_type,
>>                              struct ibv_sge *sge, uint32_t num_sge, void *ctx);
>>  
>> +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
>> +                            uint32_t max_wr, uint32_t max_sge,
>> +                            uint32_t srq_limit);
>> +int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr);
>> +int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
>> +                            int srq_attr_mask);
>> +void rdma_backend_destroy_srq(RdmaBackendSRQ *srq,
>> +                              RdmaDeviceResources *dev_res);
>> +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
>> +                                RdmaBackendSRQ *srq, struct ibv_sge *sge,
>> +                                uint32_t num_sge, void *ctx);
>> +
>>  #endif
>> diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h
>> index 817153dc8cf4..0b55be35038d 100644
>> --- a/hw/rdma/rdma_backend_defs.h
>> +++ b/hw/rdma/rdma_backend_defs.h
>> @@ -68,4 +68,9 @@ typedef struct RdmaBackendQP {
>>      RdmaProtectedGSList cqe_ctx_list;
>>  } RdmaBackendQP;
>>  
>> +typedef struct RdmaBackendSRQ {
>> +    struct ibv_srq *ibsrq;
>> +    RdmaProtectedGSList cqe_ctx_list;
>> +} RdmaBackendSRQ;
>> +
>>  #endif
>> diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
>> index bac3b2f4a6c3..b683506b8616 100644
>> --- a/hw/rdma/rdma_rm.c
>> +++ b/hw/rdma/rdma_rm.c
>> @@ -37,6 +37,8 @@ void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res)
>>                     dev_res->stats.tx_err);
>>      monitor_printf(mon, "\trx_bufs          : %" PRId64 "\n",
>>                     dev_res->stats.rx_bufs);
>> +    monitor_printf(mon, "\trx_srq           : %" PRId64 "\n",
>> +                   dev_res->stats.rx_srq);
>>      monitor_printf(mon, "\trx_bufs_len      : %" PRId64 "\n",
>>                     dev_res->stats.rx_bufs_len);
>>      monitor_printf(mon, "\trx_bufs_err      : %" PRId64 "\n",
>> diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
>> index c200d311de37..e774af528022 100644
>> --- a/hw/rdma/rdma_rm_defs.h
>> +++ b/hw/rdma/rdma_rm_defs.h
>> @@ -106,6 +106,7 @@ typedef struct RdmaRmStats {
>>      uint64_t rx_bufs;
>>      uint64_t rx_bufs_len;
>>      uint64_t rx_bufs_err;
>> +    uint64_t rx_srq;
>>      uint64_t completions;
>>      uint64_t mad_tx;
>>      uint64_t mad_tx_err;
> 
> Please make a separate patch to update the function
> rdma_dump_device_counters.
> 

You mean a separate patch for introducing the "rx_srq" counter & updating
the function rdma_dump_device_counters()?

> Besides that patch lgtm.
> 
> Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
> 
>> -- 
>> 2.20.1
>>
>>

^ permalink raw reply	[flat|nested] 13+ messages in thread
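
The rdma_poll_cq() hunk quoted at the top of this exchange is what ties a
completion back to its owner: every posted buffer carries a BackendCtx, and
on completion either backend_qp or backend_srq is set, which decides which
cqe_ctx_list gets cleaned up. A minimal model of that dispatch (hypothetical
types, not the QEMU code):

    #include <stdio.h>

    typedef struct List { int n; } List;
    typedef struct QP  { List cqe_ctx_list; } QP;
    typedef struct SRQ { List cqe_ctx_list; } SRQ;

    /* One context per posted buffer; exactly one owner pointer is set. */
    typedef struct BackendCtx {
        QP *backend_qp;
        SRQ *backend_srq;
    } BackendCtx;

    static void list_remove(List *l) { l->n--; }

    static void on_completion(BackendCtx *bctx)
    {
        /* Mirrors the if/else added to rdma_poll_cq(). */
        List *cqe_ctx_list = bctx->backend_qp
            ? &bctx->backend_qp->cqe_ctx_list
            : &bctx->backend_srq->cqe_ctx_list;
        list_remove(cqe_ctx_list);
    }

    int main(void)
    {
        QP qp = { .cqe_ctx_list = { 1 } };
        SRQ srq = { .cqe_ctx_list = { 1 } };
        BackendCtx a = { .backend_qp = &qp };   /* QP-owned buffer */
        BackendCtx b = { .backend_srq = &srq }; /* SRQ-owned buffer */
        on_completion(&a);
        on_completion(&b);
        printf("qp list=%d srq list=%d\n",
               qp.cqe_ctx_list.n, srq.cqe_ctx_list.n);
        return 0;
    }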

* Re: [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer
  2019-04-07  8:13     ` Kamal Heib
@ 2019-04-07  9:13       ` Yuval Shaia
  0 siblings, 0 replies; 13+ messages in thread
From: Yuval Shaia @ 2019-04-07  9:13 UTC (permalink / raw)
  To: Kamal Heib; +Cc: qemu-devel

On Sun, Apr 07, 2019 at 11:13:15AM +0300, Kamal Heib wrote:
> 
> 
> On 4/3/19 9:05 PM, Yuval Shaia wrote:
> > On Wed, Apr 03, 2019 at 02:33:40PM +0300, Kamal Heib wrote:
> >> Add the required functions and definitions to support shared receive
> >> queues (SRQs) in the backend layer.
> >>
> >> Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
> >> ---
> >>  hw/rdma/rdma_backend.c      | 116 +++++++++++++++++++++++++++++++++++-
> >>  hw/rdma/rdma_backend.h      |  12 ++++
> >>  hw/rdma/rdma_backend_defs.h |   5 ++
> >>  hw/rdma/rdma_rm.c           |   2 +
> >>  hw/rdma/rdma_rm_defs.h      |   1 +
> >>  5 files changed, 134 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
> >> index d1660b6474fa..04dfd63a573b 100644
> >> --- a/hw/rdma/rdma_backend.c
> >> +++ b/hw/rdma/rdma_backend.c
> >> @@ -40,6 +40,7 @@ typedef struct BackendCtx {
> >>      void *up_ctx;
> >>      struct ibv_sge sge; /* Used to save MAD recv buffer */
> >>      RdmaBackendQP *backend_qp; /* To maintain recv buffers */
> >> +    RdmaBackendSRQ *backend_srq;
> >>  } BackendCtx;
> >>  
> >>  struct backend_umad {
> >> @@ -99,6 +100,7 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
> >>      int i, ne, total_ne = 0;
> >>      BackendCtx *bctx;
> >>      struct ibv_wc wc[2];
> >> +    RdmaProtectedGSList *cqe_ctx_list;
> >>  
> >>      qemu_mutex_lock(&rdma_dev_res->lock);
> >>      do {
> >> @@ -116,8 +118,13 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
> >>  
> >>              comp_handler(bctx->up_ctx, &wc[i]);
> >>  
> >> -            rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list,
> >> -                                               wc[i].wr_id);
> >> +            if (bctx->backend_qp) {
> >> +                cqe_ctx_list = &bctx->backend_qp->cqe_ctx_list;
> >> +            } else {
> >> +                cqe_ctx_list = &bctx->backend_srq->cqe_ctx_list;
> >> +            }
> >> +
> >> +            rdma_protected_gslist_remove_int32(cqe_ctx_list, wc[i].wr_id);
> >>              rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id);
> >>              g_free(bctx);
> >>          }
> >> @@ -662,6 +669,60 @@ err_free_bctx:
> >>      g_free(bctx);
> >>  }
> >>  
> >> +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
> >> +                                RdmaBackendSRQ *srq, struct ibv_sge *sge,
> >> +                                uint32_t num_sge, void *ctx)
> >> +{
> >> +    BackendCtx *bctx;
> >> +    struct ibv_sge new_sge[MAX_SGE];
> >> +    uint32_t bctx_id;
> >> +    int rc;
> >> +    struct ibv_recv_wr wr = {}, *bad_wr;
> >> +
> >> +    bctx = g_malloc0(sizeof(*bctx));
> >> +    bctx->up_ctx = ctx;
> >> +    bctx->backend_srq = srq;
> >> +
> >> +    rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
> >> +    if (unlikely(rc)) {
> >> +        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
> >> +        goto err_free_bctx;
> >> +    }
> >> +
> >> +    rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id);
> >> +
> >> +    rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
> >> +                              &backend_dev->rdma_dev_res->stats.rx_bufs_len);
> >> +    if (rc) {
> >> +        complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
> >> +        goto err_dealloc_cqe_ctx;
> >> +    }
> >> +
> >> +    wr.num_sge = num_sge;
> >> +    wr.sg_list = new_sge;
> >> +    wr.wr_id = bctx_id;
> >> +    rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr);
> >> +    if (rc) {
> >> +        rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, errno=%d",
> >> +                          srq->ibsrq->handle, rc, errno);
> >> +        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
> >> +        goto err_dealloc_cqe_ctx;
> >> +    }
> >> +
> >> +    atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe);
> >> +    backend_dev->rdma_dev_res->stats.rx_bufs++;
> >> +    backend_dev->rdma_dev_res->stats.rx_srq++;
> > 
> > You should update the function rdma_dump_device_counters with this new
> > counter.
> > 
> >> +
> >> +    return;
> >> +
> >> +err_dealloc_cqe_ctx:
> >> +    backend_dev->rdma_dev_res->stats.rx_bufs_err++;
> >> +    rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id);
> >> +
> >> +err_free_bctx:
> >> +    g_free(bctx);
> >> +}
> >> +
> >>  int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd)
> >>  {
> >>      pd->ibpd = ibv_alloc_pd(backend_dev->context);
> >> @@ -938,6 +999,55 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp, RdmaDeviceResources *dev_res)
> >>      rdma_protected_gslist_destroy(&qp->cqe_ctx_list);
> >>  }
> >>  
> >> +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
> >> +                            uint32_t max_wr, uint32_t max_sge,
> >> +                            uint32_t srq_limit)
> >> +{
> >> +    struct ibv_srq_init_attr srq_init_attr = {};
> >> +
> >> +    srq_init_attr.attr.max_wr = max_wr;
> >> +    srq_init_attr.attr.max_sge = max_sge;
> >> +    srq_init_attr.attr.srq_limit = srq_limit;
> >> +
> >> +    srq->ibsrq = ibv_create_srq(pd->ibpd, &srq_init_attr);
> >> +    if (!srq->ibsrq) {
> >> +        rdma_error_report("ibv_create_srq failed, errno=%d", errno);
> >> +        return -EIO;
> >> +    }
> >> +
> >> +    rdma_protected_gslist_init(&srq->cqe_ctx_list);
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr)
> >> +{
> >> +    if (!srq->ibsrq) {
> >> +        return -EINVAL;
> >> +    }
> >> +
> >> +    return ibv_query_srq(srq->ibsrq, srq_attr);
> >> +}
> >> +
> >> +int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
> >> +                int srq_attr_mask)
> >> +{
> >> +    if (!srq->ibsrq) {
> >> +        return -EINVAL;
> >> +    }
> >> +
> >> +    return ibv_modify_srq(srq->ibsrq, srq_attr, srq_attr_mask);
> >> +}
> >> +
> >> +void rdma_backend_destroy_srq(RdmaBackendSRQ *srq, RdmaDeviceResources *dev_res)
> >> +{
> >> +    if (srq->ibsrq) {
> >> +        ibv_destroy_srq(srq->ibsrq);
> >> +    }
> >> +    g_slist_foreach(srq->cqe_ctx_list.list, free_cqe_ctx, dev_res);
> >> +    rdma_protected_gslist_destroy(&srq->cqe_ctx_list);
> >> +}
> >> +
> >>  #define CHK_ATTR(req, dev, member, fmt) ({ \
> >>      trace_rdma_check_dev_attr(#member, dev.member, req->member); \
> >>      if (req->member > dev.member) { \
> >> @@ -960,6 +1070,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
> >>      }
> >>  
> >>      dev_attr->max_sge = MAX_SGE;
> >> +    dev_attr->max_srq_sge = MAX_SGE;
> >>  
> >>      CHK_ATTR(dev_attr, bk_dev_attr, max_mr_size, "%" PRId64);
> >>      CHK_ATTR(dev_attr, bk_dev_attr, max_qp, "%d");
> >> @@ -970,6 +1081,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
> >>      CHK_ATTR(dev_attr, bk_dev_attr, max_qp_rd_atom, "%d");
> >>      CHK_ATTR(dev_attr, bk_dev_attr, max_qp_init_rd_atom, "%d");
> >>      CHK_ATTR(dev_attr, bk_dev_attr, max_ah, "%d");
> >> +    CHK_ATTR(dev_attr, bk_dev_attr, max_srq, "%d");
> >>  
> >>      return 0;
> >>  }
> >> diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
> >> index 38056d97c7fc..cad7956d98e8 100644
> >> --- a/hw/rdma/rdma_backend.h
> >> +++ b/hw/rdma/rdma_backend.h
> >> @@ -114,4 +114,16 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
> >>                              RdmaBackendQP *qp, uint8_t qp_type,
> >>                              struct ibv_sge *sge, uint32_t num_sge, void *ctx);
> >>  
> >> +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
> >> +                            uint32_t max_wr, uint32_t max_sge,
> >> +                            uint32_t srq_limit);
> >> +int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr);
> >> +int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
> >> +                            int srq_attr_mask);
> >> +void rdma_backend_destroy_srq(RdmaBackendSRQ *srq,
> >> +                              RdmaDeviceResources *dev_res);
> >> +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
> >> +                                RdmaBackendSRQ *srq, struct ibv_sge *sge,
> >> +                                uint32_t num_sge, void *ctx);
> >> +
> >>  #endif
> >> diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h
> >> index 817153dc8cf4..0b55be35038d 100644
> >> --- a/hw/rdma/rdma_backend_defs.h
> >> +++ b/hw/rdma/rdma_backend_defs.h
> >> @@ -68,4 +68,9 @@ typedef struct RdmaBackendQP {
> >>      RdmaProtectedGSList cqe_ctx_list;
> >>  } RdmaBackendQP;
> >>  
> >> +typedef struct RdmaBackendSRQ {
> >> +    struct ibv_srq *ibsrq;
> >> +    RdmaProtectedGSList cqe_ctx_list;
> >> +} RdmaBackendSRQ;
> >> +
> >>  #endif
> >> diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
> >> index bac3b2f4a6c3..b683506b8616 100644
> >> --- a/hw/rdma/rdma_rm.c
> >> +++ b/hw/rdma/rdma_rm.c
> >> @@ -37,6 +37,8 @@ void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res)
> >>                     dev_res->stats.tx_err);
> >>      monitor_printf(mon, "\trx_bufs          : %" PRId64 "\n",
> >>                     dev_res->stats.rx_bufs);
> >> +    monitor_printf(mon, "\trx_srq           : %" PRId64 "\n",
> >> +                   dev_res->stats.rx_srq);

[1]

> >>      monitor_printf(mon, "\trx_bufs_len      : %" PRId64 "\n",
> >>                     dev_res->stats.rx_bufs_len);
> >>      monitor_printf(mon, "\trx_bufs_err      : %" PRId64 "\n",
> >> diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
> >> index c200d311de37..e774af528022 100644
> >> --- a/hw/rdma/rdma_rm_defs.h
> >> +++ b/hw/rdma/rdma_rm_defs.h
> >> @@ -106,6 +106,7 @@ typedef struct RdmaRmStats {
> >>      uint64_t rx_bufs;
> >>      uint64_t rx_bufs_len;
> >>      uint64_t rx_bufs_err;
> >> +    uint64_t rx_srq;
> >>      uint64_t completions;
> >>      uint64_t mad_tx;
> >>      uint64_t mad_tx_err;
> > 
> > Please make a separate patch to update the function
> > rdma_dump_device_counters.
> > 
> 
> You mean a separate patch for introducing the "rx_srq" counter & updating
> the function rdma_dump_device_counters()?

My bad, missed that ([1]).
No need for a separate patch.

> 
> > Besides that patch lgtm.
> > 
> > Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
> > 
> >> -- 
> >> 2.20.1
> >>
> >>
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread
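
For anyone less familiar with the verbs API this backend wraps:
rdma_backend_create_srq(), rdma_backend_post_srq_recv(),
rdma_backend_query_srq() and rdma_backend_destroy_srq() map almost
one-to-one onto libibverbs calls. A minimal host-side sketch of the SRQ
lifecycle, assuming an RDMA-capable device is present (illustrative only,
error handling abbreviated; build with -libverbs):

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>
    #include <infiniband/verbs.h>

    int main(void)
    {
        struct ibv_device **devs = ibv_get_device_list(NULL);
        if (!devs || !devs[0]) {
            fprintf(stderr, "no RDMA device found\n");
            return 1;
        }

        struct ibv_context *ctx = ibv_open_device(devs[0]);
        struct ibv_pd *pd = ibv_alloc_pd(ctx);

        /* Mirrors rdma_backend_create_srq(): max_wr, max_sge, srq_limit. */
        struct ibv_srq_init_attr init_attr = {
            .attr = { .max_wr = 16, .max_sge = 1, .srq_limit = 0 },
        };
        struct ibv_srq *srq = ibv_create_srq(pd, &init_attr);
        if (!srq) {
            fprintf(stderr, "ibv_create_srq failed\n");
            return 1;
        }

        /* Receive buffers must live in a registered memory region. */
        char *buf = calloc(1, 4096);
        struct ibv_mr *mr = ibv_reg_mr(pd, buf, 4096, IBV_ACCESS_LOCAL_WRITE);

        struct ibv_sge sge = {
            .addr = (uintptr_t)buf, .length = 4096, .lkey = mr->lkey,
        };
        struct ibv_recv_wr wr = { .wr_id = 1, .sg_list = &sge, .num_sge = 1 };
        struct ibv_recv_wr *bad_wr;

        /* Mirrors rdma_backend_post_srq_recv(). */
        int rc = ibv_post_srq_recv(srq, &wr, &bad_wr);
        if (rc) {
            fprintf(stderr, "ibv_post_srq_recv failed, rc=%d\n", rc);
        }

        /* Mirrors rdma_backend_query_srq(). */
        struct ibv_srq_attr attr;
        if (!ibv_query_srq(srq, &attr)) {
            printf("srq: max_wr=%u srq_limit=%u\n", attr.max_wr, attr.srq_limit);
        }

        ibv_destroy_srq(srq);
        ibv_dereg_mr(mr);
        free(buf);
        ibv_dealloc_pd(pd);
        ibv_close_device(ctx);
        ibv_free_device_list(devs);
        return 0;
    }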

* Re: [Qemu-devel] [PATCH v3 0/4] pvrdma: Add support for SRQ
  2019-04-03 18:19 ` [Qemu-devel] [PATCH v3 0/4] pvrdma: " Yuval Shaia
@ 2019-04-07  9:15   ` Yuval Shaia
  0 siblings, 0 replies; 13+ messages in thread
From: Yuval Shaia @ 2019-04-07  9:15 UTC (permalink / raw)
  To: Kamal Heib; +Cc: qemu-devel

On Wed, Apr 03, 2019 at 09:19:38PM +0300, Yuval Shaia wrote:
> On Wed, Apr 03, 2019 at 02:33:39PM +0300, Kamal Heib wrote:
> > This series implements the SRQ (Shared Receive Queue) for the pvrdma
> > device. It also includes all the needed functions and definitions to
> > support SRQ in the backend and resource management layers.
> > 
> > Changes from v2->v3:
> > - Patch #1:
> > -- Fix commit message.
> > -- Remove initialization of backend_qp from rdma_backend_post_srq_recv().
> > -- Add rx_srq counter.
> > - Patch #2:
> > -- Add checks for srq attrs.
> > - Patch #3:
> > -- Move initialization of recv_cq_handle to be under is_srq.
> > -- Rearrange destroy_qp() to avoid use after free.
> > - Patch #4:
> > -- Avoid use after free.
> > -- Fix indentation.
> > 
> > Changes from v1->v2:
> > - Handle checkpatch.pl warnings. 
> 
> Appreciate if you can post v4 with a new patch to display the new counter
> (function rdma_dump_device_counters).

Please ignore this comment, the counter is already added - I missed that.

With that - patchset lgtm, from my perspective it's ready to merge.

> 
> > 
> > Kamal Heib (4):
> >   hw/rdma: Add SRQ support to backend layer
> >   hw/rdma: Add support for managing SRQ resource
> >   hw/rdma: Modify create/destroy QP to support SRQ
> >   hw/pvrdma: Add support for SRQ
> > 
> >  hw/rdma/rdma_backend.c      | 125 +++++++++++++++++++++-
> >  hw/rdma/rdma_backend.h      |  18 +++-
> >  hw/rdma/rdma_backend_defs.h |   5 +
> >  hw/rdma/rdma_rm.c           | 117 +++++++++++++++++++-
> >  hw/rdma/rdma_rm.h           |  13 ++-
> >  hw/rdma/rdma_rm_defs.h      |  10 ++
> >  hw/rdma/vmw/pvrdma_cmd.c    | 206 ++++++++++++++++++++++++++++++++----
> >  hw/rdma/vmw/pvrdma_main.c   |  16 +++
> >  hw/rdma/vmw/pvrdma_qp_ops.c |  46 +++++++-
> >  hw/rdma/vmw/pvrdma_qp_ops.h |   1 +
> >  10 files changed, 521 insertions(+), 36 deletions(-)
> > 
> > -- 
> > 2.20.1
> > 
> > 
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, newest message: 2019-04-07  9:15 UTC

Thread overview: 13+ messages
2019-04-03 11:33 [Qemu-devel] [PATCH v3 0/4] pvrdma: Add support for SRQ Kamal Heib
2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer Kamal Heib
2019-04-03 18:05   ` Yuval Shaia
2019-04-07  8:13     ` Kamal Heib
2019-04-07  9:13       ` Yuval Shaia
2019-04-03 11:33 ` [Qemu-devel] [PATCH 2/4] hw/rdma: Add support for managing SRQ resource Kamal Heib
2019-04-03 18:10   ` Yuval Shaia
2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 3/4] hw/rdma: Modify create/destroy QP to support SRQ Kamal Heib
2019-04-03 18:15   ` Yuval Shaia
2019-04-03 11:33 ` [Qemu-devel] [PATCH v3 4/4] hw/pvrdma: Add support for SRQ Kamal Heib
2019-04-03 18:16   ` Yuval Shaia
2019-04-03 18:19 ` [Qemu-devel] [PATCH v3 0/4] pvrdma: " Yuval Shaia
2019-04-07  9:15   ` Yuval Shaia
