* [PATCH 0/2] hw/rdma: Last step in eliminating data-path processing
@ 2020-03-07 12:56 Yuval Shaia
  2020-03-07 12:56 ` [PATCH 1/2] hw/rdma: Cosmetic change - no need for two sge arrays Yuval Shaia
  2020-03-07 12:56 ` [PATCH 2/2] hw/rdma: Skip data-path mr_id translation Yuval Shaia
  0 siblings, 2 replies; 6+ messages in thread
From: Yuval Shaia @ 2020-03-07 12:56 UTC (permalink / raw)
  To: qemu-devel, yuval.shaia.ml, marcel.apfelbaum

With an emulated device we have the luxury of doing work in the data path.
With a future virtio-rdma device we no longer have this luxury: the driver
will post WQEs directly to the HW queues, bypassing the device emulator in
QEMU. These WQEs will consist of addresses known to the device (even if
they are guest addresses) and the ID of an HW memory region (MR).

Commit 68b89aee71 ("Utilize ibv_reg_mr_iova for memory registration") took
the first important step by utilizing a new RDMA API so that address
translation is no longer needed.
This patch set continues and removes the data-path processing entirely by
eliminating the need to translate an emulated mr_id to the backend device's
mr_id.
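
For readers who did not follow commit 68b89aee71, below is a minimal sketch
(illustration only, not QEMU code; the names register_guest_buffer,
post_guest_send, host_va and guest_pa are made up) of the idea this series
builds on: registering the host mapping of a guest buffer with
ibv_reg_mr_iova() lets the guest address itself appear in SGEs, and once the
guest also holds the backend lkey (patch 2), a WQE can be handed to the
backend HCA without any per-SGE translation.

#include <stddef.h>
#include <stdint.h>
#include <infiniband/verbs.h>

/* Register the host mapping of a guest buffer so that the HCA accepts the
 * guest address (iova) directly in work requests. */
static struct ibv_mr *register_guest_buffer(struct ibv_pd *pd, void *host_va,
                                            size_t len, uint64_t guest_pa)
{
    return ibv_reg_mr_iova(pd, host_va, len, guest_pa,
                           IBV_ACCESS_LOCAL_WRITE);
}

/* Post a send whose SGE carries the guest address and the backend lkey
 * unmodified. */
static int post_guest_send(struct ibv_qp *qp, struct ibv_mr *mr,
                           uint64_t guest_pa, uint32_t len)
{
    struct ibv_sge sge = {
        .addr   = guest_pa,  /* guest address, no address translation */
        .length = len,
        .lkey   = mr->lkey,  /* backend lkey, no mr_id translation */
    };
    struct ibv_send_wr wr = {
        .wr_id      = 1,
        .sg_list    = &sge,
        .num_sge    = 1,
        .opcode     = IBV_WR_SEND,
        .send_flags = IBV_SEND_SIGNALED,
    }, *bad_wr = NULL;

    return ibv_post_send(qp, &wr, &bad_wr);
}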

Yuval Shaia (2):
  hw/rdma: Cosmetic change - no need for two sge arrays
  hw/rdma: Skip data-path mr_id translation

 hw/rdma/rdma_backend.c | 61 +++++++++++++++++++++---------------------
 hw/rdma/rdma_backend.h |  5 ----
 hw/rdma/rdma_rm.c      | 13 +++++----
 3 files changed, 36 insertions(+), 43 deletions(-)

-- 
2.20.1



* [PATCH 1/2] hw/rdma: Cosmetic change - no need for two sge arrays
  2020-03-07 12:56 [PATCH 0/2] hw/rdma: Last step in eliminating data-path processing Yuval Shaia
@ 2020-03-07 12:56 ` Yuval Shaia
  2020-03-16 13:32   ` Marcel Apfelbaum
  2020-03-07 12:56 ` [PATCH 2/2] hw/rdma: Skip data-path mr_id translation Yuval Shaia
  1 sibling, 1 reply; 6+ messages in thread
From: Yuval Shaia @ 2020-03-07 12:56 UTC (permalink / raw)
  To: qemu-devel, yuval.shaia.ml, marcel.apfelbaum

The function build_host_sge_array uses two sge arrays, one for input and
one for output.
Since the size of the two arrays is the same, the function can write
directly to the given source array (i.e. input/output argument).

Signed-off-by: Yuval Shaia <yuval.shaia.ml@gmail.com>
---
 hw/rdma/rdma_backend.c | 40 +++++++++++++++++-----------------------
 1 file changed, 17 insertions(+), 23 deletions(-)

diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
index c346407cd3..79b9cfb487 100644
--- a/hw/rdma/rdma_backend.c
+++ b/hw/rdma/rdma_backend.c
@@ -378,30 +378,27 @@ static void ah_cache_init(void)
 }
 
 static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res,
-                                struct ibv_sge *dsge, struct ibv_sge *ssge,
-                                uint8_t num_sge, uint64_t *total_length)
+                                struct ibv_sge *sge, uint8_t num_sge,
+                                uint64_t *total_length)
 {
     RdmaRmMR *mr;
-    int ssge_idx;
+    int idx;
 
-    for (ssge_idx = 0; ssge_idx < num_sge; ssge_idx++) {
-        mr = rdma_rm_get_mr(rdma_dev_res, ssge[ssge_idx].lkey);
+    for (idx = 0; idx < num_sge; idx++) {
+        mr = rdma_rm_get_mr(rdma_dev_res, sge[idx].lkey);
         if (unlikely(!mr)) {
-            rdma_error_report("Invalid lkey 0x%x", ssge[ssge_idx].lkey);
-            return VENDOR_ERR_INVLKEY | ssge[ssge_idx].lkey;
+            rdma_error_report("Invalid lkey 0x%x", sge[idx].lkey);
+            return VENDOR_ERR_INVLKEY | sge[idx].lkey;
         }
 
 #ifdef LEGACY_RDMA_REG_MR
-        dsge->addr = (uintptr_t)mr->virt + ssge[ssge_idx].addr - mr->start;
+        sge[idx].addr = (uintptr_t)mr->virt + sge[idx].addr - mr->start;
 #else
-        dsge->addr = ssge[ssge_idx].addr;
+        sge[idx].addr = sge[idx].addr;
 #endif
-        dsge->length = ssge[ssge_idx].length;
-        dsge->lkey = rdma_backend_mr_lkey(&mr->backend_mr);
+        sge[idx].lkey = rdma_backend_mr_lkey(&mr->backend_mr);
 
-        *total_length += dsge->length;
-
-        dsge++;
+        *total_length += sge[idx].length;
     }
 
     return 0;
@@ -484,7 +481,6 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev,
                             void *ctx)
 {
     BackendCtx *bctx;
-    struct ibv_sge new_sge[MAX_SGE];
     uint32_t bctx_id;
     int rc;
     struct ibv_send_wr wr = {}, *bad_wr;
@@ -518,7 +514,7 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev,
 
     rdma_protected_gslist_append_int32(&qp->cqe_ctx_list, bctx_id);
 
-    rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
+    rc = build_host_sge_array(backend_dev->rdma_dev_res, sge, num_sge,
                               &backend_dev->rdma_dev_res->stats.tx_len);
     if (rc) {
         complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
@@ -538,7 +534,7 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev,
     wr.num_sge = num_sge;
     wr.opcode = IBV_WR_SEND;
     wr.send_flags = IBV_SEND_SIGNALED;
-    wr.sg_list = new_sge;
+    wr.sg_list = sge;
     wr.wr_id = bctx_id;
 
     rc = ibv_post_send(qp->ibqp, &wr, &bad_wr);
@@ -601,7 +597,6 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
                             struct ibv_sge *sge, uint32_t num_sge, void *ctx)
 {
     BackendCtx *bctx;
-    struct ibv_sge new_sge[MAX_SGE];
     uint32_t bctx_id;
     int rc;
     struct ibv_recv_wr wr = {}, *bad_wr;
@@ -635,7 +630,7 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
 
     rdma_protected_gslist_append_int32(&qp->cqe_ctx_list, bctx_id);
 
-    rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
+    rc = build_host_sge_array(backend_dev->rdma_dev_res, sge, num_sge,
                               &backend_dev->rdma_dev_res->stats.rx_bufs_len);
     if (rc) {
         complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
@@ -643,7 +638,7 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
     }
 
     wr.num_sge = num_sge;
-    wr.sg_list = new_sge;
+    wr.sg_list = sge;
     wr.wr_id = bctx_id;
     rc = ibv_post_recv(qp->ibqp, &wr, &bad_wr);
     if (rc) {
@@ -671,7 +666,6 @@ void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
                                 uint32_t num_sge, void *ctx)
 {
     BackendCtx *bctx;
-    struct ibv_sge new_sge[MAX_SGE];
     uint32_t bctx_id;
     int rc;
     struct ibv_recv_wr wr = {}, *bad_wr;
@@ -688,7 +682,7 @@ void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
 
     rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id);
 
-    rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
+    rc = build_host_sge_array(backend_dev->rdma_dev_res, sge, num_sge,
                               &backend_dev->rdma_dev_res->stats.rx_bufs_len);
     if (rc) {
         complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
@@ -696,7 +690,7 @@ void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
     }
 
     wr.num_sge = num_sge;
-    wr.sg_list = new_sge;
+    wr.sg_list = sge;
     wr.wr_id = bctx_id;
     rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr);
     if (rc) {
-- 
2.20.1



* [PATCH 2/2] hw/rdma: Skip data-path mr_id translation
  2020-03-07 12:56 [PATCH 0/2] hw/rdma: Last step in eliminating data-path processing Yuval Shaia
  2020-03-07 12:56 ` [PATCH 1/2] hw/rdma: Cosmetic change - no need for two sge arrays Yuval Shaia
@ 2020-03-07 12:56 ` Yuval Shaia
  2020-03-16 13:37   ` Marcel Apfelbaum
  1 sibling, 1 reply; 6+ messages in thread
From: Yuval Shaia @ 2020-03-07 12:56 UTC (permalink / raw)
  To: qemu-devel, yuval.shaia.ml, marcel.apfelbaum

With the change made in commit 68b89aee71 ("Utilize ibv_reg_mr_iova for
memory registration"), the MR emulation is no longer needed in order to
translate guest addresses into host addresses.
With that, the next obvious step is to skip the data-path processing
entirely.
To accomplish this, return the backend's lkey to the driver so we no longer
need to translate the emulated mr_id to the backend mr_id in the data path.

The function build_host_sge_array is still called in the data path, but only
to keep backward compatibility with statistics collection.

While there, as a cosmetic change to make the code cleaner, keep a single
copy of the function rdma_backend_create_mr, leaving the guest_start
argument unused in the legacy code.
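
Purely as an illustration (this snippet is not part of the patch; emu_mr and
lookup_emulated_mr() are hypothetical stand-ins for RdmaRmMR and
rdma_rm_get_mr() in the QEMU code), this is the per-SGE work that the legacy
data path still has to do and that the non-legacy path can now drop:

#include <stddef.h>
#include <stdint.h>
#include <infiniband/verbs.h>

struct emu_mr {
    void     *host_virt;    /* host mapping of the guest buffer */
    uint64_t  guest_start;  /* guest address used at registration time */
    uint32_t  backend_lkey; /* lkey of the ibv_mr on the backend HCA */
};

/* Hypothetical lookup of an emulated MR by its emulated handle. */
struct emu_mr *lookup_emulated_mr(uint32_t emulated_handle);

/* Legacy mode: the guest's lkey is the emulated handle, so every SGE costs
 * an MR lookup plus an address rewrite and an lkey rewrite. */
static int translate_sge_legacy(struct ibv_sge *sge)
{
    struct emu_mr *mr = lookup_emulated_mr(sge->lkey);

    if (!mr) {
        return -1;
    }
    sge->addr = (uintptr_t)mr->host_virt + sge->addr - mr->guest_start;
    sge->lkey = mr->backend_lkey;
    return 0;
}

/* Non-legacy mode (this patch): the guest already holds the backend lkey and
 * the HCA accepts guest addresses (ibv_reg_mr_iova), so the SGE is posted
 * unmodified and the data path only accumulates statistics. */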

Signed-off-by: Yuval Shaia <yuval.shaia.ml@gmail.com>
---
 hw/rdma/rdma_backend.c | 23 ++++++++++++++---------
 hw/rdma/rdma_backend.h |  5 -----
 hw/rdma/rdma_rm.c      | 13 ++++++-------
 3 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
index 79b9cfb487..3dd39fe1a7 100644
--- a/hw/rdma/rdma_backend.c
+++ b/hw/rdma/rdma_backend.c
@@ -377,6 +377,7 @@ static void ah_cache_init(void)
                                     destroy_ah_hash_key, destroy_ah_hast_data);
 }
 
+#ifdef LEGACY_RDMA_REG_MR
 static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res,
                                 struct ibv_sge *sge, uint8_t num_sge,
                                 uint64_t *total_length)
@@ -391,11 +392,7 @@ static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res,
             return VENDOR_ERR_INVLKEY | sge[idx].lkey;
         }
 
-#ifdef LEGACY_RDMA_REG_MR
         sge[idx].addr = (uintptr_t)mr->virt + sge[idx].addr - mr->start;
-#else
-        sge[idx].addr = sge[idx].addr;
-#endif
         sge[idx].lkey = rdma_backend_mr_lkey(&mr->backend_mr);
 
         *total_length += sge[idx].length;
@@ -403,6 +400,19 @@ static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res,
 
     return 0;
 }
+#else
+static inline int build_host_sge_array(RdmaDeviceResources *rdma_dev_res,
+                                       struct ibv_sge *sge, uint8_t num_sge,
+                                       uint64_t *total_length)
+{
+    int idx;
+
+    for (idx = 0; idx < num_sge; idx++) {
+        *total_length += sge[idx].length;
+    }
+    return 0;
+}
+#endif
 
 static void trace_mad_message(const char *title, char *buf, int len)
 {
@@ -733,13 +743,8 @@ void rdma_backend_destroy_pd(RdmaBackendPD *pd)
     }
 }
 
-#ifdef LEGACY_RDMA_REG_MR
-int rdma_backend_create_mr(RdmaBackendMR *mr, RdmaBackendPD *pd, void *addr,
-                           size_t length, int access)
-#else
 int rdma_backend_create_mr(RdmaBackendMR *mr, RdmaBackendPD *pd, void *addr,
                            size_t length, uint64_t guest_start, int access)
-#endif
 {
 #ifdef LEGACY_RDMA_REG_MR
     mr->ibmr = ibv_reg_mr(pd->ibpd, addr, length, access);
diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
index 127f96e2d5..225af481e0 100644
--- a/hw/rdma/rdma_backend.h
+++ b/hw/rdma/rdma_backend.h
@@ -78,13 +78,8 @@ int rdma_backend_query_port(RdmaBackendDev *backend_dev,
 int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd);
 void rdma_backend_destroy_pd(RdmaBackendPD *pd);
 
-#ifdef LEGACY_RDMA_REG_MR
-int rdma_backend_create_mr(RdmaBackendMR *mr, RdmaBackendPD *pd, void *addr,
-                           size_t length, int access);
-#else
 int rdma_backend_create_mr(RdmaBackendMR *mr, RdmaBackendPD *pd, void *addr,
                            size_t length, uint64_t guest_start, int access);
-#endif
 void rdma_backend_destroy_mr(RdmaBackendMR *mr);
 
 int rdma_backend_create_cq(RdmaBackendDev *backend_dev, RdmaBackendCQ *cq,
diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
index 1524dfaeaa..7e9ea283c9 100644
--- a/hw/rdma/rdma_rm.c
+++ b/hw/rdma/rdma_rm.c
@@ -227,21 +227,20 @@ int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle,
         mr->length = guest_length;
         mr->virt += (mr->start & (TARGET_PAGE_SIZE - 1));
 
-#ifdef LEGACY_RDMA_REG_MR
-        ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, mr->virt,
-                                     mr->length, access_flags);
-#else
         ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, mr->virt,
                                      mr->length, guest_start, access_flags);
-#endif
         if (ret) {
             ret = -EIO;
             goto out_dealloc_mr;
         }
+#ifdef LEGACY_RDMA_REG_MR
+        /* We keep mr_handle in lkey so send and recv get get mr ptr */
+        *lkey = *mr_handle;
+#else
+        *lkey = rdma_backend_mr_lkey(&mr->backend_mr);
+#endif
     }
 
-    /* We keep mr_handle in lkey so send and recv get get mr ptr */
-    *lkey = *mr_handle;
     *rkey = -1;
 
     mr->pd_handle = pd_handle;
-- 
2.20.1



* Re: [PATCH 1/2] hw/rdma: Cosmetic change - no need for two sge arrays
  2020-03-07 12:56 ` [PATCH 1/2] hw/rdma: Cosmetic change - no need for two sge arrays Yuval Shaia
@ 2020-03-16 13:32   ` Marcel Apfelbaum
  2020-03-20 12:30     ` Yuval Shaia
  0 siblings, 1 reply; 6+ messages in thread
From: Marcel Apfelbaum @ 2020-03-16 13:32 UTC (permalink / raw)
  To: Yuval Shaia, qemu-devel

Hi Yuval,

On 3/7/20 2:56 PM, Yuval Shaia wrote:
> The function build_host_sge_array uses two sge arrays, one for input and
> one for output.
> Since the size of the two arrays is the same, the function can write
> directly to the given source array (i.e. input/output argument).
>
> Signed-off-by: Yuval Shaia <yuval.shaia.ml@gmail.com>
> ---
>   hw/rdma/rdma_backend.c | 40 +++++++++++++++++-----------------------
>   1 file changed, 17 insertions(+), 23 deletions(-)
>
> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
> index c346407cd3..79b9cfb487 100644
> --- a/hw/rdma/rdma_backend.c
> +++ b/hw/rdma/rdma_backend.c
> @@ -378,30 +378,27 @@ static void ah_cache_init(void)
>   }
>   
>   static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res,
> -                                struct ibv_sge *dsge, struct ibv_sge *ssge,
> -                                uint8_t num_sge, uint64_t *total_length)
> +                                struct ibv_sge *sge, uint8_t num_sge,
> +                                uint64_t *total_length)
>   {
>       RdmaRmMR *mr;
> -    int ssge_idx;
> +    int idx;
>   
> -    for (ssge_idx = 0; ssge_idx < num_sge; ssge_idx++) {
> -        mr = rdma_rm_get_mr(rdma_dev_res, ssge[ssge_idx].lkey);
> +    for (idx = 0; idx < num_sge; idx++) {
> +        mr = rdma_rm_get_mr(rdma_dev_res, sge[idx].lkey);
>           if (unlikely(!mr)) {
> -            rdma_error_report("Invalid lkey 0x%x", ssge[ssge_idx].lkey);
> -            return VENDOR_ERR_INVLKEY | ssge[ssge_idx].lkey;
> +            rdma_error_report("Invalid lkey 0x%x", sge[idx].lkey);
> +            return VENDOR_ERR_INVLKEY | sge[idx].lkey;
>           }
>   
>   #ifdef LEGACY_RDMA_REG_MR
> -        dsge->addr = (uintptr_t)mr->virt + ssge[ssge_idx].addr - mr->start;
> +        sge[idx].addr = (uintptr_t)mr->virt + sge[idx].addr - mr->start;
>   #else
> -        dsge->addr = ssge[ssge_idx].addr;
> +        sge[idx].addr = sge[idx].addr;

It seems we don't need the above line.
Other than that it looks good to me.

Thanks,
Marcel

>   #endif
> -        dsge->length = ssge[ssge_idx].length;
> -        dsge->lkey = rdma_backend_mr_lkey(&mr->backend_mr);
> +        sge[idx].lkey = rdma_backend_mr_lkey(&mr->backend_mr);
>   
> -        *total_length += dsge->length;
> -
> -        dsge++;
> +        *total_length += sge[idx].length;
>       }
>   
>       return 0;
> @@ -484,7 +481,6 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev,
>                               void *ctx)
>   {
>       BackendCtx *bctx;
> -    struct ibv_sge new_sge[MAX_SGE];
>       uint32_t bctx_id;
>       int rc;
>       struct ibv_send_wr wr = {}, *bad_wr;
> @@ -518,7 +514,7 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev,
>   
>       rdma_protected_gslist_append_int32(&qp->cqe_ctx_list, bctx_id);
>   
> -    rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
> +    rc = build_host_sge_array(backend_dev->rdma_dev_res, sge, num_sge,
>                                 &backend_dev->rdma_dev_res->stats.tx_len);
>       if (rc) {
>           complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
> @@ -538,7 +534,7 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev,
>       wr.num_sge = num_sge;
>       wr.opcode = IBV_WR_SEND;
>       wr.send_flags = IBV_SEND_SIGNALED;
> -    wr.sg_list = new_sge;
> +    wr.sg_list = sge;
>       wr.wr_id = bctx_id;
>   
>       rc = ibv_post_send(qp->ibqp, &wr, &bad_wr);
> @@ -601,7 +597,6 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
>                               struct ibv_sge *sge, uint32_t num_sge, void *ctx)
>   {
>       BackendCtx *bctx;
> -    struct ibv_sge new_sge[MAX_SGE];
>       uint32_t bctx_id;
>       int rc;
>       struct ibv_recv_wr wr = {}, *bad_wr;
> @@ -635,7 +630,7 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
>   
>       rdma_protected_gslist_append_int32(&qp->cqe_ctx_list, bctx_id);
>   
> -    rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
> +    rc = build_host_sge_array(backend_dev->rdma_dev_res, sge, num_sge,
>                                 &backend_dev->rdma_dev_res->stats.rx_bufs_len);
>       if (rc) {
>           complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
> @@ -643,7 +638,7 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
>       }
>   
>       wr.num_sge = num_sge;
> -    wr.sg_list = new_sge;
> +    wr.sg_list = sge;
>       wr.wr_id = bctx_id;
>       rc = ibv_post_recv(qp->ibqp, &wr, &bad_wr);
>       if (rc) {
> @@ -671,7 +666,6 @@ void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
>                                   uint32_t num_sge, void *ctx)
>   {
>       BackendCtx *bctx;
> -    struct ibv_sge new_sge[MAX_SGE];
>       uint32_t bctx_id;
>       int rc;
>       struct ibv_recv_wr wr = {}, *bad_wr;
> @@ -688,7 +682,7 @@ void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
>   
>       rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id);
>   
> -    rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
> +    rc = build_host_sge_array(backend_dev->rdma_dev_res, sge, num_sge,
>                                 &backend_dev->rdma_dev_res->stats.rx_bufs_len);
>       if (rc) {
>           complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
> @@ -696,7 +690,7 @@ void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
>       }
>   
>       wr.num_sge = num_sge;
> -    wr.sg_list = new_sge;
> +    wr.sg_list = sge;
>       wr.wr_id = bctx_id;
>       rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr);
>       if (rc) {



* Re: [PATCH 2/2] hw/rdma: Skip data-path mr_id translation
  2020-03-07 12:56 ` [PATCH 2/2] hw/rdma: Skip data-path mr_id translation Yuval Shaia
@ 2020-03-16 13:37   ` Marcel Apfelbaum
  0 siblings, 0 replies; 6+ messages in thread
From: Marcel Apfelbaum @ 2020-03-16 13:37 UTC (permalink / raw)
  To: Yuval Shaia, qemu-devel

Hi Yuval,

On 3/7/20 2:56 PM, Yuval Shaia wrote:
> With the change made in commit 68b89aee71 ("Utilize ibv_reg_mr_iova for
> memory registration"), the MR emulation is no longer needed in order to
> translate guest addresses into host addresses.
> With that, the next obvious step is to skip the data-path processing
> entirely.
> To accomplish this, return the backend's lkey to the driver so we no longer
> need to translate the emulated mr_id to the backend mr_id in the data path.
>
> The function build_host_sge_array is still called in the data path, but only
> to keep backward compatibility with statistics collection.
>
> While there, as a cosmetic change to make the code cleaner, keep a single
> copy of the function rdma_backend_create_mr, leaving the guest_start
> argument unused in the legacy code.
>
> Signed-off-by: Yuval Shaia <yuval.shaia.ml@gmail.com>
> ---
>   hw/rdma/rdma_backend.c | 23 ++++++++++++++---------
>   hw/rdma/rdma_backend.h |  5 -----
>   hw/rdma/rdma_rm.c      | 13 ++++++-------
>   3 files changed, 20 insertions(+), 21 deletions(-)
>
> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
> index 79b9cfb487..3dd39fe1a7 100644
> --- a/hw/rdma/rdma_backend.c
> +++ b/hw/rdma/rdma_backend.c
> @@ -377,6 +377,7 @@ static void ah_cache_init(void)
>                                       destroy_ah_hash_key, destroy_ah_hast_data);
>   }
>   
> +#ifdef LEGACY_RDMA_REG_MR
>   static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res,
>                                   struct ibv_sge *sge, uint8_t num_sge,
>                                   uint64_t *total_length)
> @@ -391,11 +392,7 @@ static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res,
>               return VENDOR_ERR_INVLKEY | sge[idx].lkey;
>           }
>   
> -#ifdef LEGACY_RDMA_REG_MR
>           sge[idx].addr = (uintptr_t)mr->virt + sge[idx].addr - mr->start;
> -#else
> -        sge[idx].addr = sge[idx].addr;
> -#endif
>           sge[idx].lkey = rdma_backend_mr_lkey(&mr->backend_mr);
>   
>           *total_length += sge[idx].length;
> @@ -403,6 +400,19 @@ static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res,
>   
>       return 0;
>   }
> +#else
> +static inline int build_host_sge_array(RdmaDeviceResources *rdma_dev_res,
> +                                       struct ibv_sge *sge, uint8_t num_sge,
> +                                       uint64_t *total_length)
> +{
> +    int idx;
> +
> +    for (idx = 0; idx < num_sge; idx++) {
> +        *total_length += sge[idx].length;
> +    }
> +    return 0;
> +}
> +#endif
>   
>   static void trace_mad_message(const char *title, char *buf, int len)
>   {
> @@ -733,13 +743,8 @@ void rdma_backend_destroy_pd(RdmaBackendPD *pd)
>       }
>   }
>   
> -#ifdef LEGACY_RDMA_REG_MR
> -int rdma_backend_create_mr(RdmaBackendMR *mr, RdmaBackendPD *pd, void *addr,
> -                           size_t length, int access)
> -#else
>   int rdma_backend_create_mr(RdmaBackendMR *mr, RdmaBackendPD *pd, void *addr,
>                              size_t length, uint64_t guest_start, int access)
> -#endif
>   {
>   #ifdef LEGACY_RDMA_REG_MR
>       mr->ibmr = ibv_reg_mr(pd->ibpd, addr, length, access);
> diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
> index 127f96e2d5..225af481e0 100644
> --- a/hw/rdma/rdma_backend.h
> +++ b/hw/rdma/rdma_backend.h
> @@ -78,13 +78,8 @@ int rdma_backend_query_port(RdmaBackendDev *backend_dev,
>   int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd);
>   void rdma_backend_destroy_pd(RdmaBackendPD *pd);
>   
> -#ifdef LEGACY_RDMA_REG_MR
> -int rdma_backend_create_mr(RdmaBackendMR *mr, RdmaBackendPD *pd, void *addr,
> -                           size_t length, int access);
> -#else
>   int rdma_backend_create_mr(RdmaBackendMR *mr, RdmaBackendPD *pd, void *addr,
>                              size_t length, uint64_t guest_start, int access);
> -#endif
>   void rdma_backend_destroy_mr(RdmaBackendMR *mr);
>   
>   int rdma_backend_create_cq(RdmaBackendDev *backend_dev, RdmaBackendCQ *cq,
> diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
> index 1524dfaeaa..7e9ea283c9 100644
> --- a/hw/rdma/rdma_rm.c
> +++ b/hw/rdma/rdma_rm.c
> @@ -227,21 +227,20 @@ int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle,
>           mr->length = guest_length;
>           mr->virt += (mr->start & (TARGET_PAGE_SIZE - 1));
>   
> -#ifdef LEGACY_RDMA_REG_MR
> -        ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, mr->virt,
> -                                     mr->length, access_flags);
> -#else
>           ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, mr->virt,
>                                        mr->length, guest_start, access_flags);
> -#endif
>           if (ret) {
>               ret = -EIO;
>               goto out_dealloc_mr;
>           }
> +#ifdef LEGACY_RDMA_REG_MR
> +        /* We keep mr_handle in lkey so send and recv get get mr ptr */
> +        *lkey = *mr_handle;
> +#else
> +        *lkey = rdma_backend_mr_lkey(&mr->backend_mr);
> +#endif
>       }
>   
> -    /* We keep mr_handle in lkey so send and recv get get mr ptr */
> -    *lkey = *mr_handle;
>       *rkey = -1;
>   
>       mr->pd_handle = pd_handle;

Reviewed-by: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>

Thanks,
Marcel



* Re: [PATCH 1/2] hw/rdma: Cosmetic change - no need for two sge arrays
  2020-03-16 13:32   ` Marcel Apfelbaum
@ 2020-03-20 12:30     ` Yuval Shaia
  0 siblings, 0 replies; 6+ messages in thread
From: Yuval Shaia @ 2020-03-20 12:30 UTC (permalink / raw)
  To: Marcel Apfelbaum; +Cc: qemu-devel

On Mon, 16 Mar 2020 at 15:30, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
wrote:

> Hi Yuval,
>
> On 3/7/20 2:56 PM, Yuval Shaia wrote:
> > The function build_host_sge_array uses two sge arrays, one for input and
> > one for output.
> > Since the size of the two arrays is the same, the function can write
> > directly to the given source array (i.e. input/output argument).
> >
> > Signed-off-by: Yuval Shaia <yuval.shaia.ml@gmail.com>
> > ---
> >   hw/rdma/rdma_backend.c | 40 +++++++++++++++++-----------------------
> >   1 file changed, 17 insertions(+), 23 deletions(-)
> >
> > diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
> > index c346407cd3..79b9cfb487 100644
> > --- a/hw/rdma/rdma_backend.c
> > +++ b/hw/rdma/rdma_backend.c
> > @@ -378,30 +378,27 @@ static void ah_cache_init(void)
> >   }
> >
> >   static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res,
> > -                                struct ibv_sge *dsge, struct ibv_sge
> *ssge,
> > -                                uint8_t num_sge, uint64_t *total_length)
> > +                                struct ibv_sge *sge, uint8_t num_sge,
> > +                                uint64_t *total_length)
> >   {
> >       RdmaRmMR *mr;
> > -    int ssge_idx;
> > +    int idx;
> >
> > -    for (ssge_idx = 0; ssge_idx < num_sge; ssge_idx++) {
> > -        mr = rdma_rm_get_mr(rdma_dev_res, ssge[ssge_idx].lkey);
> > +    for (idx = 0; idx < num_sge; idx++) {
> > +        mr = rdma_rm_get_mr(rdma_dev_res, sge[idx].lkey);
> >           if (unlikely(!mr)) {
> > -            rdma_error_report("Invalid lkey 0x%x", ssge[ssge_idx].lkey);
> > -            return VENDOR_ERR_INVLKEY | ssge[ssge_idx].lkey;
> > +            rdma_error_report("Invalid lkey 0x%x", sge[idx].lkey);
> > +            return VENDOR_ERR_INVLKEY | sge[idx].lkey;
> >           }
> >
> >   #ifdef LEGACY_RDMA_REG_MR
> > -        dsge->addr = (uintptr_t)mr->virt + ssge[ssge_idx].addr -
> mr->start;
> > +        sge[idx].addr = (uintptr_t)mr->virt + sge[idx].addr - mr->start;
> >   #else
> > -        dsge->addr = ssge[ssge_idx].addr;
> > +        sge[idx].addr = sge[idx].addr;
>
> It seems we don't need the above line.
> Other than that it looks good to me.
>

Yeah, thanks.
It is fixed in the next patch, but it is better to fix it here.
Will fix and post a new set.


>
> Thanks,
> Marcel
>
> >   #endif
> > -        dsge->length = ssge[ssge_idx].length;
> > -        dsge->lkey = rdma_backend_mr_lkey(&mr->backend_mr);
> > +        sge[idx].lkey = rdma_backend_mr_lkey(&mr->backend_mr);
> >
> > -        *total_length += dsge->length;
> > -
> > -        dsge++;
> > +        *total_length += sge[idx].length;
> >       }
> >
> >       return 0;
> > @@ -484,7 +481,6 @@ void rdma_backend_post_send(RdmaBackendDev
> *backend_dev,
> >                               void *ctx)
> >   {
> >       BackendCtx *bctx;
> > -    struct ibv_sge new_sge[MAX_SGE];
> >       uint32_t bctx_id;
> >       int rc;
> >       struct ibv_send_wr wr = {}, *bad_wr;
> > @@ -518,7 +514,7 @@ void rdma_backend_post_send(RdmaBackendDev
> *backend_dev,
> >
> >       rdma_protected_gslist_append_int32(&qp->cqe_ctx_list, bctx_id);
> >
> > -    rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge,
> num_sge,
> > +    rc = build_host_sge_array(backend_dev->rdma_dev_res, sge, num_sge,
> >
>  &backend_dev->rdma_dev_res->stats.tx_len);
> >       if (rc) {
> >           complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
> > @@ -538,7 +534,7 @@ void rdma_backend_post_send(RdmaBackendDev
> *backend_dev,
> >       wr.num_sge = num_sge;
> >       wr.opcode = IBV_WR_SEND;
> >       wr.send_flags = IBV_SEND_SIGNALED;
> > -    wr.sg_list = new_sge;
> > +    wr.sg_list = sge;
> >       wr.wr_id = bctx_id;
> >
> >       rc = ibv_post_send(qp->ibqp, &wr, &bad_wr);
> > @@ -601,7 +597,6 @@ void rdma_backend_post_recv(RdmaBackendDev
> *backend_dev,
> >                               struct ibv_sge *sge, uint32_t num_sge,
> void *ctx)
> >   {
> >       BackendCtx *bctx;
> > -    struct ibv_sge new_sge[MAX_SGE];
> >       uint32_t bctx_id;
> >       int rc;
> >       struct ibv_recv_wr wr = {}, *bad_wr;
> > @@ -635,7 +630,7 @@ void rdma_backend_post_recv(RdmaBackendDev
> *backend_dev,
> >
> >       rdma_protected_gslist_append_int32(&qp->cqe_ctx_list, bctx_id);
> >
> > -    rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge,
> num_sge,
> > +    rc = build_host_sge_array(backend_dev->rdma_dev_res, sge, num_sge,
> >
>  &backend_dev->rdma_dev_res->stats.rx_bufs_len);
> >       if (rc) {
> >           complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
> > @@ -643,7 +638,7 @@ void rdma_backend_post_recv(RdmaBackendDev
> *backend_dev,
> >       }
> >
> >       wr.num_sge = num_sge;
> > -    wr.sg_list = new_sge;
> > +    wr.sg_list = sge;
> >       wr.wr_id = bctx_id;
> >       rc = ibv_post_recv(qp->ibqp, &wr, &bad_wr);
> >       if (rc) {
> > @@ -671,7 +666,6 @@ void rdma_backend_post_srq_recv(RdmaBackendDev
> *backend_dev,
> >                                   uint32_t num_sge, void *ctx)
> >   {
> >       BackendCtx *bctx;
> > -    struct ibv_sge new_sge[MAX_SGE];
> >       uint32_t bctx_id;
> >       int rc;
> >       struct ibv_recv_wr wr = {}, *bad_wr;
> > @@ -688,7 +682,7 @@ void rdma_backend_post_srq_recv(RdmaBackendDev
> *backend_dev,
> >
> >       rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id);
> >
> > -    rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge,
> num_sge,
> > +    rc = build_host_sge_array(backend_dev->rdma_dev_res, sge, num_sge,
> >
>  &backend_dev->rdma_dev_res->stats.rx_bufs_len);
> >       if (rc) {
> >           complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
> > @@ -696,7 +690,7 @@ void rdma_backend_post_srq_recv(RdmaBackendDev
> *backend_dev,
> >       }
> >
> >       wr.num_sge = num_sge;
> > -    wr.sg_list = new_sge;
> > +    wr.sg_list = sge;
> >       wr.wr_id = bctx_id;
> >       rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr);
> >       if (rc) {
>
>

