From: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
To: Yuval Shaia <yuval.shaia@oracle.com>,
	dgilbert@redhat.com, armbru@redhat.com, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 03/10] hw/rdma: Warn when too many consecutive poll CQ triggered on an empty CQ
Date: Wed, 6 Feb 2019 12:14:24 +0200
Message-ID: <3dc624dc-7940-f3cc-6d22-bfe0b9d6bc14@gmail.com>
In-Reply-To: <20190131130850.6850-4-yuval.shaia@oracle.com>

Hi Yuval,

On 1/31/19 3:08 PM, Yuval Shaia wrote:
> To protect against CPU over-utilization when a guest performs an
> unneeded busy-wait loop on an empty CQ.
>
> Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com>
> ---
>   hw/rdma/rdma_backend.c      | 11 +++++++----
>   hw/rdma/rdma_backend.h      |  2 +-
>   hw/rdma/rdma_rm.c           |  1 +
>   hw/rdma/rdma_rm_defs.h      |  6 +++++-
>   hw/rdma/vmw/pvrdma_qp_ops.c | 24 +++++++++++++++++++++++-
>   5 files changed, 37 insertions(+), 7 deletions(-)
>
> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
> index 2f6372f8f0..b7d6afb5da 100644
> --- a/hw/rdma/rdma_backend.c
> +++ b/hw/rdma/rdma_backend.c
> @@ -64,9 +64,9 @@ static inline void complete_work(enum ibv_wc_status status, uint32_t vendor_err,
>       comp_handler(ctx, &wc);
>   }
>   
> -static void rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
> +static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
>   {
> -    int i, ne;
> +    int i, ne, total_ne = 0;
>       BackendCtx *bctx;
>       struct ibv_wc wc[2];
>   
> @@ -76,6 +76,7 @@ static void rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
>           trace_rdma_poll_cq(ne, ibcq);
>   
>           for (i = 0; i < ne; i++) {
> +            total_ne++;

It seems 'i' and 'total_ne' hold the same value; do you need them both?
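
For reference, the resulting loop looks roughly like this (a sketch
reconstructed from the visible hunks; the do/while is assumed from
context, not shown in the diff):

    int i, ne, total_ne = 0;
    struct ibv_wc wc[2];

    do {
        ne = ibv_poll_cq(ibcq, ARRAY_SIZE(wc), wc);
        for (i = 0; i < ne; i++) {
            total_ne++;
            /* ... look up bctx and complete wc[i] ... */
        }
    } while (ne > 0);

    return total_ne;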

>               bctx = rdma_rm_get_cqe_ctx(rdma_dev_res, wc[i].wr_id);
>               if (unlikely(!bctx)) {
>                   rdma_error_report("No matching ctx for req %"PRId64,
> @@ -93,6 +94,8 @@ static void rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
>       if (ne < 0) {
>           rdma_error_report("ibv_poll_cq fail, rc=%d, errno=%d", ne, errno);
>       }
> +
> +    return total_ne;
>   }
>   
>   static void *comp_handler_thread(void *arg)
> @@ -267,9 +270,9 @@ int rdma_backend_query_port(RdmaBackendDev *backend_dev,
>       return 0;
>   }
>   
> -void rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq)
> +int rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq)
>   {
> -    rdma_poll_cq(rdma_dev_res, cq->ibcq);
> +    return rdma_poll_cq(rdma_dev_res, cq->ibcq);
>   }
>   
>   static GHashTable *ah_hash;
> diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
> index 5114c90e67..36305cd148 100644
> --- a/hw/rdma/rdma_backend.h
> +++ b/hw/rdma/rdma_backend.h
> @@ -85,7 +85,7 @@ void rdma_backend_destroy_mr(RdmaBackendMR *mr);
>   int rdma_backend_create_cq(RdmaBackendDev *backend_dev, RdmaBackendCQ *cq,
>                              int cqe);
>   void rdma_backend_destroy_cq(RdmaBackendCQ *cq);
> -void rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq);
> +int rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq);
>   
>   int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
>                              RdmaBackendPD *pd, RdmaBackendCQ *scq,
> diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
> index 64c6ea1a4e..1ba77ac42c 100644
> --- a/hw/rdma/rdma_rm.c
> +++ b/hw/rdma/rdma_rm.c
> @@ -261,6 +261,7 @@ int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
>       if (!cq) {
>           return -ENOMEM;
>       }
> +    atomic_set(&cq->missing_cqe, 0);
>   
>       cq->opaque = opaque;
>       cq->notify = CNT_CLEAR;
> diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
> index 0ba61d1838..08692e87d4 100644
> --- a/hw/rdma/rdma_rm_defs.h
> +++ b/hw/rdma/rdma_rm_defs.h
> @@ -34,7 +34,9 @@
>   #define MAX_QP_INIT_RD_ATOM   16
>   #define MAX_AH                64
>   
> -#define MAX_RM_TBL_NAME 16
> +#define MAX_RM_TBL_NAME             16
> +#define MAX_CONSEQ_EMPTY_POLL_CQ    2048 /* considered as error above this */
> +
>   typedef struct RdmaRmResTbl {
>       char name[MAX_RM_TBL_NAME];
>       QemuMutex lock;
> @@ -59,6 +61,8 @@ typedef struct RdmaRmCQ {
>       RdmaBackendCQ backend_cq;
>       void *opaque;
>       CQNotificationType notify;
> +    int missing_cqe;

Maybe cq_empty_hit_cnt? We don't really have a missing cqe.
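
That is, in RdmaRmCQ something like (a naming sketch only):

    int cq_empty_hit_cnt; /* consecutive polls that found the CQ empty */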

> +    int conseq_empty_poll;
>   } RdmaRmCQ;
>   
>   /* MR (DMA region) */
> diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c
> index 16db726dac..5d650a4943 100644
> --- a/hw/rdma/vmw/pvrdma_qp_ops.c
> +++ b/hw/rdma/vmw/pvrdma_qp_ops.c
> @@ -60,6 +60,8 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
>           return -EINVAL;
>       }
>   
> +    atomic_dec(&cq->missing_cqe);
> +

Shouldn't we set it to 0 here? (If we are counting cq-empty hits.)
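
That is, replace the atomic_dec() above with something like (a sketch,
keeping the current field name):

    /* A CQE was produced, so the empty-poll streak is over */
    atomic_set(&cq->missing_cqe, 0);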

>       ring = (PvrdmaRing *)cq->opaque;
>   
>       /* Step #1: Put CQE on CQ ring */
> @@ -141,12 +143,15 @@ void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle)
>       PvrdmaRing *ring;
>       int sgid_idx;
>       union ibv_gid *sgid;
> +    RdmaRmCQ *cq;
>   
>       qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle);
>       if (unlikely(!qp)) {
>           return;
>       }
>   
> +    cq = rdma_rm_get_cq(&dev->rdma_dev_res, qp->send_cq_handle);
> +
>       ring = (PvrdmaRing *)qp->opaque;
>   
>       wqe = (struct PvrdmaSqWqe *)pvrdma_ring_next_elem_read(ring);
> @@ -186,6 +191,7 @@ void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle)
>               continue;
>           }
>   
> +        atomic_inc(&cq->missing_cqe);
>           rdma_backend_post_send(&dev->backend_dev, &qp->backend_qp, qp->qp_type,
>                                  (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge,
>                                  sgid_idx, sgid,
> @@ -204,12 +210,15 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
>       RdmaRmQP *qp;
>       PvrdmaRqWqe *wqe;
>       PvrdmaRing *ring;
> +    RdmaRmCQ *cq;
>   
>       qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle);
>       if (unlikely(!qp)) {
>           return;
>       }
>   
> +    cq = rdma_rm_get_cq(&dev->rdma_dev_res, qp->recv_cq_handle);
> +
>       ring = &((PvrdmaRing *)qp->opaque)[1];
>   
>       wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring);
> @@ -231,6 +240,7 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
>               continue;
>           }
>   
> +        atomic_inc(&cq->missing_cqe);
>           rdma_backend_post_recv(&dev->backend_dev, &dev->rdma_dev_res,
>                                  &qp->backend_qp, qp->qp_type,
>                                  (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge,
> @@ -245,11 +255,23 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
>   void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle)
>   {
>       RdmaRmCQ *cq;
> +    int polled;
>   
>       cq = rdma_rm_get_cq(dev_res, cq_handle);
>       if (!cq) {
>           return;
>       }
>   
> -    rdma_backend_poll_cq(dev_res, &cq->backend_cq);
> +    polled = rdma_backend_poll_cq(dev_res, &cq->backend_cq);
> +    if (!polled) {
> +        if (cq->conseq_empty_poll == MAX_CONSEQ_EMPTY_POLL_CQ) {
> +            rdma_warn_report("%d consecutive empty polls from CQ %d, missing cqe %d",
> +                             cq->conseq_empty_poll, cq_handle,
> +                             atomic_read(&cq->missing_cqe));
> +            cq->conseq_empty_poll = 0;
> +        }
> +        cq->conseq_empty_poll++;
> +    } else {
> +        cq->conseq_empty_poll = 0;
> +    }
>   }

So we don't really protect against high CPU usage; we only warn.
And are both counters interesting?
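
If we wanted actual throttling, one option (purely a sketch, not
something this patch does; g_usleep() is just one possible back-off)
would be:

    polled = rdma_backend_poll_cq(dev_res, &cq->backend_cq);
    if (!polled) {
        if (++cq->conseq_empty_poll >= MAX_CONSEQ_EMPTY_POLL_CQ) {
            rdma_warn_report("%d consecutive empty polls from CQ %d",
                             cq->conseq_empty_poll, cq_handle);
            g_usleep(1000); /* hypothetical back-off to cap CPU burn */
            cq->conseq_empty_poll = 0;
        }
    } else {
        cq->conseq_empty_poll = 0;
    }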

Thanks,
Marcel
