All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Marcel Apfelbaum <marcel@redhat.com>
Cc: qemu-devel@nongnu.org, ehabkost@redhat.com, imammedo@redhat.com,
	yuval.shaia@oracle.com, pbonzini@redhat.com
Subject: Re: [Qemu-devel] [PATCH V2 4/5] pvrdma: initial implementation
Date: Tue, 19 Dec 2017 19:48:44 +0200	[thread overview]
Message-ID: <20171219181257-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <20171217125457.3429-5-marcel@redhat.com>

I won't have time to review before the next year.
Results of a quick peek.

On Sun, Dec 17, 2017 at 02:54:56PM +0200, Marcel Apfelbaum wrote:
> +static void *mad_handler_thread(void *arg)
> +{
> +    PVRDMADev *dev = (PVRDMADev *)arg;
> +    int rc;
> +    QObject *o_ctx_id;
> +    unsigned long cqe_ctx_id;
> +    BackendCtx *bctx;
> +    /*
> +    int len;
> +    void *mad;
> +    */
> +
> +    pr_dbg("Starting\n");
> +
> +    dev->backend_dev.mad_thread.run = false;
> +
> +    while (dev->backend_dev.mad_thread.run) {
> +        /* Get next buffer to pust MAD into */
> +        o_ctx_id = qlist_pop(dev->backend_dev.mad_agent.recv_mads_list);
> +        if (!o_ctx_id) {
> +            /* pr_dbg("Error: No more free MADs buffers\n"); */
> +            sleep(5);

Looks suspicious.  What is above doing?

> +            continue;
> +        }
> +        cqe_ctx_id = qnum_get_uint(qobject_to_qnum(o_ctx_id));
> +        bctx = rm_get_cqe_ctx(dev, cqe_ctx_id);
> +        if (unlikely(!bctx)) {
> +            pr_dbg("Error: Fail to find ctx for %ld\n", cqe_ctx_id);
> +            continue;
> +        }
> +
> +        pr_dbg("Calling umad_recv\n");
> +        /*
> +        mad = pvrdma_pci_dma_map(PCI_DEVICE(dev), bctx->req.sge[0].addr,
> +                                 bctx->req.sge[0].length);
> +
> +        len = bctx->req.sge[0].length;
> +
> +        do {
> +            rc = umad_recv(dev->backend_dev.mad_agent.port_id, mad, &len, 5000);

That's a huge timeout.

> +        } while ( (rc != ETIMEDOUT) && dev->backend_dev.mad_thread.run);
> +        pr_dbg("umad_recv, rc=%d\n", rc);
> +
> +        pvrdma_pci_dma_unmap(PCI_DEVICE(dev), mad, bctx->req.sge[0].length);
> +        */
> +        rc = -1;
> +
> +        /* rc is used as vendor_err */
> +        comp_handler(rc > 0 ? IB_WC_SUCCESS : IB_WC_GENERAL_ERR, rc,
> +                     bctx->up_ctx);
> +
> +        rm_dealloc_cqe_ctx(dev, cqe_ctx_id);
> +        free(bctx);
> +    }
> +
> +    pr_dbg("Going down\n");
> +    /* TODO: Post cqe for all remaining MADs in list */
> +
> +    qlist_destroy_obj(QOBJECT(dev->backend_dev.mad_agent.recv_mads_list));
> +
> +    return NULL;
> +}
> +
> +static void *comp_handler_thread(void *arg)
> +{
> +    PVRDMADev *dev = (PVRDMADev *)arg;
> +    int rc;
> +    struct ibv_cq *ev_cq;
> +    void *ev_ctx;
> +
> +    pr_dbg("Starting\n");
> +
> +    while (dev->backend_dev.comp_thread.run) {
> +        pr_dbg("Waiting for completion on channel %p\n",
> +               dev->backend_dev.channel);
> +        rc = ibv_get_cq_event(dev->backend_dev.channel, &ev_cq, &ev_ctx);
> +        pr_dbg("ibv_get_cq_event=%d\n", rc);
> +        if (unlikely(rc)) {
> +            pr_dbg("---> ibv_get_cq_event (%d)\n", rc);
> +            continue;
> +        }
> +
> +        if (unlikely(ibv_req_notify_cq(ev_cq, 0))) {
> +            pr_dbg("---> ibv_req_notify_cq\n");
> +        }
> +
> +        poll_cq(dev, ev_cq, false);
> +
> +        ibv_ack_cq_events(ev_cq, 1);
> +    }
> +
> +    pr_dbg("Going down\n");
> +    /* TODO: Post cqe for all remaining buffs that were posted */
> +
> +    return NULL;
> +}
> +
> +void backend_register_comp_handler(void (*handler)(int status,
> +                                   unsigned int vendor_err, void *ctx))
> +{
> +    comp_handler = handler;
> +}
> +
> +int backend_query_port(BackendDevice *dev, struct pvrdma_port_attr *attrs)
> +{
> +    int rc;
> +    struct ibv_port_attr port_attr;
> +
> +    rc = ibv_query_port(dev->context, dev->port_num, &port_attr);
> +    if (rc) {
> +        pr_dbg("Error %d from ibv_query_port\n", rc);
> +        return -EIO;
> +    }
> +
> +    attrs->state = port_attr.state;
> +    attrs->max_mtu = port_attr.max_mtu;
> +    attrs->active_mtu = port_attr.active_mtu;
> +    attrs->gid_tbl_len = port_attr.gid_tbl_len;
> +    attrs->pkey_tbl_len = port_attr.pkey_tbl_len;
> +    attrs->phys_state = port_attr.phys_state;
> +
> +    return 0;
> +}
> +
> +void backend_poll_cq(PVRDMADev *dev, BackendCQ *cq)
> +{
> +    poll_cq(dev, cq->ibcq, true);
> +}
> +
> +static GHashTable *ah_hash;
> +
> +static struct ibv_ah *create_ah(BackendDevice *dev, struct ibv_pd *pd,
> +                                union ibv_gid *dgid, uint8_t sgid_idx)
> +{
> +    GBytes *ah_key = g_bytes_new(dgid, sizeof(*dgid));
> +    struct ibv_ah *ah = g_hash_table_lookup(ah_hash, ah_key);
> +
> +    if (ah) {
> +        trace_create_ah_cache_hit(be64_to_cpu(dgid->global.subnet_prefix),
> +                                  be64_to_cpu(dgid->global.interface_id));
> +    } else {
> +        struct ibv_ah_attr ah_attr = {
> +            .is_global     = 1,
> +            .port_num      = dev->port_num,
> +            .grh.hop_limit = 1,
> +        };
> +
> +        ah_attr.grh.dgid = *dgid;
> +        ah_attr.grh.sgid_index = sgid_idx;
> +
> +        ah = ibv_create_ah(pd, &ah_attr);
> +        if (ah) {
> +            g_hash_table_insert(ah_hash, ah_key, ah);
> +        } else {
> +            pr_dbg("ibv_create_ah failed for gid <%lx %lx>\n",
> +                    be64_to_cpu(dgid->global.subnet_prefix),
> +                    be64_to_cpu(dgid->global.interface_id));
> +        }
> +
> +        trace_create_ah_cache_miss(be64_to_cpu(dgid->global.subnet_prefix),
> +                                   be64_to_cpu(dgid->global.interface_id));
> +    }
> +
> +    return ah;
> +}
> +
> +static void destroy_ah(gpointer data)
> +{
> +    struct ibv_ah *ah = data;
> +    ibv_destroy_ah(ah);
> +}
> +
> +static void ah_cache_init(void)
> +{
> +    ah_hash = g_hash_table_new_full(g_bytes_hash, g_bytes_equal,
> +                                    NULL, destroy_ah);
> +}
> +
> +static int send_mad(PVRDMADev *dev, struct pvrdma_sge *sge, u32 num_sge)
> +{
> +    int ret = -1;
> +
> +    /*
> +     * TODO: Currently QP1 is not supported
> +     *
> +    PCIDevice *pci_dev = PCI_DEVICE(dev);
> +    char mad_msg[1024];
> +    void *hdr, *msg;
> +    struct ib_user_mad *umad = (struct ib_user_mad *)&mad_msg;
> +
> +    umad->length = sge[0].length + sge[1].length;
> +
> +    if (num_sge != 2)
> +        return -EINVAL;
> +
> +    pr_dbg("msg_len=%d\n", umad->length);
> +
> +    hdr = pvrdma_pci_dma_map(pci_dev, sge[0].addr, sge[0].length);
> +    msg = pvrdma_pci_dma_map(pci_dev, sge[1].addr, sge[1].length);
> +
> +    memcpy(&mad_msg[64], hdr, sge[0].length);
> +    memcpy(&mad_msg[sge[0].length+64], msg, sge[1].length);
> +
> +    pvrdma_pci_dma_unmap(pci_dev, msg, sge[1].length);
> +    pvrdma_pci_dma_unmap(pci_dev, hdr, sge[0].length);
> +
> +    ret = umad_send(dev->backend_dev.mad_agent.port_id,
> +                    dev->backend_dev.mad_agent.agent_id,
> +                    mad_msg, umad->length, 10, 10);
> +    */

Then what is above code doing here?

Also, isn't QP1 a big deal? If it's missing then how do you
do multicast etc?

How does guest know it's missing?



...

> diff --git a/hw/net/pvrdma/pvrdma_utils.h b/hw/net/pvrdma/pvrdma_utils.h
> new file mode 100644
> index 0000000000..a09e39946c
> --- /dev/null
> +++ b/hw/net/pvrdma/pvrdma_utils.h
> @@ -0,0 +1,41 @@
> +/*
> + * QEMU VMWARE paravirtual RDMA interface definitions
> + *
> + * Developed by Oracle & Redhat
> + *
> + * Authors:
> + *     Yuval Shaia <yuval.shaia@oracle.com>
> + *     Marcel Apfelbaum <marcel@redhat.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef PVRDMA_UTILS_H
> +#define PVRDMA_UTILS_H
> +
> +#include <include/hw/pci/pci.h>
> +
> +#define pr_info(fmt, ...) \
> +    fprintf(stdout, "%s: %-20s (%3d): " fmt, "pvrdma",  __func__, __LINE__,\
> +           ## __VA_ARGS__)
> +
> +#define pr_err(fmt, ...) \
> +    fprintf(stderr, "%s: Error at %-20s (%3d): " fmt, "pvrdma", __func__, \
> +        __LINE__, ## __VA_ARGS__)
> +
> +#ifdef PVRDMA_DEBUG
> +#define pr_dbg(fmt, ...) \
> +    fprintf(stdout, "%s: %-20s (%3d): " fmt, "pvrdma", __func__, __LINE__,\
> +           ## __VA_ARGS__)
> +#else
> +#define pr_dbg(fmt, ...)
> +#endif
> +
> +void pvrdma_pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len);
> +void *pvrdma_pci_dma_map(PCIDevice *dev, dma_addr_t addr, dma_addr_t plen);
> +void *map_to_pdir(PCIDevice *pdev, uint64_t pdir_dma, uint32_t nchunks,
> +                  size_t length);
> +
> +#endif

Can you make sure all prefixes are pvrdma_?

> diff --git a/hw/net/pvrdma/trace-events b/hw/net/pvrdma/trace-events
> new file mode 100644
> index 0000000000..bbc52286bc
> --- /dev/null
> +++ b/hw/net/pvrdma/trace-events
> @@ -0,0 +1,9 @@
> +# See docs/tracing.txt for syntax documentation.
> +
> +# hw/net/pvrdma/pvrdma_main.c
> +pvrdma_regs_read(uint64_t addr, uint64_t val) "regs[0x%lx] = 0x%lx"
> +pvrdma_regs_write(uint64_t addr, uint64_t val) "regs[0x%lx] = 0x%lx"
> +
> +#hw/net/pvrdma/pvrdma_backend.c
> +create_ah_cache_hit(uint64_t subnet, uint64_t net_id) "subnet = 0x%lx net_id = 0x%lx"
> +create_ah_cache_miss(uint64_t subnet, uint64_t net_id) "subnet = 0x%lx net_id = 0x%lx"
> diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
> index 35df1874a9..1dbf53627c 100644
> --- a/include/hw/pci/pci_ids.h
> +++ b/include/hw/pci/pci_ids.h
> @@ -266,4 +266,7 @@
>  #define PCI_VENDOR_ID_TEWS               0x1498
>  #define PCI_DEVICE_ID_TEWS_TPCI200       0x30C8
>  
> +#define PCI_VENDOR_ID_VMWARE             0x15ad
> +#define PCI_DEVICE_ID_VMWARE_PVRDMA      0x0820
> +
>  #endif
> -- 
> 2.13.5

  parent reply	other threads:[~2017-12-19 17:48 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-12-17 12:54 [Qemu-devel] [PATCH V2 0/5] hw/pvrdma: PVRDMA device implementation Marcel Apfelbaum
2017-12-17 12:54 ` [Qemu-devel] [PATCH V2 1/5] pci/shpc: Move function to generic header file Marcel Apfelbaum
2017-12-17 18:16   ` Philippe Mathieu-Daudé
2017-12-17 19:03     ` Yuval Shaia
2017-12-17 12:54 ` [Qemu-devel] [PATCH V2 2/5] mem: add share parameter to memory-backend-ram Marcel Apfelbaum
2017-12-17 12:54 ` [Qemu-devel] [PATCH V2 3/5] docs: add pvrdma device documentation Marcel Apfelbaum
2017-12-19 17:47   ` Michael S. Tsirkin
2017-12-20 14:45     ` Marcel Apfelbaum
2017-12-17 12:54 ` [Qemu-devel] [PATCH V2 4/5] pvrdma: initial implementation Marcel Apfelbaum
2017-12-19 16:12   ` Michael S. Tsirkin
2017-12-19 17:29     ` Marcel Apfelbaum
2017-12-19 17:48   ` Michael S. Tsirkin [this message]
2017-12-20 15:25     ` Yuval Shaia
2017-12-20 18:01       ` Michael S. Tsirkin
2017-12-19 19:13   ` Philippe Mathieu-Daudé
2017-12-20  4:08     ` Michael S. Tsirkin
2017-12-20 14:46       ` Marcel Apfelbaum
2017-12-17 12:54 ` [Qemu-devel] [PATCH V2 5/5] MAINTAINERS: add entry for hw/net/pvrdma Marcel Apfelbaum
2017-12-19 17:49   ` Michael S. Tsirkin
2017-12-19 18:05 ` [Qemu-devel] [PATCH V2 0/5] hw/pvrdma: PVRDMA device implementation Michael S. Tsirkin
2017-12-20 15:07   ` Marcel Apfelbaum
2017-12-21  0:05     ` Michael S. Tsirkin
2017-12-21  7:27       ` Yuval Shaia
2017-12-21 14:22         ` Michael S. Tsirkin
2017-12-21 15:59           ` Marcel Apfelbaum
2017-12-21 20:46             ` Michael S. Tsirkin
2017-12-21 22:30               ` Yuval Shaia
2017-12-22  4:58                 ` Marcel Apfelbaum
2017-12-20 17:56   ` Yuval Shaia
2017-12-20 18:05     ` Michael S. Tsirkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171219181257-mutt-send-email-mst@kernel.org \
    --to=mst@redhat.com \
    --cc=ehabkost@redhat.com \
    --cc=imammedo@redhat.com \
    --cc=marcel@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=yuval.shaia@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.