From: "Michael S. Tsirkin" <mst@redhat.com>
To: Marcel Apfelbaum <marcel@redhat.com>
Cc: qemu-devel@nongnu.org, ehabkost@redhat.com, imammedo@redhat.com,
yuval.shaia@oracle.com, pbonzini@redhat.com
Subject: Re: [Qemu-devel] [PATCH V2 4/5] pvrdma: initial implementation
Date: Tue, 19 Dec 2017 19:48:44 +0200 [thread overview]
Message-ID: <20171219181257-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <20171217125457.3429-5-marcel@redhat.com>
I won't have time to review before the next year.
Results of a quick peek.
On Sun, Dec 17, 2017 at 02:54:56PM +0200, Marcel Apfelbaum wrote:
> +static void *mad_handler_thread(void *arg)
> +{
> + PVRDMADev *dev = (PVRDMADev *)arg;
> + int rc;
> + QObject *o_ctx_id;
> + unsigned long cqe_ctx_id;
> + BackendCtx *bctx;
> + /*
> + int len;
> + void *mad;
> + */
> +
> + pr_dbg("Starting\n");
> +
> + dev->backend_dev.mad_thread.run = false;
> +
> + while (dev->backend_dev.mad_thread.run) {
> + /* Get next buffer to pust MAD into */
> + o_ctx_id = qlist_pop(dev->backend_dev.mad_agent.recv_mads_list);
> + if (!o_ctx_id) {
> + /* pr_dbg("Error: No more free MADs buffers\n"); */
> + sleep(5);
Looks suspicious. What is above doing?
> + continue;
> + }
> + cqe_ctx_id = qnum_get_uint(qobject_to_qnum(o_ctx_id));
> + bctx = rm_get_cqe_ctx(dev, cqe_ctx_id);
> + if (unlikely(!bctx)) {
> + pr_dbg("Error: Fail to find ctx for %ld\n", cqe_ctx_id);
> + continue;
> + }
> +
> + pr_dbg("Calling umad_recv\n");
> + /*
> + mad = pvrdma_pci_dma_map(PCI_DEVICE(dev), bctx->req.sge[0].addr,
> + bctx->req.sge[0].length);
> +
> + len = bctx->req.sge[0].length;
> +
> + do {
> + rc = umad_recv(dev->backend_dev.mad_agent.port_id, mad, &len, 5000);
That's a huge timeout.
> + } while ( (rc != ETIMEDOUT) && dev->backend_dev.mad_thread.run);
> + pr_dbg("umad_recv, rc=%d\n", rc);
> +
> + pvrdma_pci_dma_unmap(PCI_DEVICE(dev), mad, bctx->req.sge[0].length);
> + */
> + rc = -1;
> +
> + /* rc is used as vendor_err */
> + comp_handler(rc > 0 ? IB_WC_SUCCESS : IB_WC_GENERAL_ERR, rc,
> + bctx->up_ctx);
> +
> + rm_dealloc_cqe_ctx(dev, cqe_ctx_id);
> + free(bctx);
> + }
> +
> + pr_dbg("Going down\n");
> + /* TODO: Post cqe for all remaining MADs in list */
> +
> + qlist_destroy_obj(QOBJECT(dev->backend_dev.mad_agent.recv_mads_list));
> +
> + return NULL;
> +}
> +
> +static void *comp_handler_thread(void *arg)
> +{
> + PVRDMADev *dev = (PVRDMADev *)arg;
> + int rc;
> + struct ibv_cq *ev_cq;
> + void *ev_ctx;
> +
> + pr_dbg("Starting\n");
> +
> + while (dev->backend_dev.comp_thread.run) {
> + pr_dbg("Waiting for completion on channel %p\n",
> + dev->backend_dev.channel);
> + rc = ibv_get_cq_event(dev->backend_dev.channel, &ev_cq, &ev_ctx);
> + pr_dbg("ibv_get_cq_event=%d\n", rc);
> + if (unlikely(rc)) {
> + pr_dbg("---> ibv_get_cq_event (%d)\n", rc);
> + continue;
> + }
> +
> + if (unlikely(ibv_req_notify_cq(ev_cq, 0))) {
> + pr_dbg("---> ibv_req_notify_cq\n");
> + }
> +
> + poll_cq(dev, ev_cq, false);
> +
> + ibv_ack_cq_events(ev_cq, 1);
> + }
> +
> + pr_dbg("Going down\n");
> + /* TODO: Post cqe for all remaining buffs that were posted */
> +
> + return NULL;
> +}
> +
> +void backend_register_comp_handler(void (*handler)(int status,
> + unsigned int vendor_err, void *ctx))
> +{
> + comp_handler = handler;
> +}
> +
> +int backend_query_port(BackendDevice *dev, struct pvrdma_port_attr *attrs)
> +{
> + int rc;
> + struct ibv_port_attr port_attr;
> +
> + rc = ibv_query_port(dev->context, dev->port_num, &port_attr);
> + if (rc) {
> + pr_dbg("Error %d from ibv_query_port\n", rc);
> + return -EIO;
> + }
> +
> + attrs->state = port_attr.state;
> + attrs->max_mtu = port_attr.max_mtu;
> + attrs->active_mtu = port_attr.active_mtu;
> + attrs->gid_tbl_len = port_attr.gid_tbl_len;
> + attrs->pkey_tbl_len = port_attr.pkey_tbl_len;
> + attrs->phys_state = port_attr.phys_state;
> +
> + return 0;
> +}
> +
> +void backend_poll_cq(PVRDMADev *dev, BackendCQ *cq)
> +{
> + poll_cq(dev, cq->ibcq, true);
> +}
> +
> +static GHashTable *ah_hash;
> +
> +static struct ibv_ah *create_ah(BackendDevice *dev, struct ibv_pd *pd,
> + union ibv_gid *dgid, uint8_t sgid_idx)
> +{
> + GBytes *ah_key = g_bytes_new(dgid, sizeof(*dgid));
> + struct ibv_ah *ah = g_hash_table_lookup(ah_hash, ah_key);
> +
> + if (ah) {
> + trace_create_ah_cache_hit(be64_to_cpu(dgid->global.subnet_prefix),
> + be64_to_cpu(dgid->global.interface_id));
> + } else {
> + struct ibv_ah_attr ah_attr = {
> + .is_global = 1,
> + .port_num = dev->port_num,
> + .grh.hop_limit = 1,
> + };
> +
> + ah_attr.grh.dgid = *dgid;
> + ah_attr.grh.sgid_index = sgid_idx;
> +
> + ah = ibv_create_ah(pd, &ah_attr);
> + if (ah) {
> + g_hash_table_insert(ah_hash, ah_key, ah);
> + } else {
> + pr_dbg("ibv_create_ah failed for gid <%lx %lx>\n",
> + be64_to_cpu(dgid->global.subnet_prefix),
> + be64_to_cpu(dgid->global.interface_id));
> + }
> +
> + trace_create_ah_cache_miss(be64_to_cpu(dgid->global.subnet_prefix),
> + be64_to_cpu(dgid->global.interface_id));
> + }
> +
> + return ah;
> +}
> +
> +static void destroy_ah(gpointer data)
> +{
> + struct ibv_ah *ah = data;
> + ibv_destroy_ah(ah);
> +}
> +
> +static void ah_cache_init(void)
> +{
> + ah_hash = g_hash_table_new_full(g_bytes_hash, g_bytes_equal,
> + NULL, destroy_ah);
> +}
> +
> +static int send_mad(PVRDMADev *dev, struct pvrdma_sge *sge, u32 num_sge)
> +{
> + int ret = -1;
> +
> + /*
> + * TODO: Currently QP1 is not supported
> + *
> + PCIDevice *pci_dev = PCI_DEVICE(dev);
> + char mad_msg[1024];
> + void *hdr, *msg;
> + struct ib_user_mad *umad = (struct ib_user_mad *)&mad_msg;
> +
> + umad->length = sge[0].length + sge[1].length;
> +
> + if (num_sge != 2)
> + return -EINVAL;
> +
> + pr_dbg("msg_len=%d\n", umad->length);
> +
> + hdr = pvrdma_pci_dma_map(pci_dev, sge[0].addr, sge[0].length);
> + msg = pvrdma_pci_dma_map(pci_dev, sge[1].addr, sge[1].length);
> +
> + memcpy(&mad_msg[64], hdr, sge[0].length);
> + memcpy(&mad_msg[sge[0].length+64], msg, sge[1].length);
> +
> + pvrdma_pci_dma_unmap(pci_dev, msg, sge[1].length);
> + pvrdma_pci_dma_unmap(pci_dev, hdr, sge[0].length);
> +
> + ret = umad_send(dev->backend_dev.mad_agent.port_id,
> + dev->backend_dev.mad_agent.agent_id,
> + mad_msg, umad->length, 10, 10);
> + */
Then what is above code doing here?
Also, isn't QP1 a big deal? If it's missing then how do you
do multicast etc?
How does guest know it's missing?
...
> diff --git a/hw/net/pvrdma/pvrdma_utils.h b/hw/net/pvrdma/pvrdma_utils.h
> new file mode 100644
> index 0000000000..a09e39946c
> --- /dev/null
> +++ b/hw/net/pvrdma/pvrdma_utils.h
> @@ -0,0 +1,41 @@
> +/*
> + * QEMU VMWARE paravirtual RDMA interface definitions
> + *
> + * Developed by Oracle & Redhat
> + *
> + * Authors:
> + * Yuval Shaia <yuval.shaia@oracle.com>
> + * Marcel Apfelbaum <marcel@redhat.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef PVRDMA_UTILS_H
> +#define PVRDMA_UTILS_H
> +
> +#include <include/hw/pci/pci.h>
> +
> +#define pr_info(fmt, ...) \
> + fprintf(stdout, "%s: %-20s (%3d): " fmt, "pvrdma", __func__, __LINE__,\
> + ## __VA_ARGS__)
> +
> +#define pr_err(fmt, ...) \
> + fprintf(stderr, "%s: Error at %-20s (%3d): " fmt, "pvrdma", __func__, \
> + __LINE__, ## __VA_ARGS__)
> +
> +#ifdef PVRDMA_DEBUG
> +#define pr_dbg(fmt, ...) \
> + fprintf(stdout, "%s: %-20s (%3d): " fmt, "pvrdma", __func__, __LINE__,\
> + ## __VA_ARGS__)
> +#else
> +#define pr_dbg(fmt, ...)
> +#endif
> +
> +void pvrdma_pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len);
> +void *pvrdma_pci_dma_map(PCIDevice *dev, dma_addr_t addr, dma_addr_t plen);
> +void *map_to_pdir(PCIDevice *pdev, uint64_t pdir_dma, uint32_t nchunks,
> + size_t length);
> +
> +#endif
Can you make sure all prefixes are pvrdma_?
> diff --git a/hw/net/pvrdma/trace-events b/hw/net/pvrdma/trace-events
> new file mode 100644
> index 0000000000..bbc52286bc
> --- /dev/null
> +++ b/hw/net/pvrdma/trace-events
> @@ -0,0 +1,9 @@
> +# See docs/tracing.txt for syntax documentation.
> +
> +# hw/net/pvrdma/pvrdma_main.c
> +pvrdma_regs_read(uint64_t addr, uint64_t val) "regs[0x%lx] = 0x%lx"
> +pvrdma_regs_write(uint64_t addr, uint64_t val) "regs[0x%lx] = 0x%lx"
> +
> +#hw/net/pvrdma/pvrdma_backend.c
> +create_ah_cache_hit(uint64_t subnet, uint64_t net_id) "subnet = 0x%lx net_id = 0x%lx"
> +create_ah_cache_miss(uint64_t subnet, uint64_t net_id) "subnet = 0x%lx net_id = 0x%lx"
> diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
> index 35df1874a9..1dbf53627c 100644
> --- a/include/hw/pci/pci_ids.h
> +++ b/include/hw/pci/pci_ids.h
> @@ -266,4 +266,7 @@
> #define PCI_VENDOR_ID_TEWS 0x1498
> #define PCI_DEVICE_ID_TEWS_TPCI200 0x30C8
>
> +#define PCI_VENDOR_ID_VMWARE 0x15ad
> +#define PCI_DEVICE_ID_VMWARE_PVRDMA 0x0820
> +
> #endif
> --
> 2.13.5
next prev parent reply other threads:[~2017-12-19 17:48 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-12-17 12:54 [Qemu-devel] [PATCH V2 0/5] hw/pvrdma: PVRDMA device implementation Marcel Apfelbaum
2017-12-17 12:54 ` [Qemu-devel] [PATCH V2 1/5] pci/shpc: Move function to generic header file Marcel Apfelbaum
2017-12-17 18:16 ` Philippe Mathieu-Daudé
2017-12-17 19:03 ` Yuval Shaia
2017-12-17 12:54 ` [Qemu-devel] [PATCH V2 2/5] mem: add share parameter to memory-backend-ram Marcel Apfelbaum
2017-12-17 12:54 ` [Qemu-devel] [PATCH V2 3/5] docs: add pvrdma device documentation Marcel Apfelbaum
2017-12-19 17:47 ` Michael S. Tsirkin
2017-12-20 14:45 ` Marcel Apfelbaum
2017-12-17 12:54 ` [Qemu-devel] [PATCH V2 4/5] pvrdma: initial implementation Marcel Apfelbaum
2017-12-19 16:12 ` Michael S. Tsirkin
2017-12-19 17:29 ` Marcel Apfelbaum
2017-12-19 17:48 ` Michael S. Tsirkin [this message]
2017-12-20 15:25 ` Yuval Shaia
2017-12-20 18:01 ` Michael S. Tsirkin
2017-12-19 19:13 ` Philippe Mathieu-Daudé
2017-12-20 4:08 ` Michael S. Tsirkin
2017-12-20 14:46 ` Marcel Apfelbaum
2017-12-17 12:54 ` [Qemu-devel] [PATCH V2 5/5] MAINTAINERS: add entry for hw/net/pvrdma Marcel Apfelbaum
2017-12-19 17:49 ` Michael S. Tsirkin
2017-12-19 18:05 ` [Qemu-devel] [PATCH V2 0/5] hw/pvrdma: PVRDMA device implementation Michael S. Tsirkin
2017-12-20 15:07 ` Marcel Apfelbaum
2017-12-21 0:05 ` Michael S. Tsirkin
2017-12-21 7:27 ` Yuval Shaia
2017-12-21 14:22 ` Michael S. Tsirkin
2017-12-21 15:59 ` Marcel Apfelbaum
2017-12-21 20:46 ` Michael S. Tsirkin
2017-12-21 22:30 ` Yuval Shaia
2017-12-22 4:58 ` Marcel Apfelbaum
2017-12-20 17:56 ` Yuval Shaia
2017-12-20 18:05 ` Michael S. Tsirkin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171219181257-mutt-send-email-mst@kernel.org \
--to=mst@redhat.com \
--cc=ehabkost@redhat.com \
--cc=imammedo@redhat.com \
--cc=marcel@redhat.com \
--cc=pbonzini@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=yuval.shaia@oracle.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.