From: Alex Williamson <alex.williamson@redhat.com>
To: Alexey Kardashevskiy <aik@ozlabs.ru>
Cc: qemu-devel@nongnu.org, qemu-ppc@nongnu.org,
	"David Gibson" <david@gibson.dropbear.id.au>,
	"Gavin Shan" <gwshan@linux.vnet.ibm.com>,
	"Sam Bobroff" <sbobroff@linux.ibm.com>,
	"Piotr Jaroszynski" <pjaroszynski@nvidia.com>,
	"Leonardo Augusto Guimarães Garcia" <lagarcia@br.ibm.com>,
	"Jose Ricardo Ziviani" <joserz@linux.ibm.com>,
	"Daniel Henrique Barboza" <danielhb413@gmail.com>
Subject: Re: [Qemu-devel] [PATCH qemu v4 3/3] spapr: Support NVIDIA V100 GPU with NVLink2
Date: Thu, 7 Mar 2019 15:02:32 -0700	[thread overview]
Message-ID: <20190307150232.7384b7ce@w520.home> (raw)
In-Reply-To: <20190307050518.64968-4-aik@ozlabs.ru>

On Thu,  7 Mar 2019 16:05:18 +1100
Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
> diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
> index 40a12001f580..15ec0b4c2723 100644
> --- a/hw/vfio/pci-quirks.c
> +++ b/hw/vfio/pci-quirks.c
> @@ -2180,3 +2180,123 @@ int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp)
>  
>      return 0;
>  }
> +
> +static void vfio_pci_nvlink2_get_tgt(Object *obj, Visitor *v,
> +                                     const char *name,
> +                                     void *opaque, Error **errp)
> +{
> +    uint64_t tgt = (uint64_t) opaque;
> +    visit_type_uint64(v, name, &tgt, errp);
> +}
> +
> +static void vfio_pci_nvlink2_get_link_speed(Object *obj, Visitor *v,
> +                                                 const char *name,
> +                                                 void *opaque, Error **errp)
> +{
> +    uint32_t link_speed = (uint32_t)(uint64_t) opaque;
> +    visit_type_uint32(v, name, &link_speed, errp);
> +}
> +
> +int vfio_pci_nvidia_v100_ram_init(VFIOPCIDevice *vdev, Error **errp)
> +{
> +    int ret;
> +    void *p;
> +    struct vfio_region_info *nv2region = NULL;
> +    struct vfio_info_cap_header *hdr;
> +    MemoryRegion *nv2mr = g_malloc0(sizeof(*nv2mr));

This is leaked in the error paths below, and there's no cleanup on
finalize.  I assume these devices don't support hotplug, but they could
at least use the existing quirk infrastructure so as not to set a bad
precedent.

> +
> +    ret = vfio_get_dev_region_info(&vdev->vbasedev,
> +                                   VFIO_REGION_TYPE_PCI_VENDOR_TYPE |
> +                                   PCI_VENDOR_ID_NVIDIA,
> +                                   VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM,
> +                                   &nv2region);
> +    if (ret) {
> +        return ret;
> +    }
> +
> +    p = mmap(NULL, nv2region->size, PROT_READ | PROT_WRITE | PROT_EXEC,
> +             MAP_SHARED, vdev->vbasedev.fd, nv2region->offset);
> +
> +    if (!p) {
> +        return -errno;
> +    }

I think the suggestion above simply requires defining a quirk earlier
in the function:

VFIOQuirk *quirk;

Initializing it with one MemoryRegion here:

quirk = vfio_quirk_alloc(1);

> +
> +    memory_region_init_ram_ptr(nv2mr, OBJECT(vdev), "nvlink2-mr",

s/nv2mr/quirk->mem/

> +                               nv2region->size, p);

Then add it to the device; for instance, assuming there's always a
BAR0, attach it there:

QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);

At least then it pretends to support cleanup.
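
Putting it together, a rough (untested) sketch of what I have in mind,
reusing vfio_quirk_alloc() and the per-BAR quirk list like the other
quirks in this file:

VFIOQuirk *quirk = vfio_quirk_alloc(1);

/* Back the quirk's MemoryRegion with the mmap'd NVLink2 RAM region */
memory_region_init_ram_ptr(quirk->mem, OBJECT(vdev), "nvlink2-mr",
                           nv2region->size, p);
/* Hang it off BAR0 so the existing quirk teardown can find it */
QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);

That way the existing BAR quirk teardown at least has a handle on the
MemoryRegion instead of it being orphaned.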

> +
> +    hdr = vfio_get_region_info_cap(nv2region,
> +                                   VFIO_REGION_INFO_CAP_NVLINK2_SSATGT);
> +    if (hdr) {
> +        struct vfio_region_info_cap_nvlink2_ssatgt *cap = (void *) hdr;
> +
> +        object_property_add(OBJECT(vdev), "nvlink2-tgt", "uint64",
> +                            vfio_pci_nvlink2_get_tgt, NULL, NULL,
> +                            (void *) cap->tgt, NULL);
> +        trace_vfio_pci_nvidia_gpu_setup_quirk(vdev->vbasedev.name, cap->tgt,
> +                                              nv2region->size);
> +    }
> +    g_free(nv2region);
> +
> +    return 0;
> +}
> +
> +int vfio_pci_nvlink2_init(VFIOPCIDevice *vdev, Error **errp)
> +{
> +    int ret;
> +    void *p;
> +    struct vfio_region_info *atsd_region = NULL;
> +    struct vfio_info_cap_header *hdr;
> +
> +    ret = vfio_get_dev_region_info(&vdev->vbasedev,
> +                                   VFIO_REGION_TYPE_PCI_VENDOR_TYPE |
> +                                   PCI_VENDOR_ID_IBM,
> +                                   VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD,
> +                                   &atsd_region);
> +    if (ret) {
> +        return ret;
> +    }
> +
> +    /* Some NVLink bridges come without assigned ATSD, skip MR part */
> +    if (atsd_region->size) {
> +        MemoryRegion *atsd_mr = g_malloc0(sizeof(*atsd_mr));
> +
> +        p = mmap(NULL, atsd_region->size, PROT_READ | PROT_WRITE | PROT_EXEC,
> +                 MAP_SHARED, vdev->vbasedev.fd, atsd_region->offset);
> +
> +        if (!p) {
> +            return -errno;

Leaks atsd_mr.  This could similarly use the existing VFIOQuirk
infrastructure.
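
ie. something like this (untested) inside the atsd_region->size branch,
mirroring the suggestion above:

VFIOQuirk *quirk = vfio_quirk_alloc(1);

/* Wrap the mmap'd ATSD window in a quirk-owned ram_device region */
memory_region_init_ram_device_ptr(quirk->mem, OBJECT(vdev),
                                  "nvlink2-atsd-mr",
                                  atsd_region->size, p);
QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);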

> +        }
> +
> +        memory_region_init_ram_device_ptr(atsd_mr, OBJECT(vdev),
> +                                          "nvlink2-atsd-mr",
> +                                          atsd_region->size,
> +                                          p);
> +    }
> +
> +    hdr = vfio_get_region_info_cap(atsd_region,
> +                                   VFIO_REGION_INFO_CAP_NVLINK2_SSATGT);
> +    if (hdr) {
> +        struct vfio_region_info_cap_nvlink2_ssatgt *cap = (void *) hdr;
> +
> +        object_property_add(OBJECT(vdev), "nvlink2-tgt", "uint64",
> +                            vfio_pci_nvlink2_get_tgt, NULL, NULL,
> +                            (void *) cap->tgt, NULL);
> +        trace_vfio_pci_nvlink2_setup_quirk_ssatgt(vdev->vbasedev.name, cap->tgt,
> +                                                  atsd_region->size);
> +    }
> +
> +    hdr = vfio_get_region_info_cap(atsd_region,
> +                                   VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD);
> +    if (hdr) {
> +        struct vfio_region_info_cap_nvlink2_lnkspd *cap = (void *) hdr;
> +
> +        object_property_add(OBJECT(vdev), "nvlink2-link-speed", "uint32",
> +                            vfio_pci_nvlink2_get_link_speed, NULL, NULL,
> +                            (void *) (uint64_t) cap->link_speed, NULL);
> +        trace_vfio_pci_nvlink2_setup_quirk_lnkspd(vdev->vbasedev.name,
> +                                                  cap->link_speed);
> +    }
> +    g_free(atsd_region);
> +
> +    return 0;
> +}
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index dd12f363915d..07aa141aabe6 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -3069,6 +3069,20 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>          goto out_teardown;
>      }
>  
> +    if (vdev->vendor_id == PCI_VENDOR_ID_NVIDIA) {
> +        ret = vfio_pci_nvidia_v100_ram_init(vdev, errp);
> +        if (ret && ret != -ENODEV) {
> +            error_report("Failed to setup NVIDIA V100 GPU RAM");
> +        }
> +    }
> +
> +    if (vdev->vendor_id == PCI_VENDOR_ID_IBM) {
> +        ret = vfio_pci_nvlink2_init(vdev, errp);
> +        if (ret && ret != -ENODEV) {
> +            error_report("Failed to setup NVlink2 bridge");
> +        }
> +    }
> +
>      vfio_register_err_notifier(vdev);
>      vfio_register_req_notifier(vdev);
>      vfio_setup_resetfn_quirk(vdev);
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index cf1e8868182b..88841e9a61da 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -87,6 +87,10 @@ vfio_pci_igd_opregion_enabled(const char *name) "%s"
>  vfio_pci_igd_host_bridge_enabled(const char *name) "%s"
>  vfio_pci_igd_lpc_bridge_enabled(const char *name) "%s"
>  
> +vfio_pci_nvidia_gpu_setup_quirk(const char *name, uint64_t tgt, uint64_t size) "%s tgt=0x%"PRIx64" size=0x%"PRIx64
> +vfio_pci_nvlink2_setup_quirk_ssatgt(const char *name, uint64_t tgt, uint64_t size) "%s tgt=0x%"PRIx64" size=0x%"PRIx64
> +vfio_pci_nvlink2_setup_quirk_lnkspd(const char *name, uint32_t link_speed) "%s link_speed=0x%x"
> +
>  # hw/vfio/common.c
>  vfio_region_write(const char *name, int index, uint64_t addr, uint64_t data, unsigned size) " (%s:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)"
>  vfio_region_read(char *name, int index, uint64_t addr, unsigned size, uint64_t data) " (%s:region%d+0x%"PRIx64", %d) = 0x%"PRIx64


Thread overview: 9+ messages
2019-03-07  5:05 [Qemu-devel] [PATCH qemu v4 0/3] spapr_pci, vfio: NVIDIA V100 + POWER9 passthrough Alexey Kardashevskiy
2019-03-07  5:05 ` [Qemu-devel] [PATCH qemu v4 1/3] spapr_iommu: Do not replay mappings from just created DMA window Alexey Kardashevskiy
2019-03-07 23:11   ` David Gibson
2019-03-07  5:05 ` [Qemu-devel] [PATCH qemu v4 2/3] vfio: Make vfio_get_region_info_cap public Alexey Kardashevskiy
2019-03-07 22:03   ` Alex Williamson
2019-03-07  5:05 ` [Qemu-devel] [PATCH qemu v4 3/3] spapr: Support NVIDIA V100 GPU with NVLink2 Alexey Kardashevskiy
2019-03-07 22:02   ` Alex Williamson [this message]
2019-03-08  4:34     ` David Gibson
2019-03-08 14:52       ` Alex Williamson
