All of lore.kernel.org
 help / color / mirror / Atom feed
From: Liu Yi L <yi.l.liu@intel.com>
To: qemu-devel@nongnu.org, alex.williamson@redhat.com,
	peterx@redhat.com, jasowang@redhat.com
Cc: mst@redhat.com, pbonzini@redhat.com, eric.auger@redhat.com,
	david@gibson.dropbear.id.au, jean-philippe@linaro.org,
	kevin.tian@intel.com, yi.l.liu@intel.com, jun.j.tian@intel.com,
	yi.y.sun@intel.com, hao.wu@intel.com, kvm@vger.kernel.org,
	Jacob Pan <jacob.jun.pan@linux.intel.com>,
	Yi Sun <yi.y.sun@linux.intel.com>
Subject: [RFC v9 12/25] vfio: init HostIOMMUContext per-container
Date: Mon, 27 Jul 2020 23:34:05 -0700	[thread overview]
Message-ID: <1595918058-33392-13-git-send-email-yi.l.liu@intel.com> (raw)
In-Reply-To: <1595918058-33392-1-git-send-email-yi.l.liu@intel.com>

In this patch, QEMU firstly gets iommu info from kernel to check the
supported capabilities by a VFIO_IOMMU_TYPE1_NESTING iommu. And inits
HostIOMMUContet instance.

For vfio-pci devices, it could use pci_device_set/unset_iommu() to
expose host iommu context to vIOMMU emulators. vIOMMU emulators
could make use the methods provided by host iommu context. e.g.
propagate requests to host iommu.

Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Eric Auger <eric.auger@redhat.com>
Cc: Yi Sun <yi.y.sun@linux.intel.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
---
 hw/vfio/common.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/vfio/pci.c    |  17 +++++++++
 2 files changed, 130 insertions(+)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 41aaf41..9d90732 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1227,10 +1227,102 @@ static int vfio_host_iommu_ctx_pasid_free(HostIOMMUContext *iommu_ctx,
     return ret;
 }
 
+/**
+ * Get iommu info from host. Caller of this funcion should free
+ * the memory pointed by the returned pointer stored in @info
+ * after a successful calling when finished its usage.
+ */
+static int vfio_get_iommu_info(VFIOContainer *container,
+                         struct vfio_iommu_type1_info **info)
+{
+
+    size_t argsz = sizeof(struct vfio_iommu_type1_info);
+
+    *info = g_malloc0(argsz);
+
+retry:
+    (*info)->argsz = argsz;
+
+    if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) {
+        g_free(*info);
+        *info = NULL;
+        return -errno;
+    }
+
+    if (((*info)->argsz > argsz)) {
+        argsz = (*info)->argsz;
+        *info = g_realloc(*info, argsz);
+        goto retry;
+    }
+
+    return 0;
+}
+
+static struct vfio_info_cap_header *
+vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
+{
+    struct vfio_info_cap_header *hdr;
+    void *ptr = info;
+
+    if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) {
+        return NULL;
+    }
+
+    for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
+        if (hdr->id == id) {
+            return hdr;
+        }
+    }
+
+    return NULL;
+}
+
+static int vfio_get_nesting_iommu_cap(VFIOContainer *container,
+                   struct vfio_iommu_type1_info_cap_nesting **cap_nesting)
+{
+    struct vfio_iommu_type1_info *info;
+    struct vfio_info_cap_header *hdr;
+    struct vfio_iommu_type1_info_cap_nesting *cap;
+    struct iommu_nesting_info *nest_info;
+    int ret;
+    uint32_t minsz, cap_size;
+
+    ret = vfio_get_iommu_info(container, &info);
+    if (ret) {
+        return ret;
+    }
+
+    hdr = vfio_get_iommu_info_cap(info,
+                        VFIO_IOMMU_TYPE1_INFO_CAP_NESTING);
+    if (!hdr) {
+        g_free(info);
+        return -EINVAL;
+    }
+
+    cap = container_of(hdr,
+                struct vfio_iommu_type1_info_cap_nesting, header);
+
+    nest_info = &cap->info;
+    minsz = offsetof(struct iommu_nesting_info, data);
+    if (nest_info->argsz < minsz) {
+        g_free(info);
+        return -EINVAL;
+    }
+
+    cap_size = offsetof(struct vfio_iommu_type1_info_cap_nesting, info) +
+               nest_info->argsz;
+    *cap_nesting = g_malloc0(cap_size);
+    memcpy(*cap_nesting, cap, cap_size);
+
+    g_free(info);
+    return 0;
+}
+
 static int vfio_init_container(VFIOContainer *container, int group_fd,
                                bool want_nested, Error **errp)
 {
     int iommu_type, ret;
+    uint64_t flags = 0;
 
     iommu_type = vfio_get_iommu_type(container, want_nested, errp);
     if (iommu_type < 0) {
@@ -1258,6 +1350,27 @@ static int vfio_init_container(VFIOContainer *container, int group_fd,
         return -errno;
     }
 
+    if (iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
+        struct vfio_iommu_type1_info_cap_nesting *nesting = NULL;
+        struct iommu_nesting_info *nest_info;
+
+        ret = vfio_get_nesting_iommu_cap(container, &nesting);
+        if (ret) {
+            error_setg_errno(errp, -ret,
+                             "Failed to get nesting iommu cap");
+            return ret;
+        }
+
+        nest_info = (struct iommu_nesting_info *) &nesting->info;
+        flags |= (nest_info->features & IOMMU_NESTING_FEAT_SYSWIDE_PASID) ?
+                 HOST_IOMMU_PASID_REQUEST : 0;
+        host_iommu_ctx_init(&container->iommu_ctx,
+                            sizeof(container->iommu_ctx),
+                            TYPE_VFIO_HOST_IOMMU_CONTEXT,
+                            flags);
+        g_free(nesting);
+    }
+
     container->iommu_type = iommu_type;
     return 0;
 }
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 8cd1e72..f954c28 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2707,6 +2707,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
     VFIOPCIDevice *vdev = PCI_VFIO(pdev);
     VFIODevice *vbasedev_iter;
     VFIOGroup *group;
+    VFIOContainer *container;
     char *tmp, *subsys, group_path[PATH_MAX], *group_name;
     Error *err = NULL;
     ssize_t len;
@@ -2783,6 +2784,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
         goto error;
     }
 
+    container = group->container;
+    if (container->iommu_ctx.initialized &&
+        pci_device_set_iommu_context(pdev, &container->iommu_ctx)) {
+        error_setg(errp, "device attachment is denied by vIOMMU, "
+                   "please check host IOMMU nesting capability");
+        vfio_put_group(group);
+        goto error;
+    }
+
     QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
         if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) {
             error_setg(errp, "device is already attached");
@@ -3068,9 +3078,16 @@ static void vfio_instance_finalize(Object *obj)
 static void vfio_exitfn(PCIDevice *pdev)
 {
     VFIOPCIDevice *vdev = PCI_VFIO(pdev);
+    VFIOContainer *container;
 
     vfio_unregister_req_notifier(vdev);
     vfio_unregister_err_notifier(vdev);
+
+    container = vdev->vbasedev.group->container;
+    if (container->iommu_ctx.initialized) {
+        pci_device_unset_iommu_context(pdev);
+    }
+
     pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
     if (vdev->irqchip_change_notifier.notify) {
         kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
-- 
2.7.4


WARNING: multiple messages have this Message-ID (diff)
From: Liu Yi L <yi.l.liu@intel.com>
To: qemu-devel@nongnu.org, alex.williamson@redhat.com,
	peterx@redhat.com, jasowang@redhat.com
Cc: jean-philippe@linaro.org, kevin.tian@intel.com,
	yi.l.liu@intel.com, Yi Sun <yi.y.sun@linux.intel.com>,
	kvm@vger.kernel.org, mst@redhat.com, jun.j.tian@intel.com,
	eric.auger@redhat.com, yi.y.sun@intel.com,
	Jacob Pan <jacob.jun.pan@linux.intel.com>,
	pbonzini@redhat.com, hao.wu@intel.com,
	david@gibson.dropbear.id.au
Subject: [RFC v9 12/25] vfio: init HostIOMMUContext per-container
Date: Mon, 27 Jul 2020 23:34:05 -0700	[thread overview]
Message-ID: <1595918058-33392-13-git-send-email-yi.l.liu@intel.com> (raw)
In-Reply-To: <1595918058-33392-1-git-send-email-yi.l.liu@intel.com>

In this patch, QEMU firstly gets iommu info from kernel to check the
supported capabilities by a VFIO_IOMMU_TYPE1_NESTING iommu. And inits
HostIOMMUContet instance.

For vfio-pci devices, it could use pci_device_set/unset_iommu() to
expose host iommu context to vIOMMU emulators. vIOMMU emulators
could make use the methods provided by host iommu context. e.g.
propagate requests to host iommu.

Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Eric Auger <eric.auger@redhat.com>
Cc: Yi Sun <yi.y.sun@linux.intel.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
---
 hw/vfio/common.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/vfio/pci.c    |  17 +++++++++
 2 files changed, 130 insertions(+)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 41aaf41..9d90732 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1227,10 +1227,102 @@ static int vfio_host_iommu_ctx_pasid_free(HostIOMMUContext *iommu_ctx,
     return ret;
 }
 
+/**
+ * Get iommu info from host. Caller of this funcion should free
+ * the memory pointed by the returned pointer stored in @info
+ * after a successful calling when finished its usage.
+ */
+static int vfio_get_iommu_info(VFIOContainer *container,
+                         struct vfio_iommu_type1_info **info)
+{
+
+    size_t argsz = sizeof(struct vfio_iommu_type1_info);
+
+    *info = g_malloc0(argsz);
+
+retry:
+    (*info)->argsz = argsz;
+
+    if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) {
+        g_free(*info);
+        *info = NULL;
+        return -errno;
+    }
+
+    if (((*info)->argsz > argsz)) {
+        argsz = (*info)->argsz;
+        *info = g_realloc(*info, argsz);
+        goto retry;
+    }
+
+    return 0;
+}
+
+static struct vfio_info_cap_header *
+vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
+{
+    struct vfio_info_cap_header *hdr;
+    void *ptr = info;
+
+    if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) {
+        return NULL;
+    }
+
+    for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
+        if (hdr->id == id) {
+            return hdr;
+        }
+    }
+
+    return NULL;
+}
+
+static int vfio_get_nesting_iommu_cap(VFIOContainer *container,
+                   struct vfio_iommu_type1_info_cap_nesting **cap_nesting)
+{
+    struct vfio_iommu_type1_info *info;
+    struct vfio_info_cap_header *hdr;
+    struct vfio_iommu_type1_info_cap_nesting *cap;
+    struct iommu_nesting_info *nest_info;
+    int ret;
+    uint32_t minsz, cap_size;
+
+    ret = vfio_get_iommu_info(container, &info);
+    if (ret) {
+        return ret;
+    }
+
+    hdr = vfio_get_iommu_info_cap(info,
+                        VFIO_IOMMU_TYPE1_INFO_CAP_NESTING);
+    if (!hdr) {
+        g_free(info);
+        return -EINVAL;
+    }
+
+    cap = container_of(hdr,
+                struct vfio_iommu_type1_info_cap_nesting, header);
+
+    nest_info = &cap->info;
+    minsz = offsetof(struct iommu_nesting_info, data);
+    if (nest_info->argsz < minsz) {
+        g_free(info);
+        return -EINVAL;
+    }
+
+    cap_size = offsetof(struct vfio_iommu_type1_info_cap_nesting, info) +
+               nest_info->argsz;
+    *cap_nesting = g_malloc0(cap_size);
+    memcpy(*cap_nesting, cap, cap_size);
+
+    g_free(info);
+    return 0;
+}
+
 static int vfio_init_container(VFIOContainer *container, int group_fd,
                                bool want_nested, Error **errp)
 {
     int iommu_type, ret;
+    uint64_t flags = 0;
 
     iommu_type = vfio_get_iommu_type(container, want_nested, errp);
     if (iommu_type < 0) {
@@ -1258,6 +1350,27 @@ static int vfio_init_container(VFIOContainer *container, int group_fd,
         return -errno;
     }
 
+    if (iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
+        struct vfio_iommu_type1_info_cap_nesting *nesting = NULL;
+        struct iommu_nesting_info *nest_info;
+
+        ret = vfio_get_nesting_iommu_cap(container, &nesting);
+        if (ret) {
+            error_setg_errno(errp, -ret,
+                             "Failed to get nesting iommu cap");
+            return ret;
+        }
+
+        nest_info = (struct iommu_nesting_info *) &nesting->info;
+        flags |= (nest_info->features & IOMMU_NESTING_FEAT_SYSWIDE_PASID) ?
+                 HOST_IOMMU_PASID_REQUEST : 0;
+        host_iommu_ctx_init(&container->iommu_ctx,
+                            sizeof(container->iommu_ctx),
+                            TYPE_VFIO_HOST_IOMMU_CONTEXT,
+                            flags);
+        g_free(nesting);
+    }
+
     container->iommu_type = iommu_type;
     return 0;
 }
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 8cd1e72..f954c28 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2707,6 +2707,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
     VFIOPCIDevice *vdev = PCI_VFIO(pdev);
     VFIODevice *vbasedev_iter;
     VFIOGroup *group;
+    VFIOContainer *container;
     char *tmp, *subsys, group_path[PATH_MAX], *group_name;
     Error *err = NULL;
     ssize_t len;
@@ -2783,6 +2784,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
         goto error;
     }
 
+    container = group->container;
+    if (container->iommu_ctx.initialized &&
+        pci_device_set_iommu_context(pdev, &container->iommu_ctx)) {
+        error_setg(errp, "device attachment is denied by vIOMMU, "
+                   "please check host IOMMU nesting capability");
+        vfio_put_group(group);
+        goto error;
+    }
+
     QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
         if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) {
             error_setg(errp, "device is already attached");
@@ -3068,9 +3078,16 @@ static void vfio_instance_finalize(Object *obj)
 static void vfio_exitfn(PCIDevice *pdev)
 {
     VFIOPCIDevice *vdev = PCI_VFIO(pdev);
+    VFIOContainer *container;
 
     vfio_unregister_req_notifier(vdev);
     vfio_unregister_err_notifier(vdev);
+
+    container = vdev->vbasedev.group->container;
+    if (container->iommu_ctx.initialized) {
+        pci_device_unset_iommu_context(pdev);
+    }
+
     pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
     if (vdev->irqchip_change_notifier.notify) {
         kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
-- 
2.7.4



  parent reply	other threads:[~2020-07-28  6:27 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-28  6:33 [RFC v9 00/25] intel_iommu: expose Shared Virtual Addressing to VMs Liu Yi L
2020-07-28  6:33 ` Liu Yi L
2020-07-28  6:33 ` [RFC v9 01/25] scripts/update-linux-headers: Import iommu.h Liu Yi L
2020-07-28  6:33   ` Liu Yi L
2020-07-28  6:33 ` [RFC v9 02/25] header file update VFIO/IOMMU vSVA APIs kernel 5.8-rc6 Liu Yi L
2020-07-28  6:33   ` Liu Yi L
2020-07-28  6:33 ` [RFC v9 03/25] hw/pci: modify pci_setup_iommu() to set PCIIOMMUOps Liu Yi L
2020-07-28  6:33   ` Liu Yi L
2020-07-28  6:33 ` [RFC v9 04/25] hw/pci: introduce pci_device_get_iommu_attr() Liu Yi L
2020-07-28  6:33   ` Liu Yi L
2020-07-28  6:33 ` [RFC v9 05/25] intel_iommu: add get_iommu_attr() callback Liu Yi L
2020-07-28  6:33   ` Liu Yi L
2020-07-28  6:33 ` [RFC v9 06/25] vfio: pass nesting requirement into vfio_get_group() Liu Yi L
2020-07-28  6:33   ` Liu Yi L
2020-07-28  6:34 ` [RFC v9 07/25] vfio: check VFIO_TYPE1_NESTING_IOMMU support Liu Yi L
2020-07-28  6:34   ` Liu Yi L
2020-07-28  6:34 ` [RFC v9 08/25] hw/iommu: introduce HostIOMMUContext Liu Yi L
2020-07-28  6:34   ` Liu Yi L
2020-07-28  6:34 ` [RFC v9 09/25] hw/pci: introduce pci_device_set/unset_iommu_context() Liu Yi L
2020-07-28  6:34   ` Liu Yi L
2020-07-28  6:34 ` [RFC v9 10/25] intel_iommu: add set/unset_iommu_context callback Liu Yi L
2020-07-28  6:34   ` Liu Yi L
2020-07-28  6:34 ` [RFC v9 11/25] vfio/common: provide PASID alloc/free hooks Liu Yi L
2020-07-28  6:34   ` Liu Yi L
2020-07-28  6:34 ` Liu Yi L [this message]
2020-07-28  6:34   ` [RFC v9 12/25] vfio: init HostIOMMUContext per-container Liu Yi L
2020-07-28  6:34 ` [RFC v9 13/25] intel_iommu: add virtual command capability support Liu Yi L
2020-07-28  6:34   ` Liu Yi L
2020-07-28  6:34 ` [RFC v9 14/25] intel_iommu: process PASID cache invalidation Liu Yi L
2020-07-28  6:34   ` Liu Yi L
2020-07-28  6:34 ` [RFC v9 15/25] intel_iommu: add PASID cache management infrastructure Liu Yi L
2020-07-28  6:34   ` Liu Yi L
2020-07-28  6:34 ` [RFC v9 16/25] vfio: add bind stage-1 page table support Liu Yi L
2020-07-28  6:34   ` Liu Yi L
2020-07-28  6:34 ` [RFC v9 17/25] intel_iommu: sync IOMMU nesting cap info for assigned devices Liu Yi L
2020-07-28  6:34   ` Liu Yi L
2020-07-28  6:34 ` [RFC v9 18/25] intel_iommu: bind/unbind guest page table to host Liu Yi L
2020-07-28  6:34   ` Liu Yi L
2020-07-28  6:34 ` [RFC v9 19/25] intel_iommu: replay pasid binds after context cache invalidation Liu Yi L
2020-07-28  6:34   ` Liu Yi L
2020-07-28  6:34 ` [RFC v9 20/25] intel_iommu: do not pass down pasid bind for PASID #0 Liu Yi L
2020-07-28  6:34   ` Liu Yi L
2020-07-28  6:34 ` [RFC v9 21/25] vfio: add support for flush iommu stage-1 cache Liu Yi L
2020-07-28  6:34   ` Liu Yi L
2020-07-28  6:34 ` [RFC v9 22/25] intel_iommu: process PASID-based iotlb invalidation Liu Yi L
2020-07-28  6:34   ` Liu Yi L
2020-07-28  6:34 ` [RFC v9 23/25] intel_iommu: propagate PASID-based iotlb invalidation to host Liu Yi L
2020-07-28  6:34   ` Liu Yi L
2020-07-28  6:34 ` [RFC v9 24/25] intel_iommu: process PASID-based Device-TLB invalidation Liu Yi L
2020-07-28  6:34   ` Liu Yi L
2020-07-28  6:34 ` [RFC v9 25/25] intel_iommu: modify x-scalable-mode to be string option Liu Yi L
2020-07-28  6:34   ` Liu Yi L

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1595918058-33392-13-git-send-email-yi.l.liu@intel.com \
    --to=yi.l.liu@intel.com \
    --cc=alex.williamson@redhat.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=eric.auger@redhat.com \
    --cc=hao.wu@intel.com \
    --cc=jacob.jun.pan@linux.intel.com \
    --cc=jasowang@redhat.com \
    --cc=jean-philippe@linaro.org \
    --cc=jun.j.tian@intel.com \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=yi.y.sun@intel.com \
    --cc=yi.y.sun@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.