KVM Archive on lore.kernel.org
From: Liu Yi L <yi.l.liu@intel.com>
To: alex.williamson@redhat.com, eric.auger@redhat.com,
	baolu.lu@linux.intel.com, joro@8bytes.org
Cc: kevin.tian@intel.com, jacob.jun.pan@linux.intel.com,
	ashok.raj@intel.com, yi.l.liu@intel.com, jun.j.tian@intel.com,
	yi.y.sun@intel.com, jean-philippe@linaro.org, peterx@redhat.com,
	hao.wu@intel.com, stefanha@gmail.com,
	iommu@lists.linux-foundation.org, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace
Date: Sat,  4 Jul 2020 04:26:18 -0700
Message-ID: <1593861989-35920-5-git-send-email-yi.l.liu@intel.com> (raw)
In-Reply-To: <1593861989-35920-1-git-send-email-yi.l.liu@intel.com>

This patch exports iommu nesting capability info to user space through
VFIO. User space is expected to check this info for the supported uAPIs
(e.g. PASID alloc/free, bind page table, and cache invalidation) and for
the vendor-specific format of the first-level/stage page table that will
be bound.

The nesting info is available only after the nesting iommu type is set
for a container. The current implementation imposes one limitation: a
nesting container should include at most one group. The philosophy of a
vfio container is that all groups/devices within the container share the
same IOMMU context. When vSVA is enabled, one IOMMU context can include
one 2nd-level address space and multiple 1st-level address spaces. While
the 2nd-level address space is reasonably sharable by multiple groups,
blindly sharing 1st-level address spaces across all groups within the
container might instead break the guest's expectation. In the future a
sub/super container concept might be introduced to allow partial address
space sharing within an IOMMU context. For now, go with this restriction
by requiring a singleton container for using the nesting iommu features.
The link below has the related discussion about this decision.

https://lkml.org/lkml/2020/5/15/1028
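
For illustration only (not part of this patch), below is a minimal
user-space sketch of how a VMM might consume the new capability. It sizes
the VFIO_IOMMU_GET_INFO buffer with a first ioctl, re-issues the ioctl,
and walks the capability chain looking for
VFIO_IOMMU_TYPE1_INFO_CAP_NESTING. The sketch assumes 'container' is a
/dev/vfio/vfio fd whose iommu type is already set to
VFIO_TYPE1_NESTING_IOMMU with a group attached, and that the uapi header
from this series is in use; parsing of the vendor data is elided.

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Returns 0 if the nesting capability is reported, -1 otherwise. */
static int vfio_check_nesting_cap(int container)
{
	struct vfio_iommu_type1_info hdr = { .argsz = sizeof(hdr) };
	struct vfio_iommu_type1_info *info;
	struct vfio_info_cap_header *cap;
	__u32 off;
	int ret = -1;

	/* First call only reports the required buffer size in argsz */
	if (ioctl(container, VFIO_IOMMU_GET_INFO, &hdr))
		return -1;

	info = calloc(1, hdr.argsz);
	if (!info)
		return -1;
	info->argsz = hdr.argsz;

	if (ioctl(container, VFIO_IOMMU_GET_INFO, info) ||
	    !(info->flags & VFIO_IOMMU_INFO_CAPS) || !info->cap_offset)
		goto out_free;

	/* Walk the cap chain; offsets are relative to the start of info */
	for (off = info->cap_offset; off; off = cap->next) {
		cap = (struct vfio_info_cap_header *)((char *)info + off);
		if (cap->id == VFIO_IOMMU_TYPE1_INFO_CAP_NESTING) {
			struct vfio_iommu_type1_info_cap_nesting *nest =
				(struct vfio_iommu_type1_info_cap_nesting *)cap;

			/*
			 * nest->info[] carries the struct iommu_nesting_info
			 * reported by the IOMMU driver; a VMM would parse it
			 * here to learn the supported vSVA uAPIs and the
			 * 1st-level page table format. Parsing elided.
			 */
			(void)nest;
			ret = 0;
			break;
		}
	}

out_free:
	free(info);
	return ret;
}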

Cc: Kevin Tian <kevin.tian@intel.com>
CC: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Eric Auger <eric.auger@redhat.com>
Cc: Jean-Philippe Brucker <jean-philippe@linaro.org>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
---
v3 -> v4:
*) address comments against v3.

v1 -> v2:
*) added in v2
---

 drivers/vfio/vfio_iommu_type1.c | 105 +++++++++++++++++++++++++++++++++++-----
 include/uapi/linux/vfio.h       |  16 ++++++
 2 files changed, 109 insertions(+), 12 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 7accb59..80623b8 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit,
 		 "Maximum number of user DMA mappings per container (65535).");
 
 struct vfio_iommu {
-	struct list_head	domain_list;
-	struct list_head	iova_list;
-	struct vfio_domain	*external_domain; /* domain for external user */
-	struct mutex		lock;
-	struct rb_root		dma_list;
-	struct blocking_notifier_head notifier;
-	unsigned int		dma_avail;
-	uint64_t		pgsize_bitmap;
-	bool			v2;
-	bool			nesting;
-	bool			dirty_page_tracking;
-	bool			pinned_page_dirty_scope;
+	struct list_head		domain_list;
+	struct list_head		iova_list;
+	struct vfio_domain		*external_domain; /* domain for
+							     external user */
+	struct mutex			lock;
+	struct rb_root			dma_list;
+	struct blocking_notifier_head	notifier;
+	unsigned int			dma_avail;
+	uint64_t			pgsize_bitmap;
+	bool				v2;
+	bool				nesting;
+	bool				dirty_page_tracking;
+	bool				pinned_page_dirty_scope;
+	struct iommu_nesting_info	*nesting_info;
 };
 
 struct vfio_domain {
@@ -130,6 +132,9 @@ struct vfio_regions {
 #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)	\
 					(!list_empty(&iommu->domain_list))
 
+#define IS_DOMAIN_IN_CONTAINER(iommu)	((iommu->external_domain) || \
+					 (!list_empty(&iommu->domain_list)))
+
 #define DIRTY_BITMAP_BYTES(n)	(ALIGN(n, BITS_PER_TYPE(u64)) / BITS_PER_BYTE)
 
 /*
@@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
 
 	list_splice_tail(iova_copy, iova);
 }
+
+static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu)
+{
+	kfree(iommu->nesting_info);
+	iommu->nesting_info = NULL;
+}
+
 static int vfio_iommu_type1_attach_group(void *iommu_data,
 					 struct iommu_group *iommu_group)
 {
@@ -1959,6 +1971,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 		}
 	}
 
+	/* Nesting type container can include only one group */
+	if (iommu->nesting && IS_DOMAIN_IN_CONTAINER(iommu)) {
+		mutex_unlock(&iommu->lock);
+		return -EINVAL;
+	}
+
 	group = kzalloc(sizeof(*group), GFP_KERNEL);
 	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
 	if (!group || !domain) {
@@ -2029,6 +2047,36 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 	if (ret)
 		goto out_domain;
 
+	/* Nesting cap info is available only after attaching */
+	if (iommu->nesting) {
+		struct iommu_nesting_info tmp;
+		struct iommu_nesting_info *info;
+
+		/* First get the size of vendor specific nesting info */
+		ret = iommu_domain_get_attr(domain->domain,
+					    DOMAIN_ATTR_NESTING,
+					    &tmp);
+		if (ret)
+			goto out_detach;
+
+		info = kzalloc(tmp.size, GFP_KERNEL);
+		if (!info) {
+			ret = -ENOMEM;
+			goto out_detach;
+		}
+
+		/* Now get the nesting info */
+		info->size = tmp.size;
+		ret = iommu_domain_get_attr(domain->domain,
+					    DOMAIN_ATTR_NESTING,
+					    info);
+		if (ret) {
+			kfree(info);
+			goto out_detach;
+		}
+		iommu->nesting_info = info;
+	}
+
 	/* Get aperture info */
 	iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, &geo);
 
@@ -2138,6 +2186,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 	return 0;
 
 out_detach:
+	vfio_iommu_release_nesting_info(iommu);
 	vfio_iommu_detach_group(domain, group);
 out_domain:
 	iommu_domain_free(domain->domain);
@@ -2338,6 +2387,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
 					vfio_iommu_unmap_unpin_all(iommu);
 				else
 					vfio_iommu_unmap_unpin_reaccount(iommu);
+
+				vfio_iommu_release_nesting_info(iommu);
 			}
 			iommu_domain_free(domain->domain);
 			list_del(&domain->next);
@@ -2546,6 +2597,30 @@ static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
 	return vfio_info_add_capability(caps, &cap_mig.header, sizeof(cap_mig));
 }
 
+static int vfio_iommu_info_add_nesting_cap(struct vfio_iommu *iommu,
+					   struct vfio_info_cap *caps)
+{
+	struct vfio_info_cap_header *header;
+	struct vfio_iommu_type1_info_cap_nesting *nesting_cap;
+	size_t size;
+
+	size = sizeof(*nesting_cap) + iommu->nesting_info->size;
+
+	header = vfio_info_cap_add(caps, size,
+				   VFIO_IOMMU_TYPE1_INFO_CAP_NESTING, 1);
+	if (IS_ERR(header))
+		return PTR_ERR(header);
+
+	nesting_cap = container_of(header,
+				   struct vfio_iommu_type1_info_cap_nesting,
+				   header);
+
+	memcpy(&nesting_cap->info, iommu->nesting_info,
+	       iommu->nesting_info->size);
+
+	return 0;
+}
+
 static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
 				     unsigned long arg)
 {
@@ -2586,6 +2661,12 @@ static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
 	if (ret)
 		return ret;
 
+	if (iommu->nesting_info) {
+		ret = vfio_iommu_info_add_nesting_cap(iommu, &caps);
+		if (ret)
+			return ret;
+	}
+
 	if (caps.size) {
 		info.flags |= VFIO_IOMMU_INFO_CAPS;
 
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 9204705..3e3de9c 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -1039,6 +1039,22 @@ struct vfio_iommu_type1_info_cap_migration {
 	__u64	max_dirty_bitmap_size;		/* in bytes */
 };
 
+#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING  3
+
+/*
+ * Reporting nesting info to user space.
+ *
+ * @info:	the nesting info provided by IOMMU driver. Today
+ *		it is expected to be a struct iommu_nesting_info
+ *		data.
+ */
+struct vfio_iommu_type1_info_cap_nesting {
+	struct	vfio_info_cap_header header;
+	__u32	flags;
+	__u32	padding;
+	__u8	info[];
+};
+
 #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
 
 /**
-- 
2.7.4



Thread overview: 43+ messages
2020-07-04 11:26 [PATCH v4 00/15] vfio: expose virtual Shared Virtual Addressing to VMs Liu Yi L
2020-07-04 11:26 ` [PATCH v4 01/15] vfio/type1: Refactor vfio_iommu_type1_ioctl() Liu Yi L
2020-07-06  9:34   ` Auger Eric
2020-07-06 12:27     ` Liu, Yi L
2020-07-06 12:55       ` Auger Eric
2020-07-06 13:00         ` Liu, Yi L
2020-07-04 11:26 ` [PATCH v4 02/15] iommu: Report domain nesting info Liu Yi L
2020-07-06  9:34   ` Auger Eric
2020-07-06 12:20     ` Liu, Yi L
2020-07-06 13:00       ` Auger Eric
2020-07-06 13:17         ` Liu, Yi L
2020-07-04 11:26 ` [PATCH v4 03/15] iommu/smmu: Report empty " Liu Yi L
2020-07-06 10:37   ` Auger Eric
2020-07-06 12:46     ` Liu, Yi L
2020-07-06 13:21       ` Auger Eric
2020-07-06 13:26         ` Liu, Yi L
2020-07-04 11:26 ` Liu Yi L [this message]
2020-07-06 10:37   ` [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace Auger Eric
2020-07-06 13:10     ` Liu, Yi L
2020-07-06 13:45       ` Auger Eric
2020-07-07  9:31         ` Liu, Yi L
2020-07-08  8:08           ` Liu, Yi L
2020-07-08 19:29             ` Alex Williamson
2020-07-09  0:25               ` Liu, Yi L
2020-07-06 14:06   ` Auger Eric
2020-07-07  9:34     ` Liu, Yi L
2020-07-04 11:26 ` [PATCH v4 05/15] vfio: Add PASID allocation/free support Liu Yi L
2020-07-06 14:52   ` Auger Eric
2020-07-07  9:45     ` Liu, Yi L
2020-07-04 11:26 ` [PATCH v4 06/15] iommu/vt-d: Support setting ioasid set to domain Liu Yi L
2020-07-06 14:52   ` Auger Eric
2020-07-07  9:37     ` Liu, Yi L
2020-07-04 11:26 ` [PATCH v4 07/15] vfio/type1: Add VFIO_IOMMU_PASID_REQUEST (alloc/free) Liu Yi L
2020-07-06 15:17   ` Auger Eric
2020-07-07  9:51     ` Liu, Yi L
2020-07-04 11:26 ` [PATCH v4 08/15] iommu: Pass domain to sva_unbind_gpasid() Liu Yi L
2020-07-04 11:26 ` [PATCH v4 09/15] iommu/vt-d: Check ownership for PASIDs from user-space Liu Yi L
2020-07-04 11:26 ` [PATCH v4 10/15] vfio/type1: Support binding guest page tables to PASID Liu Yi L
2020-07-04 11:26 ` [PATCH v4 11/15] vfio/type1: Allow invalidating first-level/stage IOMMU cache Liu Yi L
2020-07-04 11:26 ` [PATCH v4 12/15] vfio/type1: Add vSVA support for IOMMU-backed mdevs Liu Yi L
2020-07-04 11:26 ` [PATCH v4 13/15] vfio/pci: Expose PCIe PASID capability to guest Liu Yi L
2020-07-04 11:26 ` [PATCH v4 14/15] vfio: Document dual stage control Liu Yi L
2020-07-04 11:26 ` [PATCH v4 15/15] iommu/vt-d: Support reporting nesting capability info Liu Yi L
