linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Eric Auger <eric.auger@redhat.com>
To: eric.auger.pro@gmail.com, eric.auger@redhat.com,
	iommu@lists.linux-foundation.org, linux-kernel@vger.kernel.org,
	kvm@vger.kernel.org, kvmarm@lists.cs.columbia.edu,
	joro@8bytes.org, alex.williamson@redhat.com,
	jacob.jun.pan@linux.intel.com, yi.l.liu@linux.intel.com,
	jean-philippe.brucker@arm.com, will.deacon@arm.com,
	robin.murphy@arm.com
Cc: tianyu.lan@intel.com, ashok.raj@intel.com, marc.zyngier@arm.com,
	christoffer.dall@arm.com, peter.maydell@linaro.org
Subject: [RFC v2 12/20] dma-iommu: Implement NESTED_MSI cookie
Date: Tue, 18 Sep 2018 16:24:49 +0200	[thread overview]
Message-ID: <20180918142457.3325-13-eric.auger@redhat.com> (raw)
In-Reply-To: <20180918142457.3325-1-eric.auger@redhat.com>

Up to now, when the type was UNMANAGED, we used to
allocate IOVA pages within a range provided by the user.
This does not work in nested mode.

If both the host and the guest are exposed with SMMUs, each
would allocate an IOVA. The guest allocates an IOVA (gIOVA)
to map onto the guest MSI doorbell (gDB). The Host allocates
another IOVA (hIOVA) to map onto the physical doorbell (hDB).

So we end up with 2 unrelated mappings, at S1 and S2:
         S1             S2
gIOVA    ->     gDB
               hIOVA    ->    hDB

The PCI device would be programmed with hIOVA.

iommu_dma_bind_doorbell() allows gIOVA/gDB to be passed to the host
so that the host can use gIOVA instead of allocating a new IOVA
of its own. That way the host can create the following nested
mapping:

         S1           S2
gIOVA    ->    gDB    ->    hDB

This time, the PCI device will be programmed with the gIOVA MSI
doorbell, which is correctly mapped through the 2 stages.

Signed-off-by: Eric Auger <eric.auger@redhat.com>

---

v1 -> v2:
- unmap stage2 on put()
---
 drivers/iommu/dma-iommu.c | 97 +++++++++++++++++++++++++++++++++++++--
 include/linux/dma-iommu.h | 11 +++++
 2 files changed, 105 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 511ff9a1d6d9..53444c3e8f2f 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -37,12 +37,14 @@
 struct iommu_dma_msi_page {
 	struct list_head	list;
 	dma_addr_t		iova;
+	dma_addr_t		ipa;
 	phys_addr_t		phys;
 };
 
 enum iommu_dma_cookie_type {
 	IOMMU_DMA_IOVA_COOKIE,
 	IOMMU_DMA_MSI_COOKIE,
+	IOMMU_DMA_NESTED_MSI_COOKIE,
 };
 
 struct iommu_dma_cookie {
@@ -109,14 +111,17 @@ EXPORT_SYMBOL(iommu_get_dma_cookie);
  *
  * Users who manage their own IOVA allocation and do not want DMA API support,
  * but would still like to take advantage of automatic MSI remapping, can use
- * this to initialise their own domain appropriately. Users should reserve a
+ * this to initialise their own domain appropriately. Users may reserve a
  * contiguous IOVA region, starting at @base, large enough to accommodate the
  * number of PAGE_SIZE mappings necessary to cover every MSI doorbell address
- * used by the devices attached to @domain.
+ * used by the devices attached to @domain. Alternatively, usable IOVA pages
+ * can be provided through the iommu_dma_bind_doorbell() API (nested stage
+ * use case).
  */
 int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
 {
 	struct iommu_dma_cookie *cookie;
+	int nesting, ret;
 
 	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
 		return -EINVAL;
@@ -124,7 +129,12 @@ int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
 	if (domain->iova_cookie)
 		return -EEXIST;
 
-	cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
+	ret =  iommu_domain_get_attr(domain, DOMAIN_ATTR_NESTING, &nesting);
+	if (!ret && nesting)
+		cookie = cookie_alloc(IOMMU_DMA_NESTED_MSI_COOKIE);
+	else
+		cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
+
 	if (!cookie)
 		return -ENOMEM;
 
@@ -145,6 +155,7 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
 {
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iommu_dma_msi_page *msi, *tmp;
+	bool s2_unmap = false;
 
 	if (!cookie)
 		return;
@@ -152,7 +163,15 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
 	if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule)
 		put_iova_domain(&cookie->iovad);
 
+	if (cookie->type == IOMMU_DMA_NESTED_MSI_COOKIE)
+		s2_unmap = true;
+
 	list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) {
+		if (s2_unmap && msi->phys) {
+			size_t size = cookie_msi_granule(cookie);
+
+			WARN_ON(iommu_unmap(domain, msi->ipa, size) != size);
+		}
 		list_del(&msi->list);
 		kfree(msi);
 	}
@@ -161,6 +180,50 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
 }
 EXPORT_SYMBOL(iommu_put_dma_cookie);
 
+/**
+ * iommu_dma_bind_doorbell - Provide a usable IOVA page
+ * @domain: domain handle
+ * @binding: IOVA/IPA binding
+ *
+ * In the nested stage use case, the user can provide IOVA/IPA bindings
+ * corresponding to a guest MSI stage 1 mapping. When the host needs
+ * to map its own MSI doorbells, it can use the IPA as stage 2 input
+ * and map it onto the physical MSI doorbell.
+ */
+int iommu_dma_bind_doorbell(struct iommu_domain *domain,
+			    struct iommu_guest_msi_binding *binding)
+{
+	struct iommu_dma_cookie *cookie = domain->iova_cookie;
+	struct iommu_dma_msi_page *msi;
+	dma_addr_t ipa, iova;
+	size_t size;
+
+	if (!cookie)
+		return -EINVAL;
+
+	if (cookie->type != IOMMU_DMA_NESTED_MSI_COOKIE)
+		return -EINVAL;
+
+	size = 1 << binding->granule;
+	iova = binding->iova & ~(phys_addr_t)(size - 1);
+	ipa = binding->gpa & ~(phys_addr_t)(size - 1);
+
+	list_for_each_entry(msi, &cookie->msi_page_list, list) {
+		if (msi->iova == iova)
+			return 0; /* this page is already registered */
+	}
+
+	msi = kzalloc(sizeof(*msi), GFP_KERNEL);
+	if (!msi)
+		return -ENOMEM;
+
+	msi->iova = iova;
+	msi->ipa = ipa;
+	list_add(&msi->list, &cookie->msi_page_list);
+	return 0;
+}
+EXPORT_SYMBOL(iommu_dma_bind_doorbell);
+
 /**
  * iommu_dma_get_resv_regions - Reserved region driver helper
  * @dev: Device from iommu_get_resv_regions()
@@ -846,6 +909,34 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
 		if (msi_page->phys == msi_addr)
 			return msi_page;
 
+	/*
+	 * In nested stage mode, we do not allocate an MSI page in
+	 * a range provided by the user. Instead, IOVA/IPA bindings are
+	 * individually provided. We reuse these IOVAs to build the
+	 * IOVA -> IPA -> MSI PA nested stage mapping.
+	 */
+	if (cookie->type == IOMMU_DMA_NESTED_MSI_COOKIE) {
+		list_for_each_entry(msi_page, &cookie->msi_page_list, list)
+			if (!msi_page->phys) { /* this binding is free to use */
+				dma_addr_t ipa = msi_page->ipa;
+				int ret;
+
+				msi_page->phys = msi_addr;
+
+				/* do the stage 2 mapping */
+				ret = iommu_map(domain, ipa, msi_addr, size,
+						IOMMU_MMIO | IOMMU_WRITE);
+				if (ret) {
+					pr_warn("MSI S2 mapping failed (%d)\n",
+						ret);
+					return NULL;
+				}
+				return msi_page;
+			}
+		pr_warn("%s no MSI binding found\n", __func__);
+		return NULL;
+	}
+
 	msi_page = kzalloc(sizeof(*msi_page), GFP_ATOMIC);
 	if (!msi_page)
 		return NULL;
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index e8ca5e654277..324745eef644 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -24,6 +24,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/iommu.h>
 #include <linux/msi.h>
+#include <uapi/linux/iommu.h>
 
 int iommu_dma_init(void);
 
@@ -74,12 +75,15 @@ int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
 /* The DMA API isn't _quite_ the whole story, though... */
 void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg);
 void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list);
+int iommu_dma_bind_doorbell(struct iommu_domain *domain,
+			    struct iommu_guest_msi_binding *binding);
 
 #else
 
 struct iommu_domain;
 struct msi_msg;
 struct device;
+struct iommu_guest_msi_binding;
 
 static inline int iommu_dma_init(void)
 {
@@ -104,6 +108,13 @@ static inline void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg)
 {
 }
 
+static inline int
+iommu_dma_bind_doorbell(struct iommu_domain *domain,
+			struct iommu_guest_msi_binding *binding)
+{
+	return -ENODEV;
+}
+
 static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
 {
 }
-- 
2.17.1


  parent reply	other threads:[~2018-09-18 14:26 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-18 14:24 [RFC v2 00/20] SMMUv3 Nested Stage Setup Eric Auger
2018-09-18 14:24 ` [RFC v2 01/20] iommu: Introduce bind_pasid_table API Eric Auger
2018-09-20 17:21   ` Jacob Pan
2018-09-21  9:45     ` Auger Eric
2018-09-18 14:24 ` [RFC v2 02/20] iommu: Introduce cache_invalidate API Eric Auger
2018-09-18 14:24 ` [RFC v2 03/20] iommu: Introduce bind_guest_msi Eric Auger
2018-09-18 14:24 ` [RFC v2 04/20] vfio: VFIO_IOMMU_BIND_PASID_TABLE Eric Auger
2018-09-18 14:24 ` [RFC v2 05/20] vfio: VFIO_IOMMU_CACHE_INVALIDATE Eric Auger
2018-09-18 14:24 ` [RFC v2 06/20] vfio: VFIO_IOMMU_BIND_MSI Eric Auger
2018-09-18 14:24 ` [RFC v2 07/20] iommu/arm-smmu-v3: Link domains and devices Eric Auger
2018-09-18 14:24 ` [RFC v2 08/20] iommu/arm-smmu-v3: Maintain a SID->device structure Eric Auger
2018-09-18 14:24 ` [RFC v2 09/20] iommu/smmuv3: Get prepared for nested stage support Eric Auger
2018-09-18 14:24 ` [RFC v2 10/20] iommu/smmuv3: Implement bind_pasid_table Eric Auger
2018-09-18 14:24 ` [RFC v2 11/20] iommu/smmuv3: Implement cache_invalidate Eric Auger
2018-09-18 14:24 ` Eric Auger [this message]
2018-10-24 18:02   ` [RFC v2 12/20] dma-iommu: Implement NESTED_MSI cookie Robin Murphy
2018-10-24 18:44     ` Auger Eric
2018-10-24 22:05       ` Robin Murphy
2018-10-27  9:24         ` Auger Eric
2018-09-18 14:24 ` [RFC v2 13/20] iommu/smmuv3: Implement bind_guest_msi Eric Auger
2018-09-18 14:24 ` [RFC v2 14/20] iommu: introduce device fault data Eric Auger
2018-09-20 22:06   ` Jacob Pan
2018-09-21  9:54     ` Auger Eric
2018-09-21 16:18       ` Jacob Pan
2018-12-12  8:21     ` Auger Eric
2018-12-15  0:30       ` Jacob Pan
2018-12-17  9:04         ` Auger Eric
2018-09-18 14:24 ` [RFC v2 15/20] driver core: add per device iommu param Eric Auger
2018-09-18 14:24 ` [RFC v2 16/20] iommu: introduce device fault report API Eric Auger
2018-09-18 14:24 ` [RFC v2 17/20] vfio: VFIO_IOMMU_SET_FAULT_EVENTFD Eric Auger
2018-09-18 14:24 ` [RFC v2 18/20] vfio: VFIO_IOMMU_GET_FAULT_EVENTS Eric Auger
2018-09-18 14:24 ` [RFC v2 19/20] vfio: Document nested stage control Eric Auger
2018-09-18 14:24 ` [RFC v2 20/20] iommu/smmuv3: Report non recoverable faults Eric Auger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180918142457.3325-13-eric.auger@redhat.com \
    --to=eric.auger@redhat.com \
    --cc=alex.williamson@redhat.com \
    --cc=ashok.raj@intel.com \
    --cc=christoffer.dall@arm.com \
    --cc=eric.auger.pro@gmail.com \
    --cc=iommu@lists.linux-foundation.org \
    --cc=jacob.jun.pan@linux.intel.com \
    --cc=jean-philippe.brucker@arm.com \
    --cc=joro@8bytes.org \
    --cc=kvm@vger.kernel.org \
    --cc=kvmarm@lists.cs.columbia.edu \
    --cc=linux-kernel@vger.kernel.org \
    --cc=marc.zyngier@arm.com \
    --cc=peter.maydell@linaro.org \
    --cc=robin.murphy@arm.com \
    --cc=tianyu.lan@intel.com \
    --cc=will.deacon@arm.com \
    --cc=yi.l.liu@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).