[RFC PATCH] iommu/vt-d: Add IOTLB flush support for kernel addresses

* [RFC PATCH] iommu/vt-d: Add IOTLB flush support for kernel addresses
@ 2015-10-20 15:52 David Woodhouse
  2015-10-20 16:03 ` Joerg Roedel
  0 siblings, 1 reply; 10+ messages in thread
From: David Woodhouse @ 2015-10-20 15:52 UTC (permalink / raw)
  To: linux-mm; +Cc: iommu, Sudeep Dutt

[-- Attachment #1: Type: text/plain, Size: 6764 bytes --]

On top of the tree at git.infradead.org/users/dwmw2/linux-svm.git
(http:// or git://).

For userspace addresses, we use the MMU notifiers and flush the IOTLB
as appropriate.

However, we need to do it for kernel addresses too — which basically
means adding a hook to flush_tlb_kernel_range(). Does this look
reasonable? I was trying to avoid it and insist on supporting addresses
within the kernel's static mapping only. But it doesn't look like
that's a reasonable thing to require.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 arch/x86/mm/tlb.c           |    2 ++
 drivers/iommu/intel-svm.c   |   37 ++++++++++++++++++++++++++++++++++---
 include/linux/intel-iommu.h |    6 +++++-
 include/linux/intel-svm.h   |   13 +++++--------
 4 files changed, 46 insertions(+), 12 deletions(-)

diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
index 0a48ccf..61d9533 100644
--- a/include/linux/intel-svm.h
+++ b/include/linux/intel-svm.h
@@ -44,14 +44,11 @@ struct svm_dev_ops {
 
 /*
  * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only
- * for access to kernel addresses. No IOTLB flushes are automatically done
- * for kernel mappings; it is valid only for access to the kernel's static
- * 1:1 mapping of physical memory — not to vmalloc or even module mappings.
- * A future API addition may permit the use of such ranges, by means of an
- * explicit IOTLB flush call (akin to the DMA API's unmap method).
- *
- * It is unlikely that we will ever hook into flush_tlb_kernel_range() to
- * do such IOTLB flushes automatically.
+ * for access to kernel addresses. IOTLB flushes are performed as required
+ * by means of a hook from flush_tlb_kernel_range(). This flag is mutually
+ * exclusive with the SVM_FLAG_PRIVATE_PASID flag — there can be only one
+ * PASID used for kernel mode, to keep the performance implications of the
+ * IOTLB flush hook relatively sane.
  */
 #define SVM_FLAG_SUPERVISOR_MODE	(1<<1)
 
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 8ddb5d0..40ebe83 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -6,6 +6,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/cpu.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -266,6 +267,7 @@ static void do_kernel_range_flush(void *info)
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
+	intel_iommu_flush_kernel_pasid(start, end);
 
 	/* Balance as user space task's flush, a bit conservative */
 	if (end == TLB_FLUSH_ALL ||
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index a584df0..f8ca3c1 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -23,6 +23,7 @@
 #include <linux/pci-ats.h>
 #include <linux/dmar.h>
 #include <linux/interrupt.h>
+#include <asm/tlbflush.h>
 
 static irqreturn_t prq_event_thread(int irq, void *d);
 
@@ -264,6 +265,26 @@ static const struct mmu_notifier_ops intel_mmuops = {
 	.invalidate_range = intel_invalidate_range,
 };
 
+void intel_iommu_flush_kernel_pasid(unsigned long start, unsigned long end)
+{
+	struct dmar_drhd_unit *drhd;
+	struct intel_iommu *iommu;
+	unsigned long pages;
+
+	if (end == TLB_FLUSH_ALL)
+		pages = end;
+	else
+		pages = (end - start) >> VTD_PAGE_SHIFT;
+
+	rcu_read_lock();
+	for_each_active_iommu(iommu, drhd) {
+		struct intel_svm *svm = rcu_dereference(iommu->kernel_svm);
+		if (svm)
+			intel_flush_svm_range(svm, start, pages, 0, 1);
+	}
+	rcu_read_unlock();
+}
+
 static DEFINE_MUTEX(pasid_mutex);
 
 int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
@@ -286,6 +307,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 		pasid_max = 1 << 20;
 
 	if ((flags & SVM_FLAG_SUPERVISOR_MODE)) {
+		if (flags & SVM_FLAG_PRIVATE_PASID)
+			return -EINVAL;
 		if (!ecap_srs(iommu->ecap))
 			return -EINVAL;
 	} else if (pasid) {
@@ -294,7 +317,9 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 	}
 
 	mutex_lock(&pasid_mutex);
-	if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
+	if (SVM_FLAG_SUPERVISOR_MODE)
+		svm = iommu->kernel_svm;
+	else if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
 		int i;
 
 		idr_for_each_entry(&iommu->pasid_idr, svm, i) {
@@ -378,8 +403,10 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 			}
 			iommu->pasid_table[svm->pasid].val = (u64)__pa(mm->pgd) | 1;
 			mm = NULL;
-		} else
+		} else {
 			iommu->pasid_table[svm->pasid].val = (u64)__pa(init_mm.pgd) | 1 | (1ULL << 11);
+			rcu_assign_pointer(iommu->kernel_svm, svm);
+		}
 		wmb();
 	}
 	list_add_rcu(&sdev->list, &svm->devs);
@@ -432,8 +459,12 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
 					mmu_notifier_unregister(&svm->notifier, svm->mm);
 
 					idr_remove(&svm->iommu->pasid_idr, svm->pasid);
-					if (svm->mm)
+					if (svm->mm) {
 						mmput(svm->mm);
+					} else {
+						rcu_assign_pointer(iommu->kernel_svm, NULL);
+						synchronize_rcu();
+					}
 					/* We mandate that no page faults may be outstanding
 					 * for the PASID when intel_svm_unbind_mm() is called.
 					 * If that is not obeyed, subtle errors will happen.
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 821273c..169bc84 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -391,6 +391,7 @@ enum {
 struct pasid_entry;
 struct pasid_state_entry;
 struct page_req_dsc;
+struct intel_svm;
 
 struct intel_iommu {
 	void __iomem	*reg; /* Pointer to hardware regs, virtual addr */
@@ -426,6 +427,7 @@ struct intel_iommu {
 	struct page_req_dsc *prq;
 	unsigned char prq_name[16];    /* Name for PRQ interrupt */
 	struct idr pasid_idr;
+	struct intel_svm __rcu *kernel_svm;
 #endif
 	struct q_inval  *qi;            /* Queued invalidation info */
 	u32 *iommu_state; /* Store iommu states between suspend and resume.*/
@@ -496,8 +498,10 @@ struct intel_svm {
 
 extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev);
 extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev);
+extern void intel_iommu_flush_kernel_pasid(unsigned long start, unsigned long end);
+#else
+#define intel_iommu_flush_kernel_pasid(start, end) do { ; } while(0)
 #endif
-
 extern const struct attribute_group *intel_iommu_groups[];
 
 #endif

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation


[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

^ permalink raw reply related	[flat|nested] 10+ messages in thread