linux-kernel.vger.kernel.org archive mirror
* [PATCH] svm/avic: iommu/amd: Flush IOMMU IRT after update all entries
@ 2019-03-20  8:14 Suthikulpanit, Suravee
  2019-03-25 13:46 ` joro
  2020-01-22 15:44 ` Paolo Bonzini
  0 siblings, 2 replies; 3+ messages in thread
From: Suthikulpanit, Suravee @ 2019-03-20  8:14 UTC (permalink / raw)
  To: linux-kernel, kvm, iommu; +Cc: joro, pbonzini, rkrcmar, Suthikulpanit, Suravee

When AVIC is enabled and the VM uses direct device assignment,
the interrupt remapping table (IRT) is used to keep track of which
destination APIC ID the IOMMU will inject the device interrupt to.

This means that every time a vCPU is blocked or context-switched (i.e.
in vcpu_blocking/unblocking() and vcpu_load/put()), the information
in the IRT must be updated and an IOMMU IRT flush command must be
issued.
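
Roughly, the call paths involved look like this (a simplified sketch
of the svm.c this patch applies to, for orientation only):

    svm_vcpu_blocking()/svm_vcpu_unblocking()    /* vCPU block/unblock */
        -> avic_set_running()
            -> avic_vcpu_load()/avic_vcpu_put()  /* also via vcpu_load/put */
                -> avic_update_iommu_vcpu_affinity()
                    -> amd_iommu_update_ga()     /* one call per IRT entry */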

The current implementation flushes the IOMMU IRT every time an entry
is modified. If the assigned device has a large number of interrupts
(and hence a large number of IRT entries), this adds a large amount
of overhead to every vCPU context switch. Instead, this can be
optimized by flushing the IRT only once per device per vCPU context
switch, after all of its IRT entries have been modified; the number
of flushes then scales with the number of assigned devices rather
than with the number of interrupt vectors.

The function amd_iommu_update_ga() is refactored to only update the
IRT entry, while the new function amd_iommu_sync_ga() is introduced
to allow the IRT flush to be issued separately.
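
In condensed form, the resulting call pattern is sketched below (this
uses for_each_set_bit() for brevity in place of the open-coded
find_next_bit() loop in the actual diff):

	list_for_each_entry(ir, &svm->ir_list, node) {
		/* Update the IRT entry only; the flush is deferred. */
		ret = amd_iommu_update_ga(cpu, r, ir->data, &devid);
		if (ret)
			break;
		set_bit(devid, svm->avic_devid_sync_bitmap);
	}

	/* Flush once per touched device, not once per entry. */
	for_each_set_bit(devid, svm->avic_devid_sync_bitmap,
			 AVIC_DEVID_BITMAP_SIZE) {
		clear_bit(devid, svm->avic_devid_sync_bitmap);
		amd_iommu_sync_ga(devid);
	}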

Cc: Joerg Roedel <joro@8bytes.org>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 arch/x86/kvm/svm.c        | 35 ++++++++++++++++++++++++++++++++++-
 drivers/iommu/amd_iommu.c | 20 +++++++++++++++++---
 include/linux/amd-iommu.h | 13 ++++++++++---
 3 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 47c4993448c7..a5c7ca5d70d3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -118,6 +118,8 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
 #define AVIC_GATAG_TO_VMID(x)		((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
 #define AVIC_GATAG_TO_VCPUID(x)		(x & AVIC_VCPU_ID_MASK)
 
+#define AVIC_DEVID_BITMAP_SIZE		(1 << 16)
+
 static bool erratum_383_found __read_mostly;
 
 static const u32 host_save_user_msrs[] = {
@@ -251,6 +253,12 @@ struct vcpu_svm {
 
 	/* which host CPU was used for running this vcpu */
 	unsigned int last_cpu;
+
+	/*
+	 * Bitmap of PCI devids whose AMD IOMMU interrupt remapping
+	 * table entries need to be flushed (one bit per 16-bit devid).
+	 */
+	unsigned long *avic_devid_sync_bitmap;
 };
 
 /*
@@ -1984,6 +1992,7 @@ static inline int
 avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
 {
 	int ret = 0;
+	int devid = 0;
 	unsigned long flags;
 	struct amd_svm_iommu_ir *ir;
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -2001,9 +2010,21 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
 		goto out;
 
 	list_for_each_entry(ir, &svm->ir_list, node) {
-		ret = amd_iommu_update_ga(cpu, r, ir->data);
+		ret = amd_iommu_update_ga(cpu, r, ir->data, &devid);
 		if (ret)
 			break;
+		set_bit(devid, svm->avic_devid_sync_bitmap);
+	}
+
+	/* Sync AMD IOMMU interrupt remapping table changes for each device. */
+	devid = find_next_bit(svm->avic_devid_sync_bitmap,
+			      AVIC_DEVID_BITMAP_SIZE, 0);
+
+	while (devid < AVIC_DEVID_BITMAP_SIZE) {
+		clear_bit(devid, svm->avic_devid_sync_bitmap);
+		ret = amd_iommu_sync_ga(devid);
+		devid = find_next_bit(svm->avic_devid_sync_bitmap,
+				      AVIC_DEVID_BITMAP_SIZE, devid + 1);
 	}
 out:
 	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
@@ -2107,6 +2128,13 @@ static int avic_init_vcpu(struct vcpu_svm *svm)
 	INIT_LIST_HEAD(&svm->ir_list);
 	spin_lock_init(&svm->ir_list_lock);
 
+	/* One bit per 16-bit PCI devid; zeroed by __GFP_ZERO. */
+	svm->avic_devid_sync_bitmap = (void *)__get_free_pages(
+					GFP_KERNEL | __GFP_ZERO,
+					get_order(AVIC_DEVID_BITMAP_SIZE/8));
+	if (!svm->avic_devid_sync_bitmap)
+		return -ENOMEM;
+
 	return ret;
 }
 
@@ -2221,6 +2249,11 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
 	__free_page(virt_to_page(svm->nested.hsave));
 	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
+
+	free_pages((unsigned long)svm->avic_devid_sync_bitmap,
+		   get_order(AVIC_DEVID_BITMAP_SIZE/8));
+	svm->avic_devid_sync_bitmap = NULL;
+
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
 	kmem_cache_free(kvm_vcpu_cache, svm);
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 2a7b78bb98b4..637bcc9192e5 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4499,7 +4499,20 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
 	return 0;
 }
 
-int amd_iommu_update_ga(int cpu, bool is_run, void *data)
+int amd_iommu_sync_ga(int devid)
+{
+	struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+
+	if (!iommu)
+		return -ENODEV;
+
+	iommu_flush_irt(iommu, devid);
+	iommu_completion_wait(iommu);
+	return 0;
+}
+EXPORT_SYMBOL(amd_iommu_sync_ga);
+
+int amd_iommu_update_ga(int cpu, bool is_run, void *data, int *id)
 {
 	unsigned long flags;
 	struct amd_iommu *iommu;
@@ -4521,6 +4534,9 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
 	if (!table)
 		return -ENODEV;
 
+	if (id)
+		*id = devid;
+
 	raw_spin_lock_irqsave(&table->lock, flags);
 
 	if (ref->lo.fields_vapic.guest_mode) {
@@ -4536,8 +4552,6 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
 
 	raw_spin_unlock_irqrestore(&table->lock, flags);
 
-	iommu_flush_irt(iommu, devid);
-	iommu_completion_wait(iommu);
 	return 0;
 }
 EXPORT_SYMBOL(amd_iommu_update_ga);
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 09751d349963..b94d4b33dfd7 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -193,8 +193,9 @@ static inline int amd_iommu_detect(void) { return -ENODEV; }
 /* IOMMU AVIC Function */
 extern int amd_iommu_register_ga_log_notifier(int (*notifier)(u32));
 
-extern int
-amd_iommu_update_ga(int cpu, bool is_run, void *data);
+extern int amd_iommu_update_ga(int cpu, bool is_run, void *data, int *devid);
+
+extern int amd_iommu_sync_ga(int devid);
 
 #else /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */
 
@@ -205,7 +206,13 @@ amd_iommu_register_ga_log_notifier(int (*notifier)(u32))
 }
 
 static inline int
-amd_iommu_update_ga(int cpu, bool is_run, void *data)
+amd_iommu_update_ga(int cpu, bool is_run, void *data, int *devid)
+{
+	return 0;
+}
+
+static inline int
+amd_iommu_sync_ga(int devid)
 {
 	return 0;
 }
-- 
2.17.1



* Re: [PATCH] svm/avic: iommu/amd: Flush IOMMU IRT after update all entries
  2019-03-20  8:14 [PATCH] svm/avic: iommu/amd: Flush IOMMU IRT after update all entries Suthikulpanit, Suravee
@ 2019-03-25 13:46 ` joro
  2020-01-22 15:44 ` Paolo Bonzini
  1 sibling, 0 replies; 3+ messages in thread
From: joro @ 2019-03-25 13:46 UTC (permalink / raw)
  To: Suthikulpanit, Suravee; +Cc: linux-kernel, kvm, iommu, pbonzini, rkrcmar

On Wed, Mar 20, 2019 at 08:14:56AM +0000, Suthikulpanit, Suravee wrote:
> When AVIC is enabled and the VM uses direct device assignment,
> the interrupt remapping table (IRT) is used to keep track of which
> destination APIC ID the IOMMU will inject the device interrupt to.
> 
> This means that every time a vCPU is blocked or context-switched (i.e.
> in vcpu_blocking/unblocking() and vcpu_load/put()), the information
> in the IRT must be updated and an IOMMU IRT flush command must be
> issued.
> 
> The current implementation flushes the IOMMU IRT every time an entry
> is modified. If the assigned device has a large number of interrupts
> (and hence a large number of IRT entries), this adds a large amount
> of overhead to every vCPU context switch. Instead, this can be
> optimized by flushing the IRT only once per device per vCPU context
> switch, after all of its IRT entries have been modified; the number
> of flushes then scales with the number of assigned devices rather
> than with the number of interrupt vectors.
> 
> The function amd_iommu_update_ga() is refactored to only update the
> IRT entry, while the new function amd_iommu_sync_ga() is introduced
> to allow the IRT flush to be issued separately.
> 
> Cc: Joerg Roedel <joro@8bytes.org>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> ---
>  arch/x86/kvm/svm.c        | 35 ++++++++++++++++++++++++++++++++++-
>  drivers/iommu/amd_iommu.c | 20 +++++++++++++++++---
>  include/linux/amd-iommu.h | 13 ++++++++++---
>  3 files changed, 61 insertions(+), 7 deletions(-)

For the IOMMU parts:

	Acked-by: Joerg Roedel <jroedel@suse.de>


* Re: [PATCH] svm/avic: iommu/amd: Flush IOMMU IRT after update all entries
  2019-03-20  8:14 [PATCH] svm/avic: iommu/amd: Flush IOMMU IRT after update all entries Suthikulpanit, Suravee
  2019-03-25 13:46 ` joro
@ 2020-01-22 15:44 ` Paolo Bonzini
  1 sibling, 0 replies; 3+ messages in thread
From: Paolo Bonzini @ 2020-01-22 15:44 UTC (permalink / raw)
  To: Suthikulpanit, Suravee, linux-kernel, kvm, iommu; +Cc: joro, rkrcmar

On 20/03/19 09:14, Suthikulpanit, Suravee wrote:
> When AVIC is enabled and the VM uses direct device assignment,
> the interrupt remapping table (IRT) is used to keep track of which
> destination APIC ID the IOMMU will inject the device interrupt to.
> 
> This means that every time a vCPU is blocked or context-switched (i.e.
> in vcpu_blocking/unblocking() and vcpu_load/put()), the information
> in the IRT must be updated and an IOMMU IRT flush command must be
> issued.
> 
> The current implementation flushes the IOMMU IRT every time an entry
> is modified. If the assigned device has a large number of interrupts
> (and hence a large number of IRT entries), this adds a large amount
> of overhead to every vCPU context switch. Instead, this can be
> optimized by flushing the IRT only once per device per vCPU context
> switch, after all of its IRT entries have been modified; the number
> of flushes then scales with the number of assigned devices rather
> than with the number of interrupt vectors.
> 
> The function amd_iommu_update_ga() is refactored to only update the
> IRT entry, while the new function amd_iommu_sync_ga() is introduced
> to allow the IRT flush to be issued separately.
> 
> Cc: Joerg Roedel <joro@8bytes.org>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> ---
>  arch/x86/kvm/svm.c        | 35 ++++++++++++++++++++++++++++++++++-
>  drivers/iommu/amd_iommu.c | 20 +++++++++++++++++---
>  include/linux/amd-iommu.h | 13 ++++++++++---
>  3 files changed, 61 insertions(+), 7 deletions(-)

I found this patch in my inbox...  I'd rather avoid allocating 8k of RAM
per vCPU.  Can you make it per-VM?
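
Something like the following, perhaps (an untested sketch only,
assuming the existing struct kvm_svm container and a dedicated lock,
since ir_list_lock is per-vCPU):

	struct kvm_svm {
		struct kvm kvm;

		/* ... existing AVIC fields ... */

		/* One 8 KiB bitmap shared by all vCPUs of the VM. */
		spinlock_t avic_devid_sync_lock;
		unsigned long *avic_devid_sync_bitmap;
	};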

Paolo

> +	/*
> +	 * Bitmap of PCI devids whose AMD IOMMU interrupt remapping
> +	 * table entries need to be flushed (one bit per 16-bit devid).
> +	 */
> +	unsigned long *avic_devid_sync_bitmap;
>  };
>  
>  /*
> @@ -1984,6 +1992,7 @@ static inline int
>  avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
>  {
>  	int ret = 0;
> +	int devid = 0;
>  	unsigned long flags;
>  	struct amd_svm_iommu_ir *ir;
>  	struct vcpu_svm *svm = to_svm(vcpu);
> @@ -2001,9 +2010,21 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
>  		goto out;
>  
>  	list_for_each_entry(ir, &svm->ir_list, node) {
> -		ret = amd_iommu_update_ga(cpu, r, ir->data);
> +		ret = amd_iommu_update_ga(cpu, r, ir->data, &devid);
>  		if (ret)
>  			break;
> +		set_bit(devid, svm->avic_devid_sync_bitmap);
> +	}
> +
> +	/* Sync AMD IOMMU interrupt remapping table changes for each device. */
> +	devid = find_next_bit(svm->avic_devid_sync_bitmap,
> +			      AVIC_DEVID_BITMAP_SIZE, 0);
> +
> +	while (devid < AVIC_DEVID_BITMAP_SIZE) {
> +		clear_bit(devid, svm->avic_devid_sync_bitmap);
> +		ret = amd_iommu_sync_ga(devid);
> +		devid = find_next_bit(svm->avic_devid_sync_bitmap,
> +				      AVIC_DEVID_BITMAP_SIZE, devid + 1);
>  	}
>  out:
>  	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
> @@ -2107,6 +2128,13 @@ static int avic_init_vcpu(struct vcpu_svm *svm)
>  	INIT_LIST_HEAD(&svm->ir_list);
>  	spin_lock_init(&svm->ir_list_lock);
>  
> +	/* One bit per 16-bit PCI devid; zeroed by __GFP_ZERO. */
> +	svm->avic_devid_sync_bitmap = (void *)__get_free_pages(
> +					GFP_KERNEL | __GFP_ZERO,
> +					get_order(AVIC_DEVID_BITMAP_SIZE/8));
> +	if (!svm->avic_devid_sync_bitmap)
> +		return -ENOMEM;
> +
>  	return ret;
>  }
>  
> @@ -2221,6 +2249,11 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
>  	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
>  	__free_page(virt_to_page(svm->nested.hsave));
>  	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
> +
> +	free_pages((unsigned long)svm->avic_devid_sync_bitmap,
> +		   get_order(AVIC_DEVID_BITMAP_SIZE/8));
> +	svm->avic_devid_sync_bitmap = NULL;
> +
>  	kvm_vcpu_uninit(vcpu);
>  	kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
>  	kmem_cache_free(kvm_vcpu_cache, svm);
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> index 2a7b78bb98b4..637bcc9192e5 100644
> --- a/drivers/iommu/amd_iommu.c
> +++ b/drivers/iommu/amd_iommu.c
> @@ -4499,7 +4499,20 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
>  	return 0;
>  }
>  
> -int amd_iommu_update_ga(int cpu, bool is_run, void *data)
> +int amd_iommu_sync_ga(int devid)
> +{
> +	struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
> +
> +	if (!iommu)
> +		return -ENODEV;
> +
> +	iommu_flush_irt(iommu, devid);
> +	iommu_completion_wait(iommu);
> +	return 0;
> +}
> +EXPORT_SYMBOL(amd_iommu_sync_ga);
> +
> +int amd_iommu_update_ga(int cpu, bool is_run, void *data, int *id)
>  {
>  	unsigned long flags;
>  	struct amd_iommu *iommu;
> @@ -4521,6 +4534,9 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
>  	if (!table)
>  		return -ENODEV;
>  
> +	if (id)
> +		*id = devid;
> +
>  	raw_spin_lock_irqsave(&table->lock, flags);
>  
>  	if (ref->lo.fields_vapic.guest_mode) {
> @@ -4536,8 +4552,6 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
>  
>  	raw_spin_unlock_irqrestore(&table->lock, flags);
>  
> -	iommu_flush_irt(iommu, devid);
> -	iommu_completion_wait(iommu);
>  	return 0;
>  }
>  EXPORT_SYMBOL(amd_iommu_update_ga);
> diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
> index 09751d349963..b94d4b33dfd7 100644
> --- a/include/linux/amd-iommu.h
> +++ b/include/linux/amd-iommu.h
> @@ -193,8 +193,9 @@ static inline int amd_iommu_detect(void) { return -ENODEV; }
>  /* IOMMU AVIC Function */
>  extern int amd_iommu_register_ga_log_notifier(int (*notifier)(u32));
>  
> -extern int
> -amd_iommu_update_ga(int cpu, bool is_run, void *data);
> +extern int amd_iommu_update_ga(int cpu, bool is_run, void *data, int *devid);
> +
> +extern int amd_iommu_sync_ga(int devid);
>  
>  #else /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */
>  
> @@ -205,7 +206,13 @@ amd_iommu_register_ga_log_notifier(int (*notifier)(u32))
>  }
>  
>  static inline int
> -amd_iommu_update_ga(int cpu, bool is_run, void *data)
> +amd_iommu_update_ga(int cpu, bool is_run, void *data, int *devid)
> +{
> +	return 0;
> +}
> +
> +static inline int
> +amd_iommu_sync_ga(int devid)
>  {
>  	return 0;
>  }
> 


