linux-kernel.vger.kernel.org archive mirror
* [PATCH] svm/avic: iommu/amd: Flush IOMMU IRT after update all entries
@ 2019-03-20  8:14 Suthikulpanit, Suravee
  2019-03-25 13:46 ` joro
  2020-01-22 15:44 ` Paolo Bonzini
  0 siblings, 2 replies; 3+ messages in thread
From: Suthikulpanit, Suravee @ 2019-03-20  8:14 UTC (permalink / raw)
  To: linux-kernel, kvm, iommu; +Cc: joro, pbonzini, rkrcmar, Suthikulpanit, Suravee

When AVIC is enabled and the VM uses direct device assignment,
the interrupt remapping table (IRT) is used to keep track of which
destination APIC ID the IOMMU will inject the device interrupt to.

This means that every time a vCPU is blocked or context-switched (i.e.
in vcpu_blocking/unblocking() and vcpu_load/put()), the information
in the IRT must be updated and an IOMMU IRT flush command must be
issued.
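
Roughly, the call paths involved look like this (a simplified sketch
of the svm.c this patch applies to, for orientation only):

    svm_vcpu_blocking()/svm_vcpu_unblocking()    /* vCPU block/unblock */
        -> avic_set_running()
            -> avic_vcpu_load()/avic_vcpu_put()  /* also via vcpu_load/put */
                -> avic_update_iommu_vcpu_affinity()
                    -> amd_iommu_update_ga()     /* one call per IRT entry */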

The current implementation flushes the IOMMU IRT every time an entry
is modified. If the assigned device has a large number of interrupts
(and hence a large number of IRT entries), this adds a large amount
of overhead to every vCPU context switch. Instead, this can be
optimized by flushing the IRT only once per device per vCPU context
switch, after all of its IRT entries have been modified; the number
of flushes then scales with the number of assigned devices rather
than with the number of interrupt vectors.

The function amd_iommu_update_ga() is refactored to only update the
IRT entry, while the new function amd_iommu_sync_ga() is introduced
to allow the IRT flush to be issued separately.
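
In condensed form, the resulting call pattern is sketched below (this
uses for_each_set_bit() for brevity in place of the open-coded
find_next_bit() loop in the actual diff):

	list_for_each_entry(ir, &svm->ir_list, node) {
		/* Update the IRT entry only; the flush is deferred. */
		ret = amd_iommu_update_ga(cpu, r, ir->data, &devid);
		if (ret)
			break;
		set_bit(devid, svm->avic_devid_sync_bitmap);
	}

	/* Flush once per touched device, not once per entry. */
	for_each_set_bit(devid, svm->avic_devid_sync_bitmap,
			 AVIC_DEVID_BITMAP_SIZE) {
		clear_bit(devid, svm->avic_devid_sync_bitmap);
		amd_iommu_sync_ga(devid);
	}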

Cc: Joerg Roedel <joro@8bytes.org>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 arch/x86/kvm/svm.c        | 35 ++++++++++++++++++++++++++++++++++-
 drivers/iommu/amd_iommu.c | 20 +++++++++++++++++---
 include/linux/amd-iommu.h | 13 ++++++++++---
 3 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 47c4993448c7..a5c7ca5d70d3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -118,6 +118,8 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
 #define AVIC_GATAG_TO_VMID(x)		((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
 #define AVIC_GATAG_TO_VCPUID(x)		(x & AVIC_VCPU_ID_MASK)
 
+#define AVIC_DEVID_BITMAP_SIZE		(1 << 16)
+
 static bool erratum_383_found __read_mostly;
 
 static const u32 host_save_user_msrs[] = {
@@ -251,6 +253,12 @@ struct vcpu_svm {
 
 	/* which host CPU was used for running this vcpu */
 	unsigned int last_cpu;
+
+	/*
+	 * Bitmap of PCI devids whose AMD IOMMU interrupt remapping
+	 * table entries need to be flushed (one bit per 16-bit devid).
+	 */
+	unsigned long *avic_devid_sync_bitmap;
 };
 
 /*
@@ -1984,6 +1992,7 @@ static inline int
 avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
 {
 	int ret = 0;
+	int devid = 0;
 	unsigned long flags;
 	struct amd_svm_iommu_ir *ir;
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -2001,9 +2010,21 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
 		goto out;
 
 	list_for_each_entry(ir, &svm->ir_list, node) {
-		ret = amd_iommu_update_ga(cpu, r, ir->data);
+		ret = amd_iommu_update_ga(cpu, r, ir->data, &devid);
 		if (ret)
 			break;
+		set_bit(devid, svm->avic_devid_sync_bitmap);
+	}
+
+	/* Sync AMD IOMMU interrupt remapping table changes for each device. */
+	devid = find_next_bit(svm->avic_devid_sync_bitmap,
+			      AVIC_DEVID_BITMAP_SIZE, 0);
+
+	while (devid < AVIC_DEVID_BITMAP_SIZE) {
+		clear_bit(devid, svm->avic_devid_sync_bitmap);
+		ret = amd_iommu_sync_ga(devid);
+		devid = find_next_bit(svm->avic_devid_sync_bitmap,
+				      AVIC_DEVID_BITMAP_SIZE, devid + 1);
 	}
 out:
 	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
@@ -2107,6 +2128,13 @@ static int avic_init_vcpu(struct vcpu_svm *svm)
 	INIT_LIST_HEAD(&svm->ir_list);
 	spin_lock_init(&svm->ir_list_lock);
 
+	/* One bit per 16-bit PCI devid; zeroed by __GFP_ZERO. */
+	svm->avic_devid_sync_bitmap = (void *)__get_free_pages(
+					GFP_KERNEL | __GFP_ZERO,
+					get_order(AVIC_DEVID_BITMAP_SIZE/8));
+	if (!svm->avic_devid_sync_bitmap)
+		return -ENOMEM;
+
 	return ret;
 }
 
@@ -2221,6 +2249,11 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
 	__free_page(virt_to_page(svm->nested.hsave));
 	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
+
+	free_pages((unsigned long)svm->avic_devid_sync_bitmap,
+		   get_order(AVIC_DEVID_BITMAP_SIZE/8));
+	svm->avic_devid_sync_bitmap = NULL;
+
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
 	kmem_cache_free(kvm_vcpu_cache, svm);
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 2a7b78bb98b4..637bcc9192e5 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4499,7 +4499,20 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
 	return 0;
 }
 
-int amd_iommu_update_ga(int cpu, bool is_run, void *data)
+int amd_iommu_sync_ga(int devid)
+{
+	struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+
+	if (!iommu)
+		return -ENODEV;
+
+	iommu_flush_irt(iommu, devid);
+	iommu_completion_wait(iommu);
+	return 0;
+}
+EXPORT_SYMBOL(amd_iommu_sync_ga);
+
+int amd_iommu_update_ga(int cpu, bool is_run, void *data, int *id)
 {
 	unsigned long flags;
 	struct amd_iommu *iommu;
@@ -4521,6 +4534,9 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
 	if (!table)
 		return -ENODEV;
 
+	if (id)
+		*id = devid;
+
 	raw_spin_lock_irqsave(&table->lock, flags);
 
 	if (ref->lo.fields_vapic.guest_mode) {
@@ -4536,8 +4552,6 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
 
 	raw_spin_unlock_irqrestore(&table->lock, flags);
 
-	iommu_flush_irt(iommu, devid);
-	iommu_completion_wait(iommu);
 	return 0;
 }
 EXPORT_SYMBOL(amd_iommu_update_ga);
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 09751d349963..b94d4b33dfd7 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -193,8 +193,9 @@ static inline int amd_iommu_detect(void) { return -ENODEV; }
 /* IOMMU AVIC Function */
 extern int amd_iommu_register_ga_log_notifier(int (*notifier)(u32));
 
-extern int
-amd_iommu_update_ga(int cpu, bool is_run, void *data);
+extern int amd_iommu_update_ga(int cpu, bool is_run, void *data, int *devid);
+
+extern int amd_iommu_sync_ga(int devid);
 
 #else /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */
 
@@ -205,7 +206,13 @@ amd_iommu_register_ga_log_notifier(int (*notifier)(u32))
 }
 
 static inline int
-amd_iommu_update_ga(int cpu, bool is_run, void *data)
+amd_iommu_update_ga(int cpu, bool is_run, void *data, int *devid)
+{
+	return 0;
+}
+
+static inline int
+amd_iommu_sync_ga(int devid)
 {
 	return 0;
 }
-- 
2.17.1



* Re: [PATCH] svm/avic: iommu/amd: Flush IOMMU IRT after update all entries
  2019-03-20  8:14 [PATCH] svm/avic: iommu/amd: Flush IOMMU IRT after update all entries Suthikulpanit, Suravee
@ 2019-03-25 13:46 ` joro
  2020-01-22 15:44 ` Paolo Bonzini
  1 sibling, 0 replies; 3+ messages in thread
From: joro @ 2019-03-25 13:46 UTC (permalink / raw)
  To: Suthikulpanit, Suravee; +Cc: linux-kernel, kvm, iommu, pbonzini, rkrcmar

On Wed, Mar 20, 2019 at 08:14:56AM +0000, Suthikulpanit, Suravee wrote:
> When AVIC is enabled and the VM uses direct device assignment,
> the interrupt remapping table (IRT) is used to keep track of which
> destination APIC ID the IOMMU will inject the device interrupt to.
> 
> This means that every time a vCPU is blocked or context-switched (i.e.
> in vcpu_blocking/unblocking() and vcpu_load/put()), the information
> in the IRT must be updated and an IOMMU IRT flush command must be
> issued.
> 
> The current implementation flushes the IOMMU IRT every time an entry
> is modified. If the assigned device has a large number of interrupts
> (and hence a large number of IRT entries), this adds a large amount
> of overhead to every vCPU context switch. Instead, this can be
> optimized by flushing the IRT only once per device per vCPU context
> switch, after all of its IRT entries have been modified; the number
> of flushes then scales with the number of assigned devices rather
> than with the number of interrupt vectors.
> 
> The function amd_iommu_update_ga() is refactored to only update the
> IRT entry, while the new function amd_iommu_sync_ga() is introduced
> to allow the IRT flush to be issued separately.
> 
> Cc: Joerg Roedel <joro@8bytes.org>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> ---
>  arch/x86/kvm/svm.c        | 35 ++++++++++++++++++++++++++++++++++-
>  drivers/iommu/amd_iommu.c | 20 +++++++++++++++++---
>  include/linux/amd-iommu.h | 13 ++++++++++---
>  3 files changed, 61 insertions(+), 7 deletions(-)

For the IOMMU parts:

	Acked-by: Joerg Roedel <jroedel@suse.de>


* Re: [PATCH] svm/avic: iommu/amd: Flush IOMMU IRT after update all entries
  2019-03-20  8:14 [PATCH] svm/avic: iommu/amd: Flush IOMMU IRT after update all entries Suthikulpanit, Suravee
  2019-03-25 13:46 ` joro
@ 2020-01-22 15:44 ` Paolo Bonzini
  1 sibling, 0 replies; 3+ messages in thread
From: Paolo Bonzini @ 2020-01-22 15:44 UTC (permalink / raw)
  To: Suthikulpanit, Suravee, linux-kernel, kvm, iommu; +Cc: joro, rkrcmar

On 20/03/19 09:14, Suthikulpanit, Suravee wrote:
> When AVIC is enabled and the VM uses direct device assignment,
> the interrupt remapping table (IRT) is used to keep track of which
> destination APIC ID the IOMMU will inject the device interrupt to.
> 
> This means that every time a vCPU is blocked or context-switched (i.e.
> in vcpu_blocking/unblocking() and vcpu_load/put()), the information
> in the IRT must be updated and an IOMMU IRT flush command must be
> issued.
> 
> The current implementation flushes the IOMMU IRT every time an entry
> is modified. If the assigned device has a large number of interrupts
> (and hence a large number of IRT entries), this adds a large amount
> of overhead to every vCPU context switch. Instead, this can be
> optimized by flushing the IRT only once per device per vCPU context
> switch, after all of its IRT entries have been modified; the number
> of flushes then scales with the number of assigned devices rather
> than with the number of interrupt vectors.
> 
> The function amd_iommu_update_ga() is refactored to only update the
> IRT entry, while the new function amd_iommu_sync_ga() is introduced
> to allow the IRT flush to be issued separately.
> 
> Cc: Joerg Roedel <joro@8bytes.org>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> ---
>  arch/x86/kvm/svm.c        | 35 ++++++++++++++++++++++++++++++++++-
>  drivers/iommu/amd_iommu.c | 20 +++++++++++++++++---
>  include/linux/amd-iommu.h | 13 ++++++++++---
>  3 files changed, 61 insertions(+), 7 deletions(-)

I found this patch in my inbox...  I'd rather avoid allocating 8k of RAM
per vCPU.  Can you make it per-VM?
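
Something like the following, perhaps (an untested sketch only,
assuming the existing struct kvm_svm container and a dedicated lock,
since ir_list_lock is per-vCPU):

	struct kvm_svm {
		struct kvm kvm;

		/* ... existing AVIC fields ... */

		/* One 8 KiB bitmap shared by all vCPUs of the VM. */
		spinlock_t avic_devid_sync_lock;
		unsigned long *avic_devid_sync_bitmap;
	};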

Paolo

> +	/*
> +	 * Bitmap of PCI devids whose AMD IOMMU interrupt remapping
> +	 * table entries need to be flushed (one bit per 16-bit devid).
> +	 */
> +	unsigned long *avic_devid_sync_bitmap;
>  };
>  
>  /*
> @@ -1984,6 +1992,7 @@ static inline int
>  avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
>  {
>  	int ret = 0;
> +	int devid = 0;
>  	unsigned long flags;
>  	struct amd_svm_iommu_ir *ir;
>  	struct vcpu_svm *svm = to_svm(vcpu);
> @@ -2001,9 +2010,21 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
>  		goto out;
>  
>  	list_for_each_entry(ir, &svm->ir_list, node) {
> -		ret = amd_iommu_update_ga(cpu, r, ir->data);
> +		ret = amd_iommu_update_ga(cpu, r, ir->data, &devid);
>  		if (ret)
>  			break;
> +		set_bit(devid, svm->avic_devid_sync_bitmap);
> +	}
> +
> +	/* Sync AMD IOMMU interrupt remapping table changes for each device. */
> +	devid = find_next_bit(svm->avic_devid_sync_bitmap,
> +			      AVIC_DEVID_BITMAP_SIZE, 0);
> +
> +	while (devid < AVIC_DEVID_BITMAP_SIZE) {
> +		clear_bit(devid, svm->avic_devid_sync_bitmap);
> +		ret = amd_iommu_sync_ga(devid);
> +		devid = find_next_bit(svm->avic_devid_sync_bitmap,
> +				      AVIC_DEVID_BITMAP_SIZE, devid + 1);
>  	}
>  out:
>  	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
> @@ -2107,6 +2128,13 @@ static int avic_init_vcpu(struct vcpu_svm *svm)
>  	INIT_LIST_HEAD(&svm->ir_list);
>  	spin_lock_init(&svm->ir_list_lock);
>  
> +	/* One bit per 16-bit PCI devid; zeroed by __GFP_ZERO. */
> +	svm->avic_devid_sync_bitmap = (void *)__get_free_pages(
> +					GFP_KERNEL | __GFP_ZERO,
> +					get_order(AVIC_DEVID_BITMAP_SIZE/8));
> +	if (!svm->avic_devid_sync_bitmap)
> +		return -ENOMEM;
> +
>  	return ret;
>  }
>  
> @@ -2221,6 +2249,11 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
>  	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
>  	__free_page(virt_to_page(svm->nested.hsave));
>  	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
> +
> +	free_pages((unsigned long)svm->avic_devid_sync_bitmap,
> +		   get_order(AVIC_DEVID_BITMAP_SIZE/8));
> +	svm->avic_devid_sync_bitmap = NULL;
> +
>  	kvm_vcpu_uninit(vcpu);
>  	kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
>  	kmem_cache_free(kvm_vcpu_cache, svm);
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> index 2a7b78bb98b4..637bcc9192e5 100644
> --- a/drivers/iommu/amd_iommu.c
> +++ b/drivers/iommu/amd_iommu.c
> @@ -4499,7 +4499,20 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
>  	return 0;
>  }
>  
> -int amd_iommu_update_ga(int cpu, bool is_run, void *data)
> +int amd_iommu_sync_ga(int devid)
> +{
> +	struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
> +
> +	if (!iommu)
> +		return -ENODEV;
> +
> +	iommu_flush_irt(iommu, devid);
> +	iommu_completion_wait(iommu);
> +	return 0;
> +}
> +EXPORT_SYMBOL(amd_iommu_sync_ga);
> +
> +int amd_iommu_update_ga(int cpu, bool is_run, void *data, int *id)
>  {
>  	unsigned long flags;
>  	struct amd_iommu *iommu;
> @@ -4521,6 +4534,9 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
>  	if (!table)
>  		return -ENODEV;
>  
> +	if (id)
> +		*id = devid;
> +
>  	raw_spin_lock_irqsave(&table->lock, flags);
>  
>  	if (ref->lo.fields_vapic.guest_mode) {
> @@ -4536,8 +4552,6 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
>  
>  	raw_spin_unlock_irqrestore(&table->lock, flags);
>  
> -	iommu_flush_irt(iommu, devid);
> -	iommu_completion_wait(iommu);
>  	return 0;
>  }
>  EXPORT_SYMBOL(amd_iommu_update_ga);
> diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
> index 09751d349963..b94d4b33dfd7 100644
> --- a/include/linux/amd-iommu.h
> +++ b/include/linux/amd-iommu.h
> @@ -193,8 +193,9 @@ static inline int amd_iommu_detect(void) { return -ENODEV; }
>  /* IOMMU AVIC Function */
>  extern int amd_iommu_register_ga_log_notifier(int (*notifier)(u32));
>  
> -extern int
> -amd_iommu_update_ga(int cpu, bool is_run, void *data);
> +extern int amd_iommu_update_ga(int cpu, bool is_run, void *data, int *devid);
> +
> +extern int amd_iommu_sync_ga(int devid);
>  
>  #else /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */
>  
> @@ -205,7 +206,13 @@ amd_iommu_register_ga_log_notifier(int (*notifier)(u32))
>  }
>  
>  static inline int
> -amd_iommu_update_ga(int cpu, bool is_run, void *data)
> +amd_iommu_update_ga(int cpu, bool is_run, void *data, int *devid)
> +{
> +	return 0;
> +}
> +
> +static inline int
> +amd_iommu_sync_ga(int devid)
>  {
>  	return 0;
>  }
> 


