* [PATCH v2] arm/arm64: KVM: Perform local TLB invalidation when multiplexing vcpus on a single CPU
@ 2016-10-28 10:27 ` Marc Zyngier
  0 siblings, 0 replies; 8+ messages in thread
From: Marc Zyngier @ 2016-10-28 10:27 UTC (permalink / raw)
  To: linux-arm-kernel, kvm, kvmarm; +Cc: Will Deacon

Architecturally, TLBs are private to the (physical) CPU they're
associated with. But when multiple vcpus from the same VM are
being multiplexed on the same CPU, the TLBs are not private
to the vcpus (and are actually shared across the VMID).

Let's consider the following scenario:

- vcpu-0 maps PA to VA
- vcpu-1 maps PA' to VA

If run on the same physical CPU, vcpu-1 can hit TLB entries generated
by vcpu-0 accesses, and access the wrong physical page.

The solution to this is to keep a per-VM map of which vcpu ran last
on each given physical CPU, and invalidate local TLBs when switching
to a different vcpu from the same VM.
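
In rough pseudo-C (purely illustrative; the real hooks are in the diff
below), the bookkeeping boils down to doing this whenever a vcpu is
about to run on a physical CPU:

	int *last = per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu);

	if (*last != vcpu->vcpu_id) {
		if (*last != -1)	/* another vcpu of this VM ran here */
			vcpu->arch.tlb_vmid_stale = true;
		*last = vcpu->vcpu_id;	/* remember who ran last */
	}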

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
Fixed comments, added Mark's RB.

 arch/arm/include/asm/kvm_host.h   | 11 ++++++++++-
 arch/arm/include/asm/kvm_hyp.h    |  1 +
 arch/arm/kvm/arm.c                | 35 ++++++++++++++++++++++++++++++++++-
 arch/arm/kvm/hyp/switch.c         |  9 +++++++++
 arch/arm64/include/asm/kvm_host.h | 11 ++++++++++-
 arch/arm64/kvm/hyp/switch.c       |  8 ++++++++
 6 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 2d19e02..7290de6 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -57,6 +57,9 @@ struct kvm_arch {
 	/* VTTBR value associated with below pgd and vmid */
 	u64    vttbr;
 
+	/* The last vcpu id that ran on each physical CPU */
+	int __percpu *last_vcpu_ran;
+
 	/* Timer */
 	struct arch_timer_kvm	timer;
 
@@ -174,6 +177,13 @@ struct kvm_vcpu_arch {
 	/* vcpu power-off state */
 	bool power_off;
 
+	/*
+	 * Local TLBs potentially contain conflicting entries from
+	 * another vCPU within this VMID. All entries for this VMID must
+	 * be invalidated from (local) TLBs before we run this vCPU.
+	 */
+	bool tlb_vmid_stale;
+
 	 /* Don't run the guest (internal implementation need) */
 	bool pause;
 
@@ -292,7 +302,6 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
 static inline void kvm_arm_init_debug(void) {}
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
index 343135e..5850890 100644
--- a/arch/arm/include/asm/kvm_hyp.h
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -71,6 +71,7 @@
 #define ICIALLUIS	__ACCESS_CP15(c7, 0, c1, 0)
 #define ATS1CPR		__ACCESS_CP15(c7, 0, c8, 0)
 #define TLBIALLIS	__ACCESS_CP15(c8, 0, c3, 0)
+#define TLBIALL		__ACCESS_CP15(c8, 0, c7, 0)
 #define TLBIALLNSNHIS	__ACCESS_CP15(c8, 4, c3, 4)
 #define PRRR		__ACCESS_CP15(c10, 0, c2, 0)
 #define NMRR		__ACCESS_CP15(c10, 0, c2, 1)
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 08bb84f..e0d93cd 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -114,11 +114,18 @@ void kvm_arch_check_processor_compat(void *rtn)
  */
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
-	int ret = 0;
+	int ret, cpu;
 
 	if (type)
 		return -EINVAL;
 
+	kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
+	if (!kvm->arch.last_vcpu_ran)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu)
+		*per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1;
+
 	ret = kvm_alloc_stage2_pgd(kvm);
 	if (ret)
 		goto out_fail_alloc;
@@ -141,6 +148,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 out_free_stage2_pgd:
 	kvm_free_stage2_pgd(kvm);
 out_fail_alloc:
+	free_percpu(kvm->arch.last_vcpu_ran);
+	kvm->arch.last_vcpu_ran = NULL;
 	return ret;
 }
 
@@ -168,6 +177,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	int i;
 
+	free_percpu(kvm->arch.last_vcpu_ran);
+	kvm->arch.last_vcpu_ran = NULL;
+
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		if (kvm->vcpus[i]) {
 			kvm_arch_vcpu_free(kvm->vcpus[i]);
@@ -310,6 +322,27 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+	int *last_ran;
+
+	last_ran = per_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran, cpu);
+
+	/*
+	 * We might get preempted before the vCPU actually runs, but
+	 * this is fine. Our TLBI stays pending until we actually make
+	 * it to __activate_vm, so we won't miss a TLBI. If another
+	 * vCPU gets scheduled, it will see our vcpu_id in last_ran,
+	 * and pend a TLBI for itself.
+	 */
+	if (*last_ran != vcpu->vcpu_id) {
+		if (*last_ran != -1)
+			vcpu->arch.tlb_vmid_stale = true;
+
+		*last_ran = vcpu->vcpu_id;
+	}
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	vcpu->cpu = cpu;
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index 92678b7..a411762 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -75,6 +75,15 @@ static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
 {
 	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
 	write_sysreg(kvm->arch.vttbr, VTTBR);
+	if (vcpu->arch.tlb_vmid_stale) {
+		/* Force vttbr to be written */
+		isb();
+		/* Local invalidate only for this VMID */
+		write_sysreg(0, TLBIALL);
+		dsb(nsh);
+		vcpu->arch.tlb_vmid_stale = false;
+	}
+
 	write_sysreg(vcpu->arch.midr, VPIDR);
 }
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index bd94e67..0f62829 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -62,6 +62,9 @@ struct kvm_arch {
 	/* VTTBR value associated with above pgd and vmid */
 	u64    vttbr;
 
+	/* The last vcpu id that ran on each physical CPU */
+	int __percpu *last_vcpu_ran;
+
 	/* The maximum number of vCPUs depends on the used GIC model */
 	int max_vcpus;
 
@@ -252,6 +255,13 @@ struct kvm_vcpu_arch {
 	/* vcpu power-off state */
 	bool power_off;
 
+	/*
+	 * Local TLBs potentially contain conflicting entries from
+	 * another vCPU within this VMID. All entries for this VMID must
+	 * be invalidated from (local) TLBs before we run this vCPU.
+	 */
+	bool tlb_vmid_stale;
+
 	/* Don't run the guest (internal implementation need) */
 	bool pause;
 
@@ -368,7 +378,6 @@ static inline void __cpu_reset_hyp_mode(unsigned long vector_ptr,
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
 void kvm_arm_init_debug(void);
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 83037cd..99d0f33 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -131,6 +131,14 @@ static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
 {
 	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
 	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	if (vcpu->arch.tlb_vmid_stale) {
+		/* Force vttbr_el2 to be written */
+		isb();
+		/* Local invalidate only for this VMID */
+		asm volatile("tlbi vmalle1" : : );
+		dsb(nsh);
+		vcpu->arch.tlb_vmid_stale = false;
+	}
 }
 
 static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
-- 
2.1.4

* Re: [PATCH v2] arm/arm64: KVM: Perform local TLB invalidation when multiplexing vcpus on a single CPU
  2016-10-28 10:27 ` Marc Zyngier
@ 2016-11-01  9:04   ` Christoffer Dall
  -1 siblings, 0 replies; 8+ messages in thread
From: Christoffer Dall @ 2016-11-01  9:04 UTC (permalink / raw)
  To: Marc Zyngier; +Cc: linux-arm-kernel, kvm, kvmarm, Mark Rutland, Will Deacon

On Fri, Oct 28, 2016 at 11:27:50AM +0100, Marc Zyngier wrote:
> Architecturally, TLBs are private to the (physical) CPU they're
> associated with. But when multiple vcpus from the same VM are
> being multiplexed on the same CPU, the TLBs are not private
> to the vcpus (and are actually shared across the VMID).
> 
> Let's consider the following scenario:
> 
> - vcpu-0 maps PA to VA
> - vcpu-1 maps PA' to VA
> 
> If run on the same physical CPU, vcpu-1 can hit TLB entries generated
> by vcpu-0 accesses, and access the wrong physical page.
> 
> The solution to this is to keep a per-VM map of which vcpu ran last
> on each given physical CPU, and invalidate local TLBs when switching
> to a different vcpu from the same VM.
> 
> Reviewed-by: Mark Rutland <mark.rutland@arm.com>
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
> Fixed comments, added Mark's RB.
> 
>  arch/arm/include/asm/kvm_host.h   | 11 ++++++++++-
>  arch/arm/include/asm/kvm_hyp.h    |  1 +
>  arch/arm/kvm/arm.c                | 35 ++++++++++++++++++++++++++++++++++-
>  arch/arm/kvm/hyp/switch.c         |  9 +++++++++
>  arch/arm64/include/asm/kvm_host.h | 11 ++++++++++-
>  arch/arm64/kvm/hyp/switch.c       |  8 ++++++++
>  6 files changed, 72 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index 2d19e02..7290de6 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -57,6 +57,9 @@ struct kvm_arch {
>  	/* VTTBR value associated with below pgd and vmid */
>  	u64    vttbr;
>  
> +	/* The last vcpu id that ran on each physical CPU */
> +	int __percpu *last_vcpu_ran;
> +
>  	/* Timer */
>  	struct arch_timer_kvm	timer;
>  
> @@ -174,6 +177,13 @@ struct kvm_vcpu_arch {
>  	/* vcpu power-off state */
>  	bool power_off;
>  
> +	/*
> +	 * Local TLBs potentially contain conflicting entries from
> +	 * another vCPU within this VMID. All entries for this VMID must
> +	 * be invalidated from (local) TLBs before we run this vCPU.
> +	 */
> +	bool tlb_vmid_stale;
> +
>  	 /* Don't run the guest (internal implementation need) */
>  	bool pause;
>  
> @@ -292,7 +302,6 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
>  static inline void kvm_arch_hardware_unsetup(void) {}
>  static inline void kvm_arch_sync_events(struct kvm *kvm) {}
>  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
> -static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>  
>  static inline void kvm_arm_init_debug(void) {}
> diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
> index 343135e..5850890 100644
> --- a/arch/arm/include/asm/kvm_hyp.h
> +++ b/arch/arm/include/asm/kvm_hyp.h
> @@ -71,6 +71,7 @@
>  #define ICIALLUIS	__ACCESS_CP15(c7, 0, c1, 0)
>  #define ATS1CPR		__ACCESS_CP15(c7, 0, c8, 0)
>  #define TLBIALLIS	__ACCESS_CP15(c8, 0, c3, 0)
> +#define TLBIALL		__ACCESS_CP15(c8, 0, c7, 0)
>  #define TLBIALLNSNHIS	__ACCESS_CP15(c8, 4, c3, 4)
>  #define PRRR		__ACCESS_CP15(c10, 0, c2, 0)
>  #define NMRR		__ACCESS_CP15(c10, 0, c2, 1)
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index 08bb84f..e0d93cd 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -114,11 +114,18 @@ void kvm_arch_check_processor_compat(void *rtn)
>   */
>  int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>  {
> -	int ret = 0;
> +	int ret, cpu;
>  
>  	if (type)
>  		return -EINVAL;
>  
> +	kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
> +	if (!kvm->arch.last_vcpu_ran)
> +		return -ENOMEM;
> +
> +	for_each_possible_cpu(cpu)
> +		*per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1;
> +
>  	ret = kvm_alloc_stage2_pgd(kvm);
>  	if (ret)
>  		goto out_fail_alloc;
> @@ -141,6 +148,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>  out_free_stage2_pgd:
>  	kvm_free_stage2_pgd(kvm);
>  out_fail_alloc:
> +	free_percpu(kvm->arch.last_vcpu_ran);
> +	kvm->arch.last_vcpu_ran = NULL;
>  	return ret;
>  }
>  
> @@ -168,6 +177,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>  {
>  	int i;
>  
> +	free_percpu(kvm->arch.last_vcpu_ran);
> +	kvm->arch.last_vcpu_ran = NULL;
> +
>  	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
>  		if (kvm->vcpus[i]) {
>  			kvm_arch_vcpu_free(kvm->vcpus[i]);
> @@ -310,6 +322,27 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>  	return 0;
>  }
>  
> +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
> +{

why is calling this from here sufficient?

You only get a notification from preempt notifiers if you were preempted
while running (or rather while the vcpu was loaded).  I think this needs
to go in kvm_arch_vcpu_load, but be aware that the vcpu_load gets called
for other vcpu ioctls and doesn't necessarily imply that the vcpu will
actually run, which is also the case for the sched_in notification, btw.
The worst that will happen in that case is a bit of extra TLB
invalidation, so sticking with kvm_arch_vcpu_load is probably fine.
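
Roughly something like this (untested sketch, just to show where I'd
put the check; everything else stays as in your patch):

	void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
	{
		int *last_ran = per_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran, cpu);

		/* Pend a local TLBI if another vcpu of this VM ran here last */
		if (*last_ran != vcpu->vcpu_id) {
			if (*last_ran != -1)
				vcpu->arch.tlb_vmid_stale = true;
			*last_ran = vcpu->vcpu_id;
		}

		vcpu->cpu = cpu;
		/* rest of kvm_arch_vcpu_load() unchanged */
	}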

> +	int *last_ran;
> +
> +	last_ran = per_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran, cpu);
> +
> +	/*
> +	 * We might get preempted before the vCPU actually runs, but
> +	 * this is fine. Our TLBI stays pending until we actually make
> +	 * it to __activate_vm, so we won't miss a TLBI. If another
> +	 * vCPU gets scheduled, it will see our vcpu_id in last_ran,
> +	 * and pend a TLBI for itself.
> +	 */
> +	if (*last_ran != vcpu->vcpu_id) {
> +		if (*last_ran != -1)
> +			vcpu->arch.tlb_vmid_stale = true;
> +
> +		*last_ran = vcpu->vcpu_id;
> +	}
> +}
> +
>  void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  {
>  	vcpu->cpu = cpu;
> diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
> index 92678b7..a411762 100644
> --- a/arch/arm/kvm/hyp/switch.c
> +++ b/arch/arm/kvm/hyp/switch.c
> @@ -75,6 +75,15 @@ static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
>  	write_sysreg(kvm->arch.vttbr, VTTBR);
> +	if (vcpu->arch.tlb_vmid_stale) {
> +		/* Force vttbr to be written */
> +		isb();
> +		/* Local invalidate only for this VMID */
> +		write_sysreg(0, TLBIALL);
> +		dsb(nsh);
> +		vcpu->arch.tlb_vmid_stale = false;
> +	}
> +

why not call this directly when you notice it via kvm_call_hyp as
opposed to adding another conditional in the critical path?
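
Something along these lines, for example (untested sketch;
__kvm_tlb_flush_local_vmid is a made-up name, modelled on the existing
__kvm_tlb_flush_vmid() helper):

	void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
	{
		struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);

		/* Switch to the guest's VMID and invalidate local TLBs only */
		write_sysreg(kvm->arch.vttbr, VTTBR);
		isb();
		write_sysreg(0, TLBIALL);
		dsb(nsh);
		isb();
		write_sysreg(0, VTTBR);
	}

which you could then invoke with kvm_call_hyp(__kvm_tlb_flush_local_vmid,
vcpu) from wherever the stale VMID is detected, instead of carrying the
flag into __activate_vm().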

>  	write_sysreg(vcpu->arch.midr, VPIDR);
>  }
>  
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index bd94e67..0f62829 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -62,6 +62,9 @@ struct kvm_arch {
>  	/* VTTBR value associated with above pgd and vmid */
>  	u64    vttbr;
>  
> +	/* The last vcpu id that ran on each physical CPU */
> +	int __percpu *last_vcpu_ran;
> +
>  	/* The maximum number of vCPUs depends on the used GIC model */
>  	int max_vcpus;
>  
> @@ -252,6 +255,13 @@ struct kvm_vcpu_arch {
>  	/* vcpu power-off state */
>  	bool power_off;
>  
> +	/*
> +	 * Local TLBs potentially contain conflicting entries from
> +	 * another vCPU within this VMID. All entries for this VMID must
> +	 * be invalidated from (local) TLBs before we run this vCPU.
> +	 */
> +	bool tlb_vmid_stale;
> +
>  	/* Don't run the guest (internal implementation need) */
>  	bool pause;
>  
> @@ -368,7 +378,6 @@ static inline void __cpu_reset_hyp_mode(unsigned long vector_ptr,
>  static inline void kvm_arch_hardware_unsetup(void) {}
>  static inline void kvm_arch_sync_events(struct kvm *kvm) {}
>  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
> -static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>  
>  void kvm_arm_init_debug(void);
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index 83037cd..99d0f33 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -131,6 +131,14 @@ static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
>  	write_sysreg(kvm->arch.vttbr, vttbr_el2);
> +	if (vcpu->arch.tlb_vmid_stale) {
> +		/* Force vttbr_el2 to be written */
> +		isb();
> +		/* Local invalidate only for this VMID */
> +		asm volatile("tlbi vmalle1" : : );
> +		dsb(nsh);
> +		vcpu->arch.tlb_vmid_stale = false;
> +	}
>  }
>  
>  static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
> -- 
> 2.1.4
> 

Thanks,
-Christoffer

* Re: [PATCH v2] arm/arm64: KVM: Perform local TLB invalidation when multiplexing vcpus on a single CPU
  2016-11-01  9:04   ` Christoffer Dall
@ 2016-11-01 16:39     ` Marc Zyngier
  -1 siblings, 0 replies; 8+ messages in thread
From: Marc Zyngier @ 2016-11-01 16:39 UTC (permalink / raw)
  To: Christoffer Dall; +Cc: Will Deacon, kvm, linux-arm-kernel, kvmarm

[messed up my initial reply, resending]

On Tue, Nov 01 2016 at 09:04:08 AM, Christoffer Dall
<christoffer.dall@linaro.org> wrote:
> On Fri, Oct 28, 2016 at 11:27:50AM +0100, Marc Zyngier wrote:
>> Architecturally, TLBs are private to the (physical) CPU they're
>> associated with. But when multiple vcpus from the same VM are
>> being multiplexed on the same CPU, the TLBs are not private
>> to the vcpus (and are actually shared across the VMID).
>> 
>> Let's consider the following scenario:
>> 
>> - vcpu-0 maps PA to VA
>> - vcpu-1 maps PA' to VA
>> 
>> If run on the same physical CPU, vcpu-1 can hit TLB entries generated
>> by vcpu-0 accesses, and access the wrong physical page.
>> 
>> The solution to this is to keep a per-VM map of which vcpu ran last
>> on each given physical CPU, and invalidate local TLBs when switching
>> to a different vcpu from the same VM.
>> 
>> Reviewed-by: Mark Rutland <mark.rutland@arm.com>
>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>> ---
>> Fixed comments, added Mark's RB.
>> 
>>  arch/arm/include/asm/kvm_host.h   | 11 ++++++++++-
>>  arch/arm/include/asm/kvm_hyp.h    |  1 +
>>  arch/arm/kvm/arm.c                | 35 ++++++++++++++++++++++++++++++++++-
>>  arch/arm/kvm/hyp/switch.c         |  9 +++++++++
>>  arch/arm64/include/asm/kvm_host.h | 11 ++++++++++-
>>  arch/arm64/kvm/hyp/switch.c       |  8 ++++++++
>>  6 files changed, 72 insertions(+), 3 deletions(-)
>> 

[...]

>> @@ -310,6 +322,27 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>>  	return 0;
>>  }
>>  
>> +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
>> +{
>
> why is calling this from here sufficient?
>
> You only get a notification from preempt notifiers if you were preempted
> while running (or rather while the vcpu was loaded).  I think this
> needs

Arghh. I completely misread the code when writing that patch.

> to go in kvm_arch_vcpu_load, but be aware that the vcpu_load gets called
> for other vcpu ioctls and doesn't necessarily imply that the vcpu will
> actually run, which is also the case for the sched_in notification, btw.
> The worst that will happen in that case is a bit of extra TLB
> invalidation, so sticking with kvm_arch_vcpu_load is probably fine.

Indeed. I don't mind the extra invalidation, as long as it is rare
enough. Another possibility would be to do this test on the entry path,
once preemption is disabled.
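
i.e. something like this in the run loop, once preempt_disable() has
been called (untested sketch):

	int *last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran);

	if (*last_ran != vcpu->vcpu_id) {
		if (*last_ran != -1)
			vcpu->arch.tlb_vmid_stale = true;
		*last_ran = vcpu->vcpu_id;
	}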

>
>> +	int *last_ran;
>> +
>> +	last_ran = per_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran, cpu);
>> +
>> +	/*
>> +	 * We might get preempted before the vCPU actually runs, but
>> +	 * this is fine. Our TLBI stays pending until we actually make
>> +	 * it to __activate_vm, so we won't miss a TLBI. If another
>> +	 * vCPU gets scheduled, it will see our vcpu_id in last_ran,
>> +	 * and pend a TLBI for itself.
>> +	 */
>> +	if (*last_ran != vcpu->vcpu_id) {
>> +		if (*last_ran != -1)
>> +			vcpu->arch.tlb_vmid_stale = true;
>> +
>> +		*last_ran = vcpu->vcpu_id;
>> +	}
>> +}
>> +
>>  void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>>  {
>>  	vcpu->cpu = cpu;
>> diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
>> index 92678b7..a411762 100644
>> --- a/arch/arm/kvm/hyp/switch.c
>> +++ b/arch/arm/kvm/hyp/switch.c
>> @@ -75,6 +75,15 @@ static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
>>  {
>>  	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
>>  	write_sysreg(kvm->arch.vttbr, VTTBR);
>> +	if (vcpu->arch.tlb_vmid_stale) {
>> +		/* Force vttbr to be written */
>> +		isb();
>> +		/* Local invalidate only for this VMID */
>> +		write_sysreg(0, TLBIALL);
>> +		dsb(nsh);
>> +		vcpu->arch.tlb_vmid_stale = false;
>> +	}
>> +
>
> why not call this directly when you notice it via kvm_call_hyp as
> opposed to adding another conditional in the critical path?

Because the cost of a hypercall is very likely to be a lot higher than
that of testing a variable. Not to mention that at this point we're
absolutely sure that we're going to run the guest, while the hook in
vcpu_load is only probabilistic.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny.

* Re: [PATCH v2] arm/arm64: KVM: Perform local TLB invalidation when multiplexing vcpus on a single CPU
  2016-11-01 16:39     ` Marc Zyngier
@ 2016-11-01 18:28       ` Christoffer Dall
  -1 siblings, 0 replies; 8+ messages in thread
From: Christoffer Dall @ 2016-11-01 18:28 UTC (permalink / raw)
  To: Marc Zyngier; +Cc: Will Deacon, kvm, linux-arm-kernel, kvmarm

On Tue, Nov 01, 2016 at 04:39:29PM +0000, Marc Zyngier wrote:
> [messed up my initial reply, resending]
> 
> On Tue, Nov 01 2016 at 09:04:08 AM, Christoffer Dall
> <christoffer.dall@linaro.org> wrote:
> > On Fri, Oct 28, 2016 at 11:27:50AM +0100, Marc Zyngier wrote:
> >> Architecturally, TLBs are private to the (physical) CPU they're
> >> associated with. But when multiple vcpus from the same VM are
> >> being multiplexed on the same CPU, the TLBs are not private
> >> to the vcpus (and are actually shared across the VMID).
> >> 
> >> Let's consider the following scenario:
> >> 
> >> - vcpu-0 maps PA to VA
> >> - vcpu-1 maps PA' to VA
> >> 
> >> If run on the same physical CPU, vcpu-1 can hit TLB entries generated
> >> by vcpu-0 accesses, and access the wrong physical page.
> >> 
> >> The solution to this is to keep a per-VM map of which vcpu ran last
> >> on each given physical CPU, and invalidate local TLBs when switching
> >> to a different vcpu from the same VM.
> >> 
> >> Reviewed-by: Mark Rutland <mark.rutland@arm.com>
> >> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> >> ---
> >> Fixed comments, added Mark's RB.
> >> 
> >>  arch/arm/include/asm/kvm_host.h   | 11 ++++++++++-
> >>  arch/arm/include/asm/kvm_hyp.h    |  1 +
> >>  arch/arm/kvm/arm.c                | 35 ++++++++++++++++++++++++++++++++++-
> >>  arch/arm/kvm/hyp/switch.c         |  9 +++++++++
> >>  arch/arm64/include/asm/kvm_host.h | 11 ++++++++++-
> >>  arch/arm64/kvm/hyp/switch.c       |  8 ++++++++
> >>  6 files changed, 72 insertions(+), 3 deletions(-)
> >> 
> 
> [...]
> 
> >> @@ -310,6 +322,27 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> >>  	return 0;
> >>  }
> >>  
> >> +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
> >> +{
> >
> > why is calling this from here sufficient?
> >
> > You only get a notification from preempt notifiers if you were preempted
> > while running (or rather while the vcpu was loaded).  I think this
> > needs
> 
> Arghh. I completely miss-read the code when writing that patch.
> 
> > to go in kvm_arch_vcpu_load, but be aware that the vcpu_load gets called
> > for other vcpu ioctls and doesn't necessarily imply that the vcpu will
> > actually run, which is also the case for the sched_in notification, btw.
> > The worst that will happen in that case is a bit of extra TLB
> > invalidation, so sticking with kvm_arch_vcpu_load is probably fine.
> 
> Indeed. I don't mind the extra invalidation, as long as it is rare
> enough. Another possibility would be to do this test on the entry path,
> once preemption is disabled.
> 
> >
> >> +	int *last_ran;
> >> +
> >> +	last_ran = per_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran, cpu);
> >> +
> >> +	/*
> >> +	 * We might get preempted before the vCPU actually runs, but
> >> +	 * this is fine. Our TLBI stays pending until we actually make
> >> +	 * it to __activate_vm, so we won't miss a TLBI. If another
> >> +	 * vCPU gets scheduled, it will see our vcpu_id in last_ran,
> >> +	 * and pend a TLBI for itself.
> >> +	 */
> >> +	if (*last_ran != vcpu->vcpu_id) {
> >> +		if (*last_ran != -1)
> >> +			vcpu->arch.tlb_vmid_stale = true;
> >> +
> >> +		*last_ran = vcpu->vcpu_id;
> >> +	}
> >> +}
> >> +
> >>  void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> >>  {
> >>  	vcpu->cpu = cpu;
> >> diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
> >> index 92678b7..a411762 100644
> >> --- a/arch/arm/kvm/hyp/switch.c
> >> +++ b/arch/arm/kvm/hyp/switch.c
> >> @@ -75,6 +75,15 @@ static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
> >>  {
> >>  	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
> >>  	write_sysreg(kvm->arch.vttbr, VTTBR);
> >> +	if (vcpu->arch.tlb_vmid_stale) {
> >> +		/* Force vttbr to be written */
> >> +		isb();
> >> +		/* Local invalidate only for this VMID */
> >> +		write_sysreg(0, TLBIALL);
> >> +		dsb(nsh);
> >> +		vcpu->arch.tlb_vmid_stale = false;
> >> +	}
> >> +
> >
> > why not call this directly when you notice it via kvm_call_hyp as
> > opposed to adding another conditional in the critical path?
> 
> Because the cost of a hypercall is very likely to be a lot higher than
> that of testing a variable. Not to mention that at this point we're
> absolutely sure that we're going to run the guest, while the hook in
> vcpu_load is only probabilistic.
> 
Hmmm, I think for the performance workloads you care about, you will pin
VCPUs, so you'd rather take this hit in the case where you're bouncing
VCPUs all over the place, as opposed to the situation where you care
about being able to quickly exit, for example to take an interrupt for a
passthrough device.

As a comparison, the many debug flag checks and function calls we
currently take cost us over 150 cycles for each save/restore on both
Seattle and Mustang, so I don't think the conditionals are free.

I think doing this in the non-preemptible load_vcpu part is just fine,
and it's in line with the other tlbi stuff we do.

-Christoffer
