All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] KVM: nVMX: nested TPR shadow/threshold emulation
@ 2014-07-30 12:04 Wanpeng Li
  2014-07-30 15:20 ` Paolo Bonzini
  0 siblings, 1 reply; 7+ messages in thread
From: Wanpeng Li @ 2014-07-30 12:04 UTC (permalink / raw)
  To: Paolo Bonzini, Jan Kiszka
  Cc: Marcelo Tosatti, Gleb Natapov, Bandan Das, Zhang Yang, kvm,
	linux-kernel, Wanpeng Li

This patch fixes bug https://bugzilla.kernel.org/show_bug.cgi?id=61411

The TPR shadow/threshold feature is important for speeding up Windows guests.
Besides, it is a mandatory feature for certain VMMs.

We map the virtual APIC page address and TPR threshold from the L1 VMCS. If
a TPR_BELOW_THRESHOLD VM exit is triggered by the L2 guest and L1 is
interested in it, we inject it into the L1 VMM for handling.

Signed-off-by: Wanpeng Li <wanpeng.li@linux.intel.com>
---
 arch/x86/kvm/vmx.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a3845b8..f60846c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2331,7 +2331,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 		CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
 		CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
 		CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
-		CPU_BASED_PAUSE_EXITING |
+		CPU_BASED_PAUSE_EXITING | CPU_BASED_TPR_SHADOW |
 		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
 	/*
 	 * We can allow some features even when not supported by the
@@ -6937,7 +6937,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	case EXIT_REASON_MCE_DURING_VMENTRY:
 		return 0;
 	case EXIT_REASON_TPR_BELOW_THRESHOLD:
-		return 1;
+		return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
 	case EXIT_REASON_APIC_ACCESS:
 		return nested_cpu_has2(vmcs12,
 			SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
@@ -7058,6 +7058,9 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
+	if (is_guest_mode(vcpu))
+		return;
+
 	if (irr == -1 || tpr < irr) {
 		vmcs_write32(TPR_THRESHOLD, 0);
 		return;
@@ -7962,14 +7965,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 		if (!vmx->rdtscp_enabled)
 			exec_control &= ~SECONDARY_EXEC_RDTSCP;
 		/* Take the following fields only from vmcs12 */
-		exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
-				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
+		exec_control &= ~(SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
                                   SECONDARY_EXEC_APIC_REGISTER_VIRT);
 		if (nested_cpu_has(vmcs12,
 				CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
 			exec_control |= vmcs12->secondary_vm_exec_control;
 
 		if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) {
+			struct page *virtual_apic_page;
 			/*
 			 * Translate L1 physical address to host physical
 			 * address for vmcs02. Keep the page pinned, so this
@@ -7992,6 +7995,15 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 			else
 				vmcs_write64(APIC_ACCESS_ADDR,
 				  page_to_phys(vmx->nested.apic_access_page));
+
+			virtual_apic_page = nested_get_page(vcpu,
+						vmcs12->virtual_apic_page_addr);
+			if (vmcs_read64(VIRTUAL_APIC_PAGE_ADDR) !=
+					page_to_phys(virtual_apic_page))
+				vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
+					page_to_phys(virtual_apic_page));
+			nested_release_page(virtual_apic_page);
+
 		} else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) {
 			exec_control |=
 				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
@@ -8002,6 +8014,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
 	}
 
+	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
+		vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
 
 	/*
 	 * Set host-state according to L0's settings (vmcs12 is irrelevant here)
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] KVM: nVMX: nested TPR shadow/threshold emulation
  2014-07-30 12:04 [PATCH] KVM: nVMX: nested TPR shadow/threshold emulation Wanpeng Li
@ 2014-07-30 15:20 ` Paolo Bonzini
  2014-07-31  8:03   ` Wanpeng Li
  0 siblings, 1 reply; 7+ messages in thread
From: Paolo Bonzini @ 2014-07-30 15:20 UTC (permalink / raw)
  To: Wanpeng Li, Jan Kiszka
  Cc: Marcelo Tosatti, Gleb Natapov, Bandan Das, Zhang Yang, kvm, linux-kernel

Il 30/07/2014 14:04, Wanpeng Li ha scritto:
> @@ -7962,14 +7965,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  		if (!vmx->rdtscp_enabled)
>  			exec_control &= ~SECONDARY_EXEC_RDTSCP;
>  		/* Take the following fields only from vmcs12 */
> -		exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
> -				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
> +		exec_control &= ~(SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
>                                    SECONDARY_EXEC_APIC_REGISTER_VIRT);

This change is wrong.  You don't have to take L0's "virtualize APIC
accesses" setting into account, because while running L2 you cannot
modify L1's CR8 (only the virtual nested one).

> +
> +			virtual_apic_page = nested_get_page(vcpu,
> +						vmcs12->virtual_apic_page_addr);
> +			if (vmcs_read64(VIRTUAL_APIC_PAGE_ADDR) !=
> +					page_to_phys(virtual_apic_page))
> +				vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
> +					page_to_phys(virtual_apic_page));
> +			nested_release_page(virtual_apic_page);
> +

You cannot release this page here.  You need to do exactly the same
thing that is done for apic_access_page.

One thing:

> +	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
> +		vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);

I think you can just do this write unconditionally, since most
hypervisors will enable this.  Also, you probably can add the tpr
threshold field to the read-write fields for shadow VMCS.

Paolo

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] KVM: nVMX: nested TPR shadow/threshold emulation
  2014-07-30 15:20 ` Paolo Bonzini
@ 2014-07-31  8:03   ` Wanpeng Li
  2014-07-31  9:25     ` Paolo Bonzini
  0 siblings, 1 reply; 7+ messages in thread
From: Wanpeng Li @ 2014-07-31  8:03 UTC (permalink / raw)
  To: Paolo Bonzini, Jan Kiszka
  Cc: Marcelo Tosatti, Gleb Natapov, Bandan Das, Zhang Yang, kvm, linux-kernel

Hi Paolo,
On Wed, Jul 30, 2014 at 05:20:58PM +0200, Paolo Bonzini wrote:
>Il 30/07/2014 14:04, Wanpeng Li ha scritto:
>> @@ -7962,14 +7965,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>>  		if (!vmx->rdtscp_enabled)
>>  			exec_control &= ~SECONDARY_EXEC_RDTSCP;
>>  		/* Take the following fields only from vmcs12 */
>> -		exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
>> -				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
>> +		exec_control &= ~(SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
>>                                    SECONDARY_EXEC_APIC_REGISTER_VIRT);
>
>This change is wrong.  You don't have to take L0's "virtualize APIC
>accesses" setting into account, because while running L2 you cannot
>modify L1's CR8 (only the virtual nested one).
>

Agreed.

>> +
>> +			virtual_apic_page = nested_get_page(vcpu,
>> +						vmcs12->virtual_apic_page_addr);
>> +			if (vmcs_read64(VIRTUAL_APIC_PAGE_ADDR) !=
>> +					page_to_phys(virtual_apic_page))
>> +				vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
>> +					page_to_phys(virtual_apic_page));
>> +			nested_release_page(virtual_apic_page);
>> +
>
>You cannot release this page here.  You need to the exactly the same
>thing that is done for apic_access_page.
>

Agreed.

>One thing:
>
>> +	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
>> +		vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
>
>I think you can just do this write unconditionally, since most
>hypervisors will enable this.  Also, you probably can add the tpr

What will happen if a hypervisor doesn't enable it? I will make it
cleaner in version two.

>threshold field to the read-write fields for shadow VMCS.
>

Agreed.

Regards,
Wanpeng Li 

>Paolo
>--
>To unsubscribe from this list: send the line "unsubscribe kvm" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] KVM: nVMX: nested TPR shadow/threshold emulation
  2014-07-31  8:03   ` Wanpeng Li
@ 2014-07-31  9:25     ` Paolo Bonzini
  2014-08-01  0:57       ` Zhang, Yang Z
  0 siblings, 1 reply; 7+ messages in thread
From: Paolo Bonzini @ 2014-07-31  9:25 UTC (permalink / raw)
  To: Wanpeng Li, Jan Kiszka
  Cc: Marcelo Tosatti, Gleb Natapov, Bandan Das, Zhang Yang, kvm, linux-kernel

Il 31/07/2014 10:03, Wanpeng Li ha scritto:
>> One thing:
>>
>>> +	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
>>> +		vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
>>
>> I think you can just do this write unconditionally, since most
>> hypervisors will enable this.  Also, you probably can add the tpr
> 
> What will happen if a hypervisor doesn't enable it? I make it more 
> cleaner in version two.

TPR_THRESHOLD will likely be written as zero, but the processor will
never use it anyway.  It's just a small optimization because
nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW) will almost always be true.

Paolo

>> threshold field to the read-write fields for shadow VMCS.
> 
> Agreed.
> 
> Regards,
> Wanpeng Li 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH] KVM: nVMX: nested TPR shadow/threshold emulation
  2014-07-31  9:25     ` Paolo Bonzini
@ 2014-08-01  0:57       ` Zhang, Yang Z
  2014-08-01  6:35         ` Paolo Bonzini
  0 siblings, 1 reply; 7+ messages in thread
From: Zhang, Yang Z @ 2014-08-01  0:57 UTC (permalink / raw)
  To: Paolo Bonzini, Wanpeng Li, Jan Kiszka
  Cc: Marcelo Tosatti, Gleb Natapov, Bandan Das, kvm, linux-kernel

Paolo Bonzini wrote on 2014-07-31:
> Il 31/07/2014 10:03, Wanpeng Li ha scritto:
>>> One thing:
>>> 
>>>> +	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
>>>> +		vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
>>> 
>>> I think you can just do this write unconditionally, since most
>>> hypervisors will enable this.  Also, you probably can add the tpr
>> 
>> What will happen if a hypervisor doesn't enable it? I make it more
>> cleaner in version two.
> 
> TPR_THRESHOLD will be likely written as zero, but the processor will
> never use it anyway.  It's just a small optimization because
> nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW) will almost always be true.

Theoretically, you are right. But we should not expect all VMMs to follow it. It is not worth violating the SDM just to save the cost of two or three instructions.

> 
> Paolo
> 
>>> threshold field to the read-write fields for shadow VMCS.
>> 
>> Agreed.
>> 
>> Regards,
>> Wanpeng Li


Best regards,
Yang



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] KVM: nVMX: nested TPR shadow/threshold emulation
  2014-08-01  0:57       ` Zhang, Yang Z
@ 2014-08-01  6:35         ` Paolo Bonzini
  2014-08-01  6:44           ` Zhang, Yang Z
  0 siblings, 1 reply; 7+ messages in thread
From: Paolo Bonzini @ 2014-08-01  6:35 UTC (permalink / raw)
  To: Zhang, Yang Z, Wanpeng Li, Jan Kiszka
  Cc: Marcelo Tosatti, Gleb Natapov, Bandan Das, kvm, linux-kernel

Il 01/08/2014 02:57, Zhang, Yang Z ha scritto:
> > TPR_THRESHOLD will be likely written as zero, but the processor will
> > never use it anyway.  It's just a small optimization because
> > nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW) will almost always be true.
> 
> Theoretically, you are right. But we should not expect all VMMs
> follow it. It is not worth to violate the SDM just for saving two or
> three instructions' cost.

Yes, you do need an "if (cpu_has_vmx_tpr_shadow())" around the
vmcs_write32.  But still, checking nested_cpu_has is not strictly
necessary.  Right now they both are a single AND, but I have plans to
change all of the cpu_has_*() checks to static keys.

Paolo

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH] KVM: nVMX: nested TPR shadow/threshold emulation
  2014-08-01  6:35         ` Paolo Bonzini
@ 2014-08-01  6:44           ` Zhang, Yang Z
  0 siblings, 0 replies; 7+ messages in thread
From: Zhang, Yang Z @ 2014-08-01  6:44 UTC (permalink / raw)
  To: Paolo Bonzini, Wanpeng Li, Jan Kiszka
  Cc: Marcelo Tosatti, Gleb Natapov, Bandan Das, kvm, linux-kernel

Paolo Bonzini wrote on 2014-08-01:
> Il 01/08/2014 02:57, Zhang, Yang Z ha scritto:
>>> TPR_THRESHOLD will be likely written as zero, but the processor
>>> will never use it anyway.  It's just a small optimization because
>>> nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW) will almost always
> be true.
>> 
>> Theoretically, you are right. But we should not expect all VMMs
>> follow it. It is not worth to violate the SDM just for saving two or
>> three instructions' cost.
> 
> Yes, you do need an "if (cpu_has_vmx_tpr_shadow())" around the
> vmcs_write32.  But still, checking nested_cpu_has is not strictly necessary.
> Right now they both are a single AND, but I have plans to change all
> of the
> cpu_has_*() checks to static keys.

See v2 patch. It isn't a problem anymore.

> 
> Paolo


Best regards,
Yang



^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2014-08-01  6:44 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-07-30 12:04 [PATCH] KVM: nVMX: nested TPR shadow/threshold emulation Wanpeng Li
2014-07-30 15:20 ` Paolo Bonzini
2014-07-31  8:03   ` Wanpeng Li
2014-07-31  9:25     ` Paolo Bonzini
2014-08-01  0:57       ` Zhang, Yang Z
2014-08-01  6:35         ` Paolo Bonzini
2014-08-01  6:44           ` Zhang, Yang Z

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.