From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Dong, Eddie" Subject: RE: [PATCH 14/24] Prepare vmcs02 from vmcs01 and vmcs12 Date: Tue, 6 Jul 2010 14:25:16 +0800 Message-ID: <1A42CE6F5F474C41B63392A5F80372B21F67B5B8@shsmsx501.ccr.corp.intel.com> References: <1276431753-nyh@il.ibm.com> <201006131229.o5DCTih4013041@rice.haifa.ibm.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 8BIT Cc: "kvm@vger.kernel.org" , "Dong, Eddie" To: Nadav Har'El , "avi@redhat.com" Return-path: Received: from mga09.intel.com ([134.134.136.24]:9408 "EHLO mga09.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753398Ab0GFG3B convert rfc822-to-8bit (ORCPT ); Tue, 6 Jul 2010 02:29:01 -0400 In-Reply-To: <201006131229.o5DCTih4013041@rice.haifa.ibm.com> Content-Language: en-US Sender: kvm-owner@vger.kernel.org List-ID: Nadav Har'El wrote: > This patch contains code to prepare the VMCS which can be used to > actually run the L2 guest, vmcs02. prepare_vmcs02 appropriately > merges the information in shadow_vmcs that L1 built for L2 (vmcs12), > and that in the VMCS that we built for L1 (vmcs01). > > VMREAD/WRITE can only access one VMCS at a time (the "current" VMCS), > which makes it difficult for us to read from vmcs01 while writing to > vmcs02. This is why we first make a copy of vmcs01 in memory > (l1_shadow_vmcs) and then read that memory copy while writing to > vmcs02. 
> > Signed-off-by: Nadav Har'El > --- > --- .before/arch/x86/kvm/vmx.c 2010-06-13 15:01:29.000000000 +0300 > +++ .after/arch/x86/kvm/vmx.c 2010-06-13 15:01:29.000000000 +0300 > @@ -849,6 +849,36 @@ static inline bool report_flexpriority(v > return flexpriority_enabled; > } > > +static inline bool nested_cpu_has_vmx_tpr_shadow(struct kvm_vcpu > *vcpu) +{ > + return cpu_has_vmx_tpr_shadow() && > + get_shadow_vmcs(vcpu)->cpu_based_vm_exec_control & > + CPU_BASED_TPR_SHADOW; > +} > + > +static inline bool nested_cpu_has_secondary_exec_ctrls(struct > kvm_vcpu *vcpu) +{ > + return cpu_has_secondary_exec_ctrls() && > + get_shadow_vmcs(vcpu)->cpu_based_vm_exec_control & > + CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; > +} > + > +static inline bool nested_vm_need_virtualize_apic_accesses(struct > kvm_vcpu + *vcpu) > +{ > + return nested_cpu_has_secondary_exec_ctrls(vcpu) && > + (get_shadow_vmcs(vcpu)->secondary_vm_exec_control & > + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); > +} > + > +static inline bool nested_cpu_has_vmx_ept(struct kvm_vcpu *vcpu) > +{ > + return nested_cpu_has_secondary_exec_ctrls(vcpu) && > + (get_shadow_vmcs(vcpu)->secondary_vm_exec_control & > + SECONDARY_EXEC_ENABLE_EPT); > +} > + > + > static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) > { > int i; > @@ -1292,6 +1322,39 @@ static void vmx_load_host_state(struct v > preempt_enable(); > } > > +int load_vmcs_host_state(struct shadow_vmcs *src) > +{ > + vmcs_write16(HOST_ES_SELECTOR, src->host_es_selector); > + vmcs_write16(HOST_CS_SELECTOR, src->host_cs_selector); > + vmcs_write16(HOST_SS_SELECTOR, src->host_ss_selector); > + vmcs_write16(HOST_DS_SELECTOR, src->host_ds_selector); > + vmcs_write16(HOST_FS_SELECTOR, src->host_fs_selector); > + vmcs_write16(HOST_GS_SELECTOR, src->host_gs_selector); > + vmcs_write16(HOST_TR_SELECTOR, src->host_tr_selector); > + > + vmcs_write64(TSC_OFFSET, src->tsc_offset); > + > + if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) > + vmcs_write64(HOST_IA32_PAT, 
src->host_ia32_pat); > + > + vmcs_write32(HOST_IA32_SYSENTER_CS, src->host_ia32_sysenter_cs); > + > + vmcs_writel(HOST_CR0, src->host_cr0); > + vmcs_writel(HOST_CR3, src->host_cr3); > + vmcs_writel(HOST_CR4, src->host_cr4); > + vmcs_writel(HOST_FS_BASE, src->host_fs_base); > + vmcs_writel(HOST_GS_BASE, src->host_gs_base); > + vmcs_writel(HOST_TR_BASE, src->host_tr_base); > + vmcs_writel(HOST_GDTR_BASE, src->host_gdtr_base); > + vmcs_writel(HOST_IDTR_BASE, src->host_idtr_base); > + vmcs_writel(HOST_RSP, src->host_rsp); > + vmcs_writel(HOST_RIP, src->host_rip); > + vmcs_writel(HOST_IA32_SYSENTER_ESP, src->host_ia32_sysenter_esp); > + vmcs_writel(HOST_IA32_SYSENTER_EIP, src->host_ia32_sysenter_eip); > + > + return 0; > +} > + > /* > * Switches to specified vcpu, until a matching vcpu_put(), but > assumes > * vcpu mutex is already taken. > @@ -1922,6 +1985,71 @@ static void vmclear_local_vcpus(void) > __vcpu_clear(vmx); > } > > +int load_vmcs_common(struct shadow_vmcs *src) > +{ > + vmcs_write16(GUEST_ES_SELECTOR, src->guest_es_selector); > + vmcs_write16(GUEST_CS_SELECTOR, src->guest_cs_selector); > + vmcs_write16(GUEST_SS_SELECTOR, src->guest_ss_selector); > + vmcs_write16(GUEST_DS_SELECTOR, src->guest_ds_selector); > + vmcs_write16(GUEST_FS_SELECTOR, src->guest_fs_selector); > + vmcs_write16(GUEST_GS_SELECTOR, src->guest_gs_selector); > + vmcs_write16(GUEST_LDTR_SELECTOR, src->guest_ldtr_selector); > + vmcs_write16(GUEST_TR_SELECTOR, src->guest_tr_selector); > + > + vmcs_write64(GUEST_IA32_DEBUGCTL, src->guest_ia32_debugctl); > + > + if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) > + vmcs_write64(GUEST_IA32_PAT, src->guest_ia32_pat); > + > + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, > src->vm_entry_intr_info_field); > + vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, + > src->vm_entry_exception_error_code); > + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, > src->vm_entry_instruction_len); + + vmcs_write32(GUEST_ES_LIMIT, > src->guest_es_limit); + 
vmcs_write32(GUEST_CS_LIMIT, > src->guest_cs_limit); + vmcs_write32(GUEST_SS_LIMIT, > src->guest_ss_limit); + vmcs_write32(GUEST_DS_LIMIT, > src->guest_ds_limit); + vmcs_write32(GUEST_FS_LIMIT, > src->guest_fs_limit); + vmcs_write32(GUEST_GS_LIMIT, > src->guest_gs_limit); + vmcs_write32(GUEST_LDTR_LIMIT, > src->guest_ldtr_limit); + vmcs_write32(GUEST_TR_LIMIT, > src->guest_tr_limit); + vmcs_write32(GUEST_GDTR_LIMIT, > src->guest_gdtr_limit); + vmcs_write32(GUEST_IDTR_LIMIT, > src->guest_idtr_limit); + vmcs_write32(GUEST_ES_AR_BYTES, > src->guest_es_ar_bytes); + vmcs_write32(GUEST_CS_AR_BYTES, > src->guest_cs_ar_bytes); + vmcs_write32(GUEST_SS_AR_BYTES, > src->guest_ss_ar_bytes); + vmcs_write32(GUEST_DS_AR_BYTES, > src->guest_ds_ar_bytes); + vmcs_write32(GUEST_FS_AR_BYTES, > src->guest_fs_ar_bytes); + vmcs_write32(GUEST_GS_AR_BYTES, > src->guest_gs_ar_bytes); + vmcs_write32(GUEST_LDTR_AR_BYTES, > src->guest_ldtr_ar_bytes); + vmcs_write32(GUEST_TR_AR_BYTES, > src->guest_tr_ar_bytes); + vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, > + src->guest_interruptibility_info); > + vmcs_write32(GUEST_ACTIVITY_STATE, src->guest_activity_state); > + vmcs_write32(GUEST_SYSENTER_CS, src->guest_sysenter_cs); > + > + vmcs_writel(GUEST_ES_BASE, src->guest_es_base); > + vmcs_writel(GUEST_CS_BASE, src->guest_cs_base); > + vmcs_writel(GUEST_SS_BASE, src->guest_ss_base); > + vmcs_writel(GUEST_DS_BASE, src->guest_ds_base); > + vmcs_writel(GUEST_FS_BASE, src->guest_fs_base); > + vmcs_writel(GUEST_GS_BASE, src->guest_gs_base); > + vmcs_writel(GUEST_LDTR_BASE, src->guest_ldtr_base); > + vmcs_writel(GUEST_TR_BASE, src->guest_tr_base); > + vmcs_writel(GUEST_GDTR_BASE, src->guest_gdtr_base); > + vmcs_writel(GUEST_IDTR_BASE, src->guest_idtr_base); > + vmcs_writel(GUEST_DR7, src->guest_dr7); > + vmcs_writel(GUEST_RSP, src->guest_rsp); > + vmcs_writel(GUEST_RIP, src->guest_rip); > + vmcs_writel(GUEST_RFLAGS, src->guest_rflags); > + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, > + 
src->guest_pending_dbg_exceptions); > + vmcs_writel(GUEST_SYSENTER_ESP, src->guest_sysenter_esp); > + vmcs_writel(GUEST_SYSENTER_EIP, src->guest_sysenter_eip); > + > + return 0; > +} > > /* Just like cpu_vmxoff(), but with the > __kvm_handle_fault_on_reboot() * tricks. > @@ -5363,6 +5491,281 @@ static void vmx_set_supported_cpuid(u32 > { > } > > +/* Make a copy of the current VMCS to ordinary memory. This is > needed because + * in VMX you cannot read and write to two VMCS at > the same time, so when we + * want to do this (in prepare_vmcs_02, > which needs to read from vmcs01 while + * preparing vmcs02), we need > to first save a copy of one VMCS's fields in + * memory, and then use > that copy. + */ > +void save_vmcs(struct shadow_vmcs *dst) > +{ > + dst->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR); > + dst->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR); > + dst->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR); > + dst->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR); > + dst->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR); > + dst->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR); > + dst->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR); > + dst->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR); > + dst->host_es_selector = vmcs_read16(HOST_ES_SELECTOR); > + dst->host_cs_selector = vmcs_read16(HOST_CS_SELECTOR); > + dst->host_ss_selector = vmcs_read16(HOST_SS_SELECTOR); > + dst->host_ds_selector = vmcs_read16(HOST_DS_SELECTOR); > + dst->host_fs_selector = vmcs_read16(HOST_FS_SELECTOR); > + dst->host_gs_selector = vmcs_read16(HOST_GS_SELECTOR); > + dst->host_tr_selector = vmcs_read16(HOST_TR_SELECTOR); > + dst->io_bitmap_a = vmcs_read64(IO_BITMAP_A); > + dst->io_bitmap_b = vmcs_read64(IO_BITMAP_B); > + if (cpu_has_vmx_msr_bitmap()) > + dst->msr_bitmap = vmcs_read64(MSR_BITMAP); > + dst->tsc_offset = vmcs_read64(TSC_OFFSET); > + dst->virtual_apic_page_addr = vmcs_read64(VIRTUAL_APIC_PAGE_ADDR); > + dst->apic_access_addr = 
vmcs_read64(APIC_ACCESS_ADDR); > + if (enable_ept) > + dst->ept_pointer = vmcs_read64(EPT_POINTER); > + dst->guest_physical_address = vmcs_read64(GUEST_PHYSICAL_ADDRESS); > + dst->vmcs_link_pointer = vmcs_read64(VMCS_LINK_POINTER); > + dst->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); > + if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) > + dst->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); > + if (enable_ept) { > + dst->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0); > + dst->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1); > + dst->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2); > + dst->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3); > + } > + dst->pin_based_vm_exec_control = > vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); > + dst->cpu_based_vm_exec_control = > vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + dst->exception_bitmap = > vmcs_read32(EXCEPTION_BITMAP); + dst->page_fault_error_code_mask = > + vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK); > + dst->page_fault_error_code_match = > + vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH); + dst->cr3_target_count > = vmcs_read32(CR3_TARGET_COUNT); + dst->vm_exit_controls = > vmcs_read32(VM_EXIT_CONTROLS); + dst->vm_entry_controls = > vmcs_read32(VM_ENTRY_CONTROLS); + dst->vm_entry_intr_info_field = > vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); > + dst->vm_entry_exception_error_code = > + vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE); > + dst->vm_entry_instruction_len = > vmcs_read32(VM_ENTRY_INSTRUCTION_LEN); + dst->tpr_threshold = > vmcs_read32(TPR_THRESHOLD); + dst->secondary_vm_exec_control = > vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + if (enable_vpid && > dst->secondary_vm_exec_control & + SECONDARY_EXEC_ENABLE_VPID) > + dst->virtual_processor_id = vmcs_read16(VIRTUAL_PROCESSOR_ID); > + dst->vm_instruction_error = vmcs_read32(VM_INSTRUCTION_ERROR); > + dst->vm_exit_reason = vmcs_read32(VM_EXIT_REASON); > + dst->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); > + dst->vm_exit_intr_error_code = > vmcs_read32(VM_EXIT_INTR_ERROR_CODE); + 
dst->idt_vectoring_info_field > = vmcs_read32(IDT_VECTORING_INFO_FIELD); > + dst->idt_vectoring_error_code = > vmcs_read32(IDT_VECTORING_ERROR_CODE); + dst->vm_exit_instruction_len > = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + dst->vmx_instruction_info = > vmcs_read32(VMX_INSTRUCTION_INFO); + dst->guest_es_limit = > vmcs_read32(GUEST_ES_LIMIT); + dst->guest_cs_limit = > vmcs_read32(GUEST_CS_LIMIT); + dst->guest_ss_limit = > vmcs_read32(GUEST_SS_LIMIT); + dst->guest_ds_limit = > vmcs_read32(GUEST_DS_LIMIT); + dst->guest_fs_limit = > vmcs_read32(GUEST_FS_LIMIT); + dst->guest_gs_limit = > vmcs_read32(GUEST_GS_LIMIT); + dst->guest_ldtr_limit = > vmcs_read32(GUEST_LDTR_LIMIT); + dst->guest_tr_limit = > vmcs_read32(GUEST_TR_LIMIT); + dst->guest_gdtr_limit = > vmcs_read32(GUEST_GDTR_LIMIT); + dst->guest_idtr_limit = > vmcs_read32(GUEST_IDTR_LIMIT); + dst->guest_es_ar_bytes = > vmcs_read32(GUEST_ES_AR_BYTES); + dst->guest_cs_ar_bytes = > vmcs_read32(GUEST_CS_AR_BYTES); + dst->guest_ss_ar_bytes = > vmcs_read32(GUEST_SS_AR_BYTES); + dst->guest_ds_ar_bytes = > vmcs_read32(GUEST_DS_AR_BYTES); + dst->guest_fs_ar_bytes = > vmcs_read32(GUEST_FS_AR_BYTES); + dst->guest_gs_ar_bytes = > vmcs_read32(GUEST_GS_AR_BYTES); + dst->guest_ldtr_ar_bytes = > vmcs_read32(GUEST_LDTR_AR_BYTES); + dst->guest_tr_ar_bytes = > vmcs_read32(GUEST_TR_AR_BYTES); + dst->guest_interruptibility_info = > + vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); > + dst->guest_activity_state = vmcs_read32(GUEST_ACTIVITY_STATE); > + dst->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); > + dst->host_ia32_sysenter_cs = vmcs_read32(HOST_IA32_SYSENTER_CS); > + dst->cr0_guest_host_mask = vmcs_readl(CR0_GUEST_HOST_MASK); > + dst->cr4_guest_host_mask = vmcs_readl(CR4_GUEST_HOST_MASK); > + dst->cr0_read_shadow = vmcs_readl(CR0_READ_SHADOW); > + dst->cr4_read_shadow = vmcs_readl(CR4_READ_SHADOW); > + dst->cr3_target_value0 = vmcs_readl(CR3_TARGET_VALUE0); > + dst->cr3_target_value1 = vmcs_readl(CR3_TARGET_VALUE1); > + 
dst->cr3_target_value2 = vmcs_readl(CR3_TARGET_VALUE2); > + dst->cr3_target_value3 = vmcs_readl(CR3_TARGET_VALUE3); > + dst->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); > + dst->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS); > + dst->guest_cr0 = vmcs_readl(GUEST_CR0); + dst->guest_cr3 = > vmcs_readl(GUEST_CR3); + dst->guest_cr4 = vmcs_readl(GUEST_CR4); > + dst->guest_es_base = vmcs_readl(GUEST_ES_BASE); > + dst->guest_cs_base = vmcs_readl(GUEST_CS_BASE); > + dst->guest_ss_base = vmcs_readl(GUEST_SS_BASE); > + dst->guest_ds_base = vmcs_readl(GUEST_DS_BASE); > + dst->guest_fs_base = vmcs_readl(GUEST_FS_BASE); > + dst->guest_gs_base = vmcs_readl(GUEST_GS_BASE); > + dst->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE); > + dst->guest_tr_base = vmcs_readl(GUEST_TR_BASE); > + dst->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE); > + dst->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE); > + dst->guest_dr7 = vmcs_readl(GUEST_DR7); + dst->guest_rsp = > vmcs_readl(GUEST_RSP); + dst->guest_rip = vmcs_readl(GUEST_RIP); > + dst->guest_rflags = vmcs_readl(GUEST_RFLAGS); > + dst->guest_pending_dbg_exceptions = > + vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); > + dst->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); > + dst->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); > + dst->host_cr0 = vmcs_readl(HOST_CR0); > + dst->host_cr3 = vmcs_readl(HOST_CR3); > + dst->host_cr4 = vmcs_readl(HOST_CR4); > + dst->host_fs_base = vmcs_readl(HOST_FS_BASE); > + dst->host_gs_base = vmcs_readl(HOST_GS_BASE); > + dst->host_tr_base = vmcs_readl(HOST_TR_BASE); > + dst->host_gdtr_base = vmcs_readl(HOST_GDTR_BASE); > + dst->host_idtr_base = vmcs_readl(HOST_IDTR_BASE); > + dst->host_ia32_sysenter_esp = vmcs_readl(HOST_IA32_SYSENTER_ESP); > + dst->host_ia32_sysenter_eip = vmcs_readl(HOST_IA32_SYSENTER_EIP); > + dst->host_rsp = vmcs_readl(HOST_RSP); > + dst->host_rip = vmcs_readl(HOST_RIP); > + if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) > + dst->host_ia32_pat = 
vmcs_read64(HOST_IA32_PAT); > +} > + > +/* prepare_vmcs_02 is called when the L1 guest hypervisor runs > its nested + * L2 guest. L1 has a vmcs for L2 (vmcs12), and this > function "merges" it + * with L0's wishes for its guest (vmcs01), so > we can run the L2 guest in a + * way that will both be appropriate to > L1's requests, and our needs. + */ > +int prepare_vmcs_02(struct kvm_vcpu *vcpu, > + struct shadow_vmcs *vmcs12, struct shadow_vmcs *vmcs01) > +{ > + u32 exec_control; > + > + load_vmcs_common(vmcs12); > + > + vmcs_write64(VMCS_LINK_POINTER, vmcs12->vmcs_link_pointer); > + vmcs_write64(IO_BITMAP_A, vmcs01->io_bitmap_a); > + vmcs_write64(IO_BITMAP_B, vmcs01->io_bitmap_b); > + if (cpu_has_vmx_msr_bitmap()) > + vmcs_write64(MSR_BITMAP, vmcs01->msr_bitmap); > + > + if (vmcs12->vm_entry_msr_load_count > 0 || > + vmcs12->vm_exit_msr_load_count > 0 || > + vmcs12->vm_exit_msr_store_count > 0) { > + printk(KERN_WARNING > + "%s: VMCS MSR_{LOAD,STORE} unsupported\n", __func__); > + } > + > + if (nested_cpu_has_vmx_tpr_shadow(vcpu)) { > + struct page *page = > + nested_get_page(vcpu, vmcs12->virtual_apic_page_addr); > + if (!page) > + return 1; > + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, page_to_phys(page)); > + kvm_release_page_clean(page); > + } > + > + if (nested_vm_need_virtualize_apic_accesses(vcpu)) { > + struct page *page = > + nested_get_page(vcpu, vmcs12->apic_access_addr); > + if (!page) > + return 1; > + vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page)); > + kvm_release_page_clean(page); > + } > + > + vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, > + (vmcs01->pin_based_vm_exec_control | > + vmcs12->pin_based_vm_exec_control)); > + vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, > + (vmcs01->page_fault_error_code_mask & > + vmcs12->page_fault_error_code_mask)); > + vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, > + (vmcs01->page_fault_error_code_match & > + vmcs12->page_fault_error_code_match)); > + > + if (cpu_has_secondary_exec_ctrls()) { > + u32 exec_control = 
vmcs01->secondary_vm_exec_control; > + if (nested_cpu_has_secondary_exec_ctrls(vcpu)) { > + exec_control |= vmcs12->secondary_vm_exec_control; > + if (!vm_need_virtualize_apic_accesses(vcpu->kvm) || > + !nested_vm_need_virtualize_apic_accesses(vcpu)) > + exec_control &= > + ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; > + } > + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); > + } > + > + load_vmcs_host_state(vmcs01); > + > + if (vm_need_tpr_shadow(vcpu->kvm) && > + nested_cpu_has_vmx_tpr_shadow(vcpu)) > + vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); > + > + if (enable_ept) { > + if (!nested_cpu_has_vmx_ept(vcpu)) { > + vmcs_write64(EPT_POINTER, vmcs01->ept_pointer); > + vmcs_write64(GUEST_PDPTR0, vmcs01->guest_pdptr0); > + vmcs_write64(GUEST_PDPTR1, vmcs01->guest_pdptr1); > + vmcs_write64(GUEST_PDPTR2, vmcs01->guest_pdptr2); > + vmcs_write64(GUEST_PDPTR3, vmcs01->guest_pdptr3); > + } > + } > + > + exec_control = vmcs01->cpu_based_vm_exec_control; > + exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; > + exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; > + exec_control &= ~CPU_BASED_TPR_SHADOW; > + exec_control |= vmcs12->cpu_based_vm_exec_control; > + if (!vm_need_tpr_shadow(vcpu->kvm) || > + vmcs12->virtual_apic_page_addr == 0) { > + exec_control &= ~CPU_BASED_TPR_SHADOW; > +#ifdef CONFIG_X86_64 > + exec_control |= CPU_BASED_CR8_STORE_EXITING | > + CPU_BASED_CR8_LOAD_EXITING; > +#endif > + } else if (exec_control & CPU_BASED_TPR_SHADOW) { > +#ifdef CONFIG_X86_64 > + exec_control &= ~CPU_BASED_CR8_STORE_EXITING; > + exec_control &= ~CPU_BASED_CR8_LOAD_EXITING; > +#endif > + } > + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); > + > + /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the > + * bitwise-or of what L1 wants to trap for L2, and what we want to > + * trap. 
However, vmx_fpu_activate/deactivate may have happened > after + * we saved vmcs01, so we shouldn't trust its TS and > NM_VECTOR bits + * and need to base them again on fpu_active. Note > that CR0.TS also + * needs updating - we do this after this function > returns (in + * nested_vmx_run). > + */ > + vmcs_write32(EXCEPTION_BITMAP, > + ((vmcs01->exception_bitmap&~(1u<<NM_VECTOR)) | > + (vcpu->fpu_active ? 0 : (1u<<NM_VECTOR)) | > + vmcs12->exception_bitmap)); > + vmcs_writel(CR0_GUEST_HOST_MASK, vmcs12->cr0_guest_host_mask | > + (vcpu->fpu_active ? 0 : X86_CR0_TS)); > + vcpu->arch.cr0_guest_owned_bits = ~(vmcs12->cr0_guest_host_mask | > + (vcpu->fpu_active ? 0 : X86_CR0_TS)); > + > + vmcs_write32(VM_EXIT_CONTROLS, > + (vmcs01->vm_exit_controls & > + (~(VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT))) > + | vmcs12->vm_exit_controls); > + > + vmcs_write32(VM_ENTRY_CONTROLS, > + (vmcs01->vm_entry_controls & > + (~(VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE))) > + | vmcs12->vm_entry_controls); > + > + vmcs_writel(CR4_GUEST_HOST_MASK, > + (vmcs01->cr4_guest_host_mask & > + vmcs12->cr4_guest_host_mask)); > + > + return 0; > +} > + > static struct kvm_x86_ops vmx_x86_ops = { > .cpu_has_kvm_support = cpu_has_kvm_support, > .disabled_by_bios = vmx_disabled_by_bios, I'm curious whether we need to save all VMCS fields when switching from L2 to L1 (save_vmcs). For example, TSC_OFFSET, PIN_BASED_VM_EXEC_CONTROL and CPU_BASED_VM_EXEC_CONTROL won't be changed during L2 execution. The same goes for the host VMCS state.