From mboxrd@z Thu Jan 1 00:00:00 1970 From: Gleb Natapov Subject: Re: [PATCH 13/24] Implement VMREAD and VMWRITE Date: Wed, 16 Jun 2010 18:03:14 +0300 Message-ID: <20100616150314.GE523@redhat.com> References: <1276431753-nyh@il.ibm.com> <201006131229.o5DCTDhk013030@rice.haifa.ibm.com> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Cc: avi@redhat.com, kvm@vger.kernel.org To: "Nadav Har'El" Return-path: Received: from mx1.redhat.com ([209.132.183.28]:17738 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755114Ab0FPPDT (ORCPT ); Wed, 16 Jun 2010 11:03:19 -0400 Content-Disposition: inline In-Reply-To: <201006131229.o5DCTDhk013030@rice.haifa.ibm.com> Sender: kvm-owner@vger.kernel.org List-ID: On Sun, Jun 13, 2010 at 03:29:13PM +0300, Nadav Har'El wrote: > Implement the VMREAD and VMWRITE instructions. With these instructions, L1 > can read and write to the VMCS it is holding. The values are read or written > to the fields of the shadow_vmcs structure introduced in the previous patch. > > Signed-off-by: Nadav Har'El > --- > --- .before/arch/x86/kvm/vmx.c 2010-06-13 15:01:29.000000000 +0300 > +++ .after/arch/x86/kvm/vmx.c 2010-06-13 15:01:29.000000000 +0300 > @@ -299,6 +299,42 @@ struct nested_vmx { > int l2_vmcs_num; > }; > > +enum vmcs_field_type { > + VMCS_FIELD_TYPE_U16 = 0, > + VMCS_FIELD_TYPE_U64 = 1, > + VMCS_FIELD_TYPE_U32 = 2, > + VMCS_FIELD_TYPE_ULONG = 3 > +}; > + > +#define VMCS_FIELD_LENGTH_OFFSET 13 > +#define VMCS_FIELD_LENGTH_MASK 0x6000 > + > +static inline int vmcs_field_type(unsigned long field) > +{ > + if (0x1 & field) /* one of the *_HIGH fields, all are 32 bit */ > + return VMCS_FIELD_TYPE_U32; > + return (VMCS_FIELD_LENGTH_MASK & field) >> 13; > +} > + > +static inline int vmcs_field_size(int field_type, struct kvm_vcpu *vcpu) > +{ > + switch (field_type) { > + case VMCS_FIELD_TYPE_U16: > + return 2; > + case VMCS_FIELD_TYPE_U32: > + return 4; > + case VMCS_FIELD_TYPE_U64: > + return 8; > + case VMCS_FIELD_TYPE_ULONG: > +#ifdef CONFIG_X86_64 > + if (is_long_mode(vcpu)) > + return 8; > +#endif > + return 4; > + } > + return 0; /* should never happen */ > +} > + > struct vcpu_vmx { > struct kvm_vcpu vcpu; > struct list_head local_vcpus_link; > @@ -4184,6 +4220,189 @@ static int handle_vmclear(struct kvm_vcp > return 1; > } > > +static inline bool nested_vmcs_read_any(struct kvm_vcpu *vcpu, > + unsigned long field, u64 *ret) > +{ > + short offset = vmcs_field_to_offset(field); > + char *p; > + > + if (offset < 0) > + return 0; > + if (!to_vmx(vcpu)->nested.current_l2_page) > + return 0; > + > + p = ((char *)(get_shadow_vmcs(vcpu))) + offset; > + > + switch (vmcs_field_type(field)) { > + case VMCS_FIELD_TYPE_ULONG: > + *ret = *((unsigned long *)p); > + return 1; > + case VMCS_FIELD_TYPE_U16: > + *ret = (u16) *((unsigned long *)p); > + return 1; > + case VMCS_FIELD_TYPE_U32: > + *ret = (u32) *((unsigned long *)p); > + return 1; > + case VMCS_FIELD_TYPE_U64: > + *ret = *((u64 *)p); > + return 1; > + default: > + return 0; /* can never happen. */ > + } > +} > + > +static int handle_vmread_reg(struct kvm_vcpu *vcpu, int reg, > + unsigned long field) > +{ > + u64 field_value; > + if (!nested_vmcs_read_any(vcpu, field, &field_value)) > + return 0; > + > +#ifdef CONFIG_X86_64 > + switch (vmcs_field_type(field)) { > + case VMCS_FIELD_TYPE_U64: case VMCS_FIELD_TYPE_ULONG: > + if (!is_long_mode(vcpu)) { > + kvm_register_write(vcpu, reg+1, field_value >> 32); > + field_value = (u32)field_value; > + } > + } > +#endif > + kvm_register_write(vcpu, reg, field_value); > + return 1; > +} > + > +static int handle_vmread_mem(struct kvm_vcpu *vcpu, gva_t gva, > + unsigned long field) > +{ > + u64 field_value; > + if (!nested_vmcs_read_any(vcpu, field, &field_value)) > + return 0; > + > + /* It's ok to use *_system, because handle_vmread verifies cpl=0 */ > + kvm_write_guest_virt_system(gva, &field_value, > + vmcs_field_size(vmcs_field_type(field), vcpu), > + vcpu, NULL); > + return 1; > +} > + > +static int handle_vmread(struct kvm_vcpu *vcpu) > +{ > + unsigned long field; > + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); > + u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); > + gva_t gva = 0; > + int read_succeed; > + > + if (!nested_vmx_check_permission(vcpu)) > + return 1; > + > + if (!nested_map_current(vcpu)) { > + printk(KERN_INFO "%s invalid shadow vmcs\n", __func__); > + set_rflags_to_vmx_fail_invalid(vcpu); > + return 1; > + } > + > + /* decode instruction info to get the field to read and where to store > + * its value */ > + field = kvm_register_read(vcpu, VMX_OPERAND_REG2(vmx_instruction_info)); > + if (VMX_OPERAND_IS_REG(vmx_instruction_info)) { > + read_succeed = handle_vmread_reg(vcpu, > + VMX_OPERAND_REG(vmx_instruction_info), field); > + } else { > + gva = get_vmx_mem_address(vcpu, exit_qualification, > + vmx_instruction_info); > + if (gva == 0) > + return 1; > + read_succeed = handle_vmread_mem(vcpu, gva, field); > + } > + > + if (read_succeed) { > + clear_rflags_cf_zf(vcpu); > + skip_emulated_instruction(vcpu); > + } else { > + set_rflags_to_vmx_fail_valid(vcpu); > + vmcs_write32(VM_INSTRUCTION_ERROR, 12); VM_INSTRUCTION_ERROR is read only and when do you transfer it to vmcs12 anyway?. I think set_rflags_to_vmx_fail_valid() should get vm_instruction_error as a parameter and put it into vmcs12, that way you'll never forget to provide error code on fail_valid case, compiler will remind you. > + } > + > + nested_unmap_current(vcpu); > + return 1; > +} > + > + > +static int handle_vmwrite(struct kvm_vcpu *vcpu) > +{ > + unsigned long field; > + u64 field_value = 0; > + gva_t gva; > + int field_type; > + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); > + u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); > + char *p; > + short offset; > + > + if (!nested_vmx_check_permission(vcpu)) > + return 1; > + > + if (!nested_map_current(vcpu)) { > + printk(KERN_INFO "%s invalid shadow vmcs\n", __func__); > + set_rflags_to_vmx_fail_invalid(vcpu); > + return 1; > + } > + > + field = kvm_register_read(vcpu, VMX_OPERAND_REG2(vmx_instruction_info)); > + field_type = vmcs_field_type(field); > + > + offset = vmcs_field_to_offset(field); > + if (offset < 0) { > + set_rflags_to_vmx_fail_invalid(vcpu); > + return 1; > + } > + p = ((char *) get_shadow_vmcs(vcpu)) + offset; > + > + if (VMX_OPERAND_IS_REG(vmx_instruction_info)) > + field_value = kvm_register_read(vcpu, > + VMX_OPERAND_REG(vmx_instruction_info)); > + else { > + gva = get_vmx_mem_address(vcpu, exit_qualification, > + vmx_instruction_info); > + if (gva == 0) > + return 1; > + kvm_read_guest_virt(gva, &field_value, > + vmcs_field_size(field_type, vcpu), vcpu, NULL); > + } > + What about checking that vmcs field is read only? > + switch (field_type) { > + case VMCS_FIELD_TYPE_U16: > + *(u16 *)p = field_value; > + break; > + case VMCS_FIELD_TYPE_U32: > + *(u32 *)p = field_value; > + break; > + case VMCS_FIELD_TYPE_U64: > +#ifdef CONFIG_X86_64 > + *(unsigned long *)p = field_value; > +#else > + *(unsigned long *)p = field_value; > + *(((unsigned long *)p)+1) = field_value >> 32; > +#endif > + break; > + case VMCS_FIELD_TYPE_ULONG: > + *(unsigned long *)p = field_value; > + break; > + default: > + printk(KERN_INFO "%s invalid field\n", __func__); > + set_rflags_to_vmx_fail_valid(vcpu); > + vmcs_write32(VM_INSTRUCTION_ERROR, 12); > + nested_unmap_current(vcpu); > + return 1; > + } > + > + clear_rflags_cf_zf(vcpu); > + skip_emulated_instruction(vcpu); > + nested_unmap_current(vcpu); > + return 1; > +} > + > static bool verify_vmcs12_revision(struct kvm_vcpu *vcpu, gpa_t guest_vmcs_addr) > { > bool ret; > @@ -4548,9 +4767,9 @@ static int (*kvm_vmx_exit_handlers[])(st > [EXIT_REASON_VMLAUNCH] = handle_vmx_insn, > [EXIT_REASON_VMPTRLD] = handle_vmptrld, > [EXIT_REASON_VMPTRST] = handle_vmptrst, > - [EXIT_REASON_VMREAD] = handle_vmx_insn, > + [EXIT_REASON_VMREAD] = handle_vmread, > [EXIT_REASON_VMRESUME] = handle_vmx_insn, > - [EXIT_REASON_VMWRITE] = handle_vmx_insn, > + [EXIT_REASON_VMWRITE] = handle_vmwrite, > [EXIT_REASON_VMOFF] = handle_vmoff, > [EXIT_REASON_VMON] = handle_vmon, > [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Gleb.