Re: [PATCH 09/17] SVM: use generic instruction decoding

From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Jan Beulich <JBeulich@suse.com>,
	xen-devel <xen-devel@lists.xenproject.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>,
	Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Subject: Re: [PATCH 09/17] SVM: use generic instruction decoding
Date: Wed, 14 Sep 2016 18:56:44 +0100	[thread overview]
Message-ID: <c0d3de11-25d8-2df9-cde2-ea08385813a3@citrix.com> (raw)
In-Reply-To: <57D1806F020000780010D18F@prv-mh.provo.novell.com>

On 08/09/16 14:14, Jan Beulich wrote:
> @@ -89,141 +54,96 @@ static unsigned long svm_nextrip_insn_le
>      return vmcb->nextrip - vmcb->rip;
>  }
>  
> -/* First byte: Length. Following bytes: Opcode bytes. */
> -#define MAKE_INSTR(nm, ...) static const u8 OPCODE_##nm[] = { __VA_ARGS__ }
> -MAKE_INSTR(INVD,   2, 0x0f, 0x08);
> -MAKE_INSTR(WBINVD, 2, 0x0f, 0x09);
> -MAKE_INSTR(CPUID,  2, 0x0f, 0xa2);
> -MAKE_INSTR(RDMSR,  2, 0x0f, 0x32);
> -MAKE_INSTR(WRMSR,  2, 0x0f, 0x30);
> -MAKE_INSTR(VMCALL, 3, 0x0f, 0x01, 0xd9);
> -MAKE_INSTR(HLT,    1, 0xf4);
> -MAKE_INSTR(INT3,   1, 0xcc);
> -MAKE_INSTR(RDTSC,  2, 0x0f, 0x31);
> -MAKE_INSTR(PAUSE,  1, 0x90);
> -MAKE_INSTR(XSETBV, 3, 0x0f, 0x01, 0xd1);
> -MAKE_INSTR(VMRUN,  3, 0x0f, 0x01, 0xd8);
> -MAKE_INSTR(VMLOAD, 3, 0x0f, 0x01, 0xda);
> -MAKE_INSTR(VMSAVE, 3, 0x0f, 0x01, 0xdb);
> -MAKE_INSTR(STGI,   3, 0x0f, 0x01, 0xdc);
> -MAKE_INSTR(CLGI,   3, 0x0f, 0x01, 0xdd);
> -MAKE_INSTR(INVLPGA,3, 0x0f, 0x01, 0xdf);
> -
> -static const u8 *const opc_bytes[INSTR_MAX_COUNT] =
> -{
> -    [INSTR_INVD]   = OPCODE_INVD,
> -    [INSTR_WBINVD] = OPCODE_WBINVD,
> -    [INSTR_CPUID]  = OPCODE_CPUID,
> -    [INSTR_RDMSR]  = OPCODE_RDMSR,
> -    [INSTR_WRMSR]  = OPCODE_WRMSR,
> -    [INSTR_VMCALL] = OPCODE_VMCALL,
> -    [INSTR_HLT]    = OPCODE_HLT,
> -    [INSTR_INT3]   = OPCODE_INT3,
> -    [INSTR_RDTSC]  = OPCODE_RDTSC,
> -    [INSTR_PAUSE]  = OPCODE_PAUSE,
> -    [INSTR_XSETBV] = OPCODE_XSETBV,
> -    [INSTR_VMRUN]  = OPCODE_VMRUN,
> -    [INSTR_VMLOAD] = OPCODE_VMLOAD,
> -    [INSTR_VMSAVE] = OPCODE_VMSAVE,
> -    [INSTR_STGI]   = OPCODE_STGI,
> -    [INSTR_CLGI]   = OPCODE_CLGI,
> -    [INSTR_INVLPGA] = OPCODE_INVLPGA,
> +static const struct {
> +    unsigned int opcode;
> +    struct {
> +        unsigned int rm:3;
> +        unsigned int reg:3;
> +        unsigned int mod:2;
> +#define MODRM(mod, reg, rm) { rm, reg, mod }
> +    } modrm;
> +} const opc_tab[INSTR_MAX_COUNT] = {
> +    [INSTR_PAUSE]  = { X86EMUL_OPC_F3(0, 0x90) },
> +    [INSTR_INT3]   = { X86EMUL_OPC(   0, 0xcc) },
> +    [INSTR_HLT]    = { X86EMUL_OPC(   0, 0xf4) },
> +    [INSTR_XSETBV] = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 2, 1) },
> +    [INSTR_VMRUN]  = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 0) },
> +    [INSTR_VMCALL] = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 1) },
> +    [INSTR_VMLOAD] = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 2) },
> +    [INSTR_VMSAVE] = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 3) },
> +    [INSTR_STGI]   = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 4) },
> +    [INSTR_CLGI]   = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 5) },
> +    [INSTR_INVLPGA] = { X86EMUL_OPC(0x0f, 0x01), MODRM(3, 3, 7) },
> +    [INSTR_INVD]   = { X86EMUL_OPC(0x0f, 0x08) },
> +    [INSTR_WBINVD] = { X86EMUL_OPC(0x0f, 0x09) },
> +    [INSTR_WRMSR]  = { X86EMUL_OPC(0x0f, 0x30) },
> +    [INSTR_RDTSC]  = { X86EMUL_OPC(0x0f, 0x31) },
> +    [INSTR_RDMSR]  = { X86EMUL_OPC(0x0f, 0x32) },
> +    [INSTR_CPUID]  = { X86EMUL_OPC(0x0f, 0xa2) },
>  };
>  
> -static bool_t fetch(const struct vmcb_struct *vmcb, u8 *buf,
> -                    unsigned long addr, unsigned int len)
> -{
> -    uint32_t pfec = (vmcb_get_cpl(vmcb) == 3) ? PFEC_user_mode : 0;
> -
> -    switch ( hvm_fetch_from_guest_virt(buf, addr, len, pfec) )
> -    {
> -    case HVMCOPY_okay:
> -        break;
> -    case HVMCOPY_bad_gva_to_gfn:
> -        /* OK just to give up; we'll have injected #PF already */
> -        return 0;
> -    default:
> -        /* Not OK: fetches from non-RAM pages are not supportable. */
> -        gdprintk(XENLOG_WARNING, "Bad instruction fetch at %#lx (%#lx)\n",
> -                 vmcb->rip, addr);
> -        hvm_inject_hw_exception(TRAP_gp_fault, 0);
> -        return 0;
> -    }
> -    return 1;
> -}
> -
>  int __get_instruction_length_from_list(struct vcpu *v,
>          const enum instruction_index *list, unsigned int list_count)
>  {
>      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
> -    unsigned int i, j, inst_len = 0;
> -    enum instruction_index instr = 0;
> -    u8 buf[MAX_INST_LEN];
> -    const u8 *opcode = NULL;
> -    unsigned long fetch_addr, fetch_limit;
> -    unsigned int fetch_len, max_len;
> +    struct hvm_emulate_ctxt ctxt;
> +    struct x86_emulate_state *state;
> +    unsigned int inst_len, j, modrm_rm, modrm_reg;
> +    int modrm_mod;
>  
> +#ifdef NDEBUG

Presumably this is just for your testing?

>      if ( (inst_len = svm_nextrip_insn_length(v)) != 0 )
>          return inst_len;
>  
>      if ( vmcb->exitcode == VMEXIT_IOIO )
>          return vmcb->exitinfo2 - vmcb->rip;
> +#endif
>  
> -    /* Fetch up to the next page break; we'll fetch from the next page
> -     * later if we have to. */
> -    fetch_addr = svm_rip2pointer(v, &fetch_limit);
> -    if ( vmcb->rip > fetch_limit )
> -        return 0;
> -    max_len = min(fetch_limit - vmcb->rip + 1, MAX_INST_LEN + 0UL);
> -    fetch_len = min_t(unsigned int, max_len,
> -                      PAGE_SIZE - (fetch_addr & ~PAGE_MASK));
> -    if ( !fetch(vmcb, buf, fetch_addr, fetch_len) )
> +    ASSERT(v == current);
> +    hvm_emulate_prepare(&ctxt, guest_cpu_user_regs());
> +    hvm_emulate_init(&ctxt, NULL, 0);
> +    state = x86_decode_insn(&ctxt.ctxt, hvmemul_insn_fetch);
> +    if ( IS_ERR_OR_NULL(state) )
>          return 0;
>  
> -    while ( (inst_len < max_len) && is_prefix(buf[inst_len]) )
> -    {
> -        inst_len++;
> -        if ( inst_len >= fetch_len )
> -        {
> -            if ( !fetch(vmcb, buf + fetch_len, fetch_addr + fetch_len,
> -                        max_len - fetch_len) )
> -                return 0;
> -            fetch_len = max_len;
> -        }
> +    inst_len = x86_insn_length(state, &ctxt.ctxt);
> +    modrm_mod = x86_insn_modrm(state, &modrm_rm, &modrm_reg);
> +    x86_emulate_free_state(state);

From an API point of view, it is weird to have x86_emulate_free_state()
without a matching allocation function.  Perhaps that is just me.

However, the x86_insn_modrm() API is definitely more weird.  Wouldn't it
be more natural to take optional pointers for the mod, rm and reg parts
individually?

> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -382,7 +382,7 @@ struct operand {
>      } mem;
>  };
>  #ifdef __x86_64__
> -#define REG_POISON ((unsigned long *) 0x8086000000008086UL) /* non-canonical */
> +#define REG_POISON ((void *)0x8086000000008086UL) /* non-canonical */
>  #else
>  #define REG_POISON NULL /* 32-bit builds are for user-space, so NULL is OK. */
>  #endif

Given that these are now used for general pointer poisoning, they should
be renamed.  There are only 3 instances.

> @@ -1631,6 +1631,10 @@ struct x86_emulate_state {
>  
>      unsigned long eip;
>      struct cpu_user_regs *regs;
> +
> +#ifndef NDEBUG
> +    void *caller;
> +#endif

Perhaps worth a comment here?  Its purpose is rather opaque.

>  };
>  
>  /* Helper definitions. */
> @@ -1658,6 +1662,11 @@ x86_decode_base(
>  
>      switch ( ctxt->opcode )
>      {
> +    case 0x90: /* nop / pause */
> +        if ( repe_prefix() )
> +            ctxt->opcode |= X86EMUL_OPC_F3(0, 0);
> +        break;

Why is it necessary to special case the rep prefix handling in this case?

> +int
> +x86_insn_modrm(const struct x86_emulate_state *state,
> +               unsigned int *rm, unsigned int *reg)
> +{
> +    check_state(state);
> +
> +    if ( !(state->desc & ModRM) )
> +        return -EINVAL;
> +
> +    if ( rm )
> +        *rm = state->modrm_rm;
> +    if ( reg )
> +        *reg = state->modrm_reg;
> +
> +    return state->modrm_mod;
> +}
> +
> +unsigned int
> +x86_insn_length(const struct x86_emulate_state *state,
> +                const struct x86_emulate_ctxt *ctxt)
> +{
> +    check_state(state);
> +
> +    return state->eip - ctxt->regs->eip;

Is it worth stashing a starting eip?  This calculation will go wrong
after the emulated state has been committed.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel