Re: [PATCH v3 06/25] x86emul: support most remaining AVX2 insns

From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Jan Beulich <JBeulich@suse.com>,
	xen-devel <xen-devel@lists.xenproject.org>
Cc: George Dunlap <George.Dunlap@eu.citrix.com>
Subject: Re: [PATCH v3 06/25] x86emul: support most remaining AVX2 insns
Date: Thu, 1 Feb 2018 19:45:31 +0000	[thread overview]
Message-ID: <03d7fffc-712c-850d-968c-8bd8194cf684@citrix.com> (raw)
In-Reply-To: <5A29582402000078001958A0@prv-mh.provo.novell.com>

On 07/12/17 14:03, Jan Beulich wrote:
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -370,7 +370,7 @@ static const struct {
>      [0x0c ... 0x0f] = { .simd_size = simd_packed_fp },
>      [0x10] = { .simd_size = simd_packed_int },
>      [0x13] = { .simd_size = simd_other, .two_op = 1 },
> -    [0x14 ... 0x15] = { .simd_size = simd_packed_fp },
> +    [0x14 ... 0x16] = { .simd_size = simd_packed_fp },
>      [0x17] = { .simd_size = simd_packed_int, .two_op = 1 },
>      [0x18 ... 0x19] = { .simd_size = simd_scalar_fp, .two_op = 1 },
>      [0x1a] = { .simd_size = simd_128, .two_op = 1 },
> @@ -382,9 +382,15 @@ static const struct {
>      [0x2c ... 0x2d] = { .simd_size = simd_other },
>      [0x2e ... 0x2f] = { .simd_size = simd_other, .to_mem = 1 },
>      [0x30 ... 0x35] = { .simd_size = simd_other, .two_op = 1 },
> -    [0x37 ... 0x3f] = { .simd_size = simd_packed_int },
> +    [0x36 ... 0x3f] = { .simd_size = simd_packed_int },
>      [0x40] = { .simd_size = simd_packed_int },
>      [0x41] = { .simd_size = simd_packed_int, .two_op = 1 },
> +    [0x45 ... 0x47] = { .simd_size = simd_packed_int },
> +    [0x58 ... 0x59] = { .simd_size = simd_other, .two_op = 1 },
> +    [0x5a] = { .simd_size = simd_128, .two_op = 1 },
> +    [0x78 ... 0x79] = { .simd_size = simd_other, .two_op = 1 },
> +    [0x8c] = { .simd_size = simd_other },
> +    [0x8e] = { .simd_size = simd_other, .to_mem = 1 },
>      [0x96 ... 0x9f] = { .simd_size = simd_packed_fp },
>      [0xa6 ... 0xaf] = { .simd_size = simd_packed_fp },
>      [0xb6 ... 0xbf] = { .simd_size = simd_packed_fp },
> @@ -406,6 +412,9 @@ static const struct {
>      uint8_t two_op:1;
>      uint8_t four_op:1;
>  } ext0f3a_table[256] = {
> +    [0x00] = { .simd_size = simd_packed_int, .two_op = 1 },
> +    [0x01] = { .simd_size = simd_packed_fp, .two_op = 1 },
> +    [0x02] = { .simd_size = simd_packed_int },
>      [0x04 ... 0x05] = { .simd_size = simd_packed_fp, .two_op = 1 },
>      [0x06] = { .simd_size = simd_packed_fp },
>      [0x08 ... 0x09] = { .simd_size = simd_packed_fp, .two_op = 1 },
> @@ -419,9 +428,12 @@ static const struct {
>      [0x20] = { .simd_size = simd_none },
>      [0x21] = { .simd_size = simd_other },
>      [0x22] = { .simd_size = simd_none },
> +    [0x38] = { .simd_size = simd_128 },
> +    [0x39] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1 },
>      [0x40 ... 0x41] = { .simd_size = simd_packed_fp },
>      [0x42] = { .simd_size = simd_packed_int },
>      [0x44] = { .simd_size = simd_packed_int },
> +    [0x46] = { .simd_size = simd_packed_int },
>      [0x4a ... 0x4b] = { .simd_size = simd_packed_fp, .four_op = 1 },
>      [0x4c] = { .simd_size = simd_packed_int, .four_op = 1 },
>      [0x5c ... 0x5f] = { .simd_size = simd_packed_fp, .four_op = 1 },
> @@ -2973,7 +2985,7 @@ x86_decode(
>          }
>          break;
>  
> -    case simd_scalar_fp:
> +    case simd_scalar_fp: /* case simd_scalar_dq: */

I don't see this case label used, or introduced in any later patches. 
Is it stale?

>          op_bytes = 4 << (ctxt->opcode & 1);
>          break;
>  
> @@ -6070,6 +6082,10 @@ x86_emulate(
>      case X86EMUL_OPC_VEX_66(0x0f38, 0x40): /* vpmulld {x,y}mm/mem,{x,y}mm,{x,y}mm */
>              if ( !vex.l )
>                  goto simd_0f_avx;
> +            /* fall through */
> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x45): /* vpsrlv{d,q} {x,y}mm/mem,{x,y}mm,{x,y}mm */

0x46 / vpsrav{d,q}?  You add a decode for it above, but I don't see an
introduced case.

> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x47): /* vpsllv{d,q} {x,y}mm/mem,{x,y}mm,{x,y}mm */
> +    simd_0f_avx2:
>              host_and_vcpu_must_have(avx2);
>              goto simd_0f_ymm;
>          }
> @@ -6169,7 +6185,10 @@ x86_emulate(
>      case X86EMUL_OPC_VEX_66(0x0f3a, 0x0f): /* vpalignr $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
>      case X86EMUL_OPC_VEX_66(0x0f3a, 0x42): /* vmpsadbw $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
>              if ( vex.l )
> +            {
> +    simd_0f_imm8_avx2:
>                  host_and_vcpu_must_have(avx2);
> +            }
>              else
>              {
>      case X86EMUL_OPC_VEX_66(0x0f3a, 0x08): /* vroundps $imm8,{x,y}mm/mem,{x,y}mm */
> @@ -7150,12 +7169,16 @@ x86_emulate(
>          fic.insn_bytes = PFX_BYTES + 3;
>          break;
>  
> -    case X86EMUL_OPC_VEX_66(0x0f38, 0x19): /* vbroadcastsd m64,ymm */
> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x19): /* vbroadcastsd xmm/m64,ymm */
>      case X86EMUL_OPC_VEX_66(0x0f38, 0x1a): /* vbroadcastf128 m128,ymm */
>          generate_exception_if(!vex.l, EXC_UD);
>          /* fall through */
> -    case X86EMUL_OPC_VEX_66(0x0f38, 0x18): /* vbroadcastss m32,{x,y}mm */
> -        generate_exception_if(ea.type != OP_MEM, EXC_UD);
> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x18): /* vbroadcastss xmm/m32,{x,y}mm */

It would help reviewability substantially if you split bugfixes of
existing code out separately from introduction of new code, especially
given the quantity of new additions here.  These comment changes are
particularly deceptive.

> +        if ( ea.type != OP_MEM )
> +        {
> +            generate_exception_if(b & 2, EXC_UD);
> +            host_and_vcpu_must_have(avx2);
> +        }
>          /* fall through */
>      case X86EMUL_OPC_VEX_66(0x0f38, 0x0c): /* vpermilps {x,y}mm/mem,{x,y}mm,{x,y}mm */
>      case X86EMUL_OPC_VEX_66(0x0f38, 0x0d): /* vpermilpd {x,y}mm/mem,{x,y}mm,{x,y}mm */
> @@ -7254,6 +7277,11 @@ x86_emulate(
>          op_bytes = 8 << vex.l;
>          goto simd_0f_ymm;
>  
> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x16): /* vpermps ymm/m256,ymm,ymm */
> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x36): /* vpermd ymm/m256,ymm,ymm */
> +        generate_exception_if(!vex.l || vex.w, EXC_UD);
> +        goto simd_0f_avx2;
> +

Looking at these additions, the case labels look like they need sorting
again.  Are you going to organise that in a later patch?

>      case X86EMUL_OPC_VEX_66(0x0f38, 0x20): /* vpmovsxbw xmm/mem,{x,y}mm */
>      case X86EMUL_OPC_VEX_66(0x0f38, 0x21): /* vpmovsxbd xmm/mem,{x,y}mm */
>      case X86EMUL_OPC_VEX_66(0x0f38, 0x22): /* vpmovsxbq xmm/mem,{x,y}mm */
> @@ -7370,6 +7398,80 @@ x86_emulate(
>          generate_exception_if(vex.l, EXC_UD);
>          goto simd_0f_avx;
>  
> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x58): /* vpbroadcastd xmm/m32,{x,y}mm */
> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x59): /* vpbroadcastq xmm/m64,{x,y}mm */
> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x78): /* vpbroadcastb xmm/m8,{x,y}mm */
> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x79): /* vpbroadcastw xmm/m16,{x,y}mm */
> +        op_bytes = 1 << ((!(b & 0x20) * 2) + (b & 1));
> +        /* fall through */
> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x46): /* vpsravd {x,y}mm/mem,{x,y}mm,{x,y}mm */
> +        generate_exception_if(vex.w, EXC_UD);

Oh - here is vpsrav{d,q}.  Why is it not with its companions?  The
manual does curiously omit mention of the W1 encoding for VEX (unlike
its companions), but all 3 have W0 and W1 mentioned for EVEX encoding. 
Judging by them all having identical behaviour, and this one not being
declared as suffering a fault because of W, I expect that it is probably
encoded as WIG.

I've noticed lower down as well that you are inconsistent with vex.w
handling compared to the manual as to whether you reject or ignore
unspecified encodings.  Is this intentional, and if so, why?

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel