All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Mihai Donțu" <mdontu@bitdefender.com>
To: Jan Beulich <JBeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>,
	Zhi Wang <zhi.a.wang@intel.com>,
	xen-devel@lists.xen.org
Subject: Re: [PATCH v3 2/3] x86/emulate: add support of emulating SSE2 instruction {, v}movd mm, r32/m32 and {, v}movq mm, r64
Date: Mon, 1 Aug 2016 16:28:21 +0300	[thread overview]
Message-ID: <20160801162821.58a728a9@bitdefender.com> (raw)
In-Reply-To: <579F63BC02000078001014A8@prv-mh.provo.novell.com>

On Monday 01 August 2016 06:59:08 Jan Beulich wrote:
> >>> On 01.08.16 at 14:53, <mdontu@bitdefender.com> wrote:  
> > On Monday 01 August 2016 10:52:12 Andrew Cooper wrote:  
> >> On 01/08/16 03:52, Mihai Donțu wrote:  
> >> > Found that a Windows driver was using the SSE2 instruction MOVD.
> >> >
> >> > Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
> >> > Signed-off-by: Mihai Donțu <mdontu@bitdefender.com>
> >> > ---
> >> > Picked from the XenServer 7 patch queue, as suggested by Andrew Cooper
> >> >
> >> > Changed since v2:
> >> >  * handle the case where the destination is a GPR
> >> > ---
> >> >  xen/arch/x86/x86_emulate/x86_emulate.c | 38 +++++++++++++++++++++++++++++++---
> >> >  1 file changed, 35 insertions(+), 3 deletions(-)
> >> >
> >> > diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
> >> > index 44de3b6..9f89ada 100644
> >> > --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> >> > +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> >> > @@ -204,7 +204,7 @@ static uint8_t twobyte_table[256] = {
> >> >      /* 0x60 - 0x6F */
> >> >      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM,
> >> >      /* 0x70 - 0x7F */
> >> > -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM,
> >> > +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, ImplicitOps|ModRM,
> >> >      /* 0x80 - 0x87 */
> >> >      ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
> >> >      ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
> >> > @@ -4409,6 +4409,10 @@ x86_emulate(
> >> >      case 0x6f: /* movq mm/m64,mm */
> >> >                 /* {,v}movdq{a,u} xmm/m128,xmm */
> >> >                 /* vmovdq{a,u} ymm/m256,ymm */
> >> > +    case 0x7e: /* movd mm,r/m32 */
> >> > +               /* movq mm,r/m64 */
> >> > +               /* {,v}movd xmm,r/m32 */
> >> > +               /* {,v}movq xmm,r/m64 */
> >> >      case 0x7f: /* movq mm,mm/m64 */
> >> >                 /* {,v}movdq{a,u} xmm,xmm/m128 */
> >> >                 /* vmovdq{a,u} ymm,ymm/m256 */
> >> > @@ -4432,7 +4436,17 @@ x86_emulate(
> >> >                  host_and_vcpu_must_have(sse2);
> >> >                  buf[0] = 0x66; /* SSE */
> >> >                  get_fpu(X86EMUL_FPU_xmm, &fic);
> >> > -                ea.bytes = (b == 0xd6 ? 8 : 16);
> >> > +                switch ( b )
> >> > +                {
> >> > +                case 0x7e:
> >> > +                    ea.bytes = 4;
> >> > +                    break;
> >> > +                case 0xd6:
> >> > +                    ea.bytes = 8;
> >> > +                    break;
> >> > +                default:
> >> > +                    ea.bytes = 16;
> >> > +                }
> >> >                  break;
> >> >              case vex_none:
> >> >                  if ( b != 0xe7 )
> >> > @@ -4452,7 +4466,17 @@ x86_emulate(
> >> >                      ((vex.pfx != vex_66) && (vex.pfx != vex_f3)));
> >> >              host_and_vcpu_must_have(avx);
> >> >              get_fpu(X86EMUL_FPU_ymm, &fic);
> >> > -            ea.bytes = (b == 0xd6 ? 8 : (16 << vex.l));
> >> > +            switch ( b )
> >> > +            {
> >> > +            case 0x7e:
> >> > +                ea.bytes = 4;
> >> > +                break;
> >> > +            case 0xd6:
> >> > +                ea.bytes = 8;
> >> > +                break;
> >> > +            default:
> >> > +                ea.bytes = 16 << vex.l;
> >> > +            }
> >> >          }
> >> >          if ( ea.type == OP_MEM )
> >> >          {
> >> > @@ -4468,6 +4492,14 @@ x86_emulate(
> >> >              vex.b = 1;
> >> >              buf[4] &= 0x38;
> >> >          }
> >> > +        else if ( b == 0x7e )
> >> > +        {
> >> > +            /* convert the GPR destination to (%rAX) */
> >> > +            *((unsigned long *)&mmvalp) = (unsigned long)ea.reg;
> >> > +            rex_prefix &= ~REX_B;
> >> > +            vex.b = 1;
> >> > +            buf[4] &= 0x38;
> >> > +        }    
> >> 
> >> Thank you for doing this.  However, looking at it, it has some code in
> >> common with the "ea.type == OP_MEM" clause.
> >> 
> >> Would this work?
> >> 
> >> diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c
> >> b/xen/arch/x86/x86_emulate/x86_emulate.c
> >> index fe594ba..90db067 100644
> >> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> >> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> >> @@ -4453,16 +4453,25 @@ x86_emulate(
> >>              get_fpu(X86EMUL_FPU_ymm, &fic);
> >>              ea.bytes = 16 << vex.l;
> >>          }
> >> -        if ( ea.type == OP_MEM )
> >> +        if ( ea.type == OP_MEM || ea.type == OP_REG )
> >>          {
> >> -            /* XXX enable once there is ops->ea() or equivalent
> >> -            generate_exception_if((vex.pfx == vex_66) &&
> >> -                                  (ops->ea(ea.mem.seg, ea.mem.off)
> >> -                                   & (ea.bytes - 1)), EXC_GP, 0); */
> >> -            if ( b == 0x6f )
> >> -                rc = ops->read(ea.mem.seg, ea.mem.off+0, mmvalp,
> >> -                               ea.bytes, ctxt);
> >>              /* convert memory operand to (%rAX) */
> >> +
> >> +            if ( ea.type == OP_MEM)
> >> +            {
> >> +                /* XXX enable once there is ops->ea() or equivalent
> >> +                   generate_exception_if((vex.pfx == vex_66) &&
> >> +                   (ops->ea(ea.mem.seg, ea.mem.off)
> >> +                   & (ea.bytes - 1)), EXC_GP, 0); */
> >> +                if ( b == 0x6f )
> >> +                    rc = ops->read(ea.mem.seg, ea.mem.off+0, mmvalp,
> >> +                                   ea.bytes, ctxt);
> >> +            }
> >> +            else if ( ea.type == OP_REG )
> >> +            {
> >> +                *((unsigned long *)&mmvalp) = (unsigned long)ea.reg;
> >> +            }
> >> +
> >>              rex_prefix &= ~REX_B;
> >>              vex.b = 1;
> >>              buf[4] &= 0x38;
> >> 
> >> 
> >> This is untested, but avoids duplicating this bit of state manipulation.  
> > 
> > Your suggestion makes sense, but I'm starting to doubt my initial
> > patch. :-) I'm testing "movq xmm1, xmm1" and noticing that it takes the
> > GPR-handling route and I can't seem to be able to easily prevent it
> > with !(rex_prefix & REX_B), as rex_prefix == 0 and vex.b == 1. I need
> > to take a harder look at how that class of instructions is coded.  
> 
> You obviously need to distinguish the two kinds of register sources/
> destinations: GPRs need suitable re-writing of the instruction (without
> having looked at the most recent version of the patch yet I btw doubt
> converting register to memory operands is the most efficient model),
> while MMs, XMMs, and YMMs can retain their register encoding.

Regarding efficiency, I'm not married to the approach I've proposed.
If you can give me a few more hints, I can give it a try.

-- 
Mihai DONȚU

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

  reply	other threads:[~2016-08-01 13:28 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-08-01  2:52 [PATCH v3 1/3] x86/emulate: add support for {, v}movq xmm, xmm/m64 Mihai Donțu
2016-08-01  2:52 ` [PATCH v3 2/3] x86/emulate: add support of emulating SSE2 instruction {, v}movd mm, r32/m32 and {, v}movq mm, r64 Mihai Donțu
2016-08-01  9:52   ` Andrew Cooper
2016-08-01 12:53     ` Mihai Donțu
2016-08-01 12:56       ` Mihai Donțu
2016-08-01 12:57       ` Andrew Cooper
2016-08-01 12:59       ` Jan Beulich
2016-08-01 13:28         ` Mihai Donțu [this message]
2016-08-01 13:43           ` Jan Beulich
2016-08-01 14:48             ` Mihai Donțu
2016-08-01 14:53               ` Andrew Cooper
2016-08-01 15:10                 ` Mihai Donțu
2016-08-01 14:55               ` Mihai Donțu
2016-08-01 14:59                 ` Jan Beulich
2016-08-01 15:01                   ` Andrew Cooper
2016-08-01 14:56               ` Jan Beulich
2016-08-01 13:38   ` Jan Beulich
2016-08-01  2:52 ` [PATCH v3 3/3] x86/emulate: added tests for {, v}movd mm, r32/m32 and {, v}movq xmm, r64/m64 Mihai Donțu
2016-08-01  9:54   ` Andrew Cooper
2016-08-01 12:46     ` Mihai Donțu
2016-08-01  9:18 ` [PATCH v3 1/3] x86/emulate: add support for {, v}movq xmm, xmm/m64 Andrew Cooper
2016-08-01 13:19 ` Jan Beulich
2016-08-01 13:25   ` Mihai Donțu
2016-08-01 23:19   ` Mihai Donțu
2016-08-02  6:19     ` Jan Beulich
2016-08-02  8:13       ` Mihai Donțu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160801162821.58a728a9@bitdefender.com \
    --to=mdontu@bitdefender.com \
    --cc=JBeulich@suse.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=xen-devel@lists.xen.org \
    --cc=zhi.a.wang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.