All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Jan Beulich <JBeulich@suse.com>,
	xen-devel <xen-devel@lists.xenproject.org>
Cc: Ian Campbell <Ian.Campbell@eu.citrix.com>,
	Keir Fraser <keir@xen.org>,
	Ian Jackson <Ian.Jackson@eu.citrix.com>, Tim Deegan <tim@xen.org>
Subject: Re: [PATCH v3] x86: also allow REP STOS emulation acceleration
Date: Mon, 12 Jan 2015 10:52:43 +0000	[thread overview]
Message-ID: <54B3A77B.4070009@citrix.com> (raw)
In-Reply-To: <54B38D540200007800053702@mail.emea.novell.com>

On 12/01/15 08:01, Jan Beulich wrote:
> While the REP MOVS acceleration appears to have helped qemu-traditional
> based guests, qemu-upstream (or really the respective video BIOSes)
> doesn't appear to benefit from that. Instead the acceleration added
> here provides a visible performance improvement during very early HVM
> guest boot.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

> ---
> v3: Drop now pointless memory clobber from asm() in hvmemul_rep_stos()
>     Introduce and use ASSERT_UNREACHABLE(), as suggested by Andrew.
> v2: Fix asm() constraints in hvmemul_rep_stos(), as pointed out by
>     Andrew. Add output operand telling the compiler that "buf" is being
>     written.
>
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -731,6 +731,17 @@ static int hvmemul_rep_movs_discard(
>      return X86EMUL_OKAY;
>  }
>  
> +static int hvmemul_rep_stos_discard(
> +    void *p_data,
> +    enum x86_segment seg,
> +    unsigned long offset,
> +    unsigned int bytes_per_rep,
> +    unsigned long *reps,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    return X86EMUL_OKAY;
> +}
> +
>  static int hvmemul_rep_outs_discard(
>      enum x86_segment src_seg,
>      unsigned long src_offset,
> @@ -982,6 +993,113 @@ static int hvmemul_rep_movs(
>      return X86EMUL_OKAY;
>  }
>  
> +static int hvmemul_rep_stos(
> +    void *p_data,
> +    enum x86_segment seg,
> +    unsigned long offset,
> +    unsigned int bytes_per_rep,
> +    unsigned long *reps,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    struct hvm_emulate_ctxt *hvmemul_ctxt =
> +        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
> +    unsigned long addr;
> +    paddr_t gpa;
> +    p2m_type_t p2mt;
> +    bool_t df = !!(ctxt->regs->eflags & X86_EFLAGS_DF);
> +    int rc = hvmemul_virtual_to_linear(seg, offset, bytes_per_rep, reps,
> +                                       hvm_access_write, hvmemul_ctxt, &addr);
> +
> +    if ( rc == X86EMUL_OKAY )
> +    {
> +        uint32_t pfec = PFEC_page_present | PFEC_write_access;
> +
> +        if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
> +            pfec |= PFEC_user_mode;
> +
> +        rc = hvmemul_linear_to_phys(
> +            addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
> +    }
> +    if ( rc != X86EMUL_OKAY )
> +        return rc;
> +
> +    /* Check for MMIO op */
> +    (void)get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt);
> +
> +    switch ( p2mt )
> +    {
> +        unsigned long bytes;
> +        void *buf;
> +
> +    default:
> +        /* Allocate temporary buffer. */
> +        for ( ; ; )
> +        {
> +            bytes = *reps * bytes_per_rep;
> +            buf = xmalloc_bytes(bytes);
> +            if ( buf || *reps <= 1 )
> +                break;
> +            *reps >>= 1;
> +        }
> +
> +        if ( !buf )
> +            buf = p_data;
> +        else
> +            switch ( bytes_per_rep )
> +            {
> +                unsigned long dummy;
> +
> +#define CASE(bits, suffix)                                     \
> +            case (bits) / 8:                                   \
> +                asm ( "rep stos" #suffix                       \
> +                      : "=m" (*(char (*)[bytes])buf),          \
> +                        "=D" (dummy), "=c" (dummy)             \
> +                      : "a" (*(const uint##bits##_t *)p_data), \
> +                         "1" (buf), "2" (*reps) );             \
> +                break
> +            CASE(8, b);
> +            CASE(16, w);
> +            CASE(32, l);
> +            CASE(64, q);
> +#undef CASE
> +
> +            default:
> +                ASSERT_UNREACHABLE();
> +                xfree(buf);
> +                return X86EMUL_UNHANDLEABLE;
> +            }
> +
> +        /* Adjust address for reverse store. */
> +        if ( df )
> +            gpa -= bytes - bytes_per_rep;
> +
> +        rc = hvm_copy_to_guest_phys(gpa, buf, bytes);
> +
> +        if ( buf != p_data )
> +            xfree(buf);
> +
> +        switch ( rc )
> +        {
> +        case HVMCOPY_gfn_paged_out:
> +        case HVMCOPY_gfn_shared:
> +            return X86EMUL_RETRY;
> +        case HVMCOPY_okay:
> +            return X86EMUL_OKAY;
> +        }
> +
> +        gdprintk(XENLOG_WARNING,
> +                 "Failed REP STOS: gpa=%"PRIpaddr" reps=%lu bytes_per_rep=%u\n",
> +                 gpa, *reps, bytes_per_rep);
> +        /* fall through */
> +    case p2m_mmio_direct:
> +        return X86EMUL_UNHANDLEABLE;
> +
> +    case p2m_mmio_dm:
> +        return hvmemul_do_mmio(gpa, reps, bytes_per_rep, 0, IOREQ_WRITE, df,
> +                               p_data);
> +    }
> +}
> +
>  static int hvmemul_read_segment(
>      enum x86_segment seg,
>      struct segment_register *reg,
> @@ -1239,6 +1357,7 @@ static const struct x86_emulate_ops hvm_
>      .rep_ins       = hvmemul_rep_ins,
>      .rep_outs      = hvmemul_rep_outs,
>      .rep_movs      = hvmemul_rep_movs,
> +    .rep_stos      = hvmemul_rep_stos,
>      .read_segment  = hvmemul_read_segment,
>      .write_segment = hvmemul_write_segment,
>      .read_io       = hvmemul_read_io,
> @@ -1264,6 +1383,7 @@ static const struct x86_emulate_ops hvm_
>      .rep_ins       = hvmemul_rep_ins_discard,
>      .rep_outs      = hvmemul_rep_outs_discard,
>      .rep_movs      = hvmemul_rep_movs_discard,
> +    .rep_stos      = hvmemul_rep_stos_discard,
>      .read_segment  = hvmemul_read_segment,
>      .write_segment = hvmemul_write_segment,
>      .read_io       = hvmemul_read_io_discard,
> --- a/xen/arch/x86/hvm/stdvga.c
> +++ b/xen/arch/x86/hvm/stdvga.c
> @@ -470,11 +470,11 @@ static int mmio_move(struct hvm_hw_stdvg
>      uint64_t addr = p->addr;
>      p2m_type_t p2mt;
>      struct domain *d = current->domain;
> +    int step = p->df ? -p->size : p->size;
>  
>      if ( p->data_is_ptr )
>      {
>          uint64_t data = p->data, tmp;
> -        int step = p->df ? -p->size : p->size;
>  
>          if ( p->dir == IOREQ_READ )
>          {
> @@ -529,13 +529,18 @@ static int mmio_move(struct hvm_hw_stdvg
>              }
>          }
>      }
> +    else if ( p->dir == IOREQ_WRITE )
> +    {
> +        for ( i = 0; i < p->count; i++ )
> +        {
> +            stdvga_mem_write(addr, p->data, p->size);
> +            addr += step;
> +        }
> +    }
>      else
>      {
>          ASSERT(p->count == 1);
> -        if ( p->dir == IOREQ_READ )
> -            p->data = stdvga_mem_read(addr, p->size);
> -        else
> -            stdvga_mem_write(addr, p->data, p->size);
> +        p->data = stdvga_mem_read(addr, p->size);
>      }
>  
>      read_data = p->data;
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -2568,15 +2568,25 @@ x86_emulate(
>      }
>  
>      case 0xaa ... 0xab: /* stos */ {
> -        /* unsigned long max_reps = */get_rep_prefix();
> -        dst.type  = OP_MEM;
> +        unsigned long nr_reps = get_rep_prefix();
>          dst.bytes = (d & ByteOp) ? 1 : op_bytes;
>          dst.mem.seg = x86_seg_es;
>          dst.mem.off = truncate_ea(_regs.edi);
> -        dst.val   = _regs.eax;
> +        if ( (nr_reps == 1) || !ops->rep_stos ||
> +             ((rc = ops->rep_stos(&_regs.eax,
> +                                  dst.mem.seg, dst.mem.off, dst.bytes,
> +                                  &nr_reps, ctxt)) == X86EMUL_UNHANDLEABLE) )
> +        {
> +            dst.val = _regs.eax;
> +            dst.type = OP_MEM;
> +            nr_reps = 1;
> +        }
> +        else if ( rc != X86EMUL_OKAY )
> +            goto done;
>          register_address_increment(
> -            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
> -        put_rep_prefix(1);
> +            _regs.edi,
> +            nr_reps * ((_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes));
> +        put_rep_prefix(nr_reps);
>          break;
>      }
>  
> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
> @@ -241,6 +241,20 @@ struct x86_emulate_ops
>          struct x86_emulate_ctxt *ctxt);
>  
>      /*
> +     * rep_stos: Emulate STOS: <*p_data> -> <seg:offset>.
> +     *  @bytes_per_rep: [IN ] Bytes transferred per repetition.
> +     *  @reps:  [IN ] Maximum repetitions to be emulated.
> +     *          [OUT] Number of repetitions actually emulated.
> +     */
> +    int (*rep_stos)(
> +        void *p_data,
> +        enum x86_segment seg,
> +        unsigned long offset,
> +        unsigned int bytes_per_rep,
> +        unsigned long *reps,
> +        struct x86_emulate_ctxt *ctxt);
> +
> +    /*
>       * read_segment: Emulate a read of full context of a segment register.
>       *  @reg:   [OUT] Contents of segment register (visible and hidden state).
>       */
> --- a/xen/include/xen/lib.h
> +++ b/xen/include/xen/lib.h
> @@ -41,9 +41,11 @@ do {                                    
>  #ifndef NDEBUG
>  #define ASSERT(p) \
>      do { if ( unlikely(!(p)) ) assert_failed(#p); } while (0)
> +#define ASSERT_UNREACHABLE() assert_failed("unreachable")
>  #define debug_build() 1
>  #else
>  #define ASSERT(p) do { if ( 0 && (p) ); } while (0)
> +#define ASSERT_UNREACHABLE() do { } while (0)
>  #define debug_build() 0
>  #endif
>  
>
>

      reply	other threads:[~2015-01-12 10:52 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-12  8:01 [PATCH v3] x86: also allow REP STOS emulation acceleration Jan Beulich
2015-01-12 10:52 ` Andrew Cooper [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=54B3A77B.4070009@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=Ian.Campbell@eu.citrix.com \
    --cc=Ian.Jackson@eu.citrix.com \
    --cc=JBeulich@suse.com \
    --cc=keir@xen.org \
    --cc=tim@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.