* [PATCH v3] x86: also allow REP STOS emulation acceleration
@ 2015-01-12 8:01 Jan Beulich
2015-01-12 10:52 ` Andrew Cooper
0 siblings, 1 reply; 2+ messages in thread
From: Jan Beulich @ 2015-01-12 8:01 UTC (permalink / raw)
To: xen-devel
Cc: Ian Campbell, Andrew Cooper, Keir Fraser, Ian Jackson, Tim Deegan
[-- Attachment #1: Type: text/plain, Size: 9016 bytes --]
While the REP MOVS acceleration appears to have helped qemu-traditional
based guests, qemu-upstream (or really the respective video BIOSes)
doesn't appear to benefit from that. Instead the acceleration added
here provides a visible performance improvement during very early HVM
guest boot.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: Drop now pointless memory clobber from asm() in hvmemul_rep_stos()
Introduce and use ASSERT_UNREACHABLE(), as suggested by Andrew.
v2: Fix asm() constraints in hvmemul_rep_stos(), as pointed out by
Andrew. Add output operand telling the compiler that "buf" is being
written.
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -731,6 +731,17 @@ static int hvmemul_rep_movs_discard(
return X86EMUL_OKAY;
}
+static int hvmemul_rep_stos_discard(
+ void *p_data,
+ enum x86_segment seg,
+ unsigned long offset,
+ unsigned int bytes_per_rep,
+ unsigned long *reps,
+ struct x86_emulate_ctxt *ctxt)
+{
+ return X86EMUL_OKAY;
+}
+
static int hvmemul_rep_outs_discard(
enum x86_segment src_seg,
unsigned long src_offset,
@@ -982,6 +993,113 @@ static int hvmemul_rep_movs(
return X86EMUL_OKAY;
}
+static int hvmemul_rep_stos(
+ void *p_data,
+ enum x86_segment seg,
+ unsigned long offset,
+ unsigned int bytes_per_rep,
+ unsigned long *reps,
+ struct x86_emulate_ctxt *ctxt)
+{
+ struct hvm_emulate_ctxt *hvmemul_ctxt =
+ container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+ unsigned long addr;
+ paddr_t gpa;
+ p2m_type_t p2mt;
+ bool_t df = !!(ctxt->regs->eflags & X86_EFLAGS_DF);
+ int rc = hvmemul_virtual_to_linear(seg, offset, bytes_per_rep, reps,
+ hvm_access_write, hvmemul_ctxt, &addr);
+
+ if ( rc == X86EMUL_OKAY )
+ {
+ uint32_t pfec = PFEC_page_present | PFEC_write_access;
+
+ if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
+ pfec |= PFEC_user_mode;
+
+ rc = hvmemul_linear_to_phys(
+ addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
+ }
+ if ( rc != X86EMUL_OKAY )
+ return rc;
+
+ /* Check for MMIO op */
+ (void)get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt);
+
+ switch ( p2mt )
+ {
+ unsigned long bytes;
+ void *buf;
+
+ default:
+ /* Allocate temporary buffer. */
+ for ( ; ; )
+ {
+ bytes = *reps * bytes_per_rep;
+ buf = xmalloc_bytes(bytes);
+ if ( buf || *reps <= 1 )
+ break;
+ *reps >>= 1;
+ }
+
+ if ( !buf )
+ buf = p_data;
+ else
+ switch ( bytes_per_rep )
+ {
+ unsigned long dummy;
+
+#define CASE(bits, suffix) \
+ case (bits) / 8: \
+ asm ( "rep stos" #suffix \
+ : "=m" (*(char (*)[bytes])buf), \
+ "=D" (dummy), "=c" (dummy) \
+ : "a" (*(const uint##bits##_t *)p_data), \
+ "1" (buf), "2" (*reps) ); \
+ break
+ CASE(8, b);
+ CASE(16, w);
+ CASE(32, l);
+ CASE(64, q);
+#undef CASE
+
+ default:
+ ASSERT_UNREACHABLE();
+ xfree(buf);
+ return X86EMUL_UNHANDLEABLE;
+ }
+
+ /* Adjust address for reverse store. */
+ if ( df )
+ gpa -= bytes - bytes_per_rep;
+
+ rc = hvm_copy_to_guest_phys(gpa, buf, bytes);
+
+ if ( buf != p_data )
+ xfree(buf);
+
+ switch ( rc )
+ {
+ case HVMCOPY_gfn_paged_out:
+ case HVMCOPY_gfn_shared:
+ return X86EMUL_RETRY;
+ case HVMCOPY_okay:
+ return X86EMUL_OKAY;
+ }
+
+ gdprintk(XENLOG_WARNING,
+ "Failed REP STOS: gpa=%"PRIpaddr" reps=%lu bytes_per_rep=%u\n",
+ gpa, *reps, bytes_per_rep);
+ /* fall through */
+ case p2m_mmio_direct:
+ return X86EMUL_UNHANDLEABLE;
+
+ case p2m_mmio_dm:
+ return hvmemul_do_mmio(gpa, reps, bytes_per_rep, 0, IOREQ_WRITE, df,
+ p_data);
+ }
+}
+
static int hvmemul_read_segment(
enum x86_segment seg,
struct segment_register *reg,
@@ -1239,6 +1357,7 @@ static const struct x86_emulate_ops hvm_
.rep_ins = hvmemul_rep_ins,
.rep_outs = hvmemul_rep_outs,
.rep_movs = hvmemul_rep_movs,
+ .rep_stos = hvmemul_rep_stos,
.read_segment = hvmemul_read_segment,
.write_segment = hvmemul_write_segment,
.read_io = hvmemul_read_io,
@@ -1264,6 +1383,7 @@ static const struct x86_emulate_ops hvm_
.rep_ins = hvmemul_rep_ins_discard,
.rep_outs = hvmemul_rep_outs_discard,
.rep_movs = hvmemul_rep_movs_discard,
+ .rep_stos = hvmemul_rep_stos_discard,
.read_segment = hvmemul_read_segment,
.write_segment = hvmemul_write_segment,
.read_io = hvmemul_read_io_discard,
--- a/xen/arch/x86/hvm/stdvga.c
+++ b/xen/arch/x86/hvm/stdvga.c
@@ -470,11 +470,11 @@ static int mmio_move(struct hvm_hw_stdvg
uint64_t addr = p->addr;
p2m_type_t p2mt;
struct domain *d = current->domain;
+ int step = p->df ? -p->size : p->size;
if ( p->data_is_ptr )
{
uint64_t data = p->data, tmp;
- int step = p->df ? -p->size : p->size;
if ( p->dir == IOREQ_READ )
{
@@ -529,13 +529,18 @@ static int mmio_move(struct hvm_hw_stdvg
}
}
}
+ else if ( p->dir == IOREQ_WRITE )
+ {
+ for ( i = 0; i < p->count; i++ )
+ {
+ stdvga_mem_write(addr, p->data, p->size);
+ addr += step;
+ }
+ }
else
{
ASSERT(p->count == 1);
- if ( p->dir == IOREQ_READ )
- p->data = stdvga_mem_read(addr, p->size);
- else
- stdvga_mem_write(addr, p->data, p->size);
+ p->data = stdvga_mem_read(addr, p->size);
}
read_data = p->data;
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -2568,15 +2568,25 @@ x86_emulate(
}
case 0xaa ... 0xab: /* stos */ {
- /* unsigned long max_reps = */get_rep_prefix();
- dst.type = OP_MEM;
+ unsigned long nr_reps = get_rep_prefix();
dst.bytes = (d & ByteOp) ? 1 : op_bytes;
dst.mem.seg = x86_seg_es;
dst.mem.off = truncate_ea(_regs.edi);
- dst.val = _regs.eax;
+ if ( (nr_reps == 1) || !ops->rep_stos ||
+ ((rc = ops->rep_stos(&_regs.eax,
+ dst.mem.seg, dst.mem.off, dst.bytes,
+ &nr_reps, ctxt)) == X86EMUL_UNHANDLEABLE) )
+ {
+ dst.val = _regs.eax;
+ dst.type = OP_MEM;
+ nr_reps = 1;
+ }
+ else if ( rc != X86EMUL_OKAY )
+ goto done;
register_address_increment(
- _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
- put_rep_prefix(1);
+ _regs.edi,
+ nr_reps * ((_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes));
+ put_rep_prefix(nr_reps);
break;
}
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -241,6 +241,20 @@ struct x86_emulate_ops
struct x86_emulate_ctxt *ctxt);
/*
+ * rep_stos: Emulate STOS: <*p_data> -> <seg:offset>.
+ * @bytes_per_rep: [IN ] Bytes transferred per repetition.
+ * @reps: [IN ] Maximum repetitions to be emulated.
+ * [OUT] Number of repetitions actually emulated.
+ */
+ int (*rep_stos)(
+ void *p_data,
+ enum x86_segment seg,
+ unsigned long offset,
+ unsigned int bytes_per_rep,
+ unsigned long *reps,
+ struct x86_emulate_ctxt *ctxt);
+
+ /*
* read_segment: Emulate a read of full context of a segment register.
* @reg: [OUT] Contents of segment register (visible and hidden state).
*/
--- a/xen/include/xen/lib.h
+++ b/xen/include/xen/lib.h
@@ -41,9 +41,11 @@ do {
#ifndef NDEBUG
#define ASSERT(p) \
do { if ( unlikely(!(p)) ) assert_failed(#p); } while (0)
+#define ASSERT_UNREACHABLE() assert_failed("unreachable")
#define debug_build() 1
#else
#define ASSERT(p) do { if ( 0 && (p) ); } while (0)
+#define ASSERT_UNREACHABLE() do { } while (0)
#define debug_build() 0
#endif
[-- Attachment #2: x86emul-rep-stos.patch --]
[-- Type: text/plain, Size: 9063 bytes --]
x86: also allow REP STOS emulation acceleration
While the REP MOVS acceleration appears to have helped qemu-traditional
based guests, qemu-upstream (or really the respective video BIOSes)
doesn't appear to benefit from that. Instead the acceleration added
here provides a visible performance improvement during very early HVM
guest boot.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: Drop now pointless memory clobber from asm() in hvmemul_rep_stos()
Introduce and use ASSERT_UNREACHABLE(), as suggested by Andrew.
v2: Fix asm() constraints in hvmemul_rep_stos(), as pointed out by
Andrew. Add output operand telling the compiler that "buf" is being
written.
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -731,6 +731,17 @@ static int hvmemul_rep_movs_discard(
return X86EMUL_OKAY;
}
+static int hvmemul_rep_stos_discard(
+ void *p_data,
+ enum x86_segment seg,
+ unsigned long offset,
+ unsigned int bytes_per_rep,
+ unsigned long *reps,
+ struct x86_emulate_ctxt *ctxt)
+{
+ return X86EMUL_OKAY;
+}
+
static int hvmemul_rep_outs_discard(
enum x86_segment src_seg,
unsigned long src_offset,
@@ -982,6 +993,113 @@ static int hvmemul_rep_movs(
return X86EMUL_OKAY;
}
+static int hvmemul_rep_stos(
+ void *p_data,
+ enum x86_segment seg,
+ unsigned long offset,
+ unsigned int bytes_per_rep,
+ unsigned long *reps,
+ struct x86_emulate_ctxt *ctxt)
+{
+ struct hvm_emulate_ctxt *hvmemul_ctxt =
+ container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+ unsigned long addr;
+ paddr_t gpa;
+ p2m_type_t p2mt;
+ bool_t df = !!(ctxt->regs->eflags & X86_EFLAGS_DF);
+ int rc = hvmemul_virtual_to_linear(seg, offset, bytes_per_rep, reps,
+ hvm_access_write, hvmemul_ctxt, &addr);
+
+ if ( rc == X86EMUL_OKAY )
+ {
+ uint32_t pfec = PFEC_page_present | PFEC_write_access;
+
+ if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
+ pfec |= PFEC_user_mode;
+
+ rc = hvmemul_linear_to_phys(
+ addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
+ }
+ if ( rc != X86EMUL_OKAY )
+ return rc;
+
+ /* Check for MMIO op */
+ (void)get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt);
+
+ switch ( p2mt )
+ {
+ unsigned long bytes;
+ void *buf;
+
+ default:
+ /* Allocate temporary buffer. */
+ for ( ; ; )
+ {
+ bytes = *reps * bytes_per_rep;
+ buf = xmalloc_bytes(bytes);
+ if ( buf || *reps <= 1 )
+ break;
+ *reps >>= 1;
+ }
+
+ if ( !buf )
+ buf = p_data;
+ else
+ switch ( bytes_per_rep )
+ {
+ unsigned long dummy;
+
+#define CASE(bits, suffix) \
+ case (bits) / 8: \
+ asm ( "rep stos" #suffix \
+ : "=m" (*(char (*)[bytes])buf), \
+ "=D" (dummy), "=c" (dummy) \
+ : "a" (*(const uint##bits##_t *)p_data), \
+ "1" (buf), "2" (*reps) ); \
+ break
+ CASE(8, b);
+ CASE(16, w);
+ CASE(32, l);
+ CASE(64, q);
+#undef CASE
+
+ default:
+ ASSERT_UNREACHABLE();
+ xfree(buf);
+ return X86EMUL_UNHANDLEABLE;
+ }
+
+ /* Adjust address for reverse store. */
+ if ( df )
+ gpa -= bytes - bytes_per_rep;
+
+ rc = hvm_copy_to_guest_phys(gpa, buf, bytes);
+
+ if ( buf != p_data )
+ xfree(buf);
+
+ switch ( rc )
+ {
+ case HVMCOPY_gfn_paged_out:
+ case HVMCOPY_gfn_shared:
+ return X86EMUL_RETRY;
+ case HVMCOPY_okay:
+ return X86EMUL_OKAY;
+ }
+
+ gdprintk(XENLOG_WARNING,
+ "Failed REP STOS: gpa=%"PRIpaddr" reps=%lu bytes_per_rep=%u\n",
+ gpa, *reps, bytes_per_rep);
+ /* fall through */
+ case p2m_mmio_direct:
+ return X86EMUL_UNHANDLEABLE;
+
+ case p2m_mmio_dm:
+ return hvmemul_do_mmio(gpa, reps, bytes_per_rep, 0, IOREQ_WRITE, df,
+ p_data);
+ }
+}
+
static int hvmemul_read_segment(
enum x86_segment seg,
struct segment_register *reg,
@@ -1239,6 +1357,7 @@ static const struct x86_emulate_ops hvm_
.rep_ins = hvmemul_rep_ins,
.rep_outs = hvmemul_rep_outs,
.rep_movs = hvmemul_rep_movs,
+ .rep_stos = hvmemul_rep_stos,
.read_segment = hvmemul_read_segment,
.write_segment = hvmemul_write_segment,
.read_io = hvmemul_read_io,
@@ -1264,6 +1383,7 @@ static const struct x86_emulate_ops hvm_
.rep_ins = hvmemul_rep_ins_discard,
.rep_outs = hvmemul_rep_outs_discard,
.rep_movs = hvmemul_rep_movs_discard,
+ .rep_stos = hvmemul_rep_stos_discard,
.read_segment = hvmemul_read_segment,
.write_segment = hvmemul_write_segment,
.read_io = hvmemul_read_io_discard,
--- a/xen/arch/x86/hvm/stdvga.c
+++ b/xen/arch/x86/hvm/stdvga.c
@@ -470,11 +470,11 @@ static int mmio_move(struct hvm_hw_stdvg
uint64_t addr = p->addr;
p2m_type_t p2mt;
struct domain *d = current->domain;
+ int step = p->df ? -p->size : p->size;
if ( p->data_is_ptr )
{
uint64_t data = p->data, tmp;
- int step = p->df ? -p->size : p->size;
if ( p->dir == IOREQ_READ )
{
@@ -529,13 +529,18 @@ static int mmio_move(struct hvm_hw_stdvg
}
}
}
+ else if ( p->dir == IOREQ_WRITE )
+ {
+ for ( i = 0; i < p->count; i++ )
+ {
+ stdvga_mem_write(addr, p->data, p->size);
+ addr += step;
+ }
+ }
else
{
ASSERT(p->count == 1);
- if ( p->dir == IOREQ_READ )
- p->data = stdvga_mem_read(addr, p->size);
- else
- stdvga_mem_write(addr, p->data, p->size);
+ p->data = stdvga_mem_read(addr, p->size);
}
read_data = p->data;
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -2568,15 +2568,25 @@ x86_emulate(
}
case 0xaa ... 0xab: /* stos */ {
- /* unsigned long max_reps = */get_rep_prefix();
- dst.type = OP_MEM;
+ unsigned long nr_reps = get_rep_prefix();
dst.bytes = (d & ByteOp) ? 1 : op_bytes;
dst.mem.seg = x86_seg_es;
dst.mem.off = truncate_ea(_regs.edi);
- dst.val = _regs.eax;
+ if ( (nr_reps == 1) || !ops->rep_stos ||
+ ((rc = ops->rep_stos(&_regs.eax,
+ dst.mem.seg, dst.mem.off, dst.bytes,
+ &nr_reps, ctxt)) == X86EMUL_UNHANDLEABLE) )
+ {
+ dst.val = _regs.eax;
+ dst.type = OP_MEM;
+ nr_reps = 1;
+ }
+ else if ( rc != X86EMUL_OKAY )
+ goto done;
register_address_increment(
- _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
- put_rep_prefix(1);
+ _regs.edi,
+ nr_reps * ((_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes));
+ put_rep_prefix(nr_reps);
break;
}
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -241,6 +241,20 @@ struct x86_emulate_ops
struct x86_emulate_ctxt *ctxt);
/*
+ * rep_stos: Emulate STOS: <*p_data> -> <seg:offset>.
+ * @bytes_per_rep: [IN ] Bytes transferred per repetition.
+ * @reps: [IN ] Maximum repetitions to be emulated.
+ * [OUT] Number of repetitions actually emulated.
+ */
+ int (*rep_stos)(
+ void *p_data,
+ enum x86_segment seg,
+ unsigned long offset,
+ unsigned int bytes_per_rep,
+ unsigned long *reps,
+ struct x86_emulate_ctxt *ctxt);
+
+ /*
* read_segment: Emulate a read of full context of a segment register.
* @reg: [OUT] Contents of segment register (visible and hidden state).
*/
--- a/xen/include/xen/lib.h
+++ b/xen/include/xen/lib.h
@@ -41,9 +41,11 @@ do {
#ifndef NDEBUG
#define ASSERT(p) \
do { if ( unlikely(!(p)) ) assert_failed(#p); } while (0)
+#define ASSERT_UNREACHABLE() assert_failed("unreachable")
#define debug_build() 1
#else
#define ASSERT(p) do { if ( 0 && (p) ); } while (0)
+#define ASSERT_UNREACHABLE() do { } while (0)
#define debug_build() 0
#endif
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH v3] x86: also allow REP STOS emulation acceleration
2015-01-12 8:01 [PATCH v3] x86: also allow REP STOS emulation acceleration Jan Beulich
@ 2015-01-12 10:52 ` Andrew Cooper
0 siblings, 0 replies; 2+ messages in thread
From: Andrew Cooper @ 2015-01-12 10:52 UTC (permalink / raw)
To: Jan Beulich, xen-devel; +Cc: Ian Campbell, Keir Fraser, Ian Jackson, Tim Deegan
On 12/01/15 08:01, Jan Beulich wrote:
> While the REP MOVS acceleration appears to have helped qemu-traditional
> based guests, qemu-upstream (or really the respective video BIOSes)
> doesn't appear to benefit from that. Instead the acceleration added
> here provides a visible performance improvement during very early HVM
> guest boot.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> v3: Drop now pointless memory clobber from asm() in hvmemul_rep_stos()
> Introduce and use ASSERT_UNREACHABLE(), as suggested by Andrew.
> v2: Fix asm() constraints in hvmemul_rep_stos(), as pointed out by
> Andrew. Add output operand telling the compiler that "buf" is being
> written.
>
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -731,6 +731,17 @@ static int hvmemul_rep_movs_discard(
> return X86EMUL_OKAY;
> }
>
> +static int hvmemul_rep_stos_discard(
> + void *p_data,
> + enum x86_segment seg,
> + unsigned long offset,
> + unsigned int bytes_per_rep,
> + unsigned long *reps,
> + struct x86_emulate_ctxt *ctxt)
> +{
> + return X86EMUL_OKAY;
> +}
> +
> static int hvmemul_rep_outs_discard(
> enum x86_segment src_seg,
> unsigned long src_offset,
> @@ -982,6 +993,113 @@ static int hvmemul_rep_movs(
> return X86EMUL_OKAY;
> }
>
> +static int hvmemul_rep_stos(
> + void *p_data,
> + enum x86_segment seg,
> + unsigned long offset,
> + unsigned int bytes_per_rep,
> + unsigned long *reps,
> + struct x86_emulate_ctxt *ctxt)
> +{
> + struct hvm_emulate_ctxt *hvmemul_ctxt =
> + container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
> + unsigned long addr;
> + paddr_t gpa;
> + p2m_type_t p2mt;
> + bool_t df = !!(ctxt->regs->eflags & X86_EFLAGS_DF);
> + int rc = hvmemul_virtual_to_linear(seg, offset, bytes_per_rep, reps,
> + hvm_access_write, hvmemul_ctxt, &addr);
> +
> + if ( rc == X86EMUL_OKAY )
> + {
> + uint32_t pfec = PFEC_page_present | PFEC_write_access;
> +
> + if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
> + pfec |= PFEC_user_mode;
> +
> + rc = hvmemul_linear_to_phys(
> + addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
> + }
> + if ( rc != X86EMUL_OKAY )
> + return rc;
> +
> + /* Check for MMIO op */
> + (void)get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt);
> +
> + switch ( p2mt )
> + {
> + unsigned long bytes;
> + void *buf;
> +
> + default:
> + /* Allocate temporary buffer. */
> + for ( ; ; )
> + {
> + bytes = *reps * bytes_per_rep;
> + buf = xmalloc_bytes(bytes);
> + if ( buf || *reps <= 1 )
> + break;
> + *reps >>= 1;
> + }
> +
> + if ( !buf )
> + buf = p_data;
> + else
> + switch ( bytes_per_rep )
> + {
> + unsigned long dummy;
> +
> +#define CASE(bits, suffix) \
> + case (bits) / 8: \
> + asm ( "rep stos" #suffix \
> + : "=m" (*(char (*)[bytes])buf), \
> + "=D" (dummy), "=c" (dummy) \
> + : "a" (*(const uint##bits##_t *)p_data), \
> + "1" (buf), "2" (*reps) ); \
> + break
> + CASE(8, b);
> + CASE(16, w);
> + CASE(32, l);
> + CASE(64, q);
> +#undef CASE
> +
> + default:
> + ASSERT_UNREACHABLE();
> + xfree(buf);
> + return X86EMUL_UNHANDLEABLE;
> + }
> +
> + /* Adjust address for reverse store. */
> + if ( df )
> + gpa -= bytes - bytes_per_rep;
> +
> + rc = hvm_copy_to_guest_phys(gpa, buf, bytes);
> +
> + if ( buf != p_data )
> + xfree(buf);
> +
> + switch ( rc )
> + {
> + case HVMCOPY_gfn_paged_out:
> + case HVMCOPY_gfn_shared:
> + return X86EMUL_RETRY;
> + case HVMCOPY_okay:
> + return X86EMUL_OKAY;
> + }
> +
> + gdprintk(XENLOG_WARNING,
> + "Failed REP STOS: gpa=%"PRIpaddr" reps=%lu bytes_per_rep=%u\n",
> + gpa, *reps, bytes_per_rep);
> + /* fall through */
> + case p2m_mmio_direct:
> + return X86EMUL_UNHANDLEABLE;
> +
> + case p2m_mmio_dm:
> + return hvmemul_do_mmio(gpa, reps, bytes_per_rep, 0, IOREQ_WRITE, df,
> + p_data);
> + }
> +}
> +
> static int hvmemul_read_segment(
> enum x86_segment seg,
> struct segment_register *reg,
> @@ -1239,6 +1357,7 @@ static const struct x86_emulate_ops hvm_
> .rep_ins = hvmemul_rep_ins,
> .rep_outs = hvmemul_rep_outs,
> .rep_movs = hvmemul_rep_movs,
> + .rep_stos = hvmemul_rep_stos,
> .read_segment = hvmemul_read_segment,
> .write_segment = hvmemul_write_segment,
> .read_io = hvmemul_read_io,
> @@ -1264,6 +1383,7 @@ static const struct x86_emulate_ops hvm_
> .rep_ins = hvmemul_rep_ins_discard,
> .rep_outs = hvmemul_rep_outs_discard,
> .rep_movs = hvmemul_rep_movs_discard,
> + .rep_stos = hvmemul_rep_stos_discard,
> .read_segment = hvmemul_read_segment,
> .write_segment = hvmemul_write_segment,
> .read_io = hvmemul_read_io_discard,
> --- a/xen/arch/x86/hvm/stdvga.c
> +++ b/xen/arch/x86/hvm/stdvga.c
> @@ -470,11 +470,11 @@ static int mmio_move(struct hvm_hw_stdvg
> uint64_t addr = p->addr;
> p2m_type_t p2mt;
> struct domain *d = current->domain;
> + int step = p->df ? -p->size : p->size;
>
> if ( p->data_is_ptr )
> {
> uint64_t data = p->data, tmp;
> - int step = p->df ? -p->size : p->size;
>
> if ( p->dir == IOREQ_READ )
> {
> @@ -529,13 +529,18 @@ static int mmio_move(struct hvm_hw_stdvg
> }
> }
> }
> + else if ( p->dir == IOREQ_WRITE )
> + {
> + for ( i = 0; i < p->count; i++ )
> + {
> + stdvga_mem_write(addr, p->data, p->size);
> + addr += step;
> + }
> + }
> else
> {
> ASSERT(p->count == 1);
> - if ( p->dir == IOREQ_READ )
> - p->data = stdvga_mem_read(addr, p->size);
> - else
> - stdvga_mem_write(addr, p->data, p->size);
> + p->data = stdvga_mem_read(addr, p->size);
> }
>
> read_data = p->data;
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -2568,15 +2568,25 @@ x86_emulate(
> }
>
> case 0xaa ... 0xab: /* stos */ {
> - /* unsigned long max_reps = */get_rep_prefix();
> - dst.type = OP_MEM;
> + unsigned long nr_reps = get_rep_prefix();
> dst.bytes = (d & ByteOp) ? 1 : op_bytes;
> dst.mem.seg = x86_seg_es;
> dst.mem.off = truncate_ea(_regs.edi);
> - dst.val = _regs.eax;
> + if ( (nr_reps == 1) || !ops->rep_stos ||
> + ((rc = ops->rep_stos(&_regs.eax,
> + dst.mem.seg, dst.mem.off, dst.bytes,
> + &nr_reps, ctxt)) == X86EMUL_UNHANDLEABLE) )
> + {
> + dst.val = _regs.eax;
> + dst.type = OP_MEM;
> + nr_reps = 1;
> + }
> + else if ( rc != X86EMUL_OKAY )
> + goto done;
> register_address_increment(
> - _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
> - put_rep_prefix(1);
> + _regs.edi,
> + nr_reps * ((_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes));
> + put_rep_prefix(nr_reps);
> break;
> }
>
> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
> @@ -241,6 +241,20 @@ struct x86_emulate_ops
> struct x86_emulate_ctxt *ctxt);
>
> /*
> + * rep_stos: Emulate STOS: <*p_data> -> <seg:offset>.
> + * @bytes_per_rep: [IN ] Bytes transferred per repetition.
> + * @reps: [IN ] Maximum repetitions to be emulated.
> + * [OUT] Number of repetitions actually emulated.
> + */
> + int (*rep_stos)(
> + void *p_data,
> + enum x86_segment seg,
> + unsigned long offset,
> + unsigned int bytes_per_rep,
> + unsigned long *reps,
> + struct x86_emulate_ctxt *ctxt);
> +
> + /*
> * read_segment: Emulate a read of full context of a segment register.
> * @reg: [OUT] Contents of segment register (visible and hidden state).
> */
> --- a/xen/include/xen/lib.h
> +++ b/xen/include/xen/lib.h
> @@ -41,9 +41,11 @@ do {
> #ifndef NDEBUG
> #define ASSERT(p) \
> do { if ( unlikely(!(p)) ) assert_failed(#p); } while (0)
> +#define ASSERT_UNREACHABLE() assert_failed("unreachable")
> #define debug_build() 1
> #else
> #define ASSERT(p) do { if ( 0 && (p) ); } while (0)
> +#define ASSERT_UNREACHABLE() do { } while (0)
> #define debug_build() 0
> #endif
>
>
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2015-01-12 10:52 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-12 8:01 [PATCH v3] x86: also allow REP STOS emulation acceleration Jan Beulich
2015-01-12 10:52 ` Andrew Cooper
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.