xen-devel.lists.xenproject.org archive mirror
* [PATCH v2] x86/HVM: use available linear->phys translations in REP MOVS/STOS handling
@ 2016-06-20 11:29 Jan Beulich
  2016-06-20 12:22 ` Paul Durrant
  2016-06-21 19:50 ` Andrew Cooper
  0 siblings, 2 replies; 6+ messages in thread
From: Jan Beulich @ 2016-06-20 11:29 UTC (permalink / raw)
  To: xen-devel; +Cc: Paul Durrant


If we have the translation result available already, we should also use
it here. In my tests with Linux guests this eliminates all calls to
hvmemul_linear_to_phys() from the STOS path and most from the MOVS one.

Also record the translation for re-use at least during response
processing.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Restrict to single iteration or address incrementing variant upon
    initial invocation, and don't bound *reps upon response processing.
    Latch translation only for the actual MMIO part of the accesses (to
    match the purpose of the per-vCPU fields; any further translation
    caching should probably go into hvmemul_linear_to_phys() itself).
    Also this time tested on AMD as well (albeit it still escapes me
    why behavior here would be CPU vendor dependent).

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1156,6 +1156,7 @@ static int hvmemul_rep_movs(
 {
     struct hvm_emulate_ctxt *hvmemul_ctxt =
         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    struct hvm_vcpu_io *vio = &current->arch.hvm_vcpu.hvm_io;
     unsigned long saddr, daddr, bytes;
     paddr_t sgpa, dgpa;
     uint32_t pfec = PFEC_page_present;
@@ -1178,16 +1179,41 @@ static int hvmemul_rep_movs(
     if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
         pfec |= PFEC_user_mode;
 
-    rc = hvmemul_linear_to_phys(
-        saddr, &sgpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
-    if ( rc != X86EMUL_OKAY )
-        return rc;
+    if ( vio->mmio_access.read_access &&
+         (vio->mmio_gla == (saddr & PAGE_MASK)) &&
+         /*
+          * Upon initial invocation don't truncate large batches just because
+          * of a hit for the translation: Doing the guest page table walk is
+          * cheaper than multiple round trips through the device model. Yet
+          * when processing a response we can always re-use the translation.
+          */
+         (vio->io_req.state == STATE_IORESP_READY ||
+          ((!df || *reps == 1) &&
+           PAGE_SIZE - (saddr & ~PAGE_MASK) >= *reps * bytes_per_rep)) )
+        sgpa = pfn_to_paddr(vio->mmio_gpfn) | (saddr & ~PAGE_MASK);
+    else
+    {
+        rc = hvmemul_linear_to_phys(saddr, &sgpa, bytes_per_rep, reps, pfec,
+                                    hvmemul_ctxt);
+        if ( rc != X86EMUL_OKAY )
+            return rc;
+    }
 
-    rc = hvmemul_linear_to_phys(
-        daddr, &dgpa, bytes_per_rep, reps,
-        pfec | PFEC_write_access, hvmemul_ctxt);
-    if ( rc != X86EMUL_OKAY )
-        return rc;
+    bytes = PAGE_SIZE - (daddr & ~PAGE_MASK);
+    if ( vio->mmio_access.write_access &&
+         (vio->mmio_gla == (daddr & PAGE_MASK)) &&
+         /* See comment above. */
+         (vio->io_req.state == STATE_IORESP_READY ||
+          ((!df || *reps == 1) &&
+           PAGE_SIZE - (daddr & ~PAGE_MASK) >= *reps * bytes_per_rep)) )
+        dgpa = pfn_to_paddr(vio->mmio_gpfn) | (daddr & ~PAGE_MASK);
+    else
+    {
+        rc = hvmemul_linear_to_phys(daddr, &dgpa, bytes_per_rep, reps,
+                                    pfec | PFEC_write_access, hvmemul_ctxt);
+        if ( rc != X86EMUL_OKAY )
+            return rc;
+    }
 
     /* Check for MMIO ops */
     (void) get_gfn_query_unlocked(current->domain, sgpa >> PAGE_SHIFT, &sp2mt);
@@ -1198,12 +1224,18 @@ static int hvmemul_rep_movs(
         return X86EMUL_UNHANDLEABLE;
 
     if ( sp2mt == p2m_mmio_dm )
+    {
+        latch_linear_to_phys(vio, saddr, sgpa, 0);
         return hvmemul_do_mmio_addr(
             sgpa, reps, bytes_per_rep, IOREQ_READ, df, dgpa);
+    }
 
     if ( dp2mt == p2m_mmio_dm )
+    {
+        latch_linear_to_phys(vio, daddr, dgpa, 1);
         return hvmemul_do_mmio_addr(
             dgpa, reps, bytes_per_rep, IOREQ_WRITE, df, sgpa);
+    }
 
     /* RAM-to-RAM copy: emulate as equivalent of memmove(dgpa, sgpa, bytes). */
     bytes = *reps * bytes_per_rep;
@@ -1279,25 +1311,37 @@ static int hvmemul_rep_stos(
 {
     struct hvm_emulate_ctxt *hvmemul_ctxt =
         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
-    unsigned long addr;
+    struct hvm_vcpu_io *vio = &current->arch.hvm_vcpu.hvm_io;
+    unsigned long addr, bytes;
     paddr_t gpa;
     p2m_type_t p2mt;
     bool_t df = !!(ctxt->regs->eflags & X86_EFLAGS_DF);
     int rc = hvmemul_virtual_to_linear(seg, offset, bytes_per_rep, reps,
                                        hvm_access_write, hvmemul_ctxt, &addr);
 
-    if ( rc == X86EMUL_OKAY )
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    bytes = PAGE_SIZE - (addr & ~PAGE_MASK);
+    if ( vio->mmio_access.write_access &&
+         (vio->mmio_gla == (addr & PAGE_MASK)) &&
+         /* See respective comment in MOVS processing. */
+         (vio->io_req.state == STATE_IORESP_READY ||
+          ((!df || *reps == 1) &&
+           PAGE_SIZE - (addr & ~PAGE_MASK) >= *reps * bytes_per_rep)) )
+        gpa = pfn_to_paddr(vio->mmio_gpfn) | (addr & ~PAGE_MASK);
+    else
     {
         uint32_t pfec = PFEC_page_present | PFEC_write_access;
 
         if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
             pfec |= PFEC_user_mode;
 
-        rc = hvmemul_linear_to_phys(
-            addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
+        rc = hvmemul_linear_to_phys(addr, &gpa, bytes_per_rep, reps, pfec,
+                                    hvmemul_ctxt);
+        if ( rc != X86EMUL_OKAY )
+            return rc;
     }
-    if ( rc != X86EMUL_OKAY )
-        return rc;
 
     /* Check for MMIO op */
     (void)get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt);
@@ -1371,6 +1415,7 @@ static int hvmemul_rep_stos(
         return X86EMUL_UNHANDLEABLE;
 
     case p2m_mmio_dm:
+        latch_linear_to_phys(vio, addr, gpa, 1);
         return hvmemul_do_mmio_buffer(gpa, reps, bytes_per_rep, IOREQ_WRITE, df,
                                       p_data);
     }
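
The latch_linear_to_phys() helper called in the hunks above is not added by
this diff, so it is presumably already present in emulate.c. For readers
following the reuse checks, here is a minimal sketch of what such a helper
is assumed to do, derived only from how the latched per-vCPU fields are
tested in hvmemul_rep_movs()/hvmemul_rep_stos() above; field and type
details beyond those checks (e.g. gla_valid) are assumptions, not the
in-tree implementation:

/*
 * Sketch only: record a linear->physical translation in the per-vCPU
 * fields so that the checks above can re-use it on the next invocation.
 * mmio_gla/mmio_gpfn/mmio_access mirror the checks in this patch; the
 * gla_valid guard and exact body are assumptions for illustration.
 */
static void latch_linear_to_phys(struct hvm_vcpu_io *vio,
                                 unsigned long gla, paddr_t gpa,
                                 bool_t write)
{
    if ( vio->mmio_access.gla_valid )
        return;                              /* keep the first latched entry */

    vio->mmio_gla = gla & PAGE_MASK;         /* page-aligned linear address */
    vio->mmio_gpfn = paddr_to_pfn(gpa);      /* matching guest frame number */
    vio->mmio_access = (struct npfec){ .gla_valid = 1,
                                       .read_access = 1,
                                       .write_access = write };
}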




* Re: [PATCH v2] x86/HVM: use available linear->phys translations in REP MOVS/STOS handling
  2016-06-20 11:29 [PATCH v2] x86/HVM: use available linear->phys translations in REP MOVS/STOS handling Jan Beulich
@ 2016-06-20 12:22 ` Paul Durrant
  2016-06-21 19:50 ` Andrew Cooper
  1 sibling, 0 replies; 6+ messages in thread
From: Paul Durrant @ 2016-06-20 12:22 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 20 June 2016 12:29
> To: xen-devel
> Cc: Paul Durrant
> Subject: [PATCH v2] x86/HVM: use available linear->phys translations in REP
> MOVS/STOS handling
> 
> If we have the translation result available already, we should also use
> it here. In my tests with Linux guests this eliminates all calls to
> hvmemul_linear_to_phys() from the STOS path and most from the MOVS
> one.
> 
> Also record the translation for re-use at least during response
> processing.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Paul Durrant <paul.durrant@citrix.com>

> ---
> v2: Restrict to single iteration or address incrementing variant upon
>     initial invocation, and don't bound *reps upon response processing.
>     Latch translation only for the actual MMIO part of the accesses (to
>     match the purpose of the per-vCPU fields; any further translation
>     caching should probably go into hvmemul_linear_to_phys() itself).
>     Also this time tested on AMD as well (albeit it still escapes me
>     why behavior here would be CPU vendor dependent).
> 
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -1156,6 +1156,7 @@ static int hvmemul_rep_movs(
>  {
>      struct hvm_emulate_ctxt *hvmemul_ctxt =
>          container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
> +    struct hvm_vcpu_io *vio = &current->arch.hvm_vcpu.hvm_io;
>      unsigned long saddr, daddr, bytes;
>      paddr_t sgpa, dgpa;
>      uint32_t pfec = PFEC_page_present;
> @@ -1178,16 +1179,41 @@ static int hvmemul_rep_movs(
>      if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
>          pfec |= PFEC_user_mode;
> 
> -    rc = hvmemul_linear_to_phys(
> -        saddr, &sgpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
> -    if ( rc != X86EMUL_OKAY )
> -        return rc;
> +    if ( vio->mmio_access.read_access &&
> +         (vio->mmio_gla == (saddr & PAGE_MASK)) &&
> +         /*
> +          * Upon initial invocation don't truncate large batches just because
> +          * of a hit for the translation: Doing the guest page table walk is
> +          * cheaper than multiple round trips through the device model. Yet
> +          * when processing a response we can always re-use the translation.
> +          */
> +         (vio->io_req.state == STATE_IORESP_READY ||
> +          ((!df || *reps == 1) &&
> +           PAGE_SIZE - (saddr & ~PAGE_MASK) >= *reps * bytes_per_rep)) )
> +        sgpa = pfn_to_paddr(vio->mmio_gpfn) | (saddr & ~PAGE_MASK);
> +    else
> +    {
> +        rc = hvmemul_linear_to_phys(saddr, &sgpa, bytes_per_rep, reps, pfec,
> +                                    hvmemul_ctxt);
> +        if ( rc != X86EMUL_OKAY )
> +            return rc;
> +    }
> 
> -    rc = hvmemul_linear_to_phys(
> -        daddr, &dgpa, bytes_per_rep, reps,
> -        pfec | PFEC_write_access, hvmemul_ctxt);
> -    if ( rc != X86EMUL_OKAY )
> -        return rc;
> +    bytes = PAGE_SIZE - (daddr & ~PAGE_MASK);
> +    if ( vio->mmio_access.write_access &&
> +         (vio->mmio_gla == (daddr & PAGE_MASK)) &&
> +         /* See comment above. */
> +         (vio->io_req.state == STATE_IORESP_READY ||
> +          ((!df || *reps == 1) &&
> +           PAGE_SIZE - (daddr & ~PAGE_MASK) >= *reps * bytes_per_rep)) )
> +        dgpa = pfn_to_paddr(vio->mmio_gpfn) | (daddr & ~PAGE_MASK);
> +    else
> +    {
> +        rc = hvmemul_linear_to_phys(daddr, &dgpa, bytes_per_rep, reps,
> +                                    pfec | PFEC_write_access, hvmemul_ctxt);
> +        if ( rc != X86EMUL_OKAY )
> +            return rc;
> +    }
> 
>      /* Check for MMIO ops */
>      (void) get_gfn_query_unlocked(current->domain, sgpa >> PAGE_SHIFT,
> &sp2mt);
> @@ -1198,12 +1224,18 @@ static int hvmemul_rep_movs(
>          return X86EMUL_UNHANDLEABLE;
> 
>      if ( sp2mt == p2m_mmio_dm )
> +    {
> +        latch_linear_to_phys(vio, saddr, sgpa, 0);
>          return hvmemul_do_mmio_addr(
>              sgpa, reps, bytes_per_rep, IOREQ_READ, df, dgpa);
> +    }
> 
>      if ( dp2mt == p2m_mmio_dm )
> +    {
> +        latch_linear_to_phys(vio, daddr, dgpa, 1);
>          return hvmemul_do_mmio_addr(
>              dgpa, reps, bytes_per_rep, IOREQ_WRITE, df, sgpa);
> +    }
> 
>      /* RAM-to-RAM copy: emulate as equivalent of memmove(dgpa, sgpa,
> bytes). */
>      bytes = *reps * bytes_per_rep;
> @@ -1279,25 +1311,37 @@ static int hvmemul_rep_stos(
>  {
>      struct hvm_emulate_ctxt *hvmemul_ctxt =
>          container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
> -    unsigned long addr;
> +    struct hvm_vcpu_io *vio = &current->arch.hvm_vcpu.hvm_io;
> +    unsigned long addr, bytes;
>      paddr_t gpa;
>      p2m_type_t p2mt;
>      bool_t df = !!(ctxt->regs->eflags & X86_EFLAGS_DF);
>      int rc = hvmemul_virtual_to_linear(seg, offset, bytes_per_rep, reps,
>                                         hvm_access_write, hvmemul_ctxt, &addr);
> 
> -    if ( rc == X86EMUL_OKAY )
> +    if ( rc != X86EMUL_OKAY )
> +        return rc;
> +
> +    bytes = PAGE_SIZE - (addr & ~PAGE_MASK);
> +    if ( vio->mmio_access.write_access &&
> +         (vio->mmio_gla == (addr & PAGE_MASK)) &&
> +         /* See respective comment in MOVS processing. */
> +         (vio->io_req.state == STATE_IORESP_READY ||
> +          ((!df || *reps == 1) &&
> +           PAGE_SIZE - (addr & ~PAGE_MASK) >= *reps * bytes_per_rep)) )
> +        gpa = pfn_to_paddr(vio->mmio_gpfn) | (addr & ~PAGE_MASK);
> +    else
>      {
>          uint32_t pfec = PFEC_page_present | PFEC_write_access;
> 
>          if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
>              pfec |= PFEC_user_mode;
> 
> -        rc = hvmemul_linear_to_phys(
> -            addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
> +        rc = hvmemul_linear_to_phys(addr, &gpa, bytes_per_rep, reps, pfec,
> +                                    hvmemul_ctxt);
> +        if ( rc != X86EMUL_OKAY )
> +            return rc;
>      }
> -    if ( rc != X86EMUL_OKAY )
> -        return rc;
> 
>      /* Check for MMIO op */
>      (void)get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT,
> &p2mt);
> @@ -1371,6 +1415,7 @@ static int hvmemul_rep_stos(
>          return X86EMUL_UNHANDLEABLE;
> 
>      case p2m_mmio_dm:
> +        latch_linear_to_phys(vio, addr, gpa, 1);
>          return hvmemul_do_mmio_buffer(gpa, reps, bytes_per_rep,
> IOREQ_WRITE, df,
>                                        p_data);
>      }
> 



* Re: [PATCH v2] x86/HVM: use available linear->phys translations in REP MOVS/STOS handling
  2016-06-20 11:29 [PATCH v2] x86/HVM: use available linear->phys translations in REP MOVS/STOS handling Jan Beulich
  2016-06-20 12:22 ` Paul Durrant
@ 2016-06-21 19:50 ` Andrew Cooper
  2016-06-22  7:19   ` Jan Beulich
  1 sibling, 1 reply; 6+ messages in thread
From: Andrew Cooper @ 2016-06-21 19:50 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Paul Durrant

On 20/06/16 12:29, Jan Beulich wrote:
> If we have the translation result available already, we should also use
> it here. In my tests with Linux guests this eliminates all calls to
> hvmemul_linear_to_phys() from the STOS path and most from the MOVS one.
>
> Also record the translation for re-use at least during response
> processing.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

This patch is still broken.  All XenServer HVM guests (both windows and
linux) are dying, with Qemu citing

I/O request not read: 0, ptr: 0, port: 0, data: 0, count: 0, size: 0


The domain still exists, but it looks like it never gets unpaused from
the device model request.

~Andrew


* Re: [PATCH v2] x86/HVM: use available linear->phys translations in REP MOVS/STOS handling
  2016-06-21 19:50 ` Andrew Cooper
@ 2016-06-22  7:19   ` Jan Beulich
  2016-06-23  9:56     ` Andrew Cooper
  0 siblings, 1 reply; 6+ messages in thread
From: Jan Beulich @ 2016-06-22  7:19 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Paul Durrant

>>> On 21.06.16 at 21:50, <andrew.cooper3@citrix.com> wrote:
> On 20/06/16 12:29, Jan Beulich wrote:
>> If we have the translation result available already, we should also use
>> it here. In my tests with Linux guests this eliminates all calls to
>> hvmemul_linear_to_phys() from the STOS path and most from the MOVS one.
>>
>> Also record the translation for re-use at least during response
>> processing.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> This patch is still broken.  All XenServer HVM guests (both windows and
> linux) are dying, with Qemu citing

_All_? That contradicts both my own testing and osstest's smoke
test, which succeeded.

> I/O request not read: 0, ptr: 0, port: 0, data: 0, count: 0, size: 0

This doesn't look like a valid request, and the patch at hand doesn't
alter the requests generated in any way. Is it perhaps unmasking a
problem in some private XenServer patch? For now, unless your
claim gets supported by osstest's main round of testing, or
unless you or someone else can point out what's still wrong with v2,
I'm not really inclined to revert right away.

Otoh - iirc you run with qemu-trad, and I guess I didn't test that, so
let me try...

Jan



* Re: [PATCH v2] x86/HVM: use available linear->phys translations in REP MOVS/STOS handling
  2016-06-22  7:19   ` Jan Beulich
@ 2016-06-23  9:56     ` Andrew Cooper
  0 siblings, 0 replies; 6+ messages in thread
From: Andrew Cooper @ 2016-06-23  9:56 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Paul Durrant

On 22/06/16 08:19, Jan Beulich wrote:
>>>> On 21.06.16 at 21:50, <andrew.cooper3@citrix.com> wrote:
>> On 20/06/16 12:29, Jan Beulich wrote:
>>> If we have the translation result available already, we should also use
>>> it here. In my tests with Linux guests this eliminates all calls to
>>> hvmemul_linear_to_phys() from the STOS path and most from the MOVS one.
>>>
>>> Also record the translation for re-use at least during response
>>> processing.
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> This patch is still broken.  All XenServer HVM guests (both windows and
>> linux) are dying, with Qemu citing
> _All_? That contradicts both my own testing and osstest's smoke
> test, which succeeded.

Apologies for the noise.  This was actually caused by the DOMCTL
interface bump, but manifested itself as hvmloader getting stuck while
performing I/O to Qemu.

After fixing up a local change we have, everything is back to full
working order.

~Andrew


* Re: [PATCH v2] x86/HVM: use available linear->phys translations in REP MOVS/STOS handling
@ 2016-06-22  7:00 Paul Durrant
  0 siblings, 0 replies; 6+ messages in thread
From: Paul Durrant @ 2016-06-22  7:00 UTC (permalink / raw)
  To: Andrew Cooper, Jan Beulich, xen-devel



On 21 June 2016, at 20:50, Andrew Cooper <andrew.cooper3@citrix.com> wrote:

>
>
>On 20/06/16 12:29, Jan Beulich wrote:
>> If we have the translation result available already, we should also use
>> it here. In my tests with Linux guests this eliminates all calls to
>> hvmemul_linear_to_phys() from the STOS path and most from the MOVS one.
>>
>> Also record the translation for re-use at least during response
>> processing.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>This patch is still broken.  All XenServer HVM guests (both windows and
>linux) are dying, with Qemu citing
>I/O request not read: 0, ptr: 0, port: 0, data: 0, count: 0, size: 0
>The domain still exists, but it looks like it never gets unpaused from
>the device model request.

I think that error is normal. The first time QEMU scans the ioreq pages it sees them blank. The lack of unpause, of course, is not normal.

  Paul

>~Andrew
>

