* XSAVE/XRSTOR crash resurgence in 4.3
@ 2013-07-03 14:02 Ben Guthro
  2013-07-04 13:21 ` Jan Beulich
  0 siblings, 1 reply; 38+ messages in thread
From: Ben Guthro @ 2013-07-03 14:02 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Mark Roddy, xen-devel

[-- Attachment #1: Type: text/plain, Size: 845 bytes --]

We (XenClient Enterprise) recently updated our mainline development to
Xen 4.3, and noticed that the XSAVE/XRSTOR bug that crashes 32-bit
Windows guests with the driver verifier enabled has re-emerged.

From Mark Roddy:
"The DOMU crash from last nights run  has this signature in the crash dump:

Interrupt Service Routine A30BC91C has changed extended thread context.
Context saved before executing ISR: 841C4380. Context saved after
executing ISR: 841C5040.

It looks like we lost the fix for the XSAVE/XRSTOR"

However, this tree is based on 4.3-rc6, and does include the following commit:

commit 10f969150025498fe27d985f9021a68f8c7acc31
Author: Jan Beulich <jbeulich@suse.com>
Date:   Tue Jun 4 17:23:11 2013 +0200

    x86: preserve FPU selectors for 32-bit guest code



Original patch against 4.2 (that does not fail) is attached.


Ben

[-- Attachment #2: x86-FPU-preserve-selectors.patch --]
[-- Type: application/octet-stream, Size: 8734 bytes --]

diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
index 0ec2308..6d2ac71 100644
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -55,34 +55,54 @@ static inline void fpu_fxrstor(struct vcpu *v)
      * possibility, which may occur if the block was passed to us by control
      * tools, by silently clearing the block.
      */
-    asm volatile (
-#ifdef __i386__
-        "1: fxrstor %0            \n"
-#else /* __x86_64__ */
-        /* See above for why the operands/constraints are this way. */
-        "1: " REX64_PREFIX "fxrstor (%2)\n"
-#endif
-        ".section .fixup,\"ax\"   \n"
-        "2: push %%"__OP"ax       \n"
-        "   push %%"__OP"cx       \n"
-        "   push %%"__OP"di       \n"
-        "   lea  %0,%%"__OP"di    \n"
-        "   mov  %1,%%ecx         \n"
-        "   xor  %%eax,%%eax      \n"
-        "   rep ; stosl           \n"
-        "   pop  %%"__OP"di       \n"
-        "   pop  %%"__OP"cx       \n"
-        "   pop  %%"__OP"ax       \n"
-        "   jmp  1b               \n"
-        ".previous                \n"
-        _ASM_EXTABLE(1b, 2b)
-        : 
-        : "m" (*fpu_ctxt),
-          "i" (sizeof(v->arch.xsave_area->fpu_sse)/4)
-#ifdef __x86_64__
-          ,"cdaSDb" (fpu_ctxt)
-#endif
-        );
+    switch ( __builtin_expect(fpu_ctxt[FPU_WORD_SIZE_OFFSET], 8) )
+    {
+    default:
+        asm volatile (
+            /* See below for why the operands/constraints are this way. */
+            "1: " REX64_PREFIX "fxrstor (%2)\n"
+            ".section .fixup,\"ax\"   \n"
+            "2: push %%"__OP"ax       \n"
+            "   push %%"__OP"cx       \n"
+            "   push %%"__OP"di       \n"
+            "   mov  %2,%%"__OP"di    \n"
+            "   mov  %1,%%ecx         \n"
+            "   xor  %%eax,%%eax      \n"
+            "   rep ; stosl           \n"
+            "   pop  %%"__OP"di       \n"
+            "   pop  %%"__OP"cx       \n"
+            "   pop  %%"__OP"ax       \n"
+            "   jmp  1b               \n"
+            ".previous                \n"
+            _ASM_EXTABLE(1b, 2b)
+            :
+            : "m" (*fpu_ctxt),
+              "i" (sizeof(v->arch.xsave_area->fpu_sse)/4),
+              "cdaSDb" (fpu_ctxt) );
+        break;
+    case 4: case 2:
+        asm volatile (
+            "1: fxrstor (%2)\n"
+            ".section .fixup,\"ax\"   \n"
+            "2: push %%"__OP"ax       \n"
+            "   push %%"__OP"cx       \n"
+            "   push %%"__OP"di       \n"
+            "   mov  %2,%%"__OP"di    \n"
+            "   mov  %1,%%ecx         \n"
+            "   xor  %%eax,%%eax      \n"
+            "   rep ; stosl           \n"
+            "   pop  %%"__OP"di       \n"
+            "   pop  %%"__OP"cx       \n"
+            "   pop  %%"__OP"ax       \n"
+            "   jmp  1b               \n"
+            ".previous                \n"
+            _ASM_EXTABLE(1b, 2b)
+            :
+            : "m" (*fpu_ctxt),
+              "i" (sizeof(v->arch.xsave_area->fpu_sse)/4),
+              "r" (fpu_ctxt) );
+        break;
+    }
 }
 
 /* Restore x87 extended state */
@@ -111,6 +131,7 @@ static inline void fpu_xsave(struct vcpu *v)
 static inline void fpu_fxsave(struct vcpu *v)
 {
     char *fpu_ctxt = v->arch.fpu_ctxt;
+    int word_size = guest_word_size(v);
 
 #ifdef __i386__
     asm volatile (
@@ -122,9 +143,23 @@ static inline void fpu_fxsave(struct vcpu *v)
      * older versions the rex64 prefix works only if we force an
      * addressing mode that doesn't require extended registers.
      */
-    asm volatile (
-        REX64_PREFIX "fxsave (%1)"
-        : "=m" (*fpu_ctxt) : "cdaSDb" (fpu_ctxt) );
+    switch ( __builtin_expect(word_size, 8) )
+    {
+    default:
+        /*
+         * The only way to force fxsaveq on a wide range of gas versions.
+         * On older versions the rex64 prefix works only if we force an
+         * addressing mode that doesn't require extended registers.
+         */
+        asm volatile ( REX64_PREFIX "fxsave (%1)"
+                       : "=m" (*fpu_ctxt) : "cdaSDb" (fpu_ctxt) );
+        break;
+    case 4: case 2:
+        asm volatile ( "fxsave %0" : "=m" (*fpu_ctxt) );
+        break;
+    }
+    fpu_ctxt[FPU_WORD_SIZE_OFFSET] = word_size;
+
 #endif
     
     /* Clear exception flags if FSW.ES is set. */
@@ -265,6 +300,39 @@ void vcpu_destroy_fpu(struct vcpu *v)
         xfree(v->arch.fpu_ctxt);
 }
 
+int guest_word_size(struct vcpu *v)
+{
+    int mode;
+
+    if ( !is_hvm_vcpu(v) )
+    {
+        if ( is_pv_32bit_vcpu(v) )
+            return 4;
+
+        asm ( "1: lar %1,%0          \n"
+              "   jnz 2f             \n"
+              "3:                    \n"
+              ".section .fixup,\"ax\"\n"
+              "2: xor %0,%0          \n"
+              "   jmp 3b             \n"
+              ".previous             \n"
+              _ASM_EXTABLE(1b, 2b)
+              : "=r" (mode)
+              : "m" (guest_cpu_user_regs()->cs) );
+
+        return !(mode & _SEGMENT_S) || (mode & _SEGMENT_L) ? 8 : 4;
+    }
+
+    switch ( mode = hvm_guest_x86_mode(v) )
+    {
+    case 0: /* real mode */
+    case 1: /* virtual mode */
+        return 2;
+    }
+
+    return mode;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index 4d88638..2add85a 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -56,32 +56,53 @@ void xsave(struct vcpu *v, uint64_t mask)
     struct xsave_struct *ptr = v->arch.xsave_area;
     uint32_t hmask = mask >> 32;
     uint32_t lmask = mask;
+    int word_size = guest_word_size(v);
 
-    if ( cpu_has_xsaveopt )
-        asm volatile (
-            ".byte " REX_PREFIX "0x0f,0xae,0x37"
-            :
-            : "a" (lmask), "d" (hmask), "D"(ptr)
-            : "memory" );
-    else
-        asm volatile (
-            ".byte " REX_PREFIX "0x0f,0xae,0x27"
-            :
-            : "a" (lmask), "d" (hmask), "D"(ptr)
-            : "memory" );
+    switch ( __builtin_expect(word_size, 8) )
+    {
+    default:
+        if ( cpu_has_xsaveopt )
+            asm volatile ( ".byte 0x48,0x0f,0xae,0x37"
+                           : "=m" (*ptr)
+                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+        else
+            asm volatile ( ".byte 0x48,0x0f,0xae,0x27"
+                           : "=m" (*ptr)
+                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+        break;
+    case 4: case 2:
+        if ( cpu_has_xsaveopt )
+            asm volatile ( ".byte 0x0f,0xae,0x37"
+                           : "=m" (*ptr)
+                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+        else
+            asm volatile ( ".byte 0x0f,0xae,0x27"
+                           : "=m" (*ptr)
+                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+        break;
+    }
+    ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] = word_size;
 }
 
 void xrstor(struct vcpu *v, uint64_t mask)
 {
     uint32_t hmask = mask >> 32;
     uint32_t lmask = mask;
-
     struct xsave_struct *ptr = v->arch.xsave_area;
 
-    asm volatile (
-        ".byte " REX_PREFIX "0x0f,0xae,0x2f"
-        :
-        : "m" (*ptr), "a" (lmask), "d" (hmask), "D"(ptr) );
+    switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) )
+    {
+    default:
+        asm volatile ( ".byte 0x48,0x0f,0xae,0x2f"
+                       :
+                       : "m" (*ptr), "a" (lmask), "d" (hmask), "D" (ptr) );
+        break;
+    case 4: case 2:
+        asm volatile ( ".byte 0x0f,0xae,0x2f"
+                       :
+                       : "m" (*ptr), "a" (lmask), "d" (hmask), "D" (ptr) );
+        break;
+    }
 }
 
 bool_t xsave_enabled(const struct vcpu *v)
diff --git a/xen/include/asm-x86/xstate.h b/xen/include/asm-x86/xstate.h
index 90e405e..d5ac499 100644
--- a/xen/include/asm-x86/xstate.h
+++ b/xen/include/asm-x86/xstate.h
@@ -34,12 +34,6 @@
 #define XSTATE_NONLAZY (XSTATE_LWP)
 #define XSTATE_LAZY    (XSTATE_ALL & ~XSTATE_NONLAZY)
 
-#ifdef CONFIG_X86_64
-#define REX_PREFIX     "0x48, "
-#else
-#define REX_PREFIX
-#endif
-
 /* extended state variables */
 DECLARE_PER_CPU(uint64_t, xcr0);
 
@@ -94,4 +88,14 @@ void xstate_free_save_area(struct vcpu *v);
 int xstate_alloc_save_area(struct vcpu *v);
 void xstate_init(void);
 
+/* Byte offset within the FXSAVE (portion) of the stored word size. */
+#define FPU_WORD_SIZE_OFFSET 511
+
+/*
+ * Used EXCLUSIVELY to determine the needed operand size override on
+ * XSAVE/FXSAVE. Any other use would need to make sure that the context
+ * is suitable for all operations this involves.
+ */
+int guest_word_size(struct vcpu *);
+
 #endif /* __ASM_XSTATE_H */
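
A note on the offset used above: per the Intel SDM, bytes 464-511 of the
512-byte FXSAVE image (the legacy region of an XSAVE area) are available
to software and are not written by the processor, which is presumably why
the very last byte was picked to stash the guest word size. A minimal
stand-alone sketch of that idea - the helper names are illustrative, not
part of the patch:

#include <stdint.h>

#define FPU_WORD_SIZE_OFFSET 511   /* last byte of the 512-byte image */

/* Record the guest's operand/word size (2, 4 or 8) in the software-
 * available tail of the FXSAVE image at save time... */
static inline void stash_word_size(uint8_t fxsave_image[512], int word_size)
{
    fxsave_image[FPU_WORD_SIZE_OFFSET] = (uint8_t)word_size;
}

/* ...and read it back on the restore path to pick the matching
 * fxrstor/xrstor form. */
static inline int stashed_word_size(const uint8_t fxsave_image[512])
{
    return fxsave_image[FPU_WORD_SIZE_OFFSET];
}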


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-03 14:02 XSAVE/XRSTOR crash resurgence in 4.3 Ben Guthro
@ 2013-07-04 13:21 ` Jan Beulich
  2013-07-04 13:24   ` Ben Guthro
  0 siblings, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2013-07-04 13:21 UTC (permalink / raw)
  To: Ben Guthro; +Cc: Mark Roddy, xen-devel

>>> On 03.07.13 at 16:02, Ben Guthro <ben@guthro.net> wrote:
> We (XenClient Enterprise) recently updated our mainline development to
> xen 4.3, and noticed that the xsave/xrstor bug that crashes 32bit
> windows guests with the driver verifier enabled has re-emerged.
> 
> From Mark Roddy:
> "The DOMU crash from last nights run  has this signature in the crash dump:
> 
> Interrupt Service Routine A30BC91C has changed extended thread context.
> Context saved before executing ISR: 841C4380. Context saved after
> executing ISR: 841C5040.

So along with the two questions raised on IRC (Intel vs AMD CPU
and whether in your successful testing the XSA-52 and -53 fixes
were included), would it be possible to get the contents of the
two memory blocks pointed to (assuming you have a dump from
that crash)? I'd like to be certain that the situation is the same as
earlier, i.e. both selector fields are holding zero in the "after"
incarnation, as I still can't see what's wrong with the new code.
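
For reference, a rough sketch of the first 32 bytes of the FXSAVE/XSAVE
legacy image, showing where those selector fields sit. Offsets and field
names follow the Intel SDM; these are illustrative C declarations, not
the Xen structure definitions:

#include <stdint.h>

struct fxsave_hdr_32 {             /* layout written by 32-bit fxsave   */
    uint16_t fcw, fsw;             /* x87 control / status words        */
    uint8_t  ftw, rsvd0;
    uint16_t fop;
    uint32_t fip;                  /* FPU instruction pointer offset    */
    uint16_t fcs, rsvd1;           /* FPU code segment selector         */
    uint32_t fdp;                  /* FPU data pointer offset           */
    uint16_t fds, rsvd2;           /* FPU data segment selector         */
    uint32_t mxcsr, mxcsr_mask;
};

struct fxsave_hdr_64 {             /* layout written with REX.W/fxsaveq */
    uint16_t fcw, fsw;
    uint8_t  ftw, rsvd0;
    uint16_t fop;
    uint64_t fip;                  /* 64-bit FIP - no FCS field         */
    uint64_t fdp;                  /* 64-bit FDP - no FDS field         */
    uint32_t mxcsr, mxcsr_mask;
};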

> It looks like we lost the fix for the XSAVE/XRSTOR"

If the above doesn't help, I may need to hand you a debugging
patch, mainly to see whether the current guest word size
determination is wrong in any way (as that's the main thing that
changed from the version you tested to the one that got checked
in, yet I continue to only see this as an improvement, not as
something that could have broken things).

Jan


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-04 13:21 ` Jan Beulich
@ 2013-07-04 13:24   ` Ben Guthro
  2013-07-04 18:19     ` Mark Roddy
  0 siblings, 1 reply; 38+ messages in thread
From: Ben Guthro @ 2013-07-04 13:24 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Mark Roddy, Ben Guthro, xen-devel

On Jul 4, 2013, at 9:21 AM, Jan Beulich <JBeulich@suse.com> wrote:

>>>> On 03.07.13 at 16:02, Ben Guthro <ben@guthro.net> wrote:
>> We (XenClient Enterprise) recently updated our mainline development to
>> xen 4.3, and noticed that the xsave/xrstor bug that crashes 32bit
>> windows guests with the driver verifier enabled has re-emerged.
>>
>> From Mark Roddy:
>> "The DOMU crash from last nights run  has this signature in the crash dump:
>>
>> Interrupt Service Routine A30BC91C has changed extended thread context.
>> Context saved before executing ISR: 841C4380. Context saved after
>> executing ISR: 841C5040.
>
> So along with the two questions raised on IRC (Intel vs AMD CPU
> and whether in your successful testing the XSA-52 and -53 fixes
> were included), would it be possible to get the contents of the
> two memory blocks pointed to (assuming you have a dump from
> that crash)? I'd like to be certain that the situation is the same as
> earlier, i.e. both selector fields are holding zero in the "after"
> incarnation, as I still can't see what's wrong with the new code.

Mark and I are out until Monday (US holiday) - but I'll put this on my
list to follow up on then.

>
>> It looks like we lost the fix for the XSAVE/XRSTOR"
>
> If the above doesn't help, I may need to hand you a debugging
> patch, mainly to see whether the current guest word size
> determination is wrong in any way (as that's the main thing that
> changed from the version you tested to the one that got checked
> in, yet I continue to only see this as an improvement, not as
> something that could have broken things).

Ok.
Happy to test a debug patch, as well.

Ben

>
> Jan
>


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-04 13:24   ` Ben Guthro
@ 2013-07-04 18:19     ` Mark Roddy
  2013-07-05  6:42       ` Jan Beulich
  2013-07-05 10:30       ` Jan Beulich
  0 siblings, 2 replies; 38+ messages in thread
From: Mark Roddy @ 2013-07-04 18:19 UTC (permalink / raw)
  To: Ben Guthro, Jan Beulich; +Cc: Ben Guthro, xen-devel

The dump looks the same to me as before the last fix.

From the crash dump:

Interrupt Service Routine A30BC91C has changed extended thread context.
Context saved before executing ISR: 841C4380. Context saved after executing ISR: 841C5040.

0: kd> dd 841C4380
841c4380  4020027f 00000000 6cad20e4 0000001b
841c4390  01a85588 00000023 00001f80 0000ffff
0: kd> dd 841C5040
841c5040  4020027f 00000000 6cad20e4 00000000
841c5050  01a85588 00000000 00001f80 0000ffff
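
Reading those dwords against the FXSAVE header layout (assuming the dump
starts at byte 0 of the save area):

/*
 * dword 0: FSW:FCW      dword 1: FOP:FTW
 * dword 2: FIP offset   dword 3: FCS selector
 * dword 4: FDP offset   dword 5: FDS selector
 * dword 6: MXCSR        dword 7: MXCSR_MASK
 *
 * "before" (841C4380): FCS = 0x001b, FDS = 0x0023 (code/data selectors)
 * "after"  (841C5040): FCS = 0x0000, FDS = 0x0000
 *
 * i.e. both selectors read back as zero, which is what the 64-bit
 * fxsave/xsave forms produce, since they store 64-bit FIP/FDP and no
 * selector fields at all.
 */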



-----Original Message-----
From: Ben Guthro [mailto:ben.guthro@gmail.com] 
Sent: Thursday, July 4, 2013 9:25 AM
To: Jan Beulich
Cc: Ben Guthro; Mark Roddy; xen-devel
Subject: Re: XSAVE/XRSTOR crash resurgence in 4.3

On Jul 4, 2013, at 9:21 AM, Jan Beulich <JBeulich@suse.com> wrote:

>>>> On 03.07.13 at 16:02, Ben Guthro <ben@guthro.net> wrote:
>> We (XenClient Enterprise) recently updated our mainline development 
>> to xen 4.3, and noticed that the xsave/xrstor bug that crashes 32bit 
>> windows guests with the driver verifier enabled has re-emerged.
>>
>> From Mark Roddy:
>> "The DOMU crash from last nights run  has this signature in the crash dump:
>>
>> Interrupt Service Routine A30BC91C has changed extended thread context.
>> Context saved before executing ISR: 841C4380. Context saved after 
>> executing ISR: 841C5040.
>
> So along with the two questions raised on IRC (Intel vs AMD CPU and 
> whether in your successful testing the XSA-52 and -53 fixes were 
> included), would it be possible to get the contents of the two memory 
> blocks pointed to (assuming you have a dump from that crash)? I'd like 
> to be certain that the situation is the same as earlier, i.e. both 
> selector fields are holding zero in the "after"
> incarnation, as I still can't see what's wrong with the new code.

Mark and I are out until Monday (US holiday) - but I'll put this on my list to follow up on then.

>
>> It looks like we lost the fix for the XSAVE/XRSTOR"
>
> If the above doesn't help, I may need to hand you a debugging patch, 
> mainly to see whether the current guest word size determination is 
> wrong in any way (as that's the main thing that changed from the 
> version you tested to the one that got checked in, yet I continue to 
> only see this as an improvement, not as something that could have 
> broken things).

Ok.
Happy to test a debug patch, as well.

Ben

>
> Jan
>


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-04 18:19     ` Mark Roddy
@ 2013-07-05  6:42       ` Jan Beulich
  2013-07-05 10:30       ` Jan Beulich
  1 sibling, 0 replies; 38+ messages in thread
From: Jan Beulich @ 2013-07-05  6:42 UTC (permalink / raw)
  To: Mark Roddy; +Cc: Ben Guthro, Ben Guthro, xen-devel

>>> On 04.07.13 at 20:19, Mark Roddy <mark.roddy@citrix.com> wrote:
> The dump looks the same to me as before the last fix.

Thanks, indeed it does. Nevertheless I wanted to be certain that's
the case before thinking about possible debugging strategies.

Jan

> From the crash dump:
> 
> Interrupt Service Routine A30BC91C has changed extended thread context.
> Context saved before executing ISR: 841C4380. Context saved after executing 
> ISR: 841C5040.
> 
> 0: kd> dd 841C4380
> 841c4380  4020027f 00000000 6cad20e4 0000001b
> 841c4390  01a85588 00000023 00001f80 0000ffff
> 0: kd> dd 841C5040
> 841c5040  4020027f 00000000 6cad20e4 00000000
> 841c5050  01a85588 00000000 00001f80 0000ffff


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-04 18:19     ` Mark Roddy
  2013-07-05  6:42       ` Jan Beulich
@ 2013-07-05 10:30       ` Jan Beulich
  2013-07-05 12:10         ` Ben Guthro
  1 sibling, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2013-07-05 10:30 UTC (permalink / raw)
  To: Mark Roddy, Ben Guthro; +Cc: Ben Guthro, xen-devel

[-- Attachment #1: Type: text/plain, Size: 681 bytes --]

>>> On 04.07.13 at 20:19, Mark Roddy <mark.roddy@citrix.com> wrote:
> -----Original Message-----
> From: Ben Guthro [mailto:ben.guthro@gmail.com] 
> 
> Happy to test a debug patch, as well.

Here you go. The logging is done using thresholds (so you wouldn't
get your log flooded), but of course this carries the risk of not
seeing anything useful if the problem does not occur on every run
through these functions. Also, the patch instruments the xsave/xrstor
paths, not the fxsave/fxrstor ones, so it is assumed to be used on an
xsave-capable and -enabled system. Ideally - if reproducible this way -
you'd use a single-vCPU guest (easing later log analysis).
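
For reference, a stand-alone sketch of the rate-limiting pattern the
attached patch uses (names here are illustrative, not from the patch):

#include <stdio.h>

/* Emits the message on the 1st, 2nd, 4th, 8th, ... occurrence only, so
 * the log shows whether the event keeps recurring without flooding. */
static void log_rate_limited(const char *msg)
{
    static unsigned long count, thresh;

    if ( ++count > thresh )
    {
        thresh |= thresh + 1;          /* 0 -> 1 -> 3 -> 7 -> 15 -> ... */
        printf("%s (seen %lu times so far)\n", msg, count);
    }
}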

Jan


[-- Attachment #2: x86-FPU-selector-loss.patch --]
[-- Type: text/plain, Size: 2507 bytes --]

--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -97,8 +97,25 @@ void xsave(struct vcpu *v, uint64_t mask
             asm volatile ( "fnstenv %0" : "=m" (fpu_env) );
             ptr->fpu_sse.fip.sel = fpu_env.fcs;
             ptr->fpu_sse.fdp.sel = fpu_env.fds;
+ if((!fpu_env.fcs && ptr->fpu_sse.fip.offs) ||
+    (!fpu_env.fds && ptr->fpu_sse.fdp.offs)) {//temp
+  static unsigned long count, thresh;
+  if(++count > thresh) {
+   thresh |= thresh + 1;
+   printk("d%dv%d: fip=%04x:%08x fdp=%04x:%08x\n", v->domain->domain_id, v->vcpu_id,
+          fpu_env.fcs, ptr->fpu_sse.fip.offs, fpu_env.fds, ptr->fpu_sse.fdp.offs);
+  }
+ }
             word_size = 4;
         }
+ else if(word_size >= 0 && is_hvm_vcpu(v) && hvm_guest_x86_mode(v) < 8) {//temp
+  static unsigned long count, thresh;
+  if(++count > thresh) {
+   thresh |= thresh + 1;
+   printk("d%dv%d: fip=%"PRIx64" fdp=%"PRIx64" w=%d\n", v->domain->domain_id, v->vcpu_id,
+          ptr->fpu_sse.fip.addr, ptr->fpu_sse.fdp.addr, word_size);
+  }
+ }
     }
     else
     {
@@ -110,6 +127,16 @@ void xsave(struct vcpu *v, uint64_t mask
             asm volatile ( ".byte 0x0f,0xae,0x27"
                            : "=m" (*ptr)
                            : "a" (lmask), "d" (hmask), "D" (ptr) );
+ if((!ptr->fpu_sse.fip.sel && ptr->fpu_sse.fip.offs) ||
+    (!ptr->fpu_sse.fdp.sel && ptr->fpu_sse.fdp.offs)) {//temp
+  static unsigned long count, thresh;
+  if(++count > thresh) {
+   thresh |= thresh + 1;
+   printk("d%dv%d: FIP=%04x:%08x FDP=%04x:%08x\n", v->domain->domain_id, v->vcpu_id,
+          ptr->fpu_sse.fip.sel, ptr->fpu_sse.fip.offs,
+          ptr->fpu_sse.fdp.sel, ptr->fpu_sse.fdp.offs);
+  }
+ }
         word_size = 4;
     }
     if ( word_size >= 0 )
@@ -145,6 +172,14 @@ void xrstor(struct vcpu *v, uint64_t mas
     switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) )
     {
     default:
+ if(is_hvm_vcpu(v) && (ptr->fpu_sse.fip.addr | ptr->fpu_sse.fdp.addr)) {//temp
+  static unsigned long count, thresh;
+  if(++count > thresh) {
+   thresh |= thresh + 1;
+   printk("d%dv%d: FIP=%"PRIx64" FDP=%"PRIx64" w=%d\n", v->domain->domain_id, v->vcpu_id,
+          ptr->fpu_sse.fip.addr, ptr->fpu_sse.fdp.addr, ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET]);
+  }
+ }
         asm volatile ( "1: .byte 0x48,0x0f,0xae,0x2f\n"
                        ".section .fixup,\"ax\"      \n"
                        "2: mov %5,%%ecx             \n"


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-05 10:30       ` Jan Beulich
@ 2013-07-05 12:10         ` Ben Guthro
  2013-07-05 12:15           ` Jan Beulich
  0 siblings, 1 reply; 38+ messages in thread
From: Ben Guthro @ 2013-07-05 12:10 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Mark Roddy, Ben Guthro, xen-devel

On Jul 5, 2013, at 6:30 AM, Jan Beulich <JBeulich@suse.com> wrote:

>>>> On 04.07.13 at 20:19, Mark Roddy <mark.roddy@citrix.com> wrote:
>> -----Original Message-----
>> From: Ben Guthro [mailto:ben.guthro@gmail.com]
>>
>> Happy to test a debug patch, as well.
>
> Here you go. The logging is done using thresholds (so you wouldn't
> get your log flooded), but of course this has the risk of not seeing
> anything useful in case the problem happens not on every run
> through these functions. Also the patch instruments on the xsave/
> xrstor paths, not the fxsave/fxrstor ones, so is assumed to be used
> on an xsave-capable and -enabled system. Ideally - if reproducible
> this way - you'd use a single-vCPU guest (easing later log analysis).
>
> Jan
>
> <x86-FPU-selector-loss.patch>

Thanks.

Since I am not in the office today, nor near a machine from which I
can access this, I asked someone else to apply and check in this patch,
in the hope that we could get some useful debug info from the weekend
test run.

However, when he went to test booting a VM with this patch, the host
machine rebooted - presumably a Xen crash.

I'll have a closer look on Mon.


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-05 12:10         ` Ben Guthro
@ 2013-07-05 12:15           ` Jan Beulich
  2013-07-05 12:58             ` Ben Guthro
  0 siblings, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2013-07-05 12:15 UTC (permalink / raw)
  To: Ben Guthro; +Cc: Mark Roddy, Ben Guthro, xen-devel

>>> On 05.07.13 at 14:10, Ben Guthro <ben.guthro@gmail.com> wrote:
> Since I am not in the office today, nor near a machine that I can
> access this, I asked someone else to apply and check in this patch, in
> the hope that we could get some useful debug info from the weekend
> test run.
> 
> However, when he went to test booting a VM with this patch, the host
> machine rebooted - presumably a Xen crash.

Oops - it's just printing stuff, so I can't immediately see how that
would happen. But perhaps a trivial oversight of mine...

Jan


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-05 12:15           ` Jan Beulich
@ 2013-07-05 12:58             ` Ben Guthro
  2013-07-08 14:13               ` Ben Guthro
  0 siblings, 1 reply; 38+ messages in thread
From: Ben Guthro @ 2013-07-05 12:58 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Mark Roddy, Ben Guthro, xen-devel

On Jul 5, 2013, at 8:15 AM, Jan Beulich <JBeulich@suse.com> wrote:

>>>> On 05.07.13 at 14:10, Ben Guthro <ben.guthro@gmail.com> wrote:
>> Since I am not in the office today, nor near a machine that I can
>> access this, I asked someone else to apply and check in this patch, in
>> the hope that we could get some useful debug info from the weekend
>> test run.
>>
>> However, when he went to test booting a VM with this patch, the host
>> machine rebooted - presumably a Xen crash.
>
> Oops - it's just printing stuff, so I can't immediately see how that
> would happen. But perhaps a trivial oversight of mine...

Very possible this is unrelated to this patch, based on some other
comments - we may have other regressions.

I'll sort it out Mon.


>
> Jan
>


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-05 12:58             ` Ben Guthro
@ 2013-07-08 14:13               ` Ben Guthro
  2013-07-08 14:24                 ` Jan Beulich
  0 siblings, 1 reply; 38+ messages in thread
From: Ben Guthro @ 2013-07-08 14:13 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Mark Roddy, Ben Guthro, xen-devel

On Fri, Jul 5, 2013 at 8:58 AM, Ben Guthro <ben.guthro@gmail.com> wrote:
> On Jul 5, 2013, at 8:15 AM, Jan Beulich <JBeulich@suse.com> wrote:
>
>>>>> On 05.07.13 at 14:10, Ben Guthro <ben.guthro@gmail.com> wrote:
>>> Since I am not in the office today, nor near a machine that I can
>>> access this, I asked someone else to apply and check in this patch, in
>>> the hope that we could get some useful debug info from the weekend
>>> test run.
>>>
>>> However, when he went to test booting a VM with this patch, the host
>>> machine rebooted - presumably a Xen crash.
>>
>> Oops - it's just printing stuff, so I can't immediately see how that
>> would happen. But perhaps a trivial oversight of mine...

Here's the crash with this patch.
I'm continuing to look at it, but if something jumps out at you,
please let me know.

(XEN) Assertion 'v == current' failed at
/data/home/bguthro/dev/orc-precise/xen/xen/include:275
(XEN) ----[ Xen-4.3-unstable  x86_64  debug=y  Not tainted ]----
(XEN) CPU:    1
(XEN) RIP:    e008:[<ffff82c4c01a4f65>] xsave+0x178/0x2c7
(XEN) RFLAGS: 0000000000010006   CONTEXT: hypervisor
(XEN) rax: ffff830215860000   rbx: ffff83020904ec00   rcx: ffff83021d63a000
(XEN) rdx: 00000000bfffffff   rsi: 0000000000000003   rdi: ffff83020904ec00
(XEN) rbp: ffff830215867d20   rsp: ffff830215867cd0   r8:  0000000000000000
(XEN) r9:  0000000000000000   r10: 0000000e23c7b646   r11: 0000000000000000
(XEN) r12: ffff8300d0840000   r13: 0000000000000008   r14: ffff830215860000
(XEN) r15: ffff830215867f18   cr0: 0000000080050033   cr4: 00000000001426f0
(XEN) cr3: 000000020bae2000   cr2: 0000000000000000
(XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: e008
(XEN) Xen stack trace from rsp=ffff830215867cd0:
(XEN)    ffff830215867df8 ffff830215867d38 ffff82c4c01b4622 ffff830215867d00
(XEN)    ffff83021d63ad80 0000000000000286 00000000000fa933 ffff8300d0840000
(XEN)    0000000000000001 ffff8300d0840000 ffff830215867d60 ffff82c4c0167a1a
(XEN)    ffff830215867d48 ffff830215867dc8 ffff82c4c017019c ffff830215867d98
(XEN)    0000000000000282 ffff8300d04fc000 ffff830215867db0 ffff82c4c0160c80
(XEN)    0000000a1d63ad18 0000000000000000 80000000d0840510 ffff8300d04fc000
(XEN)    ffff8300d072b000 0000000000000001 0000000000000001 ffff83021583d080
(XEN)    ffff830215867e20 ffff82c4c01634bc ffff82c4c01277b9 ffff8300d072b030
(XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
(XEN)    ffff830215867e20 ffff8300d072b000 ffff8300d04fc000 0000000e21f655a0
(XEN)    0000000000000001 ffff83021583d080 ffff830215867eb0 ffff82c4c0124364
(XEN)    0000000000000002 ffff83021583d060 0000000100867e60 ffff83021583d068
(XEN)    ffff82c4c0127824 ffff83021583c028 ffff830215867eb0 ffff8300d04fc000
(XEN)    0000000000989680 ffff82c4c01ab600 ffff8300d072b000 ffff82c4c02f0080
(XEN)    ffff82c4c02f0000 ffff830215860000 ffffffffffffffff ffff83021583d080
(XEN)    ffff830215867ee0 ffff82c4c012710e ffff830215860000 ffff830215860000
(XEN)    00000000ffffff01 0000000000000003 ffff830215867ef0 ffff82c4c0127174
(XEN)    ffff830215867f10 ffff82c4c0160aba ffff8300d072b000 ffff8300d0840000
(XEN)    ffff830215867dc8 0000000000000000 0000000000000000 0000000000000000
(XEN)    0000000000000000 0000000000000080 00000000000001f7 0000000000000000
(XEN) Xen call trace:
(XEN)    [<ffff82c4c01a4f65>] xsave+0x178/0x2c7
(XEN)    [<ffff82c4c0167a1a>] vcpu_save_fpu+0x83/0x159
(XEN)    [<ffff82c4c0160c80>] __context_switch+0x16e/0x50b
(XEN)    [<ffff82c4c01634bc>] context_switch+0x1ba/0xf4e
(XEN)    [<ffff82c4c0124364>] schedule+0x60b/0x61a
(XEN)    [<ffff82c4c012710e>] __do_softirq+0x8e/0x99
(XEN)    [<ffff82c4c0127174>] do_softirq+0x13/0x15
(XEN)    [<ffff82c4c0160aba>] idle_loop+0x68/0x6a
(XEN)
(XEN)
(XEN) ****************************************
(XEN) Panic on CPU 1:
(XEN) Assertion 'v == current' failed at
/data/home/bguthro/dev/orc-precise/xen/xen/include:275
(XEN) ****************************************
(XEN)
(XEN) Reboot in five seconds...
(XEN) Assertion 'current == idle_vcpu[smp_processor_id()]' failed at
domain.c:1494
(XEN) ----[ Xen-4.3-unstable  x86_64  debug=y  Not tainted ]----
(XEN) CPU:    1
(XEN) RIP:    e008:[<ffff82c4c01642cc>] __sync_local_execstate+0x63/0x86
(XEN) RFLAGS: 0000000000010087   CONTEXT: hypervisor
(XEN) rax: ffff830215860000   rbx: 0000000000000001   rcx: 0000000000000001
(XEN) rdx: ffff82c4c026c4a0   rsi: ffff8300d072b000   rdi: ffff830215867a68
(XEN) rbp: ffff8302158679b8   rsp: ffff8302158679a8   r8:  0000000000000004
(XEN) r9:  000000000000000a   r10: 0000000000000020   r11: 000000000000000a
(XEN) r12: 0000000000000082   r13: 00000000000000fd   r14: 0000000000000113
(XEN) r15: ffff830215867f18   cr0: 0000000080050033   cr4: 00000000001426f0
(XEN) cr3: 000000020bae2000   cr2: 0000000000000000
(XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: e008
(XEN) Xen stack trace from rsp=ffff8302158679a8:
(XEN)    00000000000000fd ffff830215867a68 ffff8302158679c8 ffff82c4c01860eb
(XEN)    ffff830215867a58 ffff82c4c017019c ffff82c4c024d581 0000000000000086
(XEN)    ffff830215867a48 0000000000000086 ffff830215867a18 ffff82c4c012788c
(XEN)    0000000000000000 8000000015867a28 ffff830215867a28 ffff82c4c01437dc
(XEN)    ffff830215867a58 ffff82c4c0261138 0000000000000082 ffff82c4c02566f0
(XEN)    0000000000000113 ffff830215867f18 00007cfdea798577 ffff82c4c0167bef
(XEN)    ffff830215867f18 0000000000000113 ffff82c4c02566f0 0000000000000082
(XEN)    ffff830215867b58 ffff82c4c0261138 000000000000000a 0000000000000020
(XEN)    000000000000000a 0000000000000004 0000000000000000 0000000000000000
(XEN)    ffff83021583d020 0000000000000082 ffff82c4c0287860 000000fd00000000
(XEN)    ffff82c4c0185be0 000000000000e008 0000000000000246 ffff830215867b18
(XEN)    0000000000000000 ffff82c4c0185bd8 ffff830215867b48 0000138800000061
(XEN)    ffff83021583d020 ffff82c4c0261138 0000000000000082 ffff82c4c02566f0
(XEN)    0000000000000113 ffff830215867f18 ffff830215867bc8 ffff82c4c0143670
(XEN)    ffff830215867ba8 ffff82c400000020 ffff830215867bd8 ffff830215867b88
(XEN)    ffff82c4c024cf6a ffff82c4c024cf6a ffff82c4c02566f0 0000000000000113
(XEN)    0000000000000004 0000000000000061 ffff830215867c28 ffff82c4c024cf6a
(XEN)    ffff830215867c18 ffff82c4c018dde0 ffff830200000000 0000000000000000
(XEN)    fbbc000000000000 044fc20b0f000a7f ffff8300d04fc000 ffff8300d0840000
(XEN)    0000000000000008 ffff830215860000 00007cfdea7983b7 ffff82c4c023020d
(XEN) Xen call trace:
(XEN)    [<ffff82c4c01642cc>] __sync_local_execstate+0x63/0x86
(XEN)    [<ffff82c4c01860eb>] invalidate_interrupt+0x33/0x86
(XEN)    [<ffff82c4c017019c>] do_IRQ+0x9e/0x68d
(XEN)    [<ffff82c4c0167bef>] common_interrupt+0x5f/0x70
(XEN)    [<ffff82c4c0185be0>] machine_restart+0x39/0x208
(XEN)    [<ffff82c4c0143670>] panic+0x120/0x129
(XEN)    [<ffff82c4c018dde0>] do_invalid_op+0x3d7/0x45b
(XEN)    [<ffff82c4c023020d>] handle_exception_saved+0x2e/0x6c
(XEN)    [<ffff82c4c01a4f65>] xsave+0x178/0x2c7
(XEN)    [<ffff82c4c0167a1a>] vcpu_save_fpu+0x83/0x159
(XEN)    [<ffff82c4c0160c80>] __context_switch+0x16e/0x50b
(XEN)    [<ffff82c4c01634bc>] context_switch+0x1ba/0xf4e
(XEN)    [<ffff82c4c0124364>] schedule+0x60b/0x61a
(XEN)    [<ffff82c4c012710e>] __do_softirq+0x8e/0x99
(XEN)    [<ffff82c4c0127174>] do_softirq+0x13/0x15
(XEN)    [<ffff82c4c0160aba>] idle_loop+0x68/0x6a
(XEN)
(XEN)
(XEN) ****************************************
(XEN) Panic on CPU 1:
(XEN) Assertion 'current == idle_vcpu[smp_processor_id()]' failed at
domain.c:1494
(XEN) ****************************************
(XEN)
(XEN) Reboot in five seconds...
(XEN) Resetting with ACPI MEMORY or I/O RESET_REG.


Ben


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-08 14:13               ` Ben Guthro
@ 2013-07-08 14:24                 ` Jan Beulich
  2013-07-08 14:31                   ` Ben Guthro
  0 siblings, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2013-07-08 14:24 UTC (permalink / raw)
  To: Ben Guthro; +Cc: Mark Roddy, xen-devel

>>> On 08.07.13 at 16:13, Ben Guthro <ben@guthro.net> wrote:
> On Fri, Jul 5, 2013 at 8:58 AM, Ben Guthro <ben.guthro@gmail.com> wrote:
>> On Jul 5, 2013, at 8:15 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>
>>>>>> On 05.07.13 at 14:10, Ben Guthro <ben.guthro@gmail.com> wrote:
>>>> Since I am not in the office today, nor near a machine that I can
>>>> access this, I asked someone else to apply and check in this patch, in
>>>> the hope that we could get some useful debug info from the weekend
>>>> test run.
>>>>
>>>> However, when he went to test booting a VM with this patch, the host
>>>> machine rebooted - presumably a Xen crash.
>>>
>>> Oops - it's just printing stuff, so I can't immediately see how that
>>> would happen. But perhaps a trivial oversight of mine...
> 
> Here's the crash with this patch
> I'm continuing to look at it, but if something jumps out at you,
> please let me know.

Quite obvious: hvm_guest_x86_mode() has this assertion. Yet
the original, supposedly working patch had a use of this too iirc.
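
For reference, a paraphrased sketch of the check involved and of the path
the instrumented xsave() is reached from; the source wording is from
memory, so treat the details as an approximation:

/* Roughly, in include/asm-x86/hvm/hvm.h of the 4.3 tree: */
static inline int hvm_guest_x86_mode(struct vcpu *v)
{
    ASSERT(v == current);               /* the assertion in the trace below */
    return hvm_funcs.guest_x86_mode(v);
}

/*
 * ...while the instrumented save path runs from the scheduler:
 *   idle_loop -> do_softirq -> schedule -> context_switch
 *     -> __context_switch -> vcpu_save_fpu(prev) -> xsave(prev, ...)
 * where prev is the vCPU being switched out and is no longer 'current',
 * so a hvm_guest_x86_mode(v) call on that path trips the ASSERT on
 * debug=y builds.
 */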

Jan

> (XEN) Assertion 'v == current' failed at
> /data/home/bguthro/dev/orc-precise/xen/xen/include:275
> (XEN) ----[ Xen-4.3-unstable  x86_64  debug=y  Not tainted ]----
> (XEN) CPU:    1
> (XEN) RIP:    e008:[<ffff82c4c01a4f65>] xsave+0x178/0x2c7
> (XEN) RFLAGS: 0000000000010006   CONTEXT: hypervisor
> (XEN) rax: ffff830215860000   rbx: ffff83020904ec00   rcx: ffff83021d63a000
> (XEN) rdx: 00000000bfffffff   rsi: 0000000000000003   rdi: ffff83020904ec00
> (XEN) rbp: ffff830215867d20   rsp: ffff830215867cd0   r8:  0000000000000000
> (XEN) r9:  0000000000000000   r10: 0000000e23c7b646   r11: 0000000000000000
> (XEN) r12: ffff8300d0840000   r13: 0000000000000008   r14: ffff830215860000
> (XEN) r15: ffff830215867f18   cr0: 0000000080050033   cr4: 00000000001426f0
> (XEN) cr3: 000000020bae2000   cr2: 0000000000000000
> (XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: e008
> (XEN) Xen stack trace from rsp=ffff830215867cd0:
> (XEN)    ffff830215867df8 ffff830215867d38 ffff82c4c01b4622 ffff830215867d00
> (XEN)    ffff83021d63ad80 0000000000000286 00000000000fa933 ffff8300d0840000
> (XEN)    0000000000000001 ffff8300d0840000 ffff830215867d60 ffff82c4c0167a1a
> (XEN)    ffff830215867d48 ffff830215867dc8 ffff82c4c017019c ffff830215867d98
> (XEN)    0000000000000282 ffff8300d04fc000 ffff830215867db0 ffff82c4c0160c80
> (XEN)    0000000a1d63ad18 0000000000000000 80000000d0840510 ffff8300d04fc000
> (XEN)    ffff8300d072b000 0000000000000001 0000000000000001 ffff83021583d080
> (XEN)    ffff830215867e20 ffff82c4c01634bc ffff82c4c01277b9 ffff8300d072b030
> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
> (XEN)    ffff830215867e20 ffff8300d072b000 ffff8300d04fc000 0000000e21f655a0
> (XEN)    0000000000000001 ffff83021583d080 ffff830215867eb0 ffff82c4c0124364
> (XEN)    0000000000000002 ffff83021583d060 0000000100867e60 ffff83021583d068
> (XEN)    ffff82c4c0127824 ffff83021583c028 ffff830215867eb0 ffff8300d04fc000
> (XEN)    0000000000989680 ffff82c4c01ab600 ffff8300d072b000 ffff82c4c02f0080
> (XEN)    ffff82c4c02f0000 ffff830215860000 ffffffffffffffff ffff83021583d080
> (XEN)    ffff830215867ee0 ffff82c4c012710e ffff830215860000 ffff830215860000
> (XEN)    00000000ffffff01 0000000000000003 ffff830215867ef0 ffff82c4c0127174
> (XEN)    ffff830215867f10 ffff82c4c0160aba ffff8300d072b000 ffff8300d0840000
> (XEN)    ffff830215867dc8 0000000000000000 0000000000000000 0000000000000000
> (XEN)    0000000000000000 0000000000000080 00000000000001f7 0000000000000000
> (XEN) Xen call trace:
> (XEN)    [<ffff82c4c01a4f65>] xsave+0x178/0x2c7
> (XEN)    [<ffff82c4c0167a1a>] vcpu_save_fpu+0x83/0x159
> (XEN)    [<ffff82c4c0160c80>] __context_switch+0x16e/0x50b
> (XEN)    [<ffff82c4c01634bc>] context_switch+0x1ba/0xf4e
> (XEN)    [<ffff82c4c0124364>] schedule+0x60b/0x61a
> (XEN)    [<ffff82c4c012710e>] __do_softirq+0x8e/0x99
> (XEN)    [<ffff82c4c0127174>] do_softirq+0x13/0x15
> (XEN)    [<ffff82c4c0160aba>] idle_loop+0x68/0x6a
> (XEN)
> (XEN)
> (XEN) ****************************************
> (XEN) Panic on CPU 1:
> (XEN) Assertion 'v == current' failed at
> /data/home/bguthro/dev/orc-precise/xen/xen/include:275
> (XEN) ****************************************
> (XEN)
> (XEN) Reboot in five seconds...
> (XEN) Assertion 'current == idle_vcpu[smp_processor_id()]' failed at
> domain.c:1494
> (XEN) ----[ Xen-4.3-unstable  x86_64  debug=y  Not tainted ]----
> (XEN) CPU:    1
> (XEN) RIP:    e008:[<ffff82c4c01642cc>] __sync_local_execstate+0x63/0x86
> (XEN) RFLAGS: 0000000000010087   CONTEXT: hypervisor
> (XEN) rax: ffff830215860000   rbx: 0000000000000001   rcx: 0000000000000001
> (XEN) rdx: ffff82c4c026c4a0   rsi: ffff8300d072b000   rdi: ffff830215867a68
> (XEN) rbp: ffff8302158679b8   rsp: ffff8302158679a8   r8:  0000000000000004
> (XEN) r9:  000000000000000a   r10: 0000000000000020   r11: 000000000000000a
> (XEN) r12: 0000000000000082   r13: 00000000000000fd   r14: 0000000000000113
> (XEN) r15: ffff830215867f18   cr0: 0000000080050033   cr4: 00000000001426f0
> (XEN) cr3: 000000020bae2000   cr2: 0000000000000000
> (XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: e008
> (XEN) Xen stack trace from rsp=ffff8302158679a8:
> (XEN)    00000000000000fd ffff830215867a68 ffff8302158679c8 ffff82c4c01860eb
> (XEN)    ffff830215867a58 ffff82c4c017019c ffff82c4c024d581 0000000000000086
> (XEN)    ffff830215867a48 0000000000000086 ffff830215867a18 ffff82c4c012788c
> (XEN)    0000000000000000 8000000015867a28 ffff830215867a28 ffff82c4c01437dc
> (XEN)    ffff830215867a58 ffff82c4c0261138 0000000000000082 ffff82c4c02566f0
> (XEN)    0000000000000113 ffff830215867f18 00007cfdea798577 ffff82c4c0167bef
> (XEN)    ffff830215867f18 0000000000000113 ffff82c4c02566f0 0000000000000082
> (XEN)    ffff830215867b58 ffff82c4c0261138 000000000000000a 0000000000000020
> (XEN)    000000000000000a 0000000000000004 0000000000000000 0000000000000000
> (XEN)    ffff83021583d020 0000000000000082 ffff82c4c0287860 000000fd00000000
> (XEN)    ffff82c4c0185be0 000000000000e008 0000000000000246 ffff830215867b18
> (XEN)    0000000000000000 ffff82c4c0185bd8 ffff830215867b48 0000138800000061
> (XEN)    ffff83021583d020 ffff82c4c0261138 0000000000000082 ffff82c4c02566f0
> (XEN)    0000000000000113 ffff830215867f18 ffff830215867bc8 ffff82c4c0143670
> (XEN)    ffff830215867ba8 ffff82c400000020 ffff830215867bd8 ffff830215867b88
> (XEN)    ffff82c4c024cf6a ffff82c4c024cf6a ffff82c4c02566f0 0000000000000113
> (XEN)    0000000000000004 0000000000000061 ffff830215867c28 ffff82c4c024cf6a
> (XEN)    ffff830215867c18 ffff82c4c018dde0 ffff830200000000 0000000000000000
> (XEN)    fbbc000000000000 044fc20b0f000a7f ffff8300d04fc000 ffff8300d0840000
> (XEN)    0000000000000008 ffff830215860000 00007cfdea7983b7 ffff82c4c023020d
> (XEN) Xen call trace:
> (XEN)    [<ffff82c4c01642cc>] __sync_local_execstate+0x63/0x86
> (XEN)    [<ffff82c4c01860eb>] invalidate_interrupt+0x33/0x86
> (XEN)    [<ffff82c4c017019c>] do_IRQ+0x9e/0x68d
> (XEN)    [<ffff82c4c0167bef>] common_interrupt+0x5f/0x70
> (XEN)    [<ffff82c4c0185be0>] machine_restart+0x39/0x208
> (XEN)    [<ffff82c4c0143670>] panic+0x120/0x129
> (XEN)    [<ffff82c4c018dde0>] do_invalid_op+0x3d7/0x45b
> (XEN)    [<ffff82c4c023020d>] handle_exception_saved+0x2e/0x6c
> (XEN)    [<ffff82c4c01a4f65>] xsave+0x178/0x2c7
> (XEN)    [<ffff82c4c0167a1a>] vcpu_save_fpu+0x83/0x159
> (XEN)    [<ffff82c4c0160c80>] __context_switch+0x16e/0x50b
> (XEN)    [<ffff82c4c01634bc>] context_switch+0x1ba/0xf4e
> (XEN)    [<ffff82c4c0124364>] schedule+0x60b/0x61a
> (XEN)    [<ffff82c4c012710e>] __do_softirq+0x8e/0x99
> (XEN)    [<ffff82c4c0127174>] do_softirq+0x13/0x15
> (XEN)    [<ffff82c4c0160aba>] idle_loop+0x68/0x6a
> (XEN)
> (XEN)
> (XEN) ****************************************
> (XEN) Panic on CPU 1:
> (XEN) Assertion 'current == idle_vcpu[smp_processor_id()]' failed at
> domain.c:1494
> (XEN) ****************************************
> (XEN)
> (XEN) Reboot in five seconds...
> (XEN) Resetting with ACPI MEMORY or I/O RESET_REG.
> 
> 
> Ben
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org 
> http://lists.xen.org/xen-devel 


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-08 14:24                 ` Jan Beulich
@ 2013-07-08 14:31                   ` Ben Guthro
  2013-07-08 14:40                     ` Jan Beulich
  0 siblings, 1 reply; 38+ messages in thread
From: Ben Guthro @ 2013-07-08 14:31 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Mark Roddy, xen-devel

On Mon, Jul 8, 2013 at 10:24 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>> On 08.07.13 at 16:13, Ben Guthro <ben@guthro.net> wrote:
>> On Fri, Jul 5, 2013 at 8:58 AM, Ben Guthro <ben.guthro@gmail.com> wrote:
>>> On Jul 5, 2013, at 8:15 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>
>>>>>>> On 05.07.13 at 14:10, Ben Guthro <ben.guthro@gmail.com> wrote:
>>>>> Since I am not in the office today, nor near a machine that I can
>>>>> access this, I asked someone else to apply and check in this patch, in
>>>>> the hope that we could get some useful debug info from the weekend
>>>>> test run.
>>>>>
>>>>> However, when he went to test booting a VM with this patch, the host
>>>>> machine rebooted - presumably a Xen crash.
>>>>
>>>> Oops - it's just printing stuff, so I can't immediately see how that
>>>> would happen. But perhaps a trivial oversight of mine...
>>
>> Here's the crash with this patch
>> I'm continuing to look at it, but if something jumps out at you,
>> please let me know.
>
> Quite obvious: hvm_guest_x86_mode() has this assertion. Yet
> the original, supposedly working patch had a use of this too iirc.

It did...which is worrying.

One difference here is that 4.2 is running in debug=n mode, whereas
4.3 is debug=y.

iirc, asserts are disabled on debug=n builds.
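
For reference, a rough sketch of why that matters: the usual shape of
Xen's ASSERT() (quoted from memory, so the exact definition is an
assumption) only expands to a real check when NDEBUG is undefined, and
debug=n builds define NDEBUG:

/* Roughly, from xen/include/xen/lib.h: */
#ifndef NDEBUG
#define ASSERT(p) \
    do { if ( unlikely(!(p)) ) assert_failed(#p); } while (0)
#else
#define ASSERT(p) do { } while (0)
#endif
/* So the v == current check in hvm_guest_x86_mode() compiles away on the
 * 4.2 debug=n build, which would explain why the original patch never
 * tripped it there. */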



>
> Jan
>
>> (XEN) Assertion 'v == current' failed at
>> /data/home/bguthro/dev/orc-precise/xen/xen/include:275
>> (XEN) ----[ Xen-4.3-unstable  x86_64  debug=y  Not tainted ]----
>> (XEN) CPU:    1
>> (XEN) RIP:    e008:[<ffff82c4c01a4f65>] xsave+0x178/0x2c7
>> (XEN) RFLAGS: 0000000000010006   CONTEXT: hypervisor
>> (XEN) rax: ffff830215860000   rbx: ffff83020904ec00   rcx: ffff83021d63a000
>> (XEN) rdx: 00000000bfffffff   rsi: 0000000000000003   rdi: ffff83020904ec00
>> (XEN) rbp: ffff830215867d20   rsp: ffff830215867cd0   r8:  0000000000000000
>> (XEN) r9:  0000000000000000   r10: 0000000e23c7b646   r11: 0000000000000000
>> (XEN) r12: ffff8300d0840000   r13: 0000000000000008   r14: ffff830215860000
>> (XEN) r15: ffff830215867f18   cr0: 0000000080050033   cr4: 00000000001426f0
>> (XEN) cr3: 000000020bae2000   cr2: 0000000000000000
>> (XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: e008
>> (XEN) Xen stack trace from rsp=ffff830215867cd0:
>> (XEN)    ffff830215867df8 ffff830215867d38 ffff82c4c01b4622 ffff830215867d00
>> (XEN)    ffff83021d63ad80 0000000000000286 00000000000fa933 ffff8300d0840000
>> (XEN)    0000000000000001 ffff8300d0840000 ffff830215867d60 ffff82c4c0167a1a
>> (XEN)    ffff830215867d48 ffff830215867dc8 ffff82c4c017019c ffff830215867d98
>> (XEN)    0000000000000282 ffff8300d04fc000 ffff830215867db0 ffff82c4c0160c80
>> (XEN)    0000000a1d63ad18 0000000000000000 80000000d0840510 ffff8300d04fc000
>> (XEN)    ffff8300d072b000 0000000000000001 0000000000000001 ffff83021583d080
>> (XEN)    ffff830215867e20 ffff82c4c01634bc ffff82c4c01277b9 ffff8300d072b030
>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    ffff830215867e20 ffff8300d072b000 ffff8300d04fc000 0000000e21f655a0
>> (XEN)    0000000000000001 ffff83021583d080 ffff830215867eb0 ffff82c4c0124364
>> (XEN)    0000000000000002 ffff83021583d060 0000000100867e60 ffff83021583d068
>> (XEN)    ffff82c4c0127824 ffff83021583c028 ffff830215867eb0 ffff8300d04fc000
>> (XEN)    0000000000989680 ffff82c4c01ab600 ffff8300d072b000 ffff82c4c02f0080
>> (XEN)    ffff82c4c02f0000 ffff830215860000 ffffffffffffffff ffff83021583d080
>> (XEN)    ffff830215867ee0 ffff82c4c012710e ffff830215860000 ffff830215860000
>> (XEN)    00000000ffffff01 0000000000000003 ffff830215867ef0 ffff82c4c0127174
>> (XEN)    ffff830215867f10 ffff82c4c0160aba ffff8300d072b000 ffff8300d0840000
>> (XEN)    ffff830215867dc8 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000000000000080 00000000000001f7 0000000000000000
>> (XEN) Xen call trace:
>> (XEN)    [<ffff82c4c01a4f65>] xsave+0x178/0x2c7
>> (XEN)    [<ffff82c4c0167a1a>] vcpu_save_fpu+0x83/0x159
>> (XEN)    [<ffff82c4c0160c80>] __context_switch+0x16e/0x50b
>> (XEN)    [<ffff82c4c01634bc>] context_switch+0x1ba/0xf4e
>> (XEN)    [<ffff82c4c0124364>] schedule+0x60b/0x61a
>> (XEN)    [<ffff82c4c012710e>] __do_softirq+0x8e/0x99
>> (XEN)    [<ffff82c4c0127174>] do_softirq+0x13/0x15
>> (XEN)    [<ffff82c4c0160aba>] idle_loop+0x68/0x6a
>> (XEN)
>> (XEN)
>> (XEN) ****************************************
>> (XEN) Panic on CPU 1:
>> (XEN) Assertion 'v == current' failed at
>> /data/home/bguthro/dev/orc-precise/xen/xen/include:275
>> (XEN) ****************************************
>> (XEN)
>> (XEN) Reboot in five seconds...
>> (XEN) Assertion 'current == idle_vcpu[smp_processor_id()]' failed at
>> domain.c:1494
>> (XEN) ----[ Xen-4.3-unstable  x86_64  debug=y  Not tainted ]----
>> (XEN) CPU:    1
>> (XEN) RIP:    e008:[<ffff82c4c01642cc>] __sync_local_execstate+0x63/0x86
>> (XEN) RFLAGS: 0000000000010087   CONTEXT: hypervisor
>> (XEN) rax: ffff830215860000   rbx: 0000000000000001   rcx: 0000000000000001
>> (XEN) rdx: ffff82c4c026c4a0   rsi: ffff8300d072b000   rdi: ffff830215867a68
>> (XEN) rbp: ffff8302158679b8   rsp: ffff8302158679a8   r8:  0000000000000004
>> (XEN) r9:  000000000000000a   r10: 0000000000000020   r11: 000000000000000a
>> (XEN) r12: 0000000000000082   r13: 00000000000000fd   r14: 0000000000000113
>> (XEN) r15: ffff830215867f18   cr0: 0000000080050033   cr4: 00000000001426f0
>> (XEN) cr3: 000000020bae2000   cr2: 0000000000000000
>> (XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: e008
>> (XEN) Xen stack trace from rsp=ffff8302158679a8:
>> (XEN)    00000000000000fd ffff830215867a68 ffff8302158679c8 ffff82c4c01860eb
>> (XEN)    ffff830215867a58 ffff82c4c017019c ffff82c4c024d581 0000000000000086
>> (XEN)    ffff830215867a48 0000000000000086 ffff830215867a18 ffff82c4c012788c
>> (XEN)    0000000000000000 8000000015867a28 ffff830215867a28 ffff82c4c01437dc
>> (XEN)    ffff830215867a58 ffff82c4c0261138 0000000000000082 ffff82c4c02566f0
>> (XEN)    0000000000000113 ffff830215867f18 00007cfdea798577 ffff82c4c0167bef
>> (XEN)    ffff830215867f18 0000000000000113 ffff82c4c02566f0 0000000000000082
>> (XEN)    ffff830215867b58 ffff82c4c0261138 000000000000000a 0000000000000020
>> (XEN)    000000000000000a 0000000000000004 0000000000000000 0000000000000000
>> (XEN)    ffff83021583d020 0000000000000082 ffff82c4c0287860 000000fd00000000
>> (XEN)    ffff82c4c0185be0 000000000000e008 0000000000000246 ffff830215867b18
>> (XEN)    0000000000000000 ffff82c4c0185bd8 ffff830215867b48 0000138800000061
>> (XEN)    ffff83021583d020 ffff82c4c0261138 0000000000000082 ffff82c4c02566f0
>> (XEN)    0000000000000113 ffff830215867f18 ffff830215867bc8 ffff82c4c0143670
>> (XEN)    ffff830215867ba8 ffff82c400000020 ffff830215867bd8 ffff830215867b88
>> (XEN)    ffff82c4c024cf6a ffff82c4c024cf6a ffff82c4c02566f0 0000000000000113
>> (XEN)    0000000000000004 0000000000000061 ffff830215867c28 ffff82c4c024cf6a
>> (XEN)    ffff830215867c18 ffff82c4c018dde0 ffff830200000000 0000000000000000
>> (XEN)    fbbc000000000000 044fc20b0f000a7f ffff8300d04fc000 ffff8300d0840000
>> (XEN)    0000000000000008 ffff830215860000 00007cfdea7983b7 ffff82c4c023020d
>> (XEN) Xen call trace:
>> (XEN)    [<ffff82c4c01642cc>] __sync_local_execstate+0x63/0x86
>> (XEN)    [<ffff82c4c01860eb>] invalidate_interrupt+0x33/0x86
>> (XEN)    [<ffff82c4c017019c>] do_IRQ+0x9e/0x68d
>> (XEN)    [<ffff82c4c0167bef>] common_interrupt+0x5f/0x70
>> (XEN)    [<ffff82c4c0185be0>] machine_restart+0x39/0x208
>> (XEN)    [<ffff82c4c0143670>] panic+0x120/0x129
>> (XEN)    [<ffff82c4c018dde0>] do_invalid_op+0x3d7/0x45b
>> (XEN)    [<ffff82c4c023020d>] handle_exception_saved+0x2e/0x6c
>> (XEN)    [<ffff82c4c01a4f65>] xsave+0x178/0x2c7
>> (XEN)    [<ffff82c4c0167a1a>] vcpu_save_fpu+0x83/0x159
>> (XEN)    [<ffff82c4c0160c80>] __context_switch+0x16e/0x50b
>> (XEN)    [<ffff82c4c01634bc>] context_switch+0x1ba/0xf4e
>> (XEN)    [<ffff82c4c0124364>] schedule+0x60b/0x61a
>> (XEN)    [<ffff82c4c012710e>] __do_softirq+0x8e/0x99
>> (XEN)    [<ffff82c4c0127174>] do_softirq+0x13/0x15
>> (XEN)    [<ffff82c4c0160aba>] idle_loop+0x68/0x6a
>> (XEN)
>> (XEN)
>> (XEN) ****************************************
>> (XEN) Panic on CPU 1:
>> (XEN) Assertion 'current == idle_vcpu[smp_processor_id()]' failed at
>> domain.c:1494
>> (XEN) ****************************************
>> (XEN)
>> (XEN) Reboot in five seconds...
>> (XEN) Resetting with ACPI MEMORY or I/O RESET_REG.
>>
>>
>> Ben
>>
>> _______________________________________________
>> Xen-devel mailing list
>> Xen-devel@lists.xen.org
>> http://lists.xen.org/xen-devel
>
>


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-08 14:31                   ` Ben Guthro
@ 2013-07-08 14:40                     ` Jan Beulich
  2013-07-08 14:42                       ` Ben Guthro
  2013-07-08 14:44                       ` XSAVE/XRSTOR crash resurgence in 4.3 Andrew Cooper
  0 siblings, 2 replies; 38+ messages in thread
From: Jan Beulich @ 2013-07-08 14:40 UTC (permalink / raw)
  To: Ben Guthro; +Cc: Mark Roddy, xen-devel

>>> On 08.07.13 at 16:31, Ben Guthro <ben@guthro.net> wrote:
> On Mon, Jul 8, 2013 at 10:24 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>> On 08.07.13 at 16:13, Ben Guthro <ben@guthro.net> wrote:
>>> On Fri, Jul 5, 2013 at 8:58 AM, Ben Guthro <ben.guthro@gmail.com> wrote:
>>>> On Jul 5, 2013, at 8:15 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>
>>>>>>>> On 05.07.13 at 14:10, Ben Guthro <ben.guthro@gmail.com> wrote:
>>>>>> Since I am not in the office today, nor near a machine that I can
>>>>>> access this, I asked someone else to apply and check in this patch, in
>>>>>> the hope that we could get some useful debug info from the weekend
>>>>>> test run.
>>>>>>
>>>>>> However, when he went to test booting a VM with this patch, the host
>>>>>> machine rebooted - presumably a Xen crash.
>>>>>
>>>>> Oops - it's just printing stuff, so I can't immediately see how that
>>>>> would happen. But perhaps a trivial oversight of mine...
>>>
>>> Here's the crash with this patch
>>> I'm continuing to look at it, but if something jumps out at you,
>>> please let me know.
>>
>> Quite obvious: hvm_guest_x86_mode() has this assertion. Yet
>> the original, supposedly working patch had a use of this too iirc.
> 
> It did...which is worrying.
> 
> One difference here, is that 4.2 is running in debug=n mode, where 4.3
> is debug=y
> 
> iirc, asserts are disabled on debug=n builds.

Oh, right. And in the context here the assertion triggering is
apparently wrong anyway. For the purpose of debugging the
issue at hand, I think it is safe to comment it out.

But then again I thought you had assertions always enabled in
XenServer.

Jan


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-08 14:40                     ` Jan Beulich
@ 2013-07-08 14:42                       ` Ben Guthro
  2013-07-08 14:47                         ` Jan Beulich
  2013-07-08 14:44                       ` XSAVE/XRSTOR crash resurgence in 4.3 Andrew Cooper
  1 sibling, 1 reply; 38+ messages in thread
From: Ben Guthro @ 2013-07-08 14:42 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Mark Roddy, Ben Guthro, xen-devel

On Jul 8, 2013, at 10:41 AM, Jan Beulich <JBeulich@suse.com> wrote:

>>>> On 08.07.13 at 16:31, Ben Guthro <ben@guthro.net> wrote:
>> On Mon, Jul 8, 2013 at 10:24 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>> On 08.07.13 at 16:13, Ben Guthro <ben@guthro.net> wrote:
>>>> On Fri, Jul 5, 2013 at 8:58 AM, Ben Guthro <ben.guthro@gmail.com> wrote:
>>>>> On Jul 5, 2013, at 8:15 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>
>>>>>>>>> On 05.07.13 at 14:10, Ben Guthro <ben.guthro@gmail.com> wrote:
>>>>>>> Since I am not in the office today, nor near a machine that I can
>>>>>>> access this, I asked someone else to apply and check in this patch, in
>>>>>>> the hope that we could get some useful debug info from the weekend
>>>>>>> test run.
>>>>>>>
>>>>>>> However, when he went to test booting a VM with this patch, the host
>>>>>>> machine rebooted - presumably a Xen crash.
>>>>>>
>>>>>> Oops - it's just printing stuff, so I can't immediately see how that
>>>>>> would happen. But perhaps a trivial oversight of mine...
>>>>
>>>> Here's the crash with this patch
>>>> I'm continuing to look at it, but if something jumps out at you,
>>>> please let me know.
>>>
>>> Quite obvious: hvm_guest_x86_mode() has this assertion. Yet
>>> the original, supposedly working patch had a use of this too iirc.
>>
>> It did...which is worrying.
>>
>> One difference here, is that 4.2 is running in debug=n mode, where 4.3
>> is debug=y
>>
>> iirc, asserts are disabled on debug=n builds.
>
> Oh, right. And in the context here the assertion triggering is
> apparently wrong anyway. For the purpose of debugging the
> issue at hand, I think it is safe to comment it out.

Ok, I'll do that, thanks

>
> But then again I thought you had assertions always enabled in
> XenServer.

This is in XenClient

It is perhaps something to consider doing

>
> Jan
>


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-08 14:40                     ` Jan Beulich
  2013-07-08 14:42                       ` Ben Guthro
@ 2013-07-08 14:44                       ` Andrew Cooper
  2013-07-08 14:52                         ` Jan Beulich
  1 sibling, 1 reply; 38+ messages in thread
From: Andrew Cooper @ 2013-07-08 14:44 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Mark Roddy, Ben Guthro, xen-devel

On 08/07/13 15:40, Jan Beulich wrote:
>>>> On 08.07.13 at 16:31, Ben Guthro <ben@guthro.net> wrote:
>> On Mon, Jul 8, 2013 at 10:24 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>> On 08.07.13 at 16:13, Ben Guthro <ben@guthro.net> wrote:
>>>> On Fri, Jul 5, 2013 at 8:58 AM, Ben Guthro <ben.guthro@gmail.com> wrote:
>>>>> On Jul 5, 2013, at 8:15 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>
>>>>>>>>> On 05.07.13 at 14:10, Ben Guthro <ben.guthro@gmail.com> wrote:
>>>>>>> Since I am not in the office today, nor near a machine that I can
>>>>>>> access this, I asked someone else to apply and check in this patch, in
>>>>>>> the hope that we could get some useful debug info from the weekend
>>>>>>> test run.
>>>>>>>
>>>>>>> However, when he went to test booting a VM with this patch, the host
>>>>>>> machine rebooted - presumably a Xen crash.
>>>>>> Oops - it's just printing stuff, so I can't immediately see how that
>>>>>> would happen. But perhaps a trivial oversight of mine...
>>>> Here's the crash with this patch
>>>> I'm continuing to look at it, but if something jumps out at you,
>>>> please let me know.
>>> Quite obvious: hvm_guest_x86_mode() has this assertion. Yet
>>> the original, supposedly working patch had a use of this too iirc.
>> It did...which is worrying.
>>
>> One difference here, is that 4.2 is running in debug=n mode, where 4.3
>> is debug=y
>>
>> iirc, asserts are disabled on debug=n builds.
> Oh, right. And in the context here the assertion triggering is
> apparently wrong anyway. For the purpose of debugging the
> issue at hand, I think it is safe to comment it out.
>
> But then again I thought you had assertions always enabled in
> XenServer.
>
> Jan

Ben is on XenClient, which is a different team.

XenServer is still running with xsave=0 to emulate the 4.1 behaviour,
pending time to work out how to enable it without breaking certain PV
guests.
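
(For anyone wanting to reproduce that configuration: "xsave" is a boolean
option on the Xen command line, so it is a matter of appending it to the
hypervisor line in the boot entry - the paths and other options below are
illustrative only.)

    multiboot /boot/xen.gz <existing Xen options> xsave=0
    module    /boot/vmlinuz-<version> <existing dom0 options>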

~Andrew



* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-08 14:42                       ` Ben Guthro
@ 2013-07-08 14:47                         ` Jan Beulich
  2013-07-08 15:10                           ` Ben Guthro
  0 siblings, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2013-07-08 14:47 UTC (permalink / raw)
  To: Ben Guthro; +Cc: Mark Roddy, Ben Guthro, xen-devel

>>> On 08.07.13 at 16:42, Ben Guthro <ben.guthro@gmail.com> wrote:
> On Jul 8, 2013, at 10:41 AM, Jan Beulich <JBeulich@suse.com> wrote:
> 
>>>>> On 08.07.13 at 16:31, Ben Guthro <ben@guthro.net> wrote:
>>> On Mon, Jul 8, 2013 at 10:24 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>>> On 08.07.13 at 16:13, Ben Guthro <ben@guthro.net> wrote:
>>>>> On Fri, Jul 5, 2013 at 8:58 AM, Ben Guthro <ben.guthro@gmail.com> wrote:
>>>>>> On Jul 5, 2013, at 8:15 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>>
>>>>>>>>>> On 05.07.13 at 14:10, Ben Guthro <ben.guthro@gmail.com> wrote:
>>>>>>>> Since I am not in the office today, nor near a machine that I can
>>>>>>>> access this, I asked someone else to apply and check in this patch, in
>>>>>>>> the hope that we could get some useful debug info from the weekend
>>>>>>>> test run.
>>>>>>>>
>>>>>>>> However, when he went to test booting a VM with this patch, the host
>>>>>>>> machine rebooted - presumably a Xen crash.
>>>>>>>
>>>>>>> Oops - it's just printing stuff, so I can't immediately see how that
>>>>>>> would happen. But perhaps a trivial oversight of mine...
>>>>>
>>>>> Here's the crash with this patch
>>>>> I'm continuing to look at it, but if something jumps out at you,
>>>>> please let me know.
>>>>
>>>> Quite obvious: hvm_guest_x86_mode() has this assertion. Yet
>>>> the original, supposedly working patch had a use of this too iirc.
>>>
>>> It did...which is worrying.
>>>
>>> One difference here, is that 4.2 is running in debug=n mode, where 4.3
>>> is debug=y
>>>
>>> iirc, asserts are disabled on debug=n builds.
>>
>> Oh, right. And in the context here the assertion triggering is
>> apparently wrong anyway. For the purpose of debugging the
>> issue at hand, I think it is safe to comment it out.
> 
> Ok, I'll do that, thanks

Or, perhaps better, replace the call in the patch with one to
hvm_funcs.guest_x86_mode(). That's what I have done in my
copy of the patch just now (in case we need a second rev at
some point).
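
Purely as an illustration (the actual line in the debugging patch will look
different), the substitution amounts to:

    -    mode = hvm_guest_x86_mode(v);        /* wrapper: ASSERTs v == current */
    +    mode = hvm_funcs.guest_x86_mode(v);  /* direct hook call, no ASSERT   */

where "mode" stands in for whatever the patch does with the result.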

Jan


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-08 14:44                       ` XSAVE/XRSTOR crash resurgence in 4.3 Andrew Cooper
@ 2013-07-08 14:52                         ` Jan Beulich
  2013-07-08 14:55                           ` Andrew Cooper
  0 siblings, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2013-07-08 14:52 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Mark Roddy, Ben Guthro, xen-devel

>>> On 08.07.13 at 16:44, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> XenServer is still running with xsave=0 to emulate the 4.1 behaviour,
> pending time to work out how to enable it without breaking certain PV
> guests.

"... breaking certain broken PV guests" you perhaps meant to say...

Jan


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-08 14:52                         ` Jan Beulich
@ 2013-07-08 14:55                           ` Andrew Cooper
  0 siblings, 0 replies; 38+ messages in thread
From: Andrew Cooper @ 2013-07-08 14:55 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Mark Roddy, Ben Guthro, xen-devel

On 08/07/13 15:52, Jan Beulich wrote:
>>>> On 08.07.13 at 16:44, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> XenServer is still running with xsave=0 to emulate the 4.1 behaviour,
>> pending time to work out how to enable it without breaking certain PV
>> guests.
> "... breaking certain broken PV guests" you perhaps meant to say...
>
> Jan

Yes, but management and customers care that their VMs continue to work,
irrespective of guest kernel bugs.

Getting SSE/AVX working is on my todo list, but not at the top right now.

~Andrew



* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-08 14:47                         ` Jan Beulich
@ 2013-07-08 15:10                           ` Ben Guthro
  2013-07-12 13:11                             ` Ben Guthro
  0 siblings, 1 reply; 38+ messages in thread
From: Ben Guthro @ 2013-07-08 15:10 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Mark Roddy, xen-devel

On Mon, Jul 8, 2013 at 10:47 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>> On 08.07.13 at 16:42, Ben Guthro <ben.guthro@gmail.com> wrote:
>> On Jul 8, 2013, at 10:41 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>
>>>>>> On 08.07.13 at 16:31, Ben Guthro <ben@guthro.net> wrote:
>>>> On Mon, Jul 8, 2013 at 10:24 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>>>> On 08.07.13 at 16:13, Ben Guthro <ben@guthro.net> wrote:
>>>>>> On Fri, Jul 5, 2013 at 8:58 AM, Ben Guthro <ben.guthro@gmail.com> wrote:
>>>>>>> On Jul 5, 2013, at 8:15 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>>>
>>>>>>>>>>> On 05.07.13 at 14:10, Ben Guthro <ben.guthro@gmail.com> wrote:
>>>>>>>>> Since I am not in the office today, nor near a machine that I can
>>>>>>>>> access this, I asked someone else to apply and check in this patch, in
>>>>>>>>> the hope that we could get some useful debug info from the weekend
>>>>>>>>> test run.
>>>>>>>>>
>>>>>>>>> However, when he went to test booting a VM with this patch, the host
>>>>>>>>> machine rebooted - presumably a Xen crash.
>>>>>>>>
>>>>>>>> Oops - it's just printing stuff, so I can't immediately see how that
>>>>>>>> would happen. But perhaps a trivial oversight of mine...
>>>>>>
>>>>>> Here's the crash with this patch
>>>>>> I'm continuing to look at it, but if something jumps out at you,
>>>>>> please let me know.
>>>>>
>>>>> Quite obvious: hvm_guest_x86_mode() has this assertion. Yet
>>>>> the original, supposedly working patch had a use of this too iirc.
>>>>
>>>> It did...which is worrying.
>>>>
>>>> One difference here, is that 4.2 is running in debug=n mode, where 4.3
>>>> is debug=y
>>>>
>>>> iirc, asserts are disabled on debug=n builds.
>>>
>>> Oh, right. And in the context here the assertion triggering is
>>> apparently wrong anyway. For the purpose of debugging the
>>> issue at hand, I think it is safe to comment it out.
>>
>> Ok, I'll do that, thanks
>
> Or, perhaps better, replace the call in the patch with one to
>> hvm_funcs.guest_x86_mode(). That's what I have done in my
> copy of the patch just now (in case we need a second rev at
> some point).
>

Good idea. I'm testing a patch now, and will get it into tonight's test run.
Hopefully I'll have some data to share tomorrow morning.

Ben


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-08 15:10                           ` Ben Guthro
@ 2013-07-12 13:11                             ` Ben Guthro
  2013-07-12 13:38                               ` Jan Beulich
  0 siblings, 1 reply; 38+ messages in thread
From: Ben Guthro @ 2013-07-12 13:11 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Mark Roddy, xen-devel

On Mon, Jul 8, 2013 at 11:10 AM, Ben Guthro <ben.guthro@gmail.com> wrote:
> On Mon, Jul 8, 2013 at 10:47 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>> On 08.07.13 at 16:42, Ben Guthro <ben.guthro@gmail.com> wrote:
>>> On Jul 8, 2013, at 10:41 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>
>>>>>>> On 08.07.13 at 16:31, Ben Guthro <ben@guthro.net> wrote:
>>>>> On Mon, Jul 8, 2013 at 10:24 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>>>>> On 08.07.13 at 16:13, Ben Guthro <ben@guthro.net> wrote:
>>>>>>> On Fri, Jul 5, 2013 at 8:58 AM, Ben Guthro <ben.guthro@gmail.com> wrote:
>>>>>>>> On Jul 5, 2013, at 8:15 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>>>>
>>>>>>>>>>>> On 05.07.13 at 14:10, Ben Guthro <ben.guthro@gmail.com> wrote:
>>>>>>>>>> Since I am not in the office today, nor near a machine that I can
>>>>>>>>>> access this, I asked someone else to apply and check in this patch, in
>>>>>>>>>> the hope that we could get some useful debug info from the weekend
>>>>>>>>>> test run.
>>>>>>>>>>
>>>>>>>>>> However, when he went to test booting a VM with this patch, the host
>>>>>>>>>> machine rebooted - presumably a Xen crash.
>>>>>>>>>
>>>>>>>>> Oops - it's just printing stuff, so I can't immediately see how that
>>>>>>>>> would happen. But perhaps a trivial oversight of mine...
>>>>>>>
>>>>>>> Here's the crash with this patch
>>>>>>> I'm continuing to look at it, but if something jumps out at you,
>>>>>>> please let me know.
>>>>>>
>>>>>> Quite obvious: hvm_guest_x86_mode() has this assertion. Yet
>>>>>> the original, supposedly working patch had a use of this too iirc.
>>>>>
>>>>> It did...which is worrying.
>>>>>
>>>>> One difference here, is that 4.2 is running in debug=n mode, where 4.3
>>>>> is debug=y
>>>>>
>>>>> iirc, asserts are disabled on debug=n builds.
>>>>
>>>> Oh, right. And in the context here the assertion triggering is
>>>> apparently wrong anyway. For the purpose of debugging the
>>>> issue at hand, I think it is safe to comment it out.
>>>
>>> Ok, I'll do that, thanks
>>
>> Or, perhaps better, replace the call in the patch with one to
>> hvm_funcs.guest_x86_mode(). That's what I have done in my
>> copy of the patch just now (in case we need a second rev at
>> some point).
>>
>
> Good idea. I'm testing a patch now, and will get it into tonight's test run.
> Hopefully I'll have some data to share tomorrow morning.

Due to some other breakage in our mainline build that just got fixed,
I just now got to analyze the logs for a system that showed this crash
with the debugging patch.
The odd part is, that in this failing case - this machine does not
seem to be printing these debug messages at all.

Is this the only path this can go through for xsave, or are there others?


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-12 13:11                             ` Ben Guthro
@ 2013-07-12 13:38                               ` Jan Beulich
  2013-07-12 13:49                                 ` Ben Guthro
  0 siblings, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2013-07-12 13:38 UTC (permalink / raw)
  To: Ben Guthro; +Cc: Mark Roddy, xen-devel

>>> On 12.07.13 at 15:11, Ben Guthro <ben.guthro@gmail.com> wrote:
> The odd part is, that in this failing case - this machine does not
> seem to be printing these debug messages at all.
> 
> Is this the only path this can go through for xsave, or are there others?

For xsave that's the only one. On systems not supporting xsave, it
would go through the (not instrumented) fxsave path instead. But
if we know that the system uses xsave, yet none of the messages
gets printed, that's also a valuable piece of information.
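
Roughly, the dispatch is in vcpu_save_fpu() (xen/arch/x86/i387.c) - paraphrased,
the exact conditions differ between trees:

    if ( xsave_enabled(v) )
        fpu_xsave(v);    /* -> xsave() in xstate.c, which has the printk()s */
    else
        fpu_fxsave(v);   /* legacy FXSAVE path, left uninstrumented */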

Jan


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-12 13:38                               ` Jan Beulich
@ 2013-07-12 13:49                                 ` Ben Guthro
  2013-07-12 14:34                                   ` Jan Beulich
  0 siblings, 1 reply; 38+ messages in thread
From: Ben Guthro @ 2013-07-12 13:49 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Mark Roddy, xen-devel

On Fri, Jul 12, 2013 at 9:38 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>> On 12.07.13 at 15:11, Ben Guthro <ben.guthro@gmail.com> wrote:
>> The odd part is, that in this failing case - this machine does not
>> seem to be printing these debug messages at all.
>>
>> Is this the only path this can go through for xsave, or are there others?
>
> For xsave that's the only one. On systems not supporting xsave, it
> would go through the (not instrumented) fxsave path instead. But
> if we know that the system uses xsave, yet none of the messages
> gets printed, that's also a valuable piece of information.

It at least has the xsave cpuid leaf in /proc/cpuinfo

Is there something else in the Xen log I should look for to know that
Xen is actually using xsave?

>
> Jan
>


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-12 13:49                                 ` Ben Guthro
@ 2013-07-12 14:34                                   ` Jan Beulich
  2013-07-12 14:49                                     ` Ben Guthro
  0 siblings, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2013-07-12 14:34 UTC (permalink / raw)
  To: Ben Guthro; +Cc: Mark Roddy, xen-devel

>>> On 12.07.13 at 15:49, Ben Guthro <ben.guthro@gmail.com> wrote:
> On Fri, Jul 12, 2013 at 9:38 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>> On 12.07.13 at 15:11, Ben Guthro <ben.guthro@gmail.com> wrote:
>>> The odd part is, that in this failing case - this machine does not
>>> seem to be printing these debug messages at all.
>>>
>>> Is this the only path this can go through for xsave, or are there others?
>>
>> For xsave that's the only one. On systems not supporting xsave, it
>> would go through the (not instrumented) fxsave path instead. But
>> if we know that the system uses xsave, yet none of the messages
>> gets printed, that's also a valuable piece of information.
> 
> It at least has the xsave cpuid leaf in /proc/cpuinfo
> 
> Is there something else in the Xen log I should look for to know that
> Xen is actually using xsave?

        printk("%s: using cntxt_size: %#x and states: %#"PRIx64"\n",
            __func__, xsave_cntxt_size, xfeature_mask);

(in xstate_init()).
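
That ends up in the hypervisor console log, so e.g. (assuming the xl
toolstack is in use):

    xl dmesg | grep xstate_init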

Jan


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-12 14:34                                   ` Jan Beulich
@ 2013-07-12 14:49                                     ` Ben Guthro
  2013-07-12 14:55                                       ` Jan Beulich
  0 siblings, 1 reply; 38+ messages in thread
From: Ben Guthro @ 2013-07-12 14:49 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Mark Roddy, xen-devel

On Fri, Jul 12, 2013 at 10:34 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>> On 12.07.13 at 15:49, Ben Guthro <ben.guthro@gmail.com> wrote:
>> On Fri, Jul 12, 2013 at 9:38 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>> On 12.07.13 at 15:11, Ben Guthro <ben.guthro@gmail.com> wrote:
>>>> The odd part is, that in this failing case - this machine does not
>>>> seem to be printing these debug messages at all.
>>>>
>>>> Is this the only path this can go through for xsave, or are there others?
>>>
>>> For xsave that's the only one. On systems not supporting xsave, it
>>> would go through the (not instrumented) fxsave path instead. But
>>> if we know that the system uses xsave, yet none of the messages
>>> gets printed, that's also a valuable piece of information.
>>
>> It at least has the xsave cpuid leaf in /proc/cpuinfo
>>
>> Is there something else in the Xen log I should look for to know that
>> Xen is actually using xsave?
>
>         printk("%s: using cntxt_size: %#x and states: %#"PRIx64"\n",
>             __func__, xsave_cntxt_size, xfeature_mask);
>
> (in xstate_init()).


(XEN) xstate_init: using cntxt_size: 0x340 and states: 0x7


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-12 14:49                                     ` Ben Guthro
@ 2013-07-12 14:55                                       ` Jan Beulich
  2013-07-12 15:14                                         ` Ben Guthro
  0 siblings, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2013-07-12 14:55 UTC (permalink / raw)
  To: Ben Guthro; +Cc: Mark Roddy, xen-devel

>>> On 12.07.13 at 16:49, Ben Guthro <ben.guthro@gmail.com> wrote:
> On Fri, Jul 12, 2013 at 10:34 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>> On 12.07.13 at 15:49, Ben Guthro <ben.guthro@gmail.com> wrote:
>>> On Fri, Jul 12, 2013 at 9:38 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>>> On 12.07.13 at 15:11, Ben Guthro <ben.guthro@gmail.com> wrote:
>>>>> The odd part is, that in this failing case - this machine does not
>>>>> seem to be printing these debug messages at all.
>>>>>
>>>>> Is this the only path this can go through for xsave, or are there others?
>>>>
>>>> For xsave that's the only one. On systems not supporting xsave, it
>>>> would go through the (not instrumented) fxsave path instead. But
>>>> if we know that the system uses xsave, yet none of the messages
>>>> gets printed, that's also a valuable piece of information.
>>>
>>> It at least has the xsave cpuid leaf in /proc/cpuinfo
>>>
>>> Is there something else in the Xen log I should look for to know that
>>> Xen is actually using xsave?
>>
>>         printk("%s: using cntxt_size: %#x and states: %#"PRIx64"\n",
>>             __func__, xsave_cntxt_size, xfeature_mask);
>>
>> (in xstate_init()).
> 
> 
> (XEN) xstate_init: using cntxt_size: 0x340 and states: 0x7

Okay, so to summarize: xsave is being used, and you don't see any
of the messages from the debug patch printed, yet the Windows
crash still occurs?

Jan


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-12 14:55                                       ` Jan Beulich
@ 2013-07-12 15:14                                         ` Ben Guthro
  2013-07-15  6:41                                           ` Jan Beulich
  2013-07-15 12:33                                           ` Jan Beulich
  0 siblings, 2 replies; 38+ messages in thread
From: Ben Guthro @ 2013-07-12 15:14 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Mark Roddy, xen-devel

On Fri, Jul 12, 2013 at 10:55 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>> On 12.07.13 at 16:49, Ben Guthro <ben.guthro@gmail.com> wrote:
>> On Fri, Jul 12, 2013 at 10:34 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>> On 12.07.13 at 15:49, Ben Guthro <ben.guthro@gmail.com> wrote:
>>>> On Fri, Jul 12, 2013 at 9:38 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>>>> On 12.07.13 at 15:11, Ben Guthro <ben.guthro@gmail.com> wrote:
>>>>>> The odd part is, that in this failing case - this machine does not
>>>>>> seem to be printing these debug messages at all.
>>>>>>
>>>>>> Is this the only path this can go through for xsave, or are there others?
>>>>>
>>>>> For xsave that's the only one. On systems not supporting xsave, it
>>>>> would go through the (not instrumented) fxsave path instead. But
>>>>> if we know that the system uses xsave, yet none of the messages
>>>>> gets printed, that's also a valuable piece of information.
>>>>
>>>> It at least has the xsave cpuid leaf in /proc/cpuinfo
>>>>
>>>> Is there something else in the Xen log I should look for to know that
>>>> Xen is actually using xsave?
>>>
>>>         printk("%s: using cntxt_size: %#x and states: %#"PRIx64"\n",
>>>             __func__, xsave_cntxt_size, xfeature_mask);
>>>
>>> (in xstate_init()).
>>
>>
>> (XEN) xstate_init: using cntxt_size: 0x340 and states: 0x7
>
> Okay, so to summarize: xsave is being used, and you don't see any
> of the messages from the debug patch printed, yet the Windows
> crash still occurs?

Yes - that is accurate.

The crash from this test run looked like the following

(from Mark)

STACK_COMMAND:  kb

FOLLOWUP_IP:
nt!ViCtxCheckAndReleaseXSaveData+59
8298120c cd2c            int     2Ch

Interrupt Service Routine 9AD1B91C has changed extended thread context.
Context saved before executing ISR: 841C5D80. Context saved after
executing ISR: 841C5B00.
1: kd> dd 841C5D80
841c5d80  0120027f 00000000 6dc42542 0000001b
841c5d90  6dc413c8 00000023 00001fa0 0000ffff
841c5da0  00d400d4 000000d4 0000ffff 00000000
841c5db0  009a9a9a 00000000 0000ffff 00000000
841c5dc0  00010001 00010001 0000ffff 00000000
841c5dd0  00000000 00000000 00000000 00000000
841c5de0  00000000 00000000 00000000 00000000
841c5df0  00000000 00000000 00008000 00000000
1: kd> dd 841C5B00
841c5b00  0120027f 00000000 6dc42542 00000000
841c5b10  6dc413c8 00000000 00001fa0 0000ffff
841c5b20  00d400d4 000000d4 0000ffff 00000000
841c5b30  009a9a9a 00000000 0000ffff 00000000
841c5b40  00010001 00010001 0000ffff 00000000
841c5b50  00000000 00000000 00000000 00000000
841c5b60  00000000 00000000 00000000 00000000
841c5b70  00000000 00000000 00008000 00000000


On other systems, with this same build, I see the debug output in the logs.

However, it is absent on this system


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-12 15:14                                         ` Ben Guthro
@ 2013-07-15  6:41                                           ` Jan Beulich
  2013-07-15 12:33                                           ` Jan Beulich
  1 sibling, 0 replies; 38+ messages in thread
From: Jan Beulich @ 2013-07-15  6:41 UTC (permalink / raw)
  To: Ben Guthro; +Cc: Mark Roddy, xen-devel

>>> On 12.07.13 at 17:14, Ben Guthro <ben.guthro@gmail.com> wrote:
> On other systems, with this same build, I see the debug output in the logs.

Hmm, and on those other systems there is no guest crash? That would
suggest that some conditional is backwards then... Could you
send one such log containing the debug output, for reference?

Jan


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-12 15:14                                         ` Ben Guthro
  2013-07-15  6:41                                           ` Jan Beulich
@ 2013-07-15 12:33                                           ` Jan Beulich
  2013-07-15 12:43                                             ` Ben Guthro
  1 sibling, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2013-07-15 12:33 UTC (permalink / raw)
  To: Ben Guthro; +Cc: Mark Roddy, xen-devel

>>> On 12.07.13 at 17:14, Ben Guthro <ben.guthro@gmail.com> wrote:
> On other systems, with this same build, I see the debug output in the logs.
> 
> However, it is absent on this system

Just looked over the debugging patch again, and can't see what
might be wrong with it. Going by what you say, there must be
something different between the hosts and/or guests on
those systems.

Are you, on the other systems, perhaps only ever seeing the
message added to xrstor(), which would also get printed for
64-bit HVM guests? If so, we may need to add another printk()
to the other case within that switch statement (albeit getting
there would seem to imply that on the matching xsave() run
the selectors would still have been non-null, or else one of the
printk()s should have got executed)...

And I take it, btw, that there's no migration or save/restore
involved here?

Jan


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-15 12:33                                           ` Jan Beulich
@ 2013-07-15 12:43                                             ` Ben Guthro
  2013-07-15 13:49                                               ` Ben Guthro
  0 siblings, 1 reply; 38+ messages in thread
From: Ben Guthro @ 2013-07-15 12:43 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Mark Roddy, xen-devel

On Mon, Jul 15, 2013 at 8:33 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>> On 12.07.13 at 17:14, Ben Guthro <ben.guthro@gmail.com> wrote:
>> On other systems, with this same build, I see the debug output in the logs.
>>
>> However, it is absent on this system
>
> Just looked over the debugging patch again, and can't see what
> might be wrong with it. Following what you say there must be
> something different between the hosts and/or guests between
> those systems.
>
> Are you, on the other systems, perhaps only ever seeing the
> message added to xrstor(), which would also get printed for
> 64-bit HVM guests?

Hmm... this may very well be possible.
I certainly see the messages on 64bit guests.

I need to collect some more data to understand where, and when I'm
seeing the problem.
Trying to form a theory around a limited data set is challenging here.

> If so, we may need to add another printk()
> to the other case within that switch statement (albeit getting
> there would seem to imply that on the matching xsave() run
> the selectors would still have been non-null, or else one of the
> printk()s should have got executed)...
>
> And I take it, btw, that there's no migration or save/restore
> involved here?
>
> Jan
>


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-15 12:43                                             ` Ben Guthro
@ 2013-07-15 13:49                                               ` Ben Guthro
  2013-07-15 14:06                                                 ` Jan Beulich
  2013-07-16 16:23                                                 ` Jan Beulich
  0 siblings, 2 replies; 38+ messages in thread
From: Ben Guthro @ 2013-07-15 13:49 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Mark Roddy, xen-devel

On Mon, Jul 15, 2013 at 8:43 AM, Ben Guthro <ben.guthro@gmail.com> wrote:
> On Mon, Jul 15, 2013 at 8:33 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>> On 12.07.13 at 17:14, Ben Guthro <ben.guthro@gmail.com> wrote:
>>> On other systems, with this same build, I see the debug output in the logs.
>>>
>>> However, it is absent on this system
>>
>> Just looked over the debugging patch again, and can't see what
>> might be wrong with it. Following what you say there must be
>> something different between the hosts and/or guests between
>> those systems.
>>
>> Are you, on the other systems, perhaps only ever seeing the
>> message added to xrstor(), which would also get printed for
>> 64-bit HVM guests?
>
> Hmm... this may very well be possible.
> I certainly see the messages on 64bit guests.
>
> I need to collect some more data to understand where, and when I'm
> seeing the problem.
> Trying to form a theory around a limited data set is challenging here.

Well...there goes that theory

The following output was from a WinXP SP3 guest (32bit) on a Lenovo T430:

(XEN) d1v0: fip=1b773d6e9a fdp=23773d1c48 w=8
(XEN) d1v0: FIP=1b773d6e9a FDP=23773d1c48 w=8
(XEN) d1v1: fip=1b79e78dee fdp=230012e3b4 w=8
(XEN) d1v1: FIP=1b79e78dee FDP=230012e3b4 w=8
(XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
(XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
(XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
(XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
(XEN) d1v1: fip=1b79e78dee fdp=230012d528 w=8
(XEN) d1v1: FIP=1b79e78dee FDP=230012d528 w=8
(XEN) d1v0: fip=4500000000 fdp=4b1000000000 w=8
(XEN) d1v0: FIP=4500000000 FDP=4b1000000000 w=8
(XEN) d1v1: fip=1b773d6e9a fdp=23773d1c48 w=8
(XEN) d1v1: FIP=1b773d6e9a FDP=23773d1c48 w=8


I have some more logs to go through.
I'll reply again if I find anything worth noting.

>
>> If so, we may need to add another printk()
>> to the other case within that switch statement (albeit getting
>> there would seem to imply that on the matching xsave() run
>> the selectors would still have been non-null, or else one of the
>> printk()s should have got executed)...
>>
>> And I take it, btw, that there's no migration or save/restore
>> involved here?
>>
>> Jan
>>


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-15 13:49                                               ` Ben Guthro
@ 2013-07-15 14:06                                                 ` Jan Beulich
  2013-07-16 16:23                                                 ` Jan Beulich
  1 sibling, 0 replies; 38+ messages in thread
From: Jan Beulich @ 2013-07-15 14:06 UTC (permalink / raw)
  To: Ben Guthro; +Cc: Mark Roddy, xen-devel

>>> On 15.07.13 at 15:49, Ben Guthro <ben.guthro@gmail.com> wrote:
> The following output was from a WinXP SP3 guest (32bit) on a Lenovo T430:
> 
> (XEN) d1v0: fip=1b773d6e9a fdp=23773d1c48 w=8
> (XEN) d1v0: FIP=1b773d6e9a FDP=23773d1c48 w=8
> (XEN) d1v1: fip=1b79e78dee fdp=230012e3b4 w=8
> (XEN) d1v1: FIP=1b79e78dee FDP=230012e3b4 w=8
> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
> (XEN) d1v1: fip=1b79e78dee fdp=230012d528 w=8
> (XEN) d1v1: FIP=1b79e78dee FDP=230012d528 w=8
> (XEN) d1v0: fip=4500000000 fdp=4b1000000000 w=8
> (XEN) d1v0: FIP=4500000000 FDP=4b1000000000 w=8
> (XEN) d1v1: fip=1b773d6e9a fdp=23773d1c48 w=8
> (XEN) d1v1: FIP=1b773d6e9a FDP=23773d1c48 w=8

Which clearly shows that something's going very wrong. Let
me go associate this with the sources...

Jan


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-15 13:49                                               ` Ben Guthro
  2013-07-15 14:06                                                 ` Jan Beulich
@ 2013-07-16 16:23                                                 ` Jan Beulich
  2013-07-16 16:57                                                   ` Ben Guthro
  1 sibling, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2013-07-16 16:23 UTC (permalink / raw)
  To: Ben Guthro; +Cc: Mark Roddy, xen-devel

>>> On 15.07.13 at 15:49, Ben Guthro <ben.guthro@gmail.com> wrote:
> The following output was from a WinXP SP3 guest (32bit) on a Lenovo T430:
> 
> (XEN) d1v0: fip=1b773d6e9a fdp=23773d1c48 w=8
> (XEN) d1v0: FIP=1b773d6e9a FDP=23773d1c48 w=8
> (XEN) d1v1: fip=1b79e78dee fdp=230012e3b4 w=8
> (XEN) d1v1: FIP=1b79e78dee FDP=230012e3b4 w=8
> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
> (XEN) d1v1: fip=1b79e78dee fdp=230012d528 w=8
> (XEN) d1v1: FIP=1b79e78dee FDP=230012d528 w=8
> (XEN) d1v0: fip=4500000000 fdp=4b1000000000 w=8
> (XEN) d1v0: FIP=4500000000 FDP=4b1000000000 w=8
> (XEN) d1v1: fip=1b773d6e9a fdp=23773d1c48 w=8
> (XEN) d1v1: FIP=1b773d6e9a FDP=23773d1c48 w=8

Well, it should have been quite obvious (to me) that this is
related to xsaveopt behavior (according to my own observations
of cases where the selector/offset fields don't get written). Could
you double check whether the system(s) you see the problem on
support xsaveopt, and if so, simply comment out the conditionals
that cause it to be used?
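
As a sketch of what I mean (only the xsave() side shown; adjust to taste):

    /* xen/arch/x86/xstate.c, xsave() - force the plain XSAVE encoding */
    if ( 0 /* cpu_has_xsaveopt */ )
        asm volatile ( ".byte 0x48,0x0f,0xae,0x37"   /* xsaveopt */
                       : "=m" (*ptr)
                       : "a" (lmask), "d" (hmask), "D" (ptr) );
    else
        asm volatile ( ".byte 0x48,0x0f,0xae,0x27"   /* xsave */
                       : "=m" (*ptr)
                       : "a" (lmask), "d" (hmask), "D" (ptr) );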

Jan


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-16 16:23                                                 ` Jan Beulich
@ 2013-07-16 16:57                                                   ` Ben Guthro
  2013-07-17  6:38                                                     ` Jan Beulich
  0 siblings, 1 reply; 38+ messages in thread
From: Ben Guthro @ 2013-07-16 16:57 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Mark Roddy, xen-devel

On Tue, Jul 16, 2013 at 12:23 PM, Jan Beulich <JBeulich@suse.com> wrote:
>>>> On 15.07.13 at 15:49, Ben Guthro <ben.guthro@gmail.com> wrote:
>> The following output was from a WinXP SP3 guest (32bit) on a Lenovo T430:
>>
>> (XEN) d1v0: fip=1b773d6e9a fdp=23773d1c48 w=8
>> (XEN) d1v0: FIP=1b773d6e9a FDP=23773d1c48 w=8
>> (XEN) d1v1: fip=1b79e78dee fdp=230012e3b4 w=8
>> (XEN) d1v1: FIP=1b79e78dee FDP=230012e3b4 w=8
>> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
>> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
>> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
>> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
>> (XEN) d1v1: fip=1b79e78dee fdp=230012d528 w=8
>> (XEN) d1v1: FIP=1b79e78dee FDP=230012d528 w=8
>> (XEN) d1v0: fip=4500000000 fdp=4b1000000000 w=8
>> (XEN) d1v0: FIP=4500000000 FDP=4b1000000000 w=8
>> (XEN) d1v1: fip=1b773d6e9a fdp=23773d1c48 w=8
>> (XEN) d1v1: FIP=1b773d6e9a FDP=23773d1c48 w=8
>
> Well, it should have been quite obvious (to me) that this is
> related to xsaveopt behavior (according to my own observations
> of cases where the selector/offset fields don't get written). Could
> you double check whether the system(s) you see the problem on
> support xsaveopt, and if so, simply comment out the conditionals
> that cause it to be used?

They do support xsaveopt.
I'll ifdef these out for tonight's test run.

>
> Jan
>


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-16 16:57                                                   ` Ben Guthro
@ 2013-07-17  6:38                                                     ` Jan Beulich
  2013-07-17 13:07                                                       ` Ben Guthro
  0 siblings, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2013-07-17  6:38 UTC (permalink / raw)
  To: Ben Guthro; +Cc: Mark Roddy, xen-devel

[-- Attachment #1: Type: text/plain, Size: 2668 bytes --]

>>> On 16.07.13 at 18:57, Ben Guthro <ben.guthro@gmail.com> wrote:
> On Tue, Jul 16, 2013 at 12:23 PM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>> On 15.07.13 at 15:49, Ben Guthro <ben.guthro@gmail.com> wrote:
>>> The following output was from a WinXP SP3 guest (32bit) on a Lenovo T430:
>>>
>>> (XEN) d1v0: fip=1b773d6e9a fdp=23773d1c48 w=8
>>> (XEN) d1v0: FIP=1b773d6e9a FDP=23773d1c48 w=8
>>> (XEN) d1v1: fip=1b79e78dee fdp=230012e3b4 w=8
>>> (XEN) d1v1: FIP=1b79e78dee FDP=230012e3b4 w=8
>>> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
>>> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
>>> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
>>> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
>>> (XEN) d1v1: fip=1b79e78dee fdp=230012d528 w=8
>>> (XEN) d1v1: FIP=1b79e78dee FDP=230012d528 w=8
>>> (XEN) d1v0: fip=4500000000 fdp=4b1000000000 w=8
>>> (XEN) d1v0: FIP=4500000000 FDP=4b1000000000 w=8
>>> (XEN) d1v1: fip=1b773d6e9a fdp=23773d1c48 w=8
>>> (XEN) d1v1: FIP=1b773d6e9a FDP=23773d1c48 w=8
>>
>> Well, it should have been quite obvious (to me) that this is
>> related to xsaveopt behavior (according to my own observations
>> of cases where the selector/offset fields don't get written). Could
>> you double check whether the system(s) you see the problem on
>> support xsaveopt, and if so, simply comment out the conditionals
>> that cause it to be used?
> 
> They do support xsaveopt.
> I'll ifdef these out for tonight's test run.

And attached/below also a tentative fix.

Jan

--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -72,9 +72,24 @@ void xsave(struct vcpu *v, uint64_t mask
     if ( word_size <= 0 || !is_pv_32bit_vcpu(v) )
     {
         if ( cpu_has_xsaveopt )
+        {
+            /*
+             * xsaveopt may not write the FPU portion even when the respective
+             * mask bit is set. For the check further down to work we hence
+             * need to put the save image back into the state that it was in
+             * right after the previous xsaveopt.
+             */
+            if ( word_size > 0 &&
+                 (ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] == 4 ||
+                  ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] == 2) )
+            {
+                ptr->fpu_sse.fip.sel = 0;
+                ptr->fpu_sse.fdp.sel = 0;
+            }
             asm volatile ( ".byte 0x48,0x0f,0xae,0x37"
                            : "=m" (*ptr)
                            : "a" (lmask), "d" (hmask), "D" (ptr) );
+        }
         else
             asm volatile ( ".byte 0x48,0x0f,0xae,0x27"
                            : "=m" (*ptr)





* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-17  6:38                                                     ` Jan Beulich
@ 2013-07-17 13:07                                                       ` Ben Guthro
  2013-07-22 12:25                                                         ` Ben Guthro
  0 siblings, 1 reply; 38+ messages in thread
From: Ben Guthro @ 2013-07-17 13:07 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Mark Roddy, xen-devel

On Wed, Jul 17, 2013 at 2:38 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>> On 16.07.13 at 18:57, Ben Guthro <ben.guthro@gmail.com> wrote:
>> On Tue, Jul 16, 2013 at 12:23 PM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>> On 15.07.13 at 15:49, Ben Guthro <ben.guthro@gmail.com> wrote:
>>>> The following output was from a WinXP SP3 guest (32bit) on a Lenovo T430:
>>>>
>>>> (XEN) d1v0: fip=1b773d6e9a fdp=23773d1c48 w=8
>>>> (XEN) d1v0: FIP=1b773d6e9a FDP=23773d1c48 w=8
>>>> (XEN) d1v1: fip=1b79e78dee fdp=230012e3b4 w=8
>>>> (XEN) d1v1: FIP=1b79e78dee FDP=230012e3b4 w=8
>>>> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
>>>> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
>>>> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
>>>> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
>>>> (XEN) d1v1: fip=1b79e78dee fdp=230012d528 w=8
>>>> (XEN) d1v1: FIP=1b79e78dee FDP=230012d528 w=8
>>>> (XEN) d1v0: fip=4500000000 fdp=4b1000000000 w=8
>>>> (XEN) d1v0: FIP=4500000000 FDP=4b1000000000 w=8
>>>> (XEN) d1v1: fip=1b773d6e9a fdp=23773d1c48 w=8
>>>> (XEN) d1v1: FIP=1b773d6e9a FDP=23773d1c48 w=8
>>>
>>> Well, it should have been quite obvious (to me) that this is
>>> related to xsaveopt behavior (according to my own observations
>>> of cases where the selector/offset fields don't get written). Could
>>> you double check whether the system(s) you see the problem on
>>> support xsaveopt, and if so, simply comment out the conditionals
>>> that cause it to be used?
>>
>> They do support xsaveopt.
>> I'll ifdef these out for tonight's test run.

There was a problem with my commit for this test run, so I did not get
the data I wanted.

I will test with this patch tonight.
Thank you

Ben

>
> And attached/below also a tentative fix.
>
> Jan
>
> --- a/xen/arch/x86/xstate.c
> +++ b/xen/arch/x86/xstate.c
> @@ -72,9 +72,24 @@ void xsave(struct vcpu *v, uint64_t mask
>      if ( word_size <= 0 || !is_pv_32bit_vcpu(v) )
>      {
>          if ( cpu_has_xsaveopt )
> +        {
> +            /*
> +             * xsaveopt may not write the FPU portion even when the respective
> +             * mask bit is set. For the check further down to work we hence
> +             * need to put the save image back into the state that it was in
> +             * right after the previous xsaveopt.
> +             */
> +            if ( word_size > 0 &&
> +                 (ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] == 4 ||
> +                  ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] == 2) )
> +            {
> +                ptr->fpu_sse.fip.sel = 0;
> +                ptr->fpu_sse.fdp.sel = 0;
> +            }
>              asm volatile ( ".byte 0x48,0x0f,0xae,0x37"
>                             : "=m" (*ptr)
>                             : "a" (lmask), "d" (hmask), "D" (ptr) );
> +        }
>          else
>              asm volatile ( ".byte 0x48,0x0f,0xae,0x27"
>                             : "=m" (*ptr)
>
>


* Re: XSAVE/XRSTOR crash resurgence in 4.3
  2013-07-17 13:07                                                       ` Ben Guthro
@ 2013-07-22 12:25                                                         ` Ben Guthro
  2013-08-05 13:05                                                           ` [PATCH] x86: refine FPU selector handling code for XSAVEOPT Jan Beulich
  0 siblings, 1 reply; 38+ messages in thread
From: Ben Guthro @ 2013-07-22 12:25 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Mark Roddy, xen-devel

On Wed, Jul 17, 2013 at 9:07 AM, Ben Guthro <ben.guthro@gmail.com> wrote:
> On Wed, Jul 17, 2013 at 2:38 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>> On 16.07.13 at 18:57, Ben Guthro <ben.guthro@gmail.com> wrote:
>>> On Tue, Jul 16, 2013 at 12:23 PM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>>> On 15.07.13 at 15:49, Ben Guthro <ben.guthro@gmail.com> wrote:
>>>>> The following output was from a WinXP SP3 guest (32bit) on a Lenovo T430:
>>>>>
>>>>> (XEN) d1v0: fip=1b773d6e9a fdp=23773d1c48 w=8
>>>>> (XEN) d1v0: FIP=1b773d6e9a FDP=23773d1c48 w=8
>>>>> (XEN) d1v1: fip=1b79e78dee fdp=230012e3b4 w=8
>>>>> (XEN) d1v1: FIP=1b79e78dee FDP=230012e3b4 w=8
>>>>> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
>>>>> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
>>>>> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
>>>>> (XEN) d1v1: fip=0000:79e78dee fdp=0000:0012e3b4
>>>>> (XEN) d1v1: fip=1b79e78dee fdp=230012d528 w=8
>>>>> (XEN) d1v1: FIP=1b79e78dee FDP=230012d528 w=8
>>>>> (XEN) d1v0: fip=4500000000 fdp=4b1000000000 w=8
>>>>> (XEN) d1v0: FIP=4500000000 FDP=4b1000000000 w=8
>>>>> (XEN) d1v1: fip=1b773d6e9a fdp=23773d1c48 w=8
>>>>> (XEN) d1v1: FIP=1b773d6e9a FDP=23773d1c48 w=8
>>>>
>>>> Well, it should have been quite obvious (to me) that this is
>>>> related to xsaveopt behavior (according to my own observations
>>>> of cases where the selector/offset fields don't get written). Could
>>>> you double check whether the system(s) you see the problem on
>>>> support xsaveopt, and if so, simply comment out the conditionals
>>>> that cause it to be used?
>>>
>>> They do support xsaveopt.
>>> I'll ifdef these out for tonight's test run.
>
> There was a problem with my commit for this test run, so I did not get
> the data I wanted.
>
> I will test with this patch tonight.

Testing runs over the past few nights were successful with this patch.
While not totally conclusive, I would say that this has solved the problem.

Thank you again.

Ben

>> And attached/below also a tentative fix.
>>
>> Jan
>>
>> --- a/xen/arch/x86/xstate.c
>> +++ b/xen/arch/x86/xstate.c
>> @@ -72,9 +72,24 @@ void xsave(struct vcpu *v, uint64_t mask
>>      if ( word_size <= 0 || !is_pv_32bit_vcpu(v) )
>>      {
>>          if ( cpu_has_xsaveopt )
>> +        {
>> +            /*
>> +             * xsaveopt may not write the FPU portion even when the respective
>> +             * mask bit is set. For the check further down to work we hence
>> +             * need to put the save image back into the state that it was in
>> +             * right after the previous xsaveopt.
>> +             */
>> +            if ( word_size > 0 &&
>> +                 (ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] == 4 ||
>> +                  ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] == 2) )
>> +            {
>> +                ptr->fpu_sse.fip.sel = 0;
>> +                ptr->fpu_sse.fdp.sel = 0;
>> +            }
>>              asm volatile ( ".byte 0x48,0x0f,0xae,0x37"
>>                             : "=m" (*ptr)
>>                             : "a" (lmask), "d" (hmask), "D" (ptr) );
>> +        }
>>          else
>>              asm volatile ( ".byte 0x48,0x0f,0xae,0x27"
>>                             : "=m" (*ptr)
>>
>>


* [PATCH] x86: refine FPU selector handling code for XSAVEOPT
  2013-07-22 12:25                                                         ` Ben Guthro
@ 2013-08-05 13:05                                                           ` Jan Beulich
  2013-08-05 16:03                                                             ` Keir Fraser
  0 siblings, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2013-08-05 13:05 UTC (permalink / raw)
  To: xen-devel; +Cc: Ben Guthro, Keir Fraser

[-- Attachment #1: Type: text/plain, Size: 2113 bytes --]

Some extra tweaks are necessary to deal with the situation of XSAVEOPT
not writing the FPU portion of the save image (due to it detecting that
the register state did not get modified since the last XRSTOR).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Ben Guthro <ben.guthro@gmail.com>

--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -71,10 +71,28 @@ void xsave(struct vcpu *v, uint64_t mask
 
     if ( word_size <= 0 || !is_pv_32bit_vcpu(v) )
     {
+        typeof(ptr->fpu_sse.fip.sel) fcs = ptr->fpu_sse.fip.sel;
+        typeof(ptr->fpu_sse.fdp.sel) fds = ptr->fpu_sse.fdp.sel;
+
         if ( cpu_has_xsaveopt )
+        {
+            /*
+             * xsaveopt may not write the FPU portion even when the respective
+             * mask bit is set. For the check further down to work we hence
+             * need to put the save image back into the state that it was in
+             * right after the previous xsaveopt.
+             */
+            if ( word_size > 0 &&
+                 (ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] == 4 ||
+                  ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] == 2) )
+            {
+                ptr->fpu_sse.fip.sel = 0;
+                ptr->fpu_sse.fdp.sel = 0;
+            }
             asm volatile ( ".byte 0x48,0x0f,0xae,0x37"
                            : "=m" (*ptr)
                            : "a" (lmask), "d" (hmask), "D" (ptr) );
+        }
         else
             asm volatile ( ".byte 0x48,0x0f,0xae,0x27"
                            : "=m" (*ptr)
@@ -87,7 +105,14 @@ void xsave(struct vcpu *v, uint64_t mask
               */
              (!(ptr->fpu_sse.fsw & 0x0080) &&
               boot_cpu_data.x86_vendor == X86_VENDOR_AMD) )
+        {
+            if ( cpu_has_xsaveopt && word_size > 0 )
+            {
+                ptr->fpu_sse.fip.sel = fcs;
+                ptr->fpu_sse.fdp.sel = fds;
+            }
             return;
+        }
 
         if ( word_size > 0 &&
              !((ptr->fpu_sse.fip.addr | ptr->fpu_sse.fdp.addr) >> 32) )






* Re: [PATCH] x86: refine FPU selector handling code for XSAVEOPT
  2013-08-05 13:05                                                           ` [PATCH] x86: refine FPU selector handling code for XSAVEOPT Jan Beulich
@ 2013-08-05 16:03                                                             ` Keir Fraser
  0 siblings, 0 replies; 38+ messages in thread
From: Keir Fraser @ 2013-08-05 16:03 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Ben Guthro

On 05/08/2013 14:05, "Jan Beulich" <JBeulich@suse.com> wrote:

> Some extra tweaks are necessary to deal with the situation of XSAVEOPT
> not writing the FPU portion of the save image (due to it detecting that
> the register state did not get modified since the last XRSTOR).
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Tested-by: Ben Guthro <ben.guthro@gmail.com>

Acked-by: Keir Fraser <keir@xen.org>

> --- a/xen/arch/x86/xstate.c
> +++ b/xen/arch/x86/xstate.c
> @@ -71,10 +71,28 @@ void xsave(struct vcpu *v, uint64_t mask
>  
>      if ( word_size <= 0 || !is_pv_32bit_vcpu(v) )
>      {
> +        typeof(ptr->fpu_sse.fip.sel) fcs = ptr->fpu_sse.fip.sel;
> +        typeof(ptr->fpu_sse.fdp.sel) fds = ptr->fpu_sse.fdp.sel;
> +
>          if ( cpu_has_xsaveopt )
> +        {
> +            /*
> +             * xsaveopt may not write the FPU portion even when the respective
> +             * mask bit is set. For the check further down to work we hence
> +             * need to put the save image back into the state that it was in
> +             * right after the previous xsaveopt.
> +             */
> +            if ( word_size > 0 &&
> +                 (ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] == 4 ||
> +                  ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] == 2) )
> +            {
> +                ptr->fpu_sse.fip.sel = 0;
> +                ptr->fpu_sse.fdp.sel = 0;
> +            }
>              asm volatile ( ".byte 0x48,0x0f,0xae,0x37"
>                             : "=m" (*ptr)
>                             : "a" (lmask), "d" (hmask), "D" (ptr) );
> +        }
>          else
>              asm volatile ( ".byte 0x48,0x0f,0xae,0x27"
>                             : "=m" (*ptr)
> @@ -87,7 +105,14 @@ void xsave(struct vcpu *v, uint64_t mask
>                */
>               (!(ptr->fpu_sse.fsw & 0x0080) &&
>                boot_cpu_data.x86_vendor == X86_VENDOR_AMD) )
> +        {
> +            if ( cpu_has_xsaveopt && word_size > 0 )
> +            {
> +                ptr->fpu_sse.fip.sel = fcs;
> +                ptr->fpu_sse.fdp.sel = fds;
> +            }
>              return;
> +        }
>  
>          if ( word_size > 0 &&
>               !((ptr->fpu_sse.fip.addr | ptr->fpu_sse.fdp.addr) >> 32) )
> 
> 
> 



Thread overview: 38+ messages
2013-07-03 14:02 XSAVE/XRSTOR crash resurgence in 4.3 Ben Guthro
2013-07-04 13:21 ` Jan Beulich
2013-07-04 13:24   ` Ben Guthro
2013-07-04 18:19     ` Mark Roddy
2013-07-05  6:42       ` Jan Beulich
2013-07-05 10:30       ` Jan Beulich
2013-07-05 12:10         ` Ben Guthro
2013-07-05 12:15           ` Jan Beulich
2013-07-05 12:58             ` Ben Guthro
2013-07-08 14:13               ` Ben Guthro
2013-07-08 14:24                 ` Jan Beulich
2013-07-08 14:31                   ` Ben Guthro
2013-07-08 14:40                     ` Jan Beulich
2013-07-08 14:42                       ` Ben Guthro
2013-07-08 14:47                         ` Jan Beulich
2013-07-08 15:10                           ` Ben Guthro
2013-07-12 13:11                             ` Ben Guthro
2013-07-12 13:38                               ` Jan Beulich
2013-07-12 13:49                                 ` Ben Guthro
2013-07-12 14:34                                   ` Jan Beulich
2013-07-12 14:49                                     ` Ben Guthro
2013-07-12 14:55                                       ` Jan Beulich
2013-07-12 15:14                                         ` Ben Guthro
2013-07-15  6:41                                           ` Jan Beulich
2013-07-15 12:33                                           ` Jan Beulich
2013-07-15 12:43                                             ` Ben Guthro
2013-07-15 13:49                                               ` Ben Guthro
2013-07-15 14:06                                                 ` Jan Beulich
2013-07-16 16:23                                                 ` Jan Beulich
2013-07-16 16:57                                                   ` Ben Guthro
2013-07-17  6:38                                                     ` Jan Beulich
2013-07-17 13:07                                                       ` Ben Guthro
2013-07-22 12:25                                                         ` Ben Guthro
2013-08-05 13:05                                                           ` [PATCH] x86: refine FPU selector handling code for XSAVEOPT Jan Beulich
2013-08-05 16:03                                                             ` Keir Fraser
2013-07-08 14:44                       ` XSAVE/XRSTOR crash resurgence in 4.3 Andrew Cooper
2013-07-08 14:52                         ` Jan Beulich
2013-07-08 14:55                           ` Andrew Cooper
