All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/3] target-i386: Use C struct for xsave area layout, offsets & sizes
@ 2015-11-30 17:34 ` Eduardo Habkost
  0 siblings, 0 replies; 18+ messages in thread
From: Eduardo Habkost @ 2015-11-30 17:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini, kvm, Huaitong Han

target-i386/cpu.c:ext_save_area uses magic numbers for the xsave
area offsets and sizes, and target-i386/kvm.c:kvm_{put,get}_xsave()
uses offset macros and bit manipulation to access the xsave area.
This series changes both to use C structs for those operations.

I still need to figure out a way to write unit tests for the new
code. Maybe I will just copy and paste the new and old functions,
and test them locally (checking if they give the same results
when translating blobs of random bytes).

Changes v1 -> v2:
* Use uint8_t[8*n] instead of uint64_t[n] for register data
* Keep the QEMU_BUILD_BUG_ON lines

v1 -> v2 diff below:

  diff --git a/target-i386/cpu.h b/target-i386/cpu.h
  index 3d1d01e..41f55ef 100644
  --- a/target-i386/cpu.h
  +++ b/target-i386/cpu.h
  @@ -818,7 +818,7 @@ typedef union X86LegacyXSaveArea {
           uint32_t mxcsr;
           uint32_t mxcsr_mask;
           FPReg fpregs[8];
  -        uint64_t xmm_regs[16][2];
  +        uint8_t xmm_regs[16][16];
       };
       uint8_t data[512];
   } X86LegacyXSaveArea;
  @@ -831,7 +831,7 @@ typedef struct X86XSaveHeader {

   /* Ext. save area 2: AVX State */
   typedef struct XSaveAVX {
  -    uint64_t ymmh[16][2];
  +    uint8_t ymmh[16][16];
   } XSaveAVX;

   /* Ext. save area 3: BNDREG */
  @@ -852,12 +852,12 @@ typedef struct XSaveOpmask {

   /* Ext. save area 6: ZMM_Hi256 */
   typedef struct XSaveZMM_Hi256 {
  -    uint64_t zmm_hi256[16][4];
  +    uint8_t zmm_hi256[16][32];
   } XSaveZMM_Hi256;

   /* Ext. save area 7: Hi16_ZMM */
   typedef struct XSaveHi16_ZMM {
  -    XMMReg hi16_zmm[16];
  +    uint8_t hi16_zmm[16][64];
   } XSaveHi16_ZMM;

   typedef struct X86XSaveArea {
  diff --git a/target-i386/kvm.c b/target-i386/kvm.c
  index 5e7ec70..98249e4 100644
  --- a/target-i386/kvm.c
  +++ b/target-i386/kvm.c
  @@ -1203,6 +1203,43 @@ static int kvm_put_fpu(X86CPU *cpu)
       return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_FPU, &fpu);
   }

  +#define XSAVE_FCW_FSW     0
  +#define XSAVE_FTW_FOP     1
  +#define XSAVE_CWD_RIP     2
  +#define XSAVE_CWD_RDP     4
  +#define XSAVE_MXCSR       6
  +#define XSAVE_ST_SPACE    8
  +#define XSAVE_XMM_SPACE   40
  +#define XSAVE_XSTATE_BV   128
  +#define XSAVE_YMMH_SPACE  144
  +#define XSAVE_BNDREGS     240
  +#define XSAVE_BNDCSR      256
  +#define XSAVE_OPMASK      272
  +#define XSAVE_ZMM_Hi256   288
  +#define XSAVE_Hi16_ZMM    416
  +
  +#define XSAVE_BYTE_OFFSET(word_offset) \
  +    ((word_offset)*sizeof(((struct kvm_xsave*)0)->region[0]))
  +
  +#define ASSERT_OFFSET(word_offset, field) \
  +    QEMU_BUILD_BUG_ON(XSAVE_BYTE_OFFSET(word_offset) != \
  +                      offsetof(X86XSaveArea, field))
  +
  +ASSERT_OFFSET(XSAVE_FCW_FSW, legacy.fcw);
  +ASSERT_OFFSET(XSAVE_FTW_FOP, legacy.ftw);
  +ASSERT_OFFSET(XSAVE_CWD_RIP, legacy.fpip);
  +ASSERT_OFFSET(XSAVE_CWD_RDP, legacy.fpdp);
  +ASSERT_OFFSET(XSAVE_MXCSR, legacy.mxcsr);
  +ASSERT_OFFSET(XSAVE_ST_SPACE, legacy.fpregs);
  +ASSERT_OFFSET(XSAVE_XMM_SPACE, legacy.xmm_regs);
  +ASSERT_OFFSET(XSAVE_XSTATE_BV, header.xstate_bv);
  +ASSERT_OFFSET(XSAVE_YMMH_SPACE, avx_state);
  +ASSERT_OFFSET(XSAVE_BNDREGS, bndreg_state);
  +ASSERT_OFFSET(XSAVE_BNDCSR, bndcsr_state);
  +ASSERT_OFFSET(XSAVE_OPMASK, opmask_state);
  +ASSERT_OFFSET(XSAVE_ZMM_Hi256, zmm_hi256_state);
  +ASSERT_OFFSET(XSAVE_Hi16_ZMM, hi16_zmm_state);
  +
   static int kvm_put_xsave(X86CPU *cpu)
   {
       CPUX86State *env = &cpu->env;
  @@ -1239,17 +1276,17 @@ static int kvm_put_xsave(X86CPU *cpu)
               sizeof env->opmask_regs);

       for (i = 0; i < CPU_NB_REGS; i++) {
  -        X86LegacyXSaveArea *legacy = &xsave->legacy;
  -        XSaveAVX *avx = &xsave->avx_state;
  -        XSaveZMM_Hi256 *zmm_hi256 = &xsave->zmm_hi256_state;
  -        stq_p(&legacy->xmm_regs[i][0],     env->xmm_regs[i].XMM_Q(0));
  -        stq_p(&legacy->xmm_regs[i][1],     env->xmm_regs[i].XMM_Q(1));
  -        stq_p(&avx->ymmh[i][0],            env->xmm_regs[i].XMM_Q(2));
  -        stq_p(&avx->ymmh[i][1],            env->xmm_regs[i].XMM_Q(3));
  -        stq_p(&zmm_hi256->zmm_hi256[i][0], env->xmm_regs[i].XMM_Q(4));
  -        stq_p(&zmm_hi256->zmm_hi256[i][1], env->xmm_regs[i].XMM_Q(5));
  -        stq_p(&zmm_hi256->zmm_hi256[i][2], env->xmm_regs[i].XMM_Q(6));
  -        stq_p(&zmm_hi256->zmm_hi256[i][3], env->xmm_regs[i].XMM_Q(7));
  +        uint8_t *xmm = xsave->legacy.xmm_regs[i];
  +        uint8_t *ymmh = xsave->avx_state.ymmh[i];
  +        uint8_t *zmmh = xsave->zmm_hi256_state.zmm_hi256[i];
  +        stq_p(xmm,     env->xmm_regs[i].XMM_Q(0));
  +        stq_p(xmm+8,   env->xmm_regs[i].XMM_Q(1));
  +        stq_p(ymmh,    env->xmm_regs[i].XMM_Q(2));
  +        stq_p(ymmh+8,  env->xmm_regs[i].XMM_Q(3));
  +        stq_p(zmmh,    env->xmm_regs[i].XMM_Q(4));
  +        stq_p(zmmh+8,  env->xmm_regs[i].XMM_Q(5));
  +        stq_p(zmmh+16, env->xmm_regs[i].XMM_Q(6));
  +        stq_p(zmmh+24, env->xmm_regs[i].XMM_Q(7));
       }

   #ifdef TARGET_X86_64
  @@ -1625,17 +1662,17 @@ static int kvm_get_xsave(X86CPU *cpu)
               sizeof env->opmask_regs);

       for (i = 0; i < CPU_NB_REGS; i++) {
  -        X86LegacyXSaveArea *legacy = &xsave->legacy;
  -        XSaveAVX *avx = &xsave->avx_state;
  -        XSaveZMM_Hi256 *zmm_hi256 = &xsave->zmm_hi256_state;
  -        env->xmm_regs[i].XMM_Q(0) = ldq_p(&legacy->xmm_regs[i][0]);
  -        env->xmm_regs[i].XMM_Q(1) = ldq_p(&legacy->xmm_regs[i][1]);
  -        env->xmm_regs[i].XMM_Q(2) = ldq_p(&avx->ymmh[i][0]);
  -        env->xmm_regs[i].XMM_Q(3) = ldq_p(&avx->ymmh[i][1]);
  -        env->xmm_regs[i].XMM_Q(4) = ldq_p(&zmm_hi256->zmm_hi256[i][0]);
  -        env->xmm_regs[i].XMM_Q(5) = ldq_p(&zmm_hi256->zmm_hi256[i][1]);
  -        env->xmm_regs[i].XMM_Q(6) = ldq_p(&zmm_hi256->zmm_hi256[i][2]);
  -        env->xmm_regs[i].XMM_Q(7) = ldq_p(&zmm_hi256->zmm_hi256[i][3]);
  +        uint8_t *xmm = xsave->legacy.xmm_regs[i];
  +        uint8_t *ymmh = xsave->avx_state.ymmh[i];
  +        uint8_t *zmmh = xsave->zmm_hi256_state.zmm_hi256[i];
  +        env->xmm_regs[i].XMM_Q(0) = ldq_p(xmm);
  +        env->xmm_regs[i].XMM_Q(1) = ldq_p(xmm+8);
  +        env->xmm_regs[i].XMM_Q(2) = ldq_p(ymmh);
  +        env->xmm_regs[i].XMM_Q(3) = ldq_p(ymmh+8);
  +        env->xmm_regs[i].XMM_Q(4) = ldq_p(zmmh);
  +        env->xmm_regs[i].XMM_Q(5) = ldq_p(zmmh+8);
  +        env->xmm_regs[i].XMM_Q(6) = ldq_p(zmmh+16);
  +        env->xmm_regs[i].XMM_Q(7) = ldq_p(zmmh+24);
       }

   #ifdef TARGET_X86_64

Eduardo Habkost (3):
  target-i386: Define structs for layout of xsave area
  target-i386: Use xsave structs for ext_save_area
  target-i386: kvm: Use X86XSaveArea struct for xsave save/load

 target-i386/cpu.c | 18 +++++++----
 target-i386/cpu.h | 85 ++++++++++++++++++++++++++++++++++++++++++++++++
 target-i386/kvm.c | 96 +++++++++++++++++++++++++++++++++----------------------
 3 files changed, 155 insertions(+), 44 deletions(-)

-- 
2.1.0


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Qemu-devel] [PATCH v2 0/3] target-i386: Use C struct for xsave area layout, offsets & sizes
@ 2015-11-30 17:34 ` Eduardo Habkost
  0 siblings, 0 replies; 18+ messages in thread
From: Eduardo Habkost @ 2015-11-30 17:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini, kvm, Huaitong Han

target-i386/cpu.c:ext_save_area uses magic numbers for the xsave
area offsets and sizes, and target-i386/kvm.c:kvm_{put,get}_xsave()
uses offset macros and bit manipulation to access the xsave area.
This series changes both to use C structs for those operations.

I still need to figure out a way to write unit tests for the new
code. Maybe I will just copy and paste the new and old functions,
and test them locally (checking if they give the same results
when translating blobs of random bytes).

Changes v1 -> v2:
* Use uint8_t[8*n] instead of uint64_t[n] for register data
* Keep the QEMU_BUILD_BUG_ON lines

v1 -> v2 diff below:

  diff --git a/target-i386/cpu.h b/target-i386/cpu.h
  index 3d1d01e..41f55ef 100644
  --- a/target-i386/cpu.h
  +++ b/target-i386/cpu.h
  @@ -818,7 +818,7 @@ typedef union X86LegacyXSaveArea {
           uint32_t mxcsr;
           uint32_t mxcsr_mask;
           FPReg fpregs[8];
  -        uint64_t xmm_regs[16][2];
  +        uint8_t xmm_regs[16][16];
       };
       uint8_t data[512];
   } X86LegacyXSaveArea;
  @@ -831,7 +831,7 @@ typedef struct X86XSaveHeader {

   /* Ext. save area 2: AVX State */
   typedef struct XSaveAVX {
  -    uint64_t ymmh[16][2];
  +    uint8_t ymmh[16][16];
   } XSaveAVX;

   /* Ext. save area 3: BNDREG */
  @@ -852,12 +852,12 @@ typedef struct XSaveOpmask {

   /* Ext. save area 6: ZMM_Hi256 */
   typedef struct XSaveZMM_Hi256 {
  -    uint64_t zmm_hi256[16][4];
  +    uint8_t zmm_hi256[16][32];
   } XSaveZMM_Hi256;

   /* Ext. save area 7: Hi16_ZMM */
   typedef struct XSaveHi16_ZMM {
  -    XMMReg hi16_zmm[16];
  +    uint8_t hi16_zmm[16][64];
   } XSaveHi16_ZMM;

   typedef struct X86XSaveArea {
  diff --git a/target-i386/kvm.c b/target-i386/kvm.c
  index 5e7ec70..98249e4 100644
  --- a/target-i386/kvm.c
  +++ b/target-i386/kvm.c
  @@ -1203,6 +1203,43 @@ static int kvm_put_fpu(X86CPU *cpu)
       return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_FPU, &fpu);
   }

  +#define XSAVE_FCW_FSW     0
  +#define XSAVE_FTW_FOP     1
  +#define XSAVE_CWD_RIP     2
  +#define XSAVE_CWD_RDP     4
  +#define XSAVE_MXCSR       6
  +#define XSAVE_ST_SPACE    8
  +#define XSAVE_XMM_SPACE   40
  +#define XSAVE_XSTATE_BV   128
  +#define XSAVE_YMMH_SPACE  144
  +#define XSAVE_BNDREGS     240
  +#define XSAVE_BNDCSR      256
  +#define XSAVE_OPMASK      272
  +#define XSAVE_ZMM_Hi256   288
  +#define XSAVE_Hi16_ZMM    416
  +
  +#define XSAVE_BYTE_OFFSET(word_offset) \
  +    ((word_offset)*sizeof(((struct kvm_xsave*)0)->region[0]))
  +
  +#define ASSERT_OFFSET(word_offset, field) \
  +    QEMU_BUILD_BUG_ON(XSAVE_BYTE_OFFSET(word_offset) != \
  +                      offsetof(X86XSaveArea, field))
  +
  +ASSERT_OFFSET(XSAVE_FCW_FSW, legacy.fcw);
  +ASSERT_OFFSET(XSAVE_FTW_FOP, legacy.ftw);
  +ASSERT_OFFSET(XSAVE_CWD_RIP, legacy.fpip);
  +ASSERT_OFFSET(XSAVE_CWD_RDP, legacy.fpdp);
  +ASSERT_OFFSET(XSAVE_MXCSR, legacy.mxcsr);
  +ASSERT_OFFSET(XSAVE_ST_SPACE, legacy.fpregs);
  +ASSERT_OFFSET(XSAVE_XMM_SPACE, legacy.xmm_regs);
  +ASSERT_OFFSET(XSAVE_XSTATE_BV, header.xstate_bv);
  +ASSERT_OFFSET(XSAVE_YMMH_SPACE, avx_state);
  +ASSERT_OFFSET(XSAVE_BNDREGS, bndreg_state);
  +ASSERT_OFFSET(XSAVE_BNDCSR, bndcsr_state);
  +ASSERT_OFFSET(XSAVE_OPMASK, opmask_state);
  +ASSERT_OFFSET(XSAVE_ZMM_Hi256, zmm_hi256_state);
  +ASSERT_OFFSET(XSAVE_Hi16_ZMM, hi16_zmm_state);
  +
   static int kvm_put_xsave(X86CPU *cpu)
   {
       CPUX86State *env = &cpu->env;
  @@ -1239,17 +1276,17 @@ static int kvm_put_xsave(X86CPU *cpu)
               sizeof env->opmask_regs);

       for (i = 0; i < CPU_NB_REGS; i++) {
  -        X86LegacyXSaveArea *legacy = &xsave->legacy;
  -        XSaveAVX *avx = &xsave->avx_state;
  -        XSaveZMM_Hi256 *zmm_hi256 = &xsave->zmm_hi256_state;
  -        stq_p(&legacy->xmm_regs[i][0],     env->xmm_regs[i].XMM_Q(0));
  -        stq_p(&legacy->xmm_regs[i][1],     env->xmm_regs[i].XMM_Q(1));
  -        stq_p(&avx->ymmh[i][0],            env->xmm_regs[i].XMM_Q(2));
  -        stq_p(&avx->ymmh[i][1],            env->xmm_regs[i].XMM_Q(3));
  -        stq_p(&zmm_hi256->zmm_hi256[i][0], env->xmm_regs[i].XMM_Q(4));
  -        stq_p(&zmm_hi256->zmm_hi256[i][1], env->xmm_regs[i].XMM_Q(5));
  -        stq_p(&zmm_hi256->zmm_hi256[i][2], env->xmm_regs[i].XMM_Q(6));
  -        stq_p(&zmm_hi256->zmm_hi256[i][3], env->xmm_regs[i].XMM_Q(7));
  +        uint8_t *xmm = xsave->legacy.xmm_regs[i];
  +        uint8_t *ymmh = xsave->avx_state.ymmh[i];
  +        uint8_t *zmmh = xsave->zmm_hi256_state.zmm_hi256[i];
  +        stq_p(xmm,     env->xmm_regs[i].XMM_Q(0));
  +        stq_p(xmm+8,   env->xmm_regs[i].XMM_Q(1));
  +        stq_p(ymmh,    env->xmm_regs[i].XMM_Q(2));
  +        stq_p(ymmh+8,  env->xmm_regs[i].XMM_Q(3));
  +        stq_p(zmmh,    env->xmm_regs[i].XMM_Q(4));
  +        stq_p(zmmh+8,  env->xmm_regs[i].XMM_Q(5));
  +        stq_p(zmmh+16, env->xmm_regs[i].XMM_Q(6));
  +        stq_p(zmmh+24, env->xmm_regs[i].XMM_Q(7));
       }

   #ifdef TARGET_X86_64
  @@ -1625,17 +1662,17 @@ static int kvm_get_xsave(X86CPU *cpu)
               sizeof env->opmask_regs);

       for (i = 0; i < CPU_NB_REGS; i++) {
  -        X86LegacyXSaveArea *legacy = &xsave->legacy;
  -        XSaveAVX *avx = &xsave->avx_state;
  -        XSaveZMM_Hi256 *zmm_hi256 = &xsave->zmm_hi256_state;
  -        env->xmm_regs[i].XMM_Q(0) = ldq_p(&legacy->xmm_regs[i][0]);
  -        env->xmm_regs[i].XMM_Q(1) = ldq_p(&legacy->xmm_regs[i][1]);
  -        env->xmm_regs[i].XMM_Q(2) = ldq_p(&avx->ymmh[i][0]);
  -        env->xmm_regs[i].XMM_Q(3) = ldq_p(&avx->ymmh[i][1]);
  -        env->xmm_regs[i].XMM_Q(4) = ldq_p(&zmm_hi256->zmm_hi256[i][0]);
  -        env->xmm_regs[i].XMM_Q(5) = ldq_p(&zmm_hi256->zmm_hi256[i][1]);
  -        env->xmm_regs[i].XMM_Q(6) = ldq_p(&zmm_hi256->zmm_hi256[i][2]);
  -        env->xmm_regs[i].XMM_Q(7) = ldq_p(&zmm_hi256->zmm_hi256[i][3]);
  +        uint8_t *xmm = xsave->legacy.xmm_regs[i];
  +        uint8_t *ymmh = xsave->avx_state.ymmh[i];
  +        uint8_t *zmmh = xsave->zmm_hi256_state.zmm_hi256[i];
  +        env->xmm_regs[i].XMM_Q(0) = ldq_p(xmm);
  +        env->xmm_regs[i].XMM_Q(1) = ldq_p(xmm+8);
  +        env->xmm_regs[i].XMM_Q(2) = ldq_p(ymmh);
  +        env->xmm_regs[i].XMM_Q(3) = ldq_p(ymmh+8);
  +        env->xmm_regs[i].XMM_Q(4) = ldq_p(zmmh);
  +        env->xmm_regs[i].XMM_Q(5) = ldq_p(zmmh+8);
  +        env->xmm_regs[i].XMM_Q(6) = ldq_p(zmmh+16);
  +        env->xmm_regs[i].XMM_Q(7) = ldq_p(zmmh+24);
       }

   #ifdef TARGET_X86_64

Eduardo Habkost (3):
  target-i386: Define structs for layout of xsave area
  target-i386: Use xsave structs for ext_save_area
  target-i386: kvm: Use X86XSaveArea struct for xsave save/load

 target-i386/cpu.c | 18 +++++++----
 target-i386/cpu.h | 85 ++++++++++++++++++++++++++++++++++++++++++++++++
 target-i386/kvm.c | 96 +++++++++++++++++++++++++++++++++----------------------
 3 files changed, 155 insertions(+), 44 deletions(-)

-- 
2.1.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2 1/3] target-i386: Define structs for layout of xsave area
  2015-11-30 17:34 ` [Qemu-devel] " Eduardo Habkost
@ 2015-11-30 17:34   ` Eduardo Habkost
  -1 siblings, 0 replies; 18+ messages in thread
From: Eduardo Habkost @ 2015-11-30 17:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini, kvm, Huaitong Han

Add structs that define the layout of the xsave areas used by
Intel processors. Add some QEMU_BUILD_BUG_ON lines to ensure the
structs match the XSAVE_* macros in target-i386/kvm.c and the
offsets and sizes at target-i386/cpu.c:ext_save_areas.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
---
Changes v1 -> v2:
* Use uint8_t[8*n] instead of uint64_t[n] for register data
---
 target-i386/cpu.h | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 target-i386/kvm.c | 22 ++++++++++++++
 2 files changed, 107 insertions(+)

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 84edfd0..41f55ef 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -806,6 +806,91 @@ typedef struct {
 
 #define NB_OPMASK_REGS 8
 
+typedef union X86LegacyXSaveArea {
+    struct {
+        uint16_t fcw;
+        uint16_t fsw;
+        uint8_t ftw;
+        uint8_t reserved;
+        uint16_t fpop;
+        uint64_t fpip;
+        uint64_t fpdp;
+        uint32_t mxcsr;
+        uint32_t mxcsr_mask;
+        FPReg fpregs[8];
+        uint8_t xmm_regs[16][16];
+    };
+    uint8_t data[512];
+} X86LegacyXSaveArea;
+
+typedef struct X86XSaveHeader {
+    uint64_t xstate_bv;
+    uint64_t xcomp_bv;
+    uint8_t reserved[48];
+} X86XSaveHeader;
+
+/* Ext. save area 2: AVX State */
+typedef struct XSaveAVX {
+    uint8_t ymmh[16][16];
+} XSaveAVX;
+
+/* Ext. save area 3: BNDREG */
+typedef struct XSaveBNDREG {
+    BNDReg bnd_regs[4];
+} XSaveBNDREG;
+
+/* Ext. save area 4: BNDCSR */
+typedef union XSaveBNDCSR {
+    BNDCSReg bndcsr;
+    uint8_t data[64];
+} XSaveBNDCSR;
+
+/* Ext. save area 5: Opmask */
+typedef struct XSaveOpmask {
+    uint64_t opmask_regs[NB_OPMASK_REGS];
+} XSaveOpmask;
+
+/* Ext. save area 6: ZMM_Hi256 */
+typedef struct XSaveZMM_Hi256 {
+    uint8_t zmm_hi256[16][32];
+} XSaveZMM_Hi256;
+
+/* Ext. save area 7: Hi16_ZMM */
+typedef struct XSaveHi16_ZMM {
+    uint8_t hi16_zmm[16][64];
+} XSaveHi16_ZMM;
+
+typedef struct X86XSaveArea {
+    X86LegacyXSaveArea legacy;
+    X86XSaveHeader header;
+
+    /* Extended save areas: */
+
+    /* AVX State: */
+    XSaveAVX avx_state;
+    uint8_t padding[960-576-sizeof(XSaveAVX)];
+    /* MPX State: */
+    XSaveBNDREG bndreg_state;
+    XSaveBNDCSR bndcsr_state;
+    /* AVX-512 State: */
+    XSaveOpmask opmask_state;
+    XSaveZMM_Hi256 zmm_hi256_state;
+    XSaveHi16_ZMM hi16_zmm_state;
+} X86XSaveArea;
+
+QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, avx_state) != 0x240);
+QEMU_BUILD_BUG_ON(sizeof(XSaveAVX) != 0x100);
+QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, bndreg_state) != 0x3c0);
+QEMU_BUILD_BUG_ON(sizeof(XSaveBNDREG) != 0x40);
+QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, bndcsr_state) != 0x400);
+QEMU_BUILD_BUG_ON(sizeof(XSaveBNDCSR) != 0x40);
+QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, opmask_state) != 0x440);
+QEMU_BUILD_BUG_ON(sizeof(XSaveOpmask) != 0x40);
+QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, zmm_hi256_state) != 0x480);
+QEMU_BUILD_BUG_ON(sizeof(XSaveZMM_Hi256) != 0x200);
+QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, hi16_zmm_state) != 0x680);
+QEMU_BUILD_BUG_ON(sizeof(XSaveHi16_ZMM) != 0x400);
+
 typedef enum TPRAccess {
     TPR_ACCESS_READ,
     TPR_ACCESS_WRITE,
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 6dc9846..b8b336b 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1218,6 +1218,28 @@ static int kvm_put_fpu(X86CPU *cpu)
 #define XSAVE_ZMM_Hi256   288
 #define XSAVE_Hi16_ZMM    416
 
+#define XSAVE_BYTE_OFFSET(word_offset) \
+    ((word_offset)*sizeof(((struct kvm_xsave*)0)->region[0]))
+
+#define ASSERT_OFFSET(word_offset, field) \
+    QEMU_BUILD_BUG_ON(XSAVE_BYTE_OFFSET(word_offset) != \
+                      offsetof(X86XSaveArea, field))
+
+ASSERT_OFFSET(XSAVE_FCW_FSW, legacy.fcw);
+ASSERT_OFFSET(XSAVE_FTW_FOP, legacy.ftw);
+ASSERT_OFFSET(XSAVE_CWD_RIP, legacy.fpip);
+ASSERT_OFFSET(XSAVE_CWD_RDP, legacy.fpdp);
+ASSERT_OFFSET(XSAVE_MXCSR, legacy.mxcsr);
+ASSERT_OFFSET(XSAVE_ST_SPACE, legacy.fpregs);
+ASSERT_OFFSET(XSAVE_XMM_SPACE, legacy.xmm_regs);
+ASSERT_OFFSET(XSAVE_XSTATE_BV, header.xstate_bv);
+ASSERT_OFFSET(XSAVE_YMMH_SPACE, avx_state);
+ASSERT_OFFSET(XSAVE_BNDREGS, bndreg_state);
+ASSERT_OFFSET(XSAVE_BNDCSR, bndcsr_state);
+ASSERT_OFFSET(XSAVE_OPMASK, opmask_state);
+ASSERT_OFFSET(XSAVE_ZMM_Hi256, zmm_hi256_state);
+ASSERT_OFFSET(XSAVE_Hi16_ZMM, hi16_zmm_state);
+
 static int kvm_put_xsave(X86CPU *cpu)
 {
     CPUX86State *env = &cpu->env;
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [Qemu-devel] [PATCH v2 1/3] target-i386: Define structs for layout of xsave area
@ 2015-11-30 17:34   ` Eduardo Habkost
  0 siblings, 0 replies; 18+ messages in thread
From: Eduardo Habkost @ 2015-11-30 17:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini, kvm, Huaitong Han

Add structs that define the layout of the xsave areas used by
Intel processors. Add some QEMU_BUILD_BUG_ON lines to ensure the
structs match the XSAVE_* macros in target-i386/kvm.c and the
offsets and sizes at target-i386/cpu.c:ext_save_areas.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
---
Changes v1 -> v2:
* Use uint8_t[8*n] instead of uint64_t[n] for register data
---
 target-i386/cpu.h | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 target-i386/kvm.c | 22 ++++++++++++++
 2 files changed, 107 insertions(+)

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 84edfd0..41f55ef 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -806,6 +806,91 @@ typedef struct {
 
 #define NB_OPMASK_REGS 8
 
+typedef union X86LegacyXSaveArea {
+    struct {
+        uint16_t fcw;
+        uint16_t fsw;
+        uint8_t ftw;
+        uint8_t reserved;
+        uint16_t fpop;
+        uint64_t fpip;
+        uint64_t fpdp;
+        uint32_t mxcsr;
+        uint32_t mxcsr_mask;
+        FPReg fpregs[8];
+        uint8_t xmm_regs[16][16];
+    };
+    uint8_t data[512];
+} X86LegacyXSaveArea;
+
+typedef struct X86XSaveHeader {
+    uint64_t xstate_bv;
+    uint64_t xcomp_bv;
+    uint8_t reserved[48];
+} X86XSaveHeader;
+
+/* Ext. save area 2: AVX State */
+typedef struct XSaveAVX {
+    uint8_t ymmh[16][16];
+} XSaveAVX;
+
+/* Ext. save area 3: BNDREG */
+typedef struct XSaveBNDREG {
+    BNDReg bnd_regs[4];
+} XSaveBNDREG;
+
+/* Ext. save area 4: BNDCSR */
+typedef union XSaveBNDCSR {
+    BNDCSReg bndcsr;
+    uint8_t data[64];
+} XSaveBNDCSR;
+
+/* Ext. save area 5: Opmask */
+typedef struct XSaveOpmask {
+    uint64_t opmask_regs[NB_OPMASK_REGS];
+} XSaveOpmask;
+
+/* Ext. save area 6: ZMM_Hi256 */
+typedef struct XSaveZMM_Hi256 {
+    uint8_t zmm_hi256[16][32];
+} XSaveZMM_Hi256;
+
+/* Ext. save area 7: Hi16_ZMM */
+typedef struct XSaveHi16_ZMM {
+    uint8_t hi16_zmm[16][64];
+} XSaveHi16_ZMM;
+
+typedef struct X86XSaveArea {
+    X86LegacyXSaveArea legacy;
+    X86XSaveHeader header;
+
+    /* Extended save areas: */
+
+    /* AVX State: */
+    XSaveAVX avx_state;
+    uint8_t padding[960-576-sizeof(XSaveAVX)];
+    /* MPX State: */
+    XSaveBNDREG bndreg_state;
+    XSaveBNDCSR bndcsr_state;
+    /* AVX-512 State: */
+    XSaveOpmask opmask_state;
+    XSaveZMM_Hi256 zmm_hi256_state;
+    XSaveHi16_ZMM hi16_zmm_state;
+} X86XSaveArea;
+
+QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, avx_state) != 0x240);
+QEMU_BUILD_BUG_ON(sizeof(XSaveAVX) != 0x100);
+QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, bndreg_state) != 0x3c0);
+QEMU_BUILD_BUG_ON(sizeof(XSaveBNDREG) != 0x40);
+QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, bndcsr_state) != 0x400);
+QEMU_BUILD_BUG_ON(sizeof(XSaveBNDCSR) != 0x40);
+QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, opmask_state) != 0x440);
+QEMU_BUILD_BUG_ON(sizeof(XSaveOpmask) != 0x40);
+QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, zmm_hi256_state) != 0x480);
+QEMU_BUILD_BUG_ON(sizeof(XSaveZMM_Hi256) != 0x200);
+QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, hi16_zmm_state) != 0x680);
+QEMU_BUILD_BUG_ON(sizeof(XSaveHi16_ZMM) != 0x400);
+
 typedef enum TPRAccess {
     TPR_ACCESS_READ,
     TPR_ACCESS_WRITE,
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 6dc9846..b8b336b 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1218,6 +1218,28 @@ static int kvm_put_fpu(X86CPU *cpu)
 #define XSAVE_ZMM_Hi256   288
 #define XSAVE_Hi16_ZMM    416
 
+#define XSAVE_BYTE_OFFSET(word_offset) \
+    ((word_offset)*sizeof(((struct kvm_xsave*)0)->region[0]))
+
+#define ASSERT_OFFSET(word_offset, field) \
+    QEMU_BUILD_BUG_ON(XSAVE_BYTE_OFFSET(word_offset) != \
+                      offsetof(X86XSaveArea, field))
+
+ASSERT_OFFSET(XSAVE_FCW_FSW, legacy.fcw);
+ASSERT_OFFSET(XSAVE_FTW_FOP, legacy.ftw);
+ASSERT_OFFSET(XSAVE_CWD_RIP, legacy.fpip);
+ASSERT_OFFSET(XSAVE_CWD_RDP, legacy.fpdp);
+ASSERT_OFFSET(XSAVE_MXCSR, legacy.mxcsr);
+ASSERT_OFFSET(XSAVE_ST_SPACE, legacy.fpregs);
+ASSERT_OFFSET(XSAVE_XMM_SPACE, legacy.xmm_regs);
+ASSERT_OFFSET(XSAVE_XSTATE_BV, header.xstate_bv);
+ASSERT_OFFSET(XSAVE_YMMH_SPACE, avx_state);
+ASSERT_OFFSET(XSAVE_BNDREGS, bndreg_state);
+ASSERT_OFFSET(XSAVE_BNDCSR, bndcsr_state);
+ASSERT_OFFSET(XSAVE_OPMASK, opmask_state);
+ASSERT_OFFSET(XSAVE_ZMM_Hi256, zmm_hi256_state);
+ASSERT_OFFSET(XSAVE_Hi16_ZMM, hi16_zmm_state);
+
 static int kvm_put_xsave(X86CPU *cpu)
 {
     CPUX86State *env = &cpu->env;
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 2/3] target-i386: Use xsave structs for ext_save_area
  2015-11-30 17:34 ` [Qemu-devel] " Eduardo Habkost
@ 2015-11-30 17:34   ` Eduardo Habkost
  -1 siblings, 0 replies; 18+ messages in thread
From: Eduardo Habkost @ 2015-11-30 17:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini, kvm, Huaitong Han

This doesn't introduce any change in the code, as the offsets and
struct sizes match what was present in the table. This can be
validated by the QEMU_BUILD_BUG_ON lines in target-i386/cpu.h,
which ensure the struct sizes and offsets match the existing
values in ext_save_area.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
---
 target-i386/cpu.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 11e5e39..bc95437 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -458,17 +458,23 @@ typedef struct ExtSaveArea {
 
 static const ExtSaveArea ext_save_areas[] = {
     [2] = { .feature = FEAT_1_ECX, .bits = CPUID_EXT_AVX,
-            .offset = 0x240, .size = 0x100 },
+            .offset = offsetof(X86XSaveArea, avx_state),
+            .size = sizeof(XSaveAVX) },
     [3] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX,
-            .offset = 0x3c0, .size = 0x40  },
+            .offset = offsetof(X86XSaveArea, bndreg_state),
+            .size = sizeof(XSaveBNDREG)  },
     [4] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX,
-            .offset = 0x400, .size = 0x40  },
+            .offset = offsetof(X86XSaveArea, bndcsr_state),
+            .size = sizeof(XSaveBNDCSR)  },
     [5] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
-            .offset = 0x440, .size = 0x40 },
+            .offset = offsetof(X86XSaveArea, opmask_state),
+            .size = sizeof(XSaveOpmask) },
     [6] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
-            .offset = 0x480, .size = 0x200 },
+            .offset = offsetof(X86XSaveArea, zmm_hi256_state),
+            .size = sizeof(XSaveZMM_Hi256) },
     [7] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
-            .offset = 0x680, .size = 0x400 },
+            .offset = offsetof(X86XSaveArea, hi16_zmm_state),
+            .size = sizeof(XSaveHi16_ZMM) },
 };
 
 const char *get_register_name_32(unsigned int reg)
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [Qemu-devel] [PATCH v2 2/3] target-i386: Use xsave structs for ext_save_area
@ 2015-11-30 17:34   ` Eduardo Habkost
  0 siblings, 0 replies; 18+ messages in thread
From: Eduardo Habkost @ 2015-11-30 17:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini, kvm, Huaitong Han

This doesn't introduce any change in the code, as the offsets and
struct sizes match what was present in the table. This can be
validated by the QEMU_BUILD_BUG_ON lines in target-i386/cpu.h,
which ensure the struct sizes and offsets match the existing
values in ext_save_area.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
---
 target-i386/cpu.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 11e5e39..bc95437 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -458,17 +458,23 @@ typedef struct ExtSaveArea {
 
 static const ExtSaveArea ext_save_areas[] = {
     [2] = { .feature = FEAT_1_ECX, .bits = CPUID_EXT_AVX,
-            .offset = 0x240, .size = 0x100 },
+            .offset = offsetof(X86XSaveArea, avx_state),
+            .size = sizeof(XSaveAVX) },
     [3] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX,
-            .offset = 0x3c0, .size = 0x40  },
+            .offset = offsetof(X86XSaveArea, bndreg_state),
+            .size = sizeof(XSaveBNDREG)  },
     [4] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX,
-            .offset = 0x400, .size = 0x40  },
+            .offset = offsetof(X86XSaveArea, bndcsr_state),
+            .size = sizeof(XSaveBNDCSR)  },
     [5] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
-            .offset = 0x440, .size = 0x40 },
+            .offset = offsetof(X86XSaveArea, opmask_state),
+            .size = sizeof(XSaveOpmask) },
     [6] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
-            .offset = 0x480, .size = 0x200 },
+            .offset = offsetof(X86XSaveArea, zmm_hi256_state),
+            .size = sizeof(XSaveZMM_Hi256) },
     [7] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
-            .offset = 0x680, .size = 0x400 },
+            .offset = offsetof(X86XSaveArea, hi16_zmm_state),
+            .size = sizeof(XSaveHi16_ZMM) },
 };
 
 const char *get_register_name_32(unsigned int reg)
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 3/3] target-i386: kvm: Use X86XSaveArea struct for xsave save/load
  2015-11-30 17:34 ` [Qemu-devel] " Eduardo Habkost
@ 2015-11-30 17:34   ` Eduardo Habkost
  -1 siblings, 0 replies; 18+ messages in thread
From: Eduardo Habkost @ 2015-11-30 17:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini, kvm, Huaitong Han

Instead of using offset macros and bit operations in a uint32_t
array, use the X86XSaveArea struct to perform the loading/saving
operations in kvm_put_xsave() and kvm_get_xsave().

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
---
Changes v1 -> v2:
* Use uint8_t pointers when loading/saving xmm, ymmh, zmmh,
  keeping the same load/save logic from previous code
* Keep the QEMU_BUILD_BUG_ON lines
---
 target-i386/kvm.c | 74 +++++++++++++++++++++++++++----------------------------
 1 file changed, 36 insertions(+), 38 deletions(-)

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index b8b336b..98249e4 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1243,9 +1243,8 @@ ASSERT_OFFSET(XSAVE_Hi16_ZMM, hi16_zmm_state);
 static int kvm_put_xsave(X86CPU *cpu)
 {
     CPUX86State *env = &cpu->env;
-    struct kvm_xsave* xsave = env->kvm_xsave_buf;
+    X86XSaveArea *xsave = env->kvm_xsave_buf;
     uint16_t cwd, swd, twd;
-    uint8_t *xmm, *ymmh, *zmmh;
     int i, r;
 
     if (!has_xsave) {
@@ -1260,25 +1259,26 @@ static int kvm_put_xsave(X86CPU *cpu)
     for (i = 0; i < 8; ++i) {
         twd |= (!env->fptags[i]) << i;
     }
-    xsave->region[XSAVE_FCW_FSW] = (uint32_t)(swd << 16) + cwd;
-    xsave->region[XSAVE_FTW_FOP] = (uint32_t)(env->fpop << 16) + twd;
-    memcpy(&xsave->region[XSAVE_CWD_RIP], &env->fpip, sizeof(env->fpip));
-    memcpy(&xsave->region[XSAVE_CWD_RDP], &env->fpdp, sizeof(env->fpdp));
-    memcpy(&xsave->region[XSAVE_ST_SPACE], env->fpregs,
+    xsave->legacy.fcw = cwd;
+    xsave->legacy.fsw = swd;
+    xsave->legacy.ftw = twd;
+    xsave->legacy.fpop = env->fpop;
+    xsave->legacy.fpip = env->fpip;
+    xsave->legacy.fpdp = env->fpdp;
+    memcpy(&xsave->legacy.fpregs, env->fpregs,
             sizeof env->fpregs);
-    xsave->region[XSAVE_MXCSR] = env->mxcsr;
-    *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV] = env->xstate_bv;
-    memcpy(&xsave->region[XSAVE_BNDREGS], env->bnd_regs,
+    xsave->legacy.mxcsr = env->mxcsr;
+    xsave->header.xstate_bv = env->xstate_bv;
+    memcpy(&xsave->bndreg_state.bnd_regs, env->bnd_regs,
             sizeof env->bnd_regs);
-    memcpy(&xsave->region[XSAVE_BNDCSR], &env->bndcs_regs,
-            sizeof(env->bndcs_regs));
-    memcpy(&xsave->region[XSAVE_OPMASK], env->opmask_regs,
+    xsave->bndcsr_state.bndcsr = env->bndcs_regs;
+    memcpy(&xsave->opmask_state.opmask_regs, env->opmask_regs,
             sizeof env->opmask_regs);
 
-    xmm = (uint8_t *)&xsave->region[XSAVE_XMM_SPACE];
-    ymmh = (uint8_t *)&xsave->region[XSAVE_YMMH_SPACE];
-    zmmh = (uint8_t *)&xsave->region[XSAVE_ZMM_Hi256];
-    for (i = 0; i < CPU_NB_REGS; i++, xmm += 16, ymmh += 16, zmmh += 32) {
+    for (i = 0; i < CPU_NB_REGS; i++) {
+        uint8_t *xmm = xsave->legacy.xmm_regs[i];
+        uint8_t *ymmh = xsave->avx_state.ymmh[i];
+        uint8_t *zmmh = xsave->zmm_hi256_state.zmm_hi256[i];
         stq_p(xmm,     env->xmm_regs[i].XMM_Q(0));
         stq_p(xmm+8,   env->xmm_regs[i].XMM_Q(1));
         stq_p(ymmh,    env->xmm_regs[i].XMM_Q(2));
@@ -1290,7 +1290,7 @@ static int kvm_put_xsave(X86CPU *cpu)
     }
 
 #ifdef TARGET_X86_64
-    memcpy(&xsave->region[XSAVE_Hi16_ZMM], &env->xmm_regs[16],
+    memcpy(&xsave->hi16_zmm_state.hi16_zmm, &env->xmm_regs[16],
             16 * sizeof env->xmm_regs[16]);
 #endif
     r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave);
@@ -1626,9 +1626,8 @@ static int kvm_get_fpu(X86CPU *cpu)
 static int kvm_get_xsave(X86CPU *cpu)
 {
     CPUX86State *env = &cpu->env;
-    struct kvm_xsave* xsave = env->kvm_xsave_buf;
+    X86XSaveArea *xsave = env->kvm_xsave_buf;
     int ret, i;
-    const uint8_t *xmm, *ymmh, *zmmh;
     uint16_t cwd, swd, twd;
 
     if (!has_xsave) {
@@ -1640,33 +1639,32 @@ static int kvm_get_xsave(X86CPU *cpu)
         return ret;
     }
 
-    cwd = (uint16_t)xsave->region[XSAVE_FCW_FSW];
-    swd = (uint16_t)(xsave->region[XSAVE_FCW_FSW] >> 16);
-    twd = (uint16_t)xsave->region[XSAVE_FTW_FOP];
-    env->fpop = (uint16_t)(xsave->region[XSAVE_FTW_FOP] >> 16);
+    cwd = xsave->legacy.fcw;
+    swd = xsave->legacy.fsw;
+    twd = xsave->legacy.ftw;
+    env->fpop = xsave->legacy.fpop;
     env->fpstt = (swd >> 11) & 7;
     env->fpus = swd;
     env->fpuc = cwd;
     for (i = 0; i < 8; ++i) {
         env->fptags[i] = !((twd >> i) & 1);
     }
-    memcpy(&env->fpip, &xsave->region[XSAVE_CWD_RIP], sizeof(env->fpip));
-    memcpy(&env->fpdp, &xsave->region[XSAVE_CWD_RDP], sizeof(env->fpdp));
-    env->mxcsr = xsave->region[XSAVE_MXCSR];
-    memcpy(env->fpregs, &xsave->region[XSAVE_ST_SPACE],
+    env->fpip = xsave->legacy.fpip;
+    env->fpdp = xsave->legacy.fpdp;
+    env->mxcsr = xsave->legacy.mxcsr;
+    memcpy(env->fpregs, &xsave->legacy.fpregs,
             sizeof env->fpregs);
-    env->xstate_bv = *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV];
-    memcpy(env->bnd_regs, &xsave->region[XSAVE_BNDREGS],
+    env->xstate_bv = xsave->header.xstate_bv;
+    memcpy(env->bnd_regs, &xsave->bndreg_state.bnd_regs,
             sizeof env->bnd_regs);
-    memcpy(&env->bndcs_regs, &xsave->region[XSAVE_BNDCSR],
-            sizeof(env->bndcs_regs));
-    memcpy(env->opmask_regs, &xsave->region[XSAVE_OPMASK],
+    env->bndcs_regs = xsave->bndcsr_state.bndcsr;
+    memcpy(env->opmask_regs, &xsave->opmask_state.opmask_regs,
             sizeof env->opmask_regs);
 
-    xmm = (const uint8_t *)&xsave->region[XSAVE_XMM_SPACE];
-    ymmh = (const uint8_t *)&xsave->region[XSAVE_YMMH_SPACE];
-    zmmh = (const uint8_t *)&xsave->region[XSAVE_ZMM_Hi256];
-    for (i = 0; i < CPU_NB_REGS; i++, xmm += 16, ymmh += 16, zmmh += 32) {
+    for (i = 0; i < CPU_NB_REGS; i++) {
+        uint8_t *xmm = xsave->legacy.xmm_regs[i];
+        uint8_t *ymmh = xsave->avx_state.ymmh[i];
+        uint8_t *zmmh = xsave->zmm_hi256_state.zmm_hi256[i];
         env->xmm_regs[i].XMM_Q(0) = ldq_p(xmm);
         env->xmm_regs[i].XMM_Q(1) = ldq_p(xmm+8);
         env->xmm_regs[i].XMM_Q(2) = ldq_p(ymmh);
@@ -1678,7 +1676,7 @@ static int kvm_get_xsave(X86CPU *cpu)
     }
 
 #ifdef TARGET_X86_64
-    memcpy(&env->xmm_regs[16], &xsave->region[XSAVE_Hi16_ZMM],
+    memcpy(&env->xmm_regs[16], &xsave->hi16_zmm_state.hi16_zmm,
            16 * sizeof env->xmm_regs[16]);
 #endif
     return 0;
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [Qemu-devel] [PATCH v2 3/3] target-i386: kvm: Use X86XSaveArea struct for xsave save/load
@ 2015-11-30 17:34   ` Eduardo Habkost
  0 siblings, 0 replies; 18+ messages in thread
From: Eduardo Habkost @ 2015-11-30 17:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini, kvm, Huaitong Han

Instead of using offset macros and bit operations in a uint32_t
array, use the X86XSaveArea struct to perform the loading/saving
operations in kvm_put_xsave() and kvm_get_xsave().

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
---
Changes v1 -> v2:
* Use uint8_t pointers when loading/saving xmm, ymmh, zmmh,
  keeping the same load/save logic from previous code
* Keep the QEMU_BUILD_BUG_ON lines
---
 target-i386/kvm.c | 74 +++++++++++++++++++++++++++----------------------------
 1 file changed, 36 insertions(+), 38 deletions(-)

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index b8b336b..98249e4 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1243,9 +1243,8 @@ ASSERT_OFFSET(XSAVE_Hi16_ZMM, hi16_zmm_state);
 static int kvm_put_xsave(X86CPU *cpu)
 {
     CPUX86State *env = &cpu->env;
-    struct kvm_xsave* xsave = env->kvm_xsave_buf;
+    X86XSaveArea *xsave = env->kvm_xsave_buf;
     uint16_t cwd, swd, twd;
-    uint8_t *xmm, *ymmh, *zmmh;
     int i, r;
 
     if (!has_xsave) {
@@ -1260,25 +1259,26 @@ static int kvm_put_xsave(X86CPU *cpu)
     for (i = 0; i < 8; ++i) {
         twd |= (!env->fptags[i]) << i;
     }
-    xsave->region[XSAVE_FCW_FSW] = (uint32_t)(swd << 16) + cwd;
-    xsave->region[XSAVE_FTW_FOP] = (uint32_t)(env->fpop << 16) + twd;
-    memcpy(&xsave->region[XSAVE_CWD_RIP], &env->fpip, sizeof(env->fpip));
-    memcpy(&xsave->region[XSAVE_CWD_RDP], &env->fpdp, sizeof(env->fpdp));
-    memcpy(&xsave->region[XSAVE_ST_SPACE], env->fpregs,
+    xsave->legacy.fcw = cwd;
+    xsave->legacy.fsw = swd;
+    xsave->legacy.ftw = twd;
+    xsave->legacy.fpop = env->fpop;
+    xsave->legacy.fpip = env->fpip;
+    xsave->legacy.fpdp = env->fpdp;
+    memcpy(&xsave->legacy.fpregs, env->fpregs,
             sizeof env->fpregs);
-    xsave->region[XSAVE_MXCSR] = env->mxcsr;
-    *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV] = env->xstate_bv;
-    memcpy(&xsave->region[XSAVE_BNDREGS], env->bnd_regs,
+    xsave->legacy.mxcsr = env->mxcsr;
+    xsave->header.xstate_bv = env->xstate_bv;
+    memcpy(&xsave->bndreg_state.bnd_regs, env->bnd_regs,
             sizeof env->bnd_regs);
-    memcpy(&xsave->region[XSAVE_BNDCSR], &env->bndcs_regs,
-            sizeof(env->bndcs_regs));
-    memcpy(&xsave->region[XSAVE_OPMASK], env->opmask_regs,
+    xsave->bndcsr_state.bndcsr = env->bndcs_regs;
+    memcpy(&xsave->opmask_state.opmask_regs, env->opmask_regs,
             sizeof env->opmask_regs);
 
-    xmm = (uint8_t *)&xsave->region[XSAVE_XMM_SPACE];
-    ymmh = (uint8_t *)&xsave->region[XSAVE_YMMH_SPACE];
-    zmmh = (uint8_t *)&xsave->region[XSAVE_ZMM_Hi256];
-    for (i = 0; i < CPU_NB_REGS; i++, xmm += 16, ymmh += 16, zmmh += 32) {
+    for (i = 0; i < CPU_NB_REGS; i++) {
+        uint8_t *xmm = xsave->legacy.xmm_regs[i];
+        uint8_t *ymmh = xsave->avx_state.ymmh[i];
+        uint8_t *zmmh = xsave->zmm_hi256_state.zmm_hi256[i];
         stq_p(xmm,     env->xmm_regs[i].XMM_Q(0));
         stq_p(xmm+8,   env->xmm_regs[i].XMM_Q(1));
         stq_p(ymmh,    env->xmm_regs[i].XMM_Q(2));
@@ -1290,7 +1290,7 @@ static int kvm_put_xsave(X86CPU *cpu)
     }
 
 #ifdef TARGET_X86_64
-    memcpy(&xsave->region[XSAVE_Hi16_ZMM], &env->xmm_regs[16],
+    memcpy(&xsave->hi16_zmm_state.hi16_zmm, &env->xmm_regs[16],
             16 * sizeof env->xmm_regs[16]);
 #endif
     r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave);
@@ -1626,9 +1626,8 @@ static int kvm_get_fpu(X86CPU *cpu)
 static int kvm_get_xsave(X86CPU *cpu)
 {
     CPUX86State *env = &cpu->env;
-    struct kvm_xsave* xsave = env->kvm_xsave_buf;
+    X86XSaveArea *xsave = env->kvm_xsave_buf;
     int ret, i;
-    const uint8_t *xmm, *ymmh, *zmmh;
     uint16_t cwd, swd, twd;
 
     if (!has_xsave) {
@@ -1640,33 +1639,32 @@ static int kvm_get_xsave(X86CPU *cpu)
         return ret;
     }
 
-    cwd = (uint16_t)xsave->region[XSAVE_FCW_FSW];
-    swd = (uint16_t)(xsave->region[XSAVE_FCW_FSW] >> 16);
-    twd = (uint16_t)xsave->region[XSAVE_FTW_FOP];
-    env->fpop = (uint16_t)(xsave->region[XSAVE_FTW_FOP] >> 16);
+    cwd = xsave->legacy.fcw;
+    swd = xsave->legacy.fsw;
+    twd = xsave->legacy.ftw;
+    env->fpop = xsave->legacy.fpop;
     env->fpstt = (swd >> 11) & 7;
     env->fpus = swd;
     env->fpuc = cwd;
     for (i = 0; i < 8; ++i) {
         env->fptags[i] = !((twd >> i) & 1);
     }
-    memcpy(&env->fpip, &xsave->region[XSAVE_CWD_RIP], sizeof(env->fpip));
-    memcpy(&env->fpdp, &xsave->region[XSAVE_CWD_RDP], sizeof(env->fpdp));
-    env->mxcsr = xsave->region[XSAVE_MXCSR];
-    memcpy(env->fpregs, &xsave->region[XSAVE_ST_SPACE],
+    env->fpip = xsave->legacy.fpip;
+    env->fpdp = xsave->legacy.fpdp;
+    env->mxcsr = xsave->legacy.mxcsr;
+    memcpy(env->fpregs, &xsave->legacy.fpregs,
             sizeof env->fpregs);
-    env->xstate_bv = *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV];
-    memcpy(env->bnd_regs, &xsave->region[XSAVE_BNDREGS],
+    env->xstate_bv = xsave->header.xstate_bv;
+    memcpy(env->bnd_regs, &xsave->bndreg_state.bnd_regs,
             sizeof env->bnd_regs);
-    memcpy(&env->bndcs_regs, &xsave->region[XSAVE_BNDCSR],
-            sizeof(env->bndcs_regs));
-    memcpy(env->opmask_regs, &xsave->region[XSAVE_OPMASK],
+    env->bndcs_regs = xsave->bndcsr_state.bndcsr;
+    memcpy(env->opmask_regs, &xsave->opmask_state.opmask_regs,
             sizeof env->opmask_regs);
 
-    xmm = (const uint8_t *)&xsave->region[XSAVE_XMM_SPACE];
-    ymmh = (const uint8_t *)&xsave->region[XSAVE_YMMH_SPACE];
-    zmmh = (const uint8_t *)&xsave->region[XSAVE_ZMM_Hi256];
-    for (i = 0; i < CPU_NB_REGS; i++, xmm += 16, ymmh += 16, zmmh += 32) {
+    for (i = 0; i < CPU_NB_REGS; i++) {
+        uint8_t *xmm = xsave->legacy.xmm_regs[i];
+        uint8_t *ymmh = xsave->avx_state.ymmh[i];
+        uint8_t *zmmh = xsave->zmm_hi256_state.zmm_hi256[i];
         env->xmm_regs[i].XMM_Q(0) = ldq_p(xmm);
         env->xmm_regs[i].XMM_Q(1) = ldq_p(xmm+8);
         env->xmm_regs[i].XMM_Q(2) = ldq_p(ymmh);
@@ -1678,7 +1676,7 @@ static int kvm_get_xsave(X86CPU *cpu)
     }
 
 #ifdef TARGET_X86_64
-    memcpy(&env->xmm_regs[16], &xsave->region[XSAVE_Hi16_ZMM],
+    memcpy(&env->xmm_regs[16], &xsave->hi16_zmm_state.hi16_zmm,
            16 * sizeof env->xmm_regs[16]);
 #endif
     return 0;
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 0/3] target-i386: Use C struct for xsave area layout, offsets & sizes
  2015-11-30 17:34 ` [Qemu-devel] " Eduardo Habkost
@ 2015-12-01 10:22   ` Paolo Bonzini
  -1 siblings, 0 replies; 18+ messages in thread
From: Paolo Bonzini @ 2015-12-01 10:22 UTC (permalink / raw)
  To: Eduardo Habkost, qemu-devel; +Cc: kvm, Huaitong Han

On 30/11/2015 18:34, Eduardo Habkost wrote:
> target-i386/cpu.c:ext_save_area uses magic numbers for the xsave
> area offsets and sizes, and target-i386/kvm.c:kvm_{put,get}_xsave()
> uses offset macros and bit manipulation to access the xsave area.
> This series changes both to use C structs for those operations.
> 
> I still need to figure out a way to write unit tests for the new
> code. Maybe I will just copy and paste the new and old functions,
> and test them locally (checking if they give the same results
> when translating blobs of random bytes).
> 
> Changes v1 -> v2:
> * Use uint8_t[8*n] instead of uint64_t[n] for register data
> * Keep the QEMU_BUILD_BUG_ON lines
> 
> v1 -> v2 diff below:
> 
>   diff --git a/target-i386/cpu.h b/target-i386/cpu.h
>   index 3d1d01e..41f55ef 100644
>   --- a/target-i386/cpu.h
>   +++ b/target-i386/cpu.h
>   @@ -818,7 +818,7 @@ typedef union X86LegacyXSaveArea {
>            uint32_t mxcsr;
>            uint32_t mxcsr_mask;
>            FPReg fpregs[8];
>   -        uint64_t xmm_regs[16][2];
>   +        uint8_t xmm_regs[16][16];
>        };
>        uint8_t data[512];
>    } X86LegacyXSaveArea;
>   @@ -831,7 +831,7 @@ typedef struct X86XSaveHeader {
> 
>    /* Ext. save area 2: AVX State */
>    typedef struct XSaveAVX {
>   -    uint64_t ymmh[16][2];
>   +    uint8_t ymmh[16][16];
>    } XSaveAVX;
> 
>    /* Ext. save area 3: BNDREG */
>   @@ -852,12 +852,12 @@ typedef struct XSaveOpmask {
> 
>    /* Ext. save area 6: ZMM_Hi256 */
>    typedef struct XSaveZMM_Hi256 {
>   -    uint64_t zmm_hi256[16][4];
>   +    uint8_t zmm_hi256[16][32];
>    } XSaveZMM_Hi256;
> 
>    /* Ext. save area 7: Hi16_ZMM */
>    typedef struct XSaveHi16_ZMM {
>   -    XMMReg hi16_zmm[16];
>   +    uint8_t hi16_zmm[16][64];
>    } XSaveHi16_ZMM;
> 
>    typedef struct X86XSaveArea {
>   diff --git a/target-i386/kvm.c b/target-i386/kvm.c
>   index 5e7ec70..98249e4 100644
>   --- a/target-i386/kvm.c
>   +++ b/target-i386/kvm.c
>   @@ -1203,6 +1203,43 @@ static int kvm_put_fpu(X86CPU *cpu)
>        return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_FPU, &fpu);
>    }
> 
>   +#define XSAVE_FCW_FSW     0
>   +#define XSAVE_FTW_FOP     1
>   +#define XSAVE_CWD_RIP     2
>   +#define XSAVE_CWD_RDP     4
>   +#define XSAVE_MXCSR       6
>   +#define XSAVE_ST_SPACE    8
>   +#define XSAVE_XMM_SPACE   40
>   +#define XSAVE_XSTATE_BV   128
>   +#define XSAVE_YMMH_SPACE  144
>   +#define XSAVE_BNDREGS     240
>   +#define XSAVE_BNDCSR      256
>   +#define XSAVE_OPMASK      272
>   +#define XSAVE_ZMM_Hi256   288
>   +#define XSAVE_Hi16_ZMM    416
>   +
>   +#define XSAVE_BYTE_OFFSET(word_offset) \
>   +    ((word_offset)*sizeof(((struct kvm_xsave*)0)->region[0]))
>   +
>   +#define ASSERT_OFFSET(word_offset, field) \
>   +    QEMU_BUILD_BUG_ON(XSAVE_BYTE_OFFSET(word_offset) != \
>   +                      offsetof(X86XSaveArea, field))
>   +
>   +ASSERT_OFFSET(XSAVE_FCW_FSW, legacy.fcw);
>   +ASSERT_OFFSET(XSAVE_FTW_FOP, legacy.ftw);
>   +ASSERT_OFFSET(XSAVE_CWD_RIP, legacy.fpip);
>   +ASSERT_OFFSET(XSAVE_CWD_RDP, legacy.fpdp);
>   +ASSERT_OFFSET(XSAVE_MXCSR, legacy.mxcsr);
>   +ASSERT_OFFSET(XSAVE_ST_SPACE, legacy.fpregs);
>   +ASSERT_OFFSET(XSAVE_XMM_SPACE, legacy.xmm_regs);
>   +ASSERT_OFFSET(XSAVE_XSTATE_BV, header.xstate_bv);
>   +ASSERT_OFFSET(XSAVE_YMMH_SPACE, avx_state);
>   +ASSERT_OFFSET(XSAVE_BNDREGS, bndreg_state);
>   +ASSERT_OFFSET(XSAVE_BNDCSR, bndcsr_state);
>   +ASSERT_OFFSET(XSAVE_OPMASK, opmask_state);
>   +ASSERT_OFFSET(XSAVE_ZMM_Hi256, zmm_hi256_state);
>   +ASSERT_OFFSET(XSAVE_Hi16_ZMM, hi16_zmm_state);
>   +
>    static int kvm_put_xsave(X86CPU *cpu)
>    {
>        CPUX86State *env = &cpu->env;
>   @@ -1239,17 +1276,17 @@ static int kvm_put_xsave(X86CPU *cpu)
>                sizeof env->opmask_regs);
> 
>        for (i = 0; i < CPU_NB_REGS; i++) {
>   -        X86LegacyXSaveArea *legacy = &xsave->legacy;
>   -        XSaveAVX *avx = &xsave->avx_state;
>   -        XSaveZMM_Hi256 *zmm_hi256 = &xsave->zmm_hi256_state;
>   -        stq_p(&legacy->xmm_regs[i][0],     env->xmm_regs[i].XMM_Q(0));
>   -        stq_p(&legacy->xmm_regs[i][1],     env->xmm_regs[i].XMM_Q(1));
>   -        stq_p(&avx->ymmh[i][0],            env->xmm_regs[i].XMM_Q(2));
>   -        stq_p(&avx->ymmh[i][1],            env->xmm_regs[i].XMM_Q(3));
>   -        stq_p(&zmm_hi256->zmm_hi256[i][0], env->xmm_regs[i].XMM_Q(4));
>   -        stq_p(&zmm_hi256->zmm_hi256[i][1], env->xmm_regs[i].XMM_Q(5));
>   -        stq_p(&zmm_hi256->zmm_hi256[i][2], env->xmm_regs[i].XMM_Q(6));
>   -        stq_p(&zmm_hi256->zmm_hi256[i][3], env->xmm_regs[i].XMM_Q(7));
>   +        uint8_t *xmm = xsave->legacy.xmm_regs[i];
>   +        uint8_t *ymmh = xsave->avx_state.ymmh[i];
>   +        uint8_t *zmmh = xsave->zmm_hi256_state.zmm_hi256[i];
>   +        stq_p(xmm,     env->xmm_regs[i].XMM_Q(0));
>   +        stq_p(xmm+8,   env->xmm_regs[i].XMM_Q(1));
>   +        stq_p(ymmh,    env->xmm_regs[i].XMM_Q(2));
>   +        stq_p(ymmh+8,  env->xmm_regs[i].XMM_Q(3));
>   +        stq_p(zmmh,    env->xmm_regs[i].XMM_Q(4));
>   +        stq_p(zmmh+8,  env->xmm_regs[i].XMM_Q(5));
>   +        stq_p(zmmh+16, env->xmm_regs[i].XMM_Q(6));
>   +        stq_p(zmmh+24, env->xmm_regs[i].XMM_Q(7));
>        }
> 
>    #ifdef TARGET_X86_64
>   @@ -1625,17 +1662,17 @@ static int kvm_get_xsave(X86CPU *cpu)
>                sizeof env->opmask_regs);
> 
>        for (i = 0; i < CPU_NB_REGS; i++) {
>   -        X86LegacyXSaveArea *legacy = &xsave->legacy;
>   -        XSaveAVX *avx = &xsave->avx_state;
>   -        XSaveZMM_Hi256 *zmm_hi256 = &xsave->zmm_hi256_state;
>   -        env->xmm_regs[i].XMM_Q(0) = ldq_p(&legacy->xmm_regs[i][0]);
>   -        env->xmm_regs[i].XMM_Q(1) = ldq_p(&legacy->xmm_regs[i][1]);
>   -        env->xmm_regs[i].XMM_Q(2) = ldq_p(&avx->ymmh[i][0]);
>   -        env->xmm_regs[i].XMM_Q(3) = ldq_p(&avx->ymmh[i][1]);
>   -        env->xmm_regs[i].XMM_Q(4) = ldq_p(&zmm_hi256->zmm_hi256[i][0]);
>   -        env->xmm_regs[i].XMM_Q(5) = ldq_p(&zmm_hi256->zmm_hi256[i][1]);
>   -        env->xmm_regs[i].XMM_Q(6) = ldq_p(&zmm_hi256->zmm_hi256[i][2]);
>   -        env->xmm_regs[i].XMM_Q(7) = ldq_p(&zmm_hi256->zmm_hi256[i][3]);
>   +        uint8_t *xmm = xsave->legacy.xmm_regs[i];
>   +        uint8_t *ymmh = xsave->avx_state.ymmh[i];
>   +        uint8_t *zmmh = xsave->zmm_hi256_state.zmm_hi256[i];
>   +        env->xmm_regs[i].XMM_Q(0) = ldq_p(xmm);
>   +        env->xmm_regs[i].XMM_Q(1) = ldq_p(xmm+8);
>   +        env->xmm_regs[i].XMM_Q(2) = ldq_p(ymmh);
>   +        env->xmm_regs[i].XMM_Q(3) = ldq_p(ymmh+8);
>   +        env->xmm_regs[i].XMM_Q(4) = ldq_p(zmmh);
>   +        env->xmm_regs[i].XMM_Q(5) = ldq_p(zmmh+8);
>   +        env->xmm_regs[i].XMM_Q(6) = ldq_p(zmmh+16);
>   +        env->xmm_regs[i].XMM_Q(7) = ldq_p(zmmh+24);
>        }
> 
>    #ifdef TARGET_X86_64
> 
> Eduardo Habkost (3):
>   target-i386: Define structs for layout of xsave area
>   target-i386: Use xsave structs for ext_save_area
>   target-i386: kvm: Use X86XSaveArea struct for xsave save/load
> 
>  target-i386/cpu.c | 18 +++++++----
>  target-i386/cpu.h | 85 ++++++++++++++++++++++++++++++++++++++++++++++++
>  target-i386/kvm.c | 96 +++++++++++++++++++++++++++++++++----------------------
>  3 files changed, 155 insertions(+), 44 deletions(-)
> 

The patches are okay, are you going to rebase them on top of the PKRU
patches?

Paolo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Qemu-devel] [PATCH v2 0/3] target-i386: Use C struct for xsave area layout, offsets & sizes
@ 2015-12-01 10:22   ` Paolo Bonzini
  0 siblings, 0 replies; 18+ messages in thread
From: Paolo Bonzini @ 2015-12-01 10:22 UTC (permalink / raw)
  To: Eduardo Habkost, qemu-devel; +Cc: Huaitong Han, kvm

On 30/11/2015 18:34, Eduardo Habkost wrote:
> target-i386/cpu.c:ext_save_area uses magic numbers for the xsave
> area offsets and sizes, and target-i386/kvm.c:kvm_{put,get}_xsave()
> uses offset macros and bit manipulation to access the xsave area.
> This series changes both to use C structs for those operations.
> 
> I still need to figure out a way to write unit tests for the new
> code. Maybe I will just copy and paste the new and old functions,
> and test them locally (checking if they give the same results
> when translating blobs of random bytes).
> 
> Changes v1 -> v2:
> * Use uint8_t[8*n] instead of uint64_t[n] for register data
> * Keep the QEMU_BUILD_BUG_ON lines
> 
> v1 -> v2 diff below:
> 
>   diff --git a/target-i386/cpu.h b/target-i386/cpu.h
>   index 3d1d01e..41f55ef 100644
>   --- a/target-i386/cpu.h
>   +++ b/target-i386/cpu.h
>   @@ -818,7 +818,7 @@ typedef union X86LegacyXSaveArea {
>            uint32_t mxcsr;
>            uint32_t mxcsr_mask;
>            FPReg fpregs[8];
>   -        uint64_t xmm_regs[16][2];
>   +        uint8_t xmm_regs[16][16];
>        };
>        uint8_t data[512];
>    } X86LegacyXSaveArea;
>   @@ -831,7 +831,7 @@ typedef struct X86XSaveHeader {
> 
>    /* Ext. save area 2: AVX State */
>    typedef struct XSaveAVX {
>   -    uint64_t ymmh[16][2];
>   +    uint8_t ymmh[16][16];
>    } XSaveAVX;
> 
>    /* Ext. save area 3: BNDREG */
>   @@ -852,12 +852,12 @@ typedef struct XSaveOpmask {
> 
>    /* Ext. save area 6: ZMM_Hi256 */
>    typedef struct XSaveZMM_Hi256 {
>   -    uint64_t zmm_hi256[16][4];
>   +    uint8_t zmm_hi256[16][32];
>    } XSaveZMM_Hi256;
> 
>    /* Ext. save area 7: Hi16_ZMM */
>    typedef struct XSaveHi16_ZMM {
>   -    XMMReg hi16_zmm[16];
>   +    uint8_t hi16_zmm[16][64];
>    } XSaveHi16_ZMM;
> 
>    typedef struct X86XSaveArea {
>   diff --git a/target-i386/kvm.c b/target-i386/kvm.c
>   index 5e7ec70..98249e4 100644
>   --- a/target-i386/kvm.c
>   +++ b/target-i386/kvm.c
>   @@ -1203,6 +1203,43 @@ static int kvm_put_fpu(X86CPU *cpu)
>        return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_FPU, &fpu);
>    }
> 
>   +#define XSAVE_FCW_FSW     0
>   +#define XSAVE_FTW_FOP     1
>   +#define XSAVE_CWD_RIP     2
>   +#define XSAVE_CWD_RDP     4
>   +#define XSAVE_MXCSR       6
>   +#define XSAVE_ST_SPACE    8
>   +#define XSAVE_XMM_SPACE   40
>   +#define XSAVE_XSTATE_BV   128
>   +#define XSAVE_YMMH_SPACE  144
>   +#define XSAVE_BNDREGS     240
>   +#define XSAVE_BNDCSR      256
>   +#define XSAVE_OPMASK      272
>   +#define XSAVE_ZMM_Hi256   288
>   +#define XSAVE_Hi16_ZMM    416
>   +
>   +#define XSAVE_BYTE_OFFSET(word_offset) \
>   +    ((word_offset)*sizeof(((struct kvm_xsave*)0)->region[0]))
>   +
>   +#define ASSERT_OFFSET(word_offset, field) \
>   +    QEMU_BUILD_BUG_ON(XSAVE_BYTE_OFFSET(word_offset) != \
>   +                      offsetof(X86XSaveArea, field))
>   +
>   +ASSERT_OFFSET(XSAVE_FCW_FSW, legacy.fcw);
>   +ASSERT_OFFSET(XSAVE_FTW_FOP, legacy.ftw);
>   +ASSERT_OFFSET(XSAVE_CWD_RIP, legacy.fpip);
>   +ASSERT_OFFSET(XSAVE_CWD_RDP, legacy.fpdp);
>   +ASSERT_OFFSET(XSAVE_MXCSR, legacy.mxcsr);
>   +ASSERT_OFFSET(XSAVE_ST_SPACE, legacy.fpregs);
>   +ASSERT_OFFSET(XSAVE_XMM_SPACE, legacy.xmm_regs);
>   +ASSERT_OFFSET(XSAVE_XSTATE_BV, header.xstate_bv);
>   +ASSERT_OFFSET(XSAVE_YMMH_SPACE, avx_state);
>   +ASSERT_OFFSET(XSAVE_BNDREGS, bndreg_state);
>   +ASSERT_OFFSET(XSAVE_BNDCSR, bndcsr_state);
>   +ASSERT_OFFSET(XSAVE_OPMASK, opmask_state);
>   +ASSERT_OFFSET(XSAVE_ZMM_Hi256, zmm_hi256_state);
>   +ASSERT_OFFSET(XSAVE_Hi16_ZMM, hi16_zmm_state);
>   +
>    static int kvm_put_xsave(X86CPU *cpu)
>    {
>        CPUX86State *env = &cpu->env;
>   @@ -1239,17 +1276,17 @@ static int kvm_put_xsave(X86CPU *cpu)
>                sizeof env->opmask_regs);
> 
>        for (i = 0; i < CPU_NB_REGS; i++) {
>   -        X86LegacyXSaveArea *legacy = &xsave->legacy;
>   -        XSaveAVX *avx = &xsave->avx_state;
>   -        XSaveZMM_Hi256 *zmm_hi256 = &xsave->zmm_hi256_state;
>   -        stq_p(&legacy->xmm_regs[i][0],     env->xmm_regs[i].XMM_Q(0));
>   -        stq_p(&legacy->xmm_regs[i][1],     env->xmm_regs[i].XMM_Q(1));
>   -        stq_p(&avx->ymmh[i][0],            env->xmm_regs[i].XMM_Q(2));
>   -        stq_p(&avx->ymmh[i][1],            env->xmm_regs[i].XMM_Q(3));
>   -        stq_p(&zmm_hi256->zmm_hi256[i][0], env->xmm_regs[i].XMM_Q(4));
>   -        stq_p(&zmm_hi256->zmm_hi256[i][1], env->xmm_regs[i].XMM_Q(5));
>   -        stq_p(&zmm_hi256->zmm_hi256[i][2], env->xmm_regs[i].XMM_Q(6));
>   -        stq_p(&zmm_hi256->zmm_hi256[i][3], env->xmm_regs[i].XMM_Q(7));
>   +        uint8_t *xmm = xsave->legacy.xmm_regs[i];
>   +        uint8_t *ymmh = xsave->avx_state.ymmh[i];
>   +        uint8_t *zmmh = xsave->zmm_hi256_state.zmm_hi256[i];
>   +        stq_p(xmm,     env->xmm_regs[i].XMM_Q(0));
>   +        stq_p(xmm+8,   env->xmm_regs[i].XMM_Q(1));
>   +        stq_p(ymmh,    env->xmm_regs[i].XMM_Q(2));
>   +        stq_p(ymmh+8,  env->xmm_regs[i].XMM_Q(3));
>   +        stq_p(zmmh,    env->xmm_regs[i].XMM_Q(4));
>   +        stq_p(zmmh+8,  env->xmm_regs[i].XMM_Q(5));
>   +        stq_p(zmmh+16, env->xmm_regs[i].XMM_Q(6));
>   +        stq_p(zmmh+24, env->xmm_regs[i].XMM_Q(7));
>        }
> 
>    #ifdef TARGET_X86_64
>   @@ -1625,17 +1662,17 @@ static int kvm_get_xsave(X86CPU *cpu)
>                sizeof env->opmask_regs);
> 
>        for (i = 0; i < CPU_NB_REGS; i++) {
>   -        X86LegacyXSaveArea *legacy = &xsave->legacy;
>   -        XSaveAVX *avx = &xsave->avx_state;
>   -        XSaveZMM_Hi256 *zmm_hi256 = &xsave->zmm_hi256_state;
>   -        env->xmm_regs[i].XMM_Q(0) = ldq_p(&legacy->xmm_regs[i][0]);
>   -        env->xmm_regs[i].XMM_Q(1) = ldq_p(&legacy->xmm_regs[i][1]);
>   -        env->xmm_regs[i].XMM_Q(2) = ldq_p(&avx->ymmh[i][0]);
>   -        env->xmm_regs[i].XMM_Q(3) = ldq_p(&avx->ymmh[i][1]);
>   -        env->xmm_regs[i].XMM_Q(4) = ldq_p(&zmm_hi256->zmm_hi256[i][0]);
>   -        env->xmm_regs[i].XMM_Q(5) = ldq_p(&zmm_hi256->zmm_hi256[i][1]);
>   -        env->xmm_regs[i].XMM_Q(6) = ldq_p(&zmm_hi256->zmm_hi256[i][2]);
>   -        env->xmm_regs[i].XMM_Q(7) = ldq_p(&zmm_hi256->zmm_hi256[i][3]);
>   +        uint8_t *xmm = xsave->legacy.xmm_regs[i];
>   +        uint8_t *ymmh = xsave->avx_state.ymmh[i];
>   +        uint8_t *zmmh = xsave->zmm_hi256_state.zmm_hi256[i];
>   +        env->xmm_regs[i].XMM_Q(0) = ldq_p(xmm);
>   +        env->xmm_regs[i].XMM_Q(1) = ldq_p(xmm+8);
>   +        env->xmm_regs[i].XMM_Q(2) = ldq_p(ymmh);
>   +        env->xmm_regs[i].XMM_Q(3) = ldq_p(ymmh+8);
>   +        env->xmm_regs[i].XMM_Q(4) = ldq_p(zmmh);
>   +        env->xmm_regs[i].XMM_Q(5) = ldq_p(zmmh+8);
>   +        env->xmm_regs[i].XMM_Q(6) = ldq_p(zmmh+16);
>   +        env->xmm_regs[i].XMM_Q(7) = ldq_p(zmmh+24);
>        }
> 
>    #ifdef TARGET_X86_64
> 
> Eduardo Habkost (3):
>   target-i386: Define structs for layout of xsave area
>   target-i386: Use xsave structs for ext_save_area
>   target-i386: kvm: Use X86XSaveArea struct for xsave save/load
> 
>  target-i386/cpu.c | 18 +++++++----
>  target-i386/cpu.h | 85 ++++++++++++++++++++++++++++++++++++++++++++++++
>  target-i386/kvm.c | 96 +++++++++++++++++++++++++++++++++----------------------
>  3 files changed, 155 insertions(+), 44 deletions(-)
> 

The patches are okay, are you going to rebase them on top of the PKRU
patches?

Paolo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 0/3] target-i386: Use C struct for xsave area layout, offsets & sizes
  2015-11-30 17:34 ` [Qemu-devel] " Eduardo Habkost
@ 2015-12-01 15:09   ` Paolo Bonzini
  -1 siblings, 0 replies; 18+ messages in thread
From: Paolo Bonzini @ 2015-12-01 15:09 UTC (permalink / raw)
  To: Eduardo Habkost, qemu-devel; +Cc: kvm, Huaitong Han



On 30/11/2015 18:34, Eduardo Habkost wrote:
> target-i386/cpu.c:ext_save_area uses magic numbers for the xsave
> area offets and sizes, and target-i386/kvm.c:kvm_{put,get}_xsave()
> uses offset macros and bit manipulation to access the xsave area.
> This series changes both to use C structs for those operations.
> 
> I still need to figure out a way to write unit tests for the new
> code. Maybe I will just copy and paste the new and old functions,
> and test them locally (checking if they give the same results
> when translating blobs of random bytes).

I think it's easier to use small guests (i.e. kvm-unit-tests) to test
this code.

Paolo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Qemu-devel] [PATCH v2 0/3] target-i386: Use C struct for xsave area layout, offsets & sizes
@ 2015-12-01 15:09   ` Paolo Bonzini
  0 siblings, 0 replies; 18+ messages in thread
From: Paolo Bonzini @ 2015-12-01 15:09 UTC (permalink / raw)
  To: Eduardo Habkost, qemu-devel; +Cc: Huaitong Han, kvm



On 30/11/2015 18:34, Eduardo Habkost wrote:
> target-i386/cpu.c:ext_save_area uses magic numbers for the xsave
> area offsets and sizes, and target-i386/kvm.c:kvm_{put,get}_xsave()
> uses offset macros and bit manipulation to access the xsave area.
> This series changes both to use C structs for those operations.
> 
> I still need to figure out a way to write unit tests for the new
> code. Maybe I will just copy and paste the new and old functions,
> and test them locally (checking if they give the same results
> when translating blobs of random bytes).

I think it's easier to use small guests (i.e. kvm-unit-tests) to test
this code.

Paolo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 0/3] target-i386: Use C struct for xsave area layout, offsets & sizes
  2015-12-01 15:09   ` [Qemu-devel] " Paolo Bonzini
@ 2015-12-01 15:25     ` Eduardo Habkost
  -1 siblings, 0 replies; 18+ messages in thread
From: Eduardo Habkost @ 2015-12-01 15:25 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel, kvm, Huaitong Han

On Tue, Dec 01, 2015 at 04:09:44PM +0100, Paolo Bonzini wrote:
> 
> 
> On 30/11/2015 18:34, Eduardo Habkost wrote:
> > target-i386/cpu.c:ext_save_area uses magic numbers for the xsave
> > area offsets and sizes, and target-i386/kvm.c:kvm_{put,get}_xsave()
> > uses offset macros and bit manipulation to access the xsave area.
> > This series changes both to use C structs for those operations.
> > 
> > I still need to figure out a way to write unit tests for the new
> > code. Maybe I will just copy and paste the new and old functions,
> > and test them locally (checking if they give the same results
> > when translating blobs of random bytes).
> 
> I think it's easier to use small guests (i.e. kvm-unit-tests) to test
> this code.

I agree it's easier, but how likely is it to catch bugs in the
save/load code? If the code corrupts a register, we need to
trigger a save/load cycle at the exact moment the guest code is
using that register. Do we have something that helps us
repeatedly save/load CPU state while kvm-unit-tests is running?

-- 
Eduardo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Qemu-devel] [PATCH v2 0/3] target-i386: Use C struct for xsave area layout, offsets & sizes
@ 2015-12-01 15:25     ` Eduardo Habkost
  0 siblings, 0 replies; 18+ messages in thread
From: Eduardo Habkost @ 2015-12-01 15:25 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Huaitong Han, qemu-devel, kvm

On Tue, Dec 01, 2015 at 04:09:44PM +0100, Paolo Bonzini wrote:
> 
> 
> On 30/11/2015 18:34, Eduardo Habkost wrote:
> > target-i386/cpu.c:ext_save_area uses magic numbers for the xsave
> > area offsets and sizes, and target-i386/kvm.c:kvm_{put,get}_xsave()
> > uses offset macros and bit manipulation to access the xsave area.
> > This series changes both to use C structs for those operations.
> > 
> > I still need to figure out a way to write unit tests for the new
> > code. Maybe I will just copy and paste the new and old functions,
> > and test them locally (checking if they give the same results
> > when translating blobs of random bytes).
> 
> I think it's easier to use small guests (i.e. kvm-unit-tests) to test
> this code.

I agree it's easier, but how likely is it to catch bugs in the
save/load code? If the code corrupts a register, we need to
trigger a save/load cycle at the exact moment the guest code is
using that register. Do we have something that helps us
repeatedly save/load CPU state while kvm-unit-tests is running?

-- 
Eduardo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 0/3] target-i386: Use C struct for xsave area layout, offsets & sizes
  2015-12-01 10:22   ` [Qemu-devel] " Paolo Bonzini
@ 2015-12-01 15:27     ` Eduardo Habkost
  -1 siblings, 0 replies; 18+ messages in thread
From: Eduardo Habkost @ 2015-12-01 15:27 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel, kvm, Huaitong Han

On Tue, Dec 01, 2015 at 11:22:31AM +0100, Paolo Bonzini wrote:
> On 30/11/2015 18:34, Eduardo Habkost wrote:
> > target-i386/cpu.c:ext_save_area uses magic numbers for the xsave
> > area offsets and sizes, and target-i386/kvm.c:kvm_{put,get}_xsave()
> > uses offset macros and bit manipulation to access the xsave area.
> > This series changes both to use C structs for those operations.
> > 
> > I still need to figure out a way to write unit tests for the new
> > code. Maybe I will just copy and paste the new and old functions,
> > and test them locally (checking if they give the same results
> > when translating blobs of random bytes).
> > 
> > Changes v1 -> v2:
> > * Use uint8_t[8*n] instead of uint64_t[n] for register data
> > * Keep the QEMU_BUILD_BUG_ON lines
> > 
[...]
> > 
> > Eduardo Habkost (3):
> >   target-i386: Define structs for layout of xsave area
> >   target-i386: Use xsave structs for ext_save_area
> >   target-i386: kvm: Use X86XSaveArea struct for xsave save/load
> > 
> >  target-i386/cpu.c | 18 +++++++----
> >  target-i386/cpu.h | 85 ++++++++++++++++++++++++++++++++++++++++++++++++
> >  target-i386/kvm.c | 96 +++++++++++++++++++++++++++++++++----------------------
> >  3 files changed, 155 insertions(+), 44 deletions(-)
> > 
> 
> The patches are okay, are you going to rebase them on top of the PKRU
> patches?

I will probably redo the PKRU patches on top of this, to reduce
diff size.

-- 
Eduardo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Qemu-devel] [PATCH v2 0/3] target-i386: Use C struct for xsave area layout, offsets & sizes
@ 2015-12-01 15:27     ` Eduardo Habkost
  0 siblings, 0 replies; 18+ messages in thread
From: Eduardo Habkost @ 2015-12-01 15:27 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Huaitong Han, qemu-devel, kvm

On Tue, Dec 01, 2015 at 11:22:31AM +0100, Paolo Bonzini wrote:
> On 30/11/2015 18:34, Eduardo Habkost wrote:
> > target-i386/cpu.c:ext_save_area uses magic numbers for the xsave
> > area offsets and sizes, and target-i386/kvm.c:kvm_{put,get}_xsave()
> > uses offset macros and bit manipulation to access the xsave area.
> > This series changes both to use C structs for those operations.
> > 
> > I still need to figure out a way to write unit tests for the new
> > code. Maybe I will just copy and paste the new and old functions,
> > and test them locally (checking if they give the same results
> > when translating blobs of random bytes).
> > 
> > Changes v1 -> v2:
> > * Use uint8_t[8*n] instead of uint64_t[n] for register data
> > * Keep the QEMU_BUILD_BUG_ON lines
> > 
[...]
> > 
> > Eduardo Habkost (3):
> >   target-i386: Define structs for layout of xsave area
> >   target-i386: Use xsave structs for ext_save_area
> >   target-i386: kvm: Use X86XSaveArea struct for xsave save/load
> > 
> >  target-i386/cpu.c | 18 +++++++----
> >  target-i386/cpu.h | 85 ++++++++++++++++++++++++++++++++++++++++++++++++
> >  target-i386/kvm.c | 96 +++++++++++++++++++++++++++++++++----------------------
> >  3 files changed, 155 insertions(+), 44 deletions(-)
> > 
> 
> The patches are okay, are you going to rebase them on top of the PKRU
> patches?

I will probably redo the PKRU patches on top of this, to reduce
diff size.

-- 
Eduardo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 0/3] target-i386: Use C struct for xsave area layout, offsets & sizes
  2015-12-01 15:25     ` [Qemu-devel] " Eduardo Habkost
@ 2015-12-01 15:28       ` Paolo Bonzini
  -1 siblings, 0 replies; 18+ messages in thread
From: Paolo Bonzini @ 2015-12-01 15:28 UTC (permalink / raw)
  To: Eduardo Habkost; +Cc: qemu-devel, kvm, Huaitong Han



On 01/12/2015 16:25, Eduardo Habkost wrote:
> > I think it's easier to use small guests (i.e. kvm-unit-tests) to test
> > this code.
>
> I agree it's easier, but how likely is it to catch bugs in the
> save/load code? If the code corrupts a register, we need to
> trigger a save/load cycle at the exact moment the guest code is
> using that register. Do we have something that helps us
> repeatedly save/load CPU state while kvm-unit-tests is running?

A vmware magic port read should do that.  Put VMPORT_MAGIC in EAX and
VMPORT_CMD_GETVERSION in ECX, then do a 32-bit in from port 0x5658.

Paolo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Qemu-devel] [PATCH v2 0/3] target-i386: Use C struct for xsave area layout, offsets & sizes
@ 2015-12-01 15:28       ` Paolo Bonzini
  0 siblings, 0 replies; 18+ messages in thread
From: Paolo Bonzini @ 2015-12-01 15:28 UTC (permalink / raw)
  To: Eduardo Habkost; +Cc: Huaitong Han, qemu-devel, kvm



On 01/12/2015 16:25, Eduardo Habkost wrote:
> > I think it's easier to use small guests (i.e. kvm-unit-tests) to test
> > this code.
>
> I agree it's easier, but how likely is it to catch bugs in the
> save/load code? If the code corrupts a register, we need to
> trigger a save/load cycle at the exact moment the guest code is
> using that register. Do we have something that helps us
> repeatedly save/load CPU state while kvm-unit-tests is running?

A vmware magic port read should do that.  Put VMPORT_MAGIC in EAX and
VMPORT_CMD_GETVERSION in ECX, then do a 32-bit in from port 0x5658.

Paolo

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2015-12-01 15:28 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-30 17:34 [PATCH v2 0/3] target-i386: Use C struct for xsave area layout, offsets & sizes Eduardo Habkost
2015-11-30 17:34 ` [Qemu-devel] " Eduardo Habkost
2015-11-30 17:34 ` [PATCH v2 1/3] target-i386: Define structs for layout of xsave area Eduardo Habkost
2015-11-30 17:34   ` [Qemu-devel] " Eduardo Habkost
2015-11-30 17:34 ` [PATCH v2 2/3] target-i386: Use xsave structs for ext_save_area Eduardo Habkost
2015-11-30 17:34   ` [Qemu-devel] " Eduardo Habkost
2015-11-30 17:34 ` [PATCH v2 3/3] target-i386: kvm: Use X86XSaveArea struct for xsave save/load Eduardo Habkost
2015-11-30 17:34   ` [Qemu-devel] " Eduardo Habkost
2015-12-01 10:22 ` [PATCH v2 0/3] target-i386: Use C struct for xsave area layout, offsets & sizes Paolo Bonzini
2015-12-01 10:22   ` [Qemu-devel] " Paolo Bonzini
2015-12-01 15:27   ` Eduardo Habkost
2015-12-01 15:27     ` [Qemu-devel] " Eduardo Habkost
2015-12-01 15:09 ` Paolo Bonzini
2015-12-01 15:09   ` [Qemu-devel] " Paolo Bonzini
2015-12-01 15:25   ` Eduardo Habkost
2015-12-01 15:25     ` [Qemu-devel] " Eduardo Habkost
2015-12-01 15:28     ` Paolo Bonzini
2015-12-01 15:28       ` [Qemu-devel] " Paolo Bonzini

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.