qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] Set the correct env->fpip for x86 float instructions
@ 2021-04-30  2:19 Ziqiao Kong
  2021-04-30 18:19 ` Richard Henderson
  0 siblings, 1 reply; 2+ messages in thread
From: Ziqiao Kong @ 2021-04-30  2:19 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, richard.henderson, ehabkost, Ziqiao Kong

Thanks to Richard Henderson for the review of v1!

Changes since v1:
  - Don't update FCS, FIP, FDS and FDP for x87 control instructions.
  - Also write FCS, FDS and FDP for FSTENV.
  - Clear FCS, FIP, FDS and FDP for FXSAVE as the Intel manual says.

Note:
  During my tests, I found that some Intel CPUs differ in how they update
  FDS and FDP, while the AMD Ryzen always updates the two registers
  correctly. I am not sure whether that is a bug or not.

Ziqiao

Signed-off-by: Ziqiao Kong <ziqiaokong@gmail.com>
---
 target/i386/cpu.h            |  4 +++
 target/i386/tcg/fpu_helper.c | 50 ++++++++++++++++++++++--------------
 target/i386/tcg/translate.c  | 45 +++++++++++++++++++++++++++++++-
 3 files changed, 79 insertions(+), 20 deletions(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 570f916878..ba43ceb4ad 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -705,6 +705,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
 #define CPUID_7_0_EBX_INVPCID           (1U << 10)
 /* Restricted Transactional Memory */
 #define CPUID_7_0_EBX_RTM               (1U << 11)
+/* Deprecates FPU CS and FPU DS values */
+#define CPUID_7_0_EBX_FCS_FDS           (1U << 13)
 /* Memory Protection Extension */
 #define CPUID_7_0_EBX_MPX               (1U << 14)
 /* AVX-512 Foundation */
@@ -1440,7 +1442,9 @@ typedef struct CPUX86State {
     FPReg fpregs[8];
     /* KVM-only so far */
     uint16_t fpop;
+    uint16_t fpcs;
     uint64_t fpip;
+    uint16_t fpds;
     uint64_t fpdp;
 
     /* emulator internal variables */
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 60ed93520a..97cf68542b 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -766,6 +766,10 @@ void helper_fninit(CPUX86State *env)
 {
     env->fpus = 0;
     env->fpstt = 0;
+    env->fpcs = 0;
+    env->fpip = 0;
+    env->fpds = 0;
+    env->fpdp = 0;
     cpu_set_fpuc(env, 0x37f);
     env->fptags[0] = 1;
     env->fptags[1] = 1;
@@ -2368,6 +2372,7 @@ static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
 {
     int fpus, fptag, exp, i;
     uint64_t mant;
+    uint16_t fpcs, fpds;
     CPU_LDoubleU tmp;
 
     fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
@@ -2390,24 +2395,41 @@ static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
             }
         }
     }
+
+    /*
+     * If CR0.PE = 1, each instruction saves FCS and FDS into memory. If
+     * CPUID.(EAX=07H,ECX=0H):EBX[bit 13] = 1, the processor deprecates
+     * FCS and FDS; it saves each as 0000H.
+     */
+    if (!(env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_FCS_FDS)
+        && (env->cr[0] & CR0_PE_MASK)) {
+        fpcs = env->fpcs;
+        fpds = env->fpds;
+    } else {
+        fpcs = 0;
+        fpds = 0;
+    }
+
     if (data32) {
         /* 32 bit */
         cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
         cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
         cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
-        cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
-        cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
-        cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
-        cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
+        cpu_stl_data_ra(env, ptr + 12, env->fpip, retaddr); /* fpip */
+        cpu_stw_data_ra(env, ptr + 16, fpcs, retaddr); /* fpcs */
+        cpu_stw_data_ra(env, ptr + 18, 0, retaddr);
+        cpu_stl_data_ra(env, ptr + 20, env->fpdp, retaddr); /* fpdp */
+        cpu_stw_data_ra(env, ptr + 24, fpds, retaddr); /* fpds */
+        cpu_stw_data_ra(env, ptr + 26, 0, retaddr);
     } else {
         /* 16 bit */
         cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
         cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
         cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
-        cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
-        cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
-        cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
-        cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
+        cpu_stw_data_ra(env, ptr + 6, env->fpip, retaddr);
+        cpu_stw_data_ra(env, ptr + 8, fpcs, retaddr);
+        cpu_stw_data_ra(env, ptr + 10, env->fpdp, retaddr);
+        cpu_stw_data_ra(env, ptr + 12, fpds, retaddr);
     }
 }
 
@@ -2473,17 +2495,7 @@ void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
     }
 
     /* fninit */
-    env->fpus = 0;
-    env->fpstt = 0;
-    cpu_set_fpuc(env, 0x37f);
-    env->fptags[0] = 1;
-    env->fptags[1] = 1;
-    env->fptags[2] = 1;
-    env->fptags[3] = 1;
-    env->fptags[4] = 1;
-    env->fptags[5] = 1;
-    env->fptags[6] = 1;
-    env->fptags[7] = 1;
+    helper_fninit(env);
 }
 
 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 880bc45561..c26d343ab8 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -4486,6 +4486,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     target_ulong next_eip, tval;
     int rex_w, rex_r;
     target_ulong pc_start = s->base.pc_next;
+    /* For FCS, FIP, FDS and FDP. */
+    AddressParts last_addr;
+    TCGv ea;
+    bool update_fdp;
+    bool update_fip;
 
     s->pc_start = s->pc = pc_start;
     s->override = -1;
@@ -4506,6 +4511,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     rex_w = -1;
     rex_r = 0;
 
+    update_fip = true;
+    update_fdp = false;
+
  next_byte:
     b = x86_ldub_code(env, s);
     /* Collect prefixes.  */
@@ -5850,7 +5858,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         op = ((b & 7) << 3) | ((modrm >> 3) & 7);
         if (mod != 3) {
             /* memory op */
-            gen_lea_modrm(env, s, modrm);
+            update_fdp = true;
+            last_addr = gen_lea_modrm_0(env, s, modrm);
+            ea = gen_lea_modrm_1(s, last_addr);
+            gen_lea_v_seg(s, s->aflag, ea, last_addr.def_seg, s->override);
+
             switch(op) {
             case 0x00 ... 0x07: /* fxxxs */
             case 0x10 ... 0x17: /* fixxxl */
@@ -5976,19 +5988,23 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             case 0x0c: /* fldenv mem */
                 gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
+                update_fip = update_fdp = false;
                 break;
             case 0x0d: /* fldcw mem */
                 tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                     s->mem_index, MO_LEUW);
                 gen_helper_fldcw(cpu_env, s->tmp2_i32);
+                update_fip = update_fdp = false;
                 break;
             case 0x0e: /* fnstenv mem */
                 gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
+                update_fip = update_fdp = false;
                 break;
             case 0x0f: /* fnstcw mem */
                 gen_helper_fnstcw(s->tmp2_i32, cpu_env);
                 tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                     s->mem_index, MO_LEUW);
+                update_fip = update_fdp = false;
                 break;
             case 0x1d: /* fldt mem */
                 gen_helper_fldt_ST0(cpu_env, s->A0);
@@ -5999,14 +6015,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             case 0x2c: /* frstor mem */
                 gen_helper_frstor(cpu_env, s->A0, tcg_const_i32(dflag - 1));
+                update_fip = update_fdp = false;
                 break;
             case 0x2e: /* fnsave mem */
                 gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1));
+                update_fip = update_fdp = false;
                 break;
             case 0x2f: /* fnstsw mem */
                 gen_helper_fnstsw(s->tmp2_i32, cpu_env);
                 tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                     s->mem_index, MO_LEUW);
+                update_fip = update_fdp = false;
                 break;
             case 0x3c: /* fbld */
                 gen_helper_fbld_ST0(cpu_env, s->A0);
@@ -6047,6 +6066,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 case 0: /* fnop */
                     /* check exceptions (FreeBSD FPU probe) */
                     gen_helper_fwait(cpu_env);
+                    update_fip = update_fdp = false;
                     break;
                 default:
                     goto unknown_op;
@@ -6214,9 +6234,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     break;
                 case 2: /* fclex */
                     gen_helper_fclex(cpu_env);
+                    update_fip = update_fdp = false;
                     break;
                 case 3: /* fninit */
                     gen_helper_fninit(cpu_env);
+                    update_fip = update_fdp = false;
                     break;
                 case 4: /* fsetpm (287 only, just do nop here) */
                     break;
@@ -6337,6 +6359,27 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto unknown_op;
             }
         }
+
+
+        if (update_fip) {
+            tcg_gen_movi_tl(s->T0, pc_start - s->cs_base);
+            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, fpip));
+
+            tcg_gen_mov_tl(s->T0, cpu_seg_base[R_CS]);
+            tcg_gen_st16_tl(s->T0, cpu_env, offsetof(CPUX86State, fpcs));
+        }
+
+        if (update_fdp) {
+            if (s->override < 0) {
+                tcg_gen_mov_tl(s->A0, cpu_seg_base[last_addr.def_seg]);
+            } else {
+                tcg_gen_mov_tl(s->A0, cpu_seg_base[s->override]);
+            }
+            tcg_gen_st16_tl(s->A0, cpu_env, offsetof(CPUX86State, fpds));
+
+            ea = gen_lea_modrm_1(s, last_addr);
+            tcg_gen_st_tl(ea, cpu_env, offsetof(CPUX86State, fpdp));
+        }
         break;
         /************************/
         /* string ops */
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH v2] Set the correct env->fpip for x86 float instructions
  2021-04-30  2:19 [PATCH v2] Set the correct env->fpip for x86 float instructions Ziqiao Kong
@ 2021-04-30 18:19 ` Richard Henderson
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Henderson @ 2021-04-30 18:19 UTC (permalink / raw)
  To: Ziqiao Kong, qemu-devel; +Cc: pbonzini, ehabkost

On 4/29/21 7:19 PM, Ziqiao Kong wrote:
> @@ -1440,7 +1442,9 @@ typedef struct CPUX86State {
>       FPReg fpregs[8];
>       /* KVM-only so far */
>       uint16_t fpop;
> +    uint16_t fpcs;
>       uint64_t fpip;
> +    uint16_t fpds;
>       uint64_t fpdp;

Let's put all uint16_t together, just after fpop, to avoid extra padding 
between the uint64_t.

> +        cpu_stw_data_ra(env, ptr + 16, fpcs, retaddr); /* fpcs */
> +        cpu_stw_data_ra(env, ptr + 18, 0, retaddr);
...
> +        cpu_stw_data_ra(env, ptr + 24, fpds, retaddr); /* fpds */
> +        cpu_stw_data_ra(env, ptr + 26, 0, retaddr);

Better to continue to use stl here, to zero-extend the field.

> @@ -5850,7 +5858,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>           op = ((b & 7) << 3) | ((modrm >> 3) & 7);
>           if (mod != 3) {
>               /* memory op */
> -            gen_lea_modrm(env, s, modrm);
> +            update_fdp = true;

You should just move the declaration of update_fdp and last_addr into this 
block.  Then the store to fdp+fds goes at the end of this block.

> +            last_addr = gen_lea_modrm_0(env, s, modrm);
> +            ea = gen_lea_modrm_1(s, last_addr);
> +            gen_lea_v_seg(s, s->aflag, ea, last_addr.def_seg, s->override);

I think you should copy ea into a temporary here.

> +            tcg_gen_mov_tl(s->T0, cpu_seg_base[R_CS]);
> +            tcg_gen_st16_tl(s->T0, cpu_env, offsetof(CPUX86State, fpcs));

cpu_seg_base is the segment base, not the segment selector.

I believe that you want to copy offsetof(CPUX86State, segs[seg_reg].selector) 
into this field.

> +            ea = gen_lea_modrm_1(s, last_addr);
> +            tcg_gen_st_tl(ea, cpu_env, offsetof(CPUX86State, fpdp));

Use the temporary saved above instead of re-computing.


r~


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-04-30 18:26 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-30  2:19 [PATCH v2] Set the correct env->fpip for x86 float instructions Ziqiao Kong
2021-04-30 18:19 ` Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).