All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 1/2] target/i386: Read 8 bytes from cvttps2pi/cvtps2pi memory operands
@ 2022-08-30  3:48 Ricky Zhou
  2022-08-30  3:48 ` [PATCH v3 2/2] target/i386: Raise #GP on unaligned m128 accesses when required Ricky Zhou
  2022-09-19 17:33 ` [PATCH v3 1/2] target/i386: Read 8 bytes from cvttps2pi/cvtps2pi memory operands Paolo Bonzini
  0 siblings, 2 replies; 5+ messages in thread
From: Ricky Zhou @ 2022-08-30  3:48 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, richard.henderson, eduardo, Ricky Zhou

Before this change, emulation of cvttps2pi and cvtps2pi instructions
would read 16 bytes of memory instead of 8. The SDM states that
cvttps2pi takes a 64-bit memory location. The documentation for cvtps2pi
claims that it takes a 128-bit memory location, but as with cvttps2pi,
the operand is written as xmm/m64. I double-checked on real hardware
that both of these instructions only read 8 bytes.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Ricky Zhou <ricky@rzhou.org>
---
 target/i386/tcg/translate.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index b7972f0ff5..3ba5f76156 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3621,7 +3621,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
                 op2_offset = offsetof(CPUX86State,xmm_t0);
-                gen_ldo_env_A0(s, op2_offset);
+                if (b1) {
+                    gen_ldo_env_A0(s, op2_offset);
+                } else {
+                    gen_ldq_env_A0(s, op2_offset);
+                }
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
-- 
2.37.2



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v3 2/2] target/i386: Raise #GP on unaligned m128 accesses when required.
  2022-08-30  3:48 [PATCH v3 1/2] target/i386: Read 8 bytes from cvttps2pi/cvtps2pi memory operands Ricky Zhou
@ 2022-08-30  3:48 ` Ricky Zhou
  2022-09-17  2:14   ` Ricky Zhou
  2022-09-19 17:33 ` [PATCH v3 1/2] target/i386: Read 8 bytes from cvttps2pi/cvtps2pi memory operands Paolo Bonzini
  1 sibling, 1 reply; 5+ messages in thread
From: Ricky Zhou @ 2022-08-30  3:48 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, richard.henderson, eduardo, Ricky Zhou

Many instructions which load/store 128-bit values are supposed to
raise #GP when the memory operand isn't 16-byte aligned. This includes:
 - Instructions explicitly requiring memory alignment (Exceptions Type 1
   in the "AVX and SSE Instruction Exception Specification" section of
   the SDM)
 - Legacy SSE instructions that load/store 128-bit values (Exceptions
   Types 2 and 4).

This change sets MO_ALIGN_16 on 128-bit memory accesses that require
16-byte alignment. It adds cpu_record_sigbus and cpu_do_unaligned_access
hooks that simulate a #GP exception in qemu-user and qemu-system,
respectively.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/217
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Ricky Zhou <ricky@rzhou.org>
---
 target/i386/tcg/excp_helper.c        | 13 ++++++++
 target/i386/tcg/helper-tcg.h         | 28 ++++++++++-------
 target/i386/tcg/sysemu/excp_helper.c |  8 +++++
 target/i386/tcg/tcg-cpu.c            |  2 ++
 target/i386/tcg/translate.c          | 45 +++++++++++++++++-----------
 target/i386/tcg/user/excp_helper.c   |  7 +++++
 6 files changed, 74 insertions(+), 29 deletions(-)

diff --git a/target/i386/tcg/excp_helper.c b/target/i386/tcg/excp_helper.c
index c1ffa1c0ef..7c3c8dc7fe 100644
--- a/target/i386/tcg/excp_helper.c
+++ b/target/i386/tcg/excp_helper.c
@@ -140,3 +140,16 @@ G_NORETURN void raise_exception_ra(CPUX86State *env, int exception_index,
 {
     raise_interrupt2(env, exception_index, 0, 0, 0, retaddr);
 }
+
+G_NORETURN void handle_unaligned_access(CPUX86State *env, vaddr vaddr,
+                                        MMUAccessType access_type,
+                                        uintptr_t retaddr)
+{
+    /*
+     * Unaligned accesses are currently only triggered by SSE/AVX
+     * instructions that impose alignment requirements on memory
+     * operands. These instructions raise #GP(0) upon accessing an
+     * unaligned address.
+     */
+    raise_exception_ra(env, EXCP0D_GPF, retaddr);
+}
diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h
index 34167e2e29..cd1723389a 100644
--- a/target/i386/tcg/helper-tcg.h
+++ b/target/i386/tcg/helper-tcg.h
@@ -42,17 +42,6 @@ void x86_cpu_do_interrupt(CPUState *cpu);
 bool x86_cpu_exec_interrupt(CPUState *cpu, int int_req);
 #endif
 
-/* helper.c */
-#ifdef CONFIG_USER_ONLY
-void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr,
-                            MMUAccessType access_type,
-                            bool maperr, uintptr_t ra);
-#else
-bool x86_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
-                      MMUAccessType access_type, int mmu_idx,
-                      bool probe, uintptr_t retaddr);
-#endif
-
 void breakpoint_handler(CPUState *cs);
 
 /* n must be a constant to be efficient */
@@ -78,6 +67,23 @@ G_NORETURN void raise_exception_err_ra(CPUX86State *env, int exception_index,
                                        int error_code, uintptr_t retaddr);
 G_NORETURN void raise_interrupt(CPUX86State *nenv, int intno, int is_int,
                                 int error_code, int next_eip_addend);
+G_NORETURN void handle_unaligned_access(CPUX86State *env, vaddr vaddr,
+                                        MMUAccessType access_type,
+                                        uintptr_t retaddr);
+#ifdef CONFIG_USER_ONLY
+void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr,
+                            MMUAccessType access_type,
+                            bool maperr, uintptr_t ra);
+void x86_cpu_record_sigbus(CPUState *cs, vaddr addr,
+                           MMUAccessType access_type, uintptr_t ra);
+#else
+bool x86_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
+                      MMUAccessType access_type, int mmu_idx,
+                      bool probe, uintptr_t retaddr);
+G_NORETURN void x86_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
+                                            MMUAccessType access_type,
+                                            int mmu_idx, uintptr_t retaddr);
+#endif
 
 /* cc_helper.c */
 extern const uint8_t parity_table[256];
diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c
index 48feba7e75..796dc2a1f3 100644
--- a/target/i386/tcg/sysemu/excp_helper.c
+++ b/target/i386/tcg/sysemu/excp_helper.c
@@ -439,3 +439,11 @@ bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
     }
     return true;
 }
+
+G_NORETURN void x86_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
+                                            MMUAccessType access_type,
+                                            int mmu_idx, uintptr_t retaddr)
+{
+    X86CPU *cpu = X86_CPU(cs);
+    handle_unaligned_access(&cpu->env, vaddr, access_type, retaddr);
+}
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
index 6fdfdf9598..d3c2b8fb49 100644
--- a/target/i386/tcg/tcg-cpu.c
+++ b/target/i386/tcg/tcg-cpu.c
@@ -75,10 +75,12 @@ static const struct TCGCPUOps x86_tcg_ops = {
 #ifdef CONFIG_USER_ONLY
     .fake_user_interrupt = x86_cpu_do_interrupt,
     .record_sigsegv = x86_cpu_record_sigsegv,
+    .record_sigbus = x86_cpu_record_sigbus,
 #else
     .tlb_fill = x86_cpu_tlb_fill,
     .do_interrupt = x86_cpu_do_interrupt,
     .cpu_exec_interrupt = x86_cpu_exec_interrupt,
+    .do_unaligned_access = x86_cpu_do_unaligned_access,
     .debug_excp_handler = breakpoint_handler,
     .debug_check_breakpoint = x86_debug_check_breakpoint,
 #endif /* !CONFIG_USER_ONLY */
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 3ba5f76156..29104a49b9 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2731,21 +2731,23 @@ static inline void gen_stq_env_A0(DisasContext *s, int offset)
     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ);
 }
 
-static inline void gen_ldo_env_A0(DisasContext *s, int offset)
+static inline void gen_ldo_env_A0(DisasContext *s, int offset, bool align)
 {
     int mem_index = s->mem_index;
-    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEUQ);
+    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index,
+                        MO_LEUQ | (align ? MO_ALIGN_16 : 0));
     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
 }
 
-static inline void gen_sto_env_A0(DisasContext *s, int offset)
+static inline void gen_sto_env_A0(DisasContext *s, int offset, bool align)
 {
     int mem_index = s->mem_index;
     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
-    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEUQ);
+    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index,
+                        MO_LEUQ | (align ? MO_ALIGN_16 : 0));
     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
     tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
@@ -3054,7 +3056,7 @@ static const struct SSEOpHelper_epp sse_op_table6[256] = {
     [0x25] = SSE41_OP(pmovsxdq),
     [0x28] = SSE41_OP(pmuldq),
     [0x29] = SSE41_OP(pcmpeqq),
-    [0x2a] = SSE41_SPECIAL, /* movntqda */
+    [0x2a] = SSE41_SPECIAL, /* movntdqa */
     [0x2b] = SSE41_OP(packusdw),
     [0x30] = SSE41_OP(pmovzxbw),
     [0x31] = SSE41_OP(pmovzxbd),
@@ -3194,17 +3196,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x1e7: /* movntdq */
         case 0x02b: /* movntps */
-        case 0x12b: /* movntps */
+        case 0x12b: /* movntpd */
             if (mod == 3)
                 goto illegal_op;
             gen_lea_modrm(env, s, modrm);
-            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
+            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]), true);
             break;
         case 0x3f0: /* lddqu */
             if (mod == 3)
                 goto illegal_op;
             gen_lea_modrm(env, s, modrm);
-            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
+            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]), false);
             break;
         case 0x22b: /* movntss */
         case 0x32b: /* movntsd */
@@ -3273,7 +3275,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x26f: /* movdqu xmm, ea */
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
-                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
+                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]),
+                               /* movaps, movapd, movdqa */
+                               b == 0x028 || b == 0x128 || b == 0x16f);
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
@@ -3331,7 +3335,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x212: /* movsldup */
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
-                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
+                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]),
+                               !(s->prefix & PREFIX_VEX));
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
@@ -3373,7 +3378,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x216: /* movshdup */
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
-                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
+                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]),
+                               !(s->prefix & PREFIX_VEX));
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
@@ -3465,7 +3471,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x27f: /* movdqu ea, xmm */
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
-                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
+                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]),
+                               /* movaps, movapd, movdqa */
+                               b == 0x029 || b == 0x129 || b == 0x17f);
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
@@ -3622,7 +3630,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_lea_modrm(env, s, modrm);
                 op2_offset = offsetof(CPUX86State,xmm_t0);
                 if (b1) {
-                    gen_ldo_env_A0(s, op2_offset);
+                    gen_ldo_env_A0(s, op2_offset, true);
                 } else {
                     gen_ldq_env_A0(s, op2_offset);
                 }
@@ -3810,11 +3818,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
                                         offsetof(ZMMReg, ZMM_W(0)));
                         break;
-                    case 0x2a:            /* movntqda */
-                        gen_ldo_env_A0(s, op1_offset);
+                    case 0x2a:            /* movntdqa */
+                        gen_ldo_env_A0(s, op1_offset, true);
                         return;
                     default:
-                        gen_ldo_env_A0(s, op2_offset);
+                        gen_ldo_env_A0(s, op2_offset,
+                                       !(s->prefix & PREFIX_VEX));
                     }
                 }
             } else {
@@ -4355,7 +4364,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
                     gen_lea_modrm(env, s, modrm);
-                    gen_ldo_env_A0(s, op2_offset);
+                    gen_ldo_env_A0(s, op2_offset, !(s->prefix & PREFIX_VEX));
                 }
             } else {
                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
@@ -4473,7 +4482,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     break;
                 default:
                     /* 128 bit access */
-                    gen_ldo_env_A0(s, op2_offset);
+                    gen_ldo_env_A0(s, op2_offset, !(s->prefix & PREFIX_VEX));
                     break;
                 }
             } else {
diff --git a/target/i386/tcg/user/excp_helper.c b/target/i386/tcg/user/excp_helper.c
index cd507e2a1b..b3bdb7831a 100644
--- a/target/i386/tcg/user/excp_helper.c
+++ b/target/i386/tcg/user/excp_helper.c
@@ -48,3 +48,10 @@ void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr,
 
     cpu_loop_exit_restore(cs, ra);
 }
+
+void x86_cpu_record_sigbus(CPUState *cs, vaddr addr,
+                           MMUAccessType access_type, uintptr_t ra)
+{
+    X86CPU *cpu = X86_CPU(cs);
+    handle_unaligned_access(&cpu->env, addr, access_type, ra);
+}
-- 
2.37.2



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v3 2/2] target/i386: Raise #GP on unaligned m128 accesses when required.
  2022-08-30  3:48 ` [PATCH v3 2/2] target/i386: Raise #GP on unaligned m128 accesses when required Ricky Zhou
@ 2022-09-17  2:14   ` Ricky Zhou
  2022-09-19 17:34     ` Paolo Bonzini
  0 siblings, 1 reply; 5+ messages in thread
From: Ricky Zhou @ 2022-09-17  2:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, Richard Henderson, eduardo

Checking back on the status of patch, I noticed that there are some
exciting patches out for AVX support that may conflict with this,
though I see that they are still in the RFC phase:
https://patchew.org/QEMU/20220911230418.340941-1-pbonzini@redhat.com/

I'm not sure how far away AVX support is from being merged, but do let
me know if there's any preference re applying this change vs. waiting
to rebase on top of the AVX support changes, etc.

Thanks!
Ricky

On Mon, Aug 29, 2022 at 8:48 PM Ricky Zhou <ricky@rzhou.org> wrote:
>
> Many instructions which load/store 128-bit values are supposed to
> raise #GP when the memory operand isn't 16-byte aligned. This includes:
>  - Instructions explicitly requiring memory alignment (Exceptions Type 1
>    in the "AVX and SSE Instruction Exception Specification" section of
>    the SDM)
>  - Legacy SSE instructions that load/store 128-bit values (Exceptions
>    Types 2 and 4).
>
> This change sets MO_ALIGN_16 on 128-bit memory accesses that require
> 16-byte alignment. It adds cpu_record_sigbus and cpu_do_unaligned_access
> hooks that simulate a #GP exception in qemu-user and qemu-system,
> respectively.
>
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/217
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> Signed-off-by: Ricky Zhou <ricky@rzhou.org>
> ---
>  target/i386/tcg/excp_helper.c        | 13 ++++++++
>  target/i386/tcg/helper-tcg.h         | 28 ++++++++++-------
>  target/i386/tcg/sysemu/excp_helper.c |  8 +++++
>  target/i386/tcg/tcg-cpu.c            |  2 ++
>  target/i386/tcg/translate.c          | 45 +++++++++++++++++-----------
>  target/i386/tcg/user/excp_helper.c   |  7 +++++
>  6 files changed, 74 insertions(+), 29 deletions(-)
>
> diff --git a/target/i386/tcg/excp_helper.c b/target/i386/tcg/excp_helper.c
> index c1ffa1c0ef..7c3c8dc7fe 100644
> --- a/target/i386/tcg/excp_helper.c
> +++ b/target/i386/tcg/excp_helper.c
> @@ -140,3 +140,16 @@ G_NORETURN void raise_exception_ra(CPUX86State *env, int exception_index,
>  {
>      raise_interrupt2(env, exception_index, 0, 0, 0, retaddr);
>  }
> +
> +G_NORETURN void handle_unaligned_access(CPUX86State *env, vaddr vaddr,
> +                                        MMUAccessType access_type,
> +                                        uintptr_t retaddr)
> +{
> +    /*
> +     * Unaligned accesses are currently only triggered by SSE/AVX
> +     * instructions that impose alignment requirements on memory
> +     * operands. These instructions raise #GP(0) upon accessing an
> +     * unaligned address.
> +     */
> +    raise_exception_ra(env, EXCP0D_GPF, retaddr);
> +}
> diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h
> index 34167e2e29..cd1723389a 100644
> --- a/target/i386/tcg/helper-tcg.h
> +++ b/target/i386/tcg/helper-tcg.h
> @@ -42,17 +42,6 @@ void x86_cpu_do_interrupt(CPUState *cpu);
>  bool x86_cpu_exec_interrupt(CPUState *cpu, int int_req);
>  #endif
>
> -/* helper.c */
> -#ifdef CONFIG_USER_ONLY
> -void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr,
> -                            MMUAccessType access_type,
> -                            bool maperr, uintptr_t ra);
> -#else
> -bool x86_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
> -                      MMUAccessType access_type, int mmu_idx,
> -                      bool probe, uintptr_t retaddr);
> -#endif
> -
>  void breakpoint_handler(CPUState *cs);
>
>  /* n must be a constant to be efficient */
> @@ -78,6 +67,23 @@ G_NORETURN void raise_exception_err_ra(CPUX86State *env, int exception_index,
>                                         int error_code, uintptr_t retaddr);
>  G_NORETURN void raise_interrupt(CPUX86State *nenv, int intno, int is_int,
>                                  int error_code, int next_eip_addend);
> +G_NORETURN void handle_unaligned_access(CPUX86State *env, vaddr vaddr,
> +                                        MMUAccessType access_type,
> +                                        uintptr_t retaddr);
> +#ifdef CONFIG_USER_ONLY
> +void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr,
> +                            MMUAccessType access_type,
> +                            bool maperr, uintptr_t ra);
> +void x86_cpu_record_sigbus(CPUState *cs, vaddr addr,
> +                           MMUAccessType access_type, uintptr_t ra);
> +#else
> +bool x86_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
> +                      MMUAccessType access_type, int mmu_idx,
> +                      bool probe, uintptr_t retaddr);
> +G_NORETURN void x86_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
> +                                            MMUAccessType access_type,
> +                                            int mmu_idx, uintptr_t retaddr);
> +#endif
>
>  /* cc_helper.c */
>  extern const uint8_t parity_table[256];
> diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c
> index 48feba7e75..796dc2a1f3 100644
> --- a/target/i386/tcg/sysemu/excp_helper.c
> +++ b/target/i386/tcg/sysemu/excp_helper.c
> @@ -439,3 +439,11 @@ bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
>      }
>      return true;
>  }
> +
> +G_NORETURN void x86_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
> +                                            MMUAccessType access_type,
> +                                            int mmu_idx, uintptr_t retaddr)
> +{
> +    X86CPU *cpu = X86_CPU(cs);
> +    handle_unaligned_access(&cpu->env, vaddr, access_type, retaddr);
> +}
> diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
> index 6fdfdf9598..d3c2b8fb49 100644
> --- a/target/i386/tcg/tcg-cpu.c
> +++ b/target/i386/tcg/tcg-cpu.c
> @@ -75,10 +75,12 @@ static const struct TCGCPUOps x86_tcg_ops = {
>  #ifdef CONFIG_USER_ONLY
>      .fake_user_interrupt = x86_cpu_do_interrupt,
>      .record_sigsegv = x86_cpu_record_sigsegv,
> +    .record_sigbus = x86_cpu_record_sigbus,
>  #else
>      .tlb_fill = x86_cpu_tlb_fill,
>      .do_interrupt = x86_cpu_do_interrupt,
>      .cpu_exec_interrupt = x86_cpu_exec_interrupt,
> +    .do_unaligned_access = x86_cpu_do_unaligned_access,
>      .debug_excp_handler = breakpoint_handler,
>      .debug_check_breakpoint = x86_debug_check_breakpoint,
>  #endif /* !CONFIG_USER_ONLY */
> diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
> index 3ba5f76156..29104a49b9 100644
> --- a/target/i386/tcg/translate.c
> +++ b/target/i386/tcg/translate.c
> @@ -2731,21 +2731,23 @@ static inline void gen_stq_env_A0(DisasContext *s, int offset)
>      tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ);
>  }
>
> -static inline void gen_ldo_env_A0(DisasContext *s, int offset)
> +static inline void gen_ldo_env_A0(DisasContext *s, int offset, bool align)
>  {
>      int mem_index = s->mem_index;
> -    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEUQ);
> +    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index,
> +                        MO_LEUQ | (align ? MO_ALIGN_16 : 0));
>      tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
>      tcg_gen_addi_tl(s->tmp0, s->A0, 8);
>      tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
>      tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
>  }
>
> -static inline void gen_sto_env_A0(DisasContext *s, int offset)
> +static inline void gen_sto_env_A0(DisasContext *s, int offset, bool align)
>  {
>      int mem_index = s->mem_index;
>      tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
> -    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEUQ);
> +    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index,
> +                        MO_LEUQ | (align ? MO_ALIGN_16 : 0));
>      tcg_gen_addi_tl(s->tmp0, s->A0, 8);
>      tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
>      tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
> @@ -3054,7 +3056,7 @@ static const struct SSEOpHelper_epp sse_op_table6[256] = {
>      [0x25] = SSE41_OP(pmovsxdq),
>      [0x28] = SSE41_OP(pmuldq),
>      [0x29] = SSE41_OP(pcmpeqq),
> -    [0x2a] = SSE41_SPECIAL, /* movntqda */
> +    [0x2a] = SSE41_SPECIAL, /* movntdqa */
>      [0x2b] = SSE41_OP(packusdw),
>      [0x30] = SSE41_OP(pmovzxbw),
>      [0x31] = SSE41_OP(pmovzxbd),
> @@ -3194,17 +3196,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>              break;
>          case 0x1e7: /* movntdq */
>          case 0x02b: /* movntps */
> -        case 0x12b: /* movntps */
> +        case 0x12b: /* movntpd */
>              if (mod == 3)
>                  goto illegal_op;
>              gen_lea_modrm(env, s, modrm);
> -            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
> +            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]), true);
>              break;
>          case 0x3f0: /* lddqu */
>              if (mod == 3)
>                  goto illegal_op;
>              gen_lea_modrm(env, s, modrm);
> -            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
> +            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]), false);
>              break;
>          case 0x22b: /* movntss */
>          case 0x32b: /* movntsd */
> @@ -3273,7 +3275,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>          case 0x26f: /* movdqu xmm, ea */
>              if (mod != 3) {
>                  gen_lea_modrm(env, s, modrm);
> -                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
> +                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]),
> +                               /* movaps, movapd, movdqa */
> +                               b == 0x028 || b == 0x128 || b == 0x16f);
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
>                  gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
> @@ -3331,7 +3335,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>          case 0x212: /* movsldup */
>              if (mod != 3) {
>                  gen_lea_modrm(env, s, modrm);
> -                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
> +                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]),
> +                               !(s->prefix & PREFIX_VEX));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
>                  gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
> @@ -3373,7 +3378,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>          case 0x216: /* movshdup */
>              if (mod != 3) {
>                  gen_lea_modrm(env, s, modrm);
> -                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
> +                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]),
> +                               !(s->prefix & PREFIX_VEX));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
>                  gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
> @@ -3465,7 +3471,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>          case 0x27f: /* movdqu ea, xmm */
>              if (mod != 3) {
>                  gen_lea_modrm(env, s, modrm);
> -                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
> +                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]),
> +                               /* movaps, movapd, movdqa */
> +                               b == 0x029 || b == 0x129 || b == 0x17f);
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
>                  gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
> @@ -3622,7 +3630,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  gen_lea_modrm(env, s, modrm);
>                  op2_offset = offsetof(CPUX86State,xmm_t0);
>                  if (b1) {
> -                    gen_ldo_env_A0(s, op2_offset);
> +                    gen_ldo_env_A0(s, op2_offset, true);
>                  } else {
>                      gen_ldq_env_A0(s, op2_offset);
>                  }
> @@ -3810,11 +3818,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                          tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
>                                          offsetof(ZMMReg, ZMM_W(0)));
>                          break;
> -                    case 0x2a:            /* movntqda */
> -                        gen_ldo_env_A0(s, op1_offset);
> +                    case 0x2a:            /* movntdqa */
> +                        gen_ldo_env_A0(s, op1_offset, true);
>                          return;
>                      default:
> -                        gen_ldo_env_A0(s, op2_offset);
> +                        gen_ldo_env_A0(s, op2_offset,
> +                                       !(s->prefix & PREFIX_VEX));
>                      }
>                  }
>              } else {
> @@ -4355,7 +4364,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  } else {
>                      op2_offset = offsetof(CPUX86State,xmm_t0);
>                      gen_lea_modrm(env, s, modrm);
> -                    gen_ldo_env_A0(s, op2_offset);
> +                    gen_ldo_env_A0(s, op2_offset, !(s->prefix & PREFIX_VEX));
>                  }
>              } else {
>                  op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
> @@ -4473,7 +4482,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      break;
>                  default:
>                      /* 128 bit access */
> -                    gen_ldo_env_A0(s, op2_offset);
> +                    gen_ldo_env_A0(s, op2_offset, !(s->prefix & PREFIX_VEX));
>                      break;
>                  }
>              } else {
> diff --git a/target/i386/tcg/user/excp_helper.c b/target/i386/tcg/user/excp_helper.c
> index cd507e2a1b..b3bdb7831a 100644
> --- a/target/i386/tcg/user/excp_helper.c
> +++ b/target/i386/tcg/user/excp_helper.c
> @@ -48,3 +48,10 @@ void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr,
>
>      cpu_loop_exit_restore(cs, ra);
>  }
> +
> +void x86_cpu_record_sigbus(CPUState *cs, vaddr addr,
> +                           MMUAccessType access_type, uintptr_t ra)
> +{
> +    X86CPU *cpu = X86_CPU(cs);
> +    handle_unaligned_access(&cpu->env, addr, access_type, ra);
> +}
> --
> 2.37.2
>


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3 1/2] target/i386: Read 8 bytes from cvttps2pi/cvtps2pi memory operands
  2022-08-30  3:48 [PATCH v3 1/2] target/i386: Read 8 bytes from cvttps2pi/cvtps2pi memory operands Ricky Zhou
  2022-08-30  3:48 ` [PATCH v3 2/2] target/i386: Raise #GP on unaligned m128 accesses when required Ricky Zhou
@ 2022-09-19 17:33 ` Paolo Bonzini
  1 sibling, 0 replies; 5+ messages in thread
From: Paolo Bonzini @ 2022-09-19 17:33 UTC (permalink / raw)
  To: Ricky Zhou; +Cc: qemu-devel, richard.henderson, eduardo

Hi, I think this is broken for big endian systems because ldq expects
a pointer to xmm_t0.L(0) while ldo expects a pointer to xmm_t0.

I will fix the bug in my new AVX decoder though, where it is also
present. So thanks for the report!

Paolo

On Tue, Aug 30, 2022 at 5:48 AM Ricky Zhou <ricky@rzhou.org> wrote:
>
> Before this change, emulation of cvttps2pi and cvtps2pi instructions
> would read 16 bytes of memory instead of 8. The SDM states that
> cvttps2pi takes a 64-bit memory location. The documentation for cvtps2pi
> claims that it takes a 128-bit memory location, but as with cvttps2pi,
> the operand is written as xmm/m64. I double-checked on real hardware
> that both of these instructions only read 8 bytes.
>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> Signed-off-by: Ricky Zhou <ricky@rzhou.org>
> ---
>  target/i386/tcg/translate.c | 6 +++++-
>  1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
> index b7972f0ff5..3ba5f76156 100644
> --- a/target/i386/tcg/translate.c
> +++ b/target/i386/tcg/translate.c
> @@ -3621,7 +3621,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>              if (mod != 3) {
>                  gen_lea_modrm(env, s, modrm);
>                  op2_offset = offsetof(CPUX86State,xmm_t0);
> -                gen_ldo_env_A0(s, op2_offset);
> +                if (b1) {
> +                    gen_ldo_env_A0(s, op2_offset);
> +                } else {
> +                    gen_ldq_env_A0(s, op2_offset);
> +                }
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
>                  op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
> --
> 2.37.2
>



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3 2/2] target/i386: Raise #GP on unaligned m128 accesses when required.
  2022-09-17  2:14   ` Ricky Zhou
@ 2022-09-19 17:34     ` Paolo Bonzini
  0 siblings, 0 replies; 5+ messages in thread
From: Paolo Bonzini @ 2022-09-19 17:34 UTC (permalink / raw)
  To: Ricky Zhou; +Cc: qemu-devel, Richard Henderson, eduardo

Hi,

I will merge this patch already, but with simpler code that doesn't
look at PREFIX_VEX. The changes to the helpers and the addition of the
aligned argument help with the new decoder as well, and I will build
on top of them in the next submission of the AVX patches.

Thanks!

Paolo

On Sat, Sep 17, 2022 at 4:15 AM Ricky Zhou <ricky@rzhou.org> wrote:
>
> Checking back on the status of patch, I noticed that there are some
> exciting patches out for AVX support that may conflict with this,
> though I see that they are still in the RFC phase:
> https://patchew.org/QEMU/20220911230418.340941-1-pbonzini@redhat.com/
>
> I'm not sure how far away AVX support is from being merged, but do let
> me know if there's any preference re applying this change vs. waiting
> to rebase on top of the AVX support changes, etc.
>
> Thanks!
> Ricky
>
> On Mon, Aug 29, 2022 at 8:48 PM Ricky Zhou <ricky@rzhou.org> wrote:
> >
> > Many instructions which load/store 128-bit values are supposed to
> > raise #GP when the memory operand isn't 16-byte aligned. This includes:
> >  - Instructions explicitly requiring memory alignment (Exceptions Type 1
> >    in the "AVX and SSE Instruction Exception Specification" section of
> >    the SDM)
> >  - Legacy SSE instructions that load/store 128-bit values (Exceptions
> >    Types 2 and 4).
> >
> > This change sets MO_ALIGN_16 on 128-bit memory accesses that require
> > 16-byte alignment. It adds cpu_record_sigbus and cpu_do_unaligned_access
> > hooks that simulate a #GP exception in qemu-user and qemu-system,
> > respectively.
> >
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/217
> > Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> > Signed-off-by: Ricky Zhou <ricky@rzhou.org>
> > ---
> >  target/i386/tcg/excp_helper.c        | 13 ++++++++
> >  target/i386/tcg/helper-tcg.h         | 28 ++++++++++-------
> >  target/i386/tcg/sysemu/excp_helper.c |  8 +++++
> >  target/i386/tcg/tcg-cpu.c            |  2 ++
> >  target/i386/tcg/translate.c          | 45 +++++++++++++++++-----------
> >  target/i386/tcg/user/excp_helper.c   |  7 +++++
> >  6 files changed, 74 insertions(+), 29 deletions(-)
> >
> > diff --git a/target/i386/tcg/excp_helper.c b/target/i386/tcg/excp_helper.c
> > index c1ffa1c0ef..7c3c8dc7fe 100644
> > --- a/target/i386/tcg/excp_helper.c
> > +++ b/target/i386/tcg/excp_helper.c
> > @@ -140,3 +140,16 @@ G_NORETURN void raise_exception_ra(CPUX86State *env, int exception_index,
> >  {
> >      raise_interrupt2(env, exception_index, 0, 0, 0, retaddr);
> >  }
> > +
> > +G_NORETURN void handle_unaligned_access(CPUX86State *env, vaddr vaddr,
> > +                                        MMUAccessType access_type,
> > +                                        uintptr_t retaddr)
> > +{
> > +    /*
> > +     * Unaligned accesses are currently only triggered by SSE/AVX
> > +     * instructions that impose alignment requirements on memory
> > +     * operands. These instructions raise #GP(0) upon accessing an
> > +     * unaligned address.
> > +     */
> > +    raise_exception_ra(env, EXCP0D_GPF, retaddr);
> > +}
> > diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h
> > index 34167e2e29..cd1723389a 100644
> > --- a/target/i386/tcg/helper-tcg.h
> > +++ b/target/i386/tcg/helper-tcg.h
> > @@ -42,17 +42,6 @@ void x86_cpu_do_interrupt(CPUState *cpu);
> >  bool x86_cpu_exec_interrupt(CPUState *cpu, int int_req);
> >  #endif
> >
> > -/* helper.c */
> > -#ifdef CONFIG_USER_ONLY
> > -void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr,
> > -                            MMUAccessType access_type,
> > -                            bool maperr, uintptr_t ra);
> > -#else
> > -bool x86_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
> > -                      MMUAccessType access_type, int mmu_idx,
> > -                      bool probe, uintptr_t retaddr);
> > -#endif
> > -
> >  void breakpoint_handler(CPUState *cs);
> >
> >  /* n must be a constant to be efficient */
> > @@ -78,6 +67,23 @@ G_NORETURN void raise_exception_err_ra(CPUX86State *env, int exception_index,
> >                                         int error_code, uintptr_t retaddr);
> >  G_NORETURN void raise_interrupt(CPUX86State *nenv, int intno, int is_int,
> >                                  int error_code, int next_eip_addend);
> > +G_NORETURN void handle_unaligned_access(CPUX86State *env, vaddr vaddr,
> > +                                        MMUAccessType access_type,
> > +                                        uintptr_t retaddr);
> > +#ifdef CONFIG_USER_ONLY
> > +void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr,
> > +                            MMUAccessType access_type,
> > +                            bool maperr, uintptr_t ra);
> > +void x86_cpu_record_sigbus(CPUState *cs, vaddr addr,
> > +                           MMUAccessType access_type, uintptr_t ra);
> > +#else
> > +bool x86_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
> > +                      MMUAccessType access_type, int mmu_idx,
> > +                      bool probe, uintptr_t retaddr);
> > +G_NORETURN void x86_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
> > +                                            MMUAccessType access_type,
> > +                                            int mmu_idx, uintptr_t retaddr);
> > +#endif
> >
> >  /* cc_helper.c */
> >  extern const uint8_t parity_table[256];
> > diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c
> > index 48feba7e75..796dc2a1f3 100644
> > --- a/target/i386/tcg/sysemu/excp_helper.c
> > +++ b/target/i386/tcg/sysemu/excp_helper.c
> > @@ -439,3 +439,11 @@ bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
> >      }
> >      return true;
> >  }
> > +
> > +G_NORETURN void x86_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
> > +                                            MMUAccessType access_type,
> > +                                            int mmu_idx, uintptr_t retaddr)
> > +{
> > +    X86CPU *cpu = X86_CPU(cs);
> > +    handle_unaligned_access(&cpu->env, vaddr, access_type, retaddr);
> > +}
> > diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
> > index 6fdfdf9598..d3c2b8fb49 100644
> > --- a/target/i386/tcg/tcg-cpu.c
> > +++ b/target/i386/tcg/tcg-cpu.c
> > @@ -75,10 +75,12 @@ static const struct TCGCPUOps x86_tcg_ops = {
> >  #ifdef CONFIG_USER_ONLY
> >      .fake_user_interrupt = x86_cpu_do_interrupt,
> >      .record_sigsegv = x86_cpu_record_sigsegv,
> > +    .record_sigbus = x86_cpu_record_sigbus,
> >  #else
> >      .tlb_fill = x86_cpu_tlb_fill,
> >      .do_interrupt = x86_cpu_do_interrupt,
> >      .cpu_exec_interrupt = x86_cpu_exec_interrupt,
> > +    .do_unaligned_access = x86_cpu_do_unaligned_access,
> >      .debug_excp_handler = breakpoint_handler,
> >      .debug_check_breakpoint = x86_debug_check_breakpoint,
> >  #endif /* !CONFIG_USER_ONLY */
> > diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
> > index 3ba5f76156..29104a49b9 100644
> > --- a/target/i386/tcg/translate.c
> > +++ b/target/i386/tcg/translate.c
> > @@ -2731,21 +2731,23 @@ static inline void gen_stq_env_A0(DisasContext *s, int offset)
> >      tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ);
> >  }
> >
> > -static inline void gen_ldo_env_A0(DisasContext *s, int offset)
> > +static inline void gen_ldo_env_A0(DisasContext *s, int offset, bool align)
> >  {
> >      int mem_index = s->mem_index;
> > -    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEUQ);
> > +    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index,
> > +                        MO_LEUQ | (align ? MO_ALIGN_16 : 0));
> >      tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
> >      tcg_gen_addi_tl(s->tmp0, s->A0, 8);
> >      tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
> >      tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
> >  }
> >
> > -static inline void gen_sto_env_A0(DisasContext *s, int offset)
> > +static inline void gen_sto_env_A0(DisasContext *s, int offset, bool align)
> >  {
> >      int mem_index = s->mem_index;
> >      tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
> > -    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEUQ);
> > +    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index,
> > +                        MO_LEUQ | (align ? MO_ALIGN_16 : 0));
> >      tcg_gen_addi_tl(s->tmp0, s->A0, 8);
> >      tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
> >      tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
> > @@ -3054,7 +3056,7 @@ static const struct SSEOpHelper_epp sse_op_table6[256] = {
> >      [0x25] = SSE41_OP(pmovsxdq),
> >      [0x28] = SSE41_OP(pmuldq),
> >      [0x29] = SSE41_OP(pcmpeqq),
> > -    [0x2a] = SSE41_SPECIAL, /* movntqda */
> > +    [0x2a] = SSE41_SPECIAL, /* movntdqa */
> >      [0x2b] = SSE41_OP(packusdw),
> >      [0x30] = SSE41_OP(pmovzxbw),
> >      [0x31] = SSE41_OP(pmovzxbd),
> > @@ -3194,17 +3196,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
> >              break;
> >          case 0x1e7: /* movntdq */
> >          case 0x02b: /* movntps */
> > -        case 0x12b: /* movntps */
> > +        case 0x12b: /* movntpd */
> >              if (mod == 3)
> >                  goto illegal_op;
> >              gen_lea_modrm(env, s, modrm);
> > -            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
> > +            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]), true);
> >              break;
> >          case 0x3f0: /* lddqu */
> >              if (mod == 3)
> >                  goto illegal_op;
> >              gen_lea_modrm(env, s, modrm);
> > -            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
> > +            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]), false);
> >              break;
> >          case 0x22b: /* movntss */
> >          case 0x32b: /* movntsd */
> > @@ -3273,7 +3275,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
> >          case 0x26f: /* movdqu xmm, ea */
> >              if (mod != 3) {
> >                  gen_lea_modrm(env, s, modrm);
> > -                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
> > +                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]),
> > +                               /* movaps, movapd, movdqa */
> > +                               b == 0x028 || b == 0x128 || b == 0x16f);
> >              } else {
> >                  rm = (modrm & 7) | REX_B(s);
> >                  gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
> > @@ -3331,7 +3335,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
> >          case 0x212: /* movsldup */
> >              if (mod != 3) {
> >                  gen_lea_modrm(env, s, modrm);
> > -                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
> > +                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]),
> > +                               !(s->prefix & PREFIX_VEX));
> >              } else {
> >                  rm = (modrm & 7) | REX_B(s);
> >                  gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
> > @@ -3373,7 +3378,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
> >          case 0x216: /* movshdup */
> >              if (mod != 3) {
> >                  gen_lea_modrm(env, s, modrm);
> > -                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
> > +                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]),
> > +                               !(s->prefix & PREFIX_VEX));
> >              } else {
> >                  rm = (modrm & 7) | REX_B(s);
> >                  gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
> > @@ -3465,7 +3471,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
> >          case 0x27f: /* movdqu ea, xmm */
> >              if (mod != 3) {
> >                  gen_lea_modrm(env, s, modrm);
> > -                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
> > +                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]),
> > +                               /* movaps, movapd, movdqa */
> > +                               b == 0x029 || b == 0x129 || b == 0x17f);
> >              } else {
> >                  rm = (modrm & 7) | REX_B(s);
> >                  gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
> > @@ -3622,7 +3630,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
> >                  gen_lea_modrm(env, s, modrm);
> >                  op2_offset = offsetof(CPUX86State,xmm_t0);
> >                  if (b1) {
> > -                    gen_ldo_env_A0(s, op2_offset);
> > +                    gen_ldo_env_A0(s, op2_offset, true);
> >                  } else {
> >                      gen_ldq_env_A0(s, op2_offset);
> >                  }
> > @@ -3810,11 +3818,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
> >                          tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
> >                                          offsetof(ZMMReg, ZMM_W(0)));
> >                          break;
> > -                    case 0x2a:            /* movntqda */
> > -                        gen_ldo_env_A0(s, op1_offset);
> > +                    case 0x2a:            /* movntdqa */
> > +                        gen_ldo_env_A0(s, op1_offset, true);
> >                          return;
> >                      default:
> > -                        gen_ldo_env_A0(s, op2_offset);
> > +                        gen_ldo_env_A0(s, op2_offset,
> > +                                       !(s->prefix & PREFIX_VEX));
> >                      }
> >                  }
> >              } else {
> > @@ -4355,7 +4364,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
> >                  } else {
> >                      op2_offset = offsetof(CPUX86State,xmm_t0);
> >                      gen_lea_modrm(env, s, modrm);
> > -                    gen_ldo_env_A0(s, op2_offset);
> > +                    gen_ldo_env_A0(s, op2_offset, !(s->prefix & PREFIX_VEX));
> >                  }
> >              } else {
> >                  op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
> > @@ -4473,7 +4482,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
> >                      break;
> >                  default:
> >                      /* 128 bit access */
> > -                    gen_ldo_env_A0(s, op2_offset);
> > +                    gen_ldo_env_A0(s, op2_offset, !(s->prefix & PREFIX_VEX));
> >                      break;
> >                  }
> >              } else {
> > diff --git a/target/i386/tcg/user/excp_helper.c b/target/i386/tcg/user/excp_helper.c
> > index cd507e2a1b..b3bdb7831a 100644
> > --- a/target/i386/tcg/user/excp_helper.c
> > +++ b/target/i386/tcg/user/excp_helper.c
> > @@ -48,3 +48,10 @@ void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr,
> >
> >      cpu_loop_exit_restore(cs, ra);
> >  }
> > +
> > +void x86_cpu_record_sigbus(CPUState *cs, vaddr addr,
> > +                           MMUAccessType access_type, uintptr_t ra)
> > +{
> > +    X86CPU *cpu = X86_CPU(cs);
> > +    handle_unaligned_access(&cpu->env, addr, access_type, ra);
> > +}
> > --
> > 2.37.2
> >
>



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-09-19 17:49 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-30  3:48 [PATCH v3 1/2] target/i386: Read 8 bytes from cvttps2pi/cvtps2pi memory operands Ricky Zhou
2022-08-30  3:48 ` [PATCH v3 2/2] target/i386: Raise #GP on unaligned m128 accesses when required Ricky Zhou
2022-09-17  2:14   ` Ricky Zhou
2022-09-19 17:34     ` Paolo Bonzini
2022-09-19 17:33 ` [PATCH v3 1/2] target/i386: Read 8 bytes from cvttps2pi/cvtps2pi memory operands Paolo Bonzini

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.