All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/1] cmpxchg and lock cmpxchg should not touch accumulator
       [not found] <20220322042008.399857-1-lw945lw945.ref@yahoo.com>
@ 2022-03-22  4:20 ` Wei Li
  2022-03-22  4:20   ` [PATCH v2 1/1] fix cmpxchg and lock cmpxchg instruction Wei Li
  0 siblings, 1 reply; 3+ messages in thread
From: Wei Li @ 2022-03-22  4:20 UTC (permalink / raw)
  To: pbonzini, richard.henderson, eduardo; +Cc: qemu-devel

Bug: https://gitlab.com/qemu-project/qemu/-/issues/508

This series fix a bug reported on issues 508.
The problem is cmpxchg and lock cmpxchg would touch accumulator when
they should not do that.

Changes from v1
* cmpxchg uses the lock cmpxchg path whenever mod != 3 to reduce code
  duplication.
* lock cmpxchg uses movcond to replace branch.
* Combine the two patches into one patch because cmpxchg uses the lock
  cmpxchg path.

v1 link:
https://lists.gnu.org/archive/html/qemu-devel/2022-03/msg05023.html

Wei Li (1):
  fix cmpxchg and lock cmpxchg instruction

 target/i386/tcg/translate.c | 44 +++++++++++++++++++------------------
 1 file changed, 23 insertions(+), 21 deletions(-)

-- 
2.30.2



^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH v2 1/1] fix cmpxchg and lock cmpxchg instruction
  2022-03-22  4:20 ` [PATCH v2 0/1] cmpxchg and lock cmpxchg should not touch accumulator Wei Li
@ 2022-03-22  4:20   ` Wei Li
  2022-03-22 14:27     ` Richard Henderson
  0 siblings, 1 reply; 3+ messages in thread
From: Wei Li @ 2022-03-22  4:20 UTC (permalink / raw)
  To: pbonzini, richard.henderson, eduardo; +Cc: qemu-devel

One question is that we can reduce more code duplication if we use

---------
if(foo){
    ....
    tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
                              s->mem_index, ot | MO_LE);
    gen_extu(ot, oldv);
    gen_extu(ot, cmpv); 
}else{
    ....
    tcg_gen_movcond_tl(TCG_COND_EQ, newv, old, cmpv, newv, oldv);
    gen_op_mov_reg_v(s, ot, rm, newv);
}
gen_op_mov_reg_v(s, ot, R_EAX, oldv);
tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], oldv, cmpv,
                    temp, cpu_regs[R_EAX]);
--------

The problem is gen_op_mov_reg_v(s, ot, rm, newv) will happen before
gen_op_mov_reg_v(s, ot, R_EAX, oldv). According to SDM, write to R_EAX
should happen before write to rm. I am not sure about its side effects.

All in all, if there is no side effect, we can use the code above to
reduce more code duplication. Or we use the code below to ensure
correctness.

Signed-off-by: Wei Li <lw945lw945@yahoo.com>
---
 target/i386/tcg/translate.c | 44 +++++++++++++++++++------------------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 2a94d33742..6633d8ece6 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -5339,7 +5339,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x1b0:
     case 0x1b1: /* cmpxchg Ev, Gv */
         {
-            TCGv oldv, newv, cmpv;
+            TCGv oldv, newv, cmpv, temp;
 
             ot = mo_b_d(b, dflag);
             modrm = x86_ldub_code(env, s);
@@ -5348,41 +5348,42 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             oldv = tcg_temp_new();
             newv = tcg_temp_new();
             cmpv = tcg_temp_new();
+            temp = tcg_temp_new();
             gen_op_mov_v_reg(s, ot, newv, reg);
             tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
+            tcg_gen_mov_tl(temp, cpu_regs[R_EAX]);
 
-            if (s->prefix & PREFIX_LOCK) {
+            if ((s->prefix & PREFIX_LOCK) ||
+                (mod != 3)) {
+                /* Use the tcg_gen_atomic_cmpxchg_tl path whenever mod != 3.
+                   While an unlocked cmpxchg need not be atomic, it is not
+                   required to be non-atomic either. */
                 if (mod == 3) {
                     goto illegal_op;
                 }
                 gen_lea_modrm(env, s, modrm);
                 tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
                                           s->mem_index, ot | MO_LE);
+                gen_extu(ot, oldv);
+                gen_extu(ot, cmpv);
+                /* Perform the merge into %al or %ax as required by ot. */
                 gen_op_mov_reg_v(s, ot, R_EAX, oldv);
+                /* Undo the entire modification to %rax if comparison equal. */
+                tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], oldv, cmpv,
+                                    temp, cpu_regs[R_EAX]);
             } else {
-                if (mod == 3) {
-                    rm = (modrm & 7) | REX_B(s);
-                    gen_op_mov_v_reg(s, ot, oldv, rm);
-                } else {
-                    gen_lea_modrm(env, s, modrm);
-                    gen_op_ld_v(s, ot, oldv, s->A0);
-                    rm = 0; /* avoid warning */
-                }
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_mov_v_reg(s, ot, oldv, rm);
                 gen_extu(ot, oldv);
                 gen_extu(ot, cmpv);
                 /* store value = (old == cmp ? new : old);  */
                 tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
-                if (mod == 3) {
-                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
-                    gen_op_mov_reg_v(s, ot, rm, newv);
-                } else {
-                    /* Perform an unconditional store cycle like physical cpu;
-                       must be before changing accumulator to ensure
-                       idempotency if the store faults and the instruction
-                       is restarted */
-                    gen_op_st_v(s, ot, newv, s->A0);
-                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
-                }
+                /* Perform the merge into %al or %ax as required by ot. */
+                gen_op_mov_reg_v(s, ot, R_EAX, oldv);
+                /* Undo the entire modification to %rax if comparison equal. */
+                tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], oldv, cmpv,
+                                    temp, cpu_regs[R_EAX]);
+                gen_op_mov_reg_v(s, ot, rm, newv);
             }
             tcg_gen_mov_tl(cpu_cc_src, oldv);
             tcg_gen_mov_tl(s->cc_srcT, cmpv);
@@ -5391,6 +5392,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             tcg_temp_free(oldv);
             tcg_temp_free(newv);
             tcg_temp_free(cmpv);
+            tcg_temp_free(temp);
         }
         break;
     case 0x1c7: /* cmpxchg8b */
-- 
2.30.2



^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v2 1/1] fix cmpxchg and lock cmpxchg instruction
  2022-03-22  4:20   ` [PATCH v2 1/1] fix cmpxchg and lock cmpxchg instruction Wei Li
@ 2022-03-22 14:27     ` Richard Henderson
  0 siblings, 0 replies; 3+ messages in thread
From: Richard Henderson @ 2022-03-22 14:27 UTC (permalink / raw)
  To: Wei Li, pbonzini, eduardo; +Cc: qemu-devel

On 3/21/22 21:20, Wei Li wrote:
> One question is that we can reduce more code duplication if we use
> 
> ---------
> if(foo){
>      ....
>      tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
>                                s->mem_index, ot | MO_LE);
>      gen_extu(ot, oldv);
>      gen_extu(ot, cmpv);
> }else{
>      ....
>      tcg_gen_movcond_tl(TCG_COND_EQ, newv, old, cmpv, newv, oldv);
>      gen_op_mov_reg_v(s, ot, rm, newv);
> }
> gen_op_mov_reg_v(s, ot, R_EAX, oldv);
> tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], oldv, cmpv,
>                      temp, cpu_regs[R_EAX]);
> --------
> 
> The problem is gen_op_mov_reg_v(s, ot, rm, newv) will happen before
> gen_op_mov_reg_v(s, ot, R_EAX, oldv). According to SDM, write to R_EAX
> should happen before write to rm. I am not sure about its side effects.

There are no side effects beyond the store into RM.
I do prefer the structure above.


r~

> 
> All in all, if there is no side effect, we can use the code above to
> reduce more code duplication. Or we use the code below to ensure
> correctness.
> 
> Signed-off-by: Wei Li <lw945lw945@yahoo.com>
> ---
>   target/i386/tcg/translate.c | 44 +++++++++++++++++++------------------
>   1 file changed, 23 insertions(+), 21 deletions(-)
> 
> diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
> index 2a94d33742..6633d8ece6 100644
> --- a/target/i386/tcg/translate.c
> +++ b/target/i386/tcg/translate.c
> @@ -5339,7 +5339,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>       case 0x1b0:
>       case 0x1b1: /* cmpxchg Ev, Gv */
>           {
> -            TCGv oldv, newv, cmpv;
> +            TCGv oldv, newv, cmpv, temp;
>   
>               ot = mo_b_d(b, dflag);
>               modrm = x86_ldub_code(env, s);
> @@ -5348,41 +5348,42 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>               oldv = tcg_temp_new();
>               newv = tcg_temp_new();
>               cmpv = tcg_temp_new();
> +            temp = tcg_temp_new();
>               gen_op_mov_v_reg(s, ot, newv, reg);
>               tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
> +            tcg_gen_mov_tl(temp, cpu_regs[R_EAX]);
>   
> -            if (s->prefix & PREFIX_LOCK) {
> +            if ((s->prefix & PREFIX_LOCK) ||
> +                (mod != 3)) {
> +                /* Use the tcg_gen_atomic_cmpxchg_tl path whenever mod != 3.
> +                   While an unlocked cmpxchg need not be atomic, it is not
> +                   required to be non-atomic either. */
>                   if (mod == 3) {
>                       goto illegal_op;
>                   }
>                   gen_lea_modrm(env, s, modrm);
>                   tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
>                                             s->mem_index, ot | MO_LE);
> +                gen_extu(ot, oldv);
> +                gen_extu(ot, cmpv);
> +                /* Perform the merge into %al or %ax as required by ot. */
>                   gen_op_mov_reg_v(s, ot, R_EAX, oldv);
> +                /* Undo the entire modification to %rax if comparison equal. */
> +                tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], oldv, cmpv,
> +                                    temp, cpu_regs[R_EAX]);
>               } else {
> -                if (mod == 3) {
> -                    rm = (modrm & 7) | REX_B(s);
> -                    gen_op_mov_v_reg(s, ot, oldv, rm);
> -                } else {
> -                    gen_lea_modrm(env, s, modrm);
> -                    gen_op_ld_v(s, ot, oldv, s->A0);
> -                    rm = 0; /* avoid warning */
> -                }
> +                rm = (modrm & 7) | REX_B(s);
> +                gen_op_mov_v_reg(s, ot, oldv, rm);
>                   gen_extu(ot, oldv);
>                   gen_extu(ot, cmpv);
>                   /* store value = (old == cmp ? new : old);  */
>                   tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
> -                if (mod == 3) {
> -                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
> -                    gen_op_mov_reg_v(s, ot, rm, newv);
> -                } else {
> -                    /* Perform an unconditional store cycle like physical cpu;
> -                       must be before changing accumulator to ensure
> -                       idempotency if the store faults and the instruction
> -                       is restarted */
> -                    gen_op_st_v(s, ot, newv, s->A0);
> -                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
> -                }
> +                /* Perform the merge into %al or %ax as required by ot. */
> +                gen_op_mov_reg_v(s, ot, R_EAX, oldv);
> +                /* Undo the entire modification to %rax if comparison equal. */
> +                tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], oldv, cmpv,
> +                                    temp, cpu_regs[R_EAX]);
> +                gen_op_mov_reg_v(s, ot, rm, newv);
>               }
>               tcg_gen_mov_tl(cpu_cc_src, oldv);
>               tcg_gen_mov_tl(s->cc_srcT, cmpv);
> @@ -5391,6 +5392,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>               tcg_temp_free(oldv);
>               tcg_temp_free(newv);
>               tcg_temp_free(cmpv);
> +            tcg_temp_free(temp);
>           }
>           break;
>       case 0x1c7: /* cmpxchg8b */



^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-03-22 14:28 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20220322042008.399857-1-lw945lw945.ref@yahoo.com>
2022-03-22  4:20 ` [PATCH v2 0/1] cmpxchg and lock cmpxchg should not touch accumulator Wei Li
2022-03-22  4:20   ` [PATCH v2 1/1] fix cmpxchg and lock cmpxchg instruction Wei Li
2022-03-22 14:27     ` Richard Henderson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.