All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2 0/3] target-m68k: add movem, BCD and CAS instructions
@ 2016-11-02 21:15 Laurent Vivier
  2016-11-02 21:15 ` [Qemu-devel] [PATCH v2 1/3] target-m68k: add abcd/sbcd/nbcd Laurent Vivier
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Laurent Vivier @ 2016-11-02 21:15 UTC (permalink / raw)
  To: qemu-devel; +Cc: schwab, agraf, Richard Henderson, gerg, Laurent Vivier

This series is another subset of the series I sent in May:
https://lists.gnu.org/archive/html/qemu-devel/2016-05/msg00501.html

This subset contains reworked patches for:
- abcd/nbcd/sbcd: remove inline, delay write back to memory and
  use only 3 digits (and extract it from the bifield patch as
  it was squashed into it)
- movem: delay the update of the registers to the end of the load
  sequence to be able to restart the operation in case of page
  fault, and manage the 68020+ <-> 68000/68010 special case
- cas/cas2: rewrite according Richard's comments,
  and use cmpxchg() in cas.

I've checked it doesn't break coldfire support:
http://wiki.qemu.org/download/coldfire-test-0.1.tar.bz2
but it can't boot a 680x0 processor kernel.

v2:
- movem: don't use temp variable mask2
- bcd: delay register write back
       fix bcd_add() to add X
       define bcd_sub() instead of bsd_neg()
- cas2: make it atomic with "parallel_cpus" and
        cpu_loop_exit_atomic() (and 64bit cmpxchg()
        if possible)

Richard: This series doesn't use your "areg writeback" series,
but if you resend your series (with cmpm patches squashed
and the fix for writeback_mask), I will rebase this series
on top of yours.

Thanks,
Laurent

Laurent Vivier (3):
  target-m68k: add abcd/sbcd/nbcd
  target-m68k: implement 680x0 movem
  target-m68k: add cas/cas2 ops

 target-m68k/helper.h    |   2 +
 target-m68k/op_helper.c | 133 +++++++++++++
 target-m68k/translate.c | 481 ++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 599 insertions(+), 17 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH v2 1/3] target-m68k: add abcd/sbcd/nbcd
  2016-11-02 21:15 [Qemu-devel] [PATCH v2 0/3] target-m68k: add movem, BCD and CAS instructions Laurent Vivier
@ 2016-11-02 21:15 ` Laurent Vivier
  2016-11-03 16:16   ` Richard Henderson
  2016-11-02 21:15 ` [Qemu-devel] [PATCH v2 2/3] target-m68k: implement 680x0 movem Laurent Vivier
  2016-11-02 21:15 ` [Qemu-devel] [PATCH v2 3/3] target-m68k: add cas/cas2 ops Laurent Vivier
  2 siblings, 1 reply; 15+ messages in thread
From: Laurent Vivier @ 2016-11-02 21:15 UTC (permalink / raw)
  To: qemu-devel; +Cc: schwab, agraf, Richard Henderson, gerg, Laurent Vivier

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
---
 target-m68k/translate.c | 242 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 242 insertions(+)

diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index d0a3b0f..1cf88a4 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -1313,6 +1313,243 @@ DISAS_INSN(divl)
     set_cc_op(s, CC_OP_FLAGS);
 }
 
+static void bcd_add(TCGv dest, TCGv src)
+{
+    TCGv t0, t1;
+
+    /*  dest10 = dest10 + src10 + X
+     *
+     *        t1 = src
+     *        t2 = t1 + 0x066
+     *        t3 = t2 + dest + X
+     *        t4 = t2 ^ dest ^ X
+     *        t5 = t3 ^ t4
+     *        t6 = ~t5 & 0x110
+     *        t7 = (t6 >> 2) | (t6 >> 3)
+     *        return t3 - t7
+     */
+
+    /* t1 = (src + 0x066) + dest + X
+     *    = result with some possible exceding 0x6
+     */
+
+    t0 = tcg_const_i32(0x066);
+    tcg_gen_add_i32(t0, t0, src);
+
+    t1 = tcg_temp_new();
+    tcg_gen_add_i32(t1, t0, dest);
+    tcg_gen_add_i32(t1, t1, QREG_CC_X);
+
+    /* we will remove exceding 0x6 where there is no carry */
+
+    /* t0 = (src + 0x0066) ^ dest ^ X
+     *    = t1 without carries
+     */
+
+    tcg_gen_xor_i32(t0, t0, dest);
+    tcg_gen_xor_i32(t0, t0, QREG_CC_X);
+
+    /* extract the carries
+     * t0 = t0 ^ t1
+     *    = only the carries
+     */
+
+    tcg_gen_xor_i32(t0, t0, t1);
+
+    /* generate 0x1 where there is no carry */
+
+    tcg_gen_not_i32(t0, t0);
+    tcg_gen_andi_i32(t0, t0, 0x110);
+
+    /* for each 0x10, generate a 0x6 */
+
+    tcg_gen_shri_i32(dest, t0, 2);
+    tcg_gen_shri_i32(t0, t0, 3);
+    tcg_gen_or_i32(dest, dest, t0);
+    tcg_temp_free(t0);
+
+    /* remove the exceding 0x6
+     * for digits that have not generated a carry
+     */
+
+    tcg_gen_sub_i32(dest, t1, dest);
+    tcg_temp_free(t1);
+}
+
+static void bcd_sub(TCGv dest, TCGv src)
+{
+    TCGv t0, t1, t2;
+
+    /*  dest10 = dest10 - src10 - X
+     *         = bcd_add(dest + 1 - X, 0xf99 - src)
+     */
+
+    /* t0 = 0xfff - src */
+
+    t0 = tcg_temp_new();
+    tcg_gen_neg_i32(t0, src);
+    tcg_gen_addi_i32(t0, t0, 0xfff);
+
+    /* t1 = t0 + dest + 1 - X*/
+
+    t1 = tcg_temp_new();
+    tcg_gen_add_i32(t1, t0, dest);
+    tcg_gen_addi_i32(t1, t1, 1);
+    tcg_gen_sub_i32(t1, t1, QREG_CC_X);
+
+    /* t2 = t0 ^ dest ^ 1 ^ X */
+
+    t2 = tcg_temp_new();
+    tcg_gen_xor_i32(t2, t0, dest);
+    tcg_gen_xori_i32(t2, t2, 1);
+    tcg_gen_xor_i32(t2, t2, QREG_CC_X);
+
+    /* t0 = t1 ^ t2 */
+
+    tcg_gen_xor_i32(t0, t1, t2);
+
+    /* t2 = ~t0 & 0x110 */
+
+    tcg_gen_not_i32(t2, t0);
+    tcg_gen_andi_i32(t2, t2, 0x110);
+
+    /* t0 = (t2 >> 2) | (t2 >> 3) */
+
+    tcg_gen_shri_i32(t0, t2, 2);
+    tcg_gen_shri_i32(t2, t2, 3);
+    tcg_gen_or_i32(t0, t0, t2);
+
+    /* return t1 - t0 */
+
+    tcg_gen_sub_i32(dest, t1, t0);
+}
+
+static void bcd_flags(TCGv val)
+{
+    tcg_gen_andi_i32(QREG_CC_C, val, 0x0ff);
+    tcg_gen_or_i32(QREG_CC_Z, QREG_CC_Z, QREG_CC_C);
+
+    tcg_gen_movi_i32(QREG_CC_X, 0);
+    tcg_gen_andi_i32(val, val, 0xf00);
+    tcg_gen_setcond_i32(TCG_COND_NE, QREG_CC_C, val, QREG_CC_X);
+
+    tcg_gen_mov_i32(QREG_CC_X, QREG_CC_C);
+}
+
+DISAS_INSN(abcd_reg)
+{
+    TCGv src;
+    TCGv dest;
+
+    gen_flush_flags(s); /* !Z is sticky */
+
+    src = gen_extend(DREG(insn, 0), OS_BYTE, 0);
+    dest = gen_extend(DREG(insn, 9), OS_BYTE, 0);
+    bcd_add(dest, src);
+    gen_partset_reg(OS_BYTE, DREG(insn, 9), dest);
+
+    bcd_flags(dest);
+}
+
+DISAS_INSN(abcd_mem)
+{
+    TCGv src;
+    TCGv addr_src;
+    TCGv dest;
+    TCGv addr_dest;
+
+    gen_flush_flags(s); /* !Z is sticky */
+
+    addr_src = tcg_temp_new();
+    tcg_gen_subi_i32(addr_src, AREG(insn, 0), opsize_bytes(OS_BYTE));
+    src = gen_load(s, OS_BYTE, addr_src, 0);
+
+    addr_dest = tcg_temp_new();
+    if (REG(insn, 0) == REG(insn, 9)) {
+        tcg_gen_subi_i32(addr_dest, addr_src, opsize_bytes(OS_BYTE));
+    } else {
+        tcg_gen_subi_i32(addr_dest, AREG(insn, 9), opsize_bytes(OS_BYTE));
+    }
+    dest = gen_load(s, OS_BYTE, addr_dest, 0);
+
+    bcd_add(dest, src);
+
+    gen_store(s, OS_BYTE, addr_dest, dest);
+
+    tcg_gen_mov_i32(AREG(insn, 0), addr_src);
+    tcg_temp_free(addr_src);
+    tcg_gen_mov_i32(AREG(insn, 9), addr_dest);
+    tcg_temp_free(addr_dest);
+
+    bcd_flags(dest);
+}
+
+DISAS_INSN(sbcd_reg)
+{
+    TCGv src, dest;
+
+    gen_flush_flags(s); /* !Z is sticky */
+
+    src = gen_extend(DREG(insn, 0), OS_BYTE, 0);
+    dest = gen_extend(DREG(insn, 9), OS_BYTE, 0);
+
+    bcd_sub(dest, src);
+
+    gen_partset_reg(OS_BYTE, DREG(insn, 9), dest);
+
+    bcd_flags(dest);
+}
+
+DISAS_INSN(sbcd_mem)
+{
+    TCGv src, dest;
+    TCGv addr_src, addr_dest;
+
+    gen_flush_flags(s); /* !Z is sticky */
+
+    addr_src = tcg_temp_new();
+    tcg_gen_subi_i32(addr_src, AREG(insn, 0), opsize_bytes(OS_BYTE));
+    src = gen_load(s, OS_BYTE, addr_src, 0);
+
+    addr_dest = tcg_temp_new();
+    if (REG(insn, 0) == REG(insn, 9)) {
+        tcg_gen_subi_i32(addr_dest, addr_src, opsize_bytes(OS_BYTE));
+    } else {
+        tcg_gen_subi_i32(addr_dest, AREG(insn, 9), opsize_bytes(OS_BYTE));
+    }
+    dest = gen_load(s, OS_BYTE, addr_dest, 0);
+
+    bcd_sub(dest, src);
+
+    gen_store(s, OS_BYTE, addr_dest, dest);
+
+    tcg_gen_mov_i32(AREG(insn, 0), addr_src);
+    tcg_temp_free(addr_src);
+    tcg_gen_mov_i32(AREG(insn, 9), addr_dest);
+    tcg_temp_free(addr_dest);
+
+    bcd_flags(dest);
+}
+
+DISAS_INSN(nbcd)
+{
+    TCGv src, dest;
+    TCGv addr;
+
+    gen_flush_flags(s); /* !Z is sticky */
+
+    SRC_EA(env, src, OS_BYTE, 0, &addr);
+
+    dest = tcg_const_i32(0);
+    bcd_sub(dest, src);
+
+    DEST_EA(env, insn, OS_BYTE, dest, &addr);
+
+    bcd_flags(dest);
+
+    tcg_temp_free(dest);
+}
+
 DISAS_INSN(addsub)
 {
     TCGv reg;
@@ -3616,6 +3853,7 @@ void register_m68k_insns (CPUM68KState *env)
     INSN(not,       4600, ff00, M68000);
     INSN(undef,     46c0, ffc0, M68000);
     INSN(move_to_sr, 46c0, ffc0, CF_ISA_A);
+    INSN(nbcd,      4800, ffc0, M68000);
     INSN(linkl,     4808, fff8, M68000);
     BASE(pea,       4840, ffc0);
     BASE(swap,      4840, fff8);
@@ -3667,6 +3905,8 @@ void register_m68k_insns (CPUM68KState *env)
     INSN(mvzs,      7100, f100, CF_ISA_B);
     BASE(or,        8000, f000);
     BASE(divw,      80c0, f0c0);
+    INSN(sbcd_reg,  8100, f1f8, M68000);
+    INSN(sbcd_mem,  8108, f1f8, M68000);
     BASE(addsub,    9000, f000);
     INSN(undef,     90c0, f0c0, CF_ISA_A);
     INSN(subx_reg,  9180, f1f8, CF_ISA_A);
@@ -3704,6 +3944,8 @@ void register_m68k_insns (CPUM68KState *env)
     INSN(exg_aa,    c148, f1f8, M68000);
     INSN(exg_da,    c188, f1f8, M68000);
     BASE(mulw,      c0c0, f0c0);
+    INSN(abcd_reg,  c100, f1f8, M68000);
+    INSN(abcd_mem,  c108, f1f8, M68000);
     BASE(addsub,    d000, f000);
     INSN(undef,     d0c0, f0c0, CF_ISA_A);
     INSN(addx_reg,      d180, f1f8, CF_ISA_A);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH v2 2/3] target-m68k: implement 680x0 movem
  2016-11-02 21:15 [Qemu-devel] [PATCH v2 0/3] target-m68k: add movem, BCD and CAS instructions Laurent Vivier
  2016-11-02 21:15 ` [Qemu-devel] [PATCH v2 1/3] target-m68k: add abcd/sbcd/nbcd Laurent Vivier
@ 2016-11-02 21:15 ` Laurent Vivier
  2016-11-03 16:17   ` Richard Henderson
                     ` (2 more replies)
  2016-11-02 21:15 ` [Qemu-devel] [PATCH v2 3/3] target-m68k: add cas/cas2 ops Laurent Vivier
  2 siblings, 3 replies; 15+ messages in thread
From: Laurent Vivier @ 2016-11-02 21:15 UTC (permalink / raw)
  To: qemu-devel; +Cc: schwab, agraf, Richard Henderson, gerg, Laurent Vivier

680x0 movem can load/store words and long words
and can use more addressing modes.
Coldfire can only use long words with (Ax) and (d16,Ax)
addressing modes.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
---
 target-m68k/translate.c | 96 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 79 insertions(+), 17 deletions(-)

diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index 1cf88a4..93f1270 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -1667,14 +1667,25 @@ static void gen_push(DisasContext *s, TCGv val)
     tcg_gen_mov_i32(QREG_SP, tmp);
 }
 
+static TCGv mreg(int reg)
+{
+    if (reg < 8) {
+        /* Dx */
+        return cpu_dregs[reg];
+    }
+    /* Ax */
+    return cpu_aregs[reg & 7];
+}
+
 DISAS_INSN(movem)
 {
     TCGv addr;
     int i;
     uint16_t mask;
-    TCGv reg;
     TCGv tmp;
-    int is_load;
+    int is_load = (insn & 0x0400) != 0;
+    int opsize = (insn & 0x40) != 0 ? OS_LONG : OS_WORD;
+    TCGv incr;
 
     mask = read_im16(env, s);
     tmp = gen_lea(env, s, insn, OS_LONG);
@@ -1682,25 +1693,74 @@ DISAS_INSN(movem)
         gen_addr_fault(s);
         return;
     }
+
     addr = tcg_temp_new();
     tcg_gen_mov_i32(addr, tmp);
-    is_load = ((insn & 0x0400) != 0);
-    for (i = 0; i < 16; i++, mask >>= 1) {
-        if (mask & 1) {
-            if (i < 8)
-                reg = DREG(i, 0);
-            else
-                reg = AREG(i, 0);
-            if (is_load) {
-                tmp = gen_load(s, OS_LONG, addr, 0);
-                tcg_gen_mov_i32(reg, tmp);
-            } else {
-                gen_store(s, OS_LONG, addr, reg);
+    incr = tcg_const_i32(opsize_bytes(opsize));
+
+    if (is_load) {
+        /* memory to register */
+        TCGv r[16];
+        for (i = 0; i < 16; i++) {
+            if ((mask >> i) & 1) {
+                r[i] = gen_load(s, opsize, addr, 1);
+                tcg_gen_add_i32(addr, addr, incr);
+            }
+        }
+        for (i = 0; i < 16; i++, mask >>= 1) {
+            if (mask & 1) {
+                tcg_gen_mov_i32(mreg(i), r[i]);
+                tcg_temp_free(r[i]);
+            }
+        }
+        if ((insn & 070) == 030) {
+            /* movem (An)+,X */
+            tcg_gen_mov_i32(AREG(insn, 0), addr);
+        }
+
+    } else {
+        /* register to memory */
+
+        if ((insn & 070) == 040) {
+            /* movem X,-(An) */
+
+            for (i = 15; i >= 0; i--, mask >>= 1) {
+                if (mask & 1) {
+                    if ((insn & 7) + 8 == i &&
+                        m68k_feature(s->env, M68K_FEATURE_EXT_FULL)) {
+                        /* M68020+: if the addressing register is the
+                         * register moved to memory, the value written
+                         * is the initial value decremented by the size of
+                         * the operation
+                         * M68000/M68010: the value is the initial value
+                         */
+                        TCGv tmp = tcg_temp_new();
+                        tcg_gen_sub_i32(tmp, mreg(i), incr);
+                        gen_store(s, opsize, addr, tmp);
+                        tcg_temp_free(tmp);
+                    } else {
+                        gen_store(s, opsize, addr, mreg(i));
+                    }
+                    if (mask != 1) {
+                        tcg_gen_sub_i32(addr, addr, incr);
+                    }
+                }
+            }
+            tcg_gen_mov_i32(AREG(insn, 0), addr);
+        } else {
+            /* movem X,(An)+ is not allowed */
+
+            for (i = 0; i < 16; i++, mask >>= 1) {
+                if (mask & 1) {
+                    gen_store(s, opsize, addr, mreg(i));
+                    tcg_gen_add_i32(addr, addr, incr);
+                }
             }
-            if (mask != 1)
-                tcg_gen_addi_i32(addr, addr, 4);
         }
     }
+
+    tcg_temp_free(incr);
+    tcg_temp_free(addr);
 }
 
 DISAS_INSN(bitop_im)
@@ -3858,7 +3918,9 @@ void register_m68k_insns (CPUM68KState *env)
     BASE(pea,       4840, ffc0);
     BASE(swap,      4840, fff8);
     INSN(bkpt,      4848, fff8, BKPT);
-    BASE(movem,     48c0, fbc0);
+    INSN(movem,     48d0, fbf8, CF_ISA_A);
+    INSN(movem,     48e8, fbf8, CF_ISA_A);
+    INSN(movem,     4880, fb80, M68000);
     BASE(ext,       4880, fff8);
     BASE(ext,       48c0, fff8);
     BASE(ext,       49c0, fff8);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH v2 3/3] target-m68k: add cas/cas2 ops
  2016-11-02 21:15 [Qemu-devel] [PATCH v2 0/3] target-m68k: add movem, BCD and CAS instructions Laurent Vivier
  2016-11-02 21:15 ` [Qemu-devel] [PATCH v2 1/3] target-m68k: add abcd/sbcd/nbcd Laurent Vivier
  2016-11-02 21:15 ` [Qemu-devel] [PATCH v2 2/3] target-m68k: implement 680x0 movem Laurent Vivier
@ 2016-11-02 21:15 ` Laurent Vivier
  2016-11-03 16:36   ` Richard Henderson
  2 siblings, 1 reply; 15+ messages in thread
From: Laurent Vivier @ 2016-11-02 21:15 UTC (permalink / raw)
  To: qemu-devel; +Cc: schwab, agraf, Richard Henderson, gerg, Laurent Vivier

Implement CAS using cmpxchg.
Implement CAS2 using helper and either cmpxchg when
the 32bit addresses are consecutive, or with
parallel_cpus+cpu_loop_exit_atomic() otherwise.

Suggested-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
---
 target-m68k/helper.h    |   2 +
 target-m68k/op_helper.c | 133 ++++++++++++++++++++++++++++++++++++++++++++
 target-m68k/translate.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 278 insertions(+)

diff --git a/target-m68k/helper.h b/target-m68k/helper.h
index d863e55..17ec342 100644
--- a/target-m68k/helper.h
+++ b/target-m68k/helper.h
@@ -9,6 +9,8 @@ DEF_HELPER_4(divull, void, env, int, int, i32)
 DEF_HELPER_4(divsll, void, env, int, int, s32)
 DEF_HELPER_2(set_sr, void, env, i32)
 DEF_HELPER_3(movec, void, env, i32, i32)
+DEF_HELPER_4(cas2w, void, env, i32, i32, i32)
+DEF_HELPER_4(cas2l, void, env, i32, i32, i32)
 
 DEF_HELPER_2(f64_to_i32, f32, env, f64)
 DEF_HELPER_2(f64_to_f32, f32, env, f64)
diff --git a/target-m68k/op_helper.c b/target-m68k/op_helper.c
index a4bfa4e..ff27211 100644
--- a/target-m68k/op_helper.c
+++ b/target-m68k/op_helper.c
@@ -359,3 +359,136 @@ void HELPER(divsll)(CPUM68KState *env, int numr, int regr, int32_t den)
     env->dregs[regr] = rem;
     env->dregs[numr] = quot;
 }
+
+void HELPER(cas2w)(CPUM68KState *env, uint32_t regs, uint32_t a1, uint32_t a2)
+{
+    uint32_t Dc1 = extract32(regs, 9, 3);
+    uint32_t Dc2 = extract32(regs, 6, 3);
+    uint32_t Du1 = extract32(regs, 3, 3);
+    uint32_t Du2 = extract32(regs, 0, 3);
+    int16_t c1 = env->dregs[Dc1];
+    int16_t c2 = env->dregs[Dc2];
+    int16_t u1 = env->dregs[Du1];
+    int16_t u2 = env->dregs[Du2];
+    int16_t l1, l2;
+    uintptr_t ra = GETPC();
+
+    if (parallel_cpus) {
+        /* Tell the main loop we need to serialize this insn.  */
+        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+    } else {
+        /* We're executing in a serial context -- no need to be atomic.  */
+#ifdef CONFIG_USER_ONLY
+        int16_t *ha1 = g2h(a1);
+        int16_t *ha2 = g2h(a2);
+        l1 = ldsw_be_p(ha1);
+        l2 = ldsw_be_p(ha2);
+        if (l1 == c1 && l2 == c2) {
+            stw_be_p(ha1, u1);
+            stw_be_p(ha2, u2);
+        }
+#else
+        int mmu_idx = cpu_mmu_index(env, 0);
+        TCGMemOpIdx oi = make_memop_idx(MO_BEUW, mmu_idx);
+        l1 = helper_be_ldsw_mmu(env, a1, oi, ra);
+        l2 = helper_be_ldsw_mmu(env, a2, oi, ra);
+        if (l1 == c1 && l2 == c2) {
+            helper_be_stw_mmu(env, a1, u1, oi, ra);
+            helper_be_stw_mmu(env, a2, u2, oi, ra);
+        }
+#endif
+    }
+
+    if (c1 != l1) {
+        env->cc_n = l1;
+        env->cc_v = c1;
+    } else {
+        env->cc_n = l2;
+        env->cc_v = c2;
+    }
+    env->cc_op = CC_OP_CMPL;
+    env->dregs[Dc1] = deposit32(env->dregs[Dc1], 0, 16, l1);
+    env->dregs[Dc2] = deposit32(env->dregs[Dc2], 0, 16, l2);
+}
+
+void HELPER(cas2l)(CPUM68KState *env, uint32_t regs, uint32_t a1, uint32_t a2)
+{
+    uint32_t Dc1 = extract32(regs, 9, 3);
+    uint32_t Dc2 = extract32(regs, 6, 3);
+    uint32_t Du1 = extract32(regs, 3, 3);
+    uint32_t Du2 = extract32(regs, 0, 3);
+    uint32_t c1 = env->dregs[Dc1];
+    uint32_t c2 = env->dregs[Dc2];
+    uint32_t u1 = env->dregs[Du1];
+    uint32_t u2 = env->dregs[Du2];
+    uint32_t l1, l2;
+    uint64_t c, u, l;
+    uintptr_t ra = GETPC();
+#ifndef CONFIG_USER_ONLY
+    int mmu_idx = cpu_mmu_index(env, 0);
+    TCGMemOpIdx oi;
+#endif
+
+    if (parallel_cpus) {
+        /* We're executing in a parallel context -- must be atomic.  */
+        if ((a1 & 7) == 0 && a2 == a1 + 4) {
+            c = deposit64(c2, 32, 32, c1);
+            u = deposit64(u2, 32, 32, u1);
+#ifdef CONFIG_USER_ONLY
+            uint64_t *ha1 = g2h(a1);
+            l = atomic_cmpxchg__nocheck(ha1, c, u);
+#else
+            oi = make_memop_idx(MO_BEQ, mmu_idx);
+            l = helper_atomic_cmpxchgq_be_mmu(env, a1, c, u, oi, ra);
+#endif
+            l1 = l >> 32;
+            l2 = l;
+        } else if ((a2 & 7) == 0 && a1 == a2 + 4) {
+            c = deposit64(c1, 32, 32, c2);
+            u = deposit64(u1, 32, 32, u2);
+#ifdef CONFIG_USER_ONLY
+            uint64_t *ha1 = g2h(a1);
+            l = atomic_cmpxchg__nocheck(ha1, c, u);
+#else
+            oi = make_memop_idx(MO_BEQ, mmu_idx);
+            l = helper_atomic_cmpxchgq_be_mmu(env, a1, c, u, oi, ra);
+#endif
+            l2 = l >> 32;
+            l1 = l;
+        } else {
+            /* Tell the main loop we need to serialize this insn.  */
+            cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+        }
+    } else {
+#ifdef CONFIG_USER_ONLY
+        uint32_t *ha1 = g2h(a1);
+        uint32_t *ha2 = g2h(a2);
+        l1 = ldl_be_p(ha1);
+        l2 = ldl_be_p(ha2);
+        if (l1 == c1 && l2 == c2) {
+            stl_be_p(ha1, u1);
+            stl_be_p(ha2, u2);
+        }
+#else
+        /* We're executing in a serial context -- no need to be atomic.  */
+        oi = make_memop_idx(MO_BEUL, mmu_idx);
+        l1 = helper_be_ldul_mmu(env, a1, oi, ra);
+        l2 = helper_be_ldul_mmu(env, a2, oi, ra);
+        if (l1 == c1 && l2 == c2) {
+            helper_be_stl_mmu(env, a1, u1, oi, ra);
+            helper_be_stl_mmu(env, a2, u2, oi, ra);
+        }
+#endif
+    }
+
+    if (c1 != l1) {
+        env->cc_n = l1;
+        env->cc_v = c1;
+    } else {
+        env->cc_n = l2;
+        env->cc_v = c2;
+    }
+    env->cc_op = CC_OP_CMPL;
+    env->dregs[Dc1] = l1;
+    env->dregs[Dc2] = l2;
+}
diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index 93f1270..68cb8d4 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -1880,6 +1880,146 @@ DISAS_INSN(arith_im)
     tcg_temp_free(dest);
 }
 
+DISAS_INSN(cas)
+{
+    int opsize;
+    TCGv addr;
+    uint16_t ext;
+    TCGv load;
+    TCGv cmp;
+    TCGMemOp opc;
+
+    switch ((insn >> 9) & 3) {
+    case 1:
+        opsize = OS_BYTE;
+        opc = MO_UB;
+        break;
+    case 2:
+        opsize = OS_WORD;
+        opc = MO_TEUW;
+        break;
+    case 3:
+        opsize = OS_LONG;
+        opc = MO_TEUL;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    opc |= MO_ALIGN;
+
+    ext = read_im16(env, s);
+
+    /* cas Dc,Du,<EA> */
+
+    addr = gen_lea(env, s, insn, opsize);
+    if (IS_NULL_QREG(addr)) {
+        gen_addr_fault(s);
+        return;
+    }
+
+    cmp = gen_extend(DREG(ext, 0), opsize, 0);
+
+    /* if  <EA> == Dc then
+     *     <EA> = Du
+     *     Dc = <EA> (because <EA> == Dc)
+     * else
+     *     Dc = <EA>
+     */
+
+    load = tcg_temp_new();
+    tcg_gen_atomic_cmpxchg_i32(load, addr, cmp, DREG(ext, 6),
+                               IS_USER(s), opc);
+    gen_partset_reg(opsize, DREG(ext, 0), load);
+
+    gen_update_cc_cmp(s, load, cmp, opsize);
+    tcg_temp_free(load);
+}
+
+DISAS_INSN(cas2w)
+{
+    uint16_t ext1, ext2;
+    TCGv addr1, addr2;
+    TCGv regs;
+
+    /* cas2 Dc1:Dc2,Du1:Du2,(Rn1):(Rn2) */
+
+    ext1 = read_im16(env, s);
+
+    if (ext1 & 0x8000) {
+        /* Address Register */
+        addr1 = AREG(ext1, 12);
+    } else {
+        /* Data Register */
+        addr1 = DREG(ext1, 12);
+    }
+
+    ext2 = read_im16(env, s);
+    if (ext2 & 0x8000) {
+        /* Address Register */
+        addr2 = AREG(ext2, 12);
+    } else {
+        /* Data Register */
+        addr2 = DREG(ext2, 12);
+    }
+
+    /* if (R1) == Dc1 && (R2) == Dc2 then
+     *     (R1) = Du1
+     *     (R2) = Du2
+     * else
+     *     Dc1 = (R1)
+     *     Dc2 = (R2)
+     */
+
+   regs = tcg_const_i32(REG(ext2, 6) |
+                        (REG(ext1, 6) << 3) |
+                        (REG(ext2, 0) << 6) |
+                        (REG(ext1, 0) << 9));
+   gen_helper_cas2w(cpu_env, regs, addr1, addr2);
+   tcg_temp_free(regs);
+}
+
+DISAS_INSN(cas2l)
+{
+    uint16_t ext1, ext2;
+    TCGv addr1, addr2, regs;
+
+    /* cas2 Dc1:Dc2,Du1:Du2,(Rn1):(Rn2) */
+
+    ext1 = read_im16(env, s);
+
+    if (ext1 & 0x8000) {
+        /* Address Register */
+        addr1 = AREG(ext1, 12);
+    } else {
+        /* Data Register */
+        addr1 = DREG(ext1, 12);
+    }
+
+    ext2 = read_im16(env, s);
+    if (ext2 & 0x8000) {
+        /* Address Register */
+        addr2 = AREG(ext2, 12);
+    } else {
+        /* Data Register */
+        addr2 = DREG(ext2, 12);
+    }
+
+    /* if (R1) == Dc1 && (R2) == Dc2 then
+     *     (R1) = Du1
+     *     (R2) = Du2
+     * else
+     *     Dc1 = (R1)
+     *     Dc2 = (R2)
+     */
+
+   regs = tcg_const_i32(REG(ext2, 6) |
+                        (REG(ext1, 6) << 3) |
+                        (REG(ext2, 0) << 6) |
+                        (REG(ext1, 0) << 9));
+   gen_helper_cas2w(cpu_env, regs, addr1, addr2);
+   tcg_temp_free(regs);
+}
+
 DISAS_INSN(byterev)
 {
     TCGv reg;
@@ -3885,6 +4025,9 @@ void register_m68k_insns (CPUM68KState *env)
     INSN(arith_im,  0680, fff8, CF_ISA_A);
     INSN(arith_im,  0c00, ff38, CF_ISA_A);
     INSN(arith_im,  0c00, ff00, M68000);
+    INSN(cas,       08c0, f9c0, CAS);
+    INSN(cas2w,     0cfc, ffff, CAS);
+    INSN(cas2l,     0efc, ffff, CAS);
     BASE(bitop_im,  0800, ffc0);
     BASE(bitop_im,  0840, ffc0);
     BASE(bitop_im,  0880, ffc0);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 1/3] target-m68k: add abcd/sbcd/nbcd
  2016-11-02 21:15 ` [Qemu-devel] [PATCH v2 1/3] target-m68k: add abcd/sbcd/nbcd Laurent Vivier
@ 2016-11-03 16:16   ` Richard Henderson
  0 siblings, 0 replies; 15+ messages in thread
From: Richard Henderson @ 2016-11-03 16:16 UTC (permalink / raw)
  To: Laurent Vivier, qemu-devel; +Cc: schwab, agraf, gerg

On 11/02/2016 03:15 PM, Laurent Vivier wrote:
> +static void bcd_sub(TCGv dest, TCGv src)
> +{
> +    TCGv t0, t1, t2;
> +
> +    /*  dest10 = dest10 - src10 - X
> +     *         = bcd_add(dest + 1 - X, 0xf99 - src)
> +     */
> +
> +    /* t0 = 0xfff - src */
> +
> +    t0 = tcg_temp_new();
> +    tcg_gen_neg_i32(t0, src);
> +    tcg_gen_addi_i32(t0, t0, 0xfff);

tcg_gen_subfi_i32(t0, 0xfff, src);

And how did we get 0xfff from 0xf99?  Also, see below...

> +
> +    /* t1 = t0 + dest + 1 - X*/
> +
> +    t1 = tcg_temp_new();
> +    tcg_gen_add_i32(t1, t0, dest);
> +    tcg_gen_addi_i32(t1, t1, 1);
> +    tcg_gen_sub_i32(t1, t1, QREG_CC_X);
> +
> +    /* t2 = t0 ^ dest ^ 1 ^ X */
> +
> +    t2 = tcg_temp_new();
> +    tcg_gen_xor_i32(t2, t0, dest);
> +    tcg_gen_xori_i32(t2, t2, 1);
> +    tcg_gen_xor_i32(t2, t2, QREG_CC_X);

Since you only care about bits 0x110, you can drop the ^ 1 ^ X; they won't 
affect the result.  (Similarly wrt bcd_add, dropping ^ X).

> +    /* t2 = ~t0 & 0x110 */
> +
> +    tcg_gen_not_i32(t2, t0);
> +    tcg_gen_andi_i32(t2, t2, 0x110);
> +
> +    /* t0 = (t2 >> 2) | (t2 >> 3) */
> +
> +    tcg_gen_shri_i32(t0, t2, 2);
> +    tcg_gen_shri_i32(t2, t2, 3);
> +    tcg_gen_or_i32(t0, t0, t2);

For the benefit of hosts that have 8-bit immediate AND insns (e.g. arm32), it 
would be better to rearrange this a little:

     t2 = t0 >> 3;
     t3 = ~t2 & 0x22;
     t4 = t3 + t3;
     t5 = t3 + t4;

(Similarly in bcd_add).

> +
> +    /* return t1 - t0 */
> +
> +    tcg_gen_sub_i32(dest, t1, t0);
> +}
> +
> +static void bcd_flags(TCGv val)
> +{
> +    tcg_gen_andi_i32(QREG_CC_C, val, 0x0ff);
> +    tcg_gen_or_i32(QREG_CC_Z, QREG_CC_Z, QREG_CC_C);
> +
> +    tcg_gen_movi_i32(QREG_CC_X, 0);
> +    tcg_gen_andi_i32(val, val, 0xf00);
> +    tcg_gen_setcond_i32(TCG_COND_NE, QREG_CC_C, val, QREG_CC_X);

Surely 0x100 is the carry.  I don't see how you could produce 0x2xx from 
addition.  For subtraction, I think we got things other than 0/1 in the third 
nibble simply because we started with 0xf99 instead of 0x199.


r~

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 2/3] target-m68k: implement 680x0 movem
  2016-11-02 21:15 ` [Qemu-devel] [PATCH v2 2/3] target-m68k: implement 680x0 movem Laurent Vivier
@ 2016-11-03 16:17   ` Richard Henderson
  2016-11-03 19:47   ` Richard Henderson
  2016-11-03 20:47   ` Richard Henderson
  2 siblings, 0 replies; 15+ messages in thread
From: Richard Henderson @ 2016-11-03 16:17 UTC (permalink / raw)
  To: Laurent Vivier, qemu-devel; +Cc: schwab, agraf, gerg

On 11/02/2016 03:15 PM, Laurent Vivier wrote:
> 680x0 movem can load/store words and long words
> and can use more addressing modes.
> Coldfire can only use long words with (Ax) and (d16,Ax)
> addressing modes.
>
> Signed-off-by: Laurent Vivier <laurent@vivier.eu>
> ---
>  target-m68k/translate.c | 96 ++++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 79 insertions(+), 17 deletions(-)

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/3] target-m68k: add cas/cas2 ops
  2016-11-02 21:15 ` [Qemu-devel] [PATCH v2 3/3] target-m68k: add cas/cas2 ops Laurent Vivier
@ 2016-11-03 16:36   ` Richard Henderson
  2016-11-03 18:03     ` Laurent Vivier
  0 siblings, 1 reply; 15+ messages in thread
From: Richard Henderson @ 2016-11-03 16:36 UTC (permalink / raw)
  To: Laurent Vivier, qemu-devel; +Cc: schwab, agraf, gerg

On 11/02/2016 03:15 PM, Laurent Vivier wrote:
> +    if (c1 != l1) {
> +        env->cc_n = l1;
> +        env->cc_v = c1;
> +    } else {
> +        env->cc_n = l2;
> +        env->cc_v = c2;
> +    }
> +    env->cc_op = CC_OP_CMPL;
> +    env->dregs[Dc1] = deposit32(env->dregs[Dc1], 0, 16, l1);
> +    env->dregs[Dc2] = deposit32(env->dregs[Dc2], 0, 16, l2);

CC_OP_CMPW for cas2w.

> +void HELPER(cas2l)(CPUM68KState *env, uint32_t regs, uint32_t a1, uint32_t a2)
> +{
> +    uint32_t Dc1 = extract32(regs, 9, 3);
> +    uint32_t Dc2 = extract32(regs, 6, 3);
> +    uint32_t Du1 = extract32(regs, 3, 3);
> +    uint32_t Du2 = extract32(regs, 0, 3);
> +    uint32_t c1 = env->dregs[Dc1];
> +    uint32_t c2 = env->dregs[Dc2];
> +    uint32_t u1 = env->dregs[Du1];
> +    uint32_t u2 = env->dregs[Du2];
> +    uint32_t l1, l2;
> +    uint64_t c, u, l;
> +    uintptr_t ra = GETPC();
> +#ifndef CONFIG_USER_ONLY
> +    int mmu_idx = cpu_mmu_index(env, 0);
> +    TCGMemOpIdx oi;
> +#endif
> +
> +    if (parallel_cpus) {
> +        /* We're executing in a parallel context -- must be atomic.  */
> +        if ((a1 & 7) == 0 && a2 == a1 + 4) {
> +            c = deposit64(c2, 32, 32, c1);
> +            u = deposit64(u2, 32, 32, u1);
> +#ifdef CONFIG_USER_ONLY
> +            uint64_t *ha1 = g2h(a1);
> +            l = atomic_cmpxchg__nocheck(ha1, c, u);
> +#else
> +            oi = make_memop_idx(MO_BEQ, mmu_idx);
> +            l = helper_atomic_cmpxchgq_be_mmu(env, a1, c, u, oi, ra);
> +#endif

We do need a check here for CONFIG_ATOMIC64.  If that's not set, the host 
doesn't have 64-bit cmpxchg.  Probably arrange this as

if (parallel_cpus) {
#ifdef CONFIG_ATOMIC64
     if ((a1 & 7) ...) {
        ...
     } else if ((a2 & 7) ...) {
        ...
     } else
#endif
     {
         /* Tell the main loop we need to serialize this insn.  */
         cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
     }
} else {
    ...
}

Which is pretty ugly, but the best we can do without re-organizing the helpers.

> +   regs = tcg_const_i32(REG(ext2, 6) |
> +                        (REG(ext1, 6) << 3) |
> +                        (REG(ext2, 0) << 6) |
> +                        (REG(ext1, 0) << 9));
> +   gen_helper_cas2w(cpu_env, regs, addr1, addr2);
> +   tcg_temp_free(regs);

Need

   /* Note that cas2w also assigned to env->cc_op.  */
   s->cc_op = CC_OP_CMPW;
   s->cc_op_synced = 1;

> +DISAS_INSN(cas2l)
> +{
...
> +   regs = tcg_const_i32(REG(ext2, 6) |
> +                        (REG(ext1, 6) << 3) |
> +                        (REG(ext2, 0) << 6) |
> +                        (REG(ext1, 0) << 9));
> +   gen_helper_cas2w(cpu_env, regs, addr1, addr2);

cas2l.

Also need to set cc_op to CC_OP_CMPL, as above.


r~

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/3] target-m68k: add cas/cas2 ops
  2016-11-03 16:36   ` Richard Henderson
@ 2016-11-03 18:03     ` Laurent Vivier
  2016-11-03 19:20       ` Richard Henderson
  0 siblings, 1 reply; 15+ messages in thread
From: Laurent Vivier @ 2016-11-03 18:03 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: schwab, agraf, gerg

Le 03/11/2016 à 17:36, Richard Henderson a écrit :
> On 11/02/2016 03:15 PM, Laurent Vivier wrote:
>> +    if (c1 != l1) {
>> +        env->cc_n = l1;
>> +        env->cc_v = c1;
>> +    } else {
>> +        env->cc_n = l2;
>> +        env->cc_v = c2;
>> +    }
>> +    env->cc_op = CC_OP_CMPL;
>> +    env->dregs[Dc1] = deposit32(env->dregs[Dc1], 0, 16, l1);
>> +    env->dregs[Dc2] = deposit32(env->dregs[Dc2], 0, 16, l2);
> 
> CC_OP_CMPW for cas2w.

It was working because I have used helper_be_ldsw_mmu() to load values,
is it better to use helper_be_lduw_mmu with CC_OP_CMPW?

>> +DISAS_INSN(cas2l)
>> +{
> ...
>> +   regs = tcg_const_i32(REG(ext2, 6) |
>> +                        (REG(ext1, 6) << 3) |
>> +                        (REG(ext2, 0) << 6) |
>> +                        (REG(ext1, 0) << 9));
>> +   gen_helper_cas2w(cpu_env, regs, addr1, addr2);
> 
> cas2l.

I should not use values with the high word equal to the low word to test
this...

Many thanks,
Laurent

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/3] target-m68k: add cas/cas2 ops
  2016-11-03 18:03     ` Laurent Vivier
@ 2016-11-03 19:20       ` Richard Henderson
  0 siblings, 0 replies; 15+ messages in thread
From: Richard Henderson @ 2016-11-03 19:20 UTC (permalink / raw)
  To: Laurent Vivier, qemu-devel; +Cc: schwab, agraf, gerg

On 11/03/2016 12:03 PM, Laurent Vivier wrote:
>> CC_OP_CMPW for cas2w.
>
> It was working because I have used helper_be_ldsw_mmu() to load values,
> is it better to use helper_be_lduw_mmu with CC_OP_CMPW?

IIRC, one needs the extra sign-extension here:

     case CC_OP_CMPB:                                                       \
     case CC_OP_CMPW:                                                       \
     case CC_OP_CMPL:                                                       \
         src1 = n;                                                          \
         src2 = v;                                                          \
         res = EXTSIGN(src1 - src2, op - CC_OP_CMPB);                       \

to get all of the flags correct, even though just Z would be correct without.


r~

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 2/3] target-m68k: implement 680x0 movem
  2016-11-02 21:15 ` [Qemu-devel] [PATCH v2 2/3] target-m68k: implement 680x0 movem Laurent Vivier
  2016-11-03 16:17   ` Richard Henderson
@ 2016-11-03 19:47   ` Richard Henderson
  2016-11-03 20:11     ` Laurent Vivier
  2016-11-03 20:47   ` Richard Henderson
  2 siblings, 1 reply; 15+ messages in thread
From: Richard Henderson @ 2016-11-03 19:47 UTC (permalink / raw)
  To: Laurent Vivier, qemu-devel; +Cc: schwab, agraf, gerg

On 11/02/2016 03:15 PM, Laurent Vivier wrote:
> +                    if ((insn & 7) + 8 == i &&
> +                        m68k_feature(s->env, M68K_FEATURE_EXT_FULL)) {
> +                        /* M68020+: if the addressing register is the
> +                         * register moved to memory, the value written
> +                         * is the initial value decremented by the size of
> +                         * the operation
> +                         * M68000/M68010: the value is the initial value
> +                         */
> +                        TCGv tmp = tcg_temp_new();
> +                        tcg_gen_sub_i32(tmp, mreg(i), incr);
> +                        gen_store(s, opsize, addr, tmp);
> +                        tcg_temp_free(tmp);

This doesn't look right.  Is the value stored the intermediate value of the 
decremented register, or the final value?  What you're storing is reg-4, which 
is neither of these things.

I could see, maybe, that reg-4 might well turn out to be the right value for

	movem	{a0-a7}, (sp)-

since sp == a7, and therefore stored first.  But I question that's the correct 
result for

	movem	{a0-a7}, (a1)-

If it's the incremental value, then you can just store "addr" and you don't 
need a temp.  If it's the final value, then you can compute

	tcg_gen_subi_i32(tmp, AREG(insn, 0), ctpop32(mask) * 4);



r~

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 2/3] target-m68k: implement 680x0 movem
  2016-11-03 19:47   ` Richard Henderson
@ 2016-11-03 20:11     ` Laurent Vivier
  2016-11-03 20:45       ` Richard Henderson
  0 siblings, 1 reply; 15+ messages in thread
From: Laurent Vivier @ 2016-11-03 20:11 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: schwab, agraf, gerg

Le 03/11/2016 à 20:47, Richard Henderson a écrit :
> On 11/02/2016 03:15 PM, Laurent Vivier wrote:
>> +                    if ((insn & 7) + 8 == i &&
>> +                        m68k_feature(s->env, M68K_FEATURE_EXT_FULL)) {
>> +                        /* M68020+: if the addressing register is the
>> +                         * register moved to memory, the value written
>> +                         * is the initial value decremented by the
>> size of
>> +                         * the operation
>> +                         * M68000/M68010: the value is the initial value
>> +                         */
>> +                        TCGv tmp = tcg_temp_new();
>> +                        tcg_gen_sub_i32(tmp, mreg(i), incr);
>> +                        gen_store(s, opsize, addr, tmp);
>> +                        tcg_temp_free(tmp);
> 
> This doesn't look right.  Is the value stored the intermediate value of
> the decremented register, or the final value?  What you're storing is
> reg-4, which is neither of these things.
>
> I could see, maybe, that reg-4 might well turn out to be the right value
> for
> 
>     movem    {a0-a7}, (sp)-
> 
> since sp == a7, and therefore stored first.  But I question that's the
> correct result for
> 
>     movem    {a0-a7}, (a1)-
> 
> If it's the incremental value, then you can just store "addr" and you
> don't need a temp.  If it's the final value, then you can compute
> 
>     tcg_gen_subi_i32(tmp, AREG(insn, 0), ctpop32(mask) * 4);
> 

As it was not clear for me, I have written a test to see what was the
good value.

my test program is:

top:
        .space 64,0
stack:
        .text
        .globl _start
_start:
        lea stack,%a4
        lea 1,%a0
        lea 2,%a1
        lea 3,%a2
        lea 4,%a3
        lea 5,%a5
        lea 6,%a6
        moveq.l #8, %d0
        moveq.l #9, %d1
        moveq.l #10, %d2
        moveq.l #11, %d3
        moveq.l #12, %d4
        moveq.l #13, %d5
        moveq.l #14, %d6
        moveq.l #15, %d7
        movem.l %a0-%a7/%d0-%d7,-(%a4)

on a real 68040:

initial value of A4 is 0x800020ec
final value of A4 is   0x800020ac

(gdb) x/15x 0x800020ac
0x800020ac: 0x00000008	0x00000009	0x0000000a	0x0000000b
0x800020bc: 0x0000000c	0x0000000d	0x0000000e	0x0000000f
0x800020cc: 0x00000001	0x00000002	0x00000003	0x00000004
0x800020dc: 0x800020e8	0x00000005	0x00000006

Stored value is thus 0x800020e8 so this is initial value - 4.
[I have tried the same test with a1, for the same result]

Laurent

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 2/3] target-m68k: implement 680x0 movem
  2016-11-03 20:11     ` Laurent Vivier
@ 2016-11-03 20:45       ` Richard Henderson
  0 siblings, 0 replies; 15+ messages in thread
From: Richard Henderson @ 2016-11-03 20:45 UTC (permalink / raw)
  To: Laurent Vivier, qemu-devel; +Cc: schwab, agraf, gerg

On 11/03/2016 02:11 PM, Laurent Vivier wrote:
> Le 03/11/2016 à 20:47, Richard Henderson a écrit :
>> On 11/02/2016 03:15 PM, Laurent Vivier wrote:
>>> +                    if ((insn & 7) + 8 == i &&
>>> +                        m68k_feature(s->env, M68K_FEATURE_EXT_FULL)) {
>>> +                        /* M68020+: if the addressing register is the
>>> +                         * register moved to memory, the value written
>>> +                         * is the initial value decremented by the
>>> size of
>>> +                         * the operation
>>> +                         * M68000/M68010: the value is the initial value
>>> +                         */
>>> +                        TCGv tmp = tcg_temp_new();
>>> +                        tcg_gen_sub_i32(tmp, mreg(i), incr);
>>> +                        gen_store(s, opsize, addr, tmp);
>>> +                        tcg_temp_free(tmp);
>>
>> This doesn't look right.  Is the value stored the intermediate value of
>> the decremented register, or the final value?  What you're storing is
>> reg-4, which is neither of these things.
>>
>> I could see, maybe, that reg-4 might well turn out to be the right value
>> for
>>
>>     movem    {a0-a7}, (sp)-
>>
>> since sp == a7, and therefore stored first.  But I question that's the
>> correct result for
>>
>>     movem    {a0-a7}, (a1)-
>>
>> If it's the incremental value, then you can just store "addr" and you
>> don't need a temp.  If it's the final value, then you can compute
>>
>>     tcg_gen_subi_i32(tmp, AREG(insn, 0), ctpop32(mask) * 4);
>>
>
> As it was not clear for me, I have written a test to see what was the
> good value.
>
> my test program is:
>
> top:
>         .space 64,0
> stack:
>         .text
>         .globl _start
> _start:
>         lea stack,%a4
>         lea 1,%a0
>         lea 2,%a1
>         lea 3,%a2
>         lea 4,%a3
>         lea 5,%a5
>         lea 6,%a6
>         moveq.l #8, %d0
>         moveq.l #9, %d1
>         moveq.l #10, %d2
>         moveq.l #11, %d3
>         moveq.l #12, %d4
>         moveq.l #13, %d5
>         moveq.l #14, %d6
>         moveq.l #15, %d7
>         movem.l %a0-%a7/%d0-%d7,-(%a4)
>
> on a real 68040:
>
> initial value of A4 is 0x800020ec
> final value of A4 is   0x800020ac
>
> (gdb) x/15x 0x800020ac
> 0x800020ac: 0x00000008	0x00000009	0x0000000a	0x0000000b
> 0x800020bc: 0x0000000c	0x0000000d	0x0000000e	0x0000000f
> 0x800020cc: 0x00000001	0x00000002	0x00000003	0x00000004
> 0x800020dc: 0x800020e8	0x00000005	0x00000006
>
> Stored value is thus 0x800020e8 so this is initial value - 4.
> [I have tried the same test with a1, for the same result]

Weird.  But, ok.


r~

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 2/3] target-m68k: implement 680x0 movem
  2016-11-02 21:15 ` [Qemu-devel] [PATCH v2 2/3] target-m68k: implement 680x0 movem Laurent Vivier
  2016-11-03 16:17   ` Richard Henderson
  2016-11-03 19:47   ` Richard Henderson
@ 2016-11-03 20:47   ` Richard Henderson
  2016-11-04  7:59     ` Laurent Vivier
  2 siblings, 1 reply; 15+ messages in thread
From: Richard Henderson @ 2016-11-03 20:47 UTC (permalink / raw)
  To: Laurent Vivier, qemu-devel; +Cc: schwab, agraf, gerg

On 11/02/2016 03:15 PM, Laurent Vivier wrote:
> +            for (i = 15; i >= 0; i--, mask >>= 1) {
> +                if (mask & 1) {
> +                    if ((insn & 7) + 8 == i &&
> +                        m68k_feature(s->env, M68K_FEATURE_EXT_FULL)) {
> +                        /* M68020+: if the addressing register is the
> +                         * register moved to memory, the value written
> +                         * is the initial value decremented by the size of
> +                         * the operation
> +                         * M68000/M68010: the value is the initial value
> +                         */
> +                        TCGv tmp = tcg_temp_new();
> +                        tcg_gen_sub_i32(tmp, mreg(i), incr);
> +                        gen_store(s, opsize, addr, tmp);
> +                        tcg_temp_free(tmp);
> +                    } else {
> +                        gen_store(s, opsize, addr, mreg(i));
> +                    }
> +                    if (mask != 1) {
> +                        tcg_gen_sub_i32(addr, addr, incr);
> +                    }
> +                }

One more thing: This is pre-decrement.  Why are you decrementing after the 
store?  Seems to me this should be

    if (mask & 1) {
        tcg_gen_sub_i32(addr, addr, incr);
        if (REG(insn, 0) + 8 == i ...)
        ...
    }


r~

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 2/3] target-m68k: implement 680x0 movem
  2016-11-03 20:47   ` Richard Henderson
@ 2016-11-04  7:59     ` Laurent Vivier
  2016-11-04 12:27       ` Richard Henderson
  0 siblings, 1 reply; 15+ messages in thread
From: Laurent Vivier @ 2016-11-04  7:59 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: schwab, agraf, gerg

Le 03/11/2016 à 21:47, Richard Henderson a écrit :
> On 11/02/2016 03:15 PM, Laurent Vivier wrote:
>> +            for (i = 15; i >= 0; i--, mask >>= 1) {
>> +                if (mask & 1) {
>> +                    if ((insn & 7) + 8 == i &&
>> +                        m68k_feature(s->env, M68K_FEATURE_EXT_FULL)) {
>> +                        /* M68020+: if the addressing register is the
>> +                         * register moved to memory, the value written
>> +                         * is the initial value decremented by the
>> size of
>> +                         * the operation
>> +                         * M68000/M68010: the value is the initial value
>> +                         */
>> +                        TCGv tmp = tcg_temp_new();
>> +                        tcg_gen_sub_i32(tmp, mreg(i), incr);
>> +                        gen_store(s, opsize, addr, tmp);
>> +                        tcg_temp_free(tmp);
>> +                    } else {
>> +                        gen_store(s, opsize, addr, mreg(i));
>> +                    }
>> +                    if (mask != 1) {
>> +                        tcg_gen_sub_i32(addr, addr, incr);
>> +                    }
>> +                }
> 
> One more thing: This is pre-decrement.  Why are you decrementing after
> the store?  Seems to me this should be
> 
>    if (mask & 1) {
>        tcg_gen_sub_i32(addr, addr, incr);
>        if (REG(insn, 0) + 8 == i ...)
>        ...
>    }
> 

Because it has already been decremented by gen_lea()... so this a
problem if we have page fault, except if we use your "areg writeback"
series, and we will.

Thanks,
Laurent

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 2/3] target-m68k: implement 680x0 movem
  2016-11-04  7:59     ` Laurent Vivier
@ 2016-11-04 12:27       ` Richard Henderson
  0 siblings, 0 replies; 15+ messages in thread
From: Richard Henderson @ 2016-11-04 12:27 UTC (permalink / raw)
  To: Laurent Vivier, qemu-devel; +Cc: schwab, agraf, gerg

On 11/04/2016 01:59 AM, Laurent Vivier wrote:
> Le 03/11/2016 à 21:47, Richard Henderson a écrit :
>> On 11/02/2016 03:15 PM, Laurent Vivier wrote:
>>> +            for (i = 15; i >= 0; i--, mask >>= 1) {
>>> +                if (mask & 1) {
>>> +                    if ((insn & 7) + 8 == i &&
>>> +                        m68k_feature(s->env, M68K_FEATURE_EXT_FULL)) {
>>> +                        /* M68020+: if the addressing register is the
>>> +                         * register moved to memory, the value written
>>> +                         * is the initial value decremented by the
>>> size of
>>> +                         * the operation
>>> +                         * M68000/M68010: the value is the initial value
>>> +                         */
>>> +                        TCGv tmp = tcg_temp_new();
>>> +                        tcg_gen_sub_i32(tmp, mreg(i), incr);
>>> +                        gen_store(s, opsize, addr, tmp);
>>> +                        tcg_temp_free(tmp);
>>> +                    } else {
>>> +                        gen_store(s, opsize, addr, mreg(i));
>>> +                    }
>>> +                    if (mask != 1) {
>>> +                        tcg_gen_sub_i32(addr, addr, incr);
>>> +                    }
>>> +                }
>>
>> One more thing: This is pre-decrement.  Why are you decrementing after
>> the store?  Seems to me this should be
>>
>>    if (mask & 1) {
>>        tcg_gen_sub_i32(addr, addr, incr);
>>        if (REG(insn, 0) + 8 == i ...)
>>        ...
>>    }
>>
>
> Because it has already been decremented by gen_lea()... so this a
> problem if we have page fault, except if we use your "areg writeback"
> series, and we will.

Ah, I see.  No, gen_lea doesn't writeback; only gen_ea does.  But I wonder if 
this couldn't be cleaned up a tad.  I'll get back to you.


r~

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2016-11-04 12:27 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-11-02 21:15 [Qemu-devel] [PATCH v2 0/3] target-m68k: add movem, BCD and CAS instructions Laurent Vivier
2016-11-02 21:15 ` [Qemu-devel] [PATCH v2 1/3] target-m68k: add abcd/sbcd/nbcd Laurent Vivier
2016-11-03 16:16   ` Richard Henderson
2016-11-02 21:15 ` [Qemu-devel] [PATCH v2 2/3] target-m68k: implement 680x0 movem Laurent Vivier
2016-11-03 16:17   ` Richard Henderson
2016-11-03 19:47   ` Richard Henderson
2016-11-03 20:11     ` Laurent Vivier
2016-11-03 20:45       ` Richard Henderson
2016-11-03 20:47   ` Richard Henderson
2016-11-04  7:59     ` Laurent Vivier
2016-11-04 12:27       ` Richard Henderson
2016-11-02 21:15 ` [Qemu-devel] [PATCH v2 3/3] target-m68k: add cas/cas2 ops Laurent Vivier
2016-11-03 16:36   ` Richard Henderson
2016-11-03 18:03     ` Laurent Vivier
2016-11-03 19:20       ` Richard Henderson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.