All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 00/11] target/sh4 improvments
@ 2017-07-06  0:23 Richard Henderson
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 01/11] target/sh4: Use cmpxchg for movco Richard Henderson
                   ` (12 more replies)
  0 siblings, 13 replies; 26+ messages in thread
From: Richard Henderson @ 2017-07-06  0:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, bruno

This fixes two problems with atomic operations on sh4,
including an attempt at supporting the user-space atomics
technique used by most sh-linux-user binaries.

This is good enough to run the one occurrence in linux-user-test-0.3.
I'm still downloading enough of a cross environment to be able to run
more recent sh4 binaries.  Including the one in the LP bug report.

Thoughts and more extensive testing appreciated.


r~


Richard Henderson (11):
  target/sh4: Use cmpxchg for movco
  target/sh4: Consolidate end-of-TB tests
  target/sh4: Handle user-space atomics
  target/sh4: Recognize common gUSA sequences
  linux-user/sh4: Notice gUSA regions during signal delivery
  target/sh4: Hoist register bank selection
  target/sh4: Unify cpu_fregs into FREG
  target/sh4: Pass DisasContext to fpr64 routines
  target/sh4: Avoid a potential translator crash for malformed FPR64
  target/sh4: Hoist fp bank selection
  target/sh4: Eliminate DREG macro

 target/sh4/cpu.h       |  24 +-
 target/sh4/helper.h    |   1 +
 linux-user/signal.c    |  21 ++
 target/sh4/op_helper.c |   6 +
 target/sh4/translate.c | 724 ++++++++++++++++++++++++++++++++++++++-----------
 5 files changed, 621 insertions(+), 155 deletions(-)

-- 
2.9.4

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [Qemu-devel] [PATCH 01/11] target/sh4: Use cmpxchg for movco
  2017-07-06  0:23 [Qemu-devel] [PATCH 00/11] target/sh4 improvments Richard Henderson
@ 2017-07-06  0:23 ` Richard Henderson
  2017-07-06 15:25   ` Richard Henderson
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 02/11] target/sh4: Consolidate end-of-TB tests Richard Henderson
                   ` (11 subsequent siblings)
  12 siblings, 1 reply; 26+ messages in thread
From: Richard Henderson @ 2017-07-06  0:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, bruno

As for other targets, cmpxchg isn't quite right for ll/sc,
suffering from an ABA race, but is sufficient to implement
portable atomic operations.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/sh4/cpu.h       |  3 ++-
 target/sh4/translate.c | 56 +++++++++++++++++++++++++++++++++-----------------
 2 files changed, 39 insertions(+), 20 deletions(-)

diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index ffb9168..b15116e 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -169,7 +169,8 @@ typedef struct CPUSH4State {
     tlb_t itlb[ITLB_SIZE];	/* instruction translation table */
     tlb_t utlb[UTLB_SIZE];	/* unified translation table */
 
-    uint32_t ldst;
+    uint32_t lock_addr;
+    uint32_t lock_value;
 
     /* Fields up to this point are cleared by a CPU reset */
     struct {} end_reset_fields;
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 8bc132b..6b247fa 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -68,7 +68,8 @@ static TCGv cpu_gregs[24];
 static TCGv cpu_sr, cpu_sr_m, cpu_sr_q, cpu_sr_t;
 static TCGv cpu_pc, cpu_ssr, cpu_spc, cpu_gbr;
 static TCGv cpu_vbr, cpu_sgr, cpu_dbr, cpu_mach, cpu_macl;
-static TCGv cpu_pr, cpu_fpscr, cpu_fpul, cpu_ldst;
+static TCGv cpu_pr, cpu_fpscr, cpu_fpul;
+static TCGv cpu_lock_addr, cpu_lock_value;
 static TCGv cpu_fregs[32];
 
 /* internal register indexes */
@@ -151,8 +152,12 @@ void sh4_translate_init(void)
                                               offsetof(CPUSH4State,
                                                        delayed_cond),
                                               "_delayed_cond_");
-    cpu_ldst = tcg_global_mem_new_i32(cpu_env,
-				      offsetof(CPUSH4State, ldst), "_ldst_");
+    cpu_lock_addr = tcg_global_mem_new_i32(cpu_env,
+				           offsetof(CPUSH4State, lock_addr),
+                                           "_lock_addr_");
+    cpu_lock_value = tcg_global_mem_new_i32(cpu_env,
+				            offsetof(CPUSH4State, lock_value),
+                                            "_lock_value_");
 
     for (i = 0; i < 32; i++)
         cpu_fregs[i] = tcg_global_mem_new_i32(cpu_env,
@@ -1526,20 +1531,32 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0x0073:
         /* MOVCO.L
-	       LDST -> T
+               LDST -> T
                If (T == 1) R0 -> (Rn)
                0 -> LDST
         */
         if (ctx->features & SH_FEATURE_SH4A) {
-            TCGLabel *label = gen_new_label();
-            tcg_gen_mov_i32(cpu_sr_t, cpu_ldst);
-	    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ldst, 0, label);
-            tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
-	    gen_set_label(label);
-	    tcg_gen_movi_i32(cpu_ldst, 0);
-	    return;
-	} else
-	    break;
+            TCGLabel *fail = gen_new_label();
+            TCGLabel *done = gen_new_label();
+            TCGv tmp;
+
+            tcg_gen_brcond_i32(TCG_COND_NE, REG(B11_8), cpu_lock_addr, fail);
+
+            tmp = tcg_temp_new();
+            tcg_gen_atomic_cmpxchg_i32(tmp, REG(B11_8), cpu_lock_value,
+                                       REG(0), ctx->memidx, MO_TEUL);
+            tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, tmp, cpu_lock_value);
+            tcg_temp_free(tmp);
+            tcg_gen_br(done);
+
+            gen_set_label(fail);
+            tcg_gen_movi_i32(cpu_sr_t, 0);
+
+            gen_set_label(done);
+            return;
+        } else {
+            break;
+        }
     case 0x0063:
         /* MOVLI.L @Rm,R0
                1 -> LDST
@@ -1547,13 +1564,14 @@ static void _decode_opc(DisasContext * ctx)
                When interrupt/exception
                occurred 0 -> LDST
         */
-	if (ctx->features & SH_FEATURE_SH4A) {
-	    tcg_gen_movi_i32(cpu_ldst, 0);
+        if (ctx->features & SH_FEATURE_SH4A) {
             tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
-	    tcg_gen_movi_i32(cpu_ldst, 1);
-	    return;
-	} else
-	    break;
+            tcg_gen_mov_i32(cpu_lock_addr, REG(B11_8));
+            tcg_gen_mov_i32(cpu_lock_value, REG(0));
+            return;
+        } else {
+            break;
+        }
     case 0x0093:		/* ocbi @Rn */
 	{
             gen_helper_ocbi(cpu_env, REG(B11_8));
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [Qemu-devel] [PATCH 02/11] target/sh4: Consolidate end-of-TB tests
  2017-07-06  0:23 [Qemu-devel] [PATCH 00/11] target/sh4 improvments Richard Henderson
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 01/11] target/sh4: Use cmpxchg for movco Richard Henderson
@ 2017-07-06  0:23 ` Richard Henderson
  2017-07-06 15:17   ` Aurelien Jarno
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 03/11] target/sh4: Handle user-space atomics Richard Henderson
                   ` (10 subsequent siblings)
  12 siblings, 1 reply; 26+ messages in thread
From: Richard Henderson @ 2017-07-06  0:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, bruno

We can fold 3 different tests within the decode loop
into a more accurate computation of max_insns to start.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/sh4/translate.c | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 6b247fa..e1661e9 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -1856,7 +1856,6 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
     ctx.features = env->features;
     ctx.has_movcal = (ctx.tbflags & TB_FLAG_PENDING_MOVCA);
 
-    num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
@@ -1864,9 +1863,23 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
     if (max_insns > TCG_MAX_INSNS) {
         max_insns = TCG_MAX_INSNS;
     }
+    /* Since the ISA is fixed-width, we can bound by the number
+       of instructions remaining on the page.  */
+    num_insns = (TARGET_PAGE_SIZE - (ctx.pc & (TARGET_PAGE_SIZE - 1))) / 2;
+    if (max_insns > num_insns) {
+        max_insns = num_insns;
+    }
+    /* Single stepping means just that.  */
+    if (ctx.singlestep_enabled || singlestep) {
+        max_insns = 1;
+    }
 
     gen_tb_start(tb);
-    while (ctx.bstate == BS_NONE && !tcg_op_buf_full()) {
+    num_insns = 0;
+
+    while (ctx.bstate == BS_NONE
+           && num_insns < max_insns
+           && !tcg_op_buf_full()) {
         tcg_gen_insn_start(ctx.pc, ctx.envflags);
         num_insns++;
 
@@ -1890,18 +1903,10 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
         ctx.opcode = cpu_lduw_code(env, ctx.pc);
 	decode_opc(&ctx);
 	ctx.pc += 2;
-	if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0)
-	    break;
-        if (cs->singlestep_enabled) {
-	    break;
-        }
-        if (num_insns >= max_insns)
-            break;
-        if (singlestep)
-            break;
     }
-    if (tb->cflags & CF_LAST_IO)
+    if (tb->cflags & CF_LAST_IO) {
         gen_io_end();
+    }
     if (cs->singlestep_enabled) {
         gen_save_cpu_state(&ctx, true);
         gen_helper_debug(cpu_env);
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [Qemu-devel] [PATCH 03/11] target/sh4: Handle user-space atomics
  2017-07-06  0:23 [Qemu-devel] [PATCH 00/11] target/sh4 improvments Richard Henderson
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 01/11] target/sh4: Use cmpxchg for movco Richard Henderson
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 02/11] target/sh4: Consolidate end-of-TB tests Richard Henderson
@ 2017-07-06  0:23 ` Richard Henderson
  2017-07-06 15:50   ` Aurelien Jarno
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 04/11] target/sh4: Recognize common gUSA sequences Richard Henderson
                   ` (9 subsequent siblings)
  12 siblings, 1 reply; 26+ messages in thread
From: Richard Henderson @ 2017-07-06  0:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, bruno

For uniprocessors, SH4 uses optimistic restartable atomic sequences.
Upon an interrupt, a real kernel would simply notice magic values in
the registers and reset the PC to the start of the sequence.

For QEMU, we cannot do this in quite the same way.  Instead, we notice
the normal start of such a sequence (mov #-x,r15), and start a new TB
that can be executed under cpu_exec_step_atomic.

Reported-by: Bruno Haible  <bruno@clisp.org>
LP: https://bugs.launchpad.net/bugs/1701971
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/sh4/cpu.h       |  21 ++++++--
 target/sh4/helper.h    |   1 +
 target/sh4/op_helper.c |   6 +++
 target/sh4/translate.c | 137 +++++++++++++++++++++++++++++++++++++++++++------
 4 files changed, 147 insertions(+), 18 deletions(-)

diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index b15116e..0a08b12 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -96,6 +96,12 @@
 #define DELAY_SLOT_CONDITIONAL (1 << 1)
 #define DELAY_SLOT_RTE         (1 << 2)
 
+#define TB_FLAG_PENDING_MOVCA  (1 << 3)
+
+#define GUSA_SHIFT             4
+#define GUSA_EXCLUSIVE         (1 << 12)
+#define GUSA_MASK              ((0xff << GUSA_SHIFT) | GUSA_EXCLUSIVE)
+
 typedef struct tlb_t {
     uint32_t vpn;		/* virtual page number */
     uint32_t ppn;		/* physical page number */
@@ -367,7 +373,11 @@ static inline int cpu_ptel_pr (uint32_t ptel)
 #define PTEA_TC        (1 << 3)
 #define cpu_ptea_tc(ptea) (((ptea) & PTEA_TC) >> 3)
 
-#define TB_FLAG_PENDING_MOVCA  (1 << 4)
+#ifdef CONFIG_USER_ONLY
+#define TB_FLAG_ENVFLAGS_MASK  (DELAY_SLOT_MASK | GUSA_MASK)
+#else
+#define TB_FLAG_ENVFLAGS_MASK  DELAY_SLOT_MASK
+#endif
 
 static inline target_ulong cpu_read_sr(CPUSH4State *env)
 {
@@ -388,12 +398,17 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc,
                                         target_ulong *cs_base, uint32_t *flags)
 {
     *pc = env->pc;
+#ifdef CONFIG_USER_ONLY
+    /* For a gUSA region, notice the end of the region.  */
+    *cs_base = env->flags & GUSA_MASK ? env->gregs[0] : 0;
+#else
     *cs_base = 0;
-    *flags = (env->flags & DELAY_SLOT_MASK)                    /* Bits  0- 2 */
+#endif
+    *flags = env->flags /* TB_FLAG_ENVFLAGS_MASK: bits 0-2, 4-12 */
             | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR))  /* Bits 19-21 */
             | (env->sr & ((1u << SR_MD) | (1u << SR_RB)))      /* Bits 29-30 */
             | (env->sr & (1u << SR_FD))                        /* Bit 15 */
-            | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 4 */
+            | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 3 */
 }
 
 #endif /* SH4_CPU_H */
diff --git a/target/sh4/helper.h b/target/sh4/helper.h
index dce859c..efbb560 100644
--- a/target/sh4/helper.h
+++ b/target/sh4/helper.h
@@ -6,6 +6,7 @@ DEF_HELPER_1(raise_slot_fpu_disable, noreturn, env)
 DEF_HELPER_1(debug, noreturn, env)
 DEF_HELPER_1(sleep, noreturn, env)
 DEF_HELPER_2(trapa, noreturn, env, i32)
+DEF_HELPER_1(exclusive, noreturn, env)
 
 DEF_HELPER_3(movcal, void, env, i32, i32)
 DEF_HELPER_1(discard_movcal_backup, void, env)
diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
index 528a40a..3139ad2 100644
--- a/target/sh4/op_helper.c
+++ b/target/sh4/op_helper.c
@@ -115,6 +115,12 @@ void helper_trapa(CPUSH4State *env, uint32_t tra)
     raise_exception(env, 0x160, 0);
 }
 
+void helper_exclusive(CPUSH4State *env)
+{
+    /* We do not want cpu_restore_state to run.  */
+    cpu_loop_exit_atomic(ENV_GET_CPU(env), 0);
+}
+
 void helper_movcal(CPUSH4State *env, uint32_t address, uint32_t value)
 {
     if (cpu_sh4_is_cached (env, address))
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index e1661e9..02c6efc 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -225,7 +225,7 @@ static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc)
     if (ctx->delayed_pc != (uint32_t) -1) {
         tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc);
     }
-    if ((ctx->tbflags & DELAY_SLOT_MASK) != ctx->envflags) {
+    if ((ctx->tbflags & TB_FLAG_ENVFLAGS_MASK) != ctx->envflags) {
         tcg_gen_movi_i32(cpu_flags, ctx->envflags);
     }
 }
@@ -239,7 +239,7 @@ static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest)
 #ifndef CONFIG_USER_ONLY
     return (ctx->tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
 #else
-    return true;
+    return (ctx->tbflags & GUSA_EXCLUSIVE) == 0;
 #endif
 }
 
@@ -260,16 +260,17 @@ static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
 
 static void gen_jump(DisasContext * ctx)
 {
-    if (ctx->delayed_pc == (uint32_t) - 1) {
-	/* Target is not statically known, it comes necessarily from a
-	   delayed jump as immediate jump are conditinal jumps */
-	tcg_gen_mov_i32(cpu_pc, cpu_delayed_pc);
+    if (ctx->delayed_pc == -1) {
+        /* Target is not statically known, it comes necessarily from a
+           delayed jump as immediate jump are conditinal jumps */
+        tcg_gen_mov_i32(cpu_pc, cpu_delayed_pc);
         tcg_gen_discard_i32(cpu_delayed_pc);
-	if (ctx->singlestep_enabled)
+        if (ctx->singlestep_enabled) {
             gen_helper_debug(cpu_env);
-	tcg_gen_exit_tb(0);
+        }
+        tcg_gen_exit_tb(0);
     } else {
-	gen_goto_tb(ctx, 0, ctx->delayed_pc);
+        gen_goto_tb(ctx, 0, ctx->delayed_pc);
     }
 }
 
@@ -278,6 +279,30 @@ static void gen_conditional_jump(DisasContext * ctx,
 				 target_ulong ift, target_ulong ifnott)
 {
     TCGLabel *l1 = gen_new_label();
+
+#ifdef CONFIG_USER_ONLY
+    if (ctx->tbflags & GUSA_EXCLUSIVE) {
+        /* When in an exclusive region, we must continue to the end.
+           Therefore, exit the region on a taken branch, but otherwise
+           fall through to the next instruction.  */
+        uint32_t taken;
+        TCGCond cond;
+
+        if (ift == ctx->pc + 2) {
+            taken = ifnott;
+            cond = TCG_COND_NE;
+        } else {
+            taken = ift;
+            cond = TCG_COND_EQ;
+        }
+        tcg_gen_brcondi_i32(cond, cpu_sr_t, 0, l1);
+        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
+        gen_goto_tb(ctx, 0, taken);
+        gen_set_label(l1);
+        return;
+    }
+#endif
+
     gen_save_cpu_state(ctx, false);
     tcg_gen_brcondi_i32(TCG_COND_NE, cpu_sr_t, 0, l1);
     gen_goto_tb(ctx, 0, ifnott);
@@ -289,13 +314,28 @@ static void gen_conditional_jump(DisasContext * ctx,
 /* Delayed conditional jump (bt or bf) */
 static void gen_delayed_conditional_jump(DisasContext * ctx)
 {
-    TCGLabel *l1;
-    TCGv ds;
+    TCGLabel *l1 = gen_new_label();
+    TCGv ds = tcg_temp_new();
 
-    l1 = gen_new_label();
-    ds = tcg_temp_new();
     tcg_gen_mov_i32(ds, cpu_delayed_cond);
     tcg_gen_discard_i32(cpu_delayed_cond);
+
+#ifdef CONFIG_USER_ONLY
+    if (ctx->tbflags & GUSA_EXCLUSIVE) {
+        /* When in an exclusive region, we must continue to the end.
+           Therefore, exit the region on a taken branch, but otherwise
+           fall through to the next instruction.  */
+        tcg_gen_brcondi_i32(TCG_COND_EQ, ds, 0, l1);
+
+        /* Leave the gUSA region.  */
+        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
+        gen_jump(ctx);
+
+        gen_set_label(l1);
+        return;
+    }
+#endif
+
     tcg_gen_brcondi_i32(TCG_COND_NE, ds, 0, l1);
     gen_goto_tb(ctx, 1, ctx->pc + 2);
     gen_set_label(l1);
@@ -480,6 +520,15 @@ static void _decode_opc(DisasContext * ctx)
 	}
 	return;
     case 0xe000:		/* mov #imm,Rn */
+#ifdef CONFIG_USER_ONLY
+        /* Detect the start of a gUSA region.  If so, update envflags
+           and end the TB.  This will allow us to see the end of the
+           region (stored in R0) in the next TB.  */
+        if (B11_8 == 15 && B7_0s < 0) {
+            ctx->envflags = deposit32(ctx->envflags, GUSA_SHIFT, 8, B7_0s);
+            ctx->bstate = BS_STOP;
+        }
+#endif
 	tcg_gen_movi_i32(REG(B11_8), B7_0s);
 	return;
     case 0x9000:		/* mov.w @(disp,PC),Rn */
@@ -1822,6 +1871,20 @@ static void decode_opc(DisasContext * ctx)
     if (old_flags & DELAY_SLOT_MASK) {
         /* go out of the delay slot */
         ctx->envflags &= ~DELAY_SLOT_MASK;
+
+#ifdef CONFIG_USER_ONLY
+        /* When in an exclusive region, we must continue to the end
+           for conditional branches.  */
+        if (ctx->tbflags & GUSA_EXCLUSIVE
+            && old_flags & DELAY_SLOT_CONDITIONAL) {
+            gen_delayed_conditional_jump(ctx);
+            return;
+        }
+        /* Otherwise this is probably an invalid gUSA region.
+           Drop the GUSA bits so the next TB doesn't see them.  */
+        ctx->envflags &= ~GUSA_MASK;
+#endif
+
         tcg_gen_movi_i32(cpu_flags, ctx->envflags);
         ctx->bstate = BS_BRANCH;
         if (old_flags & DELAY_SLOT_CONDITIONAL) {
@@ -1829,10 +1892,35 @@ static void decode_opc(DisasContext * ctx)
         } else {
             gen_jump(ctx);
 	}
-
     }
 }
 
+#ifdef CONFIG_USER_ONLY
+static int decode_gusa(DisasContext *ctx)
+{
+    uint32_t pc = ctx->pc;
+    uint32_t pc_end = ctx->tb->cs_base;
+
+    qemu_log_mask(LOG_UNIMP, "Unrecognized gUSA sequence %08x-%08x\n",
+                  pc, pc_end);
+
+    /* Restart with the EXCLUSIVE bit set, within a TB run via
+       cpu_exec_step_atomic holding the exclusive lock.  */
+    tcg_gen_insn_start(pc, ctx->envflags);
+    ctx->envflags |= GUSA_EXCLUSIVE;
+    gen_save_cpu_state(ctx, false);
+    gen_helper_exclusive(cpu_env);
+    ctx->bstate = BS_EXCP;
+
+    /* We're not executing an instruction, but we must report one for the
+       purposes of accounting within the TB.  At which point we might as
+       well report the entire region so that it's immediately available
+       in the disassembly dump.  */
+    ctx->pc = pc_end;
+    return 1;
+}
+#endif
+
 void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
 {
     SuperHCPU *cpu = sh_env_get_cpu(env);
@@ -1845,7 +1933,7 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
     pc_start = tb->pc;
     ctx.pc = pc_start;
     ctx.tbflags = (uint32_t)tb->flags;
-    ctx.envflags = tb->flags & DELAY_SLOT_MASK;
+    ctx.envflags = tb->flags & TB_FLAG_ENVFLAGS_MASK;
     ctx.bstate = BS_NONE;
     ctx.memidx = (ctx.tbflags & (1u << SR_MD)) == 0 ? 1 : 0;
     /* We don't know if the delayed pc came from a dynamic or static branch,
@@ -1877,6 +1965,17 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
     gen_tb_start(tb);
     num_insns = 0;
 
+#ifdef CONFIG_USER_ONLY
+    if (ctx.tbflags & GUSA_EXCLUSIVE) {
+        /* Regardless of single-stepping or the end of the page,
+           we must complete execution of the gUSA region while
+           holding the exclusive lock.  */
+        max_insns = (tb->cs_base - ctx.pc) / 2;
+    } else if (ctx.tbflags & GUSA_MASK) {
+        num_insns = decode_gusa(&ctx);
+    }
+#endif
+
     while (ctx.bstate == BS_NONE
            && num_insns < max_insns
            && !tcg_op_buf_full()) {
@@ -1907,6 +2006,14 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
     if (tb->cflags & CF_LAST_IO) {
         gen_io_end();
     }
+
+#ifdef CONFIG_USER_ONLY
+    if ((ctx.tbflags & GUSA_EXCLUSIVE) && ctx.bstate == BS_NONE) {
+        /* Ending the region of exclusivity.  Clear the bits.  */
+        ctx.envflags &= ~GUSA_MASK;
+    }
+#endif
+
     if (cs->singlestep_enabled) {
         gen_save_cpu_state(&ctx, true);
         gen_helper_debug(cpu_env);
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [Qemu-devel] [PATCH 04/11] target/sh4: Recognize common gUSA sequences
  2017-07-06  0:23 [Qemu-devel] [PATCH 00/11] target/sh4 improvments Richard Henderson
                   ` (2 preceding siblings ...)
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 03/11] target/sh4: Handle user-space atomics Richard Henderson
@ 2017-07-06  0:23 ` Richard Henderson
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 05/11] linux-user/sh4: Notice gUSA regions during signal delivery Richard Henderson
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 26+ messages in thread
From: Richard Henderson @ 2017-07-06  0:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, bruno

For many of the sequences produced by gcc or glibc,
we can translate these as host atomic operations.
Which saves the need to acquire the exclusive lock.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/sh4/translate.c | 300 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 290 insertions(+), 10 deletions(-)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 02c6efc..9ab7d6e 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -1896,11 +1896,296 @@ static void decode_opc(DisasContext * ctx)
 }
 
 #ifdef CONFIG_USER_ONLY
-static int decode_gusa(DisasContext *ctx)
+/* For uniprocessors, SH4 uses optimistic restartable atomic sequences.
+   Upon an interrupt, a real kernel would simply notice magic values in
+   the registers and reset the PC to the start of the sequence.
+
+   For QEMU, we cannot do this in quite the same way.  Instead, we notice
+   the normal start of such a sequence (mov #-x,r15).  While we can handle
+   any sequence via cpu_exec_step_atomic, we can recognize the "normal"
+   sequences and transform them into atomic operations as seen by the host.
+*/
+static int decode_gusa(DisasContext *ctx, CPUSH4State *env, int *pmax_insns)
 {
+    uint16_t insns[5];
+    int ld_adr, ld_reg, ld_mop;
+    int op_reg, op_arg, op_opc;
+    int mt_reg, st_reg, st_mop;
+
     uint32_t pc = ctx->pc;
     uint32_t pc_end = ctx->tb->cs_base;
+    int backup = sextract32(ctx->tbflags, GUSA_SHIFT, 8);
+    int max_insns = (pc_end - pc) / 2;
+    int i;
+
+    if (pc != pc_end + backup || max_insns < 2) {
+        /* This is a malformed gUSA region.  Don't do anything special,
+           since the interpreter is likely to get confused.  */
+        ctx->envflags &= ~GUSA_MASK;
+        return 0;
+    }
+
+    if (ctx->tbflags & GUSA_EXCLUSIVE) {
+        /* Regardless of single-stepping or the end of the page,
+           we must complete execution of the gUSA region while
+           holding the exclusive lock.  */
+        *pmax_insns = max_insns;
+        return 0;
+    }
+
+    /* The state machine below will consume only a few insns.
+       If there are more than that in a region, fail now.  */
+    if (max_insns > ARRAY_SIZE(insns)) {
+        goto fail;
+    }
+
+    /* Read all of the insns for the region.  */
+    for (i = 0; i < max_insns; ++i) {
+        insns[i] = cpu_lduw_code(env, pc + i * 2);
+    }
+
+    ld_adr = ld_reg = ld_mop = -1;
+    op_reg = op_arg = op_opc = -1;
+    mt_reg = -1;
+    st_reg = st_mop = -1;
+    i = 0;
+
+#define NEXT_INSN \
+    do { if (i >= max_insns) goto fail; ctx->opcode = insns[i++]; } while (0)
+
+    /*
+     * Expect a load to begin the region.
+     */
+    NEXT_INSN;
+    switch (ctx->opcode & 0xf00f) {
+    case 0x6000: /* mov.b @Rm,Rn */
+        ld_mop = MO_SB;
+        break;
+    case 0x6001: /* mov.w @Rm,Rn */
+        ld_mop = MO_TESW;
+        break;
+    case 0x6002: /* mov.l @Rm,Rn */
+        ld_mop = MO_TESL;
+        break;
+    default:
+        goto fail;
+    }
+    ld_adr = B7_4;
+    op_reg = ld_reg = B11_8;
+    if (ld_adr == ld_reg) {
+        goto fail;
+    }
+
+    /*
+     * Expect an optional register move.
+     */
+    NEXT_INSN;
+    switch (ctx->opcode & 0xf00f) {
+    case 0x6003: /* mov Rm,Rn */
+        /* Here we want to recognize the ld output being
+           saved for later consumtion (e.g. atomic_fetch_op).  */
+        if (ld_reg != B7_4) {
+            goto fail;
+        }
+        op_reg = B11_8;
+        break;
+
+    default:
+        /* Put back and re-examine as operation.  */
+        --i;
+    }
+
+    /*
+     * Expect the operation.
+     */
+    NEXT_INSN;
+    switch (ctx->opcode & 0xf00f) {
+    case 0x300c: /* add Rm,Rn */
+        op_opc = INDEX_op_add_i32;
+        goto do_reg_op;
+    case 0x2009: /* and Rm,Rn */
+        op_opc = INDEX_op_and_i32;
+        goto do_reg_op;
+    case 0x200a: /* xor Rm,Rn */
+        op_opc = INDEX_op_xor_i32;
+        goto do_reg_op;
+    case 0x200b: /* or Rm,Rn */
+        op_opc = INDEX_op_or_i32;
+    do_reg_op:
+        /* The operation register should be as expected, and the
+           other input cannot depend on the load.  */
+        op_arg = B7_4;
+        if (op_reg != B11_8 || op_arg == op_reg || op_arg == ld_reg) {
+            goto fail;
+        }
+        break;
+
+    case 0x3000: /* cmp/eq Rm,Rn */
+        /* Looking for the middle of a compare-and-swap sequence,
+           beginning with the compare.  Operands can be either order,
+           but with only one overlapping the load.  */
+        if ((op_reg == B11_8) + (op_reg == B7_4) != 1) {
+            goto fail;
+        }
+        op_opc = INDEX_op_setcond_i32;  /* placeholder */
+        op_arg = (op_reg == B11_8 ? B7_4 : B11_8);
+
+        NEXT_INSN;
+        switch (ctx->opcode & 0xff00) {
+        case 0x8b00: /* bf label */
+        case 0x8f00: /* bf/s label */
+            if (pc + (i + 1 + B7_0s) * 2 != pc_end) {
+                goto fail;
+            }
+            if ((ctx->opcode & 0xff00) == 0x8b00) { /* bf label */
+                break;
+            }
+            /* We're looking to unconditionally modify Rn with the
+               result of the comparison, within the delay slot of
+               the branch.  This is used by older gcc.  */
+            NEXT_INSN;
+            if ((ctx->opcode & 0xf0ff) == 0x0029) { /* movt Rn */
+                mt_reg = B11_8;
+            } else {
+                goto fail;
+            }
+            break;
+
+        default:
+            goto fail;
+        }
+        break;
+
+    default:
+        /* Put back and re-examine as store.  */
+        --i;
+    }
+
+    /*
+     * Expect the store.
+     */
+    /* The store must be the last insn.  */
+    if (i != max_insns - 1) {
+        goto fail;
+    }
+    NEXT_INSN;
+    switch (ctx->opcode & 0xf00f) {
+    case 0x2000: /* mov.b Rm,@Rn */
+        st_mop = MO_UB;
+        break;
+    case 0x2001: /* mov.w Rm,@Rn */
+        st_mop = MO_UW;
+        break;
+    case 0x2002: /* mov.l Rm,@Rn */
+        st_mop = MO_UL;
+        break;
+    default:
+        goto fail;
+    }
+    /* The store must match the load.  */
+    if (ld_adr != B11_8 || st_mop != (ld_mop & MO_SIZE)) {
+        goto fail;
+    }
+    st_reg = B7_4;
+
+#undef NEXT_INSN
+
+    /*
+     * Emit the operation.
+     */
+    tcg_gen_insn_start(pc, ctx->envflags);
+    switch (op_opc) {
+    case -1:
+        /* No operation found.  Look for exchange pattern.  */
+        if (st_reg == ld_reg || st_reg == op_reg) {
+            goto fail;
+        }
+        tcg_gen_atomic_xchg_i32(REG(ld_reg), REG(ld_adr), REG(st_reg),
+                                ctx->memidx, ld_mop);
+        break;
+
+    case INDEX_op_add_i32:
+        if (op_reg != st_reg) {
+            goto fail;
+        }
+        if (op_reg == ld_reg && st_mop == MO_UL) {
+            tcg_gen_atomic_add_fetch_i32(REG(ld_reg), REG(ld_adr),
+                                         REG(op_arg), ctx->memidx, ld_mop);
+        } else {
+            tcg_gen_atomic_fetch_add_i32(REG(ld_reg), REG(ld_adr),
+                                         REG(op_arg), ctx->memidx, ld_mop);
+            if (op_reg != ld_reg) {
+                /* Note that mop sizes < 4 cannot use add_fetch
+                   because it won't carry into the higher bits.  */
+                tcg_gen_add_i32(REG(op_reg), REG(ld_reg), REG(op_arg));
+            }
+        }
+        break;
+
+    case INDEX_op_and_i32:
+        if (op_reg != st_reg) {
+            goto fail;
+        }
+        if (op_reg == ld_reg) {
+            tcg_gen_atomic_and_fetch_i32(REG(ld_reg), REG(ld_adr),
+                                         REG(op_arg), ctx->memidx, ld_mop);
+        } else {
+            tcg_gen_atomic_fetch_and_i32(REG(ld_reg), REG(ld_adr),
+                                         REG(op_arg), ctx->memidx, ld_mop);
+            tcg_gen_and_i32(REG(op_reg), REG(ld_reg), REG(op_arg));
+        }
+        break;
+
+    case INDEX_op_or_i32:
+        if (op_reg != st_reg) {
+            goto fail;
+        }
+        if (op_reg == ld_reg) {
+            tcg_gen_atomic_or_fetch_i32(REG(ld_reg), REG(ld_adr),
+                                        REG(op_arg), ctx->memidx, ld_mop);
+        } else {
+            tcg_gen_atomic_fetch_or_i32(REG(ld_reg), REG(ld_adr),
+                                        REG(op_arg), ctx->memidx, ld_mop);
+            tcg_gen_or_i32(REG(op_reg), REG(ld_reg), REG(op_arg));
+        }
+        break;
+
+    case INDEX_op_xor_i32:
+        if (op_reg != st_reg) {
+            goto fail;
+        }
+        if (op_reg == ld_reg) {
+            tcg_gen_atomic_xor_fetch_i32(REG(ld_reg), REG(ld_adr),
+                                         REG(op_arg), ctx->memidx, ld_mop);
+        } else {
+            tcg_gen_atomic_fetch_xor_i32(REG(ld_reg), REG(ld_adr),
+                                         REG(op_arg), ctx->memidx, ld_mop);
+            tcg_gen_xor_i32(REG(op_reg), REG(ld_reg), REG(op_arg));
+        }
+        break;
 
+    case INDEX_op_setcond_i32:
+        if (st_reg == ld_reg) {
+            goto fail;
+        }
+        tcg_gen_atomic_cmpxchg_i32(REG(ld_reg), REG(ld_adr), REG(op_arg),
+                                   REG(st_reg), ctx->memidx, ld_mop);
+        tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, REG(ld_reg), REG(op_arg));
+        if (mt_reg >= 0) {
+            tcg_gen_mov_i32(REG(mt_reg), cpu_sr_t);
+        }
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
+
+    /* The entire region has been translated.  */
+    ctx->envflags &= ~GUSA_MASK;
+    ctx->pc = pc_end;
+    return max_insns;
+
+ fail:
     qemu_log_mask(LOG_UNIMP, "Unrecognized gUSA sequence %08x-%08x\n",
                   pc, pc_end);
 
@@ -1913,8 +2198,8 @@ static int decode_gusa(DisasContext *ctx)
     ctx->bstate = BS_EXCP;
 
     /* We're not executing an instruction, but we must report one for the
-       purposes of accounting within the TB.  At which point we might as
-       well report the entire region so that it's immediately available
+       purposes of accounting within the TB.  We might as well report the
+       entire region consumed via ctx->pc so that it's immediately available
        in the disassembly dump.  */
     ctx->pc = pc_end;
     return 1;
@@ -1966,13 +2251,8 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
     num_insns = 0;
 
 #ifdef CONFIG_USER_ONLY
-    if (ctx.tbflags & GUSA_EXCLUSIVE) {
-        /* Regardless of single-stepping or the end of the page,
-           we must complete execution of the gUSA region while
-           holding the exclusive lock.  */
-        max_insns = (tb->cs_base - ctx.pc) / 2;
-    } else if (ctx.tbflags & GUSA_MASK) {
-        num_insns = decode_gusa(&ctx);
+    if (ctx.tbflags & GUSA_MASK) {
+        num_insns = decode_gusa(&ctx, env, &max_insns);
     }
 #endif
 
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [Qemu-devel] [PATCH 05/11] linux-user/sh4: Notice gUSA regions during signal delivery
  2017-07-06  0:23 [Qemu-devel] [PATCH 00/11] target/sh4 improvments Richard Henderson
                   ` (3 preceding siblings ...)
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 04/11] target/sh4: Recognize common gUSA sequences Richard Henderson
@ 2017-07-06  0:23 ` Richard Henderson
  2017-07-06  1:09   ` Laurent Vivier
  2017-07-06 12:09   ` Laurent Vivier
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 06/11] target/sh4: Hoist register bank selection Richard Henderson
                   ` (7 subsequent siblings)
  12 siblings, 2 replies; 26+ messages in thread
From: Richard Henderson @ 2017-07-06  0:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, bruno

We translate gUSA regions atomically in a parallel context.
But in a serial context a gUSA region may be interrupted.
In that case, restart the region as the kernel would.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 linux-user/signal.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 3d18d1b..1e716a9 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -3471,6 +3471,23 @@ static abi_ulong get_sigframe(struct target_sigaction *ka,
     return (sp - frame_size) & -8ul;
 }
 
+/* Notice when we're in the middle of a gUSA region and reset.
+   Note that this will only occur for !parallel_cpus, as we will
+   translate such sequences differently in a parallel context.  */
+static void unwind_gusa(CPUSH4State *regs)
+{
+    /* If the stack pointer is sufficiently negative... */
+    if ((regs->gregs[15] & 0xc0000000u) == 0xc0000000u) {
+        /* Reset the PC to before the gUSA region, as computed from
+           R0 = region end, SP = -(region size), plus one more insn
+           that actually sets SP to the region size.  */
+        regs->pc = regs->gregs[0] + regs->gregs[15] - 2;
+
+        /* Reset the SP to the saved version in R1.  */
+        regs->gregs[15] = regs->gregs[1];
+    }
+}
+
 static void setup_sigcontext(struct target_sigcontext *sc,
                              CPUSH4State *regs, unsigned long mask)
 {
@@ -3534,6 +3551,8 @@ static void setup_frame(int sig, struct target_sigaction *ka,
     abi_ulong frame_addr;
     int i;
 
+    unwind_gusa(regs);
+
     frame_addr = get_sigframe(ka, regs->gregs[15], sizeof(*frame));
     trace_user_setup_frame(regs, frame_addr);
     if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
@@ -3583,6 +3602,8 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
     abi_ulong frame_addr;
     int i;
 
+    unwind_gusa(regs);
+
     frame_addr = get_sigframe(ka, regs->gregs[15], sizeof(*frame));
     trace_user_setup_rt_frame(regs, frame_addr);
     if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [Qemu-devel] [PATCH 06/11] target/sh4: Hoist register bank selection
  2017-07-06  0:23 [Qemu-devel] [PATCH 00/11] target/sh4 improvments Richard Henderson
                   ` (4 preceding siblings ...)
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 05/11] linux-user/sh4: Notice gUSA regions during signal delivery Richard Henderson
@ 2017-07-06  0:23 ` Richard Henderson
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 07/11] target/sh4: Unify cpu_fregs into FREG Richard Henderson
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 26+ messages in thread
From: Richard Henderson @ 2017-07-06  0:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, bruno

Compute which register bank to use once at the start of translation.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/sh4/translate.c | 43 ++++++++++++++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 9ab7d6e..20e24d5 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -35,6 +35,8 @@
 
 typedef struct DisasContext {
     struct TranslationBlock *tb;
+    TCGv *gregs;         /* active bank */
+    TCGv *altregs;       /* inactive, alternate, bank */
     target_ulong pc;
     uint16_t opcode;
     uint32_t tbflags;    /* should stay unmodified during the TB translation */
@@ -64,7 +66,7 @@ enum {
 
 /* global register indexes */
 static TCGv_env cpu_env;
-static TCGv cpu_gregs[24];
+static TCGv cpu_gregs[2][16];
 static TCGv cpu_sr, cpu_sr_m, cpu_sr_q, cpu_sr_t;
 static TCGv cpu_pc, cpu_ssr, cpu_spc, cpu_gbr;
 static TCGv cpu_vbr, cpu_sgr, cpu_dbr, cpu_mach, cpu_macl;
@@ -99,16 +101,31 @@ void sh4_translate_init(void)
         "FPR12_BANK1", "FPR13_BANK1", "FPR14_BANK1", "FPR15_BANK1",
     };
 
-    if (done_init)
+    if (done_init) {
         return;
+    }
 
     cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
     tcg_ctx.tcg_env = cpu_env;
 
-    for (i = 0; i < 24; i++)
-        cpu_gregs[i] = tcg_global_mem_new_i32(cpu_env,
-                                              offsetof(CPUSH4State, gregs[i]),
-                                              gregnames[i]);
+    for (i = 0; i < 8; i++) {
+        cpu_gregs[0][i]
+            = tcg_global_mem_new_i32(cpu_env,
+                                     offsetof(CPUSH4State, gregs[i]),
+                                     gregnames[i]);
+    }
+    for (i = 8; i < 16; i++) {
+        cpu_gregs[0][i] = cpu_gregs[1][i]
+            = tcg_global_mem_new_i32(cpu_env,
+                                     offsetof(CPUSH4State, gregs[i]),
+                                     gregnames[i]);
+    }
+    for (i = 16; i < 24; i++) {
+        cpu_gregs[1][i - 16]
+            = tcg_global_mem_new_i32(cpu_env,
+                                     offsetof(CPUSH4State, gregs[i]),
+                                     gregnames[i]);
+    }
 
     cpu_pc = tcg_global_mem_new_i32(cpu_env,
                                     offsetof(CPUSH4State, pc), "PC");
@@ -362,13 +379,8 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
 #define B11_8 ((ctx->opcode >> 8) & 0xf)
 #define B15_12 ((ctx->opcode >> 12) & 0xf)
 
-#define REG(x) ((x) < 8 && (ctx->tbflags & (1u << SR_MD))\
-                        && (ctx->tbflags & (1u << SR_RB))\
-                ? (cpu_gregs[x + 16]) : (cpu_gregs[x]))
-
-#define ALTREG(x) ((x) < 8 && (!(ctx->tbflags & (1u << SR_MD))\
-                               || !(ctx->tbflags & (1u << SR_RB)))\
-		? (cpu_gregs[x + 16]) : (cpu_gregs[x]))
+#define REG(x)     ctx->gregs[x]
+#define ALTREG(x)  ctx->altregs[x]
 
 #define FREG(x) (ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x))
 #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))
@@ -2214,6 +2226,7 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
     target_ulong pc_start;
     int num_insns;
     int max_insns;
+    int bank;
 
     pc_start = tb->pc;
     ctx.pc = pc_start;
@@ -2229,6 +2242,10 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
     ctx.features = env->features;
     ctx.has_movcal = (ctx.tbflags & TB_FLAG_PENDING_MOVCA);
 
+    bank = (ctx.tbflags & (1 << SR_MD)) && (ctx.tbflags & (1 << SR_RB));
+    ctx.gregs = cpu_gregs[bank];
+    ctx.altregs = cpu_gregs[bank ^ 1];
+
     max_insns = tb->cflags & CF_COUNT_MASK;
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [Qemu-devel] [PATCH 07/11] target/sh4: Unify cpu_fregs into FREG
  2017-07-06  0:23 [Qemu-devel] [PATCH 00/11] target/sh4 improvments Richard Henderson
                   ` (5 preceding siblings ...)
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 06/11] target/sh4: Hoist register bank selection Richard Henderson
@ 2017-07-06  0:23 ` Richard Henderson
  2017-07-06  1:55   ` Philippe Mathieu-Daudé
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 08/11] target/sh4: Pass DisasContext to fpr64 routines Richard Henderson
                   ` (5 subsequent siblings)
  12 siblings, 1 reply; 26+ messages in thread
From: Richard Henderson @ 2017-07-06  0:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, bruno

We were treating FREG as an index and REG as a TCGv.
Making FREG return a TCGv is both less confusing and
a step toward cleaner banking of cpu_fregs.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/sh4/translate.c | 123 +++++++++++++++++++++----------------------------
 1 file changed, 52 insertions(+), 71 deletions(-)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 20e24d5..e4fd6f2 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -382,10 +382,11 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
 #define REG(x)     ctx->gregs[x]
 #define ALTREG(x)  ctx->altregs[x]
 
-#define FREG(x) (ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x))
+#define FREG(x) cpu_fregs[ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x)]
 #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))
-#define XREG(x) (ctx->tbflags & FPSCR_FR ? XHACK(x) ^ 0x10 : XHACK(x))
-#define DREG(x) FREG(x) /* Assumes lsb of (x) is always 0 */
+#define XREG(x) FREG(XHACK(x))
+/* Assumes lsb of (x) is always 0 */
+#define DREG(x) (ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x))
 
 #define CHECK_NOT_DELAY_SLOT \
     if (ctx->envflags & DELAY_SLOT_MASK) {                           \
@@ -1005,56 +1006,51 @@ static void _decode_opc(DisasContext * ctx)
 	CHECK_FPU_ENABLED
         if (ctx->tbflags & FPSCR_SZ) {
 	    TCGv_i64 fp = tcg_temp_new_i64();
-	    gen_load_fpr64(fp, XREG(B7_4));
-	    gen_store_fpr64(fp, XREG(B11_8));
+	    gen_load_fpr64(fp, XHACK(B7_4));
+	    gen_store_fpr64(fp, XHACK(B11_8));
 	    tcg_temp_free_i64(fp);
 	} else {
-	    tcg_gen_mov_i32(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B7_4)]);
+	    tcg_gen_mov_i32(FREG(B11_8), FREG(B7_4));
 	}
 	return;
     case 0xf00a: /* fmov {F,D,X}Rm,@Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
         if (ctx->tbflags & FPSCR_SZ) {
 	    TCGv addr_hi = tcg_temp_new();
-	    int fr = XREG(B7_4);
+	    int fr = XHACK(B7_4);
 	    tcg_gen_addi_i32(addr_hi, REG(B11_8), 4);
-            tcg_gen_qemu_st_i32(cpu_fregs[fr], REG(B11_8),
-                                ctx->memidx, MO_TEUL);
-            tcg_gen_qemu_st_i32(cpu_fregs[fr+1], addr_hi,
-                                ctx->memidx, MO_TEUL);
+            tcg_gen_qemu_st_i32(FREG(fr), REG(B11_8), ctx->memidx, MO_TEUL);
+            tcg_gen_qemu_st_i32(FREG(fr + 1), addr_hi, ctx->memidx, MO_TEUL);
 	    tcg_temp_free(addr_hi);
 	} else {
-            tcg_gen_qemu_st_i32(cpu_fregs[FREG(B7_4)], REG(B11_8),
-                                ctx->memidx, MO_TEUL);
+            tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx, MO_TEUL);
 	}
 	return;
     case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
         if (ctx->tbflags & FPSCR_SZ) {
 	    TCGv addr_hi = tcg_temp_new();
-	    int fr = XREG(B11_8);
+	    int fr = XHACK(B11_8);
 	    tcg_gen_addi_i32(addr_hi, REG(B7_4), 4);
-            tcg_gen_qemu_ld_i32(cpu_fregs[fr], REG(B7_4), ctx->memidx, MO_TEUL);
-            tcg_gen_qemu_ld_i32(cpu_fregs[fr+1], addr_hi, ctx->memidx, MO_TEUL);
+            tcg_gen_qemu_ld_i32(FREG(fr), REG(B7_4), ctx->memidx, MO_TEUL);
+            tcg_gen_qemu_ld_i32(FREG(fr + 1), addr_hi, ctx->memidx, MO_TEUL);
 	    tcg_temp_free(addr_hi);
 	} else {
-            tcg_gen_qemu_ld_i32(cpu_fregs[FREG(B11_8)], REG(B7_4),
-                                ctx->memidx, MO_TEUL);
+            tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL);
 	}
 	return;
     case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
         if (ctx->tbflags & FPSCR_SZ) {
 	    TCGv addr_hi = tcg_temp_new();
-	    int fr = XREG(B11_8);
+	    int fr = XHACK(B11_8);
 	    tcg_gen_addi_i32(addr_hi, REG(B7_4), 4);
-            tcg_gen_qemu_ld_i32(cpu_fregs[fr], REG(B7_4), ctx->memidx, MO_TEUL);
-            tcg_gen_qemu_ld_i32(cpu_fregs[fr+1], addr_hi, ctx->memidx, MO_TEUL);
+            tcg_gen_qemu_ld_i32(FREG(fr), REG(B7_4), ctx->memidx, MO_TEUL);
+            tcg_gen_qemu_ld_i32(FREG(fr + 1), addr_hi, ctx->memidx, MO_TEUL);
 	    tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 8);
 	    tcg_temp_free(addr_hi);
 	} else {
-            tcg_gen_qemu_ld_i32(cpu_fregs[FREG(B11_8)], REG(B7_4),
-                                ctx->memidx, MO_TEUL);
+            tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL);
 	    tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
 	}
 	return;
@@ -1063,13 +1059,12 @@ static void _decode_opc(DisasContext * ctx)
         TCGv addr = tcg_temp_new_i32();
         tcg_gen_subi_i32(addr, REG(B11_8), 4);
         if (ctx->tbflags & FPSCR_SZ) {
-	    int fr = XREG(B7_4);
-            tcg_gen_qemu_st_i32(cpu_fregs[fr+1], addr, ctx->memidx, MO_TEUL);
+	    int fr = XHACK(B7_4);
+            tcg_gen_qemu_st_i32(FREG(fr + 1), addr, ctx->memidx, MO_TEUL);
 	    tcg_gen_subi_i32(addr, addr, 4);
-            tcg_gen_qemu_st_i32(cpu_fregs[fr], addr, ctx->memidx, MO_TEUL);
+            tcg_gen_qemu_st_i32(FREG(fr), addr, ctx->memidx, MO_TEUL);
 	} else {
-            tcg_gen_qemu_st_i32(cpu_fregs[FREG(B7_4)], addr,
-                                ctx->memidx, MO_TEUL);
+            tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
 	}
         tcg_gen_mov_i32(REG(B11_8), addr);
         tcg_temp_free(addr);
@@ -1080,15 +1075,12 @@ static void _decode_opc(DisasContext * ctx)
 	    TCGv addr = tcg_temp_new_i32();
 	    tcg_gen_add_i32(addr, REG(B7_4), REG(0));
             if (ctx->tbflags & FPSCR_SZ) {
-		int fr = XREG(B11_8);
-                tcg_gen_qemu_ld_i32(cpu_fregs[fr], addr,
-                                    ctx->memidx, MO_TEUL);
+		int fr = XHACK(B11_8);
+                tcg_gen_qemu_ld_i32(FREG(fr), addr, ctx->memidx, MO_TEUL);
 		tcg_gen_addi_i32(addr, addr, 4);
-                tcg_gen_qemu_ld_i32(cpu_fregs[fr+1], addr,
-                                    ctx->memidx, MO_TEUL);
+                tcg_gen_qemu_ld_i32(FREG(fr + 1), addr, ctx->memidx, MO_TEUL);
 	    } else {
-                tcg_gen_qemu_ld_i32(cpu_fregs[FREG(B11_8)], addr,
-                                    ctx->memidx, MO_TEUL);
+                tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx, MO_TEUL);
 	    }
 	    tcg_temp_free(addr);
 	}
@@ -1099,15 +1091,12 @@ static void _decode_opc(DisasContext * ctx)
 	    TCGv addr = tcg_temp_new();
 	    tcg_gen_add_i32(addr, REG(B11_8), REG(0));
             if (ctx->tbflags & FPSCR_SZ) {
-		int fr = XREG(B7_4);
-                tcg_gen_qemu_ld_i32(cpu_fregs[fr], addr,
-                                    ctx->memidx, MO_TEUL);
+		int fr = XHACK(B7_4);
+                tcg_gen_qemu_ld_i32(FREG(fr), addr, ctx->memidx, MO_TEUL);
 		tcg_gen_addi_i32(addr, addr, 4);
-                tcg_gen_qemu_ld_i32(cpu_fregs[fr+1], addr,
-                                    ctx->memidx, MO_TEUL);
+                tcg_gen_qemu_ld_i32(FREG(fr + 1), addr, ctx->memidx, MO_TEUL);
 	    } else {
-                tcg_gen_qemu_st_i32(cpu_fregs[FREG(B7_4)], addr,
-                                    ctx->memidx, MO_TEUL);
+                tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
 	    }
 	    tcg_temp_free(addr);
 	}
@@ -1155,32 +1144,26 @@ static void _decode_opc(DisasContext * ctx)
 	    } else {
                 switch (ctx->opcode & 0xf00f) {
                 case 0xf000:		/* fadd Rm,Rn */
-                    gen_helper_fadd_FT(cpu_fregs[FREG(B11_8)], cpu_env,
-                                       cpu_fregs[FREG(B11_8)],
-                                       cpu_fregs[FREG(B7_4)]);
+                    gen_helper_fadd_FT(FREG(B11_8), cpu_env,
+                                       FREG(B11_8), FREG(B7_4));
                     break;
                 case 0xf001:		/* fsub Rm,Rn */
-                    gen_helper_fsub_FT(cpu_fregs[FREG(B11_8)], cpu_env,
-                                       cpu_fregs[FREG(B11_8)],
-                                       cpu_fregs[FREG(B7_4)]);
+                    gen_helper_fsub_FT(FREG(B11_8), cpu_env,
+                                       FREG(B11_8), FREG(B7_4));
                     break;
                 case 0xf002:		/* fmul Rm,Rn */
-                    gen_helper_fmul_FT(cpu_fregs[FREG(B11_8)], cpu_env,
-                                       cpu_fregs[FREG(B11_8)],
-                                       cpu_fregs[FREG(B7_4)]);
+                    gen_helper_fmul_FT(FREG(B11_8), cpu_env,
+                                       FREG(B11_8), FREG(B7_4));
                     break;
                 case 0xf003:		/* fdiv Rm,Rn */
-                    gen_helper_fdiv_FT(cpu_fregs[FREG(B11_8)], cpu_env,
-                                       cpu_fregs[FREG(B11_8)],
-                                       cpu_fregs[FREG(B7_4)]);
+                    gen_helper_fdiv_FT(FREG(B11_8), cpu_env,
+                                       FREG(B11_8), FREG(B7_4));
                     break;
                 case 0xf004:		/* fcmp/eq Rm,Rn */
-                    gen_helper_fcmp_eq_FT(cpu_env, cpu_fregs[FREG(B11_8)],
-                                          cpu_fregs[FREG(B7_4)]);
+                    gen_helper_fcmp_eq_FT(cpu_env, FREG(B11_8), FREG(B7_4));
                     return;
                 case 0xf005:		/* fcmp/gt Rm,Rn */
-                    gen_helper_fcmp_gt_FT(cpu_env, cpu_fregs[FREG(B11_8)],
-                                          cpu_fregs[FREG(B7_4)]);
+                    gen_helper_fcmp_gt_FT(cpu_env, FREG(B11_8), FREG(B7_4));
                     return;
                 }
 	    }
@@ -1192,9 +1175,8 @@ static void _decode_opc(DisasContext * ctx)
             if (ctx->tbflags & FPSCR_PR) {
                 break; /* illegal instruction */
             } else {
-                gen_helper_fmac_FT(cpu_fregs[FREG(B11_8)], cpu_env,
-                                   cpu_fregs[FREG(0)], cpu_fregs[FREG(B7_4)],
-                                   cpu_fregs[FREG(B11_8)]);
+                gen_helper_fmac_FT(FREG(B11_8), cpu_env,
+                                   FREG(0), FREG(B7_4), FREG(B11_8));
                 return;
             }
         }
@@ -1732,11 +1714,11 @@ static void _decode_opc(DisasContext * ctx)
         return;
     case 0xf00d: /* fsts FPUL,FRn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
-	tcg_gen_mov_i32(cpu_fregs[FREG(B11_8)], cpu_fpul);
+	tcg_gen_mov_i32(FREG(B11_8), cpu_fpul);
 	return;
     case 0xf01d: /* flds FRm,FPUL - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
-	tcg_gen_mov_i32(cpu_fpul, cpu_fregs[FREG(B11_8)]);
+	tcg_gen_mov_i32(cpu_fpul, FREG(B11_8));
 	return;
     case 0xf02d: /* float FPUL,FRn/DRn - FPSCR: R[PR,Enable.I]/W[Cause,Flag] */
 	CHECK_FPU_ENABLED
@@ -1750,7 +1732,7 @@ static void _decode_opc(DisasContext * ctx)
 	    tcg_temp_free_i64(fp);
 	}
 	else {
-            gen_helper_float_FT(cpu_fregs[FREG(B11_8)], cpu_env, cpu_fpul);
+            gen_helper_float_FT(FREG(B11_8), cpu_env, cpu_fpul);
 	}
 	return;
     case 0xf03d: /* ftrc FRm/DRm,FPUL - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
@@ -1765,13 +1747,13 @@ static void _decode_opc(DisasContext * ctx)
 	    tcg_temp_free_i64(fp);
 	}
 	else {
-            gen_helper_ftrc_FT(cpu_fpul, cpu_env, cpu_fregs[FREG(B11_8)]);
+            gen_helper_ftrc_FT(cpu_fpul, cpu_env, FREG(B11_8));
 	}
 	return;
     case 0xf04d: /* fneg FRn/DRn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
 	{
-	    gen_helper_fneg_T(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)]);
+	    gen_helper_fneg_T(FREG(B11_8), FREG(B11_8));
 	}
 	return;
     case 0xf05d: /* fabs FRn/DRn */
@@ -1785,7 +1767,7 @@ static void _decode_opc(DisasContext * ctx)
 	    gen_store_fpr64(fp, DREG(B11_8));
 	    tcg_temp_free_i64(fp);
 	} else {
-	    gen_helper_fabs_FT(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)]);
+	    gen_helper_fabs_FT(FREG(B11_8), FREG(B11_8));
 	}
 	return;
     case 0xf06d: /* fsqrt FRn */
@@ -1799,8 +1781,7 @@ static void _decode_opc(DisasContext * ctx)
 	    gen_store_fpr64(fp, DREG(B11_8));
 	    tcg_temp_free_i64(fp);
 	} else {
-            gen_helper_fsqrt_FT(cpu_fregs[FREG(B11_8)], cpu_env,
-                                cpu_fregs[FREG(B11_8)]);
+            gen_helper_fsqrt_FT(FREG(B11_8), cpu_env, FREG(B11_8));
 	}
 	return;
     case 0xf07d: /* fsrra FRn */
@@ -1809,13 +1790,13 @@ static void _decode_opc(DisasContext * ctx)
     case 0xf08d: /* fldi0 FRn - FPSCR: R[PR] */
 	CHECK_FPU_ENABLED
         if (!(ctx->tbflags & FPSCR_PR)) {
-	    tcg_gen_movi_i32(cpu_fregs[FREG(B11_8)], 0);
+	    tcg_gen_movi_i32(FREG(B11_8), 0);
 	}
 	return;
     case 0xf09d: /* fldi1 FRn - FPSCR: R[PR] */
 	CHECK_FPU_ENABLED
         if (!(ctx->tbflags & FPSCR_PR)) {
-	    tcg_gen_movi_i32(cpu_fregs[FREG(B11_8)], 0x3f800000);
+	    tcg_gen_movi_i32(FREG(B11_8), 0x3f800000);
 	}
 	return;
     case 0xf0ad: /* fcnvsd FPUL,DRn */
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [Qemu-devel] [PATCH 08/11] target/sh4: Pass DisasContext to fpr64 routines
  2017-07-06  0:23 [Qemu-devel] [PATCH 00/11] target/sh4 improvments Richard Henderson
                   ` (6 preceding siblings ...)
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 07/11] target/sh4: Unify cpu_fregs into FREG Richard Henderson
@ 2017-07-06  0:23 ` Richard Henderson
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 09/11] target/sh4: Avoid a potential translator crash for malformed FPR64 Richard Henderson
                   ` (4 subsequent siblings)
  12 siblings, 0 replies; 26+ messages in thread
From: Richard Henderson @ 2017-07-06  0:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, bruno

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/sh4/translate.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index e4fd6f2..05657a9 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -359,12 +359,12 @@ static void gen_delayed_conditional_jump(DisasContext * ctx)
     gen_jump(ctx);
 }
 
-static inline void gen_load_fpr64(TCGv_i64 t, int reg)
+static inline void gen_load_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
 {
     tcg_gen_concat_i32_i64(t, cpu_fregs[reg + 1], cpu_fregs[reg]);
 }
 
-static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
+static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
 {
     tcg_gen_extr_i64_i32(cpu_fregs[reg + 1], cpu_fregs[reg], t);
 }
@@ -1006,8 +1006,8 @@ static void _decode_opc(DisasContext * ctx)
 	CHECK_FPU_ENABLED
         if (ctx->tbflags & FPSCR_SZ) {
 	    TCGv_i64 fp = tcg_temp_new_i64();
-	    gen_load_fpr64(fp, XHACK(B7_4));
-	    gen_store_fpr64(fp, XHACK(B11_8));
+	    gen_load_fpr64(ctx, fp, XHACK(B7_4));
+	    gen_store_fpr64(ctx, fp, XHACK(B11_8));
 	    tcg_temp_free_i64(fp);
 	} else {
 	    tcg_gen_mov_i32(FREG(B11_8), FREG(B7_4));
@@ -1116,8 +1116,8 @@ static void _decode_opc(DisasContext * ctx)
 		    break; /* illegal instruction */
 		fp0 = tcg_temp_new_i64();
 		fp1 = tcg_temp_new_i64();
-		gen_load_fpr64(fp0, DREG(B11_8));
-		gen_load_fpr64(fp1, DREG(B7_4));
+		gen_load_fpr64(ctx, fp0, DREG(B11_8));
+		gen_load_fpr64(ctx, fp1, DREG(B7_4));
                 switch (ctx->opcode & 0xf00f) {
                 case 0xf000:		/* fadd Rm,Rn */
                     gen_helper_fadd_DT(fp0, cpu_env, fp0, fp1);
@@ -1138,7 +1138,7 @@ static void _decode_opc(DisasContext * ctx)
                     gen_helper_fcmp_gt_DT(cpu_env, fp0, fp1);
                     return;
                 }
-		gen_store_fpr64(fp0, DREG(B11_8));
+		gen_store_fpr64(ctx, fp0, DREG(B11_8));
                 tcg_temp_free_i64(fp0);
                 tcg_temp_free_i64(fp1);
 	    } else {
@@ -1728,7 +1728,7 @@ static void _decode_opc(DisasContext * ctx)
 		break; /* illegal instruction */
 	    fp = tcg_temp_new_i64();
             gen_helper_float_DT(fp, cpu_env, cpu_fpul);
-	    gen_store_fpr64(fp, DREG(B11_8));
+	    gen_store_fpr64(ctx, fp, DREG(B11_8));
 	    tcg_temp_free_i64(fp);
 	}
 	else {
@@ -1742,7 +1742,7 @@ static void _decode_opc(DisasContext * ctx)
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
 	    fp = tcg_temp_new_i64();
-	    gen_load_fpr64(fp, DREG(B11_8));
+	    gen_load_fpr64(ctx, fp, DREG(B11_8));
             gen_helper_ftrc_DT(cpu_fpul, cpu_env, fp);
 	    tcg_temp_free_i64(fp);
 	}
@@ -1762,9 +1762,9 @@ static void _decode_opc(DisasContext * ctx)
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
 	    TCGv_i64 fp = tcg_temp_new_i64();
-	    gen_load_fpr64(fp, DREG(B11_8));
+	    gen_load_fpr64(ctx, fp, DREG(B11_8));
 	    gen_helper_fabs_DT(fp, fp);
-	    gen_store_fpr64(fp, DREG(B11_8));
+	    gen_store_fpr64(ctx, fp, DREG(B11_8));
 	    tcg_temp_free_i64(fp);
 	} else {
 	    gen_helper_fabs_FT(FREG(B11_8), FREG(B11_8));
@@ -1776,9 +1776,9 @@ static void _decode_opc(DisasContext * ctx)
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
 	    TCGv_i64 fp = tcg_temp_new_i64();
-	    gen_load_fpr64(fp, DREG(B11_8));
+	    gen_load_fpr64(ctx, fp, DREG(B11_8));
             gen_helper_fsqrt_DT(fp, cpu_env, fp);
-	    gen_store_fpr64(fp, DREG(B11_8));
+	    gen_store_fpr64(ctx, fp, DREG(B11_8));
 	    tcg_temp_free_i64(fp);
 	} else {
             gen_helper_fsqrt_FT(FREG(B11_8), cpu_env, FREG(B11_8));
@@ -1804,7 +1804,7 @@ static void _decode_opc(DisasContext * ctx)
 	{
 	    TCGv_i64 fp = tcg_temp_new_i64();
             gen_helper_fcnvsd_FT_DT(fp, cpu_env, cpu_fpul);
-	    gen_store_fpr64(fp, DREG(B11_8));
+	    gen_store_fpr64(ctx, fp, DREG(B11_8));
 	    tcg_temp_free_i64(fp);
 	}
 	return;
@@ -1812,7 +1812,7 @@ static void _decode_opc(DisasContext * ctx)
 	CHECK_FPU_ENABLED
 	{
 	    TCGv_i64 fp = tcg_temp_new_i64();
-	    gen_load_fpr64(fp, DREG(B11_8));
+	    gen_load_fpr64(ctx, fp, DREG(B11_8));
             gen_helper_fcnvds_DT_FT(cpu_fpul, cpu_env, fp);
 	    tcg_temp_free_i64(fp);
 	}
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [Qemu-devel] [PATCH 09/11] target/sh4: Avoid a potential translator crash for malformed FPR64
  2017-07-06  0:23 [Qemu-devel] [PATCH 00/11] target/sh4 improvments Richard Henderson
                   ` (7 preceding siblings ...)
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 08/11] target/sh4: Pass DisasContext to fpr64 routines Richard Henderson
@ 2017-07-06  0:23 ` Richard Henderson
  2017-07-06  0:24 ` [Qemu-devel] [PATCH 10/11] target/sh4: Hoist fp bank selection Richard Henderson
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 26+ messages in thread
From: Richard Henderson @ 2017-07-06  0:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, bruno

Produce valid, but nonsensical, code given an odd register index.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/sh4/translate.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 05657a9..7f015c3 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -359,14 +359,18 @@ static void gen_delayed_conditional_jump(DisasContext * ctx)
     gen_jump(ctx);
 }
 
-static inline void gen_load_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
+/* Assumes lsb of (x) is always 0.  */
+/* ??? Should the translator should signal an invalid opc?
+   In the meantime, using OR instead of PLUS to form the index of the
+   low register means we can't crash the translator for REG==15.  */
+static void gen_load_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
 {
-    tcg_gen_concat_i32_i64(t, cpu_fregs[reg + 1], cpu_fregs[reg]);
+    tcg_gen_concat_i32_i64(t, cpu_fregs[reg | 1], cpu_fregs[reg]);
 }
 
-static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
+static void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
 {
-    tcg_gen_extr_i64_i32(cpu_fregs[reg + 1], cpu_fregs[reg], t);
+    tcg_gen_extr_i64_i32(cpu_fregs[reg | 1], cpu_fregs[reg], t);
 }
 
 #define B3_0 (ctx->opcode & 0xf)
@@ -385,7 +389,6 @@ static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
 #define FREG(x) cpu_fregs[ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x)]
 #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))
 #define XREG(x) FREG(XHACK(x))
-/* Assumes lsb of (x) is always 0 */
 #define DREG(x) (ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x))
 
 #define CHECK_NOT_DELAY_SLOT \
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [Qemu-devel] [PATCH 10/11] target/sh4: Hoist fp bank selection
  2017-07-06  0:23 [Qemu-devel] [PATCH 00/11] target/sh4 improvments Richard Henderson
                   ` (8 preceding siblings ...)
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 09/11] target/sh4: Avoid a potential translator crash for malformed FPR64 Richard Henderson
@ 2017-07-06  0:24 ` Richard Henderson
  2017-07-06  0:24 ` [Qemu-devel] [PATCH 11/11] target/sh4: Eliminate DREG macro Richard Henderson
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 26+ messages in thread
From: Richard Henderson @ 2017-07-06  0:24 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, bruno

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/sh4/translate.c | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 7f015c3..a45d0ee 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -37,6 +37,7 @@ typedef struct DisasContext {
     struct TranslationBlock *tb;
     TCGv *gregs;         /* active bank */
     TCGv *altregs;       /* inactive, alternate, bank */
+    TCGv *fregs;         /* active bank */
     target_ulong pc;
     uint16_t opcode;
     uint32_t tbflags;    /* should stay unmodified during the TB translation */
@@ -72,7 +73,7 @@ static TCGv cpu_pc, cpu_ssr, cpu_spc, cpu_gbr;
 static TCGv cpu_vbr, cpu_sgr, cpu_dbr, cpu_mach, cpu_macl;
 static TCGv cpu_pr, cpu_fpscr, cpu_fpul;
 static TCGv cpu_lock_addr, cpu_lock_value;
-static TCGv cpu_fregs[32];
+static TCGv cpu_fregs[2][16];
 
 /* internal register indexes */
 static TCGv cpu_flags, cpu_delayed_pc, cpu_delayed_cond;
@@ -176,10 +177,18 @@ void sh4_translate_init(void)
 				            offsetof(CPUSH4State, lock_value),
                                             "_lock_value_");
 
-    for (i = 0; i < 32; i++)
-        cpu_fregs[i] = tcg_global_mem_new_i32(cpu_env,
-                                              offsetof(CPUSH4State, fregs[i]),
-                                              fregnames[i]);
+    for (i = 0; i < 16; i++) {
+        cpu_fregs[0][i]
+            = tcg_global_mem_new_i32(cpu_env,
+                                     offsetof(CPUSH4State, fregs[i]),
+                                     fregnames[i]);
+    }
+    for (i = 16; i < 32; i++) {
+        cpu_fregs[1][i - 16]
+            = tcg_global_mem_new_i32(cpu_env,
+                                     offsetof(CPUSH4State, fregs[i]),
+                                     fregnames[i]);
+    }
 
     done_init = 1;
 }
@@ -365,12 +374,12 @@ static void gen_delayed_conditional_jump(DisasContext * ctx)
    low register means we can't crash the translator for REG==15.  */
 static void gen_load_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
 {
-    tcg_gen_concat_i32_i64(t, cpu_fregs[reg | 1], cpu_fregs[reg]);
+    tcg_gen_concat_i32_i64(t, ctx->fregs[reg | 1], ctx->fregs[reg]);
 }
 
 static void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
 {
-    tcg_gen_extr_i64_i32(cpu_fregs[reg | 1], cpu_fregs[reg], t);
+    tcg_gen_extr_i64_i32(ctx->fregs[reg | 1], ctx->fregs[reg], t);
 }
 
 #define B3_0 (ctx->opcode & 0xf)
@@ -386,10 +395,10 @@ static void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
 #define REG(x)     ctx->gregs[x]
 #define ALTREG(x)  ctx->altregs[x]
 
-#define FREG(x) cpu_fregs[ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x)]
-#define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))
-#define XREG(x) FREG(XHACK(x))
-#define DREG(x) (ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x))
+#define FREG(x)    ctx->fregs[x]
+#define XHACK(x)   ((((x) & 1 ) << 4) | ((x) & 0xe))
+#define XREG(x)    FREG(XHACK(x))
+#define DREG(x)    (x)
 
 #define CHECK_NOT_DELAY_SLOT \
     if (ctx->envflags & DELAY_SLOT_MASK) {                           \
@@ -2230,6 +2239,9 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
     ctx.gregs = cpu_gregs[bank];
     ctx.altregs = cpu_gregs[bank ^ 1];
 
+    bank = (ctx.tbflags & FPSCR_FR) != 0;
+    ctx.fregs = cpu_fregs[bank];
+
     max_insns = tb->cflags & CF_COUNT_MASK;
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [Qemu-devel] [PATCH 11/11] target/sh4: Eliminate DREG macro
  2017-07-06  0:23 [Qemu-devel] [PATCH 00/11] target/sh4 improvments Richard Henderson
                   ` (9 preceding siblings ...)
  2017-07-06  0:24 ` [Qemu-devel] [PATCH 10/11] target/sh4: Hoist fp bank selection Richard Henderson
@ 2017-07-06  0:24 ` Richard Henderson
  2017-07-06  1:15 ` [Qemu-devel] [PATCH 00/11] target/sh4 improvments Laurent Vivier
  2017-07-06 14:55 ` Aurelien Jarno
  12 siblings, 0 replies; 26+ messages in thread
From: Richard Henderson @ 2017-07-06  0:24 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, bruno

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/sh4/translate.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index a45d0ee..7e3de74 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -398,7 +398,6 @@ static void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
 #define FREG(x)    ctx->fregs[x]
 #define XHACK(x)   ((((x) & 1 ) << 4) | ((x) & 0xe))
 #define XREG(x)    FREG(XHACK(x))
-#define DREG(x)    (x)
 
 #define CHECK_NOT_DELAY_SLOT \
     if (ctx->envflags & DELAY_SLOT_MASK) {                           \
@@ -1128,8 +1127,8 @@ static void _decode_opc(DisasContext * ctx)
 		    break; /* illegal instruction */
 		fp0 = tcg_temp_new_i64();
 		fp1 = tcg_temp_new_i64();
-		gen_load_fpr64(ctx, fp0, DREG(B11_8));
-		gen_load_fpr64(ctx, fp1, DREG(B7_4));
+		gen_load_fpr64(ctx, fp0, B11_8);
+		gen_load_fpr64(ctx, fp1, B7_4);
                 switch (ctx->opcode & 0xf00f) {
                 case 0xf000:		/* fadd Rm,Rn */
                     gen_helper_fadd_DT(fp0, cpu_env, fp0, fp1);
@@ -1150,7 +1149,7 @@ static void _decode_opc(DisasContext * ctx)
                     gen_helper_fcmp_gt_DT(cpu_env, fp0, fp1);
                     return;
                 }
-		gen_store_fpr64(ctx, fp0, DREG(B11_8));
+		gen_store_fpr64(ctx, fp0, B11_8);
                 tcg_temp_free_i64(fp0);
                 tcg_temp_free_i64(fp1);
 	    } else {
@@ -1740,7 +1739,7 @@ static void _decode_opc(DisasContext * ctx)
 		break; /* illegal instruction */
 	    fp = tcg_temp_new_i64();
             gen_helper_float_DT(fp, cpu_env, cpu_fpul);
-	    gen_store_fpr64(ctx, fp, DREG(B11_8));
+	    gen_store_fpr64(ctx, fp, B11_8);
 	    tcg_temp_free_i64(fp);
 	}
 	else {
@@ -1754,7 +1753,7 @@ static void _decode_opc(DisasContext * ctx)
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
 	    fp = tcg_temp_new_i64();
-	    gen_load_fpr64(ctx, fp, DREG(B11_8));
+	    gen_load_fpr64(ctx, fp, B11_8);
             gen_helper_ftrc_DT(cpu_fpul, cpu_env, fp);
 	    tcg_temp_free_i64(fp);
 	}
@@ -1774,9 +1773,9 @@ static void _decode_opc(DisasContext * ctx)
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
 	    TCGv_i64 fp = tcg_temp_new_i64();
-	    gen_load_fpr64(ctx, fp, DREG(B11_8));
+	    gen_load_fpr64(ctx, fp, B11_8);
 	    gen_helper_fabs_DT(fp, fp);
-	    gen_store_fpr64(ctx, fp, DREG(B11_8));
+	    gen_store_fpr64(ctx, fp, B11_8);
 	    tcg_temp_free_i64(fp);
 	} else {
 	    gen_helper_fabs_FT(FREG(B11_8), FREG(B11_8));
@@ -1788,9 +1787,9 @@ static void _decode_opc(DisasContext * ctx)
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
 	    TCGv_i64 fp = tcg_temp_new_i64();
-	    gen_load_fpr64(ctx, fp, DREG(B11_8));
+	    gen_load_fpr64(ctx, fp, B11_8);
             gen_helper_fsqrt_DT(fp, cpu_env, fp);
-	    gen_store_fpr64(ctx, fp, DREG(B11_8));
+	    gen_store_fpr64(ctx, fp, B11_8);
 	    tcg_temp_free_i64(fp);
 	} else {
             gen_helper_fsqrt_FT(FREG(B11_8), cpu_env, FREG(B11_8));
@@ -1816,7 +1815,7 @@ static void _decode_opc(DisasContext * ctx)
 	{
 	    TCGv_i64 fp = tcg_temp_new_i64();
             gen_helper_fcnvsd_FT_DT(fp, cpu_env, cpu_fpul);
-	    gen_store_fpr64(ctx, fp, DREG(B11_8));
+	    gen_store_fpr64(ctx, fp, B11_8);
 	    tcg_temp_free_i64(fp);
 	}
 	return;
@@ -1824,7 +1823,7 @@ static void _decode_opc(DisasContext * ctx)
 	CHECK_FPU_ENABLED
 	{
 	    TCGv_i64 fp = tcg_temp_new_i64();
-	    gen_load_fpr64(ctx, fp, DREG(B11_8));
+	    gen_load_fpr64(ctx, fp, B11_8);
             gen_helper_fcnvds_DT_FT(cpu_fpul, cpu_env, fp);
 	    tcg_temp_free_i64(fp);
 	}
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] linux-user/sh4: Notice gUSA regions during signal delivery
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 05/11] linux-user/sh4: Notice gUSA regions during signal delivery Richard Henderson
@ 2017-07-06  1:09   ` Laurent Vivier
  2017-07-06  8:10     ` John Paul Adrian Glaubitz
  2017-07-06 11:07     ` John Paul Adrian Glaubitz
  2017-07-06 12:09   ` Laurent Vivier
  1 sibling, 2 replies; 26+ messages in thread
From: Laurent Vivier @ 2017-07-06  1:09 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: bruno, aurelien, John Paul Adrian Glaubitz

Le 06/07/2017 à 02:23, Richard Henderson a écrit :
> We translate gUSA regions atomically in a parallel context.
> But in a serial context a gUSA region may be interrupted.
> In that case, restart the region as the kernel would.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  linux-user/signal.c | 21 +++++++++++++++++++++
>  1 file changed, 21 insertions(+)
> 
> diff --git a/linux-user/signal.c b/linux-user/signal.c
> index 3d18d1b..1e716a9 100644
> --- a/linux-user/signal.c
> +++ b/linux-user/signal.c
> @@ -3471,6 +3471,23 @@ static abi_ulong get_sigframe(struct target_sigaction *ka,
>      return (sp - frame_size) & -8ul;
>  }
>  
> +/* Notice when we're in the middle of a gUSA region and reset.
> +   Note that this will only occur for !parallel_cpus, as we will
> +   translate such sequences differently in a parallel context.  */
> +static void unwind_gusa(CPUSH4State *regs)
> +{
> +    /* If the stack pointer is sufficiently negative... */
> +    if ((regs->gregs[15] & 0xc0000000u) == 0xc0000000u) {
> +        /* Reset the PC to before the gUSA region, as computed from
> +           R0 = region end, SP = -(region size), plus one more insn
> +           that actually sets SP to the region size.  */
> +        regs->pc = regs->gregs[0] + regs->gregs[15] - 2;
> +
> +        /* Reset the SP to the saved version in R1.  */
> +        regs->gregs[15] = regs->gregs[1];
> +    }
> +}
> +
>  static void setup_sigcontext(struct target_sigcontext *sc,
>                               CPUSH4State *regs, unsigned long mask)
>  {
> @@ -3534,6 +3551,8 @@ static void setup_frame(int sig, struct target_sigaction *ka,
>      abi_ulong frame_addr;
>      int i;
>  
> +    unwind_gusa(regs);
> +
>      frame_addr = get_sigframe(ka, regs->gregs[15], sizeof(*frame));
>      trace_user_setup_frame(regs, frame_addr);
>      if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
> @@ -3583,6 +3602,8 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
>      abi_ulong frame_addr;
>      int i;
>  
> +    unwind_gusa(regs);
> +
>      frame_addr = get_sigframe(ka, regs->gregs[15], sizeof(*frame));
>      trace_user_setup_rt_frame(regs, frame_addr);
>      if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
> 

Reviewed-by: Laurent Vivier <laurent@vivier.eu>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 00/11] target/sh4 improvments
  2017-07-06  0:23 [Qemu-devel] [PATCH 00/11] target/sh4 improvments Richard Henderson
                   ` (10 preceding siblings ...)
  2017-07-06  0:24 ` [Qemu-devel] [PATCH 11/11] target/sh4: Eliminate DREG macro Richard Henderson
@ 2017-07-06  1:15 ` Laurent Vivier
  2017-07-06 14:55 ` Aurelien Jarno
  12 siblings, 0 replies; 26+ messages in thread
From: Laurent Vivier @ 2017-07-06  1:15 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: bruno, aurelien

Le 06/07/2017 à 02:23, Richard Henderson a écrit :
> This fixes two problems with atomic operations on sh4,
> including an attempt at supporting the user-space atomics
> technique used by most sh-linux-user binaries.

I tried some time ago to support gUSA hack by decoding the
instruction[1] the application wants to be atomic, but it is not viable
as we need to know all possible sequences the user can generate (but we
can guess it is always generated by glibc, and thus well known).

Laurent

[1]
https://github.com/vivier/qemu/commit/25013c121661c6831f08c2140114c8de06cf48da

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 07/11] target/sh4: Unify cpu_fregs into FREG
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 07/11] target/sh4: Unify cpu_fregs into FREG Richard Henderson
@ 2017-07-06  1:55   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 26+ messages in thread
From: Philippe Mathieu-Daudé @ 2017-07-06  1:55 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: bruno, aurelien

On 07/05/2017 09:23 PM, Richard Henderson wrote:
> We were treating FREG as an index and REG as a TCGv.
> Making FREG return a TCGv is both less confusing and
> a step toward cleaner banking of cpu_fregs.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>

> ---
>   target/sh4/translate.c | 123 +++++++++++++++++++++----------------------------
>   1 file changed, 52 insertions(+), 71 deletions(-)
> 
> diff --git a/target/sh4/translate.c b/target/sh4/translate.c
> index 20e24d5..e4fd6f2 100644
> --- a/target/sh4/translate.c
> +++ b/target/sh4/translate.c
> @@ -382,10 +382,11 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
>   #define REG(x)     ctx->gregs[x]
>   #define ALTREG(x)  ctx->altregs[x]
>   
> -#define FREG(x) (ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x))
> +#define FREG(x) cpu_fregs[ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x)]
>   #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))
> -#define XREG(x) (ctx->tbflags & FPSCR_FR ? XHACK(x) ^ 0x10 : XHACK(x))
> -#define DREG(x) FREG(x) /* Assumes lsb of (x) is always 0 */
> +#define XREG(x) FREG(XHACK(x))
> +/* Assumes lsb of (x) is always 0 */
> +#define DREG(x) (ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x))
>   
>   #define CHECK_NOT_DELAY_SLOT \
>       if (ctx->envflags & DELAY_SLOT_MASK) {                           \
> @@ -1005,56 +1006,51 @@ static void _decode_opc(DisasContext * ctx)
>   	CHECK_FPU_ENABLED
>           if (ctx->tbflags & FPSCR_SZ) {
>   	    TCGv_i64 fp = tcg_temp_new_i64();
> -	    gen_load_fpr64(fp, XREG(B7_4));
> -	    gen_store_fpr64(fp, XREG(B11_8));
> +	    gen_load_fpr64(fp, XHACK(B7_4));
> +	    gen_store_fpr64(fp, XHACK(B11_8));
>   	    tcg_temp_free_i64(fp);
>   	} else {
> -	    tcg_gen_mov_i32(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B7_4)]);
> +	    tcg_gen_mov_i32(FREG(B11_8), FREG(B7_4));
>   	}
>   	return;
>       case 0xf00a: /* fmov {F,D,X}Rm,@Rn - FPSCR: Nothing */
>   	CHECK_FPU_ENABLED
>           if (ctx->tbflags & FPSCR_SZ) {
>   	    TCGv addr_hi = tcg_temp_new();
> -	    int fr = XREG(B7_4);
> +	    int fr = XHACK(B7_4);
>   	    tcg_gen_addi_i32(addr_hi, REG(B11_8), 4);
> -            tcg_gen_qemu_st_i32(cpu_fregs[fr], REG(B11_8),
> -                                ctx->memidx, MO_TEUL);
> -            tcg_gen_qemu_st_i32(cpu_fregs[fr+1], addr_hi,
> -                                ctx->memidx, MO_TEUL);
> +            tcg_gen_qemu_st_i32(FREG(fr), REG(B11_8), ctx->memidx, MO_TEUL);
> +            tcg_gen_qemu_st_i32(FREG(fr + 1), addr_hi, ctx->memidx, MO_TEUL);
>   	    tcg_temp_free(addr_hi);
>   	} else {
> -            tcg_gen_qemu_st_i32(cpu_fregs[FREG(B7_4)], REG(B11_8),
> -                                ctx->memidx, MO_TEUL);
> +            tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx, MO_TEUL);
>   	}
>   	return;
>       case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */
>   	CHECK_FPU_ENABLED
>           if (ctx->tbflags & FPSCR_SZ) {
>   	    TCGv addr_hi = tcg_temp_new();
> -	    int fr = XREG(B11_8);
> +	    int fr = XHACK(B11_8);
>   	    tcg_gen_addi_i32(addr_hi, REG(B7_4), 4);
> -            tcg_gen_qemu_ld_i32(cpu_fregs[fr], REG(B7_4), ctx->memidx, MO_TEUL);
> -            tcg_gen_qemu_ld_i32(cpu_fregs[fr+1], addr_hi, ctx->memidx, MO_TEUL);
> +            tcg_gen_qemu_ld_i32(FREG(fr), REG(B7_4), ctx->memidx, MO_TEUL);
> +            tcg_gen_qemu_ld_i32(FREG(fr + 1), addr_hi, ctx->memidx, MO_TEUL);
>   	    tcg_temp_free(addr_hi);
>   	} else {
> -            tcg_gen_qemu_ld_i32(cpu_fregs[FREG(B11_8)], REG(B7_4),
> -                                ctx->memidx, MO_TEUL);
> +            tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL);
>   	}
>   	return;
>       case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */
>   	CHECK_FPU_ENABLED
>           if (ctx->tbflags & FPSCR_SZ) {
>   	    TCGv addr_hi = tcg_temp_new();
> -	    int fr = XREG(B11_8);
> +	    int fr = XHACK(B11_8);
>   	    tcg_gen_addi_i32(addr_hi, REG(B7_4), 4);
> -            tcg_gen_qemu_ld_i32(cpu_fregs[fr], REG(B7_4), ctx->memidx, MO_TEUL);
> -            tcg_gen_qemu_ld_i32(cpu_fregs[fr+1], addr_hi, ctx->memidx, MO_TEUL);
> +            tcg_gen_qemu_ld_i32(FREG(fr), REG(B7_4), ctx->memidx, MO_TEUL);
> +            tcg_gen_qemu_ld_i32(FREG(fr + 1), addr_hi, ctx->memidx, MO_TEUL);
>   	    tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 8);
>   	    tcg_temp_free(addr_hi);
>   	} else {
> -            tcg_gen_qemu_ld_i32(cpu_fregs[FREG(B11_8)], REG(B7_4),
> -                                ctx->memidx, MO_TEUL);
> +            tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL);
>   	    tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
>   	}
>   	return;
> @@ -1063,13 +1059,12 @@ static void _decode_opc(DisasContext * ctx)
>           TCGv addr = tcg_temp_new_i32();
>           tcg_gen_subi_i32(addr, REG(B11_8), 4);
>           if (ctx->tbflags & FPSCR_SZ) {
> -	    int fr = XREG(B7_4);
> -            tcg_gen_qemu_st_i32(cpu_fregs[fr+1], addr, ctx->memidx, MO_TEUL);
> +	    int fr = XHACK(B7_4);
> +            tcg_gen_qemu_st_i32(FREG(fr + 1), addr, ctx->memidx, MO_TEUL);
>   	    tcg_gen_subi_i32(addr, addr, 4);
> -            tcg_gen_qemu_st_i32(cpu_fregs[fr], addr, ctx->memidx, MO_TEUL);
> +            tcg_gen_qemu_st_i32(FREG(fr), addr, ctx->memidx, MO_TEUL);
>   	} else {
> -            tcg_gen_qemu_st_i32(cpu_fregs[FREG(B7_4)], addr,
> -                                ctx->memidx, MO_TEUL);
> +            tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
>   	}
>           tcg_gen_mov_i32(REG(B11_8), addr);
>           tcg_temp_free(addr);
> @@ -1080,15 +1075,12 @@ static void _decode_opc(DisasContext * ctx)
>   	    TCGv addr = tcg_temp_new_i32();
>   	    tcg_gen_add_i32(addr, REG(B7_4), REG(0));
>               if (ctx->tbflags & FPSCR_SZ) {
> -		int fr = XREG(B11_8);
> -                tcg_gen_qemu_ld_i32(cpu_fregs[fr], addr,
> -                                    ctx->memidx, MO_TEUL);
> +		int fr = XHACK(B11_8);
> +                tcg_gen_qemu_ld_i32(FREG(fr), addr, ctx->memidx, MO_TEUL);
>   		tcg_gen_addi_i32(addr, addr, 4);
> -                tcg_gen_qemu_ld_i32(cpu_fregs[fr+1], addr,
> -                                    ctx->memidx, MO_TEUL);
> +                tcg_gen_qemu_ld_i32(FREG(fr + 1), addr, ctx->memidx, MO_TEUL);
>   	    } else {
> -                tcg_gen_qemu_ld_i32(cpu_fregs[FREG(B11_8)], addr,
> -                                    ctx->memidx, MO_TEUL);
> +                tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx, MO_TEUL);
>   	    }
>   	    tcg_temp_free(addr);
>   	}
> @@ -1099,15 +1091,12 @@ static void _decode_opc(DisasContext * ctx)
>   	    TCGv addr = tcg_temp_new();
>   	    tcg_gen_add_i32(addr, REG(B11_8), REG(0));
>               if (ctx->tbflags & FPSCR_SZ) {
> -		int fr = XREG(B7_4);
> -                tcg_gen_qemu_ld_i32(cpu_fregs[fr], addr,
> -                                    ctx->memidx, MO_TEUL);
> +		int fr = XHACK(B7_4);
> +                tcg_gen_qemu_ld_i32(FREG(fr), addr, ctx->memidx, MO_TEUL);
>   		tcg_gen_addi_i32(addr, addr, 4);
> -                tcg_gen_qemu_ld_i32(cpu_fregs[fr+1], addr,
> -                                    ctx->memidx, MO_TEUL);
> +                tcg_gen_qemu_ld_i32(FREG(fr + 1), addr, ctx->memidx, MO_TEUL);
>   	    } else {
> -                tcg_gen_qemu_st_i32(cpu_fregs[FREG(B7_4)], addr,
> -                                    ctx->memidx, MO_TEUL);
> +                tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
>   	    }
>   	    tcg_temp_free(addr);
>   	}
> @@ -1155,32 +1144,26 @@ static void _decode_opc(DisasContext * ctx)
>   	    } else {
>                   switch (ctx->opcode & 0xf00f) {
>                   case 0xf000:		/* fadd Rm,Rn */
> -                    gen_helper_fadd_FT(cpu_fregs[FREG(B11_8)], cpu_env,
> -                                       cpu_fregs[FREG(B11_8)],
> -                                       cpu_fregs[FREG(B7_4)]);
> +                    gen_helper_fadd_FT(FREG(B11_8), cpu_env,
> +                                       FREG(B11_8), FREG(B7_4));
>                       break;
>                   case 0xf001:		/* fsub Rm,Rn */
> -                    gen_helper_fsub_FT(cpu_fregs[FREG(B11_8)], cpu_env,
> -                                       cpu_fregs[FREG(B11_8)],
> -                                       cpu_fregs[FREG(B7_4)]);
> +                    gen_helper_fsub_FT(FREG(B11_8), cpu_env,
> +                                       FREG(B11_8), FREG(B7_4));
>                       break;
>                   case 0xf002:		/* fmul Rm,Rn */
> -                    gen_helper_fmul_FT(cpu_fregs[FREG(B11_8)], cpu_env,
> -                                       cpu_fregs[FREG(B11_8)],
> -                                       cpu_fregs[FREG(B7_4)]);
> +                    gen_helper_fmul_FT(FREG(B11_8), cpu_env,
> +                                       FREG(B11_8), FREG(B7_4));
>                       break;
>                   case 0xf003:		/* fdiv Rm,Rn */
> -                    gen_helper_fdiv_FT(cpu_fregs[FREG(B11_8)], cpu_env,
> -                                       cpu_fregs[FREG(B11_8)],
> -                                       cpu_fregs[FREG(B7_4)]);
> +                    gen_helper_fdiv_FT(FREG(B11_8), cpu_env,
> +                                       FREG(B11_8), FREG(B7_4));
>                       break;
>                   case 0xf004:		/* fcmp/eq Rm,Rn */
> -                    gen_helper_fcmp_eq_FT(cpu_env, cpu_fregs[FREG(B11_8)],
> -                                          cpu_fregs[FREG(B7_4)]);
> +                    gen_helper_fcmp_eq_FT(cpu_env, FREG(B11_8), FREG(B7_4));
>                       return;
>                   case 0xf005:		/* fcmp/gt Rm,Rn */
> -                    gen_helper_fcmp_gt_FT(cpu_env, cpu_fregs[FREG(B11_8)],
> -                                          cpu_fregs[FREG(B7_4)]);
> +                    gen_helper_fcmp_gt_FT(cpu_env, FREG(B11_8), FREG(B7_4));
>                       return;
>                   }
>   	    }
> @@ -1192,9 +1175,8 @@ static void _decode_opc(DisasContext * ctx)
>               if (ctx->tbflags & FPSCR_PR) {
>                   break; /* illegal instruction */
>               } else {
> -                gen_helper_fmac_FT(cpu_fregs[FREG(B11_8)], cpu_env,
> -                                   cpu_fregs[FREG(0)], cpu_fregs[FREG(B7_4)],
> -                                   cpu_fregs[FREG(B11_8)]);
> +                gen_helper_fmac_FT(FREG(B11_8), cpu_env,
> +                                   FREG(0), FREG(B7_4), FREG(B11_8));
>                   return;
>               }
>           }
> @@ -1732,11 +1714,11 @@ static void _decode_opc(DisasContext * ctx)
>           return;
>       case 0xf00d: /* fsts FPUL,FRn - FPSCR: Nothing */
>   	CHECK_FPU_ENABLED
> -	tcg_gen_mov_i32(cpu_fregs[FREG(B11_8)], cpu_fpul);
> +	tcg_gen_mov_i32(FREG(B11_8), cpu_fpul);
>   	return;
>       case 0xf01d: /* flds FRm,FPUL - FPSCR: Nothing */
>   	CHECK_FPU_ENABLED
> -	tcg_gen_mov_i32(cpu_fpul, cpu_fregs[FREG(B11_8)]);
> +	tcg_gen_mov_i32(cpu_fpul, FREG(B11_8));
>   	return;
>       case 0xf02d: /* float FPUL,FRn/DRn - FPSCR: R[PR,Enable.I]/W[Cause,Flag] */
>   	CHECK_FPU_ENABLED
> @@ -1750,7 +1732,7 @@ static void _decode_opc(DisasContext * ctx)
>   	    tcg_temp_free_i64(fp);
>   	}
>   	else {
> -            gen_helper_float_FT(cpu_fregs[FREG(B11_8)], cpu_env, cpu_fpul);
> +            gen_helper_float_FT(FREG(B11_8), cpu_env, cpu_fpul);
>   	}
>   	return;
>       case 0xf03d: /* ftrc FRm/DRm,FPUL - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
> @@ -1765,13 +1747,13 @@ static void _decode_opc(DisasContext * ctx)
>   	    tcg_temp_free_i64(fp);
>   	}
>   	else {
> -            gen_helper_ftrc_FT(cpu_fpul, cpu_env, cpu_fregs[FREG(B11_8)]);
> +            gen_helper_ftrc_FT(cpu_fpul, cpu_env, FREG(B11_8));
>   	}
>   	return;
>       case 0xf04d: /* fneg FRn/DRn - FPSCR: Nothing */
>   	CHECK_FPU_ENABLED
>   	{
> -	    gen_helper_fneg_T(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)]);
> +	    gen_helper_fneg_T(FREG(B11_8), FREG(B11_8));
>   	}
>   	return;
>       case 0xf05d: /* fabs FRn/DRn */
> @@ -1785,7 +1767,7 @@ static void _decode_opc(DisasContext * ctx)
>   	    gen_store_fpr64(fp, DREG(B11_8));
>   	    tcg_temp_free_i64(fp);
>   	} else {
> -	    gen_helper_fabs_FT(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)]);
> +	    gen_helper_fabs_FT(FREG(B11_8), FREG(B11_8));
>   	}
>   	return;
>       case 0xf06d: /* fsqrt FRn */
> @@ -1799,8 +1781,7 @@ static void _decode_opc(DisasContext * ctx)
>   	    gen_store_fpr64(fp, DREG(B11_8));
>   	    tcg_temp_free_i64(fp);
>   	} else {
> -            gen_helper_fsqrt_FT(cpu_fregs[FREG(B11_8)], cpu_env,
> -                                cpu_fregs[FREG(B11_8)]);
> +            gen_helper_fsqrt_FT(FREG(B11_8), cpu_env, FREG(B11_8));
>   	}
>   	return;
>       case 0xf07d: /* fsrra FRn */
> @@ -1809,13 +1790,13 @@ static void _decode_opc(DisasContext * ctx)
>       case 0xf08d: /* fldi0 FRn - FPSCR: R[PR] */
>   	CHECK_FPU_ENABLED
>           if (!(ctx->tbflags & FPSCR_PR)) {
> -	    tcg_gen_movi_i32(cpu_fregs[FREG(B11_8)], 0);
> +	    tcg_gen_movi_i32(FREG(B11_8), 0);
>   	}
>   	return;
>       case 0xf09d: /* fldi1 FRn - FPSCR: R[PR] */
>   	CHECK_FPU_ENABLED
>           if (!(ctx->tbflags & FPSCR_PR)) {
> -	    tcg_gen_movi_i32(cpu_fregs[FREG(B11_8)], 0x3f800000);
> +	    tcg_gen_movi_i32(FREG(B11_8), 0x3f800000);
>   	}
>   	return;
>       case 0xf0ad: /* fcnvsd FPUL,DRn */
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] linux-user/sh4: Notice gUSA regions during signal delivery
  2017-07-06  1:09   ` Laurent Vivier
@ 2017-07-06  8:10     ` John Paul Adrian Glaubitz
  2017-07-06  8:35       ` Laurent Vivier
  2017-07-06 11:07     ` John Paul Adrian Glaubitz
  1 sibling, 1 reply; 26+ messages in thread
From: John Paul Adrian Glaubitz @ 2017-07-06  8:10 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: Richard Henderson, qemu-devel, bruno, aurelien

Hi!

Wow, great to see that there is actually now support for handling gUSA
in qemu-user on sh4. I wonder whether this will help resolve the
lock-up issues on qemu-sh4-user when building Debian packages for
sh4.

The lockups occur fairly often when any process uses
multi-threading. Would this help here?

Was gUSA support recently added to qemu or is this just a fix?

I will give this a try soon. Thanks Laurent for letting me know.

Adrian

-- 
 .''`.  John Paul Adrian Glaubitz
: :' :  Debian Developer - glaubitz@debian.org
`. `'   Freie Universitaet Berlin - glaubitz@physik.fu-berlin.de
  `-    GPG: 62FF 8A75 84E0 2956 9546  0006 7426 3B37 F5B5 F913

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] linux-user/sh4: Notice gUSA regions during signal delivery
  2017-07-06  8:10     ` John Paul Adrian Glaubitz
@ 2017-07-06  8:35       ` Laurent Vivier
  2017-07-06  9:07         ` John Paul Adrian Glaubitz
  2017-07-06  9:13         ` John Paul Adrian Glaubitz
  0 siblings, 2 replies; 26+ messages in thread
From: Laurent Vivier @ 2017-07-06  8:35 UTC (permalink / raw)
  To: John Paul Adrian Glaubitz; +Cc: Richard Henderson, qemu-devel, bruno, aurelien

[-- Attachment #1: Type: text/plain, Size: 846 bytes --]

Le 06/07/2017 à 10:10, John Paul Adrian Glaubitz a écrit :
> Hi!
> 
> Wow, great to see that there is actually now support for handling gUSA
> in qemu-user on sh4. I wonder whether this will help resolve the
> lock-up issues on qemu-sh4-user when building Debian packages for
> sh4.
> 
> The lockups occur fairly often when any process uses
> multi-threading. Would this help here?
> 
> Was gUSA support recently added to qemu or is this just a fix?

This is a patch series proposed by Richard, it is not included in qemu
at the moment:

http://lists.nongnu.org/archive/html/qemu-devel/2017-07/msg01196.html

I think it would be interesting if you generate a qemu binary with it
and use it in your buildds to test it.

I guess the series can be pulled directly from Richard's branch:

git://github.com/rth7680/qemu.git tgt-sh4

Thanks,
Laurent

[-- Attachment #2: 0x3F2FBE3C.asc --]
[-- Type: application/pgp-keys, Size: 13126 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] linux-user/sh4: Notice gUSA regions during signal delivery
  2017-07-06  8:35       ` Laurent Vivier
@ 2017-07-06  9:07         ` John Paul Adrian Glaubitz
  2017-07-06  9:13         ` John Paul Adrian Glaubitz
  1 sibling, 0 replies; 26+ messages in thread
From: John Paul Adrian Glaubitz @ 2017-07-06  9:07 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: Richard Henderson, qemu-devel, bruno, aurelien

On Thu, Jul 06, 2017 at 10:35:15AM +0200, Laurent Vivier wrote:
> This is a patch series proposed by Richard, it is not included in qemu
> at the moment:
> 
> http://lists.nongnu.org/archive/html/qemu-devel/2017-07/msg01196.html

Aye, awesome!

> I think it would be interesting if you generate a qemu binary with it
> and use it in your buildds to test it.
> 
> I guess the series can be pulled directly from Richard's branch:
> 
> git://github.com/rth7680/qemu.git tgt-sh4

You bet, I will!

Adrian

-- 
 .''`.  John Paul Adrian Glaubitz
: :' :  Debian Developer - glaubitz@debian.org
`. `'   Freie Universitaet Berlin - glaubitz@physik.fu-berlin.de
  `-    GPG: 62FF 8A75 84E0 2956 9546  0006 7426 3B37 F5B5 F913

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] linux-user/sh4: Notice gUSA regions during signal delivery
  2017-07-06  8:35       ` Laurent Vivier
  2017-07-06  9:07         ` John Paul Adrian Glaubitz
@ 2017-07-06  9:13         ` John Paul Adrian Glaubitz
  2017-07-06  9:19           ` Laurent Vivier
  1 sibling, 1 reply; 26+ messages in thread
From: John Paul Adrian Glaubitz @ 2017-07-06  9:13 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: Richard Henderson, qemu-devel, bruno, aurelien

On Thu, Jul 06, 2017 at 10:35:15AM +0200, Laurent Vivier wrote:
> I think it would be interesting if you generate a qemu binary with it
> and use it in your buildds to test it.
> 
> I guess the series can be pulled directly from Richard's branch:
> 
> git://github.com/rth7680/qemu.git tgt-sh4

Pull from this tree and built qemu-sh4-user as static from the tgt-sh4
branch. Copied in Debian/sh4 chroot and tried to enter it:

root@nofan:/local_scratch/sid-sh4-sbuild> chroot .
bash: warning: setlocale: LC_ALL: cannot change locale (en_US.UTF-8)
qemu: uncaught target signal 11 (Segmentation fault) - core dumped
qemu: uncaught target signal 11 (Segmentation fault) - core dumped
Segmentation fault
root@nofan:/local_scratch/sid-sh4-sbuild>

Adrian

-- 
 .''`.  John Paul Adrian Glaubitz
: :' :  Debian Developer - glaubitz@debian.org
`. `'   Freie Universitaet Berlin - glaubitz@physik.fu-berlin.de
  `-    GPG: 62FF 8A75 84E0 2956 9546  0006 7426 3B37 F5B5 F913

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] linux-user/sh4: Notice gUSA regions during signal delivery
  2017-07-06  9:13         ` John Paul Adrian Glaubitz
@ 2017-07-06  9:19           ` Laurent Vivier
  0 siblings, 0 replies; 26+ messages in thread
From: Laurent Vivier @ 2017-07-06  9:19 UTC (permalink / raw)
  To: qemu-devel

Le 06/07/2017 à 11:13, John Paul Adrian Glaubitz a écrit :
> On Thu, Jul 06, 2017 at 10:35:15AM +0200, Laurent Vivier wrote:
>> I think it would be interesting if you generate a qemu binary with it
>> and use it in your buildds to test it.
>>
>> I guess the series can be pulled directly from Richard's branch:
>>
>> git://github.com/rth7680/qemu.git tgt-sh4
> 
> Pull from this tree and built qemu-sh4-user as static from the tgt-sh4
> branch. Copied in Debian/sh4 chroot and tried to enter it:
> 
> root@nofan:/local_scratch/sid-sh4-sbuild> chroot .
> bash: warning: setlocale: LC_ALL: cannot change locale (en_US.UTF-8)
> qemu: uncaught target signal 11 (Segmentation fault) - core dumped
> qemu: uncaught target signal 11 (Segmentation fault) - core dumped
> Segmentation fault
> root@nofan:/local_scratch/sid-sh4-sbuild>

Could you try origin/master to see if the regression is introduced by
this series or by a previous one?

Thanks,
Laurent

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] linux-user/sh4: Notice gUSA regions during signal delivery
  2017-07-06  1:09   ` Laurent Vivier
  2017-07-06  8:10     ` John Paul Adrian Glaubitz
@ 2017-07-06 11:07     ` John Paul Adrian Glaubitz
  1 sibling, 0 replies; 26+ messages in thread
From: John Paul Adrian Glaubitz @ 2017-07-06 11:07 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: Richard Henderson, qemu-devel, bruno, aurelien

On Thu, Jul 06, 2017 at 03:09:59AM +0200, Laurent Vivier wrote:
> Reviewed-by: Laurent Vivier <laurent@vivier.eu>

I have identified this patch as reponsible for the
segfaults. Reverting this patch fixes the issue. The crashes are
random. Sometimes it crashes directly when entering the chroot,
sometimes only after a command like running "apt
update". Interestingly, the issue does not reproduce on an older
chroot from 2015.

I have uploaded two chroots for testing, one from 2015 [1] and one
freshly generated [2] which shows the crashes with patch nr. 5
applied.

Adrian

> [1] https://people.debian.org/~glaubitz/chroots/unstable-sh4-20150315.tar.gz
> [2] https://people.debian.org/~glaubitz/chroots/unstable-sh4-20170706.tgz

-- 
 .''`.  John Paul Adrian Glaubitz
: :' :  Debian Developer - glaubitz@debian.org
`. `'   Freie Universitaet Berlin - glaubitz@physik.fu-berlin.de
  `-    GPG: 62FF 8A75 84E0 2956 9546  0006 7426 3B37 F5B5 F913

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] linux-user/sh4: Notice gUSA regions during signal delivery
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 05/11] linux-user/sh4: Notice gUSA regions during signal delivery Richard Henderson
  2017-07-06  1:09   ` Laurent Vivier
@ 2017-07-06 12:09   ` Laurent Vivier
  1 sibling, 0 replies; 26+ messages in thread
From: Laurent Vivier @ 2017-07-06 12:09 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: bruno, aurelien

Le 06/07/2017 à 02:23, Richard Henderson a écrit :
> We translate gUSA regions atomically in a parallel context.
> But in a serial context a gUSA region may be interrupted.
> In that case, restart the region as the kernel would.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  linux-user/signal.c | 21 +++++++++++++++++++++
>  1 file changed, 21 insertions(+)
> 
> diff --git a/linux-user/signal.c b/linux-user/signal.c
> index 3d18d1b..1e716a9 100644
> --- a/linux-user/signal.c
> +++ b/linux-user/signal.c
> @@ -3471,6 +3471,23 @@ static abi_ulong get_sigframe(struct target_sigaction *ka,
>      return (sp - frame_size) & -8ul;
>  }
>  
> +/* Notice when we're in the middle of a gUSA region and reset.
> +   Note that this will only occur for !parallel_cpus, as we will
> +   translate such sequences differently in a parallel context.  */
> +static void unwind_gusa(CPUSH4State *regs)
> +{
> +    /* If the stack pointer is sufficiently negative... */
> +    if ((regs->gregs[15] & 0xc0000000u) == 0xc0000000u) {

kernel also checks PC < gUSA region end point,
try this:

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 1e716a9..4e1e4f0 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -3477,7 +3477,8 @@ static abi_ulong get_sigframe(struct
target_sigaction *ka,
 static void unwind_gusa(CPUSH4State *regs)
 {
     /* If the stack pointer is sufficiently negative... */
-    if ((regs->gregs[15] & 0xc0000000u) == 0xc0000000u) {
+    if ((regs->gregs[15] & 0xc0000000u) == 0xc0000000u &&
+        regs->pc < regs->gregs[0]) {
         /* Reset the PC to before the gUSA region, as computed from
            R0 = region end, SP = -(region size), plus one more insn
            that actually sets SP to the region size.  */

Laurent

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 00/11] target/sh4 improvments
  2017-07-06  0:23 [Qemu-devel] [PATCH 00/11] target/sh4 improvments Richard Henderson
                   ` (11 preceding siblings ...)
  2017-07-06  1:15 ` [Qemu-devel] [PATCH 00/11] target/sh4 improvments Laurent Vivier
@ 2017-07-06 14:55 ` Aurelien Jarno
  12 siblings, 0 replies; 26+ messages in thread
From: Aurelien Jarno @ 2017-07-06 14:55 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, bruno

On 2017-07-05 14:23, Richard Henderson wrote:
> This fixes two problems with atomic operations on sh4,
> including an attempt at supporting the user-space atomics
> technique used by most sh-linux-user binaries.
> 
> This is good enough to run the one occurrence in linux-user-test-0.3.
> I'm still downloading enough of a cross environment to be able to run
> more recent sh4 binaries.  Including the one in the LP bug report.
> 
> Thoughts and more extensive testing appreciated.

Thanks for you work. For the next version, could you please base it on
this patchset:

http://lists.nongnu.org/archive/html/qemu-devel/2017-07/msg00095.html

I'll try to review it over the next days, but unfortunately I'll have
little time before Monday.

-- 
Aurelien Jarno                          GPG: 4096R/1DDD8C9B
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 02/11] target/sh4: Consolidate end-of-TB tests
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 02/11] target/sh4: Consolidate end-of-TB tests Richard Henderson
@ 2017-07-06 15:17   ` Aurelien Jarno
  0 siblings, 0 replies; 26+ messages in thread
From: Aurelien Jarno @ 2017-07-06 15:17 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, bruno

On 2017-07-05 14:23, Richard Henderson wrote:
> We can fold 3 different tests within the decode loop
> into a more accurate computation of max_insns to start.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  target/sh4/translate.c | 29 +++++++++++++++++------------
>  1 file changed, 17 insertions(+), 12 deletions(-)
> 
> diff --git a/target/sh4/translate.c b/target/sh4/translate.c
> index 6b247fa..e1661e9 100644
> --- a/target/sh4/translate.c
> +++ b/target/sh4/translate.c
> @@ -1856,7 +1856,6 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
>      ctx.features = env->features;
>      ctx.has_movcal = (ctx.tbflags & TB_FLAG_PENDING_MOVCA);
>  
> -    num_insns = 0;
>      max_insns = tb->cflags & CF_COUNT_MASK;
>      if (max_insns == 0) {
>          max_insns = CF_COUNT_MASK;
> @@ -1864,9 +1863,23 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
>      if (max_insns > TCG_MAX_INSNS) {
>          max_insns = TCG_MAX_INSNS;
>      }
> +    /* Since the ISA is fixed-width, we can bound by the number
> +       of instructions remaining on the page.  */
> +    num_insns = (TARGET_PAGE_SIZE - (ctx.pc & (TARGET_PAGE_SIZE - 1))) / 2;

This could be written as num_insn = -(ctx.pc | TARGET_PAGE_SIZE) / 2;

> +    if (max_insns > num_insns) {
> +        max_insns = num_insns;
> +    }

You are following the existing pattern, so I can really blame you, but
maybe it's the moment to change the existing code into something like:

    max_insns = MIN(max_insn, ...)

> +    /* Single stepping means just that.  */
> +    if (ctx.singlestep_enabled || singlestep) {
> +        max_insns = 1;
> +    }
>  
>      gen_tb_start(tb);
> -    while (ctx.bstate == BS_NONE && !tcg_op_buf_full()) {
> +    num_insns = 0;
> +
> +    while (ctx.bstate == BS_NONE
> +           && num_insns < max_insns
> +           && !tcg_op_buf_full()) {
>          tcg_gen_insn_start(ctx.pc, ctx.envflags);
>          num_insns++;
>  
> @@ -1890,18 +1903,10 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
>          ctx.opcode = cpu_lduw_code(env, ctx.pc);
>  	decode_opc(&ctx);
>  	ctx.pc += 2;
> -	if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0)
> -	    break;
> -        if (cs->singlestep_enabled) {
> -	    break;
> -        }
> -        if (num_insns >= max_insns)
> -            break;
> -        if (singlestep)
> -            break;
>      }
> -    if (tb->cflags & CF_LAST_IO)
> +    if (tb->cflags & CF_LAST_IO) {
>          gen_io_end();
> +    }
>      if (cs->singlestep_enabled) {
>          gen_save_cpu_state(&ctx, true);
>          gen_helper_debug(cpu_env);

Besides the minor nitpicks above:

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>

-- 
Aurelien Jarno                          GPG: 4096R/1DDD8C9B
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 01/11] target/sh4: Use cmpxchg for movco
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 01/11] target/sh4: Use cmpxchg for movco Richard Henderson
@ 2017-07-06 15:25   ` Richard Henderson
  0 siblings, 0 replies; 26+ messages in thread
From: Richard Henderson @ 2017-07-06 15:25 UTC (permalink / raw)
  To: qemu-devel; +Cc: bruno, aurelien

On 07/05/2017 02:23 PM, Richard Henderson wrote:
> As for other targets, cmpxchg isn't quite right for ll/sc,
> suffering from an ABA race, but is sufficient to implement
> portable atomic operations.
> 
> Signed-off-by: Richard Henderson<rth@twiddle.net>
> ---
>   target/sh4/cpu.h       |  3 ++-
>   target/sh4/translate.c | 56 +++++++++++++++++++++++++++++++++-----------------
>   2 files changed, 39 insertions(+), 20 deletions(-)

Note to self: missing reset of lock_addr across interrupt and priv change 
boundaries.


r~

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [Qemu-devel] [PATCH 03/11] target/sh4: Handle user-space atomics
  2017-07-06  0:23 ` [Qemu-devel] [PATCH 03/11] target/sh4: Handle user-space atomics Richard Henderson
@ 2017-07-06 15:50   ` Aurelien Jarno
  0 siblings, 0 replies; 26+ messages in thread
From: Aurelien Jarno @ 2017-07-06 15:50 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, bruno

On 2017-07-05 14:23, Richard Henderson wrote:
> For uniprocessors, SH4 uses optimistic restartable atomic sequences.
> Upon an interrupt, a real kernel would simply notice magic values in
> the registers and reset the PC to the start of the sequence.
> 
> For QEMU, we cannot do this in quite the same way.  Instead, we notice
> the normal start of such a sequence (mov #-x,r15), and start a new TB
> that can be executed under cpu_exec_step_atomic.
> 
> Reported-by: Bruno Haible  <bruno@clisp.org>
> LP: https://bugs.launchpad.net/bugs/1701971
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  target/sh4/cpu.h       |  21 ++++++--
>  target/sh4/helper.h    |   1 +
>  target/sh4/op_helper.c |   6 +++
>  target/sh4/translate.c | 137 +++++++++++++++++++++++++++++++++++++++++++------
>  4 files changed, 147 insertions(+), 18 deletions(-)

I haven't reviewed this patch in details, but note that it breaks
booting a system under qemu-system.

-- 
Aurelien Jarno                          GPG: 4096R/1DDD8C9B
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2017-07-06 15:50 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-06  0:23 [Qemu-devel] [PATCH 00/11] target/sh4 improvments Richard Henderson
2017-07-06  0:23 ` [Qemu-devel] [PATCH 01/11] target/sh4: Use cmpxchg for movco Richard Henderson
2017-07-06 15:25   ` Richard Henderson
2017-07-06  0:23 ` [Qemu-devel] [PATCH 02/11] target/sh4: Consolidate end-of-TB tests Richard Henderson
2017-07-06 15:17   ` Aurelien Jarno
2017-07-06  0:23 ` [Qemu-devel] [PATCH 03/11] target/sh4: Handle user-space atomics Richard Henderson
2017-07-06 15:50   ` Aurelien Jarno
2017-07-06  0:23 ` [Qemu-devel] [PATCH 04/11] target/sh4: Recognize common gUSA sequences Richard Henderson
2017-07-06  0:23 ` [Qemu-devel] [PATCH 05/11] linux-user/sh4: Notice gUSA regions during signal delivery Richard Henderson
2017-07-06  1:09   ` Laurent Vivier
2017-07-06  8:10     ` John Paul Adrian Glaubitz
2017-07-06  8:35       ` Laurent Vivier
2017-07-06  9:07         ` John Paul Adrian Glaubitz
2017-07-06  9:13         ` John Paul Adrian Glaubitz
2017-07-06  9:19           ` Laurent Vivier
2017-07-06 11:07     ` John Paul Adrian Glaubitz
2017-07-06 12:09   ` Laurent Vivier
2017-07-06  0:23 ` [Qemu-devel] [PATCH 06/11] target/sh4: Hoist register bank selection Richard Henderson
2017-07-06  0:23 ` [Qemu-devel] [PATCH 07/11] target/sh4: Unify cpu_fregs into FREG Richard Henderson
2017-07-06  1:55   ` Philippe Mathieu-Daudé
2017-07-06  0:23 ` [Qemu-devel] [PATCH 08/11] target/sh4: Pass DisasContext to fpr64 routines Richard Henderson
2017-07-06  0:23 ` [Qemu-devel] [PATCH 09/11] target/sh4: Avoid a potential translator crash for malformed FPR64 Richard Henderson
2017-07-06  0:24 ` [Qemu-devel] [PATCH 10/11] target/sh4: Hoist fp bank selection Richard Henderson
2017-07-06  0:24 ` [Qemu-devel] [PATCH 11/11] target/sh4: Eliminate DREG macro Richard Henderson
2017-07-06  1:15 ` [Qemu-devel] [PATCH 00/11] target/sh4 improvments Laurent Vivier
2017-07-06 14:55 ` Aurelien Jarno

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.