All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 00/14] target/sh4: misc fixes, cleanup and optimizations
@ 2017-05-01 22:10 Aurelien Jarno
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 01/14] target/sh4: split ctx->flags into ctx->tbflags and ctx->envflags Aurelien Jarno
                   ` (13 more replies)
  0 siblings, 14 replies; 21+ messages in thread
From: Aurelien Jarno @ 2017-05-01 22:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

This patch series try to improve the SH4 target by using the (more or
less) recently introduced TCG features. It also fixes some issues spot
when writting the patches (linking of TB with different flags, SH4-A
specific instructions allowed on SH4) and correctly trap unaligned
accesses.

Aurelien Jarno (14):
  target/sh4: split ctx->flags into ctx->tbflags and ctx->envflags
  target/sh4: get rid of DELAY_SLOT_CLEARME
  target/sh4: do not include DELAY_SLOT_TRUE in the TB state
  target/sh4: move DELAY_SLOT_TRUE flag into a separate global
  target/sh4: fix BS_STOP exit
  target/sh4: fix BS_EXCP exit
  target/sh4: only save flags state at the end of the TB
  target/sh4: fold ctx->bstate = BS_BRANCH into gen_conditional_jump
  target/sh4: optimize gen_store_fpr64
  target/sh4: optimize gen_write_sr using extract op
  target/sh4: generate fences for SH4
  target/sh4: implement tas.b using atomic helper
  target/sh4: movua.l is an SH4-A only instruction
  target/sh4: trap unaligned accesses

 target/sh4/cpu.c       |   1 +
 target/sh4/cpu.h       |  18 ++-
 target/sh4/helper.c    |   4 +-
 target/sh4/op_helper.c |  19 +++
 target/sh4/translate.c | 323 ++++++++++++++++++++++++-------------------------
 5 files changed, 183 insertions(+), 182 deletions(-)

-- 
2.11.0

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 01/14] target/sh4: split ctx->flags into ctx->tbflags and ctx->envflags
  2017-05-01 22:10 [Qemu-devel] [PATCH 00/14] target/sh4: misc fixes, cleanup and optimizations Aurelien Jarno
@ 2017-05-01 22:10 ` Aurelien Jarno
  2017-05-02 12:16   ` Philippe Mathieu-Daudé
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 02/14] target/sh4: get rid of DELAY_SLOT_CLEARME Aurelien Jarno
                   ` (12 subsequent siblings)
  13 siblings, 1 reply; 21+ messages in thread
From: Aurelien Jarno @ 2017-05-01 22:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

There is a confusion (and not only in the SH4 target) between tb->flags,
env->flags and ctx->flags. To avoid it, split ctx->flags into
ctx->tbflags and ctx->envflags. ctx->tbflags stays unchanged during the
whole TB translation, while ctx->envflags evolves and is kept in sync
with env->flags using TCG instructions. ctx->envflags now only contains
the part that of env->flags that is contained in the TB state, i.e. the
DELAY_SLOT* flags.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target/sh4/translate.c | 161 +++++++++++++++++++++++++------------------------
 1 file changed, 82 insertions(+), 79 deletions(-)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index c89a14733f..0b16ff33ea 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -37,7 +37,8 @@ typedef struct DisasContext {
     struct TranslationBlock *tb;
     target_ulong pc;
     uint16_t opcode;
-    uint32_t flags;
+    uint32_t tbflags;
+    uint32_t envflags;
     int bstate;
     int memidx;
     uint32_t delayed_pc;
@@ -49,7 +50,7 @@ typedef struct DisasContext {
 #if defined(CONFIG_USER_ONLY)
 #define IS_USER(ctx) 1
 #else
-#define IS_USER(ctx) (!(ctx->flags & (1u << SR_MD)))
+#define IS_USER(ctx) (!(ctx->tbflags & (1u << SR_MD)))
 #endif
 
 enum {
@@ -317,51 +318,50 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
 #define B11_8 ((ctx->opcode >> 8) & 0xf)
 #define B15_12 ((ctx->opcode >> 12) & 0xf)
 
-#define REG(x) ((x) < 8 && (ctx->flags & (1u << SR_MD))\
-                        && (ctx->flags & (1u << SR_RB))\
+#define REG(x) ((x) < 8 && (ctx->tbflags & (1u << SR_MD))\
+                        && (ctx->tbflags & (1u << SR_RB))\
                 ? (cpu_gregs[x + 16]) : (cpu_gregs[x]))
 
-#define ALTREG(x) ((x) < 8 && (!(ctx->flags & (1u << SR_MD))\
-                               || !(ctx->flags & (1u << SR_RB)))\
+#define ALTREG(x) ((x) < 8 && (!(ctx->tbflags & (1u << SR_MD))\
+                               || !(ctx->tbflags & (1u << SR_RB)))\
 		? (cpu_gregs[x + 16]) : (cpu_gregs[x]))
 
-#define FREG(x) (ctx->flags & FPSCR_FR ? (x) ^ 0x10 : (x))
+#define FREG(x) (ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x))
 #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))
-#define XREG(x) (ctx->flags & FPSCR_FR ? XHACK(x) ^ 0x10 : XHACK(x))
+#define XREG(x) (ctx->tbflags & FPSCR_FR ? XHACK(x) ^ 0x10 : XHACK(x))
 #define DREG(x) FREG(x) /* Assumes lsb of (x) is always 0 */
 
 #define CHECK_NOT_DELAY_SLOT \
-  if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL))     \
-  {                                                           \
-      tcg_gen_movi_i32(cpu_pc, ctx->pc);                      \
-      gen_helper_raise_slot_illegal_instruction(cpu_env);     \
-      ctx->bstate = BS_BRANCH;                                \
-      return;                                                 \
-  }
-
-#define CHECK_PRIVILEGED                                        \
-  if (IS_USER(ctx)) {                                           \
-      tcg_gen_movi_i32(cpu_pc, ctx->pc);                        \
-      if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
-          gen_helper_raise_slot_illegal_instruction(cpu_env);   \
-      } else {                                                  \
-          gen_helper_raise_illegal_instruction(cpu_env);        \
-      }                                                         \
-      ctx->bstate = BS_BRANCH;                                  \
-      return;                                                   \
-  }
-
-#define CHECK_FPU_ENABLED                                       \
-  if (ctx->flags & (1u << SR_FD)) {                             \
-      tcg_gen_movi_i32(cpu_pc, ctx->pc);                        \
-      if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
-          gen_helper_raise_slot_fpu_disable(cpu_env);           \
-      } else {                                                  \
-          gen_helper_raise_fpu_disable(cpu_env);                \
-      }                                                         \
-      ctx->bstate = BS_BRANCH;                                  \
-      return;                                                   \
-  }
+    if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {     \
+        tcg_gen_movi_i32(cpu_pc, ctx->pc);                           \
+        gen_helper_raise_slot_illegal_instruction(cpu_env);          \
+        ctx->bstate = BS_BRANCH;                                     \
+        return;                                                      \
+    }
+
+#define CHECK_PRIVILEGED                                             \
+    if (IS_USER(ctx)) {                                              \
+        tcg_gen_movi_i32(cpu_pc, ctx->pc);                           \
+        if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
+            gen_helper_raise_slot_illegal_instruction(cpu_env);      \
+        } else {                                                     \
+            gen_helper_raise_illegal_instruction(cpu_env);           \
+        }                                                            \
+        ctx->bstate = BS_BRANCH;                                     \
+        return;                                                      \
+    }
+
+#define CHECK_FPU_ENABLED                                            \
+    if (ctx->tbflags & (1u << SR_FD)) {                              \
+        tcg_gen_movi_i32(cpu_pc, ctx->pc);                           \
+        if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
+            gen_helper_raise_slot_fpu_disable(cpu_env);              \
+        } else {                                                     \
+            gen_helper_raise_fpu_disable(cpu_env);                   \
+        }                                                            \
+        ctx->bstate = BS_BRANCH;                                     \
+        return;                                                      \
+    }
 
 static void _decode_opc(DisasContext * ctx)
 {
@@ -409,7 +409,7 @@ static void _decode_opc(DisasContext * ctx)
     case 0x000b:		/* rts */
 	CHECK_NOT_DELAY_SLOT
 	tcg_gen_mov_i32(cpu_delayed_pc, cpu_pr);
-	ctx->flags |= DELAY_SLOT;
+        ctx->envflags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x0028:		/* clrmac */
@@ -431,7 +431,7 @@ static void _decode_opc(DisasContext * ctx)
 	CHECK_NOT_DELAY_SLOT
         gen_write_sr(cpu_ssr);
 	tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
-	ctx->flags |= DELAY_SLOT;
+        ctx->envflags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x0058:		/* sets */
@@ -498,14 +498,14 @@ static void _decode_opc(DisasContext * ctx)
 	CHECK_NOT_DELAY_SLOT
 	ctx->delayed_pc = ctx->pc + 4 + B11_0s * 2;
 	tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc);
-	ctx->flags |= DELAY_SLOT;
+        ctx->envflags |= DELAY_SLOT;
 	return;
     case 0xb000:		/* bsr disp */
 	CHECK_NOT_DELAY_SLOT
 	tcg_gen_movi_i32(cpu_pr, ctx->pc + 4);
 	ctx->delayed_pc = ctx->pc + 4 + B11_0s * 2;
 	tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc);
-	ctx->flags |= DELAY_SLOT;
+        ctx->envflags |= DELAY_SLOT;
 	return;
     }
 
@@ -939,7 +939,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xf00c: /* fmov {F,D,X}Rm,{F,D,X}Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
-        if (ctx->flags & FPSCR_SZ) {
+        if (ctx->tbflags & FPSCR_SZ) {
 	    TCGv_i64 fp = tcg_temp_new_i64();
 	    gen_load_fpr64(fp, XREG(B7_4));
 	    gen_store_fpr64(fp, XREG(B11_8));
@@ -950,7 +950,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xf00a: /* fmov {F,D,X}Rm,@Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
-        if (ctx->flags & FPSCR_SZ) {
+        if (ctx->tbflags & FPSCR_SZ) {
 	    TCGv addr_hi = tcg_temp_new();
 	    int fr = XREG(B7_4);
 	    tcg_gen_addi_i32(addr_hi, REG(B11_8), 4);
@@ -966,7 +966,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
-        if (ctx->flags & FPSCR_SZ) {
+        if (ctx->tbflags & FPSCR_SZ) {
 	    TCGv addr_hi = tcg_temp_new();
 	    int fr = XREG(B11_8);
 	    tcg_gen_addi_i32(addr_hi, REG(B7_4), 4);
@@ -980,7 +980,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
-        if (ctx->flags & FPSCR_SZ) {
+        if (ctx->tbflags & FPSCR_SZ) {
 	    TCGv addr_hi = tcg_temp_new();
 	    int fr = XREG(B11_8);
 	    tcg_gen_addi_i32(addr_hi, REG(B7_4), 4);
@@ -998,7 +998,7 @@ static void _decode_opc(DisasContext * ctx)
 	CHECK_FPU_ENABLED
         TCGv addr = tcg_temp_new_i32();
         tcg_gen_subi_i32(addr, REG(B11_8), 4);
-        if (ctx->flags & FPSCR_SZ) {
+        if (ctx->tbflags & FPSCR_SZ) {
 	    int fr = XREG(B7_4);
             tcg_gen_qemu_st_i32(cpu_fregs[fr+1], addr, ctx->memidx, MO_TEUL);
 	    tcg_gen_subi_i32(addr, addr, 4);
@@ -1015,7 +1015,7 @@ static void _decode_opc(DisasContext * ctx)
 	{
 	    TCGv addr = tcg_temp_new_i32();
 	    tcg_gen_add_i32(addr, REG(B7_4), REG(0));
-            if (ctx->flags & FPSCR_SZ) {
+            if (ctx->tbflags & FPSCR_SZ) {
 		int fr = XREG(B11_8);
                 tcg_gen_qemu_ld_i32(cpu_fregs[fr], addr,
                                     ctx->memidx, MO_TEUL);
@@ -1034,7 +1034,7 @@ static void _decode_opc(DisasContext * ctx)
 	{
 	    TCGv addr = tcg_temp_new();
 	    tcg_gen_add_i32(addr, REG(B11_8), REG(0));
-            if (ctx->flags & FPSCR_SZ) {
+            if (ctx->tbflags & FPSCR_SZ) {
 		int fr = XREG(B7_4);
                 tcg_gen_qemu_ld_i32(cpu_fregs[fr], addr,
                                     ctx->memidx, MO_TEUL);
@@ -1056,7 +1056,7 @@ static void _decode_opc(DisasContext * ctx)
     case 0xf005: /* fcmp/gt Rm,Rn - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
 	{
 	    CHECK_FPU_ENABLED
-            if (ctx->flags & FPSCR_PR) {
+            if (ctx->tbflags & FPSCR_PR) {
                 TCGv_i64 fp0, fp1;
 
 		if (ctx->opcode & 0x0110)
@@ -1125,7 +1125,7 @@ static void _decode_opc(DisasContext * ctx)
     case 0xf00e: /* fmac FR0,RM,Rn */
         {
             CHECK_FPU_ENABLED
-            if (ctx->flags & FPSCR_PR) {
+            if (ctx->tbflags & FPSCR_PR) {
                 break; /* illegal instruction */
             } else {
                 gen_helper_fmac_FT(cpu_fregs[FREG(B11_8)], cpu_env,
@@ -1162,7 +1162,7 @@ static void _decode_opc(DisasContext * ctx)
     case 0x8f00:		/* bf/s label */
 	CHECK_NOT_DELAY_SLOT
 	gen_branch_slot(ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2, 0);
-	ctx->flags |= DELAY_SLOT_CONDITIONAL;
+        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
 	return;
     case 0x8900:		/* bt label */
 	CHECK_NOT_DELAY_SLOT
@@ -1173,7 +1173,7 @@ static void _decode_opc(DisasContext * ctx)
     case 0x8d00:		/* bt/s label */
 	CHECK_NOT_DELAY_SLOT
 	gen_branch_slot(ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2, 1);
-	ctx->flags |= DELAY_SLOT_CONDITIONAL;
+        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
 	return;
     case 0x8800:		/* cmp/eq #imm,R0 */
         tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(0), B7_0s);
@@ -1354,14 +1354,14 @@ static void _decode_opc(DisasContext * ctx)
     case 0x0023:		/* braf Rn */
 	CHECK_NOT_DELAY_SLOT
 	tcg_gen_addi_i32(cpu_delayed_pc, REG(B11_8), ctx->pc + 4);
-	ctx->flags |= DELAY_SLOT;
+        ctx->envflags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x0003:		/* bsrf Rn */
 	CHECK_NOT_DELAY_SLOT
 	tcg_gen_movi_i32(cpu_pr, ctx->pc + 4);
 	tcg_gen_add_i32(cpu_delayed_pc, REG(B11_8), cpu_pr);
-	ctx->flags |= DELAY_SLOT;
+        ctx->envflags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x4015:		/* cmp/pl Rn */
@@ -1377,14 +1377,14 @@ static void _decode_opc(DisasContext * ctx)
     case 0x402b:		/* jmp @Rn */
 	CHECK_NOT_DELAY_SLOT
 	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
-	ctx->flags |= DELAY_SLOT;
+        ctx->envflags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x400b:		/* jsr @Rn */
 	CHECK_NOT_DELAY_SLOT
 	tcg_gen_movi_i32(cpu_pr, ctx->pc + 4);
 	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
-	ctx->flags |= DELAY_SLOT;
+        ctx->envflags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x400e:		/* ldc Rm,SR */
@@ -1663,7 +1663,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xf02d: /* float FPUL,FRn/DRn - FPSCR: R[PR,Enable.I]/W[Cause,Flag] */
 	CHECK_FPU_ENABLED
-        if (ctx->flags & FPSCR_PR) {
+        if (ctx->tbflags & FPSCR_PR) {
 	    TCGv_i64 fp;
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
@@ -1678,7 +1678,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xf03d: /* ftrc FRm/DRm,FPUL - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
 	CHECK_FPU_ENABLED
-        if (ctx->flags & FPSCR_PR) {
+        if (ctx->tbflags & FPSCR_PR) {
 	    TCGv_i64 fp;
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
@@ -1699,7 +1699,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xf05d: /* fabs FRn/DRn */
 	CHECK_FPU_ENABLED
-        if (ctx->flags & FPSCR_PR) {
+        if (ctx->tbflags & FPSCR_PR) {
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
 	    TCGv_i64 fp = tcg_temp_new_i64();
@@ -1713,7 +1713,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xf06d: /* fsqrt FRn */
 	CHECK_FPU_ENABLED
-        if (ctx->flags & FPSCR_PR) {
+        if (ctx->tbflags & FPSCR_PR) {
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
 	    TCGv_i64 fp = tcg_temp_new_i64();
@@ -1731,13 +1731,13 @@ static void _decode_opc(DisasContext * ctx)
 	break;
     case 0xf08d: /* fldi0 FRn - FPSCR: R[PR] */
 	CHECK_FPU_ENABLED
-        if (!(ctx->flags & FPSCR_PR)) {
+        if (!(ctx->tbflags & FPSCR_PR)) {
 	    tcg_gen_movi_i32(cpu_fregs[FREG(B11_8)], 0);
 	}
 	return;
     case 0xf09d: /* fldi1 FRn - FPSCR: R[PR] */
 	CHECK_FPU_ENABLED
-        if (!(ctx->flags & FPSCR_PR)) {
+        if (!(ctx->tbflags & FPSCR_PR)) {
 	    tcg_gen_movi_i32(cpu_fregs[FREG(B11_8)], 0x3f800000);
 	}
 	return;
@@ -1761,7 +1761,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xf0ed: /* fipr FVm,FVn */
         CHECK_FPU_ENABLED
-        if ((ctx->flags & FPSCR_PR) == 0) {
+        if ((ctx->tbflags & FPSCR_PR) == 0) {
             TCGv m, n;
             m = tcg_const_i32((ctx->opcode >> 8) & 3);
             n = tcg_const_i32((ctx->opcode >> 10) & 3);
@@ -1774,7 +1774,7 @@ static void _decode_opc(DisasContext * ctx)
     case 0xf0fd: /* ftrv XMTRX,FVn */
         CHECK_FPU_ENABLED
         if ((ctx->opcode & 0x0300) == 0x0100 &&
-            (ctx->flags & FPSCR_PR) == 0) {
+            (ctx->tbflags & FPSCR_PR) == 0) {
             TCGv n;
             n = tcg_const_i32((ctx->opcode >> 10) & 3);
             gen_helper_ftrv(cpu_env, n);
@@ -1789,7 +1789,7 @@ static void _decode_opc(DisasContext * ctx)
     fflush(stderr);
 #endif
     tcg_gen_movi_i32(cpu_pc, ctx->pc);
-    if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
+    if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
         gen_helper_raise_slot_illegal_instruction(cpu_env);
     } else {
         gen_helper_raise_illegal_instruction(cpu_env);
@@ -1799,20 +1799,20 @@ static void _decode_opc(DisasContext * ctx)
 
 static void decode_opc(DisasContext * ctx)
 {
-    uint32_t old_flags = ctx->flags;
+    uint32_t old_flags = ctx->envflags;
 
     _decode_opc(ctx);
 
     if (old_flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
-        if (ctx->flags & DELAY_SLOT_CLEARME) {
+        if (ctx->envflags & DELAY_SLOT_CLEARME) {
             gen_store_flags(0);
         } else {
 	    /* go out of the delay slot */
-	    uint32_t new_flags = ctx->flags;
+            uint32_t new_flags = ctx->envflags;
 	    new_flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
 	    gen_store_flags(new_flags);
         }
-        ctx->flags = 0;
+        ctx->envflags = 0;
         ctx->bstate = BS_BRANCH;
         if (old_flags & DELAY_SLOT_CONDITIONAL) {
 	    gen_delayed_conditional_jump(ctx);
@@ -1823,8 +1823,9 @@ static void decode_opc(DisasContext * ctx)
     }
 
     /* go into a delay slot */
-    if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL))
-        gen_store_flags(ctx->flags);
+    if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
+        gen_store_flags(ctx->envflags);
+    }
 }
 
 void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
@@ -1838,16 +1839,18 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
 
     pc_start = tb->pc;
     ctx.pc = pc_start;
-    ctx.flags = (uint32_t)tb->flags;
+    ctx.tbflags = (uint32_t)tb->flags;
+    ctx.envflags = tb->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL |
+                                DELAY_SLOT_CLEARME);
     ctx.bstate = BS_NONE;
-    ctx.memidx = (ctx.flags & (1u << SR_MD)) == 0 ? 1 : 0;
+    ctx.memidx = (ctx.tbflags & (1u << SR_MD)) == 0 ? 1 : 0;
     /* We don't know if the delayed pc came from a dynamic or static branch,
        so assume it is a dynamic branch.  */
     ctx.delayed_pc = -1; /* use delayed pc from env pointer */
     ctx.tb = tb;
     ctx.singlestep_enabled = cs->singlestep_enabled;
     ctx.features = env->features;
-    ctx.has_movcal = (ctx.flags & TB_FLAG_PENDING_MOVCA);
+    ctx.has_movcal = (ctx.tbflags & TB_FLAG_PENDING_MOVCA);
 
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
@@ -1860,7 +1863,7 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
 
     gen_tb_start(tb);
     while (ctx.bstate == BS_NONE && !tcg_op_buf_full()) {
-        tcg_gen_insn_start(ctx.pc, ctx.flags);
+        tcg_gen_insn_start(ctx.pc, ctx.envflags);
         num_insns++;
 
         if (unlikely(cpu_breakpoint_test(cs, ctx.pc, BP_ANY))) {
@@ -1904,8 +1907,8 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
             /* gen_op_interrupt_restart(); */
             /* fall through */
         case BS_NONE:
-            if (ctx.flags) {
-                gen_store_flags(ctx.flags | DELAY_SLOT_CLEARME);
+            if (ctx.envflags) {
+                gen_store_flags(ctx.envflags | DELAY_SLOT_CLEARME);
 	    }
             gen_goto_tb(&ctx, 0, ctx.pc);
             break;
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 02/14] target/sh4: get rid of DELAY_SLOT_CLEARME
  2017-05-01 22:10 [Qemu-devel] [PATCH 00/14] target/sh4: misc fixes, cleanup and optimizations Aurelien Jarno
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 01/14] target/sh4: split ctx->flags into ctx->tbflags and ctx->envflags Aurelien Jarno
@ 2017-05-01 22:10 ` Aurelien Jarno
  2017-05-02 12:32   ` Philippe Mathieu-Daudé
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 03/14] target/sh4: do not include DELAY_SLOT_TRUE in the TB state Aurelien Jarno
                   ` (11 subsequent siblings)
  13 siblings, 1 reply; 21+ messages in thread
From: Aurelien Jarno @ 2017-05-01 22:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

Now that ctx->flags has been split, it becomes clear that
DELAY_SLOT_CLEARME has not impact on the code generation: in both case
ctx->envflags is cleared, either by clearing all the flags, or by
setting it to 0. This is left-over from pre-TCG era.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target/sh4/cpu.h       |  3 +--
 target/sh4/helper.c    |  2 --
 target/sh4/translate.c | 17 +++++------------
 3 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index cad8989f7e..9445cc779f 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -93,7 +93,6 @@
 #define DELAY_SLOT             (1 << 0)
 #define DELAY_SLOT_CONDITIONAL (1 << 1)
 #define DELAY_SLOT_TRUE        (1 << 2)
-#define DELAY_SLOT_CLEARME     (1 << 3)
 /* The dynamic value of the DELAY_SLOT_TRUE flag determines whether the jump
  * after the delay slot should be taken or not. It is calculated from SR_T.
  *
@@ -384,7 +383,7 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc,
     *pc = env->pc;
     *cs_base = 0;
     *flags = (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL
-                    | DELAY_SLOT_TRUE | DELAY_SLOT_CLEARME))   /* Bits  0- 3 */
+                    | DELAY_SLOT_TRUE))                        /* Bits  0- 2 */
             | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR))  /* Bits 19-21 */
             | (env->sr & ((1u << SR_MD) | (1u << SR_RB)))      /* Bits 29-30 */
             | (env->sr & (1u << SR_FD))                        /* Bit 15 */
diff --git a/target/sh4/helper.c b/target/sh4/helper.c
index 036c5ca56c..71bb49a670 100644
--- a/target/sh4/helper.c
+++ b/target/sh4/helper.c
@@ -170,8 +170,6 @@ void superh_cpu_do_interrupt(CPUState *cs)
 	/* Clear flags for exception/interrupt routine. */
 	env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL | DELAY_SLOT_TRUE);
     }
-    if (env->flags & DELAY_SLOT_CLEARME)
-        env->flags = 0;
 
     if (do_exp) {
         env->expevt = cs->exception_index;
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 0b16ff33ea..d4cdf746dc 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -1804,15 +1804,9 @@ static void decode_opc(DisasContext * ctx)
     _decode_opc(ctx);
 
     if (old_flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
-        if (ctx->envflags & DELAY_SLOT_CLEARME) {
-            gen_store_flags(0);
-        } else {
-	    /* go out of the delay slot */
-            uint32_t new_flags = ctx->envflags;
-	    new_flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
-	    gen_store_flags(new_flags);
-        }
-        ctx->envflags = 0;
+        /* go out of the delay slot */
+        ctx->envflags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
+        gen_store_flags(ctx->envflags);
         ctx->bstate = BS_BRANCH;
         if (old_flags & DELAY_SLOT_CONDITIONAL) {
 	    gen_delayed_conditional_jump(ctx);
@@ -1840,8 +1834,7 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
     pc_start = tb->pc;
     ctx.pc = pc_start;
     ctx.tbflags = (uint32_t)tb->flags;
-    ctx.envflags = tb->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL |
-                                DELAY_SLOT_CLEARME);
+    ctx.envflags = tb->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
     ctx.bstate = BS_NONE;
     ctx.memidx = (ctx.tbflags & (1u << SR_MD)) == 0 ? 1 : 0;
     /* We don't know if the delayed pc came from a dynamic or static branch,
@@ -1908,7 +1901,7 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
             /* fall through */
         case BS_NONE:
             if (ctx.envflags) {
-                gen_store_flags(ctx.envflags | DELAY_SLOT_CLEARME);
+                gen_store_flags(ctx.envflags);
 	    }
             gen_goto_tb(&ctx, 0, ctx.pc);
             break;
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 03/14] target/sh4: do not include DELAY_SLOT_TRUE in the TB state
  2017-05-01 22:10 [Qemu-devel] [PATCH 00/14] target/sh4: misc fixes, cleanup and optimizations Aurelien Jarno
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 01/14] target/sh4: split ctx->flags into ctx->tbflags and ctx->envflags Aurelien Jarno
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 02/14] target/sh4: get rid of DELAY_SLOT_CLEARME Aurelien Jarno
@ 2017-05-01 22:10 ` Aurelien Jarno
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 04/14] target/sh4: move DELAY_SLOT_TRUE flag into a separate global Aurelien Jarno
                   ` (10 subsequent siblings)
  13 siblings, 0 replies; 21+ messages in thread
From: Aurelien Jarno @ 2017-05-01 22:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

DELAY_SLOT_TRUE is used as a dynamic condition for the branch after the
delay slot instruction. It is not used in code generation, so there is
no need to including in the TB state.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target/sh4/cpu.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index 9445cc779f..da8d15f1b9 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -382,8 +382,7 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc,
 {
     *pc = env->pc;
     *cs_base = 0;
-    *flags = (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL
-                    | DELAY_SLOT_TRUE))                        /* Bits  0- 2 */
+    *flags = (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) /* Bits 0-1 */
             | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR))  /* Bits 19-21 */
             | (env->sr & ((1u << SR_MD) | (1u << SR_RB)))      /* Bits 29-30 */
             | (env->sr & (1u << SR_FD))                        /* Bit 15 */
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 04/14] target/sh4: move DELAY_SLOT_TRUE flag into a separate global
  2017-05-01 22:10 [Qemu-devel] [PATCH 00/14] target/sh4: misc fixes, cleanup and optimizations Aurelien Jarno
                   ` (2 preceding siblings ...)
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 03/14] target/sh4: do not include DELAY_SLOT_TRUE in the TB state Aurelien Jarno
@ 2017-05-01 22:10 ` Aurelien Jarno
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 05/14] target/sh4: fix BS_STOP exit Aurelien Jarno
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 21+ messages in thread
From: Aurelien Jarno @ 2017-05-01 22:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

Instead of using one bit of the env flags to store the condition of the
next delay slot, use a separate global. It simplifies reading and
writing the flags variable and also removes some confusion between
ctx->envflags and env->flags.

Note that the global is first transfered to a temp in order to be
able to discard the global before the brcond.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target/sh4/cpu.h       | 10 ++--------
 target/sh4/helper.c    |  2 +-
 target/sh4/translate.c | 22 +++++++++++++---------
 3 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index da8d15f1b9..faab3012f9 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -92,13 +92,6 @@
 
 #define DELAY_SLOT             (1 << 0)
 #define DELAY_SLOT_CONDITIONAL (1 << 1)
-#define DELAY_SLOT_TRUE        (1 << 2)
-/* The dynamic value of the DELAY_SLOT_TRUE flag determines whether the jump
- * after the delay slot should be taken or not. It is calculated from SR_T.
- *
- * It is unclear if it is permitted to modify the SR_T flag in a delay slot.
- * The use of DELAY_SLOT_TRUE flag makes us accept such SR_T modification.
- */
 
 typedef struct tlb_t {
     uint32_t vpn;		/* virtual page number */
@@ -148,7 +141,8 @@ typedef struct CPUSH4State {
     uint32_t sgr;		/* saved global register 15 */
     uint32_t dbr;		/* debug base register */
     uint32_t pc;		/* program counter */
-    uint32_t delayed_pc;	/* target of delayed jump */
+    uint32_t delayed_pc;        /* target of delayed branch */
+    uint32_t delayed_cond;      /* condition of delayed branch */
     uint32_t mach;		/* multiply and accumulate high */
     uint32_t macl;		/* multiply and accumulate low */
     uint32_t pr;		/* procedure register */
diff --git a/target/sh4/helper.c b/target/sh4/helper.c
index 71bb49a670..8f8ce81401 100644
--- a/target/sh4/helper.c
+++ b/target/sh4/helper.c
@@ -168,7 +168,7 @@ void superh_cpu_do_interrupt(CPUState *cs)
         /* Branch instruction should be executed again before delay slot. */
 	env->spc -= 2;
 	/* Clear flags for exception/interrupt routine. */
-	env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL | DELAY_SLOT_TRUE);
+        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
     }
 
     if (do_exp) {
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index d4cdf746dc..df3655cc64 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -72,7 +72,7 @@ static TCGv cpu_pr, cpu_fpscr, cpu_fpul, cpu_ldst;
 static TCGv cpu_fregs[32];
 
 /* internal register indexes */
-static TCGv cpu_flags, cpu_delayed_pc;
+static TCGv cpu_flags, cpu_delayed_pc, cpu_delayed_cond;
 
 #include "exec/gen-icount.h"
 
@@ -147,6 +147,10 @@ void sh4_translate_init(void)
     cpu_delayed_pc = tcg_global_mem_new_i32(cpu_env,
 					    offsetof(CPUSH4State, delayed_pc),
 					    "_delayed_pc_");
+    cpu_delayed_cond = tcg_global_mem_new_i32(cpu_env,
+                                              offsetof(CPUSH4State,
+                                                       delayed_cond),
+                                              "_delayed_cond_");
     cpu_ldst = tcg_global_mem_new_i32(cpu_env,
 				      offsetof(CPUSH4State, ldst), "_ldst_");
 
@@ -252,11 +256,12 @@ static void gen_jump(DisasContext * ctx)
 
 static inline void gen_branch_slot(uint32_t delayed_pc, int t)
 {
-    TCGLabel *label = gen_new_label();
     tcg_gen_movi_i32(cpu_delayed_pc, delayed_pc);
-    tcg_gen_brcondi_i32(t ? TCG_COND_EQ : TCG_COND_NE, cpu_sr_t, 0, label);
-    tcg_gen_ori_i32(cpu_flags, cpu_flags, DELAY_SLOT_TRUE);
-    gen_set_label(label);
+    if (t) {
+        tcg_gen_mov_i32(cpu_delayed_cond, cpu_sr_t);
+    } else {
+        tcg_gen_xori_i32(cpu_delayed_cond, cpu_sr_t, 1);
+    }
 }
 
 /* Immediate conditional jump (bt or bf) */
@@ -278,18 +283,17 @@ static void gen_delayed_conditional_jump(DisasContext * ctx)
 
     l1 = gen_new_label();
     ds = tcg_temp_new();
-    tcg_gen_andi_i32(ds, cpu_flags, DELAY_SLOT_TRUE);
+    tcg_gen_mov_i32(ds, cpu_delayed_cond);
+    tcg_gen_discard_i32(cpu_delayed_cond);
     tcg_gen_brcondi_i32(TCG_COND_NE, ds, 0, l1);
     gen_goto_tb(ctx, 1, ctx->pc + 2);
     gen_set_label(l1);
-    tcg_gen_andi_i32(cpu_flags, cpu_flags, ~DELAY_SLOT_TRUE);
     gen_jump(ctx);
 }
 
 static inline void gen_store_flags(uint32_t flags)
 {
-    tcg_gen_andi_i32(cpu_flags, cpu_flags, DELAY_SLOT_TRUE);
-    tcg_gen_ori_i32(cpu_flags, cpu_flags, flags);
+    tcg_gen_movi_i32(cpu_flags, flags);
 }
 
 static inline void gen_load_fpr64(TCGv_i64 t, int reg)
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 05/14] target/sh4: fix BS_STOP exit
  2017-05-01 22:10 [Qemu-devel] [PATCH 00/14] target/sh4: misc fixes, cleanup and optimizations Aurelien Jarno
                   ` (3 preceding siblings ...)
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 04/14] target/sh4: move DELAY_SLOT_TRUE flag into a separate global Aurelien Jarno
@ 2017-05-01 22:10 ` Aurelien Jarno
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 06/14] target/sh4: fix BS_EXCP exit Aurelien Jarno
                   ` (8 subsequent siblings)
  13 siblings, 0 replies; 21+ messages in thread
From: Aurelien Jarno @ 2017-05-01 22:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

When stopping the translation because the state has changed, goto_tb
should not be used as it might link TB with different flags.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target/sh4/translate.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index df3655cc64..3999da87c7 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -1901,8 +1901,9 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
     } else {
 	switch (ctx.bstate) {
         case BS_STOP:
-            /* gen_op_interrupt_restart(); */
-            /* fall through */
+            tcg_gen_movi_i32(cpu_pc, ctx.pc);
+            tcg_gen_exit_tb(0);
+            break;
         case BS_NONE:
             if (ctx.envflags) {
                 gen_store_flags(ctx.envflags);
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 06/14] target/sh4: fix BS_EXCP exit
  2017-05-01 22:10 [Qemu-devel] [PATCH 00/14] target/sh4: misc fixes, cleanup and optimizations Aurelien Jarno
                   ` (4 preceding siblings ...)
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 05/14] target/sh4: fix BS_STOP exit Aurelien Jarno
@ 2017-05-01 22:10 ` Aurelien Jarno
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 07/14] target/sh4: only save flags state at the end of the TB Aurelien Jarno
                   ` (7 subsequent siblings)
  13 siblings, 0 replies; 21+ messages in thread
From: Aurelien Jarno @ 2017-05-01 22:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

In case of exception, there is no need to call tcg_gen_exit_tb as the
exception helper won't return.

Also fix a few cases where BS_BRANCH is called instead of BS_EXCP.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target/sh4/translate.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 3999da87c7..213eb2a0bb 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -339,7 +339,7 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
     if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {     \
         tcg_gen_movi_i32(cpu_pc, ctx->pc);                           \
         gen_helper_raise_slot_illegal_instruction(cpu_env);          \
-        ctx->bstate = BS_BRANCH;                                     \
+        ctx->bstate = BS_EXCP;                                       \
         return;                                                      \
     }
 
@@ -351,7 +351,7 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
         } else {                                                     \
             gen_helper_raise_illegal_instruction(cpu_env);           \
         }                                                            \
-        ctx->bstate = BS_BRANCH;                                     \
+        ctx->bstate = BS_EXCP;                                       \
         return;                                                      \
     }
 
@@ -363,7 +363,7 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
         } else {                                                     \
             gen_helper_raise_fpu_disable(cpu_env);                   \
         }                                                            \
-        ctx->bstate = BS_BRANCH;                                     \
+        ctx->bstate = BS_EXCP;                                       \
         return;                                                      \
     }
 
@@ -1289,7 +1289,7 @@ static void _decode_opc(DisasContext * ctx)
 	    imm = tcg_const_i32(B7_0);
             gen_helper_trapa(cpu_env, imm);
 	    tcg_temp_free(imm);
-	    ctx->bstate = BS_BRANCH;
+            ctx->bstate = BS_EXCP;
 	}
 	return;
     case 0xc800:		/* tst #imm,R0 */
@@ -1798,7 +1798,7 @@ static void _decode_opc(DisasContext * ctx)
     } else {
         gen_helper_raise_illegal_instruction(cpu_env);
     }
-    ctx->bstate = BS_BRANCH;
+    ctx->bstate = BS_EXCP;
 }
 
 static void decode_opc(DisasContext * ctx)
@@ -1867,7 +1867,7 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
             /* We have hit a breakpoint - make sure PC is up-to-date */
             tcg_gen_movi_i32(cpu_pc, ctx.pc);
             gen_helper_debug(cpu_env);
-            ctx.bstate = BS_BRANCH;
+            ctx.bstate = BS_EXCP;
             /* The address covered by the breakpoint must be included in
                [tb->pc, tb->pc + tb->size) in order to for it to be
                properly cleared -- thus we increment the PC here so that
@@ -1911,9 +1911,7 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
             gen_goto_tb(&ctx, 0, ctx.pc);
             break;
         case BS_EXCP:
-            /* gen_op_interrupt_restart(); */
-            tcg_gen_exit_tb(0);
-            break;
+            /* fall through */
         case BS_BRANCH:
         default:
             break;
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 07/14] target/sh4: only save flags state at the end of the TB
  2017-05-01 22:10 [Qemu-devel] [PATCH 00/14] target/sh4: misc fixes, cleanup and optimizations Aurelien Jarno
                   ` (5 preceding siblings ...)
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 06/14] target/sh4: fix BS_EXCP exit Aurelien Jarno
@ 2017-05-01 22:10 ` Aurelien Jarno
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 08/14] target/sh4: fold ctx->bstate = BS_BRANCH into gen_conditional_jump Aurelien Jarno
                   ` (6 subsequent siblings)
  13 siblings, 0 replies; 21+ messages in thread
From: Aurelien Jarno @ 2017-05-01 22:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

There is no need to save flags when entering and exiting the delay slot.
They can be saved only when reaching the end of the TB. If the TB is
interrupted before by an exception, they will be restored using
restore_state_to_opc.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target/sh4/translate.c | 69 ++++++++++++++++++++++++--------------------------
 1 file changed, 33 insertions(+), 36 deletions(-)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 213eb2a0bb..8c99e4fb21 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -212,6 +212,20 @@ static void gen_write_sr(TCGv src)
     tcg_gen_andi_i32(cpu_sr_t, cpu_sr_t, 1);
 }
 
+static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc)
+{
+    if (save_pc) {
+        tcg_gen_movi_i32(cpu_pc, ctx->pc);
+    }
+    if (ctx->delayed_pc != (uint32_t) -1) {
+        tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc);
+    }
+    if ((ctx->tbflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL))
+        != ctx->envflags) {
+        tcg_gen_movi_i32(cpu_flags, ctx->envflags);
+    }
+}
+
 static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest)
 {
     if (unlikely(ctx->singlestep_enabled)) {
@@ -246,6 +260,7 @@ static void gen_jump(DisasContext * ctx)
 	/* Target is not statically known, it comes necessarily from a
 	   delayed jump as immediate jump are conditinal jumps */
 	tcg_gen_mov_i32(cpu_pc, cpu_delayed_pc);
+        tcg_gen_discard_i32(cpu_delayed_pc);
 	if (ctx->singlestep_enabled)
             gen_helper_debug(cpu_env);
 	tcg_gen_exit_tb(0);
@@ -254,21 +269,12 @@ static void gen_jump(DisasContext * ctx)
     }
 }
 
-static inline void gen_branch_slot(uint32_t delayed_pc, int t)
-{
-    tcg_gen_movi_i32(cpu_delayed_pc, delayed_pc);
-    if (t) {
-        tcg_gen_mov_i32(cpu_delayed_cond, cpu_sr_t);
-    } else {
-        tcg_gen_xori_i32(cpu_delayed_cond, cpu_sr_t, 1);
-    }
-}
-
 /* Immediate conditional jump (bt or bf) */
 static void gen_conditional_jump(DisasContext * ctx,
 				 target_ulong ift, target_ulong ifnott)
 {
     TCGLabel *l1 = gen_new_label();
+    gen_save_cpu_state(ctx, false);
     tcg_gen_brcondi_i32(TCG_COND_NE, cpu_sr_t, 0, l1);
     gen_goto_tb(ctx, 0, ifnott);
     gen_set_label(l1);
@@ -291,11 +297,6 @@ static void gen_delayed_conditional_jump(DisasContext * ctx)
     gen_jump(ctx);
 }
 
-static inline void gen_store_flags(uint32_t flags)
-{
-    tcg_gen_movi_i32(cpu_flags, flags);
-}
-
 static inline void gen_load_fpr64(TCGv_i64 t, int reg)
 {
     tcg_gen_concat_i32_i64(t, cpu_fregs[reg + 1], cpu_fregs[reg]);
@@ -337,7 +338,7 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
 
 #define CHECK_NOT_DELAY_SLOT \
     if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {     \
-        tcg_gen_movi_i32(cpu_pc, ctx->pc);                           \
+        gen_save_cpu_state(ctx, true);                               \
         gen_helper_raise_slot_illegal_instruction(cpu_env);          \
         ctx->bstate = BS_EXCP;                                       \
         return;                                                      \
@@ -345,7 +346,7 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
 
 #define CHECK_PRIVILEGED                                             \
     if (IS_USER(ctx)) {                                              \
-        tcg_gen_movi_i32(cpu_pc, ctx->pc);                           \
+        gen_save_cpu_state(ctx, true);                               \
         if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
             gen_helper_raise_slot_illegal_instruction(cpu_env);      \
         } else {                                                     \
@@ -357,7 +358,7 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
 
 #define CHECK_FPU_ENABLED                                            \
     if (ctx->tbflags & (1u << SR_FD)) {                              \
-        tcg_gen_movi_i32(cpu_pc, ctx->pc);                           \
+        gen_save_cpu_state(ctx, true);                               \
         if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
             gen_helper_raise_slot_fpu_disable(cpu_env);              \
         } else {                                                     \
@@ -501,14 +502,12 @@ static void _decode_opc(DisasContext * ctx)
     case 0xa000:		/* bra disp */
 	CHECK_NOT_DELAY_SLOT
 	ctx->delayed_pc = ctx->pc + 4 + B11_0s * 2;
-	tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc);
         ctx->envflags |= DELAY_SLOT;
 	return;
     case 0xb000:		/* bsr disp */
 	CHECK_NOT_DELAY_SLOT
 	tcg_gen_movi_i32(cpu_pr, ctx->pc + 4);
 	ctx->delayed_pc = ctx->pc + 4 + B11_0s * 2;
-	tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc);
         ctx->envflags |= DELAY_SLOT;
 	return;
     }
@@ -1165,7 +1164,8 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0x8f00:		/* bf/s label */
 	CHECK_NOT_DELAY_SLOT
-	gen_branch_slot(ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2, 0);
+        tcg_gen_xori_i32(cpu_delayed_cond, cpu_sr_t, 1);
+        ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2;
         ctx->envflags |= DELAY_SLOT_CONDITIONAL;
 	return;
     case 0x8900:		/* bt label */
@@ -1176,7 +1176,8 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0x8d00:		/* bt/s label */
 	CHECK_NOT_DELAY_SLOT
-	gen_branch_slot(ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2, 1);
+        tcg_gen_mov_i32(cpu_delayed_cond, cpu_sr_t);
+        ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2;
         ctx->envflags |= DELAY_SLOT_CONDITIONAL;
 	return;
     case 0x8800:		/* cmp/eq #imm,R0 */
@@ -1285,7 +1286,7 @@ static void _decode_opc(DisasContext * ctx)
 	{
 	    TCGv imm;
 	    CHECK_NOT_DELAY_SLOT
-            tcg_gen_movi_i32(cpu_pc, ctx->pc);
+            gen_save_cpu_state(ctx, true);
 	    imm = tcg_const_i32(B7_0);
             gen_helper_trapa(cpu_env, imm);
 	    tcg_temp_free(imm);
@@ -1792,7 +1793,7 @@ static void _decode_opc(DisasContext * ctx)
 	    ctx->opcode, ctx->pc);
     fflush(stderr);
 #endif
-    tcg_gen_movi_i32(cpu_pc, ctx->pc);
+    gen_save_cpu_state(ctx, true);
     if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
         gen_helper_raise_slot_illegal_instruction(cpu_env);
     } else {
@@ -1810,7 +1811,7 @@ static void decode_opc(DisasContext * ctx)
     if (old_flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
         /* go out of the delay slot */
         ctx->envflags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
-        gen_store_flags(ctx->envflags);
+        tcg_gen_movi_i32(cpu_flags, ctx->envflags);
         ctx->bstate = BS_BRANCH;
         if (old_flags & DELAY_SLOT_CONDITIONAL) {
 	    gen_delayed_conditional_jump(ctx);
@@ -1819,11 +1820,6 @@ static void decode_opc(DisasContext * ctx)
 	}
 
     }
-
-    /* go into a delay slot */
-    if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
-        gen_store_flags(ctx->envflags);
-    }
 }
 
 void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
@@ -1865,7 +1861,7 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
 
         if (unlikely(cpu_breakpoint_test(cs, ctx.pc, BP_ANY))) {
             /* We have hit a breakpoint - make sure PC is up-to-date */
-            tcg_gen_movi_i32(cpu_pc, ctx.pc);
+            gen_save_cpu_state(&ctx, true);
             gen_helper_debug(cpu_env);
             ctx.bstate = BS_EXCP;
             /* The address covered by the breakpoint must be included in
@@ -1896,18 +1892,16 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
     if (tb->cflags & CF_LAST_IO)
         gen_io_end();
     if (cs->singlestep_enabled) {
-        tcg_gen_movi_i32(cpu_pc, ctx.pc);
+        gen_save_cpu_state(&ctx, true);
         gen_helper_debug(cpu_env);
     } else {
 	switch (ctx.bstate) {
         case BS_STOP:
-            tcg_gen_movi_i32(cpu_pc, ctx.pc);
+            gen_save_cpu_state(&ctx, true);
             tcg_gen_exit_tb(0);
             break;
         case BS_NONE:
-            if (ctx.envflags) {
-                gen_store_flags(ctx.envflags);
-	    }
+            gen_save_cpu_state(&ctx, false);
             gen_goto_tb(&ctx, 0, ctx.pc);
             break;
         case BS_EXCP:
@@ -1940,4 +1934,7 @@ void restore_state_to_opc(CPUSH4State *env, TranslationBlock *tb,
 {
     env->pc = data[0];
     env->flags = data[1];
+    /* Theoretically delayed_pc should also be restored. In practice the
+       branch instruction is re-executed after exception, so the delayed
+       branch target will be recomputed. */
 }
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 08/14] target/sh4: fold ctx->bstate = BS_BRANCH into gen_conditional_jump
  2017-05-01 22:10 [Qemu-devel] [PATCH 00/14] target/sh4: misc fixes, cleanup and optimizations Aurelien Jarno
                   ` (6 preceding siblings ...)
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 07/14] target/sh4: only save flags state at the end of the TB Aurelien Jarno
@ 2017-05-01 22:10 ` Aurelien Jarno
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 09/14] target/sh4: optimize gen_store_fpr64 Aurelien Jarno
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 21+ messages in thread
From: Aurelien Jarno @ 2017-05-01 22:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target/sh4/translate.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 8c99e4fb21..a8399239d1 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -279,6 +279,7 @@ static void gen_conditional_jump(DisasContext * ctx,
     gen_goto_tb(ctx, 0, ifnott);
     gen_set_label(l1);
     gen_goto_tb(ctx, 1, ift);
+    ctx->bstate = BS_BRANCH;
 }
 
 /* Delayed conditional jump (bt or bf) */
@@ -1158,9 +1159,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0x8b00:		/* bf label */
 	CHECK_NOT_DELAY_SLOT
-	    gen_conditional_jump(ctx, ctx->pc + 2,
-				 ctx->pc + 4 + B7_0s * 2);
-	ctx->bstate = BS_BRANCH;
+        gen_conditional_jump(ctx, ctx->pc + 2, ctx->pc + 4 + B7_0s * 2);
 	return;
     case 0x8f00:		/* bf/s label */
 	CHECK_NOT_DELAY_SLOT
@@ -1170,9 +1169,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0x8900:		/* bt label */
 	CHECK_NOT_DELAY_SLOT
-	    gen_conditional_jump(ctx, ctx->pc + 4 + B7_0s * 2,
-				 ctx->pc + 2);
-	ctx->bstate = BS_BRANCH;
+        gen_conditional_jump(ctx, ctx->pc + 4 + B7_0s * 2, ctx->pc + 2);
 	return;
     case 0x8d00:		/* bt/s label */
 	CHECK_NOT_DELAY_SLOT
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 09/14] target/sh4: optimize gen_store_fpr64
  2017-05-01 22:10 [Qemu-devel] [PATCH 00/14] target/sh4: misc fixes, cleanup and optimizations Aurelien Jarno
                   ` (7 preceding siblings ...)
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 08/14] target/sh4: fold ctx->bstate = BS_BRANCH into gen_conditional_jump Aurelien Jarno
@ 2017-05-01 22:10 ` Aurelien Jarno
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 10/14] target/sh4: optimize gen_write_sr using extract op Aurelien Jarno
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 21+ messages in thread
From: Aurelien Jarno @ 2017-05-01 22:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

Isuing extrh and avoiding intermediate temps.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target/sh4/translate.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index a8399239d1..23636eeb4c 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -305,13 +305,8 @@ static inline void gen_load_fpr64(TCGv_i64 t, int reg)
 
 static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
 {
-    TCGv_i32 tmp = tcg_temp_new_i32();
-    tcg_gen_extrl_i64_i32(tmp, t);
-    tcg_gen_mov_i32(cpu_fregs[reg + 1], tmp);
-    tcg_gen_shri_i64(t, t, 32);
-    tcg_gen_extrl_i64_i32(tmp, t);
-    tcg_gen_mov_i32(cpu_fregs[reg], tmp);
-    tcg_temp_free_i32(tmp);
+    tcg_gen_extrl_i64_i32(cpu_fregs[reg + 1], t);
+    tcg_gen_extrh_i64_i32(cpu_fregs[reg], t);
 }
 
 #define B3_0 (ctx->opcode & 0xf)
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 10/14] target/sh4: optimize gen_write_sr using extract op
  2017-05-01 22:10 [Qemu-devel] [PATCH 00/14] target/sh4: misc fixes, cleanup and optimizations Aurelien Jarno
                   ` (8 preceding siblings ...)
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 09/14] target/sh4: optimize gen_store_fpr64 Aurelien Jarno
@ 2017-05-01 22:10 ` Aurelien Jarno
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 11/14] target/sh4: generate fences for SH4 Aurelien Jarno
                   ` (3 subsequent siblings)
  13 siblings, 0 replies; 21+ messages in thread
From: Aurelien Jarno @ 2017-05-01 22:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

This doesn't change the generated code on x86, but optimizes it on most
RISC architectures and makes the code simpler to read.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target/sh4/translate.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 23636eeb4c..7de459c9a5 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -204,12 +204,9 @@ static void gen_write_sr(TCGv src)
 {
     tcg_gen_andi_i32(cpu_sr, src,
                      ~((1u << SR_Q) | (1u << SR_M) | (1u << SR_T)));
-    tcg_gen_shri_i32(cpu_sr_q, src, SR_Q);
-    tcg_gen_andi_i32(cpu_sr_q, cpu_sr_q, 1);
-    tcg_gen_shri_i32(cpu_sr_m, src, SR_M);
-    tcg_gen_andi_i32(cpu_sr_m, cpu_sr_m, 1);
-    tcg_gen_shri_i32(cpu_sr_t, src, SR_T);
-    tcg_gen_andi_i32(cpu_sr_t, cpu_sr_t, 1);
+    tcg_gen_extract_i32(cpu_sr_q, src, SR_Q, 1);
+    tcg_gen_extract_i32(cpu_sr_m, src, SR_M, 1);
+    tcg_gen_extract_i32(cpu_sr_t, src, SR_T, 1);
 }
 
 static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc)
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 11/14] target/sh4: generate fences for SH4
  2017-05-01 22:10 [Qemu-devel] [PATCH 00/14] target/sh4: misc fixes, cleanup and optimizations Aurelien Jarno
                   ` (9 preceding siblings ...)
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 10/14] target/sh4: optimize gen_write_sr using extract op Aurelien Jarno
@ 2017-05-01 22:10 ` Aurelien Jarno
  2017-05-02 12:58   ` Philippe Mathieu-Daudé
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 12/14] target/sh4: implement tas.b using atomic helper Aurelien Jarno
                   ` (2 subsequent siblings)
  13 siblings, 1 reply; 21+ messages in thread
From: Aurelien Jarno @ 2017-05-01 22:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

synco is a SH4-A only instruction.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target/sh4/translate.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 7de459c9a5..c226be9718 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -1570,10 +1570,11 @@ static void _decode_opc(DisasContext * ctx)
 	else
 	    break;
     case 0x00ab:		/* synco */
-	if (ctx->features & SH_FEATURE_SH4A)
-	    return;
-	else
-	    break;
+        if (ctx->features & SH_FEATURE_SH4A) {
+            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
+            return;
+        }
+        break;
     case 0x4024:		/* rotcl Rn */
 	{
 	    TCGv tmp = tcg_temp_new();
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 12/14] target/sh4: implement tas.b using atomic helper
  2017-05-01 22:10 [Qemu-devel] [PATCH 00/14] target/sh4: misc fixes, cleanup and optimizations Aurelien Jarno
                   ` (10 preceding siblings ...)
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 11/14] target/sh4: generate fences for SH4 Aurelien Jarno
@ 2017-05-01 22:10 ` Aurelien Jarno
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 13/14] target/sh4: movua.l is an SH4-A only instruction Aurelien Jarno
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 14/14] target/sh4: trap unaligned accesses Aurelien Jarno
  13 siblings, 0 replies; 21+ messages in thread
From: Aurelien Jarno @ 2017-05-01 22:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

We only emulate UP SH4, however as the tas.b instruction is used in the GNU
libc, this improve linux-user emulation.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target/sh4/translate.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index c226be9718..a158b0e480 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -1635,19 +1635,14 @@ static void _decode_opc(DisasContext * ctx)
 	tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 16);
 	return;
     case 0x401b:		/* tas.b @Rn */
-	{
-	    TCGv addr, val;
-	    addr = tcg_temp_local_new();
-	    tcg_gen_mov_i32(addr, REG(B11_8));
-	    val = tcg_temp_local_new();
-            tcg_gen_qemu_ld_i32(val, addr, ctx->memidx, MO_UB);
+        {
+            TCGv val = tcg_const_i32(0x80);
+            tcg_gen_atomic_fetch_or_i32(val, REG(B11_8), val,
+                                        ctx->memidx, MO_UB);
             tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
-	    tcg_gen_ori_i32(val, val, 0x80);
-            tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_UB);
-	    tcg_temp_free(val);
-	    tcg_temp_free(addr);
-	}
-	return;
+            tcg_temp_free(val);
+        }
+        return;
     case 0xf00d: /* fsts FPUL,FRn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
 	tcg_gen_mov_i32(cpu_fregs[FREG(B11_8)], cpu_fpul);
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 13/14] target/sh4: movua.l is an SH4-A only instruction
  2017-05-01 22:10 [Qemu-devel] [PATCH 00/14] target/sh4: misc fixes, cleanup and optimizations Aurelien Jarno
                   ` (11 preceding siblings ...)
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 12/14] target/sh4: implement tas.b using atomic helper Aurelien Jarno
@ 2017-05-01 22:10 ` Aurelien Jarno
  2017-05-02 12:52   ` Philippe Mathieu-Daudé
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 14/14] target/sh4: trap unaligned accesses Aurelien Jarno
  13 siblings, 1 reply; 21+ messages in thread
From: Aurelien Jarno @ 2017-05-01 22:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

At the same time change the comment describing the instruction the same
way than other instruction, so that the code is easier to read and search.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target/sh4/translate.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index a158b0e480..bc70166602 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -1502,17 +1502,21 @@ static void _decode_opc(DisasContext * ctx)
         }
         ctx->has_movcal = 1;
 	return;
-    case 0x40a9:
-	/* MOVUA.L @Rm,R0 (Rm) -> R0
-	   Load non-boundary-aligned data */
-        tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
-	return;
-    case 0x40e9:
-	/* MOVUA.L @Rm+,R0   (Rm) -> R0, Rm + 4 -> Rm
-	   Load non-boundary-aligned data */
-        tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
-	tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
-	return;
+    case 0x40a9:                /* movua.l @Rm,R0 */
+        /* Load non-boundary-aligned data */
+        if (ctx->features & SH_FEATURE_SH4A) {
+            tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
+            return;
+        }
+        break;
+    case 0x40e9:                /* movua.l @Rm+,R0 */
+        /* Load non-boundary-aligned data */
+        if (ctx->features & SH_FEATURE_SH4A) {
+            tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
+            tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
+            return;
+        }
+        break;
     case 0x0029:		/* movt Rn */
         tcg_gen_mov_i32(REG(B11_8), cpu_sr_t);
 	return;
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 14/14] target/sh4: trap unaligned accesses
  2017-05-01 22:10 [Qemu-devel] [PATCH 00/14] target/sh4: misc fixes, cleanup and optimizations Aurelien Jarno
                   ` (12 preceding siblings ...)
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 13/14] target/sh4: movua.l is an SH4-A only instruction Aurelien Jarno
@ 2017-05-01 22:10 ` Aurelien Jarno
  2017-05-02 12:57   ` Philippe Mathieu-Daudé
  13 siblings, 1 reply; 21+ messages in thread
From: Aurelien Jarno @ 2017-05-01 22:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

SH4 requires that memory accesses are naturally aligned, except for the
SH4-A movua.l instructions which can do unaligned loads.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target/sh4/cpu.c       |  1 +
 target/sh4/cpu.h       |  4 ++++
 target/sh4/op_helper.c | 19 +++++++++++++++++++
 target/sh4/translate.c |  6 ++++--
 4 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
index 9a481c35dc..9da7e1ed38 100644
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@@ -301,6 +301,7 @@ static void superh_cpu_class_init(ObjectClass *oc, void *data)
 #ifdef CONFIG_USER_ONLY
     cc->handle_mmu_fault = superh_cpu_handle_mmu_fault;
 #else
+    cc->do_unaligned_access = superh_cpu_do_unaligned_access;
     cc->get_phys_page_debug = superh_cpu_get_phys_page_debug;
 #endif
     cc->disas_set_info = superh_cpu_disas_set_info;
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index faab3012f9..6c07c6b24b 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -24,6 +24,7 @@
 #include "cpu-qom.h"
 
 #define TARGET_LONG_BITS 32
+#define ALIGNED_ONLY
 
 /* CPU Subtypes */
 #define SH_CPU_SH7750  (1 << 0)
@@ -215,6 +216,9 @@ void superh_cpu_dump_state(CPUState *cpu, FILE *f,
 hwaddr superh_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
 int superh_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
 int superh_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+void superh_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
+                                    MMUAccessType access_type,
+                                    int mmu_idx, uintptr_t retaddr);
 
 void sh4_translate_init(void);
 SuperHCPU *cpu_sh4_init(const char *cpu_model);
diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
index 684d3f3758..4abd05667c 100644
--- a/target/sh4/op_helper.c
+++ b/target/sh4/op_helper.c
@@ -24,6 +24,25 @@
 
 #ifndef CONFIG_USER_ONLY
 
+void superh_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
+                                    MMUAccessType access_type,
+                                    int mmu_idx, uintptr_t retaddr)
+{
+    if (retaddr) {
+        cpu_restore_state(cs, retaddr);
+    }
+    switch (access_type) {
+    case MMU_INST_FETCH:
+    case MMU_DATA_LOAD:
+        cs->exception_index = 0x0e0;
+        break;
+    case MMU_DATA_STORE:
+        cs->exception_index = 0x100;
+        break;
+    }
+    cpu_loop_exit(cs);
+}
+
 void tlb_fill(CPUState *cs, target_ulong addr, MMUAccessType access_type,
               int mmu_idx, uintptr_t retaddr)
 {
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index bc70166602..fe3f73b7c0 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -1505,14 +1505,16 @@ static void _decode_opc(DisasContext * ctx)
     case 0x40a9:                /* movua.l @Rm,R0 */
         /* Load non-boundary-aligned data */
         if (ctx->features & SH_FEATURE_SH4A) {
-            tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
+            tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
+                                MO_TEUL | MO_UNALN);
             return;
         }
         break;
     case 0x40e9:                /* movua.l @Rm+,R0 */
         /* Load non-boundary-aligned data */
         if (ctx->features & SH_FEATURE_SH4A) {
-            tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
+            tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
+                                MO_TEUL | MO_UNALN);
             tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
             return;
         }
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 01/14] target/sh4: split ctx->flags into ctx->tbflags and ctx->envflags
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 01/14] target/sh4: split ctx->flags into ctx->tbflags and ctx->envflags Aurelien Jarno
@ 2017-05-02 12:16   ` Philippe Mathieu-Daudé
  2017-05-02 19:38     ` Aurelien Jarno
  0 siblings, 1 reply; 21+ messages in thread
From: Philippe Mathieu-Daudé @ 2017-05-02 12:16 UTC (permalink / raw)
  To: Aurelien Jarno, qemu-devel

Hi Aurelien,

On 05/01/2017 07:10 PM, Aurelien Jarno wrote:
> There is a confusion (and not only in the SH4 target) between tb->flags,
> env->flags and ctx->flags. To avoid it, split ctx->flags into
> ctx->tbflags and ctx->envflags. ctx->tbflags stays unchanged during the
> whole TB translation, while ctx->envflags evolves and is kept in sync
> with env->flags using TCG instructions. ctx->envflags now only contains
> the part that of env->flags that is contained in the TB state, i.e. the
> DELAY_SLOT* flags.

I agree with your split, but I'd rather put the 2nd part of your commit 
description as comments in the struct DisasContext declaration, if you 
mind :)

>
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
> ---
>  target/sh4/translate.c | 161 +++++++++++++++++++++++++------------------------
>  1 file changed, 82 insertions(+), 79 deletions(-)
>
> diff --git a/target/sh4/translate.c b/target/sh4/translate.c
> index c89a14733f..0b16ff33ea 100644
> --- a/target/sh4/translate.c
> +++ b/target/sh4/translate.c
> @@ -37,7 +37,8 @@ typedef struct DisasContext {
>      struct TranslationBlock *tb;
>      target_ulong pc;
>      uint16_t opcode;
> -    uint32_t flags;
> +    uint32_t tbflags;
> +    uint32_t envflags;
>      int bstate;
>      int memidx;
>      uint32_t delayed_pc;
> @@ -49,7 +50,7 @@ typedef struct DisasContext {
>  #if defined(CONFIG_USER_ONLY)
>  #define IS_USER(ctx) 1
>  #else
> -#define IS_USER(ctx) (!(ctx->flags & (1u << SR_MD)))
> +#define IS_USER(ctx) (!(ctx->tbflags & (1u << SR_MD)))
>  #endif
>
>  enum {
> @@ -317,51 +318,50 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
>  #define B11_8 ((ctx->opcode >> 8) & 0xf)
>  #define B15_12 ((ctx->opcode >> 12) & 0xf)
>
> -#define REG(x) ((x) < 8 && (ctx->flags & (1u << SR_MD))\
> -                        && (ctx->flags & (1u << SR_RB))\
> +#define REG(x) ((x) < 8 && (ctx->tbflags & (1u << SR_MD))\
> +                        && (ctx->tbflags & (1u << SR_RB))\
>                  ? (cpu_gregs[x + 16]) : (cpu_gregs[x]))
>
> -#define ALTREG(x) ((x) < 8 && (!(ctx->flags & (1u << SR_MD))\
> -                               || !(ctx->flags & (1u << SR_RB)))\
> +#define ALTREG(x) ((x) < 8 && (!(ctx->tbflags & (1u << SR_MD))\
> +                               || !(ctx->tbflags & (1u << SR_RB)))\
>  		? (cpu_gregs[x + 16]) : (cpu_gregs[x]))
>
> -#define FREG(x) (ctx->flags & FPSCR_FR ? (x) ^ 0x10 : (x))
> +#define FREG(x) (ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x))
>  #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))
> -#define XREG(x) (ctx->flags & FPSCR_FR ? XHACK(x) ^ 0x10 : XHACK(x))
> +#define XREG(x) (ctx->tbflags & FPSCR_FR ? XHACK(x) ^ 0x10 : XHACK(x))
>  #define DREG(x) FREG(x) /* Assumes lsb of (x) is always 0 */
>
>  #define CHECK_NOT_DELAY_SLOT \
> -  if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL))     \
> -  {                                                           \
> -      tcg_gen_movi_i32(cpu_pc, ctx->pc);                      \
> -      gen_helper_raise_slot_illegal_instruction(cpu_env);     \
> -      ctx->bstate = BS_BRANCH;                                \
> -      return;                                                 \
> -  }
> -
> -#define CHECK_PRIVILEGED                                        \
> -  if (IS_USER(ctx)) {                                           \
> -      tcg_gen_movi_i32(cpu_pc, ctx->pc);                        \
> -      if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
> -          gen_helper_raise_slot_illegal_instruction(cpu_env);   \
> -      } else {                                                  \
> -          gen_helper_raise_illegal_instruction(cpu_env);        \
> -      }                                                         \
> -      ctx->bstate = BS_BRANCH;                                  \
> -      return;                                                   \
> -  }
> -
> -#define CHECK_FPU_ENABLED                                       \
> -  if (ctx->flags & (1u << SR_FD)) {                             \
> -      tcg_gen_movi_i32(cpu_pc, ctx->pc);                        \
> -      if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
> -          gen_helper_raise_slot_fpu_disable(cpu_env);           \
> -      } else {                                                  \
> -          gen_helper_raise_fpu_disable(cpu_env);                \
> -      }                                                         \
> -      ctx->bstate = BS_BRANCH;                                  \
> -      return;                                                   \
> -  }
> +    if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {     \
> +        tcg_gen_movi_i32(cpu_pc, ctx->pc);                           \
> +        gen_helper_raise_slot_illegal_instruction(cpu_env);          \
> +        ctx->bstate = BS_BRANCH;                                     \
> +        return;                                                      \
> +    }
> +
> +#define CHECK_PRIVILEGED                                             \
> +    if (IS_USER(ctx)) {                                              \
> +        tcg_gen_movi_i32(cpu_pc, ctx->pc);                           \
> +        if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
> +            gen_helper_raise_slot_illegal_instruction(cpu_env);      \
> +        } else {                                                     \
> +            gen_helper_raise_illegal_instruction(cpu_env);           \
> +        }                                                            \
> +        ctx->bstate = BS_BRANCH;                                     \
> +        return;                                                      \
> +    }
> +
> +#define CHECK_FPU_ENABLED                                            \
> +    if (ctx->tbflags & (1u << SR_FD)) {                              \
> +        tcg_gen_movi_i32(cpu_pc, ctx->pc);                           \
> +        if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
> +            gen_helper_raise_slot_fpu_disable(cpu_env);              \
> +        } else {                                                     \
> +            gen_helper_raise_fpu_disable(cpu_env);                   \
> +        }                                                            \
> +        ctx->bstate = BS_BRANCH;                                     \
> +        return;                                                      \
> +    }
>
>  static void _decode_opc(DisasContext * ctx)
>  {
> @@ -409,7 +409,7 @@ static void _decode_opc(DisasContext * ctx)
>      case 0x000b:		/* rts */
>  	CHECK_NOT_DELAY_SLOT
>  	tcg_gen_mov_i32(cpu_delayed_pc, cpu_pr);
> -	ctx->flags |= DELAY_SLOT;
> +        ctx->envflags |= DELAY_SLOT;
>  	ctx->delayed_pc = (uint32_t) - 1;
>  	return;
>      case 0x0028:		/* clrmac */
> @@ -431,7 +431,7 @@ static void _decode_opc(DisasContext * ctx)
>  	CHECK_NOT_DELAY_SLOT
>          gen_write_sr(cpu_ssr);
>  	tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
> -	ctx->flags |= DELAY_SLOT;
> +        ctx->envflags |= DELAY_SLOT;
>  	ctx->delayed_pc = (uint32_t) - 1;
>  	return;
>      case 0x0058:		/* sets */
> @@ -498,14 +498,14 @@ static void _decode_opc(DisasContext * ctx)
>  	CHECK_NOT_DELAY_SLOT
>  	ctx->delayed_pc = ctx->pc + 4 + B11_0s * 2;
>  	tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc);
> -	ctx->flags |= DELAY_SLOT;
> +        ctx->envflags |= DELAY_SLOT;
>  	return;
>      case 0xb000:		/* bsr disp */
>  	CHECK_NOT_DELAY_SLOT
>  	tcg_gen_movi_i32(cpu_pr, ctx->pc + 4);
>  	ctx->delayed_pc = ctx->pc + 4 + B11_0s * 2;
>  	tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc);
> -	ctx->flags |= DELAY_SLOT;
> +        ctx->envflags |= DELAY_SLOT;
>  	return;
>      }
>
> @@ -939,7 +939,7 @@ static void _decode_opc(DisasContext * ctx)
>  	return;
>      case 0xf00c: /* fmov {F,D,X}Rm,{F,D,X}Rn - FPSCR: Nothing */
>  	CHECK_FPU_ENABLED
> -        if (ctx->flags & FPSCR_SZ) {
> +        if (ctx->tbflags & FPSCR_SZ) {
>  	    TCGv_i64 fp = tcg_temp_new_i64();
>  	    gen_load_fpr64(fp, XREG(B7_4));
>  	    gen_store_fpr64(fp, XREG(B11_8));
> @@ -950,7 +950,7 @@ static void _decode_opc(DisasContext * ctx)
>  	return;
>      case 0xf00a: /* fmov {F,D,X}Rm,@Rn - FPSCR: Nothing */
>  	CHECK_FPU_ENABLED
> -        if (ctx->flags & FPSCR_SZ) {
> +        if (ctx->tbflags & FPSCR_SZ) {
>  	    TCGv addr_hi = tcg_temp_new();
>  	    int fr = XREG(B7_4);
>  	    tcg_gen_addi_i32(addr_hi, REG(B11_8), 4);
> @@ -966,7 +966,7 @@ static void _decode_opc(DisasContext * ctx)
>  	return;
>      case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */
>  	CHECK_FPU_ENABLED
> -        if (ctx->flags & FPSCR_SZ) {
> +        if (ctx->tbflags & FPSCR_SZ) {
>  	    TCGv addr_hi = tcg_temp_new();
>  	    int fr = XREG(B11_8);
>  	    tcg_gen_addi_i32(addr_hi, REG(B7_4), 4);
> @@ -980,7 +980,7 @@ static void _decode_opc(DisasContext * ctx)
>  	return;
>      case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */
>  	CHECK_FPU_ENABLED
> -        if (ctx->flags & FPSCR_SZ) {
> +        if (ctx->tbflags & FPSCR_SZ) {
>  	    TCGv addr_hi = tcg_temp_new();
>  	    int fr = XREG(B11_8);
>  	    tcg_gen_addi_i32(addr_hi, REG(B7_4), 4);
> @@ -998,7 +998,7 @@ static void _decode_opc(DisasContext * ctx)
>  	CHECK_FPU_ENABLED
>          TCGv addr = tcg_temp_new_i32();
>          tcg_gen_subi_i32(addr, REG(B11_8), 4);
> -        if (ctx->flags & FPSCR_SZ) {
> +        if (ctx->tbflags & FPSCR_SZ) {
>  	    int fr = XREG(B7_4);
>              tcg_gen_qemu_st_i32(cpu_fregs[fr+1], addr, ctx->memidx, MO_TEUL);
>  	    tcg_gen_subi_i32(addr, addr, 4);
> @@ -1015,7 +1015,7 @@ static void _decode_opc(DisasContext * ctx)
>  	{
>  	    TCGv addr = tcg_temp_new_i32();
>  	    tcg_gen_add_i32(addr, REG(B7_4), REG(0));
> -            if (ctx->flags & FPSCR_SZ) {
> +            if (ctx->tbflags & FPSCR_SZ) {
>  		int fr = XREG(B11_8);
>                  tcg_gen_qemu_ld_i32(cpu_fregs[fr], addr,
>                                      ctx->memidx, MO_TEUL);
> @@ -1034,7 +1034,7 @@ static void _decode_opc(DisasContext * ctx)
>  	{
>  	    TCGv addr = tcg_temp_new();
>  	    tcg_gen_add_i32(addr, REG(B11_8), REG(0));
> -            if (ctx->flags & FPSCR_SZ) {
> +            if (ctx->tbflags & FPSCR_SZ) {
>  		int fr = XREG(B7_4);
>                  tcg_gen_qemu_ld_i32(cpu_fregs[fr], addr,
>                                      ctx->memidx, MO_TEUL);
> @@ -1056,7 +1056,7 @@ static void _decode_opc(DisasContext * ctx)
>      case 0xf005: /* fcmp/gt Rm,Rn - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
>  	{
>  	    CHECK_FPU_ENABLED
> -            if (ctx->flags & FPSCR_PR) {
> +            if (ctx->tbflags & FPSCR_PR) {
>                  TCGv_i64 fp0, fp1;
>
>  		if (ctx->opcode & 0x0110)
> @@ -1125,7 +1125,7 @@ static void _decode_opc(DisasContext * ctx)
>      case 0xf00e: /* fmac FR0,RM,Rn */
>          {
>              CHECK_FPU_ENABLED
> -            if (ctx->flags & FPSCR_PR) {
> +            if (ctx->tbflags & FPSCR_PR) {
>                  break; /* illegal instruction */
>              } else {
>                  gen_helper_fmac_FT(cpu_fregs[FREG(B11_8)], cpu_env,
> @@ -1162,7 +1162,7 @@ static void _decode_opc(DisasContext * ctx)
>      case 0x8f00:		/* bf/s label */
>  	CHECK_NOT_DELAY_SLOT
>  	gen_branch_slot(ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2, 0);
> -	ctx->flags |= DELAY_SLOT_CONDITIONAL;
> +        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
>  	return;
>      case 0x8900:		/* bt label */
>  	CHECK_NOT_DELAY_SLOT
> @@ -1173,7 +1173,7 @@ static void _decode_opc(DisasContext * ctx)
>      case 0x8d00:		/* bt/s label */
>  	CHECK_NOT_DELAY_SLOT
>  	gen_branch_slot(ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2, 1);
> -	ctx->flags |= DELAY_SLOT_CONDITIONAL;
> +        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
>  	return;
>      case 0x8800:		/* cmp/eq #imm,R0 */
>          tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(0), B7_0s);
> @@ -1354,14 +1354,14 @@ static void _decode_opc(DisasContext * ctx)
>      case 0x0023:		/* braf Rn */
>  	CHECK_NOT_DELAY_SLOT
>  	tcg_gen_addi_i32(cpu_delayed_pc, REG(B11_8), ctx->pc + 4);
> -	ctx->flags |= DELAY_SLOT;
> +        ctx->envflags |= DELAY_SLOT;
>  	ctx->delayed_pc = (uint32_t) - 1;
>  	return;
>      case 0x0003:		/* bsrf Rn */
>  	CHECK_NOT_DELAY_SLOT
>  	tcg_gen_movi_i32(cpu_pr, ctx->pc + 4);
>  	tcg_gen_add_i32(cpu_delayed_pc, REG(B11_8), cpu_pr);
> -	ctx->flags |= DELAY_SLOT;
> +        ctx->envflags |= DELAY_SLOT;
>  	ctx->delayed_pc = (uint32_t) - 1;
>  	return;
>      case 0x4015:		/* cmp/pl Rn */
> @@ -1377,14 +1377,14 @@ static void _decode_opc(DisasContext * ctx)
>      case 0x402b:		/* jmp @Rn */
>  	CHECK_NOT_DELAY_SLOT
>  	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
> -	ctx->flags |= DELAY_SLOT;
> +        ctx->envflags |= DELAY_SLOT;
>  	ctx->delayed_pc = (uint32_t) - 1;
>  	return;
>      case 0x400b:		/* jsr @Rn */
>  	CHECK_NOT_DELAY_SLOT
>  	tcg_gen_movi_i32(cpu_pr, ctx->pc + 4);
>  	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
> -	ctx->flags |= DELAY_SLOT;
> +        ctx->envflags |= DELAY_SLOT;
>  	ctx->delayed_pc = (uint32_t) - 1;
>  	return;
>      case 0x400e:		/* ldc Rm,SR */
> @@ -1663,7 +1663,7 @@ static void _decode_opc(DisasContext * ctx)
>  	return;
>      case 0xf02d: /* float FPUL,FRn/DRn - FPSCR: R[PR,Enable.I]/W[Cause,Flag] */
>  	CHECK_FPU_ENABLED
> -        if (ctx->flags & FPSCR_PR) {
> +        if (ctx->tbflags & FPSCR_PR) {
>  	    TCGv_i64 fp;
>  	    if (ctx->opcode & 0x0100)
>  		break; /* illegal instruction */
> @@ -1678,7 +1678,7 @@ static void _decode_opc(DisasContext * ctx)
>  	return;
>      case 0xf03d: /* ftrc FRm/DRm,FPUL - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
>  	CHECK_FPU_ENABLED
> -        if (ctx->flags & FPSCR_PR) {
> +        if (ctx->tbflags & FPSCR_PR) {
>  	    TCGv_i64 fp;
>  	    if (ctx->opcode & 0x0100)
>  		break; /* illegal instruction */
> @@ -1699,7 +1699,7 @@ static void _decode_opc(DisasContext * ctx)
>  	return;
>      case 0xf05d: /* fabs FRn/DRn */
>  	CHECK_FPU_ENABLED
> -        if (ctx->flags & FPSCR_PR) {
> +        if (ctx->tbflags & FPSCR_PR) {
>  	    if (ctx->opcode & 0x0100)
>  		break; /* illegal instruction */
>  	    TCGv_i64 fp = tcg_temp_new_i64();
> @@ -1713,7 +1713,7 @@ static void _decode_opc(DisasContext * ctx)
>  	return;
>      case 0xf06d: /* fsqrt FRn */
>  	CHECK_FPU_ENABLED
> -        if (ctx->flags & FPSCR_PR) {
> +        if (ctx->tbflags & FPSCR_PR) {
>  	    if (ctx->opcode & 0x0100)
>  		break; /* illegal instruction */
>  	    TCGv_i64 fp = tcg_temp_new_i64();
> @@ -1731,13 +1731,13 @@ static void _decode_opc(DisasContext * ctx)
>  	break;
>      case 0xf08d: /* fldi0 FRn - FPSCR: R[PR] */
>  	CHECK_FPU_ENABLED
> -        if (!(ctx->flags & FPSCR_PR)) {
> +        if (!(ctx->tbflags & FPSCR_PR)) {
>  	    tcg_gen_movi_i32(cpu_fregs[FREG(B11_8)], 0);
>  	}
>  	return;
>      case 0xf09d: /* fldi1 FRn - FPSCR: R[PR] */
>  	CHECK_FPU_ENABLED
> -        if (!(ctx->flags & FPSCR_PR)) {
> +        if (!(ctx->tbflags & FPSCR_PR)) {
>  	    tcg_gen_movi_i32(cpu_fregs[FREG(B11_8)], 0x3f800000);
>  	}
>  	return;
> @@ -1761,7 +1761,7 @@ static void _decode_opc(DisasContext * ctx)
>  	return;
>      case 0xf0ed: /* fipr FVm,FVn */
>          CHECK_FPU_ENABLED
> -        if ((ctx->flags & FPSCR_PR) == 0) {
> +        if ((ctx->tbflags & FPSCR_PR) == 0) {
>              TCGv m, n;
>              m = tcg_const_i32((ctx->opcode >> 8) & 3);
>              n = tcg_const_i32((ctx->opcode >> 10) & 3);
> @@ -1774,7 +1774,7 @@ static void _decode_opc(DisasContext * ctx)
>      case 0xf0fd: /* ftrv XMTRX,FVn */
>          CHECK_FPU_ENABLED
>          if ((ctx->opcode & 0x0300) == 0x0100 &&
> -            (ctx->flags & FPSCR_PR) == 0) {
> +            (ctx->tbflags & FPSCR_PR) == 0) {
>              TCGv n;
>              n = tcg_const_i32((ctx->opcode >> 10) & 3);
>              gen_helper_ftrv(cpu_env, n);
> @@ -1789,7 +1789,7 @@ static void _decode_opc(DisasContext * ctx)
>      fflush(stderr);
>  #endif
>      tcg_gen_movi_i32(cpu_pc, ctx->pc);
> -    if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
> +    if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
>          gen_helper_raise_slot_illegal_instruction(cpu_env);
>      } else {
>          gen_helper_raise_illegal_instruction(cpu_env);
> @@ -1799,20 +1799,20 @@ static void _decode_opc(DisasContext * ctx)
>
>  static void decode_opc(DisasContext * ctx)
>  {
> -    uint32_t old_flags = ctx->flags;
> +    uint32_t old_flags = ctx->envflags;
>
>      _decode_opc(ctx);
>
>      if (old_flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
> -        if (ctx->flags & DELAY_SLOT_CLEARME) {
> +        if (ctx->envflags & DELAY_SLOT_CLEARME) {
>              gen_store_flags(0);
>          } else {
>  	    /* go out of the delay slot */
> -	    uint32_t new_flags = ctx->flags;
> +            uint32_t new_flags = ctx->envflags;
>  	    new_flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
>  	    gen_store_flags(new_flags);
>          }
> -        ctx->flags = 0;
> +        ctx->envflags = 0;
>          ctx->bstate = BS_BRANCH;
>          if (old_flags & DELAY_SLOT_CONDITIONAL) {
>  	    gen_delayed_conditional_jump(ctx);
> @@ -1823,8 +1823,9 @@ static void decode_opc(DisasContext * ctx)
>      }
>
>      /* go into a delay slot */
> -    if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL))
> -        gen_store_flags(ctx->flags);
> +    if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
> +        gen_store_flags(ctx->envflags);
> +    }
>  }
>
>  void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
> @@ -1838,16 +1839,18 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
>
>      pc_start = tb->pc;
>      ctx.pc = pc_start;
> -    ctx.flags = (uint32_t)tb->flags;
> +    ctx.tbflags = (uint32_t)tb->flags;
> +    ctx.envflags = tb->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL |
> +                                DELAY_SLOT_CLEARME);
>      ctx.bstate = BS_NONE;
> -    ctx.memidx = (ctx.flags & (1u << SR_MD)) == 0 ? 1 : 0;
> +    ctx.memidx = (ctx.tbflags & (1u << SR_MD)) == 0 ? 1 : 0;
>      /* We don't know if the delayed pc came from a dynamic or static branch,
>         so assume it is a dynamic branch.  */
>      ctx.delayed_pc = -1; /* use delayed pc from env pointer */
>      ctx.tb = tb;
>      ctx.singlestep_enabled = cs->singlestep_enabled;
>      ctx.features = env->features;
> -    ctx.has_movcal = (ctx.flags & TB_FLAG_PENDING_MOVCA);
> +    ctx.has_movcal = (ctx.tbflags & TB_FLAG_PENDING_MOVCA);
>
>      num_insns = 0;
>      max_insns = tb->cflags & CF_COUNT_MASK;
> @@ -1860,7 +1863,7 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
>
>      gen_tb_start(tb);
>      while (ctx.bstate == BS_NONE && !tcg_op_buf_full()) {
> -        tcg_gen_insn_start(ctx.pc, ctx.flags);
> +        tcg_gen_insn_start(ctx.pc, ctx.envflags);
>          num_insns++;
>
>          if (unlikely(cpu_breakpoint_test(cs, ctx.pc, BP_ANY))) {
> @@ -1904,8 +1907,8 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
>              /* gen_op_interrupt_restart(); */
>              /* fall through */
>          case BS_NONE:
> -            if (ctx.flags) {
> -                gen_store_flags(ctx.flags | DELAY_SLOT_CLEARME);
> +            if (ctx.envflags) {
> +                gen_store_flags(ctx.envflags | DELAY_SLOT_CLEARME);
>  	    }
>              gen_goto_tb(&ctx, 0, ctx.pc);
>              break;
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 02/14] target/sh4: get rid of DELAY_SLOT_CLEARME
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 02/14] target/sh4: get rid of DELAY_SLOT_CLEARME Aurelien Jarno
@ 2017-05-02 12:32   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 21+ messages in thread
From: Philippe Mathieu-Daudé @ 2017-05-02 12:32 UTC (permalink / raw)
  To: Aurelien Jarno, qemu-devel

On 05/01/2017 07:10 PM, Aurelien Jarno wrote:
> Now that ctx->flags has been split, it becomes clear that
> DELAY_SLOT_CLEARME has not impact on the code generation: in both case
> ctx->envflags is cleared, either by clearing all the flags, or by
> setting it to 0. This is left-over from pre-TCG era.
>
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>

> ---
>  target/sh4/cpu.h       |  3 +--
>  target/sh4/helper.c    |  2 --
>  target/sh4/translate.c | 17 +++++------------
>  3 files changed, 6 insertions(+), 16 deletions(-)
>
> diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
> index cad8989f7e..9445cc779f 100644
> --- a/target/sh4/cpu.h
> +++ b/target/sh4/cpu.h
> @@ -93,7 +93,6 @@
>  #define DELAY_SLOT             (1 << 0)
>  #define DELAY_SLOT_CONDITIONAL (1 << 1)
>  #define DELAY_SLOT_TRUE        (1 << 2)
> -#define DELAY_SLOT_CLEARME     (1 << 3)
>  /* The dynamic value of the DELAY_SLOT_TRUE flag determines whether the jump
>   * after the delay slot should be taken or not. It is calculated from SR_T.
>   *
> @@ -384,7 +383,7 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc,
>      *pc = env->pc;
>      *cs_base = 0;
>      *flags = (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL
> -                    | DELAY_SLOT_TRUE | DELAY_SLOT_CLEARME))   /* Bits  0- 3 */
> +                    | DELAY_SLOT_TRUE))                        /* Bits  0- 2 */
>              | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR))  /* Bits 19-21 */
>              | (env->sr & ((1u << SR_MD) | (1u << SR_RB)))      /* Bits 29-30 */
>              | (env->sr & (1u << SR_FD))                        /* Bit 15 */
> diff --git a/target/sh4/helper.c b/target/sh4/helper.c
> index 036c5ca56c..71bb49a670 100644
> --- a/target/sh4/helper.c
> +++ b/target/sh4/helper.c
> @@ -170,8 +170,6 @@ void superh_cpu_do_interrupt(CPUState *cs)
>  	/* Clear flags for exception/interrupt routine. */
>  	env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL | DELAY_SLOT_TRUE);
>      }
> -    if (env->flags & DELAY_SLOT_CLEARME)
> -        env->flags = 0;
>
>      if (do_exp) {
>          env->expevt = cs->exception_index;
> diff --git a/target/sh4/translate.c b/target/sh4/translate.c
> index 0b16ff33ea..d4cdf746dc 100644
> --- a/target/sh4/translate.c
> +++ b/target/sh4/translate.c
> @@ -1804,15 +1804,9 @@ static void decode_opc(DisasContext * ctx)
>      _decode_opc(ctx);
>
>      if (old_flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
> -        if (ctx->envflags & DELAY_SLOT_CLEARME) {
> -            gen_store_flags(0);
> -        } else {
> -	    /* go out of the delay slot */
> -            uint32_t new_flags = ctx->envflags;
> -	    new_flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
> -	    gen_store_flags(new_flags);
> -        }
> -        ctx->envflags = 0;
> +        /* go out of the delay slot */
> +        ctx->envflags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
> +        gen_store_flags(ctx->envflags);
>          ctx->bstate = BS_BRANCH;
>          if (old_flags & DELAY_SLOT_CONDITIONAL) {
>  	    gen_delayed_conditional_jump(ctx);
> @@ -1840,8 +1834,7 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
>      pc_start = tb->pc;
>      ctx.pc = pc_start;
>      ctx.tbflags = (uint32_t)tb->flags;
> -    ctx.envflags = tb->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL |
> -                                DELAY_SLOT_CLEARME);
> +    ctx.envflags = tb->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
>      ctx.bstate = BS_NONE;
>      ctx.memidx = (ctx.tbflags & (1u << SR_MD)) == 0 ? 1 : 0;
>      /* We don't know if the delayed pc came from a dynamic or static branch,
> @@ -1908,7 +1901,7 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
>              /* fall through */
>          case BS_NONE:
>              if (ctx.envflags) {
> -                gen_store_flags(ctx.envflags | DELAY_SLOT_CLEARME);
> +                gen_store_flags(ctx.envflags);
>  	    }
>              gen_goto_tb(&ctx, 0, ctx.pc);
>              break;
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 13/14] target/sh4: movua.l is an SH4-A only instruction
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 13/14] target/sh4: movua.l is an SH4-A only instruction Aurelien Jarno
@ 2017-05-02 12:52   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 21+ messages in thread
From: Philippe Mathieu-Daudé @ 2017-05-02 12:52 UTC (permalink / raw)
  To: Aurelien Jarno, qemu-devel

On 05/01/2017 07:10 PM, Aurelien Jarno wrote:
> At the same time change the comment describing the instruction the same
> way than other instruction, so that the code is easier to read and search.
>
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>

> ---
>  target/sh4/translate.c | 26 +++++++++++++++-----------
>  1 file changed, 15 insertions(+), 11 deletions(-)
>
> diff --git a/target/sh4/translate.c b/target/sh4/translate.c
> index a158b0e480..bc70166602 100644
> --- a/target/sh4/translate.c
> +++ b/target/sh4/translate.c
> @@ -1502,17 +1502,21 @@ static void _decode_opc(DisasContext * ctx)
>          }
>          ctx->has_movcal = 1;
>  	return;
> -    case 0x40a9:
> -	/* MOVUA.L @Rm,R0 (Rm) -> R0
> -	   Load non-boundary-aligned data */
> -        tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
> -	return;
> -    case 0x40e9:
> -	/* MOVUA.L @Rm+,R0   (Rm) -> R0, Rm + 4 -> Rm
> -	   Load non-boundary-aligned data */
> -        tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
> -	tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
> -	return;
> +    case 0x40a9:                /* movua.l @Rm,R0 */
> +        /* Load non-boundary-aligned data */
> +        if (ctx->features & SH_FEATURE_SH4A) {
> +            tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
> +            return;
> +        }
> +        break;
> +    case 0x40e9:                /* movua.l @Rm+,R0 */
> +        /* Load non-boundary-aligned data */
> +        if (ctx->features & SH_FEATURE_SH4A) {
> +            tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
> +            tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
> +            return;
> +        }
> +        break;
>      case 0x0029:		/* movt Rn */
>          tcg_gen_mov_i32(REG(B11_8), cpu_sr_t);
>  	return;
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 14/14] target/sh4: trap unaligned accesses
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 14/14] target/sh4: trap unaligned accesses Aurelien Jarno
@ 2017-05-02 12:57   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 21+ messages in thread
From: Philippe Mathieu-Daudé @ 2017-05-02 12:57 UTC (permalink / raw)
  To: Aurelien Jarno, qemu-devel

On 05/01/2017 07:10 PM, Aurelien Jarno wrote:
> SH4 requires that memory accesses are naturally aligned, except for the
> SH4-A movua.l instructions which can do unaligned loads.
>
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>

> ---
>  target/sh4/cpu.c       |  1 +
>  target/sh4/cpu.h       |  4 ++++
>  target/sh4/op_helper.c | 19 +++++++++++++++++++
>  target/sh4/translate.c |  6 ++++--
>  4 files changed, 28 insertions(+), 2 deletions(-)
>
> diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
> index 9a481c35dc..9da7e1ed38 100644
> --- a/target/sh4/cpu.c
> +++ b/target/sh4/cpu.c
> @@ -301,6 +301,7 @@ static void superh_cpu_class_init(ObjectClass *oc, void *data)
>  #ifdef CONFIG_USER_ONLY
>      cc->handle_mmu_fault = superh_cpu_handle_mmu_fault;
>  #else
> +    cc->do_unaligned_access = superh_cpu_do_unaligned_access;
>      cc->get_phys_page_debug = superh_cpu_get_phys_page_debug;
>  #endif
>      cc->disas_set_info = superh_cpu_disas_set_info;
> diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
> index faab3012f9..6c07c6b24b 100644
> --- a/target/sh4/cpu.h
> +++ b/target/sh4/cpu.h
> @@ -24,6 +24,7 @@
>  #include "cpu-qom.h"
>
>  #define TARGET_LONG_BITS 32
> +#define ALIGNED_ONLY
>
>  /* CPU Subtypes */
>  #define SH_CPU_SH7750  (1 << 0)
> @@ -215,6 +216,9 @@ void superh_cpu_dump_state(CPUState *cpu, FILE *f,
>  hwaddr superh_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
>  int superh_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
>  int superh_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
> +void superh_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
> +                                    MMUAccessType access_type,
> +                                    int mmu_idx, uintptr_t retaddr);
>
>  void sh4_translate_init(void);
>  SuperHCPU *cpu_sh4_init(const char *cpu_model);
> diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
> index 684d3f3758..4abd05667c 100644
> --- a/target/sh4/op_helper.c
> +++ b/target/sh4/op_helper.c
> @@ -24,6 +24,25 @@
>
>  #ifndef CONFIG_USER_ONLY
>
> +void superh_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
> +                                    MMUAccessType access_type,
> +                                    int mmu_idx, uintptr_t retaddr)
> +{
> +    if (retaddr) {
> +        cpu_restore_state(cs, retaddr);
> +    }
> +    switch (access_type) {
> +    case MMU_INST_FETCH:
> +    case MMU_DATA_LOAD:
> +        cs->exception_index = 0x0e0;
> +        break;
> +    case MMU_DATA_STORE:
> +        cs->exception_index = 0x100;
> +        break;
> +    }
> +    cpu_loop_exit(cs);
> +}
> +
>  void tlb_fill(CPUState *cs, target_ulong addr, MMUAccessType access_type,
>                int mmu_idx, uintptr_t retaddr)
>  {
> diff --git a/target/sh4/translate.c b/target/sh4/translate.c
> index bc70166602..fe3f73b7c0 100644
> --- a/target/sh4/translate.c
> +++ b/target/sh4/translate.c
> @@ -1505,14 +1505,16 @@ static void _decode_opc(DisasContext * ctx)
>      case 0x40a9:                /* movua.l @Rm,R0 */
>          /* Load non-boundary-aligned data */
>          if (ctx->features & SH_FEATURE_SH4A) {
> -            tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
> +            tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
> +                                MO_TEUL | MO_UNALN);
>              return;
>          }
>          break;
>      case 0x40e9:                /* movua.l @Rm+,R0 */
>          /* Load non-boundary-aligned data */
>          if (ctx->features & SH_FEATURE_SH4A) {
> -            tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
> +            tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
> +                                MO_TEUL | MO_UNALN);
>              tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
>              return;
>          }
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 11/14] target/sh4: generate fences for SH4
  2017-05-01 22:10 ` [Qemu-devel] [PATCH 11/14] target/sh4: generate fences for SH4 Aurelien Jarno
@ 2017-05-02 12:58   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 21+ messages in thread
From: Philippe Mathieu-Daudé @ 2017-05-02 12:58 UTC (permalink / raw)
  To: Aurelien Jarno, qemu-devel

On 05/01/2017 07:10 PM, Aurelien Jarno wrote:
> synco is a SH4-A only instruction.
>
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>

> ---
>  target/sh4/translate.c | 9 +++++----
>  1 file changed, 5 insertions(+), 4 deletions(-)
>
> diff --git a/target/sh4/translate.c b/target/sh4/translate.c
> index 7de459c9a5..c226be9718 100644
> --- a/target/sh4/translate.c
> +++ b/target/sh4/translate.c
> @@ -1570,10 +1570,11 @@ static void _decode_opc(DisasContext * ctx)
>  	else
>  	    break;
>      case 0x00ab:		/* synco */
> -	if (ctx->features & SH_FEATURE_SH4A)
> -	    return;
> -	else
> -	    break;
> +        if (ctx->features & SH_FEATURE_SH4A) {
> +            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
> +            return;
> +        }
> +        break;
>      case 0x4024:		/* rotcl Rn */
>  	{
>  	    TCGv tmp = tcg_temp_new();
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 01/14] target/sh4: split ctx->flags into ctx->tbflags and ctx->envflags
  2017-05-02 12:16   ` Philippe Mathieu-Daudé
@ 2017-05-02 19:38     ` Aurelien Jarno
  0 siblings, 0 replies; 21+ messages in thread
From: Aurelien Jarno @ 2017-05-02 19:38 UTC (permalink / raw)
  To: Philippe Mathieu-Daudé; +Cc: qemu-devel

On 2017-05-02 09:16, Philippe Mathieu-Daudé wrote:
> Hi Aurelien,
> 
> On 05/01/2017 07:10 PM, Aurelien Jarno wrote:
> > There is a confusion (and not only in the SH4 target) between tb->flags,
> > env->flags and ctx->flags. To avoid it, split ctx->flags into
> > ctx->tbflags and ctx->envflags. ctx->tbflags stays unchanged during the
> > whole TB translation, while ctx->envflags evolves and is kept in sync
> > with env->flags using TCG instructions. ctx->envflags now only contains
> > the part that of env->flags that is contained in the TB state, i.e. the
> > DELAY_SLOT* flags.
> 
> I agree with your split, but I'd rather put the 2nd part of your commit
> description as comments in the struct DisasContext declaration, if you mind
> :)

Thanks for the review. I'll do that in the v2, although at some point it
should probably go to some higher level documentation, as it is often a
source of confusion.

-- 
Aurelien Jarno                          GPG: 4096R/1DDD8C9B
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2017-05-02 19:38 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-01 22:10 [Qemu-devel] [PATCH 00/14] target/sh4: misc fixes, cleanup and optimizations Aurelien Jarno
2017-05-01 22:10 ` [Qemu-devel] [PATCH 01/14] target/sh4: split ctx->flags into ctx->tbflags and ctx->envflags Aurelien Jarno
2017-05-02 12:16   ` Philippe Mathieu-Daudé
2017-05-02 19:38     ` Aurelien Jarno
2017-05-01 22:10 ` [Qemu-devel] [PATCH 02/14] target/sh4: get rid of DELAY_SLOT_CLEARME Aurelien Jarno
2017-05-02 12:32   ` Philippe Mathieu-Daudé
2017-05-01 22:10 ` [Qemu-devel] [PATCH 03/14] target/sh4: do not include DELAY_SLOT_TRUE in the TB state Aurelien Jarno
2017-05-01 22:10 ` [Qemu-devel] [PATCH 04/14] target/sh4: move DELAY_SLOT_TRUE flag into a separate global Aurelien Jarno
2017-05-01 22:10 ` [Qemu-devel] [PATCH 05/14] target/sh4: fix BS_STOP exit Aurelien Jarno
2017-05-01 22:10 ` [Qemu-devel] [PATCH 06/14] target/sh4: fix BS_EXCP exit Aurelien Jarno
2017-05-01 22:10 ` [Qemu-devel] [PATCH 07/14] target/sh4: only save flags state at the end of the TB Aurelien Jarno
2017-05-01 22:10 ` [Qemu-devel] [PATCH 08/14] target/sh4: fold ctx->bstate = BS_BRANCH into gen_conditional_jump Aurelien Jarno
2017-05-01 22:10 ` [Qemu-devel] [PATCH 09/14] target/sh4: optimize gen_store_fpr64 Aurelien Jarno
2017-05-01 22:10 ` [Qemu-devel] [PATCH 10/14] target/sh4: optimize gen_write_sr using extract op Aurelien Jarno
2017-05-01 22:10 ` [Qemu-devel] [PATCH 11/14] target/sh4: generate fences for SH4 Aurelien Jarno
2017-05-02 12:58   ` Philippe Mathieu-Daudé
2017-05-01 22:10 ` [Qemu-devel] [PATCH 12/14] target/sh4: implement tas.b using atomic helper Aurelien Jarno
2017-05-01 22:10 ` [Qemu-devel] [PATCH 13/14] target/sh4: movua.l is an SH4-A only instruction Aurelien Jarno
2017-05-02 12:52   ` Philippe Mathieu-Daudé
2017-05-01 22:10 ` [Qemu-devel] [PATCH 14/14] target/sh4: trap unaligned accesses Aurelien Jarno
2017-05-02 12:57   ` Philippe Mathieu-Daudé

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.