All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2 0/5] tcg-mips: Use TCG registers for the FPU
@ 2012-03-30 17:05 Richard Henderson
  2012-03-30 17:05 ` [Qemu-devel] [PATCH 1/5] target-mips: Set opn in gen_ldst_multiple Richard Henderson
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Richard Henderson @ 2012-03-30 17:05 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

This is a re-base of a patch set sent in December.  The only
real changes are CPUState -> CPUMIPSState.  The commit message
for patch 4 describe the real intent.

This patch set is also available at

  git://repo.or.cz/qemu/rth.git rth/mips/fpregs


r~


Richard Henderson (5):
  target-mips: Set opn in gen_ldst_multiple.
  target-mips: Fix MIPS_DEBUG.
  target-mips: Pass DisasContext to fpr32 load/store routines
  target-mips: Use TCG registers for the FPU.
  target-mips: Add accessors for the two 32-bit halves of a 64-bit FPR

 target-mips/translate.c |  547 +++++++++++++++++++++++++++--------------------
 1 files changed, 318 insertions(+), 229 deletions(-)

-- 
1.7.7.6

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Qemu-devel] [PATCH 1/5] target-mips: Set opn in gen_ldst_multiple.
  2012-03-30 17:05 [Qemu-devel] [PATCH v2 0/5] tcg-mips: Use TCG registers for the FPU Richard Henderson
@ 2012-03-30 17:05 ` Richard Henderson
  2012-03-30 17:05 ` [Qemu-devel] [PATCH 2/5] target-mips: Fix MIPS_DEBUG Richard Henderson
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2012-03-30 17:05 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Used by MIPS_DEBUG, when enabled.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-mips/translate.c |    6 ++++++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/target-mips/translate.c b/target-mips/translate.c
index a663b74..85dd65a 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -9833,6 +9833,7 @@ static void gen_andi16 (CPUMIPSState *env, DisasContext *ctx)
 static void gen_ldst_multiple (DisasContext *ctx, uint32_t opc, int reglist,
                                int base, int16_t offset)
 {
+    const char *opn = "ldst_multiple";
     TCGv t0, t1;
     TCGv_i32 t2;
 
@@ -9852,19 +9853,24 @@ static void gen_ldst_multiple (DisasContext *ctx, uint32_t opc, int reglist,
     switch (opc) {
     case LWM32:
         gen_helper_lwm(t0, t1, t2);
+        opn = "lwm";
         break;
     case SWM32:
         gen_helper_swm(t0, t1, t2);
+        opn = "swm";
         break;
 #ifdef TARGET_MIPS64
     case LDM:
         gen_helper_ldm(t0, t1, t2);
+        opn = "ldm";
         break;
     case SDM:
         gen_helper_sdm(t0, t1, t2);
+        opn = "sdm";
         break;
 #endif
     }
+    (void)opn;
     MIPS_DEBUG("%s, %x, %d(%s)", opn, reglist, offset, regnames[base]);
     tcg_temp_free(t0);
     tcg_temp_free(t1);
-- 
1.7.7.6

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [Qemu-devel] [PATCH 2/5] target-mips: Fix MIPS_DEBUG.
  2012-03-30 17:05 [Qemu-devel] [PATCH v2 0/5] tcg-mips: Use TCG registers for the FPU Richard Henderson
  2012-03-30 17:05 ` [Qemu-devel] [PATCH 1/5] target-mips: Set opn in gen_ldst_multiple Richard Henderson
@ 2012-03-30 17:05 ` Richard Henderson
  2012-03-30 17:05 ` [Qemu-devel] [PATCH 3/5] target-mips: Pass DisasContext to fpr32 load/store routines Richard Henderson
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2012-03-30 17:05 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

The macro uses the DisasContext.  Pass it around as needed.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-mips/translate.c |   74 ++++++++++++++++++++++++-----------------------
 1 files changed, 38 insertions(+), 36 deletions(-)

diff --git a/target-mips/translate.c b/target-mips/translate.c
index 85dd65a..f18df40 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -1413,7 +1413,8 @@ static void gen_arith_imm (CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
 }
 
 /* Logic with immediate operand */
-static void gen_logic_imm (CPUMIPSState *env, uint32_t opc, int rt, int rs, int16_t imm)
+static void gen_logic_imm(CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
+                          int rt, int rs, int16_t imm)
 {
     target_ulong uimm;
     const char *opn = "imm logic";
@@ -1456,7 +1457,8 @@ static void gen_logic_imm (CPUMIPSState *env, uint32_t opc, int rt, int rs, int1
 }
 
 /* Set on less than with immediate operand */
-static void gen_slt_imm (CPUMIPSState *env, uint32_t opc, int rt, int rs, int16_t imm)
+static void gen_slt_imm(CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
+                        int rt, int rs, int16_t imm)
 {
     target_ulong uimm = (target_long)imm; /* Sign extend to 32/64 bits */
     const char *opn = "imm arith";
@@ -1757,7 +1759,8 @@ static void gen_arith (CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
 }
 
 /* Conditional move */
-static void gen_cond_move (CPUMIPSState *env, uint32_t opc, int rd, int rs, int rt)
+static void gen_cond_move(CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
+                          int rd, int rs, int rt)
 {
     const char *opn = "cond move";
     int l1;
@@ -1795,7 +1798,8 @@ static void gen_cond_move (CPUMIPSState *env, uint32_t opc, int rd, int rs, int
 }
 
 /* Logic */
-static void gen_logic (CPUMIPSState *env, uint32_t opc, int rd, int rs, int rt)
+static void gen_logic(CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
+                      int rd, int rs, int rt)
 {
     const char *opn = "logic";
 
@@ -1856,7 +1860,8 @@ static void gen_logic (CPUMIPSState *env, uint32_t opc, int rd, int rs, int rt)
 }
 
 /* Set on lower than */
-static void gen_slt (CPUMIPSState *env, uint32_t opc, int rd, int rs, int rt)
+static void gen_slt(CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
+                    int rd, int rs, int rt)
 {
     const char *opn = "slt";
     TCGv t0, t1;
@@ -8756,10 +8761,10 @@ static int decode_extended_mips16_opc (CPUMIPSState *env, DisasContext *ctx,
         gen_arith_imm(env, ctx, OPC_ADDIU, rx, rx, imm);
         break;
     case M16_OPC_SLTI:
-        gen_slt_imm(env, OPC_SLTI, 24, rx, imm);
+        gen_slt_imm(env, ctx, OPC_SLTI, 24, rx, imm);
         break;
     case M16_OPC_SLTIU:
-        gen_slt_imm(env, OPC_SLTIU, 24, rx, imm);
+        gen_slt_imm(env, ctx, OPC_SLTIU, 24, rx, imm);
         break;
     case M16_OPC_I8:
         switch (funct) {
@@ -8970,15 +8975,13 @@ static int decode_mips16_opc (CPUMIPSState *env, DisasContext *ctx,
     case M16_OPC_SLTI:
         {
             int16_t imm = (uint8_t) ctx->opcode;
-
-            gen_slt_imm(env, OPC_SLTI, 24, rx, imm);
+            gen_slt_imm(env, ctx, OPC_SLTI, 24, rx, imm);
         }
         break;
     case M16_OPC_SLTIU:
         {
             int16_t imm = (uint8_t) ctx->opcode;
-
-            gen_slt_imm(env, OPC_SLTIU, 24, rx, imm);
+            gen_slt_imm(env, ctx, OPC_SLTIU, 24, rx, imm);
         }
         break;
     case M16_OPC_I8:
@@ -9053,8 +9056,7 @@ static int decode_mips16_opc (CPUMIPSState *env, DisasContext *ctx,
     case M16_OPC_CMPI:
         {
             int16_t imm = (uint8_t) ctx->opcode;
-
-            gen_logic_imm(env, OPC_XORI, 24, rx, imm);
+            gen_logic_imm(env, ctx, OPC_XORI, 24, rx, imm);
         }
         break;
 #if defined(TARGET_MIPS64)
@@ -9166,10 +9168,10 @@ static int decode_mips16_opc (CPUMIPSState *env, DisasContext *ctx,
             }
             break;
         case RR_SLT:
-            gen_slt(env, OPC_SLT, 24, rx, ry);
+            gen_slt(env, ctx, OPC_SLT, 24, rx, ry);
             break;
         case RR_SLTU:
-            gen_slt(env, OPC_SLTU, 24, rx, ry);
+            gen_slt(env, ctx, OPC_SLTU, 24, rx, ry);
             break;
         case RR_BREAK:
             generate_exception(ctx, EXCP_BREAK);
@@ -9190,22 +9192,22 @@ static int decode_mips16_opc (CPUMIPSState *env, DisasContext *ctx,
             break;
 #endif
         case RR_CMP:
-            gen_logic(env, OPC_XOR, 24, rx, ry);
+            gen_logic(env, ctx, OPC_XOR, 24, rx, ry);
             break;
         case RR_NEG:
             gen_arith(env, ctx, OPC_SUBU, rx, 0, ry);
             break;
         case RR_AND:
-            gen_logic(env, OPC_AND, rx, rx, ry);
+            gen_logic(env, ctx, OPC_AND, rx, rx, ry);
             break;
         case RR_OR:
-            gen_logic(env, OPC_OR, rx, rx, ry);
+            gen_logic(env, ctx, OPC_OR, rx, rx, ry);
             break;
         case RR_XOR:
-            gen_logic(env, OPC_XOR, rx, rx, ry);
+            gen_logic(env, ctx, OPC_XOR, rx, rx, ry);
             break;
         case RR_NOT:
-            gen_logic(env, OPC_NOR, rx, ry, 0);
+            gen_logic(env, ctx, OPC_NOR, rx, ry, 0);
             break;
         case RR_MFHI:
             gen_HILO(ctx, OPC_MFHI, rx);
@@ -9827,7 +9829,7 @@ static void gen_andi16 (CPUMIPSState *env, DisasContext *ctx)
     int rs = mmreg(uMIPS_RS(ctx->opcode));
     int encoded = ZIMM(ctx->opcode, 0, 4);
 
-    gen_logic_imm(env, OPC_ANDI, rd, rs, decoded_imm[encoded]);
+    gen_logic_imm(env, ctx, OPC_ANDI, rd, rs, decoded_imm[encoded]);
 }
 
 static void gen_ldst_multiple (DisasContext *ctx, uint32_t opc, int reglist,
@@ -9889,25 +9891,25 @@ static void gen_pool16c_insn (CPUMIPSState *env, DisasContext *ctx, int *is_bran
     case NOT16 + 1:
     case NOT16 + 2:
     case NOT16 + 3:
-        gen_logic(env, OPC_NOR, rd, rs, 0);
+        gen_logic(env, ctx, OPC_NOR, rd, rs, 0);
         break;
     case XOR16 + 0:
     case XOR16 + 1:
     case XOR16 + 2:
     case XOR16 + 3:
-        gen_logic(env, OPC_XOR, rd, rd, rs);
+        gen_logic(env, ctx, OPC_XOR, rd, rd, rs);
         break;
     case AND16 + 0:
     case AND16 + 1:
     case AND16 + 2:
     case AND16 + 3:
-        gen_logic(env, OPC_AND, rd, rd, rs);
+        gen_logic(env, ctx, OPC_AND, rd, rd, rs);
         break;
     case OR16 + 0:
     case OR16 + 1:
     case OR16 + 2:
     case OR16 + 3:
-        gen_logic(env, OPC_OR, rd, rd, rs);
+        gen_logic(env, ctx, OPC_OR, rd, rd, rs);
         break;
     case LWM16 + 0:
     case LWM16 + 1:
@@ -10707,7 +10709,7 @@ static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx,
             case XOR32:
                 mips32_op = OPC_XOR;
             do_logic:
-                gen_logic(env, mips32_op, rd, rs, rt);
+                gen_logic(env, ctx, mips32_op, rd, rs, rt);
                 break;
                 /* Set less than */
             case SLT:
@@ -10716,7 +10718,7 @@ static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx,
             case SLTU:
                 mips32_op = OPC_SLTU;
             do_slt:
-                gen_slt(env, mips32_op, rd, rs, rt);
+                gen_slt(env, ctx, mips32_op, rd, rs, rt);
                 break;
             default:
                 goto pool32a_invalid;
@@ -10732,7 +10734,7 @@ static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx,
             case MOVZ:
                 mips32_op = OPC_MOVZ;
             do_cmov:
-                gen_cond_move(env, mips32_op, rd, rs, rt);
+                gen_cond_move(env, ctx, mips32_op, rd, rs, rt);
                 break;
             case LWXS:
                 gen_ldxs(ctx, rs, rt, rd);
@@ -11144,7 +11146,7 @@ static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx,
                target. */
             break;
         case LUI:
-            gen_logic_imm(env, OPC_LUI, rs, -1, imm);
+            gen_logic_imm(env, ctx, OPC_LUI, rs, -1, imm);
             break;
         case SYNCI:
             break;
@@ -11263,7 +11265,7 @@ static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx,
     case ANDI32:
         mips32_op = OPC_ANDI;
     do_logici:
-        gen_logic_imm(env, mips32_op, rt, rs, imm);
+        gen_logic_imm(env, ctx, mips32_op, rt, rs, imm);
         break;
 
         /* Set less than immediate */
@@ -11273,7 +11275,7 @@ static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx,
     case SLTIU32:
         mips32_op = OPC_SLTIU;
     do_slti:
-        gen_slt_imm(env, mips32_op, rt, rs, imm);
+        gen_slt_imm(env, ctx, mips32_op, rt, rs, imm);
         break;
     case JALX32:
         offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 2;
@@ -11750,7 +11752,7 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch)
         case OPC_MOVZ:
             check_insn(env, ctx, ISA_MIPS4 | ISA_MIPS32 |
                                  INSN_LOONGSON2E | INSN_LOONGSON2F);
-            gen_cond_move(env, op1, rd, rs, rt);
+            gen_cond_move(env, ctx, op1, rd, rs, rt);
             break;
         case OPC_ADD ... OPC_SUBU:
             gen_arith(env, ctx, op1, rd, rs, rt);
@@ -11777,13 +11779,13 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch)
             break;
         case OPC_SLT:          /* Set on less than */
         case OPC_SLTU:
-            gen_slt(env, op1, rd, rs, rt);
+            gen_slt(env, ctx, op1, rd, rs, rt);
             break;
         case OPC_AND:          /* Logic*/
         case OPC_OR:
         case OPC_NOR:
         case OPC_XOR:
-            gen_logic(env, op1, rd, rs, rt);
+            gen_logic(env, ctx, op1, rd, rs, rt);
             break;
         case OPC_MULT ... OPC_DIVU:
             if (sa) {
@@ -12184,13 +12186,13 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch)
          break;
     case OPC_SLTI: /* Set on less than with immediate opcode */
     case OPC_SLTIU:
-         gen_slt_imm(env, op, rt, rs, imm);
+         gen_slt_imm(env, ctx, op, rt, rs, imm);
          break;
     case OPC_ANDI: /* Arithmetic with immediate opcode */
     case OPC_LUI:
     case OPC_ORI:
     case OPC_XORI:
-         gen_logic_imm(env, op, rt, rs, imm);
+         gen_logic_imm(env, ctx, op, rt, rs, imm);
          break;
     case OPC_J ... OPC_JAL: /* Jump */
          offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 2;
-- 
1.7.7.6

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [Qemu-devel] [PATCH 3/5] target-mips: Pass DisasContext to fpr32 load/store routines
  2012-03-30 17:05 [Qemu-devel] [PATCH v2 0/5] tcg-mips: Use TCG registers for the FPU Richard Henderson
  2012-03-30 17:05 ` [Qemu-devel] [PATCH 1/5] target-mips: Set opn in gen_ldst_multiple Richard Henderson
  2012-03-30 17:05 ` [Qemu-devel] [PATCH 2/5] target-mips: Fix MIPS_DEBUG Richard Henderson
@ 2012-03-30 17:05 ` Richard Henderson
  2012-03-30 17:05 ` [Qemu-devel] [PATCH 4/5] target-mips: Use TCG registers for the FPU Richard Henderson
  2012-03-30 17:05 ` [Qemu-devel] [PATCH 5/5] target-mips: Add accessors for the two 32-bit halves of a 64-bit FPR Richard Henderson
  4 siblings, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2012-03-30 17:05 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

The large mechanical change in support of a follow-on patch
that changes the representation of the fp registers.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-mips/translate.c |  312 +++++++++++++++++++++++------------------------
 1 files changed, 153 insertions(+), 159 deletions(-)

diff --git a/target-mips/translate.c b/target-mips/translate.c
index f18df40..3b86a1c 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -640,42 +640,42 @@ static inline void gen_store_srsgpr (int from, int to)
 }
 
 /* Floating point register moves. */
-static inline void gen_load_fpr32 (TCGv_i32 t, int reg)
+static inline void gen_load_fpr32(DisasContext *ctx, TCGv_i32 t, int reg)
 {
     tcg_gen_ld_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX]));
 }
 
-static inline void gen_store_fpr32 (TCGv_i32 t, int reg)
+static inline void gen_store_fpr32(DisasContext *ctx, TCGv_i32 t, int reg)
 {
     tcg_gen_st_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX]));
 }
 
-static inline void gen_load_fpr32h (TCGv_i32 t, int reg)
+static inline void gen_load_fpr32h(DisasContext *ctx, TCGv_i32 t, int reg)
 {
     tcg_gen_ld_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX]));
 }
 
-static inline void gen_store_fpr32h (TCGv_i32 t, int reg)
+static inline void gen_store_fpr32h(DisasContext *ctx, TCGv_i32 t, int reg)
 {
     tcg_gen_st_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX]));
 }
 
-static inline void gen_load_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
+static inline void gen_load_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
 {
     if (ctx->hflags & MIPS_HFLAG_F64) {
         tcg_gen_ld_i64(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].d));
     } else {
         TCGv_i32 t0 = tcg_temp_new_i32();
         TCGv_i32 t1 = tcg_temp_new_i32();
-        gen_load_fpr32(t0, reg & ~1);
-        gen_load_fpr32(t1, reg | 1);
+        gen_load_fpr32(ctx, t0, reg & ~1);
+        gen_load_fpr32(ctx, t1, reg | 1);
         tcg_gen_concat_i32_i64(t, t0, t1);
         tcg_temp_free_i32(t0);
         tcg_temp_free_i32(t1);
     }
 }
 
-static inline void gen_store_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
+static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
 {
     if (ctx->hflags & MIPS_HFLAG_F64) {
         tcg_gen_st_i64(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].d));
@@ -683,10 +683,10 @@ static inline void gen_store_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
         TCGv_i64 t0 = tcg_temp_new_i64();
         TCGv_i32 t1 = tcg_temp_new_i32();
         tcg_gen_trunc_i64_i32(t1, t);
-        gen_store_fpr32(t1, reg & ~1);
+        gen_store_fpr32(ctx, t1, reg & ~1);
         tcg_gen_shri_i64(t0, t, 32);
         tcg_gen_trunc_i64_i32(t1, t0);
-        gen_store_fpr32(t1, reg | 1);
+        gen_store_fpr32(ctx, t1, reg | 1);
         tcg_temp_free_i32(t1);
         tcg_temp_free_i64(t0);
     }
@@ -840,12 +840,6 @@ static inline void check_mips_64(DisasContext *ctx)
         generate_exception(ctx, EXCP_RI);
 }
 
-/* Define small wrappers for gen_load_fpr* so that we have a uniform
-   calling interface for 32 and 64-bit FPRs.  No sense in changing
-   all callers for gen_load_fpr32 when we need the CTX parameter for
-   this one use.  */
-#define gen_ldcmp_fpr32(ctx, x, y) gen_load_fpr32(x, y)
-#define gen_ldcmp_fpr64(ctx, x, y) gen_load_fpr64(ctx, x, y)
 #define FOP_CONDS(type, abs, fmt, ifmt, bits)                                 \
 static inline void gen_cmp ## type ## _ ## fmt(DisasContext *ctx, int n,      \
                                                int ft, int fs, int cc)        \
@@ -868,8 +862,8 @@ static inline void gen_cmp ## type ## _ ## fmt(DisasContext *ctx, int n,      \
         }                                                                     \
         break;                                                                \
     }                                                                         \
-    gen_ldcmp_fpr##bits (ctx, fp0, fs);                                       \
-    gen_ldcmp_fpr##bits (ctx, fp1, ft);                                       \
+    gen_load_fpr##bits(ctx, fp0, fs);                                         \
+    gen_load_fpr##bits(ctx, fp1, ft);                                         \
     switch (n) {                                                              \
     case  0: gen_helper_2i(cmp ## type ## _ ## fmt ## _f, fp0, fp1, cc);    break;\
     case  1: gen_helper_2i(cmp ## type ## _ ## fmt ## _un, fp0, fp1, cc);   break;\
@@ -1264,7 +1258,7 @@ static void gen_flt_ldst (DisasContext *ctx, uint32_t opc, int ft,
 
             tcg_gen_qemu_ld32s(t0, t0, ctx->mem_idx);
             tcg_gen_trunc_tl_i32(fp0, t0);
-            gen_store_fpr32(fp0, ft);
+            gen_store_fpr32(ctx, fp0, ft);
             tcg_temp_free_i32(fp0);
         }
         opn = "lwc1";
@@ -1274,7 +1268,7 @@ static void gen_flt_ldst (DisasContext *ctx, uint32_t opc, int ft,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv t1 = tcg_temp_new();
 
-            gen_load_fpr32(fp0, ft);
+            gen_load_fpr32(ctx, fp0, ft);
             tcg_gen_extu_i32_tl(t1, fp0);
             tcg_gen_qemu_st32(t1, t0, ctx->mem_idx);
             tcg_temp_free(t1);
@@ -5704,13 +5698,13 @@ static void gen_mftr(CPUMIPSState *env, DisasContext *ctx, int rt, int rd,
         if (h == 0) {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, rt);
+            gen_load_fpr32(ctx, fp0, rt);
             tcg_gen_ext_i32_tl(t0, fp0);
             tcg_temp_free_i32(fp0);
         } else {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32h(fp0, rt);
+            gen_load_fpr32h(ctx, fp0, rt);
             tcg_gen_ext_i32_tl(t0, fp0);
             tcg_temp_free_i32(fp0);
         }
@@ -5903,13 +5897,13 @@ static void gen_mttr(CPUMIPSState *env, DisasContext *ctx, int rd, int rt,
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             tcg_gen_trunc_tl_i32(fp0, t0);
-            gen_store_fpr32(fp0, rd);
+            gen_store_fpr32(ctx, fp0, rd);
             tcg_temp_free_i32(fp0);
         } else {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             tcg_gen_trunc_tl_i32(fp0, t0);
-            gen_store_fpr32h(fp0, rd);
+            gen_store_fpr32h(ctx, fp0, rd);
             tcg_temp_free_i32(fp0);
         }
         break;
@@ -6325,7 +6319,7 @@ static void gen_cp1 (DisasContext *ctx, uint32_t opc, int rt, int fs)
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             tcg_gen_ext_i32_tl(t0, fp0);
             tcg_temp_free_i32(fp0);
         }
@@ -6338,7 +6332,7 @@ static void gen_cp1 (DisasContext *ctx, uint32_t opc, int rt, int fs)
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             tcg_gen_trunc_tl_i32(fp0, t0);
-            gen_store_fpr32(fp0, fs);
+            gen_store_fpr32(ctx, fp0, fs);
             tcg_temp_free_i32(fp0);
         }
         opn = "mtc1";
@@ -6369,7 +6363,7 @@ static void gen_cp1 (DisasContext *ctx, uint32_t opc, int rt, int fs)
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32h(fp0, fs);
+            gen_load_fpr32h(ctx, fp0, fs);
             tcg_gen_ext_i32_tl(t0, fp0);
             tcg_temp_free_i32(fp0);
         }
@@ -6382,7 +6376,7 @@ static void gen_cp1 (DisasContext *ctx, uint32_t opc, int rt, int fs)
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             tcg_gen_trunc_tl_i32(fp0, t0);
-            gen_store_fpr32h(fp0, fs);
+            gen_store_fpr32h(ctx, fp0, fs);
             tcg_temp_free_i32(fp0);
         }
         opn = "mthc1";
@@ -6428,7 +6422,7 @@ static void gen_movci (DisasContext *ctx, int rd, int rs, int cc, int tf)
     gen_set_label(l1);
 }
 
-static inline void gen_movcf_s (int fs, int fd, int cc, int tf)
+static void gen_movcf_s(DisasContext *ctx, int fs, int fd, int cc, int tf)
 {
     int cond;
     TCGv_i32 t0 = tcg_temp_new_i32();
@@ -6441,13 +6435,13 @@ static inline void gen_movcf_s (int fs, int fd, int cc, int tf)
 
     tcg_gen_andi_i32(t0, fpu_fcr31, 1 << get_fp_bit(cc));
     tcg_gen_brcondi_i32(cond, t0, 0, l1);
-    gen_load_fpr32(t0, fs);
-    gen_store_fpr32(t0, fd);
+    gen_load_fpr32(ctx, t0, fs);
+    gen_store_fpr32(ctx, t0, fd);
     gen_set_label(l1);
     tcg_temp_free_i32(t0);
 }
 
-static inline void gen_movcf_d (DisasContext *ctx, int fs, int fd, int cc, int tf)
+static void gen_movcf_d(DisasContext *ctx, int fs, int fd, int cc, int tf)
 {
     int cond;
     TCGv_i32 t0 = tcg_temp_new_i32();
@@ -6469,7 +6463,7 @@ static inline void gen_movcf_d (DisasContext *ctx, int fs, int fd, int cc, int t
     gen_set_label(l1);
 }
 
-static inline void gen_movcf_ps (int fs, int fd, int cc, int tf)
+static void gen_movcf_ps(DisasContext *ctx, int fs, int fd, int cc, int tf)
 {
     int cond;
     TCGv_i32 t0 = tcg_temp_new_i32();
@@ -6483,14 +6477,14 @@ static inline void gen_movcf_ps (int fs, int fd, int cc, int tf)
 
     tcg_gen_andi_i32(t0, fpu_fcr31, 1 << get_fp_bit(cc));
     tcg_gen_brcondi_i32(cond, t0, 0, l1);
-    gen_load_fpr32(t0, fs);
-    gen_store_fpr32(t0, fd);
+    gen_load_fpr32(ctx, t0, fs);
+    gen_store_fpr32(ctx, t0, fd);
     gen_set_label(l1);
 
     tcg_gen_andi_i32(t0, fpu_fcr31, 1 << get_fp_bit(cc+1));
     tcg_gen_brcondi_i32(cond, t0, 0, l2);
-    gen_load_fpr32h(t0, fs);
-    gen_store_fpr32h(t0, fd);
+    gen_load_fpr32h(ctx, t0, fs);
+    gen_store_fpr32h(ctx, t0, fd);
     tcg_temp_free_i32(t0);
     gen_set_label(l2);
 }
@@ -6545,11 +6539,11 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
             gen_helper_float_add_s(fp0, fp0, fp1);
             tcg_temp_free_i32(fp1);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "add.s";
@@ -6560,11 +6554,11 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
             gen_helper_float_sub_s(fp0, fp0, fp1);
             tcg_temp_free_i32(fp1);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "sub.s";
@@ -6575,11 +6569,11 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
             gen_helper_float_mul_s(fp0, fp0, fp1);
             tcg_temp_free_i32(fp1);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "mul.s";
@@ -6590,11 +6584,11 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
             gen_helper_float_div_s(fp0, fp0, fp1);
             tcg_temp_free_i32(fp1);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "div.s";
@@ -6604,9 +6598,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_sqrt_s(fp0, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "sqrt.s";
@@ -6615,9 +6609,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_abs_s(fp0, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "abs.s";
@@ -6626,8 +6620,8 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_store_fpr32(fp0, fd);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "mov.s";
@@ -6636,9 +6630,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_chs_s(fp0, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "neg.s";
@@ -6649,7 +6643,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp32 = tcg_temp_new_i32();
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
-            gen_load_fpr32(fp32, fs);
+            gen_load_fpr32(ctx, fp32, fs);
             gen_helper_float_roundl_s(fp64, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
@@ -6663,7 +6657,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp32 = tcg_temp_new_i32();
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
-            gen_load_fpr32(fp32, fs);
+            gen_load_fpr32(ctx, fp32, fs);
             gen_helper_float_truncl_s(fp64, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
@@ -6677,7 +6671,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp32 = tcg_temp_new_i32();
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
-            gen_load_fpr32(fp32, fs);
+            gen_load_fpr32(ctx, fp32, fs);
             gen_helper_float_ceill_s(fp64, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
@@ -6691,7 +6685,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp32 = tcg_temp_new_i32();
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
-            gen_load_fpr32(fp32, fs);
+            gen_load_fpr32(ctx, fp32, fs);
             gen_helper_float_floorl_s(fp64, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
@@ -6703,9 +6697,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_roundw_s(fp0, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "round.w.s";
@@ -6714,9 +6708,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_truncw_s(fp0, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "trunc.w.s";
@@ -6725,9 +6719,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_ceilw_s(fp0, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "ceil.w.s";
@@ -6736,15 +6730,15 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_floorw_s(fp0, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "floor.w.s";
         break;
     case OPC_MOVCF_S:
-        gen_movcf_s(fs, fd, (ft >> 2) & 0x7, ft & 0x1);
+        gen_movcf_s(ctx, fs, fd, (ft >> 2) & 0x7, ft & 0x1);
         opn = "movcf.s";
         break;
     case OPC_MOVZ_S:
@@ -6756,8 +6750,8 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
                 tcg_gen_brcondi_tl(TCG_COND_NE, cpu_gpr[ft], 0, l1);
             }
             fp0 = tcg_temp_new_i32();
-            gen_load_fpr32(fp0, fs);
-            gen_store_fpr32(fp0, fd);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
             gen_set_label(l1);
         }
@@ -6771,8 +6765,8 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             if (ft != 0) {
                 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[ft], 0, l1);
                 fp0 = tcg_temp_new_i32();
-                gen_load_fpr32(fp0, fs);
-                gen_store_fpr32(fp0, fd);
+                gen_load_fpr32(ctx, fp0, fs);
+                gen_store_fpr32(ctx, fp0, fd);
                 tcg_temp_free_i32(fp0);
                 gen_set_label(l1);
             }
@@ -6784,9 +6778,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_recip_s(fp0, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "recip.s";
@@ -6796,9 +6790,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_rsqrt_s(fp0, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "rsqrt.s";
@@ -6809,11 +6803,11 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, fd);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, fd);
             gen_helper_float_recip2_s(fp0, fp0, fp1);
             tcg_temp_free_i32(fp1);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "recip2.s";
@@ -6823,9 +6817,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_recip1_s(fp0, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "recip1.s";
@@ -6835,9 +6829,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_rsqrt1_s(fp0, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "rsqrt1.s";
@@ -6848,11 +6842,11 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
             gen_helper_float_rsqrt2_s(fp0, fp0, fp1);
             tcg_temp_free_i32(fp1);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "rsqrt2.s";
@@ -6863,7 +6857,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp32 = tcg_temp_new_i32();
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
-            gen_load_fpr32(fp32, fs);
+            gen_load_fpr32(ctx, fp32, fs);
             gen_helper_float_cvtd_s(fp64, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
@@ -6875,9 +6869,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_cvtw_s(fp0, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "cvt.w.s";
@@ -6888,7 +6882,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp32 = tcg_temp_new_i32();
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
-            gen_load_fpr32(fp32, fs);
+            gen_load_fpr32(ctx, fp32, fs);
             gen_helper_float_cvtl_s(fp64, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
@@ -6903,8 +6897,8 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp32_0 = tcg_temp_new_i32();
             TCGv_i32 fp32_1 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp32_0, fs);
-            gen_load_fpr32(fp32_1, ft);
+            gen_load_fpr32(ctx, fp32_0, fs);
+            gen_load_fpr32(ctx, fp32_1, ft);
             tcg_gen_concat_i32_i64(fp64, fp32_0, fp32_1);
             tcg_temp_free_i32(fp32_1);
             tcg_temp_free_i32(fp32_0);
@@ -7105,7 +7099,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             gen_load_fpr64(ctx, fp64, fs);
             gen_helper_float_roundw_d(fp32, fp64);
             tcg_temp_free_i64(fp64);
-            gen_store_fpr32(fp32, fd);
+            gen_store_fpr32(ctx, fp32, fd);
             tcg_temp_free_i32(fp32);
         }
         opn = "round.w.d";
@@ -7119,7 +7113,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             gen_load_fpr64(ctx, fp64, fs);
             gen_helper_float_truncw_d(fp32, fp64);
             tcg_temp_free_i64(fp64);
-            gen_store_fpr32(fp32, fd);
+            gen_store_fpr32(ctx, fp32, fd);
             tcg_temp_free_i32(fp32);
         }
         opn = "trunc.w.d";
@@ -7133,7 +7127,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             gen_load_fpr64(ctx, fp64, fs);
             gen_helper_float_ceilw_d(fp32, fp64);
             tcg_temp_free_i64(fp64);
-            gen_store_fpr32(fp32, fd);
+            gen_store_fpr32(ctx, fp32, fd);
             tcg_temp_free_i32(fp32);
         }
         opn = "ceil.w.d";
@@ -7147,7 +7141,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             gen_load_fpr64(ctx, fp64, fs);
             gen_helper_float_floorw_d(fp32, fp64);
             tcg_temp_free_i64(fp64);
-            gen_store_fpr32(fp32, fd);
+            gen_store_fpr32(ctx, fp32, fd);
             tcg_temp_free_i32(fp32);
         }
         opn = "floor.w.d";
@@ -7299,7 +7293,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             gen_load_fpr64(ctx, fp64, fs);
             gen_helper_float_cvts_d(fp32, fp64);
             tcg_temp_free_i64(fp64);
-            gen_store_fpr32(fp32, fd);
+            gen_store_fpr32(ctx, fp32, fd);
             tcg_temp_free_i32(fp32);
         }
         opn = "cvt.s.d";
@@ -7313,7 +7307,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             gen_load_fpr64(ctx, fp64, fs);
             gen_helper_float_cvtw_d(fp32, fp64);
             tcg_temp_free_i64(fp64);
-            gen_store_fpr32(fp32, fd);
+            gen_store_fpr32(ctx, fp32, fd);
             tcg_temp_free_i32(fp32);
         }
         opn = "cvt.w.d";
@@ -7334,9 +7328,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_cvts_w(fp0, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "cvt.s.w";
@@ -7347,7 +7341,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp32 = tcg_temp_new_i32();
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
-            gen_load_fpr32(fp32, fs);
+            gen_load_fpr32(ctx, fp32, fs);
             gen_helper_float_cvtd_w(fp64, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
@@ -7364,7 +7358,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             gen_load_fpr64(ctx, fp64, fs);
             gen_helper_float_cvts_l(fp32, fp64);
             tcg_temp_free_i64(fp64);
-            gen_store_fpr32(fp32, fd);
+            gen_store_fpr32(ctx, fp32, fd);
             tcg_temp_free_i32(fp32);
         }
         opn = "cvt.s.l";
@@ -7475,7 +7469,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         break;
     case OPC_MOVCF_PS:
         check_cp1_64bitmode(ctx);
-        gen_movcf_ps(fs, fd, (ft >> 2) & 0x7, ft & 0x1);
+        gen_movcf_ps(ctx, fs, fd, (ft >> 2) & 0x7, ft & 0x1);
         opn = "movcf.ps";
         break;
     case OPC_MOVZ_PS:
@@ -7600,9 +7594,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32h(fp0, fs);
+            gen_load_fpr32h(ctx, fp0, fs);
             gen_helper_float_cvts_pu(fp0, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "cvt.s.pu";
@@ -7624,9 +7618,9 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             gen_helper_float_cvts_pl(fp0, fp0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "cvt.s.pl";
@@ -7637,10 +7631,10 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
-            gen_store_fpr32h(fp0, fd);
-            gen_store_fpr32(fp1, fd);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
+            gen_store_fpr32h(ctx, fp0, fd);
+            gen_store_fpr32(ctx, fp1, fd);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
         }
@@ -7652,10 +7646,10 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32h(fp1, ft);
-            gen_store_fpr32(fp1, fd);
-            gen_store_fpr32h(fp0, fd);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32h(ctx, fp1, ft);
+            gen_store_fpr32(ctx, fp1, fd);
+            gen_store_fpr32h(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
         }
@@ -7667,10 +7661,10 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
 
-            gen_load_fpr32h(fp0, fs);
-            gen_load_fpr32(fp1, ft);
-            gen_store_fpr32(fp1, fd);
-            gen_store_fpr32h(fp0, fd);
+            gen_load_fpr32h(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
+            gen_store_fpr32(ctx, fp1, fd);
+            gen_store_fpr32h(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
         }
@@ -7682,10 +7676,10 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv_i32 fp1 = tcg_temp_new_i32();
 
-            gen_load_fpr32h(fp0, fs);
-            gen_load_fpr32h(fp1, ft);
-            gen_store_fpr32(fp1, fd);
-            gen_store_fpr32h(fp0, fd);
+            gen_load_fpr32h(ctx, fp0, fs);
+            gen_load_fpr32h(ctx, fp1, ft);
+            gen_store_fpr32(ctx, fp1, fd);
+            gen_store_fpr32h(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
         }
@@ -7761,7 +7755,7 @@ static void gen_flt3_ldst (DisasContext *ctx, uint32_t opc,
 
             tcg_gen_qemu_ld32s(t0, t0, ctx->mem_idx);
             tcg_gen_trunc_tl_i32(fp0, t0);
-            gen_store_fpr32(fp0, fd);
+            gen_store_fpr32(ctx, fp0, fd);
             tcg_temp_free_i32(fp0);
         }
         opn = "lwxc1";
@@ -7796,7 +7790,7 @@ static void gen_flt3_ldst (DisasContext *ctx, uint32_t opc,
             TCGv_i32 fp0 = tcg_temp_new_i32();
             TCGv t1 = tcg_temp_new();
 
-            gen_load_fpr32(fp0, fs);
+            gen_load_fpr32(ctx, fp0, fs);
             tcg_gen_extu_i32_tl(t1, fp0);
             tcg_gen_qemu_st32(t1, t0, ctx->mem_idx);
             tcg_temp_free_i32(fp0);
@@ -7857,24 +7851,24 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             tcg_gen_andi_tl(t0, t0, 0x7);
 
             tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
-            gen_load_fpr32(fp, fs);
-            gen_load_fpr32h(fph, fs);
-            gen_store_fpr32(fp, fd);
-            gen_store_fpr32h(fph, fd);
+            gen_load_fpr32(ctx, fp, fs);
+            gen_load_fpr32h(ctx, fph, fs);
+            gen_store_fpr32(ctx, fp, fd);
+            gen_store_fpr32h(ctx, fph, fd);
             tcg_gen_br(l2);
             gen_set_label(l1);
             tcg_gen_brcondi_tl(TCG_COND_NE, t0, 4, l2);
             tcg_temp_free(t0);
 #ifdef TARGET_WORDS_BIGENDIAN
-            gen_load_fpr32(fp, fs);
-            gen_load_fpr32h(fph, ft);
-            gen_store_fpr32h(fp, fd);
-            gen_store_fpr32(fph, fd);
+            gen_load_fpr32(ctx, fp, fs);
+            gen_load_fpr32h(ctx, fph, ft);
+            gen_store_fpr32h(ctx, fp, fd);
+            gen_store_fpr32(ctx, fph, fd);
 #else
-            gen_load_fpr32h(fph, fs);
-            gen_load_fpr32(fp, ft);
-            gen_store_fpr32(fph, fd);
-            gen_store_fpr32h(fp, fd);
+            gen_load_fpr32h(ctx, fph, fs);
+            gen_load_fpr32(ctx, fp, ft);
+            gen_store_fpr32(ctx, fph, fd);
+            gen_store_fpr32h(ctx, fp, fd);
 #endif
             gen_set_label(l2);
             tcg_temp_free_i32(fp);
@@ -7889,13 +7883,13 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             TCGv_i32 fp1 = tcg_temp_new_i32();
             TCGv_i32 fp2 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
-            gen_load_fpr32(fp2, fr);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
+            gen_load_fpr32(ctx, fp2, fr);
             gen_helper_float_muladd_s(fp2, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
-            gen_store_fpr32(fp2, fd);
+            gen_store_fpr32(ctx, fp2, fd);
             tcg_temp_free_i32(fp2);
         }
         opn = "madd.s";
@@ -7944,13 +7938,13 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             TCGv_i32 fp1 = tcg_temp_new_i32();
             TCGv_i32 fp2 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
-            gen_load_fpr32(fp2, fr);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
+            gen_load_fpr32(ctx, fp2, fr);
             gen_helper_float_mulsub_s(fp2, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
-            gen_store_fpr32(fp2, fd);
+            gen_store_fpr32(ctx, fp2, fd);
             tcg_temp_free_i32(fp2);
         }
         opn = "msub.s";
@@ -7999,13 +7993,13 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             TCGv_i32 fp1 = tcg_temp_new_i32();
             TCGv_i32 fp2 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
-            gen_load_fpr32(fp2, fr);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
+            gen_load_fpr32(ctx, fp2, fr);
             gen_helper_float_nmuladd_s(fp2, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
-            gen_store_fpr32(fp2, fd);
+            gen_store_fpr32(ctx, fp2, fd);
             tcg_temp_free_i32(fp2);
         }
         opn = "nmadd.s";
@@ -8054,13 +8048,13 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             TCGv_i32 fp1 = tcg_temp_new_i32();
             TCGv_i32 fp2 = tcg_temp_new_i32();
 
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32(fp1, ft);
-            gen_load_fpr32(fp2, fr);
+            gen_load_fpr32(ctx, fp0, fs);
+            gen_load_fpr32(ctx, fp1, ft);
+            gen_load_fpr32(ctx, fp2, fr);
             gen_helper_float_nmulsub_s(fp2, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
-            gen_store_fpr32(fp2, fd);
+            gen_store_fpr32(ctx, fp2, fd);
             tcg_temp_free_i32(fp2);
         }
         opn = "nmsub.s";
@@ -10985,13 +10979,13 @@ static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx,
                 case MOVF_FMT:
                     switch (fmt) {
                     case FMT_SDPS_S:
-                        gen_movcf_s(rs, rt, cc, 0);
+                        gen_movcf_s(ctx, rs, rt, cc, 0);
                         break;
                     case FMT_SDPS_D:
                         gen_movcf_d(ctx, rs, rt, cc, 0);
                         break;
                     case FMT_SDPS_PS:
-                        gen_movcf_ps(rs, rt, cc, 0);
+                        gen_movcf_ps(ctx, rs, rt, cc, 0);
                         break;
                     default:
                         goto pool32f_invalid;
@@ -11000,13 +10994,13 @@ static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx,
                 case MOVT_FMT:
                     switch (fmt) {
                     case FMT_SDPS_S:
-                        gen_movcf_s(rs, rt, cc, 1);
+                        gen_movcf_s(ctx, rs, rt, cc, 1);
                         break;
                     case FMT_SDPS_D:
                         gen_movcf_d(ctx, rs, rt, cc, 1);
                         break;
                     case FMT_SDPS_PS:
-                        gen_movcf_ps(rs, rt, cc, 1);
+                        gen_movcf_ps(ctx, rs, rt, cc, 1);
                         break;
                     default:
                         goto pool32f_invalid;
-- 
1.7.7.6

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [Qemu-devel] [PATCH 4/5] target-mips: Use TCG registers for the FPU.
  2012-03-30 17:05 [Qemu-devel] [PATCH v2 0/5] tcg-mips: Use TCG registers for the FPU Richard Henderson
                   ` (2 preceding siblings ...)
  2012-03-30 17:05 ` [Qemu-devel] [PATCH 3/5] target-mips: Pass DisasContext to fpr32 load/store routines Richard Henderson
@ 2012-03-30 17:05 ` Richard Henderson
  2012-03-30 17:05 ` [Qemu-devel] [PATCH 5/5] target-mips: Add accessors for the two 32-bit halves of a 64-bit FPR Richard Henderson
  4 siblings, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2012-03-30 17:05 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

With normal FP, this doesn't have much affect on the generated code,
because most of the FP operations are not CONST/PURE, and so we spill
registers in about the same frequency as the explicit load/stores.

But with Loongson multimedia instructions, which are all integral and
whose helpers are in fact CONST+PURE, this greatly improves the code.

On a 64-bit host, rather than over-use the deposit operation, we
create TCG registers for both the 64-bit FPU register as a whole
and the two 32-bit halves.  We only ever reference the whole register
or the two half registers in any one TB, so there's no problem with
aliasing.

On a 32-bit host, we only create the 64-bit FPU registers, and then
directly reference the internal 32-bit TCG register halves as needed.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-mips/translate.c |  161 ++++++++++++++++++++++++++++++++++-------------
 1 files changed, 117 insertions(+), 44 deletions(-)

diff --git a/target-mips/translate.c b/target-mips/translate.c
index 3b86a1c..10d8407 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -479,6 +479,14 @@ static TCGv cpu_dspctrl, btarget, bcond;
 static TCGv_i32 hflags;
 static TCGv_i32 fpu_fcr0, fpu_fcr31;
 
+/* FPU registers.  These alias, but we'll only use one or the other in any
+   one TB based on MIPS_HFLAG_F64.  */
+#if TCG_TARGET_REG_BITS == 64
+static TCGv_i32 fpu_f32[32];
+static TCGv_i32 fpu_fh32[32];
+#endif
+static TCGv_i64 fpu_f64[32];
+
 static uint32_t gen_opc_hflags[OPC_BUF_SIZE];
 
 #include "gen-icount.h"
@@ -527,26 +535,40 @@ enum {
     BS_EXCP     = 3, /* We reached an exception condition */
 };
 
-static const char *regnames[] =
-    { "r0", "at", "v0", "v1", "a0", "a1", "a2", "a3",
-      "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
-      "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
-      "t8", "t9", "k0", "k1", "gp", "sp", "s8", "ra", };
+static const char * const regnames[] = {
+    "r0", "at", "v0", "v1", "a0", "a1", "a2", "a3",
+    "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
+    "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+    "t8", "t9", "k0", "k1", "gp", "sp", "s8", "ra",
+};
 
-static const char *regnames_HI[] =
-    { "HI0", "HI1", "HI2", "HI3", };
+static const char * const regnames_HI[] = {
+    "HI0", "HI1", "HI2", "HI3",
+};
 
-static const char *regnames_LO[] =
-    { "LO0", "LO1", "LO2", "LO3", };
+static const char * const regnames_LO[] = {
+    "LO0", "LO1", "LO2", "LO3",
+};
 
-static const char *regnames_ACX[] =
-    { "ACX0", "ACX1", "ACX2", "ACX3", };
+static const char * const regnames_ACX[] = {
+    "ACX0", "ACX1", "ACX2", "ACX3",
+};
 
-static const char *fregnames[] =
-    { "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",
-      "f8",  "f9",  "f10", "f11", "f12", "f13", "f14", "f15",
-      "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
-      "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", };
+static const char * const fregnames[] = {
+    "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",
+    "f8",  "f9",  "f10", "f11", "f12", "f13", "f14", "f15",
+    "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
+    "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31",
+};
+
+#if TCG_TARGET_REG_BITS == 64
+static const char * const fhregnames[] = {
+    "fh0",  "fh1",  "fh2",  "fh3",  "fh4",  "fh5",  "fh6",  "fh7",
+    "fh8",  "fh9",  "fh10", "fh11", "fh12", "fh13", "fh14", "fh15",
+    "fh16", "fh17", "fh18", "fh19", "fh20", "fh21", "fh22", "fh23",
+    "fh24", "fh25", "fh26", "fh27", "fh28", "fh29", "fh30", "fh31",
+};
+#endif
 
 #ifdef MIPS_DEBUG_DISAS
 #define MIPS_DEBUG(fmt, ...)                         \
@@ -640,55 +662,91 @@ static inline void gen_store_srsgpr (int from, int to)
 }
 
 /* Floating point register moves. */
-static inline void gen_load_fpr32(DisasContext *ctx, TCGv_i32 t, int reg)
+static void gen_load_fpr32(DisasContext *ctx, TCGv_i32 t, int reg)
 {
-    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX]));
+#if TCG_TARGET_REG_BITS == 32
+    tcg_gen_mov_i32(t, TCGV_LOW(fpu_f64[reg]));
+#else
+    if (ctx->hflags & MIPS_HFLAG_F64) {
+        tcg_gen_trunc_i64_i32(t, fpu_f64[reg]);
+    } else {
+        tcg_gen_mov_i32(t, fpu_f32[reg]);
+    }
+#endif
 }
 
-static inline void gen_store_fpr32(DisasContext *ctx, TCGv_i32 t, int reg)
+static void gen_store_fpr32(DisasContext *ctx, TCGv_i32 t, int reg)
 {
-    tcg_gen_st_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX]));
+#if TCG_TARGET_REG_BITS == 32
+    tcg_gen_mov_i32(TCGV_LOW(fpu_f64[reg]), t);
+#else
+    if (ctx->hflags & MIPS_HFLAG_F64) {
+        TCGv_i64 t64 = MAKE_TCGV_I64(GET_TCGV_I32(t));
+        tcg_gen_deposit_i64(fpu_f64[reg], fpu_f64[reg], t64, 0, 32);
+    } else {
+        tcg_gen_mov_i32(fpu_f32[reg], t);
+    }
+#endif
 }
 
-static inline void gen_load_fpr32h(DisasContext *ctx, TCGv_i32 t, int reg)
+static void gen_load_fpr32h(DisasContext *ctx, TCGv_i32 t, int reg)
 {
-    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX]));
+#if TCG_TARGET_REG_BITS == 32
+    tcg_gen_mov_i32(t, TCGV_HIGH(fpu_f64[reg]));
+#else
+    if (ctx->hflags & MIPS_HFLAG_F64) {
+        TCGv_i64 t64 = tcg_temp_new_i64();
+        tcg_gen_shri_i64(t64, fpu_f64[reg], 32);
+        tcg_gen_trunc_i64_i32(t, t64);
+        tcg_temp_free_i64(t64);
+    } else {
+        tcg_gen_mov_i32(t, fpu_fh32[reg]);
+    }
+#endif
 }
 
-static inline void gen_store_fpr32h(DisasContext *ctx, TCGv_i32 t, int reg)
+static void gen_store_fpr32h(DisasContext *ctx, TCGv_i32 t, int reg)
 {
-    tcg_gen_st_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX]));
+#if TCG_TARGET_REG_BITS == 32
+    tcg_gen_mov_i32(TCGV_HIGH(fpu_f64[reg]), t);
+#else
+    if (ctx->hflags & MIPS_HFLAG_F64) {
+        TCGv_i64 t64 = MAKE_TCGV_I64(GET_TCGV_I32(t));
+        tcg_gen_deposit_i64(fpu_f64[reg], fpu_f64[reg], t64, 32, 32);
+    } else {
+        tcg_gen_mov_i32(fpu_fh32[reg], t);
+    }
+#endif
 }
 
-static inline void gen_load_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
+static void gen_load_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
 {
     if (ctx->hflags & MIPS_HFLAG_F64) {
-        tcg_gen_ld_i64(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].d));
+        tcg_gen_mov_i64(t, fpu_f64[reg]);
     } else {
-        TCGv_i32 t0 = tcg_temp_new_i32();
-        TCGv_i32 t1 = tcg_temp_new_i32();
-        gen_load_fpr32(ctx, t0, reg & ~1);
-        gen_load_fpr32(ctx, t1, reg | 1);
-        tcg_gen_concat_i32_i64(t, t0, t1);
-        tcg_temp_free_i32(t0);
-        tcg_temp_free_i32(t1);
+#if TCG_TARGET_REG_BITS == 32
+        tcg_gen_concat32_i64(t, fpu_f64[reg & ~1], fpu_f64[reg | 1]);
+#else
+        tcg_gen_concat_i32_i64(t, fpu_f32[reg & ~1], fpu_f32[reg | 1]);
+#endif
     }
 }
 
-static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
+static void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
 {
     if (ctx->hflags & MIPS_HFLAG_F64) {
-        tcg_gen_st_i64(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].d));
+        tcg_gen_mov_i64(fpu_f64[reg], t);
     } else {
-        TCGv_i64 t0 = tcg_temp_new_i64();
-        TCGv_i32 t1 = tcg_temp_new_i32();
-        tcg_gen_trunc_i64_i32(t1, t);
-        gen_store_fpr32(ctx, t1, reg & ~1);
-        tcg_gen_shri_i64(t0, t, 32);
-        tcg_gen_trunc_i64_i32(t1, t0);
-        gen_store_fpr32(ctx, t1, reg | 1);
-        tcg_temp_free_i32(t1);
-        tcg_temp_free_i64(t0);
+#if TCG_TARGET_REG_BITS == 32
+        tcg_gen_mov_i32(TCGV_LOW(fpu_f64[reg & ~1]), TCGV_LOW(t));
+        tcg_gen_mov_i32(TCGV_LOW(fpu_f64[reg | 1]), TCGV_HIGH(t));
+#else
+        TCGv_i64 t64 = tcg_temp_new_i64();
+        tcg_gen_shri_i64(t64, t, 32);
+        tcg_gen_trunc_i64_i32(fpu_f32[reg | 1], t64);
+        tcg_temp_free_i64(t64);
+        tcg_gen_trunc_i64_i32(fpu_f32[reg & ~1], t);
+#endif
     }
 }
 
@@ -12682,6 +12740,21 @@ static void mips_tcg_init(void)
                                        offsetof(CPUMIPSState, active_fpu.fcr31),
                                        "fcr31");
 
+#if TCG_TARGET_REG_BITS == 64
+    for (i = 0; i < 32; i++) {
+        int off = offsetof(CPUMIPSState, active_fpu.fpr[i].w[FP_ENDIAN_IDX]);
+        fpu_f32[i] = tcg_global_mem_new_i32(TCG_AREG0, off, fregnames[i]);
+    }
+    for (i = 0; i < 32; i++) {
+        int off = offsetof(CPUMIPSState, active_fpu.fpr[i].w[!FP_ENDIAN_IDX]);
+        fpu_fh32[i] = tcg_global_mem_new_i32(TCG_AREG0, off, fhregnames[i]);
+    }
+#endif
+    for (i = 0; i < 32; i++) {
+        int off = offsetof(CPUMIPSState, active_fpu.fpr[i].w[FP_ENDIAN_IDX]);
+        fpu_f64[i] = tcg_global_mem_new_i64(TCG_AREG0, off, fregnames[i]);
+    }
+
     /* register helpers */
 #define GEN_HELPER 2
 #include "helper.h"
-- 
1.7.7.6

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [Qemu-devel] [PATCH 5/5] target-mips: Add accessors for the two 32-bit halves of a 64-bit FPR
  2012-03-30 17:05 [Qemu-devel] [PATCH v2 0/5] tcg-mips: Use TCG registers for the FPU Richard Henderson
                   ` (3 preceding siblings ...)
  2012-03-30 17:05 ` [Qemu-devel] [PATCH 4/5] target-mips: Use TCG registers for the FPU Richard Henderson
@ 2012-03-30 17:05 ` Richard Henderson
  4 siblings, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2012-03-30 17:05 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Not much used yet, but more users to come.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-mips/translate.c |   46 ++++++++++++++++++++++++++++++----------------
 1 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/target-mips/translate.c b/target-mips/translate.c
index 10d8407..2396339 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -750,6 +750,29 @@ static void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
     }
 }
 
+static void gen_load_fpr_pair(DisasContext *ctx, TCGv_i32 tl,
+                              TCGv_i32 th, int reg)
+{
+    gen_load_fpr32(ctx, tl, reg);
+    gen_load_fpr32h(ctx, th, reg);
+}
+
+static void gen_store_fpr_pair(DisasContext *ctx, TCGv_i32 tl,
+                               TCGv_i32 th, int reg)
+{
+#if TCG_TARGET_REG_BITS == 32
+    tcg_gen_mov_i32(TCGV_LOW(fpu_f64[reg]), tl);
+    tcg_gen_mov_i32(TCGV_HIGH(fpu_f64[reg]), th);
+#else
+    if (ctx->hflags & MIPS_HFLAG_F64) {
+        tcg_gen_concat32_i64(fpu_f64[reg], tl, th);
+    } else {
+        tcg_gen_mov_i32(fpu_f32[reg], tl);
+        tcg_gen_mov_i32(fpu_fh32[reg], th);
+    }
+#endif
+}
+
 static inline int get_fp_bit (int cc)
 {
     if (cc)
@@ -7691,8 +7714,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
 
             gen_load_fpr32(ctx, fp0, fs);
             gen_load_fpr32(ctx, fp1, ft);
-            gen_store_fpr32h(ctx, fp0, fd);
-            gen_store_fpr32(ctx, fp1, fd);
+            gen_store_fpr_pair(ctx, fp1, fp0, fd);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
         }
@@ -7706,8 +7728,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
 
             gen_load_fpr32(ctx, fp0, fs);
             gen_load_fpr32h(ctx, fp1, ft);
-            gen_store_fpr32(ctx, fp1, fd);
-            gen_store_fpr32h(ctx, fp0, fd);
+            gen_store_fpr_pair(ctx, fp1, fp0, fd);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
         }
@@ -7721,8 +7742,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
 
             gen_load_fpr32h(ctx, fp0, fs);
             gen_load_fpr32(ctx, fp1, ft);
-            gen_store_fpr32(ctx, fp1, fd);
-            gen_store_fpr32h(ctx, fp0, fd);
+            gen_store_fpr_pair(ctx, fp1, fp0, fd);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
         }
@@ -7736,8 +7756,7 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
 
             gen_load_fpr32h(ctx, fp0, fs);
             gen_load_fpr32h(ctx, fp1, ft);
-            gen_store_fpr32(ctx, fp1, fd);
-            gen_store_fpr32h(ctx, fp0, fd);
+            gen_store_fpr_pair(ctx, fp1, fp0, fd);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
         }
@@ -7909,10 +7928,8 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             tcg_gen_andi_tl(t0, t0, 0x7);
 
             tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
-            gen_load_fpr32(ctx, fp, fs);
-            gen_load_fpr32h(ctx, fph, fs);
-            gen_store_fpr32(ctx, fp, fd);
-            gen_store_fpr32h(ctx, fph, fd);
+            gen_load_fpr_pair(ctx, fp, fph, fs);
+            gen_store_fpr_pair(ctx, fp, fph, fd);
             tcg_gen_br(l2);
             gen_set_label(l1);
             tcg_gen_brcondi_tl(TCG_COND_NE, t0, 4, l2);
@@ -7920,14 +7937,11 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
 #ifdef TARGET_WORDS_BIGENDIAN
             gen_load_fpr32(ctx, fp, fs);
             gen_load_fpr32h(ctx, fph, ft);
-            gen_store_fpr32h(ctx, fp, fd);
-            gen_store_fpr32(ctx, fph, fd);
 #else
             gen_load_fpr32h(ctx, fph, fs);
             gen_load_fpr32(ctx, fp, ft);
-            gen_store_fpr32(ctx, fph, fd);
-            gen_store_fpr32h(ctx, fp, fd);
 #endif
+            gen_store_fpr_pair(ctx, fph, fp, fd);
             gen_set_label(l2);
             tcg_temp_free_i32(fp);
             tcg_temp_free_i32(fph);
-- 
1.7.7.6

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2012-03-30 17:06 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-03-30 17:05 [Qemu-devel] [PATCH v2 0/5] tcg-mips: Use TCG registers for the FPU Richard Henderson
2012-03-30 17:05 ` [Qemu-devel] [PATCH 1/5] target-mips: Set opn in gen_ldst_multiple Richard Henderson
2012-03-30 17:05 ` [Qemu-devel] [PATCH 2/5] target-mips: Fix MIPS_DEBUG Richard Henderson
2012-03-30 17:05 ` [Qemu-devel] [PATCH 3/5] target-mips: Pass DisasContext to fpr32 load/store routines Richard Henderson
2012-03-30 17:05 ` [Qemu-devel] [PATCH 4/5] target-mips: Use TCG registers for the FPU Richard Henderson
2012-03-30 17:05 ` [Qemu-devel] [PATCH 5/5] target-mips: Add accessors for the two 32-bit halves of a 64-bit FPR Richard Henderson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.