All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups
@ 2014-09-15 15:03 Paolo Bonzini
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 01/14] ppc: do not look at the MMU index to detect PR/HV mode Paolo Bonzini
                   ` (15 more replies)
  0 siblings, 16 replies; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-15 15:03 UTC (permalink / raw)
  To: qemu-devel; +Cc: tommusta, agraf

Patches 1-3 speed up softmmu emulation by avoiding TLB flushes on changes
to IR/DR.

Patches 4-14 speed up emulation in general by rewriting the handling of
condition registers.

Paolo Bonzini (14):
  ppc: do not look at the MMU index to detect PR/HV mode
  softmmu: support up to 12 MMU modes
  target-ppc: use separate indices for various translation modes
  ppc: introduce ppc_get_cr and ppc_set_cr
  ppc: use CRF_* in fpu_helper.c
  ppc: introduce helpers for mfocrf/mtocrf
  ppc: reorganize gen_compute_fprf
  ppc: introduce gen_op_mfcr/gen_op_mtcr
  ppc: introduce ppc_get_crf and ppc_set_crf
  ppc: use movcond for isel
  ppc: store CR registers in 32 1-bit registers
  ppc: use movcond to implement evsel
  ppc: inline ppc_get_crf/ppc_set_crf when clearer
  ppc: dump all 32 CR bits

 include/exec/cpu_ldst.h  | 120 ++++++++-
 linux-user/elfload.c     |   4 +-
 linux-user/main.c        |   9 +-
 linux-user/signal.c      |   8 +-
 monitor.c                |   9 +-
 target-ppc/cpu.h         |  66 ++++-
 target-ppc/excp_helper.c |   5 +-
 target-ppc/fpu_helper.c  |  82 +++---
 target-ppc/gdbstub.c     |  42 +--
 target-ppc/helper.h      |   9 +-
 target-ppc/helper_regs.h |  11 +-
 target-ppc/int_helper.c  |  46 +++-
 target-ppc/kvm.c         |  11 +-
 target-ppc/machine.c     |   9 +
 target-ppc/translate.c   | 686 ++++++++++++++++++++++++-----------------------
 15 files changed, 631 insertions(+), 486 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Qemu-devel] [PATCH 01/14] ppc: do not look at the MMU index to detect PR/HV mode
  2014-09-15 15:03 [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Paolo Bonzini
@ 2014-09-15 15:03 ` Paolo Bonzini
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 02/14] softmmu: support up to 12 MMU modes Paolo Bonzini
                   ` (14 subsequent siblings)
  15 siblings, 0 replies; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-15 15:03 UTC (permalink / raw)
  To: qemu-devel; +Cc: tommusta, agraf

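The MMU index is an internal detail that should not be needed by the
translator (except to generate loads and stores).  Record the privilege
state in the DisasContext instead: ctx->pr mirrors MSR[PR], and ctx->hv
is set when MSR[HV] is set and MSR[PR] is clear.  Use these flags for
privilege checks rather than inferring the mode from ctx->mem_idx.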

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target-ppc/translate.c | 165 +++++++++++++++++++++++--------------------------
 1 file changed, 77 insertions(+), 88 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index ff0dc13..2c9d8aa 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -189,6 +189,7 @@ typedef struct DisasContext {
     uint32_t opcode;
     uint32_t exception;
     /* Routine used to access memory */
+    bool pr, hv;
     int mem_idx;
     int access_type;
     /* Translation flags */
@@ -643,20 +644,6 @@ static opc_handler_t invalid_handler = {
     .handler = gen_invalid,
 };
 
-#if defined(TARGET_PPC64)
-/* NOTE: as this time, the only use of is_user_mode() is in 64 bit code.  And */
-/*       so the function is wrapped in the standard 64-bit ifdef in order to  */
-/*       avoid compiler warnings in 32-bit implementations.                   */
-static bool is_user_mode(DisasContext *ctx)
-{
-#if defined(CONFIG_USER_ONLY)
-    return true;
-#else
-    return ctx->mem_idx == 0;
-#endif
-}
-#endif
-
 /***                           Integer comparison                          ***/
 
 static inline void gen_op_cmp(TCGv arg0, TCGv arg1, int s, int crf)
@@ -1456,25 +1443,25 @@ static void gen_or(DisasContext *ctx)
             break;
 #if !defined(CONFIG_USER_ONLY)
         case 31:
-            if (ctx->mem_idx > 0) {
+            if (!ctx->pr) {
                 /* Set process priority to very low */
                 prio = 1;
             }
             break;
         case 5:
-            if (ctx->mem_idx > 0) {
+            if (!ctx->pr) {
                 /* Set process priority to medium-hight */
                 prio = 5;
             }
             break;
         case 3:
-            if (ctx->mem_idx > 0) {
+            if (!ctx->pr) {
                 /* Set process priority to high */
                 prio = 6;
             }
             break;
         case 7:
-            if (ctx->mem_idx > 1) {
+            if (ctx->hv) {
                 /* Set process priority to very high */
                 prio = 7;
             }
@@ -2903,7 +2890,7 @@ static void gen_lq(DisasContext *ctx)
     bool legal_in_user_mode = (ctx->insns_flags2 & PPC2_LSQ_ISA207) != 0;
     bool le_is_supported = (ctx->insns_flags2 & PPC2_LSQ_ISA207) != 0;
 
-    if (!legal_in_user_mode && is_user_mode(ctx)) {
+    if (!legal_in_user_mode && ctx->pr) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -3026,7 +3013,7 @@ static void gen_std(DisasContext *ctx)
         bool legal_in_user_mode = (ctx->insns_flags2 & PPC2_LSQ_ISA207) != 0;
         bool le_is_supported = (ctx->insns_flags2 & PPC2_LSQ_ISA207) != 0;
 
-        if (!legal_in_user_mode && is_user_mode(ctx)) {
+        if (!legal_in_user_mode && ctx->pr) {
             gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
             return;
         }
@@ -4004,14 +3991,14 @@ static void gen_mcrf(DisasContext *ctx)
 
 /***                           System linkage                              ***/
 
-/* rfi (mem_idx only) */
+/* rfi (supervisor only) */
 static void gen_rfi(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
     /* Restore CPU state */
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -4028,7 +4015,7 @@ static void gen_rfid(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
     /* Restore CPU state */
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -4044,7 +4031,7 @@ static void gen_hrfid(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
     /* Restore CPU state */
-    if (unlikely(ctx->mem_idx <= 1)) {
+    if (unlikely(!ctx->hv)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -4213,7 +4200,7 @@ static void gen_mfmsr(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
         return;
     }
@@ -4237,9 +4224,9 @@ static inline void gen_op_mfspr(DisasContext *ctx)
     uint32_t sprn = SPR(ctx->opcode);
 
 #if !defined(CONFIG_USER_ONLY)
-    if (ctx->mem_idx == 2)
+    if (ctx->hv)
         read_cb = ctx->spr_cb[sprn].hea_read;
-    else if (ctx->mem_idx)
+    else if (!ctx->pr)
         read_cb = ctx->spr_cb[sprn].oea_read;
     else
 #endif
@@ -4317,7 +4304,7 @@ static void gen_mtmsrd(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
         return;
     }
@@ -4348,7 +4335,7 @@ static void gen_mtmsr(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
         return;
     }
@@ -4388,9 +4375,9 @@ static void gen_mtspr(DisasContext *ctx)
     uint32_t sprn = SPR(ctx->opcode);
 
 #if !defined(CONFIG_USER_ONLY)
-    if (ctx->mem_idx == 2)
+    if (ctx->hv)
         write_cb = ctx->spr_cb[sprn].hea_write;
-    else if (ctx->mem_idx)
+    else if (!ctx->pr)
         write_cb = ctx->spr_cb[sprn].oea_write;
     else
 #endif
@@ -4437,7 +4424,7 @@ static void gen_dcbi(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
     TCGv EA, val;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -4574,7 +4561,7 @@ static void gen_mfsr(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
 #else
     TCGv t0;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
         return;
     }
@@ -4591,7 +4578,7 @@ static void gen_mfsrin(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
 #else
     TCGv t0;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
         return;
     }
@@ -4610,7 +4597,7 @@ static void gen_mtsr(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
 #else
     TCGv t0;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
         return;
     }
@@ -4627,7 +4614,7 @@ static void gen_mtsrin(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
 #else
     TCGv t0;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
         return;
     }
@@ -4649,7 +4636,7 @@ static void gen_mfsr_64b(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
 #else
     TCGv t0;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
         return;
     }
@@ -4666,7 +4653,7 @@ static void gen_mfsrin_64b(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
 #else
     TCGv t0;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
         return;
     }
@@ -4685,7 +4672,7 @@ static void gen_mtsr_64b(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
 #else
     TCGv t0;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
         return;
     }
@@ -4702,7 +4689,7 @@ static void gen_mtsrin_64b(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
 #else
     TCGv t0;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
         return;
     }
@@ -4720,7 +4707,7 @@ static void gen_slbmte(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
         return;
     }
@@ -4734,7 +4721,7 @@ static void gen_slbmfee(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
         return;
     }
@@ -4748,7 +4735,7 @@ static void gen_slbmfev(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
         return;
     }
@@ -4759,7 +4746,7 @@ static void gen_slbmfev(DisasContext *ctx)
 #endif /* defined(TARGET_PPC64) */
 
 /***                      Lookaside buffer management                      ***/
-/* Optional & mem_idx only: */
+/* Optional & supervisor only: */
 
 /* tlbia */
 static void gen_tlbia(DisasContext *ctx)
@@ -4767,7 +4754,7 @@ static void gen_tlbia(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -4781,7 +4768,7 @@ static void gen_tlbiel(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -4795,7 +4782,7 @@ static void gen_tlbie(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -4816,7 +4803,7 @@ static void gen_tlbsync(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -4834,7 +4821,7 @@ static void gen_slbia(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -4848,7 +4835,7 @@ static void gen_slbie(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -5556,7 +5543,7 @@ static void gen_mfrom(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -5572,7 +5559,7 @@ static void gen_tlbld_6xx(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -5586,7 +5573,7 @@ static void gen_tlbli_6xx(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -5602,7 +5589,7 @@ static void gen_tlbld_74xx(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -5616,7 +5603,7 @@ static void gen_tlbli_74xx(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -5639,7 +5626,7 @@ static void gen_cli(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -5660,7 +5647,7 @@ static void gen_mfsri(DisasContext *ctx)
     int ra = rA(ctx->opcode);
     int rd = rD(ctx->opcode);
     TCGv t0;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -5681,7 +5668,7 @@ static void gen_rac(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
     TCGv t0;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -5697,7 +5684,7 @@ static void gen_rfsvc(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -5859,7 +5846,7 @@ static void gen_tlbiva(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
     TCGv t0;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6092,7 +6079,7 @@ static void gen_mfdcr(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
 #else
     TCGv dcrn;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
         return;
     }
@@ -6111,7 +6098,7 @@ static void gen_mtdcr(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
 #else
     TCGv dcrn;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
         return;
     }
@@ -6130,7 +6117,7 @@ static void gen_mfdcrx(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
         return;
     }
@@ -6149,7 +6136,7 @@ static void gen_mtdcrx(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
         return;
     }
@@ -6187,7 +6174,7 @@ static void gen_dccci(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6202,7 +6189,7 @@ static void gen_dcread(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
     TCGv EA, val;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6232,7 +6219,7 @@ static void gen_iccci(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6246,7 +6233,7 @@ static void gen_icread(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6254,13 +6241,13 @@ static void gen_icread(DisasContext *ctx)
 #endif
 }
 
-/* rfci (mem_idx only) */
+/* rfci (supervisor only) */
 static void gen_rfci_40x(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6275,7 +6262,7 @@ static void gen_rfci(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6293,7 +6280,7 @@ static void gen_rfdi(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6309,7 +6296,7 @@ static void gen_rfmci(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6327,7 +6314,7 @@ static void gen_tlbre_40x(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6354,7 +6341,7 @@ static void gen_tlbsx_40x(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
     TCGv t0;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6378,7 +6365,7 @@ static void gen_tlbwe_40x(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6406,7 +6393,7 @@ static void gen_tlbre_440(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6435,7 +6422,7 @@ static void gen_tlbsx_440(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
     TCGv t0;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6459,7 +6446,7 @@ static void gen_tlbwe_440(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6489,7 +6476,7 @@ static void gen_tlbre_booke206(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6505,7 +6492,7 @@ static void gen_tlbsx_booke206(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
     TCGv t0;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6529,7 +6516,7 @@ static void gen_tlbwe_booke206(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6544,7 +6531,7 @@ static void gen_tlbivax_booke206(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
     TCGv t0;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6563,7 +6550,7 @@ static void gen_tlbilx_booke206(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
     TCGv t0;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6598,7 +6585,7 @@ static void gen_wrtee(DisasContext *ctx)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
     TCGv t0;
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6620,7 +6607,7 @@ static void gen_wrteei(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(!ctx->mem_idx)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6673,7 +6660,7 @@ static void gen_msgclr(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(ctx->mem_idx == 0)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -6687,7 +6674,7 @@ static void gen_msgsnd(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(ctx->mem_idx == 0)) {
+    if (unlikely(ctx->pr)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -11302,6 +11289,8 @@ static inline void gen_intermediate_code_internal(PowerPCCPU *cpu,
     ctx.tb = tb;
     ctx.exception = POWERPC_EXCP_NONE;
     ctx.spr_cb = env->spr_cb;
+    ctx.pr = msr_pr;
+    ctx.hv = !msr_pr && msr_hv;
     ctx.mem_idx = env->mmu_idx;
     ctx.insns_flags = env->insns_flags;
     ctx.insns_flags2 = env->insns_flags2;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Qemu-devel] [PATCH 02/14] softmmu: support up to 12 MMU modes
  2014-09-15 15:03 [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Paolo Bonzini
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 01/14] ppc: do not look at the MMU index to detect PR/HV mode Paolo Bonzini
@ 2014-09-15 15:03 ` Paolo Bonzini
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 03/14] target-ppc: use separate indices for various translation modes Paolo Bonzini
                   ` (13 subsequent siblings)
  15 siblings, 0 replies; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-15 15:03 UTC (permalink / raw)
  To: qemu-devel; +Cc: tommusta, agraf

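Raise the limit on NB_MMU_MODES from 6 to 12 by instantiating
cpu_ldst_template.h for MMU indices 6 to 11 as well.  The PowerPC
target needs 12 modes once IR/DR are encoded in the MMU index.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>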
---
 include/exec/cpu_ldst.h | 120 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 117 insertions(+), 3 deletions(-)

diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index e5550e7..a21ae3e 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -290,9 +290,123 @@ uint64_t helper_ldq_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
 #undef MEMSUFFIX
 #endif /* (NB_MMU_MODES >= 6) */
 
-#if (NB_MMU_MODES > 6)
-#error "NB_MMU_MODES > 6 is not supported for now"
-#endif /* (NB_MMU_MODES > 6) */
+#if (NB_MMU_MODES >= 7)
+
+#define CPU_MMU_INDEX 6
+#define MEMSUFFIX MMU_MODE6_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 7) */
+
+#if (NB_MMU_MODES >= 8)
+
+#define CPU_MMU_INDEX 7
+#define MEMSUFFIX MMU_MODE7_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 8) */
+
+#if (NB_MMU_MODES >= 9)
+
+#define CPU_MMU_INDEX 8
+#define MEMSUFFIX MMU_MODE8_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 9) */
+
+#if (NB_MMU_MODES >= 10)
+
+#define CPU_MMU_INDEX 9
+#define MEMSUFFIX MMU_MODE9_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 10) */
+
+#if (NB_MMU_MODES >= 11)
+
+#define CPU_MMU_INDEX 10
+#define MEMSUFFIX MMU_MODE10_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 11) */
+
+#if (NB_MMU_MODES >= 12)
+
+#define CPU_MMU_INDEX 11
+#define MEMSUFFIX MMU_MODE11_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 12) */
+
+#if (NB_MMU_MODES > 12)
+#error "NB_MMU_MODES > 12 is not supported for now"
+#endif /* (NB_MMU_MODES > 12) */
 
 /* these access are slower, they must be as rare as possible */
 #define CPU_MMU_INDEX (cpu_mmu_index(env))
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Qemu-devel] [PATCH 03/14] target-ppc: use separate indices for various translation modes
  2014-09-15 15:03 [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Paolo Bonzini
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 01/14] ppc: do not look at the MMU index to detect PR/HV mode Paolo Bonzini
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 02/14] softmmu: support up to 12 MMU modes Paolo Bonzini
@ 2014-09-15 15:03 ` Paolo Bonzini
  2014-09-16 17:20   ` Tom Musta
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 04/14] ppc: introduce ppc_get_cr and ppc_set_cr Paolo Bonzini
                   ` (12 subsequent siblings)
  15 siblings, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-15 15:03 UTC (permalink / raw)
  To: qemu-devel; +Cc: tommusta, agraf

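PowerPC TCG flushes the TLB on every IR/DR change, which basically
means on every user<->kernel context switch.  Encode IR/DR in the
MMU index instead, so that each combination of privilege level and
translation mode uses its own MMU index and the TLB flushes on IR/DR
changes can be dropped.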

This brings the number of TLB flushes down from ~900000 to ~50000
for starting up the Debian installer, which is in line with x86
and gives a ~10% performance improvement.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target-ppc/cpu.h         |  7 ++-----
 target-ppc/excp_helper.c |  3 ---
 target-ppc/helper_regs.h | 11 ++++++-----
 3 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index b64c652..c29ce3b 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -922,7 +922,8 @@ struct ppc_segment_page_sizes {
 
 /*****************************************************************************/
 /* The whole PowerPC CPU context */
-#define NB_MMU_MODES 3
+#define NB_MMU_MODES 12
+#define MMU_USER_IDX 3  /* PR=IR=DR=1 */
 
 #define PPC_CPU_OPCODES_LEN 0x40
 
@@ -1231,10 +1232,6 @@ static inline CPUPPCState *cpu_init(const char *cpu_model)
 #define cpu_list ppc_cpu_list
 
 /* MMU modes definitions */
-#define MMU_MODE0_SUFFIX _user
-#define MMU_MODE1_SUFFIX _kernel
-#define MMU_MODE2_SUFFIX _hypv
-#define MMU_USER_IDX 0
 static inline int cpu_mmu_index (CPUPPCState *env)
 {
     return env->mmu_idx;
diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index 922e86d..96ad9d7 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -623,9 +623,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
 
     if (env->spr[SPR_LPCR] & LPCR_AIL) {
         new_msr |= (1 << MSR_IR) | (1 << MSR_DR);
-    } else if (msr & ((1 << MSR_IR) | (1 << MSR_DR))) {
-        /* If we disactivated any translation, flush TLBs */
-        tlb_flush(cs, 1);
     }
 
 #ifdef TARGET_PPC64
diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
index 271fddf..23b8ded 100644
--- a/target-ppc/helper_regs.h
+++ b/target-ppc/helper_regs.h
@@ -41,12 +41,15 @@ static inline void hreg_swap_gpr_tgpr(CPUPPCState *env)
 
 static inline void hreg_compute_mem_idx(CPUPPCState *env)
 {
+    int high;
+
     /* Precompute MMU index */
     if (msr_pr == 0 && msr_hv != 0) {
-        env->mmu_idx = 2;
+        high = 2;
     } else {
-        env->mmu_idx = 1 - msr_pr;
+        high = 1 - msr_pr;
     }
+    env->mmu_idx = (high << 2) | (msr_ir << 1) | msr_dr;
 }
 
 static inline void hreg_compute_hflags(CPUPPCState *env)
@@ -56,7 +59,7 @@ static inline void hreg_compute_hflags(CPUPPCState *env)
     /* We 'forget' FE0 & FE1: we'll never generate imprecise exceptions */
     hflags_mask = (1 << MSR_VR) | (1 << MSR_AP) | (1 << MSR_SA) |
         (1 << MSR_PR) | (1 << MSR_FP) | (1 << MSR_SE) | (1 << MSR_BE) |
-        (1 << MSR_LE) | (1 << MSR_VSX);
+        (1 << MSR_LE) | (1 << MSR_VSX) | (1 << MSR_IR) | (1 << MSR_DR);
     hflags_mask |= (1ULL << MSR_CM) | (1ULL << MSR_SF) | MSR_HVB;
     hreg_compute_mem_idx(env);
     env->hflags = env->msr & hflags_mask;
@@ -82,8 +85,6 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
     }
     if (((value >> MSR_IR) & 1) != msr_ir ||
         ((value >> MSR_DR) & 1) != msr_dr) {
-        /* Flush all tlb when changing translation mode */
-        tlb_flush(cs, 1);
         excp = POWERPC_EXCP_NONE;
         cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
     }
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Qemu-devel] [PATCH 04/14] ppc: introduce ppc_get_cr and ppc_set_cr
  2014-09-15 15:03 [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Paolo Bonzini
                   ` (2 preceding siblings ...)
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 03/14] target-ppc: use separate indices for various translation modes Paolo Bonzini
@ 2014-09-15 15:03 ` Paolo Bonzini
  2014-09-18 19:24   ` Tom Musta
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 05/14] ppc: use CRF_* in fpu_helper.c Paolo Bonzini
                   ` (11 subsequent siblings)
  15 siblings, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-15 15:03 UTC (permalink / raw)
  To: qemu-devel; +Cc: tommusta, agraf

Add new functions that put together all 32 CR bits.  This avoids easy
off-by-one mistakes such as the one fixed by commit f13f529
(ppc: fix monitor access to CR, 2014-08-28).

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
	v1->v2: replaces "ppc: use ARRAY_SIZE in gdbstub.c"

 linux-user/elfload.c |  4 +---
 linux-user/signal.c  |  8 ++------
 monitor.c            |  9 +--------
 target-ppc/cpu.h     | 20 ++++++++++++++++++++
 target-ppc/gdbstub.c | 42 ++++++++----------------------------------
 target-ppc/kvm.c     | 11 ++---------
 6 files changed, 34 insertions(+), 60 deletions(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index bea803b..a7d1714 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -857,9 +857,7 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUPPCState *en
     (*regs)[36] = tswapreg(env->lr);
     (*regs)[37] = tswapreg(env->xer);
 
-    for (i = 0; i < ARRAY_SIZE(env->crf); i++) {
-        ccr |= env->crf[i] << (32 - ((i + 1) * 4));
-    }
+    ccr = ppc_get_cr(env);
     (*regs)[38] = tswapreg(ccr);
 }
 
diff --git a/linux-user/signal.c b/linux-user/signal.c
index e11b208..97c3107 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -4534,9 +4534,7 @@ static void save_user_regs(CPUPPCState *env, struct target_mcontext *frame)
     __put_user(env->lr, &frame->mc_gregs[TARGET_PT_LNK]);
     __put_user(env->xer, &frame->mc_gregs[TARGET_PT_XER]);
 
-    for (i = 0; i < ARRAY_SIZE(env->crf); i++) {
-        ccr |= env->crf[i] << (32 - ((i + 1) * 4));
-    }
+    ccr = ppc_get_cr(env);
     __put_user(ccr, &frame->mc_gregs[TARGET_PT_CCR]);
 
     /* Save Altivec registers if necessary.  */
@@ -4616,9 +4614,7 @@ static void restore_user_regs(CPUPPCState *env,
     __get_user(env->xer, &frame->mc_gregs[TARGET_PT_XER]);
     __get_user(ccr, &frame->mc_gregs[TARGET_PT_CCR]);
 
-    for (i = 0; i < ARRAY_SIZE(env->crf); i++) {
-        env->crf[i] = (ccr >> (32 - ((i + 1) * 4))) & 0xf;
-    }
+    ppc_set_cr(env, ccr);
 
     if (!sig) {
         env->gpr[2] = save_r2;
diff --git a/monitor.c b/monitor.c
index ec73dd4..80acf25 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2963,14 +2963,7 @@ static target_long monitor_get_pc (const struct MonitorDef *md, int val)
 static target_long monitor_get_ccr (const struct MonitorDef *md, int val)
 {
     CPUArchState *env = mon_get_cpu();
-    unsigned int u;
-    int i;
-
-    u = 0;
-    for (i = 0; i < 8; i++)
-        u |= env->crf[i] << (32 - (4 * (i + 1)));
-
-    return u;
+    return ppc_get_cr(env);
 }
 
 static target_long monitor_get_msr (const struct MonitorDef *md, int val)
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index c29ce3b..0c0196d 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1197,6 +1197,26 @@ void ppc_tlb_invalidate_one (CPUPPCState *env, target_ulong addr);
 
 void store_fpscr(CPUPPCState *env, uint64_t arg, uint32_t mask);
 
+static inline uint32_t ppc_get_cr(const CPUPPCState *env)
+{
+    uint32_t cr = 0;
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(env->crf); i++) {
+        cr |= env->crf[i] << (32 - ((i + 1) * 4));
+    }
+    return cr;
+}
+
+static inline void ppc_set_cr(CPUPPCState *env, uint32_t cr)
+{
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(env->crf); i++) {
+        env->crf[i] = (cr >> (32 - ((i + 1) * 4))) & 0xF;
+    }
+}
+
 static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn)
 {
     uint64_t gprv;
diff --git a/target-ppc/gdbstub.c b/target-ppc/gdbstub.c
index 14675f4..11d138e 100644
--- a/target-ppc/gdbstub.c
+++ b/target-ppc/gdbstub.c
@@ -135,15 +135,8 @@ int ppc_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n)
             gdb_get_regl(mem_buf, env->msr);
             break;
         case 66:
-            {
-                uint32_t cr = 0;
-                int i;
-                for (i = 0; i < 8; i++) {
-                    cr |= env->crf[i] << (32 - ((i + 1) * 4));
-                }
-                gdb_get_reg32(mem_buf, cr);
-                break;
-            }
+            gdb_get_reg32(mem_buf, ppc_get_cr(env));
+            break;
         case 67:
             gdb_get_regl(mem_buf, env->lr);
             break;
@@ -191,15 +184,8 @@ int ppc_cpu_gdb_read_register_apple(CPUState *cs, uint8_t *mem_buf, int n)
             gdb_get_reg64(mem_buf, env->msr);
             break;
         case 66 + 32:
-            {
-                uint32_t cr = 0;
-                int i;
-                for (i = 0; i < 8; i++) {
-                    cr |= env->crf[i] << (32 - ((i + 1) * 4));
-                }
-                gdb_get_reg32(mem_buf, cr);
-                break;
-            }
+            gdb_get_reg32(mem_buf, ppc_get_cr(env));
+            break;
         case 67 + 32:
             gdb_get_reg64(mem_buf, env->lr);
             break;
@@ -243,14 +229,8 @@ int ppc_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
             ppc_store_msr(env, ldtul_p(mem_buf));
             break;
         case 66:
-            {
-                uint32_t cr = ldl_p(mem_buf);
-                int i;
-                for (i = 0; i < 8; i++) {
-                    env->crf[i] = (cr >> (32 - ((i + 1) * 4))) & 0xF;
-                }
-                break;
-            }
+            ppc_set_cr(env, ldl_p(mem_buf));
+            break;
         case 67:
             env->lr = ldtul_p(mem_buf);
             break;
@@ -293,14 +273,8 @@ int ppc_cpu_gdb_write_register_apple(CPUState *cs, uint8_t *mem_buf, int n)
             ppc_store_msr(env, ldq_p(mem_buf));
             break;
         case 66 + 32:
-            {
-                uint32_t cr = ldl_p(mem_buf);
-                int i;
-                for (i = 0; i < 8; i++) {
-                    env->crf[i] = (cr >> (32 - ((i + 1) * 4))) & 0xF;
-                }
-                break;
-            }
+            ppc_set_cr(env, ldl_p(mem_buf));
+            break;
         case 67 + 32:
             env->lr = ldq_p(mem_buf);
             break;
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 9c23c6b..e541b9e 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -831,10 +831,7 @@ int kvm_arch_put_registers(CPUState *cs, int level)
     for (i = 0;i < 32; i++)
         regs.gpr[i] = env->gpr[i];
 
-    regs.cr = 0;
-    for (i = 0; i < 8; i++) {
-        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
-    }
+    regs.cr = ppc_get_cr(env);
 
     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
     if (ret < 0)
@@ -955,11 +952,7 @@ int kvm_arch_get_registers(CPUState *cs)
     if (ret < 0)
         return ret;
 
-    cr = regs.cr;
-    for (i = 7; i >= 0; i--) {
-        env->crf[i] = cr & 15;
-        cr >>= 4;
-    }
+    ppc_set_cr(env, regs.cr);
 
     env->ctr = regs.ctr;
     env->lr = regs.lr;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Qemu-devel] [PATCH 05/14] ppc: use CRF_* in fpu_helper.c
  2014-09-15 15:03 [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Paolo Bonzini
                   ` (3 preceding siblings ...)
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 04/14] ppc: introduce ppc_get_cr and ppc_set_cr Paolo Bonzini
@ 2014-09-15 15:03 ` Paolo Bonzini
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 06/14] ppc: introduce helpers for mfocrf/mtocrf Paolo Bonzini
                   ` (10 subsequent siblings)
  15 siblings, 0 replies; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-15 15:03 UTC (permalink / raw)
  To: qemu-devel; +Cc: tommusta, agraf

Reviewed-by: Tom Musta <tommusta@gmail.com>
Tested-by: Tom Musta <tommusta@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
	v1->v2: renamed the "ret" variable to "fpcc"

 target-ppc/fpu_helper.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index da93d12..b4e6d72 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -1043,7 +1043,7 @@ uint32_t helper_ftdiv(uint64_t fra, uint64_t frb)
         }
     }
 
-    return 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0);
+    return (1 << CRF_LT) | (fg_flag << CRF_GT) | (fe_flag << CRF_EQ);
 }
 
 uint32_t helper_ftsqrt(uint64_t frb)
@@ -1074,33 +1074,33 @@ uint32_t helper_ftsqrt(uint64_t frb)
         }
     }
 
-    return 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0);
+    return (1 << CRF_LT) | (fg_flag << CRF_GT) | (fe_flag << CRF_EQ);
 }
 
 void helper_fcmpu(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
                   uint32_t crfD)
 {
     CPU_DoubleU farg1, farg2;
-    uint32_t ret = 0;
+    uint32_t fpcc;
 
     farg1.ll = arg1;
     farg2.ll = arg2;
 
     if (unlikely(float64_is_any_nan(farg1.d) ||
                  float64_is_any_nan(farg2.d))) {
-        ret = 0x01UL;
+        fpcc = CRF_SO;
     } else if (float64_lt(farg1.d, farg2.d, &env->fp_status)) {
-        ret = 0x08UL;
+        fpcc = CRF_LT;
     } else if (!float64_le(farg1.d, farg2.d, &env->fp_status)) {
-        ret = 0x04UL;
+        fpcc = CRF_GT;
     } else {
-        ret = 0x02UL;
+        fpcc = CRF_EQ;
     }
 
     env->fpscr &= ~(0x0F << FPSCR_FPRF);
-    env->fpscr |= ret << FPSCR_FPRF;
-    env->crf[crfD] = ret;
-    if (unlikely(ret == 0x01UL
+    env->fpscr |= (0x01 << FPSCR_FPRF) << fpcc;
+    env->crf[crfD] = (1 << fpcc);
+    if (unlikely(fpcc == CRF_SO
                  && (float64_is_signaling_nan(farg1.d) ||
                      float64_is_signaling_nan(farg2.d)))) {
         /* sNaN comparison */
@@ -1112,26 +1112,26 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
                   uint32_t crfD)
 {
     CPU_DoubleU farg1, farg2;
-    uint32_t ret = 0;
+    uint32_t fpcc;
 
     farg1.ll = arg1;
     farg2.ll = arg2;
 
     if (unlikely(float64_is_any_nan(farg1.d) ||
                  float64_is_any_nan(farg2.d))) {
-        ret = 0x01UL;
+        fpcc = CRF_SO;
     } else if (float64_lt(farg1.d, farg2.d, &env->fp_status)) {
-        ret = 0x08UL;
+        fpcc = CRF_LT;
     } else if (!float64_le(farg1.d, farg2.d, &env->fp_status)) {
-        ret = 0x04UL;
+        fpcc = CRF_GT;
     } else {
-        ret = 0x02UL;
+        fpcc = CRF_EQ;
     }
 
     env->fpscr &= ~(0x0F << FPSCR_FPRF);
-    env->fpscr |= ret << FPSCR_FPRF;
-    env->crf[crfD] = ret;
-    if (unlikely(ret == 0x01UL)) {
+    env->fpscr |= (0x01 << FPSCR_FPRF) << fpcc;
+    env->crf[crfD] = (1 << fpcc);
+    if (unlikely(fpcc == CRF_SO)) {
         if (float64_is_signaling_nan(farg1.d) ||
             float64_is_signaling_nan(farg2.d)) {
             /* sNaN comparison */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Qemu-devel] [PATCH 06/14] ppc: introduce helpers for mfocrf/mtocrf
  2014-09-15 15:03 [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Paolo Bonzini
                   ` (4 preceding siblings ...)
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 05/14] ppc: use CRF_* in fpu_helper.c Paolo Bonzini
@ 2014-09-15 15:03 ` Paolo Bonzini
  2014-09-18 19:32   ` Tom Musta
  2014-09-18 21:01   ` Richard Henderson
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 07/14] ppc: reorganize gen_compute_fprf Paolo Bonzini
                   ` (9 subsequent siblings)
  15 siblings, 2 replies; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-15 15:03 UTC (permalink / raw)
  To: qemu-devel; +Cc: tommusta, agraf

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
	v1->v2: used ARRAY_SIZE and ppc_get_cr

 target-ppc/helper.h     |  3 +++
 target-ppc/int_helper.c | 17 +++++++++++++++++
 target-ppc/translate.c  | 31 ++++---------------------------
 3 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 0cfdc8a..ee748a1 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -59,6 +59,9 @@ DEF_HELPER_2(fpscr_setbit, void, env, i32)
 DEF_HELPER_2(float64_to_float32, i32, env, i64)
 DEF_HELPER_2(float32_to_float64, i64, env, i32)
 
+DEF_HELPER_1(mfocrf, tl, env)
+DEF_HELPER_3(mtocrf, void, env, tl, i32)
+
 DEF_HELPER_4(fcmpo, void, env, i64, i64, i32)
 DEF_HELPER_4(fcmpu, void, env, i64, i64, i32)
 
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 83c1ad0..54e8998 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -289,6 +289,23 @@ target_ulong helper_popcntw(target_ulong val)
 }
 #endif
 
+void helper_mtocrf(CPUPPCState *env, target_ulong cr, uint32_t mask)
+{
+    int i;
+    for (i = ARRAY_SIZE(env->crf); --i >= 0; ) {
+        if (mask & 1) {
+            env->crf[i] = cr & 0x0F;
+        }
+        cr >>= 4;
+        mask >>= 1;
+    }
+}
+
+target_ulong helper_mfocrf(CPUPPCState *env)
+{
+    return ppc_get_cr(env);
+}
+
 /*****************************************************************************/
 /* PowerPC 601 specific instructions (POWER bridge) */
 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 2c9d8aa..c28bddf 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -4173,24 +4173,7 @@ static void gen_mfcr(DisasContext *ctx)
                             cpu_gpr[rD(ctx->opcode)], crn * 4);
         }
     } else {
-        TCGv_i32 t0 = tcg_temp_new_i32();
-        tcg_gen_mov_i32(t0, cpu_crf[0]);
-        tcg_gen_shli_i32(t0, t0, 4);
-        tcg_gen_or_i32(t0, t0, cpu_crf[1]);
-        tcg_gen_shli_i32(t0, t0, 4);
-        tcg_gen_or_i32(t0, t0, cpu_crf[2]);
-        tcg_gen_shli_i32(t0, t0, 4);
-        tcg_gen_or_i32(t0, t0, cpu_crf[3]);
-        tcg_gen_shli_i32(t0, t0, 4);
-        tcg_gen_or_i32(t0, t0, cpu_crf[4]);
-        tcg_gen_shli_i32(t0, t0, 4);
-        tcg_gen_or_i32(t0, t0, cpu_crf[5]);
-        tcg_gen_shli_i32(t0, t0, 4);
-        tcg_gen_or_i32(t0, t0, cpu_crf[6]);
-        tcg_gen_shli_i32(t0, t0, 4);
-        tcg_gen_or_i32(t0, t0, cpu_crf[7]);
-        tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t0);
-        tcg_temp_free_i32(t0);
+        gen_helper_mfocrf(cpu_gpr[rD(ctx->opcode)], cpu_env);
     }
 }
 
@@ -4285,15 +4268,9 @@ static void gen_mtcrf(DisasContext *ctx)
             tcg_temp_free_i32(temp);
         }
     } else {
-        TCGv_i32 temp = tcg_temp_new_i32();
-        tcg_gen_trunc_tl_i32(temp, cpu_gpr[rS(ctx->opcode)]);
-        for (crn = 0 ; crn < 8 ; crn++) {
-            if (crm & (1 << crn)) {
-                    tcg_gen_shri_i32(cpu_crf[7 - crn], temp, crn * 4);
-                    tcg_gen_andi_i32(cpu_crf[7 - crn], cpu_crf[7 - crn], 0xf);
-            }
-        }
-        tcg_temp_free_i32(temp);
+        TCGv_i32 t0 = tcg_const_i32(crm);
+        gen_helper_mtocrf(cpu_env, cpu_gpr[rS(ctx->opcode)], t0);
+        tcg_temp_free_i32(t0);
     }
 }
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Qemu-devel] [PATCH 07/14] ppc: reorganize gen_compute_fprf
  2014-09-15 15:03 [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Paolo Bonzini
                   ` (5 preceding siblings ...)
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 06/14] ppc: introduce helpers for mfocrf/mtocrf Paolo Bonzini
@ 2014-09-15 15:03 ` Paolo Bonzini
  2014-09-18 19:48   ` Tom Musta
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 08/14] ppc: introduce gen_op_mfcr/gen_op_mtcr Paolo Bonzini
                   ` (8 subsequent siblings)
  15 siblings, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-15 15:03 UTC (permalink / raw)
  To: qemu-devel; +Cc: tommusta, agraf

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
	v1->v2: fixed leak of temporaries

 target-ppc/translate.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index c28bddf..a8b6b7c 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -252,23 +252,22 @@ static inline void gen_reset_fpstatus(void)
 
 static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc)
 {
-    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i32 t0;
 
-    if (set_fprf != 0) {
-        /* This case might be optimized later */
-        tcg_gen_movi_i32(t0, 1);
-        gen_helper_compute_fprf(t0, cpu_env, arg, t0);
-        if (unlikely(set_rc)) {
-            tcg_gen_mov_i32(cpu_crf[1], t0);
-        }
-        gen_helper_float_check_status(cpu_env);
-    } else if (unlikely(set_rc)) {
-        /* We always need to compute fpcc */
-        tcg_gen_movi_i32(t0, 0);
-        gen_helper_compute_fprf(t0, cpu_env, arg, t0);
+    if (set_fprf == 0 && !set_rc) {
+        return;
+    }
+
+    t0 = tcg_temp_new_i32();
+    tcg_gen_movi_i32(t0, set_fprf != 0);
+    gen_helper_compute_fprf(t0, cpu_env, arg, t0);
+    if (set_rc) {
         tcg_gen_mov_i32(cpu_crf[1], t0);
     }
 
+    if (set_fprf != 0) {
+        gen_helper_float_check_status(cpu_env);
+    }
     tcg_temp_free_i32(t0);
 }
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Qemu-devel] [PATCH 08/14] ppc: introduce gen_op_mfcr/gen_op_mtcr
  2014-09-15 15:03 [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Paolo Bonzini
                   ` (6 preceding siblings ...)
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 07/14] ppc: reorganize gen_compute_fprf Paolo Bonzini
@ 2014-09-15 15:03 ` Paolo Bonzini
  2014-09-18 19:49   ` Tom Musta
  2014-09-18 21:38   ` Richard Henderson
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 09/14] ppc: introduce ppc_get_crf and ppc_set_crf Paolo Bonzini
                   ` (7 subsequent siblings)
  15 siblings, 2 replies; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-15 15:03 UTC (permalink / raw)
  To: qemu-devel; +Cc: tommusta, agraf

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
	v1->v2: fixed TCG debug failures

 target-ppc/translate.c | 61 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 43 insertions(+), 18 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index a8b6b7c..52062a8 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -250,6 +250,21 @@ static inline void gen_reset_fpstatus(void)
     gen_helper_reset_fpstatus(cpu_env);
 }
 
+static inline void gen_op_mfcr(TCGv_i32 dest, int first_cr, int shift)
+{
+    tcg_gen_shli_i32(dest, cpu_crf[first_cr >> 2], shift);
+}
+
+static inline void gen_op_mtcr(int first_cr, TCGv_i32 src, int shift)
+{
+    if (shift) {
+        tcg_gen_shri_i32(cpu_crf[first_cr >> 2], src, shift);
+        tcg_gen_andi_i32(cpu_crf[first_cr >> 2], cpu_crf[first_cr >> 2], 0x0F);
+    } else {
+        tcg_gen_andi_i32(cpu_crf[first_cr >> 2], src, 0x0F);
+    }
+}
+
 static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc)
 {
     TCGv_i32 t0;
@@ -262,7 +277,7 @@ static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc)
     tcg_gen_movi_i32(t0, set_fprf != 0);
     gen_helper_compute_fprf(t0, cpu_env, arg, t0);
     if (set_rc) {
-        tcg_gen_mov_i32(cpu_crf[1], t0);
+        gen_op_mtcr(4, t0, 0);
     }
 
     if (set_fprf != 0) {
@@ -2457,6 +2472,7 @@ static void gen_fmrgow(DisasContext *ctx)
 static void gen_mcrfs(DisasContext *ctx)
 {
     TCGv tmp = tcg_temp_new();
+    TCGv_i32 tmp32 = tcg_temp_new_i32();
     int bfa;
 
     if (unlikely(!ctx->fpu_enabled)) {
@@ -2465,10 +2481,11 @@ static void gen_mcrfs(DisasContext *ctx)
     }
     bfa = 4 * (7 - crfS(ctx->opcode));
     tcg_gen_shri_tl(tmp, cpu_fpscr, bfa);
-    tcg_gen_trunc_tl_i32(cpu_crf[crfD(ctx->opcode)], tmp);
+    tcg_gen_trunc_tl_i32(tmp32, tmp);
     tcg_temp_free(tmp);
-    tcg_gen_andi_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], 0xf);
+    gen_op_mtcr(crfD(ctx->opcode) << 2, tmp32, 0);
     tcg_gen_andi_tl(cpu_fpscr, cpu_fpscr, ~(0xF << bfa));
+    tcg_temp_free_i32(tmp32);
 }
 
 /* mffs */
@@ -2503,8 +2520,10 @@ static void gen_mtfsb0(DisasContext *ctx)
         tcg_temp_free_i32(t0);
     }
     if (unlikely(Rc(ctx->opcode) != 0)) {
-        tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr);
-        tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX);
+        TCGv_i32 tmp32 = tcg_temp_new_i32();
+        tcg_gen_trunc_tl_i32(tmp32, cpu_fpscr);
+        gen_op_mtcr(4, tmp32, FPSCR_OX);
+        tcg_temp_free_i32(tmp32);
     }
 }
 
@@ -2529,8 +2548,10 @@ static void gen_mtfsb1(DisasContext *ctx)
         tcg_temp_free_i32(t0);
     }
     if (unlikely(Rc(ctx->opcode) != 0)) {
-        tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr);
-        tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX);
+        TCGv_i32 tmp32 = tcg_temp_new_i32();
+        tcg_gen_trunc_tl_i32(tmp32, cpu_fpscr);
+        gen_op_mtcr(4, tmp32, FPSCR_OX);
+        tcg_temp_free_i32(tmp32);
     }
     /* We can raise a differed exception */
     gen_helper_float_check_status(cpu_env);
@@ -2564,8 +2585,10 @@ static void gen_mtfsf(DisasContext *ctx)
     gen_helper_store_fpscr(cpu_env, cpu_fpr[rB(ctx->opcode)], t0);
     tcg_temp_free_i32(t0);
     if (unlikely(Rc(ctx->opcode) != 0)) {
-        tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr);
-        tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX);
+        TCGv_i32 tmp32 = tcg_temp_new_i32();
+        tcg_gen_trunc_tl_i32(tmp32, cpu_fpscr);
+        gen_op_mtcr(4, tmp32, FPSCR_OX);
+        tcg_temp_free_i32(tmp32);
     }
     /* We can raise a differed exception */
     gen_helper_float_check_status(cpu_env);
@@ -2598,8 +2621,10 @@ static void gen_mtfsfi(DisasContext *ctx)
     tcg_temp_free_i64(t0);
     tcg_temp_free_i32(t1);
     if (unlikely(Rc(ctx->opcode) != 0)) {
-        tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr);
-        tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX);
+        TCGv_i32 tmp32 = tcg_temp_new_i32();
+        tcg_gen_trunc_tl_i32(tmp32, cpu_fpscr);
+        gen_op_mtcr(4, tmp32, FPSCR_OX);
+        tcg_temp_free_i32(tmp32);
     }
     /* We can raise a differed exception */
     gen_helper_float_check_status(cpu_env);
@@ -4166,10 +4191,11 @@ static void gen_mfcr(DisasContext *ctx)
     if (likely(ctx->opcode & 0x00100000)) {
         crm = CRM(ctx->opcode);
         if (likely(crm && ((crm & (crm - 1)) == 0))) {
+            TCGv_i32 t0 = tcg_temp_new_i32();
             crn = ctz32 (crm);
-            tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], cpu_crf[7 - crn]);
-            tcg_gen_shli_tl(cpu_gpr[rD(ctx->opcode)],
-                            cpu_gpr[rD(ctx->opcode)], crn * 4);
+            gen_op_mfcr(t0, (7 - crn) * 4, crn * 4);
+            tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t0);
+            tcg_temp_free_i32(t0);
         }
     } else {
         gen_helper_mfocrf(cpu_gpr[rD(ctx->opcode)], cpu_env);
@@ -4262,8 +4288,7 @@ static void gen_mtcrf(DisasContext *ctx)
             TCGv_i32 temp = tcg_temp_new_i32();
             crn = ctz32 (crm);
             tcg_gen_trunc_tl_i32(temp, cpu_gpr[rS(ctx->opcode)]);
-            tcg_gen_shri_i32(temp, temp, crn * 4);
-            tcg_gen_andi_i32(cpu_crf[7 - crn], temp, 0xf);
+            gen_op_mtcr((7 - crn) * 4, temp, crn * 4);
             tcg_temp_free_i32(temp);
         }
     } else {
@@ -8188,13 +8213,13 @@ static void gen_set_cr1_from_fpscr(DisasContext *ctx)
 {
     TCGv_i32 tmp = tcg_temp_new_i32();
     tcg_gen_trunc_tl_i32(tmp, cpu_fpscr);
-    tcg_gen_shri_i32(cpu_crf[1], tmp, 28);
+    gen_op_mtcr(4, tmp, 28);
     tcg_temp_free_i32(tmp);
 }
 #else
 static void gen_set_cr1_from_fpscr(DisasContext *ctx)
 {
-        tcg_gen_shri_tl(cpu_crf[1], cpu_fpscr, 28);
+    gen_op_mtcr(4, cpu_fpscr, 28);
 }
 #endif
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Qemu-devel] [PATCH 09/14] ppc: introduce ppc_get_crf and ppc_set_crf
  2014-09-15 15:03 [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Paolo Bonzini
                   ` (7 preceding siblings ...)
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 08/14] ppc: introduce gen_op_mfcr/gen_op_mtcr Paolo Bonzini
@ 2014-09-15 15:03 ` Paolo Bonzini
  2014-09-18 19:51   ` Tom Musta
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 10/14] ppc: use movcond for isel Paolo Bonzini
                   ` (6 subsequent siblings)
  15 siblings, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-15 15:03 UTC (permalink / raw)
  To: qemu-devel; +Cc: tommusta, agraf

These two functions will group together four CR bits into a single
value, once we change the representation of condition registers.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 linux-user/main.c        |  2 +-
 target-ppc/cpu.h         | 10 ++++++++++
 target-ppc/excp_helper.c |  2 +-
 target-ppc/fpu_helper.c  |  6 ++++--
 target-ppc/int_helper.c  | 14 +++++++-------
 target-ppc/translate.c   | 13 +++++++------
 6 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 472a16d..152c031 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -1550,7 +1550,7 @@ static int do_store_exclusive(CPUPPCState *env)
                 }
             }
         }
-        env->crf[0] = (stored << 1) | xer_so;
+        ppc_set_crf(env, 0, (stored << 1) | xer_so);
         env->reserve_addr = (target_ulong)-1;
     }
     if (!segv) {
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 0c0196d..91eac17 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1217,6 +1217,16 @@ static inline void ppc_set_cr(CPUPPCState *env, uint32_t cr)
     }
 }
 
+static inline uint32_t ppc_get_crf(const CPUPPCState *env, int i)
+{
+    return env->crf[i];
+}
+
+static inline void ppc_set_crf(CPUPPCState *env, int i, uint32_t val)
+{
+    env->crf[i] = val;
+}
+
 static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn)
 {
     uint64_t gprv;
diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index 96ad9d7..08637c1 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -504,7 +504,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
                          env->error_code);
             }
 #endif
-            msr |= env->crf[0] << 28;
+            msr |= ppc_get_crf(env, 0) << 28;
             msr |= env->error_code; /* key, D/I, S/L bits */
             /* Set way using a LRU mechanism */
             msr |= ((env->last_way + 1) & (env->nb_ways - 1)) << 17;
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index b4e6d72..8cf321b 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -1099,7 +1099,8 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
 
     env->fpscr &= ~(0x0F << FPSCR_FPRF);
     env->fpscr |= (0x01 << FPSCR_FPRF) << fpcc;
-    env->crf[crfD] = (1 << fpcc);
+    ppc_set_crf(env, crfD, 1 << fpcc);
+
     if (unlikely(fpcc == CRF_SO
                  && (float64_is_signaling_nan(farg1.d) ||
                      float64_is_signaling_nan(farg2.d)))) {
@@ -1130,7 +1131,8 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
 
     env->fpscr &= ~(0x0F << FPSCR_FPRF);
     env->fpscr |= (0x01 << FPSCR_FPRF) << fpcc;
-    env->crf[crfD] = (1 << fpcc);
+    ppc_set_crf(env, crfD, 1 << fpcc);
+
     if (unlikely(fpcc == CRF_SO)) {
         if (float64_is_signaling_nan(farg1.d) ||
             float64_is_signaling_nan(farg2.d)) {
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 54e8998..b76a895 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -294,7 +294,7 @@ void helper_mtocrf(CPUPPCState *env, target_ulong cr, uint32_t mask)
     int i;
     for (i = ARRAY_SIZE(env->crf); --i >= 0; ) {
         if (mask & 1) {
-            env->crf[i] = cr & 0x0F;
+            ppc_set_crf(env, i, cr & 0x0F);
         }
         cr >>= 4;
         mask >>= 1;
@@ -657,7 +657,7 @@ VCF(sx, int32_to_float32, s32)
             none |= result;                                             \
         }                                                               \
         if (record) {                                                   \
-            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
+            ppc_set_crf(env, 6, ((all != 0) << 3) | ((none == 0) << 1)); \
         }                                                               \
     }
 #define VCMP(suffix, compare, element)          \
@@ -703,7 +703,7 @@ VCMP(gtsd, >, s64)
             none |= result;                                             \
         }                                                               \
         if (record) {                                                   \
-            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
+            ppc_set_crf(env, 6, ((all != 0) << 3) | ((none == 0) << 1)); \
         }                                                               \
     }
 #define VCMPFP(suffix, compare, order)          \
@@ -737,7 +737,7 @@ static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
         }
     }
     if (record) {
-        env->crf[6] = (all_in == 0) << 1;
+        ppc_set_crf(env, 6, (all_in == 0) << 1);
     }
 }
 
@@ -2558,7 +2558,7 @@ target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
     for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
         if ((high & mask) == 0) {
             if (update_Rc) {
-                env->crf[0] = 0x4;
+                ppc_set_crf(env, 0, 0x4);
             }
             goto done;
         }
@@ -2567,7 +2567,7 @@ target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
     for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
         if ((low & mask) == 0) {
             if (update_Rc) {
-                env->crf[0] = 0x8;
+                ppc_set_crf(env, 0, 0x8);
             }
             goto done;
         }
@@ -2575,7 +2575,7 @@ target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
     }
     i = 8;
     if (update_Rc) {
-        env->crf[0] = 0x2;
+        ppc_set_crf(env, 0, 0x2);
     }
  done:
     env->xer = (env->xer & ~0x7F) | i;
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 52062a8..9ff8763 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -11102,18 +11102,19 @@ void ppc_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
             cpu_fprintf(f, "\n");
     }
     cpu_fprintf(f, "CR ");
-    for (i = 0; i < 8; i++)
-        cpu_fprintf(f, "%01x", env->crf[i]);
+    for (i = 0; i < 8; i++) {
+        cpu_fprintf(f, "%01x", ppc_get_crf(env, i));
+    }
     cpu_fprintf(f, "  [");
     for (i = 0; i < 8; i++) {
         char a = '-';
-        if (env->crf[i] & 0x08)
+        if (ppc_get_crf(env, i) & 0x08)
             a = 'L';
-        else if (env->crf[i] & 0x04)
+        else if (ppc_get_crf(env, i) & 0x04)
             a = 'G';
-        else if (env->crf[i] & 0x02)
+        else if (ppc_get_crf(env, i) & 0x02)
             a = 'E';
-        cpu_fprintf(f, " %c%c", a, env->crf[i] & 0x01 ? 'O' : ' ');
+        cpu_fprintf(f, " %c%c", a, ppc_get_crf(env, i) & 0x01 ? 'O' : ' ');
     }
     cpu_fprintf(f, " ]             RES " TARGET_FMT_lx "\n",
                 env->reserve_addr);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Qemu-devel] [PATCH 10/14] ppc: use movcond for isel
  2014-09-15 15:03 [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Paolo Bonzini
                   ` (8 preceding siblings ...)
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 09/14] ppc: introduce ppc_get_crf and ppc_set_crf Paolo Bonzini
@ 2014-09-15 15:03 ` Paolo Bonzini
  2014-09-18 20:05   ` Tom Musta
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 11/14] ppc: store CR registers in 32 1-bit registers Paolo Bonzini
                   ` (5 subsequent siblings)
  15 siblings, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-15 15:03 UTC (permalink / raw)
  To: qemu-devel; +Cc: tommusta, agraf

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
	v1->v2: fixed TCG debugging failures

 target-ppc/translate.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 9ff8763..0933c00 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -777,27 +777,27 @@ static void gen_cmpli(DisasContext *ctx)
 /* isel (PowerPC 2.03 specification) */
 static void gen_isel(DisasContext *ctx)
 {
-    int l1, l2;
     uint32_t bi = rC(ctx->opcode);
     uint32_t mask;
     TCGv_i32 t0;
-
-    l1 = gen_new_label();
-    l2 = gen_new_label();
+    TCGv t1, true_op, zero;
 
     mask = 0x08 >> (bi & 0x03);
     t0 = tcg_temp_new_i32();
     tcg_gen_andi_i32(t0, cpu_crf[bi >> 2], mask);
-    tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
-    if (rA(ctx->opcode) == 0)
-        tcg_gen_movi_tl(cpu_gpr[rD(ctx->opcode)], 0);
-    else
-        tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
-    tcg_gen_br(l2);
-    gen_set_label(l1);
-    tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
-    gen_set_label(l2);
+    t1 = tcg_temp_new();
+    tcg_gen_extu_i32_tl(t1, t0);
+    zero = tcg_const_tl(0);
+    if (rA(ctx->opcode) == 0) {
+        true_op = zero;
+    } else {
+        true_op = cpu_gpr[rA(ctx->opcode)];
+    }
+    tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr[rD(ctx->opcode)], t1, zero,
+                       true_op, cpu_gpr[rB(ctx->opcode)]);
+    tcg_temp_free(t1);
     tcg_temp_free_i32(t0);
+    tcg_temp_free(zero);
 }
 
 /* cmpb: PowerPC 2.05 specification */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Qemu-devel] [PATCH 11/14] ppc: store CR registers in 32 1-bit registers
  2014-09-15 15:03 [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Paolo Bonzini
                   ` (9 preceding siblings ...)
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 10/14] ppc: use movcond for isel Paolo Bonzini
@ 2014-09-15 15:03 ` Paolo Bonzini
  2014-09-18 20:25   ` Tom Musta
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 12/14] ppc: use movcond to implement evsel Paolo Bonzini
                   ` (4 subsequent siblings)
  15 siblings, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-15 15:03 UTC (permalink / raw)
  To: qemu-devel; +Cc: tommusta, agraf

This makes comparisons much smaller and faster.  The speedup is
approximately 10% for user-mode emulation on an x86 host, and 3-4% on PPC.

Note that the CRF_* constants are flipped (LT is now 0 and SO is now 3)
to match PowerPC's big-endian bit numbering.  Previously, the CR was
effectively stored in mixed endianness, so now there is less indirection
going on.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
	v1->v2: fixed all issues reported by Tom, notably: 1) temporary
	leak in gen_op_mfcr; 2) cr[so] not being set in gen_op_cmp32;
	3) i32 vs. tl typing issues; 4) extra 1 bits left set by
	creqv/crnand/crnor/crorc.

 linux-user/main.c       |   4 +-
 target-ppc/cpu.h        |  41 +++---
 target-ppc/fpu_helper.c |  44 ++-----
 target-ppc/helper.h     |   6 -
 target-ppc/int_helper.c |   2 +-
 target-ppc/machine.c    |   9 ++
 target-ppc/translate.c  | 344 ++++++++++++++++++++++++++----------------------
 7 files changed, 236 insertions(+), 214 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 152c031..b403f24 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -1929,7 +1929,7 @@ void cpu_loop(CPUPPCState *env)
              * PPC ABI uses overflow flag in cr0 to signal an error
              * in syscalls.
              */
-            env->crf[0] &= ~0x1;
+            env->cr[CRF_SO] = 0;
             ret = do_syscall(env, env->gpr[0], env->gpr[3], env->gpr[4],
                              env->gpr[5], env->gpr[6], env->gpr[7],
                              env->gpr[8], 0, 0);
@@ -1939,7 +1939,7 @@ void cpu_loop(CPUPPCState *env)
                 break;
             }
             if (ret > (target_ulong)(-515)) {
-                env->crf[0] |= 0x1;
+                env->cr[CRF_SO] = 1;
                 ret = -ret;
             }
             env->gpr[3] = ret;
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 91eac17..41b8299 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -940,7 +940,7 @@ struct CPUPPCState {
     /* CTR */
     target_ulong ctr;
     /* condition register */
-    uint32_t crf[8];
+    uint32_t cr[32];
 #if defined(TARGET_PPC64)
     /* CFAR */
     target_ulong cfar;
@@ -1059,6 +1059,9 @@ struct CPUPPCState {
     uint64_t dtl_addr, dtl_size;
 #endif /* TARGET_PPC64 */
 
+    /* condition register, for migration compatibility */
+    uint32_t crf[8];
+
     int error_code;
     uint32_t pending_interrupts;
 #if !defined(CONFIG_USER_ONLY)
@@ -1202,8 +1205,8 @@ static inline uint32_t ppc_get_cr(const CPUPPCState *env)
     uint32_t cr = 0;
     int i;
 
-    for (i = 0; i < ARRAY_SIZE(env->crf); i++) {
-        cr |= env->crf[i] << (32 - ((i + 1) * 4));
+    for (i = 0; i < ARRAY_SIZE(env->cr); i++) {
+        cr |= env->cr[i] << (31 - i);
     }
     return cr;
 }
@@ -1212,19 +1215,27 @@ static inline void ppc_set_cr(CPUPPCState *env, uint32_t cr)
 {
     int i;
 
-    for (i = 0; i < ARRAY_SIZE(env->crf); i++) {
-        env->crf[i] = (cr >> (32 - ((i + 1) * 4))) & 0xF;
+    for (i = 0; i < ARRAY_SIZE(env->cr); i++) {
+        env->cr[i] = (cr >> (31 - i)) & 1;
     }
 }
 
 static inline uint32_t ppc_get_crf(const CPUPPCState *env, int i)
 {
-    return env->crf[i];
+    uint32_t r;
+    r = env->cr[i * 4];
+    r = (r << 1) | (env->cr[i * 4 + 1]);
+    r = (r << 1) | (env->cr[i * 4 + 2]);
+    r = (r << 1) | (env->cr[i * 4 + 3]);
+    return r;
 }
 
 static inline void ppc_set_crf(CPUPPCState *env, int i, uint32_t val)
 {
-    env->crf[i] = val;
+    env->cr[i * 4 + 0] = (val & 0x08) != 0;
+    env->cr[i * 4 + 1] = (val & 0x04) != 0;
+    env->cr[i * 4 + 2] = (val & 0x02) != 0;
+    env->cr[i * 4 + 3] = (val & 0x01) != 0;
 }
 
 static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn)
@@ -1271,14 +1282,14 @@ static inline int cpu_mmu_index (CPUPPCState *env)
 
 /*****************************************************************************/
 /* CRF definitions */
-#define CRF_LT        3
-#define CRF_GT        2
-#define CRF_EQ        1
-#define CRF_SO        0
-#define CRF_CH        (1 << CRF_LT)
-#define CRF_CL        (1 << CRF_GT)
-#define CRF_CH_OR_CL  (1 << CRF_EQ)
-#define CRF_CH_AND_CL (1 << CRF_SO)
+#define CRF_LT        0
+#define CRF_GT        1
+#define CRF_EQ        2
+#define CRF_SO        3
+#define CRF_CH        CRF_LT
+#define CRF_CL        CRF_GT
+#define CRF_CH_OR_CL  CRF_EQ
+#define CRF_CH_AND_CL CRF_SO
 
 /* XER definitions */
 #define XER_SO  31
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 8cf321b..7894dc5 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -1098,8 +1098,8 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
     }
 
     env->fpscr &= ~(0x0F << FPSCR_FPRF);
-    env->fpscr |= (0x01 << FPSCR_FPRF) << fpcc;
-    ppc_set_crf(env, crfD, 1 << fpcc);
+    env->fpscr |= (0x08 << FPSCR_FPRF) >> fpcc;
+    ppc_set_crf(env, crfD, 0x08 >> fpcc);
 
     if (unlikely(fpcc == CRF_SO
                  && (float64_is_signaling_nan(farg1.d) ||
@@ -1130,8 +1130,8 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
     }
 
     env->fpscr &= ~(0x0F << FPSCR_FPRF);
-    env->fpscr |= (0x01 << FPSCR_FPRF) << fpcc;
-    ppc_set_crf(env, crfD, 1 << fpcc);
+    env->fpscr |= (0x08 << FPSCR_FPRF) >> fpcc;
+    ppc_set_crf(env, crfD, 0x08 >> fpcc);
 
     if (unlikely(fpcc == CRF_SO)) {
         if (float64_is_signaling_nan(farg1.d) ||
@@ -1403,7 +1403,7 @@ static inline uint32_t efscmplt(CPUPPCState *env, uint32_t op1, uint32_t op2)
 
     u1.l = op1;
     u2.l = op2;
-    return float32_lt(u1.f, u2.f, &env->vec_status) ? 4 : 0;
+    return float32_lt(u1.f, u2.f, &env->vec_status);
 }
 
 static inline uint32_t efscmpgt(CPUPPCState *env, uint32_t op1, uint32_t op2)
@@ -1412,7 +1412,7 @@ static inline uint32_t efscmpgt(CPUPPCState *env, uint32_t op1, uint32_t op2)
 
     u1.l = op1;
     u2.l = op2;
-    return float32_le(u1.f, u2.f, &env->vec_status) ? 0 : 4;
+    return !float32_le(u1.f, u2.f, &env->vec_status);
 }
 
 static inline uint32_t efscmpeq(CPUPPCState *env, uint32_t op1, uint32_t op2)
@@ -1421,7 +1421,7 @@ static inline uint32_t efscmpeq(CPUPPCState *env, uint32_t op1, uint32_t op2)
 
     u1.l = op1;
     u2.l = op2;
-    return float32_eq(u1.f, u2.f, &env->vec_status) ? 4 : 0;
+    return float32_eq(u1.f, u2.f, &env->vec_status);
 }
 
 static inline uint32_t efststlt(CPUPPCState *env, uint32_t op1, uint32_t op2)
@@ -1460,30 +1460,6 @@ HELPER_SINGLE_SPE_CMP(fscmpgt);
 /* efscmpeq */
 HELPER_SINGLE_SPE_CMP(fscmpeq);
 
-static inline uint32_t evcmp_merge(int t0, int t1)
-{
-    return (t0 << 3) | (t1 << 2) | ((t0 | t1) << 1) | (t0 & t1);
-}
-
-#define HELPER_VECTOR_SPE_CMP(name)                                     \
-    uint32_t helper_ev##name(CPUPPCState *env, uint64_t op1, uint64_t op2) \
-    {                                                                   \
-        return evcmp_merge(e##name(env, op1 >> 32, op2 >> 32),          \
-                           e##name(env, op1, op2));                     \
-    }
-/* evfststlt */
-HELPER_VECTOR_SPE_CMP(fststlt);
-/* evfststgt */
-HELPER_VECTOR_SPE_CMP(fststgt);
-/* evfststeq */
-HELPER_VECTOR_SPE_CMP(fststeq);
-/* evfscmplt */
-HELPER_VECTOR_SPE_CMP(fscmplt);
-/* evfscmpgt */
-HELPER_VECTOR_SPE_CMP(fscmpgt);
-/* evfscmpeq */
-HELPER_VECTOR_SPE_CMP(fscmpeq);
-
 /* Double-precision floating-point conversion */
 uint64_t helper_efdcfsi(CPUPPCState *env, uint32_t val)
 {
@@ -1725,7 +1701,7 @@ uint32_t helper_efdtstlt(CPUPPCState *env, uint64_t op1, uint64_t op2)
 
     u1.ll = op1;
     u2.ll = op2;
-    return float64_lt(u1.d, u2.d, &env->vec_status) ? 4 : 0;
+    return float64_lt(u1.d, u2.d, &env->vec_status);
 }
 
 uint32_t helper_efdtstgt(CPUPPCState *env, uint64_t op1, uint64_t op2)
@@ -1734,7 +1710,7 @@ uint32_t helper_efdtstgt(CPUPPCState *env, uint64_t op1, uint64_t op2)
 
     u1.ll = op1;
     u2.ll = op2;
-    return float64_le(u1.d, u2.d, &env->vec_status) ? 0 : 4;
+    return !float64_le(u1.d, u2.d, &env->vec_status);
 }
 
 uint32_t helper_efdtsteq(CPUPPCState *env, uint64_t op1, uint64_t op2)
@@ -1743,7 +1719,7 @@ uint32_t helper_efdtsteq(CPUPPCState *env, uint64_t op1, uint64_t op2)
 
     u1.ll = op1;
     u2.ll = op2;
-    return float64_eq_quiet(u1.d, u2.d, &env->vec_status) ? 4 : 0;
+    return float64_eq_quiet(u1.d, u2.d, &env->vec_status);
 }
 
 uint32_t helper_efdcmplt(CPUPPCState *env, uint64_t op1, uint64_t op2)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index ee748a1..dff7c1c 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -492,12 +492,6 @@ DEF_HELPER_3(efststeq, i32, env, i32, i32)
 DEF_HELPER_3(efscmplt, i32, env, i32, i32)
 DEF_HELPER_3(efscmpgt, i32, env, i32, i32)
 DEF_HELPER_3(efscmpeq, i32, env, i32, i32)
-DEF_HELPER_3(evfststlt, i32, env, i64, i64)
-DEF_HELPER_3(evfststgt, i32, env, i64, i64)
-DEF_HELPER_3(evfststeq, i32, env, i64, i64)
-DEF_HELPER_3(evfscmplt, i32, env, i64, i64)
-DEF_HELPER_3(evfscmpgt, i32, env, i64, i64)
-DEF_HELPER_3(evfscmpeq, i32, env, i64, i64)
 DEF_HELPER_2(efdcfsi, i64, env, i32)
 DEF_HELPER_2(efdcfsid, i64, env, i64)
 DEF_HELPER_2(efdcfui, i64, env, i32)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index b76a895..96f2e7d 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -2580,7 +2580,7 @@ target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
  done:
     env->xer = (env->xer & ~0x7F) | i;
     if (update_Rc) {
-        env->crf[0] |= xer_so;
+        env->cr[CRF_SO] = xer_so;
     }
     return i;
 }
diff --git a/target-ppc/machine.c b/target-ppc/machine.c
index c801b82..9fa309a 100644
--- a/target-ppc/machine.c
+++ b/target-ppc/machine.c
@@ -132,6 +132,10 @@ static void cpu_pre_save(void *opaque)
     CPUPPCState *env = &cpu->env;
     int i;
 
+    for (i = 0; i < 8; i++) {
+        env->crf[i] = ppc_get_crf(env, i);
+    }
+
     env->spr[SPR_LR] = env->lr;
     env->spr[SPR_CTR] = env->ctr;
     env->spr[SPR_XER] = env->xer;
@@ -165,6 +169,11 @@ static int cpu_post_load(void *opaque, int version_id)
      * software has to take care of running QEMU in a compatible mode.
      */
     env->spr[SPR_PVR] = env->spr_cb[SPR_PVR].default_value;
+
+    for (i = 0; i < 8; i++) {
+        ppc_set_crf(env, i, env->crf[i]);
+    }
+
     env->lr = env->spr[SPR_LR];
     env->ctr = env->spr[SPR_CTR];
     env->xer = env->spr[SPR_XER];
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 0933c00..d8c9240 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -53,13 +53,13 @@ static char cpu_reg_names[10*3 + 22*4 /* GPR */
     + 10*4 + 22*5 /* FPR */
     + 2*(10*6 + 22*7) /* AVRh, AVRl */
     + 10*5 + 22*6 /* VSR */
-    + 8*5 /* CRF */];
+    + 32*8 /* CR */];
 static TCGv cpu_gpr[32];
 static TCGv cpu_gprh[32];
 static TCGv_i64 cpu_fpr[32];
 static TCGv_i64 cpu_avrh[32], cpu_avrl[32];
 static TCGv_i64 cpu_vsr[32];
-static TCGv_i32 cpu_crf[8];
+static TCGv_i32 cpu_cr[32];
 static TCGv cpu_nip;
 static TCGv cpu_msr;
 static TCGv cpu_ctr;
@@ -89,12 +89,13 @@ void ppc_translate_init(void)
     p = cpu_reg_names;
     cpu_reg_names_size = sizeof(cpu_reg_names);
 
-    for (i = 0; i < 8; i++) {
-        snprintf(p, cpu_reg_names_size, "crf%d", i);
-        cpu_crf[i] = tcg_global_mem_new_i32(TCG_AREG0,
-                                            offsetof(CPUPPCState, crf[i]), p);
-        p += 5;
-        cpu_reg_names_size -= 5;
+    for (i = 0; i < 32; i++) {
+        static const char names[] = "lt\0gt\0eq\0so";
+        snprintf(p, cpu_reg_names_size, "cr%d[%s]", i >> 2, names + (i & 3) * 3);
+        cpu_cr[i] = tcg_global_mem_new_i32(TCG_AREG0,
+                                           offsetof(CPUPPCState, cr[i]), p);
+        p += 8;
+        cpu_reg_names_size -= 8;
     }
 
     for (i = 0; i < 32; i++) {
@@ -252,17 +253,31 @@ static inline void gen_reset_fpstatus(void)
 
 static inline void gen_op_mfcr(TCGv_i32 dest, int first_cr, int shift)
 {
-    tcg_gen_shli_i32(dest, cpu_crf[first_cr >> 2], shift);
+    TCGv_i32 t0 = tcg_temp_new_i32();
+
+    tcg_gen_shli_i32(dest, cpu_cr[first_cr + 3], shift);
+    tcg_gen_shli_i32(t0, cpu_cr[first_cr + 2], shift + 1);
+    tcg_gen_or_i32(dest, dest, t0);
+    tcg_gen_shli_i32(t0, cpu_cr[first_cr + 1], shift + 2);
+    tcg_gen_or_i32(dest, dest, t0);
+    tcg_gen_shli_i32(t0, cpu_cr[first_cr], shift + 3);
+    tcg_temp_free_i32(t0);
 }
 
 static inline void gen_op_mtcr(int first_cr, TCGv_i32 src, int shift)
 {
     if (shift) {
-        tcg_gen_shri_i32(cpu_crf[first_cr >> 2], src, shift);
-        tcg_gen_andi_i32(cpu_crf[first_cr >> 2], cpu_crf[first_cr >> 2], 0x0F);
+        tcg_gen_shri_i32(cpu_cr[first_cr + 3], src, shift);
+        tcg_gen_andi_i32(cpu_cr[first_cr + 3], cpu_cr[first_cr + 3], 1);
     } else {
-        tcg_gen_andi_i32(cpu_crf[first_cr >> 2], src, 0x0F);
+        tcg_gen_andi_i32(cpu_cr[first_cr + 3], src, 1);
     }
+    tcg_gen_shri_i32(cpu_cr[first_cr + 2], src, shift + 1);
+    tcg_gen_andi_i32(cpu_cr[first_cr + 2], cpu_cr[first_cr + 2], 1);
+    tcg_gen_shri_i32(cpu_cr[first_cr + 1], src, shift + 2);
+    tcg_gen_andi_i32(cpu_cr[first_cr + 1], cpu_cr[first_cr + 1], 1);
+    tcg_gen_shri_i32(cpu_cr[first_cr], src, shift + 3);
+    tcg_gen_andi_i32(cpu_cr[first_cr], cpu_cr[first_cr], 1);
 }
 
 static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc)
@@ -663,27 +678,19 @@ static opc_handler_t invalid_handler = {
 static inline void gen_op_cmp(TCGv arg0, TCGv arg1, int s, int crf)
 {
     TCGv t0 = tcg_temp_new();
-    TCGv_i32 t1 = tcg_temp_new_i32();
 
-    tcg_gen_trunc_tl_i32(cpu_crf[crf], cpu_so);
+    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_SO], cpu_so);
 
     tcg_gen_setcond_tl((s ? TCG_COND_LT: TCG_COND_LTU), t0, arg0, arg1);
-    tcg_gen_trunc_tl_i32(t1, t0);
-    tcg_gen_shli_i32(t1, t1, CRF_LT);
-    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
+    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_LT], t0);
 
     tcg_gen_setcond_tl((s ? TCG_COND_GT: TCG_COND_GTU), t0, arg0, arg1);
-    tcg_gen_trunc_tl_i32(t1, t0);
-    tcg_gen_shli_i32(t1, t1, CRF_GT);
-    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
+    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_GT], t0);
 
     tcg_gen_setcond_tl(TCG_COND_EQ, t0, arg0, arg1);
-    tcg_gen_trunc_tl_i32(t1, t0);
-    tcg_gen_shli_i32(t1, t1, CRF_EQ);
-    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
+    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_EQ], t0);
 
     tcg_temp_free(t0);
-    tcg_temp_free_i32(t1);
 }
 
 static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf)
@@ -695,19 +702,26 @@ static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf)
 
 static inline void gen_op_cmp32(TCGv arg0, TCGv arg1, int s, int crf)
 {
-    TCGv t0, t1;
-    t0 = tcg_temp_new();
-    t1 = tcg_temp_new();
-    if (s) {
-        tcg_gen_ext32s_tl(t0, arg0);
-        tcg_gen_ext32s_tl(t1, arg1);
-    } else {
-        tcg_gen_ext32u_tl(t0, arg0);
-        tcg_gen_ext32u_tl(t1, arg1);
-    }
-    gen_op_cmp(t0, t1, s, crf);
-    tcg_temp_free(t1);
-    tcg_temp_free(t0);
+    TCGv_i32 t0, t1;
+
+    t0 = tcg_temp_new_i32();
+    t1 = tcg_temp_new_i32();
+    tcg_gen_trunc_tl_i32(t0, arg0);
+    tcg_gen_trunc_tl_i32(t1, arg1);
+
+    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_SO], cpu_so);
+
+    tcg_gen_setcond_i32((s ? TCG_COND_LT: TCG_COND_LTU), 
+                        cpu_cr[crf * 4 + CRF_LT], t0, t1);
+
+    tcg_gen_setcond_i32((s ? TCG_COND_GT: TCG_COND_GTU), 
+                        cpu_cr[crf * 4 + CRF_GT], t0, t1);
+
+    tcg_gen_setcond_i32(TCG_COND_EQ, 
+                        cpu_cr[crf * 4 + CRF_EQ], t0, t1);
+
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t0);
 }
 
 static inline void gen_op_cmpi32(TCGv arg0, target_ulong arg1, int s, int crf)
@@ -778,15 +792,10 @@ static void gen_cmpli(DisasContext *ctx)
 static void gen_isel(DisasContext *ctx)
 {
     uint32_t bi = rC(ctx->opcode);
-    uint32_t mask;
-    TCGv_i32 t0;
     TCGv t1, true_op, zero;
 
-    mask = 0x08 >> (bi & 0x03);
-    t0 = tcg_temp_new_i32();
-    tcg_gen_andi_i32(t0, cpu_crf[bi >> 2], mask);
     t1 = tcg_temp_new();
-    tcg_gen_extu_i32_tl(t1, t0);
+    tcg_gen_extu_i32_tl(t1, cpu_cr[bi]);
     zero = tcg_const_tl(0);
     if (rA(ctx->opcode) == 0) {
         true_op = zero;
@@ -796,7 +805,6 @@ static void gen_isel(DisasContext *ctx)
     tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr[rD(ctx->opcode)], t1, zero,
                        true_op, cpu_gpr[rB(ctx->opcode)]);
     tcg_temp_free(t1);
-    tcg_temp_free_i32(t0);
     tcg_temp_free(zero);
 }
 
@@ -2318,21 +2326,29 @@ GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT);
 
 static void gen_ftdiv(DisasContext *ctx)
 {
+    TCGv_i32 crf;
     if (unlikely(!ctx->fpu_enabled)) {
         gen_exception(ctx, POWERPC_EXCP_FPU);
         return;
     }
-    gen_helper_ftdiv(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rA(ctx->opcode)],
+    crf = tcg_temp_new_i32();
+    gen_helper_ftdiv(crf, cpu_fpr[rA(ctx->opcode)],
                      cpu_fpr[rB(ctx->opcode)]);
+    gen_op_mtcr(crfD(ctx->opcode) << 2, crf, 0);
+    tcg_temp_free_i32(crf);
 }
 
 static void gen_ftsqrt(DisasContext *ctx)
 {
+    TCGv_i32 crf;
     if (unlikely(!ctx->fpu_enabled)) {
         gen_exception(ctx, POWERPC_EXCP_FPU);
         return;
     }
-    gen_helper_ftsqrt(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)]);
+    crf = tcg_temp_new_i32();
+    gen_helper_ftsqrt(crf, cpu_fpr[rB(ctx->opcode)]);
+    gen_op_mtcr(crfD(ctx->opcode) << 2, crf, 0);
+    tcg_temp_free_i32(crf);
 }
 
 
@@ -3330,10 +3346,13 @@ static void gen_conditional_store(DisasContext *ctx, TCGv EA,
 {
     int l1;
 
-    tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
+    tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
+    tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
+    tcg_gen_movi_i32(cpu_cr[CRF_EQ], 0);
+    tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
     l1 = gen_new_label();
     tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1);
-    tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 1 << CRF_EQ);
+    tcg_gen_movi_i32(cpu_cr[CRF_EQ], 1);
 #if defined(TARGET_PPC64)
     if (size == 8) {
         gen_qemu_st64(ctx, cpu_gpr[reg], EA);
@@ -3900,17 +3919,11 @@ static inline void gen_bcond(DisasContext *ctx, int type)
     if ((bo & 0x10) == 0) {
         /* Test CR */
         uint32_t bi = BI(ctx->opcode);
-        uint32_t mask = 0x08 >> (bi & 0x03);
-        TCGv_i32 temp = tcg_temp_new_i32();
-
         if (bo & 0x8) {
-            tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask);
-            tcg_gen_brcondi_i32(TCG_COND_EQ, temp, 0, l1);
+            tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[bi], 0, l1);
         } else {
-            tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask);
-            tcg_gen_brcondi_i32(TCG_COND_NE, temp, 0, l1);
+            tcg_gen_brcondi_i32(TCG_COND_NE, cpu_cr[bi], 0, l1);
         }
-        tcg_temp_free_i32(temp);
     }
     gen_update_cfar(ctx, ctx->nip);
     if (type == BCOND_IM) {
@@ -3959,35 +3972,21 @@ static void gen_bctar(DisasContext *ctx)
 }
 
 /***                      Condition register logical                       ***/
-#define GEN_CRLOGIC(name, tcg_op, opc)                                        \
-static void glue(gen_, name)(DisasContext *ctx)                                       \
-{                                                                             \
-    uint8_t bitmask;                                                          \
-    int sh;                                                                   \
-    TCGv_i32 t0, t1;                                                          \
-    sh = (crbD(ctx->opcode) & 0x03) - (crbA(ctx->opcode) & 0x03);             \
-    t0 = tcg_temp_new_i32();                                                  \
-    if (sh > 0)                                                               \
-        tcg_gen_shri_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2], sh);            \
-    else if (sh < 0)                                                          \
-        tcg_gen_shli_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2], -sh);           \
-    else                                                                      \
-        tcg_gen_mov_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2]);                 \
-    t1 = tcg_temp_new_i32();                                                  \
-    sh = (crbD(ctx->opcode) & 0x03) - (crbB(ctx->opcode) & 0x03);             \
-    if (sh > 0)                                                               \
-        tcg_gen_shri_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2], sh);            \
-    else if (sh < 0)                                                          \
-        tcg_gen_shli_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2], -sh);           \
-    else                                                                      \
-        tcg_gen_mov_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2]);                 \
-    tcg_op(t0, t0, t1);                                                       \
-    bitmask = 0x08 >> (crbD(ctx->opcode) & 0x03);                             \
-    tcg_gen_andi_i32(t0, t0, bitmask);                                        \
-    tcg_gen_andi_i32(t1, cpu_crf[crbD(ctx->opcode) >> 2], ~bitmask);          \
-    tcg_gen_or_i32(cpu_crf[crbD(ctx->opcode) >> 2], t0, t1);                  \
-    tcg_temp_free_i32(t0);                                                    \
-    tcg_temp_free_i32(t1);                                                    \
+#define GEN_CRLOGIC(name, tcg_op, opc)                                         \
+static void glue(gen_, name)(DisasContext *ctx)                                \
+{                                                                              \
+    tcg_op(cpu_cr[crbD(ctx->opcode)], cpu_cr[crbA(ctx->opcode)],               \
+           cpu_cr[crbB(ctx->opcode)]);                                         \
+}
+
+#define GEN_CRLOGIC_MASK(name, tcg_op, opc)                                     \
+static void glue(gen_, name)(DisasContext *ctx)                                 \
+{                                                                               \
+    TCGv_i32 one = tcg_const_i32(1);                                            \
+    tcg_op(cpu_cr[crbD(ctx->opcode)], cpu_cr[crbA(ctx->opcode)],                \
+           cpu_cr[crbB(ctx->opcode)]);                                          \
+    tcg_gen_and_i32(cpu_cr[crbD(ctx->opcode)], cpu_cr[crbD(ctx->opcode)], one); \
+    tcg_temp_free_i32(one);                                                     \
 }
 
 /* crand */
@@ -3995,22 +3994,26 @@ GEN_CRLOGIC(crand, tcg_gen_and_i32, 0x08);
 /* crandc */
 GEN_CRLOGIC(crandc, tcg_gen_andc_i32, 0x04);
 /* creqv */
-GEN_CRLOGIC(creqv, tcg_gen_eqv_i32, 0x09);
+GEN_CRLOGIC_MASK(creqv, tcg_gen_eqv_i32, 0x09);
 /* crnand */
-GEN_CRLOGIC(crnand, tcg_gen_nand_i32, 0x07);
+GEN_CRLOGIC_MASK(crnand, tcg_gen_nand_i32, 0x07);
 /* crnor */
-GEN_CRLOGIC(crnor, tcg_gen_nor_i32, 0x01);
+GEN_CRLOGIC_MASK(crnor, tcg_gen_nor_i32, 0x01);
 /* cror */
 GEN_CRLOGIC(cror, tcg_gen_or_i32, 0x0E);
 /* crorc */
-GEN_CRLOGIC(crorc, tcg_gen_orc_i32, 0x0D);
+GEN_CRLOGIC_MASK(crorc, tcg_gen_orc_i32, 0x0D);
 /* crxor */
 GEN_CRLOGIC(crxor, tcg_gen_xor_i32, 0x06);
 
 /* mcrf */
 static void gen_mcrf(DisasContext *ctx)
 {
-    tcg_gen_mov_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfS(ctx->opcode)]);
+    int i;
+    for (i = 0; i < 4; i++) {
+        tcg_gen_mov_i32(cpu_cr[crfD(ctx->opcode) * 4 + i],
+                        cpu_cr[crfS(ctx->opcode) * 4 + i]);
+    }
 }
 
 /***                           System linkage                              ***/
@@ -4163,20 +4166,12 @@ static void gen_write_xer(TCGv src)
 /* mcrxr */
 static void gen_mcrxr(DisasContext *ctx)
 {
-    TCGv_i32 t0 = tcg_temp_new_i32();
-    TCGv_i32 t1 = tcg_temp_new_i32();
-    TCGv_i32 dst = cpu_crf[crfD(ctx->opcode)];
-
-    tcg_gen_trunc_tl_i32(t0, cpu_so);
-    tcg_gen_trunc_tl_i32(t1, cpu_ov);
-    tcg_gen_trunc_tl_i32(dst, cpu_ca);
-    tcg_gen_shli_i32(t0, t0, 3);
-    tcg_gen_shli_i32(t1, t1, 2);
-    tcg_gen_shli_i32(dst, dst, 1);
-    tcg_gen_or_i32(dst, dst, t0);
-    tcg_gen_or_i32(dst, dst, t1);
-    tcg_temp_free_i32(t0);
-    tcg_temp_free_i32(t1);
+    int crf = crfD(ctx->opcode);
+
+    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_LT], cpu_so);
+    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_GT], cpu_ov);
+    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_EQ], cpu_ca);
+    tcg_gen_movi_i32(cpu_cr[crf * 4 + CRF_SO], 0);
 
     tcg_gen_movi_tl(cpu_so, 0);
     tcg_gen_movi_tl(cpu_ov, 0);
@@ -6351,11 +6346,13 @@ static void gen_tlbsx_40x(DisasContext *ctx)
     gen_helper_4xx_tlbsx(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
     tcg_temp_free(t0);
     if (Rc(ctx->opcode)) {
-        int l1 = gen_new_label();
-        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rD(ctx->opcode)], -1, l1);
-        tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02);
-        gen_set_label(l1);
+        t0 = tcg_temp_new();
+        tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
+        tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
+        tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
+        tcg_gen_setcondi_tl(TCG_COND_EQ, t0, cpu_gpr[rD(ctx->opcode)], -1);
+        tcg_gen_trunc_tl_i32(cpu_cr[CRF_EQ], t0);
+        tcg_temp_free(t0);
     }
 #endif
 }
@@ -6432,11 +6429,13 @@ static void gen_tlbsx_440(DisasContext *ctx)
     gen_helper_440_tlbsx(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
     tcg_temp_free(t0);
     if (Rc(ctx->opcode)) {
-        int l1 = gen_new_label();
-        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rD(ctx->opcode)], -1, l1);
-        tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02);
-        gen_set_label(l1);
+        t0 = tcg_temp_new();
+        tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
+        tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
+        tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
+        tcg_gen_setcondi_tl(TCG_COND_EQ, t0, cpu_gpr[rD(ctx->opcode)], -1);
+        tcg_gen_trunc_tl_i32(cpu_cr[CRF_EQ], t0);
+        tcg_temp_free(t0);
     }
 #endif
 }
@@ -7402,7 +7401,7 @@ GEN_VXFORM(vpmsumd, 4, 19)
 static void gen_##op(DisasContext *ctx)             \
 {                                                   \
     TCGv_ptr ra, rb, rd;                            \
-    TCGv_i32 ps;                                    \
+    TCGv_i32 ps, crf;                               \
                                                     \
     if (unlikely(!ctx->altivec_enabled)) {          \
         gen_exception(ctx, POWERPC_EXCP_VPU);       \
@@ -7414,13 +7413,16 @@ static void gen_##op(DisasContext *ctx)             \
     rd = gen_avr_ptr(rD(ctx->opcode));              \
                                                     \
     ps = tcg_const_i32((ctx->opcode & 0x200) != 0); \
+    crf = tcg_temp_new_i32();                       \
                                                     \
-    gen_helper_##op(cpu_crf[6], rd, ra, rb, ps);    \
+    gen_helper_##op(crf, rd, ra, rb, ps);           \
+    gen_op_mtcr(6 << 2, crf, 0);                    \
                                                     \
     tcg_temp_free_ptr(ra);                          \
     tcg_temp_free_ptr(rb);                          \
     tcg_temp_free_ptr(rd);                          \
     tcg_temp_free_i32(ps);                          \
+    tcg_temp_free_i32(crf);                         \
 }
 
 GEN_BCD(bcdadd)
@@ -8248,6 +8250,7 @@ static void gen_##name(DisasContext *ctx)        \
 static void gen_##name(DisasContext *ctx)         \
 {                                                 \
     TCGv_ptr ra, rb;                              \
+    TCGv_i32 tmp;                                 \
     if (unlikely(!ctx->fpu_enabled)) {            \
         gen_exception(ctx, POWERPC_EXCP_FPU);     \
         return;                                   \
@@ -8255,8 +8258,10 @@ static void gen_##name(DisasContext *ctx)         \
     gen_update_nip(ctx, ctx->nip - 4);            \
     ra = gen_fprp_ptr(rA(ctx->opcode));           \
     rb = gen_fprp_ptr(rB(ctx->opcode));           \
-    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \
-                      cpu_env, ra, rb);           \
+    tmp = tcg_temp_new_i32();                     \
+    gen_helper_##name(tmp, cpu_env, ra, rb);      \
+    gen_op_mtcr(crfD(ctx->opcode) << 2, tmp, 0);  \
+    tcg_temp_free_i32(tmp);                       \
     tcg_temp_free_ptr(ra);                        \
     tcg_temp_free_ptr(rb);                        \
 }
@@ -8265,7 +8270,7 @@ static void gen_##name(DisasContext *ctx)         \
 static void gen_##name(DisasContext *ctx)         \
 {                                                 \
     TCGv_ptr ra;                                  \
-    TCGv_i32 dcm;                                 \
+    TCGv_i32 dcm, tmp;                            \
     if (unlikely(!ctx->fpu_enabled)) {            \
         gen_exception(ctx, POWERPC_EXCP_FPU);     \
         return;                                   \
@@ -8273,8 +8278,10 @@ static void gen_##name(DisasContext *ctx)         \
     gen_update_nip(ctx, ctx->nip - 4);            \
     ra = gen_fprp_ptr(rA(ctx->opcode));           \
     dcm = tcg_const_i32(DCM(ctx->opcode));        \
-    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \
-                      cpu_env, ra, dcm);          \
+    tmp = tcg_temp_new_i32();                     \
+    gen_helper_##name(tmp, cpu_env, ra, dcm);     \
+    gen_op_mtcr(crfD(ctx->opcode) << 2, tmp, 0);  \
+    tcg_temp_free_i32(tmp);                       \
     tcg_temp_free_ptr(ra);                        \
     tcg_temp_free_i32(dcm);                       \
 }
@@ -8699,37 +8706,32 @@ GEN_SPEOP_ARITH_IMM2(evsubifw, tcg_gen_subi_i32);
 #define GEN_SPEOP_COMP(name, tcg_cond)                                        \
 static inline void gen_##name(DisasContext *ctx)                              \
 {                                                                             \
+    TCGv tmp = tcg_temp_new();                                                \
+                                                                              \
     if (unlikely(!ctx->spe_enabled)) {                                        \
         gen_exception(ctx, POWERPC_EXCP_SPEU);                                \
         return;                                                               \
     }                                                                         \
-    int l1 = gen_new_label();                                                 \
-    int l2 = gen_new_label();                                                 \
-    int l3 = gen_new_label();                                                 \
-    int l4 = gen_new_label();                                                 \
                                                                               \
     tcg_gen_ext32s_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);    \
     tcg_gen_ext32s_tl(cpu_gpr[rB(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);    \
     tcg_gen_ext32s_tl(cpu_gprh[rA(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]);  \
     tcg_gen_ext32s_tl(cpu_gprh[rB(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]);  \
                                                                               \
-    tcg_gen_brcond_tl(tcg_cond, cpu_gpr[rA(ctx->opcode)],                     \
-                       cpu_gpr[rB(ctx->opcode)], l1);                         \
-    tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 0);                          \
-    tcg_gen_br(l2);                                                           \
-    gen_set_label(l1);                                                        \
-    tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)],                              \
-                     CRF_CL | CRF_CH_OR_CL | CRF_CH_AND_CL);                  \
-    gen_set_label(l2);                                                        \
-    tcg_gen_brcond_tl(tcg_cond, cpu_gprh[rA(ctx->opcode)],                    \
-                       cpu_gprh[rB(ctx->opcode)], l3);                        \
-    tcg_gen_andi_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)],  \
-                     ~(CRF_CH | CRF_CH_AND_CL));                              \
-    tcg_gen_br(l4);                                                           \
-    gen_set_label(l3);                                                        \
-    tcg_gen_ori_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)],   \
-                    CRF_CH | CRF_CH_OR_CL);                                   \
-    gen_set_label(l4);                                                        \
+    tcg_gen_setcond_tl(tcg_cond, tmp,                                         \
+                       cpu_gpr[rA(ctx->opcode)],                              \
+                       cpu_gpr[rB(ctx->opcode)]);                             \
+    tcg_gen_trunc_tl_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL], tmp);        \
+    tcg_gen_setcond_tl(tcg_cond, tmp,                                         \
+                       cpu_gprh[rA(ctx->opcode)],                             \
+                       cpu_gprh[rB(ctx->opcode)]);                            \
+    tcg_gen_trunc_tl_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], tmp);        \
+    tcg_gen_or_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_OR_CL],              \
+                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                    \
+                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                   \
+    tcg_gen_and_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_AND_CL],            \
+                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                   \
+                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                  \
 }
 GEN_SPEOP_COMP(evcmpgtu, TCG_COND_GTU);
 GEN_SPEOP_COMP(evcmpgts, TCG_COND_GT);
@@ -8800,22 +8802,20 @@ static inline void gen_evsel(DisasContext *ctx)
     int l2 = gen_new_label();
     int l3 = gen_new_label();
     int l4 = gen_new_label();
-    TCGv_i32 t0 = tcg_temp_local_new_i32();
-    tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 3);
-    tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
+
+    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[(ctx->opcode & 0x07) * 4], 0, l1);
     tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]);
     tcg_gen_br(l2);
     gen_set_label(l1);
     tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]);
     gen_set_label(l2);
-    tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 2);
-    tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l3);
+
+    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[(ctx->opcode & 0x07) * 4 + 1], 0, l3);
     tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
     tcg_gen_br(l4);
     gen_set_label(l3);
     tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
     gen_set_label(l4);
-    tcg_temp_free_i32(t0);
 }
 
 static void gen_evsel0(DisasContext *ctx)
@@ -9397,9 +9397,12 @@ static inline void gen_##name(DisasContext *ctx)                              \
     t0 = tcg_temp_new_i32();                                                  \
     t1 = tcg_temp_new_i32();                                                  \
                                                                               \
+    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_LT], 0);              \
+    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_GT], 0);              \
+    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_SO], 0);              \
     tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);                       \
     tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);                       \
-    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1);           \
+    gen_helper_##name(cpu_cr[crfD(ctx->opcode) * 4 + CRF_EQ], cpu_env, t0, t1); \
                                                                               \
     tcg_temp_free_i32(t0);                                                    \
     tcg_temp_free_i32(t1);                                                    \
@@ -9416,10 +9419,39 @@ static inline void gen_##name(DisasContext *ctx)                              \
     t1 = tcg_temp_new_i64();                                                  \
     gen_load_gpr64(t0, rA(ctx->opcode));                                      \
     gen_load_gpr64(t1, rB(ctx->opcode));                                      \
-    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1);           \
+    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_LT], 0);              \
+    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_GT], 0);              \
+    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_SO], 0);              \
+    gen_helper_##name(cpu_cr[crfD(ctx->opcode) * 4 + CRF_EQ], cpu_env,        \
+                      t0, t1);                                                \
     tcg_temp_free_i64(t0);                                                    \
     tcg_temp_free_i64(t1);                                                    \
 }
+#define GEN_SPEFPUOP_COMP_V64(name, helper)                                       \
+static inline void gen_##name(DisasContext *ctx)                                  \
+{                                                                                 \
+    TCGv_i32 t0, t1;                                                              \
+    if (unlikely(!ctx->spe_enabled)) {                                            \
+        gen_exception(ctx, POWERPC_EXCP_SPEU);                                    \
+        return;                                                                   \
+    }                                                                             \
+    t0 = tcg_temp_new_i32();                                                      \
+    t1 = tcg_temp_new_i32();                                                      \
+    tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);                           \
+    tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);                           \
+    gen_helper_##helper(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL], cpu_env, t0, t1); \
+    tcg_gen_trunc_tl_i32(t0, cpu_gprh[rA(ctx->opcode)]);                          \
+    tcg_gen_trunc_tl_i32(t1, cpu_gprh[rB(ctx->opcode)]);                          \
+    gen_helper_##helper(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], cpu_env, t0, t1); \
+    tcg_temp_free_i32(t0);                                                        \
+    tcg_temp_free_i32(t1);                                                        \
+    tcg_gen_or_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_OR_CL],                  \
+                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                        \
+                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                       \
+    tcg_gen_and_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_AND_CL],                \
+                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                       \
+                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                      \
+}
 
 /* Single precision floating-point vectors operations */
 /* Arithmetic */
@@ -9474,12 +9506,12 @@ GEN_SPEFPUOP_CONV_64_64(evfsctuiz);
 GEN_SPEFPUOP_CONV_64_64(evfsctsiz);
 
 /* Comparison */
-GEN_SPEFPUOP_COMP_64(evfscmpgt);
-GEN_SPEFPUOP_COMP_64(evfscmplt);
-GEN_SPEFPUOP_COMP_64(evfscmpeq);
-GEN_SPEFPUOP_COMP_64(evfststgt);
-GEN_SPEFPUOP_COMP_64(evfststlt);
-GEN_SPEFPUOP_COMP_64(evfststeq);
+GEN_SPEFPUOP_COMP_V64(evfscmpgt, efscmpgt);
+GEN_SPEFPUOP_COMP_V64(evfscmplt, efscmplt);
+GEN_SPEFPUOP_COMP_V64(evfscmpeq, efscmpeq);
+GEN_SPEFPUOP_COMP_V64(evfststgt, efststgt);
+GEN_SPEFPUOP_COMP_V64(evfststlt, efststlt);
+GEN_SPEFPUOP_COMP_V64(evfststeq, efststeq);
 
 /* Opcodes definitions */
 GEN_SPE(evfsadd,   evfssub,   0x00, 0x0A, 0x00000000, 0x00000000, PPC_SPE_SINGLE); //
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Qemu-devel] [PATCH 12/14] ppc: use movcond to implement evsel
  2014-09-15 15:03 [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Paolo Bonzini
                   ` (10 preceding siblings ...)
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 11/14] ppc: store CR registers in 32 1-bit registers Paolo Bonzini
@ 2014-09-15 15:03 ` Paolo Bonzini
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 13/14] ppc: inline ppc_set_crf when clearer Paolo Bonzini
                   ` (3 subsequent siblings)
  15 siblings, 0 replies; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-15 15:03 UTC (permalink / raw)
  To: qemu-devel; +Cc: tommusta, agraf

This simplifies the code and avoids basic block splitting.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
	v1->v2: new, noticed during re-review

 target-ppc/translate.c | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index d8c9240..cdd5187 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -8798,24 +8798,19 @@ static inline void gen_evsplatfi(DisasContext *ctx)
 
 static inline void gen_evsel(DisasContext *ctx)
 {
-    int l1 = gen_new_label();
-    int l2 = gen_new_label();
-    int l3 = gen_new_label();
-    int l4 = gen_new_label();
+    TCGv tmp = tcg_temp_new();
+    TCGv zero = tcg_const_tl(0);
 
-    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[(ctx->opcode & 0x07) * 4], 0, l1);
-    tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]);
-    tcg_gen_br(l2);
-    gen_set_label(l1);
-    tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]);
-    gen_set_label(l2);
+    tcg_gen_extu_i32_tl(tmp, cpu_cr[(ctx->opcode & 0x07) * 4 + CRF_CH]);
+    tcg_gen_movcond_tl(TCG_COND_NE, cpu_gprh[rD(ctx->opcode)], tmp, zero,
+                       cpu_gprh[rA(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]);
 
-    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[(ctx->opcode & 0x07) * 4 + 1], 0, l3);
-    tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
-    tcg_gen_br(l4);
-    gen_set_label(l3);
-    tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
-    gen_set_label(l4);
+    tcg_gen_extu_i32_tl(tmp, cpu_cr[(ctx->opcode & 0x07) * 4 + CRF_CL]);
+    tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr[rD(ctx->opcode)], tmp, zero,
+                       cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
+
+    tcg_temp_free(zero);
+    tcg_temp_free(tmp);
 }
 
 static void gen_evsel0(DisasContext *ctx)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Qemu-devel] [PATCH 13/14] ppc: inline ppc_set_crf when clearer
  2014-09-15 15:03 [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Paolo Bonzini
                   ` (11 preceding siblings ...)
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 12/14] ppc: use movcond to implement evsel Paolo Bonzini
@ 2014-09-15 15:03 ` Paolo Bonzini
  2014-09-18 20:33   ` Tom Musta
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 14/14] ppc: dump all 32 CR bits Paolo Bonzini
                   ` (2 subsequent siblings)
  15 siblings, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-15 15:03 UTC (permalink / raw)
  To: qemu-devel; +Cc: tommusta, agraf

Do not go through the loop when we're setting the four CR fields to
separate constants or conditions.  This is clearer than putting together
a 4-bit value and passing it.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
	v1->v2: due to previous changes, ppc_get_crf never needs this
	treatment, so I adjusted the subject

 linux-user/main.c       |  5 ++++-
 target-ppc/fpu_helper.c | 12 ++++++++++--
 target-ppc/int_helper.c | 27 +++++++++++++++++++++------
 3 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index b403f24..5a0b31f 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -1550,7 +1550,10 @@ static int do_store_exclusive(CPUPPCState *env)
                 }
             }
         }
-        ppc_set_crf(env, 0, (stored << 1) | xer_so);
+        env->cr[CRF_LT] = 0;
+        env->cr[CRF_GT] = 0;
+        env->cr[CRF_EQ] = stored;
+        env->cr[CRF_SO] = xer_so;
         env->reserve_addr = (target_ulong)-1;
     }
     if (!segv) {
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 7894dc5..c86320f 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -1099,7 +1099,11 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
 
     env->fpscr &= ~(0x0F << FPSCR_FPRF);
     env->fpscr |= (0x08 << FPSCR_FPRF) >> fpcc;
-    ppc_set_crf(env, crfD, 0x08 >> fpcc);
+
+    env->cr[crfD * 4 + CRF_LT] = (fpcc == CRF_LT);
+    env->cr[crfD * 4 + CRF_GT] = (fpcc == CRF_GT);
+    env->cr[crfD * 4 + CRF_EQ] = (fpcc == CRF_EQ);
+    env->cr[crfD * 4 + CRF_SO] = (fpcc == CRF_SO);
 
     if (unlikely(fpcc == CRF_SO
                  && (float64_is_signaling_nan(farg1.d) ||
@@ -1131,7 +1135,11 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
 
     env->fpscr &= ~(0x0F << FPSCR_FPRF);
     env->fpscr |= (0x08 << FPSCR_FPRF) >> fpcc;
-    ppc_set_crf(env, crfD, 0x08 >> fpcc);
+
+    env->cr[crfD * 4 + CRF_LT] = (fpcc == CRF_LT);
+    env->cr[crfD * 4 + CRF_GT] = (fpcc == CRF_GT);
+    env->cr[crfD * 4 + CRF_EQ] = (fpcc == CRF_EQ);
+    env->cr[crfD * 4 + CRF_SO] = (fpcc == CRF_SO);
 
     if (unlikely(fpcc == CRF_SO)) {
         if (float64_is_signaling_nan(farg1.d) ||
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 96f2e7d..be52437 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -657,7 +657,10 @@ VCF(sx, int32_to_float32, s32)
             none |= result;                                             \
         }                                                               \
         if (record) {                                                   \
-            ppc_set_crf(env, 6, ((all != 0) << 3) | ((none == 0) << 1)); \
+            env->cr[24 + CRF_LT] = (all != 0);                          \
+            env->cr[24 + CRF_GT] = 0;                                   \
+            env->cr[24 + CRF_EQ] = (none == 0);                         \
+            env->cr[24 + CRF_SO] = 0;                                   \
         }                                                               \
     }
 #define VCMP(suffix, compare, element)          \
@@ -703,7 +706,10 @@ VCMP(gtsd, >, s64)
             none |= result;                                             \
         }                                                               \
         if (record) {                                                   \
-            ppc_set_crf(env, 6, ((all != 0) << 3) | ((none == 0) << 1)); \
+            env->cr[24 + CRF_LT] = (all != 0);                          \
+            env->cr[24 + CRF_GT] = 0;                                   \
+            env->cr[24 + CRF_EQ] = (none == 0);                         \
+            env->cr[24 + CRF_SO] = 0;                                   \
         }                                                               \
     }
 #define VCMPFP(suffix, compare, order)          \
@@ -737,7 +743,10 @@ static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
         }
     }
     if (record) {
-        ppc_set_crf(env, 6, (all_in == 0) << 1);
+        env->cr[24 + CRF_LT] = 0;
+        env->cr[24 + CRF_GT] = 0;
+        env->cr[24 + CRF_EQ] = (all_in == 0);
+        env->cr[24 + CRF_SO] = 0;
     }
 }
 
@@ -2558,7 +2567,9 @@ target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
     for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
         if ((high & mask) == 0) {
             if (update_Rc) {
-                ppc_set_crf(env, 0, 0x4);
+                env->cr[CRF_LT] = 0;
+                env->cr[CRF_GT] = 1;
+                env->cr[CRF_EQ] = 0;
             }
             goto done;
         }
@@ -2567,7 +2578,9 @@ target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
     for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
         if ((low & mask) == 0) {
             if (update_Rc) {
-                ppc_set_crf(env, 0, 0x8);
+                env->cr[CRF_LT] = 1;
+                env->cr[CRF_GT] = 0;
+                env->cr[CRF_EQ] = 0;
             }
             goto done;
         }
@@ -2575,7 +2588,9 @@ target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
     }
     i = 8;
     if (update_Rc) {
-        ppc_set_crf(env, 0, 0x2);
+        env->cr[CRF_LT] = 0;
+        env->cr[CRF_GT] = 0;
+        env->cr[CRF_EQ] = 1;
     }
  done:
     env->xer = (env->xer & ~0x7F) | i;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Qemu-devel] [PATCH 14/14] ppc: dump all 32 CR bits
  2014-09-15 15:03 [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Paolo Bonzini
                   ` (12 preceding siblings ...)
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 13/14] ppc: inline ppc_set_crf when clearer Paolo Bonzini
@ 2014-09-15 15:03 ` Paolo Bonzini
  2014-09-18 20:43 ` [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Tom Musta
  2014-11-03 11:56 ` Alexander Graf
  15 siblings, 0 replies; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-15 15:03 UTC (permalink / raw)
  To: qemu-devel; +Cc: tommusta, agraf

This is more precise when bits have been modified with CR
boolean operations.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target-ppc/translate.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index cdd5187..65c9447 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -11132,18 +11132,14 @@ void ppc_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
     for (i = 0; i < 8; i++) {
         cpu_fprintf(f, "%01x", ppc_get_crf(env, i));
     }
-    cpu_fprintf(f, "  [");
-    for (i = 0; i < 8; i++) {
-        char a = '-';
-        if (ppc_get_crf(env, i) & 0x08)
-            a = 'L';
-        else if (ppc_get_crf(env, i) & 0x04)
-            a = 'G';
-        else if (ppc_get_crf(env, i) & 0x02)
-            a = 'E';
-        cpu_fprintf(f, " %c%c", a, ppc_get_crf(env, i) & 0x01 ? 'O' : ' ');
-    }
-    cpu_fprintf(f, " ]             RES " TARGET_FMT_lx "\n",
+    cpu_fprintf(f, "  ");
+    for (i = 0; i < 32; i++) {
+        if ((i & 3) == 0) {
+            cpu_fprintf(f, "%c", i ? ' ' : '[');
+        }
+        cpu_fprintf(f, "%c", env->cr[i] ? "LGEO"[i&3] : '.');
+    }
+    cpu_fprintf(f, "]       RES " TARGET_FMT_lx "\n",
                 env->reserve_addr);
     for (i = 0; i < 32; i++) {
         if ((i & (RFPL - 1)) == 0)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 03/14] target-ppc: use separate indices for various translation modes
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 03/14] target-ppc: use separate indices for various translation modes Paolo Bonzini
@ 2014-09-16 17:20   ` Tom Musta
  2014-09-16 18:02     ` Richard Henderson
  2014-09-16 18:49     ` Peter Maydell
  0 siblings, 2 replies; 44+ messages in thread
From: Tom Musta @ 2014-09-16 17:20 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: agraf

On 9/15/2014 10:03 AM, Paolo Bonzini wrote:
> PowerPC TCG flushes the TLB on every IR/DR change, which basically
> means on every user<->kernel context switch.  Encode IR/DR in the
> MMU index.
> 
> This brings the number of TLB flushes down from ~900000 to ~50000
> for starting up the Debian installer, which is in line with x86
> and gives a ~10% performance improvement.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  target-ppc/cpu.h         |  7 ++-----
>  target-ppc/excp_helper.c |  3 ---
>  target-ppc/helper_regs.h | 11 ++++++-----
>  3 files changed, 8 insertions(+), 13 deletions(-)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index b64c652..c29ce3b 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -922,7 +922,8 @@ struct ppc_segment_page_sizes {
>  
>  /*****************************************************************************/
>  /* The whole PowerPC CPU context */
> -#define NB_MMU_MODES 3
> +#define NB_MMU_MODES 12
> +#define MMU_USER_IDX 3  /* PR=IR=DR=1 */

This doesn't build for me:

  CC    ppc64-softmmu/tcg/tcg.o
In file included from /bghome/tmusta/powerisa/qemu/qemu/tcg/tcg.c:264:
/bghome/tmusta/powerisa/qemu/qemu/tcg/ppc/tcg-target.c: In function 'tcg_out_tlb_read':
/bghome/tmusta/powerisa/qemu/qemu/tcg/ppc/tcg-target.c:1394: error: size of array 'qemu_build_bug_on__1396' is negative
make[1]: *** [tcg/tcg.o] Error 1
make: *** [subdir-ppc64-softmmu] Error 2

which correlates with this:

  1389      /* Compensate for very large offsets.  */
  1390      if (add_off >= 0x8000) {
  1391          /* Most target env are smaller than 32k; none are larger than 64k.
  1392             Simplify the logic here merely to offset by 0x7ff0, giving us a
  1393             range just shy of 64k.  Check this assumption.  */
  1394          QEMU_BUILD_BUG_ON(offsetof(CPUArchState,
  1395                                     tlb_table[NB_MMU_MODES - 1][1])
  1396                            > 0x7ff0 + 0x7fff);
  1397          tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, base, 0x7ff0));
  1398          base = TCG_REG_TMP1;
  1399          cmp_off -= 0x7ff0;
  1400          add_off -= 0x7ff0;
  1401      }




>  
>  #define PPC_CPU_OPCODES_LEN 0x40
>  
> @@ -1231,10 +1232,6 @@ static inline CPUPPCState *cpu_init(const char *cpu_model)
>  #define cpu_list ppc_cpu_list
>  
>  /* MMU modes definitions */
> -#define MMU_MODE0_SUFFIX _user
> -#define MMU_MODE1_SUFFIX _kernel
> -#define MMU_MODE2_SUFFIX _hypv
> -#define MMU_USER_IDX 0
>  static inline int cpu_mmu_index (CPUPPCState *env)
>  {
>      return env->mmu_idx;
> diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
> index 922e86d..96ad9d7 100644
> --- a/target-ppc/excp_helper.c
> +++ b/target-ppc/excp_helper.c
> @@ -623,9 +623,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>  
>      if (env->spr[SPR_LPCR] & LPCR_AIL) {
>          new_msr |= (1 << MSR_IR) | (1 << MSR_DR);
> -    } else if (msr & ((1 << MSR_IR) | (1 << MSR_DR))) {
> -        /* If we disactivated any translation, flush TLBs */
> -        tlb_flush(cs, 1);
>      }
>  
>  #ifdef TARGET_PPC64
> diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
> index 271fddf..23b8ded 100644
> --- a/target-ppc/helper_regs.h
> +++ b/target-ppc/helper_regs.h
> @@ -41,12 +41,15 @@ static inline void hreg_swap_gpr_tgpr(CPUPPCState *env)
>  
>  static inline void hreg_compute_mem_idx(CPUPPCState *env)
>  {
> +    int high;
> +
>      /* Precompute MMU index */
>      if (msr_pr == 0 && msr_hv != 0) {
> -        env->mmu_idx = 2;
> +        high = 2;
>      } else {
> -        env->mmu_idx = 1 - msr_pr;
> +        high = 1 - msr_pr;
>      }
> +    env->mmu_idx = (high << 2) | (msr_ir << 1) | msr_dr;
>  }
>  
>  static inline void hreg_compute_hflags(CPUPPCState *env)
> @@ -56,7 +59,7 @@ static inline void hreg_compute_hflags(CPUPPCState *env)
>      /* We 'forget' FE0 & FE1: we'll never generate imprecise exceptions */
>      hflags_mask = (1 << MSR_VR) | (1 << MSR_AP) | (1 << MSR_SA) |
>          (1 << MSR_PR) | (1 << MSR_FP) | (1 << MSR_SE) | (1 << MSR_BE) |
> -        (1 << MSR_LE) | (1 << MSR_VSX);
> +        (1 << MSR_LE) | (1 << MSR_VSX) | (1 << MSR_IR) | (1 << MSR_DR);
>      hflags_mask |= (1ULL << MSR_CM) | (1ULL << MSR_SF) | MSR_HVB;
>      hreg_compute_mem_idx(env);
>      env->hflags = env->msr & hflags_mask;
> @@ -82,8 +85,6 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
>      }
>      if (((value >> MSR_IR) & 1) != msr_ir ||
>          ((value >> MSR_DR) & 1) != msr_dr) {
> -        /* Flush all tlb when changing translation mode */
> -        tlb_flush(cs, 1);
>          excp = POWERPC_EXCP_NONE;
>          cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
>      }
> 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 03/14] target-ppc: use separate indices for various translation modes
  2014-09-16 17:20   ` Tom Musta
@ 2014-09-16 18:02     ` Richard Henderson
  2014-09-16 18:27       ` Paolo Bonzini
  2014-09-16 18:49     ` Peter Maydell
  1 sibling, 1 reply; 44+ messages in thread
From: Richard Henderson @ 2014-09-16 18:02 UTC (permalink / raw)
  To: Tom Musta, Paolo Bonzini, qemu-devel; +Cc: agraf

On 09/16/2014 10:20 AM, Tom Musta wrote:
> On 9/15/2014 10:03 AM, Paolo Bonzini wrote:
>> PowerPC TCG flushes the TLB on every IR/DR change, which basically
>> means on every user<->kernel context switch.  Encode IR/DR in the
>> MMU index.
>>
>> This brings the number of TLB flushes down from ~900000 to ~50000
>> for starting up the Debian installer, which is in line with x86
>> and gives a ~10% performance improvement.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>  target-ppc/cpu.h         |  7 ++-----
>>  target-ppc/excp_helper.c |  3 ---
>>  target-ppc/helper_regs.h | 11 ++++++-----
>>  3 files changed, 8 insertions(+), 13 deletions(-)
>>
>> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
>> index b64c652..c29ce3b 100644
>> --- a/target-ppc/cpu.h
>> +++ b/target-ppc/cpu.h
>> @@ -922,7 +922,8 @@ struct ppc_segment_page_sizes {
>>  
>>  /*****************************************************************************/
>>  /* The whole PowerPC CPU context */
>> -#define NB_MMU_MODES 3
>> +#define NB_MMU_MODES 12
>> +#define MMU_USER_IDX 3  /* PR=IR=DR=1 */
> 
> This doesn't build for me:
> 
>   CC    ppc64-softmmu/tcg/tcg.o
> In file included from /bghome/tmusta/powerisa/qemu/qemu/tcg/tcg.c:264:
> /bghome/tmusta/powerisa/qemu/qemu/tcg/ppc/tcg-target.c: In function 'tcg_out_tlb_read':
> /bghome/tmusta/powerisa/qemu/qemu/tcg/ppc/tcg-target.c:1394: error: size of array 'qemu_build_bug_on__1396' is negative
> make[1]: *** [tcg/tcg.o] Error 1
> make: *** [subdir-ppc64-softmmu] Error 2
> 
> which correlates with this:
> 
>   1389      /* Compensate for very large offsets.  */
>   1390      if (add_off >= 0x8000) {
>   1391          /* Most target env are smaller than 32k; none are larger than 64k.
>   1392             Simplify the logic here merely to offset by 0x7ff0, giving us a
>   1393             range just shy of 64k.  Check this assumption.  */
>   1394          QEMU_BUILD_BUG_ON(offsetof(CPUArchState,
>   1395                                     tlb_table[NB_MMU_MODES - 1][1])
>   1396                            > 0x7ff0 + 0x7fff);
>   1397          tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, base, 0x7ff0));
>   1398          base = TCG_REG_TMP1;
>   1399          cmp_off -= 0x7ff0;
>   1400          add_off -= 0x7ff0;
>   1401      }

Ouch, yes indeed.

While we could probably fix this for ppc (using addis), it's not nearly so
easily fixable for arm -- without impacting performance anyway.

Does 96k worth of TLBs really help that much?  Are all 12 of them actually
used?  Can we use a more complex encoding scheme for the mmu_idx and use less?


r~

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 03/14] target-ppc: use separate indices for various translation modes
  2014-09-16 18:02     ` Richard Henderson
@ 2014-09-16 18:27       ` Paolo Bonzini
  2014-09-16 18:41         ` Richard Henderson
  0 siblings, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-16 18:27 UTC (permalink / raw)
  To: Richard Henderson, Tom Musta, qemu-devel; +Cc: agraf

Il 16/09/2014 20:02, Richard Henderson ha scritto:
> While we could probably fix this for ppc (using addis), it's not nearly so
> easily fixable for arm -- without impacting performance anyway.
> 
> Does 96k worth of TLBs really help that much?  Are all 12 of them actually
> used?  Can we use a more complex encoding scheme for the mmu_idx and use less?

In practice, only 3 to 7 are---hence my original attempt at using some
kind of FIFO caching:

   user mode, translation enabled
   kernel mode, paging disabled
   kernel mode, paging enabled
   supervisor mode, paging disabled
   supervisor mode, paging enabled

Plus perhaps kernel and supervisor mode with only data paging enabled.

You could lump together the IR!=0, DR!=0 cases, and flush that one TLB
index if the IR/DR pair changes with respect to the last time.  This
would use 6 indices.

Paolo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 03/14] target-ppc: use separate indices for various translation modes
  2014-09-16 18:27       ` Paolo Bonzini
@ 2014-09-16 18:41         ` Richard Henderson
  2014-09-16 22:23           ` Richard Henderson
  0 siblings, 1 reply; 44+ messages in thread
From: Richard Henderson @ 2014-09-16 18:41 UTC (permalink / raw)
  To: Paolo Bonzini, Tom Musta, qemu-devel; +Cc: agraf

On 09/16/2014 11:27 AM, Paolo Bonzini wrote:
> Il 16/09/2014 20:02, Richard Henderson ha scritto:
>> While we could probably fix this for ppc (using addis), it's not nearly so
>> easily fixable for arm -- without impacting performance anyway.
>>
>> Does 96k worth of TLBs really help that much?  Are all 12 of them actually
>> used?  Can we use a more complex encoding scheme for the mmu_idx and use less?
> 
> In practice, only 3 to 7 are---hence my original attempt at using some
> kind of FIFO caching:
> 
>    user mode, translation enabled
>    kernel mode, paging disabled
>    kernel mode, paging enabled
>    supervisor mode, paging disabled
>    supervisor mode, paging enabled
> 
> Plus perhaps kernel and supervisor mode with only data paging enabled.
> 
> You could lump together the IR!=0, DR!=0 cases, and flush that one TLB
> index if the IR/DR pair changes with respect to the last time.  This
> would use 6 indices.

I think I would prefer a solution that uses 6 indices, as it will not cause env
to overflow 64k, and will not require that any tcg backends be updated.


r~

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 03/14] target-ppc: use separate indices for various translation modes
  2014-09-16 17:20   ` Tom Musta
  2014-09-16 18:02     ` Richard Henderson
@ 2014-09-16 18:49     ` Peter Maydell
  2014-09-16 22:13       ` Richard Henderson
  1 sibling, 1 reply; 44+ messages in thread
From: Peter Maydell @ 2014-09-16 18:49 UTC (permalink / raw)
  To: Tom Musta; +Cc: Paolo Bonzini, QEMU Developers, Alexander Graf

On 16 September 2014 10:20, Tom Musta <tommusta@gmail.com> wrote:
>
>   1389      /* Compensate for very large offsets.  */
>   1390      if (add_off >= 0x8000) {
>   1391          /* Most target env are smaller than 32k; none are larger than 64k.
>   1392             Simplify the logic here merely to offset by 0x7ff0, giving us a
>   1393             range just shy of 64k.  Check this assumption.  */
>   1394          QEMU_BUILD_BUG_ON(offsetof(CPUArchState,
>   1395                                     tlb_table[NB_MMU_MODES - 1][1])
>   1396                            > 0x7ff0 + 0x7fff);
>   1397          tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, base, 0x7ff0));
>   1398          base = TCG_REG_TMP1;
>   1399          cmp_off -= 0x7ff0;
>   1400          add_off -= 0x7ff0;
>   1401      }

Is it possible to promote this BUILD_BUG_ON from "only on
PPC hosts" to "on all builds" ? It's really checking a
property of the target CPU's code, not a property of
the TCG backend, and I bet a lot of our backends don't
get built very often so we could easily miss breakage.
I guess you'd need to define and check a worst-case value
in a common header somewhere.

thanks
-- PMM

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 03/14] target-ppc: use separate indices for various translation modes
  2014-09-16 18:49     ` Peter Maydell
@ 2014-09-16 22:13       ` Richard Henderson
  0 siblings, 0 replies; 44+ messages in thread
From: Richard Henderson @ 2014-09-16 22:13 UTC (permalink / raw)
  To: Peter Maydell, Tom Musta; +Cc: Paolo Bonzini, QEMU Developers, Alexander Graf

On 09/16/2014 11:49 AM, Peter Maydell wrote:
> On 16 September 2014 10:20, Tom Musta <tommusta@gmail.com> wrote:
>>
>>   1389      /* Compensate for very large offsets.  */
>>   1390      if (add_off >= 0x8000) {
>>   1391          /* Most target env are smaller than 32k; none are larger than 64k.
>>   1392             Simplify the logic here merely to offset by 0x7ff0, giving us a
>>   1393             range just shy of 64k.  Check this assumption.  */
>>   1394          QEMU_BUILD_BUG_ON(offsetof(CPUArchState,
>>   1395                                     tlb_table[NB_MMU_MODES - 1][1])
>>   1396                            > 0x7ff0 + 0x7fff);
>>   1397          tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, base, 0x7ff0));
>>   1398          base = TCG_REG_TMP1;
>>   1399          cmp_off -= 0x7ff0;
>>   1400          add_off -= 0x7ff0;
>>   1401      }
> 
> Is it possible to promote this BUILD_BUG_ON from "only on
> PPC hosts" to "on all builds" ? It's really checking a
> property of the target CPU's code, not a property of
> the TCG backend, and I bet a lot of our backends don't
> get built very often so we could easily miss breakage.
> I guess you'd need to define and check a worst-case value
> in a common header somewhere.

Meh.  It is a property of the tcg backend, in that it is a property of the code
that immediately follows.  And that's what makes the BUG_ON clear and obvious, IMO.

For what it's worth, ppc as written has the smallest constraint of the current
backends, and I'm fairly confident that'll get built often-ish.

If you've got a rearrangement that puts the assert somewhere else, and keeps
the magic numbers understandable... I'll certainly have a look, but I don't see
how to retain the obviousness with a different placement.


r~

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 03/14] target-ppc: use separate indices for various translation modes
  2014-09-16 18:41         ` Richard Henderson
@ 2014-09-16 22:23           ` Richard Henderson
  2014-09-17  6:22             ` Paolo Bonzini
  0 siblings, 1 reply; 44+ messages in thread
From: Richard Henderson @ 2014-09-16 22:23 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: Tom Musta, Peter Maydell

On 09/16/2014 11:41 AM, Richard Henderson wrote:
>> In practice, only 3 to 7 are---hence my original attempt at using some
>> kind of FIFO caching:
>>
>>    user mode, translation enabled
>>    kernel mode, paging disabled
>>    kernel mode, paging enabled
>>    supervisor mode, paging disabled
>>    supervisor mode, paging enabled
>>
>> Plus perhaps kernel and supervisor mode with only data paging enabled.
>>
>> You could lump together the IR!=0, DR!=0 cases, and flush that one TLB
>> index if the IR/DR pair changes with respect to the last time.  This
>> would use 6 indices.
> 
> I think I would prefer a solution that uses 6 indices, as it will not cause env
> to overflow 64k, and will not require that any tcg backends be updated.

... alternately ...

What if instead of having a "mmu_index" for the mmu arrays, we have a pointer
to the "mmu context".  This does imply an extra memory load on the fast path,
but probably not an extra instruction.

With this, we can suddenly afford to have a relatively large number of mmu
contexts, with which we could implement address space numbers for relevant targets.

It is, of course, a much larger change, but perhaps it's of larger benefit.


r~

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 03/14] target-ppc: use separate indices for various translation modes
  2014-09-16 22:23           ` Richard Henderson
@ 2014-09-17  6:22             ` Paolo Bonzini
  2014-09-17  8:53               ` Paolo Bonzini
  0 siblings, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-17  6:22 UTC (permalink / raw)
  To: Richard Henderson
  Cc: Tom Musta, Alexander Graf, qemu-ppc, qemu-devel, Peter Maydell


> What if instead of having a "mmu_index" for the mmu arrays, we have a pointer
> to the "mmu context".  This does imply an extra memory load on the fast path,
> but probably not an extra instruction.
> 
> With this, we can suddenly afford to have a relatively large number of mmu
> contexts, with which we could implement address space numbers for relevant
> targets.
> 
> It is, of course, a much larger change, but perhaps it's of larger benefit.

Sounds good.  I can give it a shot---in the meanwhile, since I forgot to
Cc qemu-ppc, Alex can you review/apply patch 1?

Paolo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 03/14] target-ppc: use separate indices for various translation modes
  2014-09-17  6:22             ` Paolo Bonzini
@ 2014-09-17  8:53               ` Paolo Bonzini
  2014-09-17 15:33                 ` Richard Henderson
  0 siblings, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-17  8:53 UTC (permalink / raw)
  To: Richard Henderson
  Cc: Peter Maydell, Tom Musta, qemu-ppc, Alexander Graf, qemu-devel

Il 17/09/2014 08:22, Paolo Bonzini ha scritto:
> 
>> What if instead of having a "mmu_index" for the mmu arrays, we have a pointer
>> to the "mmu context".  This does imply an extra memory load on the fast path,
>> but probably not an extra instruction.
>>
>> With this, we can suddenly afford to have a relatively large number of mmu
>> contexts, with which we could implement address space numbers for relevant
>> targets.
>>
>> It is, of course, a much larger change, but perhaps it's of larger benefit.
> 
> Sounds good.  I can give it a shot---in the meanwhile, since I forgot to
> Cc qemu-ppc, Alex can you review/apply patch 1?

Much simpler: let's cut the size of the TLB in half on affected targets.

This does sacrifice some speed, but you still get about two thirds of
the improvement (boot speed of a Debian installation ISO: 30s without
patches, 24s with small TLB, 22s with large TLB) compared to the current
TCG target.

For 32-bit target and 32-bit host we can still use the full TLB size.

The following can be easily squashed in patch 2:

diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index 0ca6f0b..ed78884 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -69,8 +69,6 @@ typedef uint64_t target_ulong;
 #define TB_JMP_PAGE_MASK (TB_JMP_CACHE_SIZE - TB_JMP_PAGE_SIZE)
 
 #if !defined(CONFIG_USER_ONLY)
-#define CPU_TLB_BITS 8
-#define CPU_TLB_SIZE (1 << CPU_TLB_BITS)
 /* use a fully associative victim tlb of 8 entries */
 #define CPU_VTLB_SIZE 8
 
@@ -80,6 +78,16 @@ typedef uint64_t target_ulong;
 #define CPU_TLB_ENTRY_BITS 5
 #endif
 
+/* All the TLBs together must be smaller than 64k on RISC machines  */
+#if !defined(__i386__) && !defined(__x86_64__) && !defined(__aarch64__) \
+    && !defined(__sparc__) && !defined(CONFIG_TCG_INTERPRETER)
+#define CPU_TLB_BITS (NB_MMU_MODES < 8 ? 8 : 12 - CPU_TLB_ENTRY_BITS)
+#else
+#define CPU_TLB_BITS 8
+#endif
+
+#define CPU_TLB_SIZE (1 << CPU_TLB_BITS)
+
 typedef struct CPUTLBEntry {
     /* bit TARGET_LONG_BITS to TARGET_PAGE_BITS : virtual address
        bit TARGET_PAGE_BITS-1..4  : Nonzero for accesses that should not

Tom, can you test this on PPC?

Paolo

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 03/14] target-ppc: use separate indices for various translation modes
  2014-09-17  8:53               ` Paolo Bonzini
@ 2014-09-17 15:33                 ` Richard Henderson
  2014-09-17 15:50                   ` Paolo Bonzini
  0 siblings, 1 reply; 44+ messages in thread
From: Richard Henderson @ 2014-09-17 15:33 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Peter Maydell, Tom Musta, qemu-ppc, Alexander Graf, qemu-devel

On 09/17/2014 01:53 AM, Paolo Bonzini wrote:
> +/* All the TLBs together must be smaller than 64k on RISC machines  */
> +#if !defined(__i386__) && !defined(__x86_64__) && !defined(__aarch64__) \
> +    && !defined(__sparc__) && !defined(CONFIG_TCG_INTERPRETER)
> +#define CPU_TLB_BITS (NB_MMU_MODES < 8 ? 8 : 12 - CPU_TLB_ENTRY_BITS)
> +#else
> +#define CPU_TLB_BITS 8
> +#endif

Hum.  Well, it's not all the tlbs together that must be less than 64k;
it's the addend of the first entry of the last tlb that must be within 64k of
the start of env.  Nit picking, but perhaps we can word the comment better.

And if we choose to do something like this, this is where I'd prefer a define
in the relevant tcg-target.h.  Because you've missed ia64 and s390 that have
positive offsets larger than 64k (21 and 19 bits, respectively).

But otherwise I'm ok with this as a solution.


r~

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 03/14] target-ppc: use separate indices for various translation modes
  2014-09-17 15:33                 ` Richard Henderson
@ 2014-09-17 15:50                   ` Paolo Bonzini
  2014-09-17 15:55                     ` Richard Henderson
  0 siblings, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-17 15:50 UTC (permalink / raw)
  To: Richard Henderson
  Cc: Peter Maydell, Tom Musta, qemu-ppc, Alexander Graf, qemu-devel

Il 17/09/2014 17:33, Richard Henderson ha scritto:
> Hum.  Well, it's not all the tlbs together that must be less than 64k;
> it's the addend of the first entry of the last tlb that must be within 64k of
> the start of env.  Nit picking, but perhaps we can word the comment better.

Indeed.

> And if we choose to do something like this, this is where I'd prefer a define
> in the relevant tcg-target.h.  Because you've missed ia64 and s390 that have
> positive offsets larger than 64k (21 and 19 bits, respectively).

Right, but with 16 MMU modes the maximum size is 128k, well within s390
and ia64's limits.

> But otherwise I'm ok with this as a solution.

Thanks!

Paolo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 03/14] target-ppc: use separate indices for various translation modes
  2014-09-17 15:50                   ` Paolo Bonzini
@ 2014-09-17 15:55                     ` Richard Henderson
  0 siblings, 0 replies; 44+ messages in thread
From: Richard Henderson @ 2014-09-17 15:55 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Peter Maydell, Tom Musta, qemu-ppc, Alexander Graf, qemu-devel

On 09/17/2014 08:50 AM, Paolo Bonzini wrote:
>> > And if we choose to do something like this, this is where I'd prefer a define
>> > in the relevant tcg-target.h.  Because you've missed ia64 and s390 that have
>> > positive offsets larger than 64k (21 and 19 bits, respectively).
> Right, but with 16 MMU modes the maximum size is 128k, well within s390
> and ia64's limits.
> 
My point exactly -- they weren't listed in your set of !defined conditionals,
so they'd use the reduced tlb size.


r~

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 04/14] ppc: introduce ppc_get_cr and ppc_set_cr
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 04/14] ppc: introduce ppc_get_cr and ppc_set_cr Paolo Bonzini
@ 2014-09-18 19:24   ` Tom Musta
  0 siblings, 0 replies; 44+ messages in thread
From: Tom Musta @ 2014-09-18 19:24 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: agraf

On 9/15/2014 10:03 AM, Paolo Bonzini wrote:
> New functions to put together all 32 CR bits.  Avoids easy
> off-by-one mistakes such as the one fixed by commit f13f529
> (ppc: fix monitor access to CR, 2014-08-28).
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> 	v1->v2: replaces "ppc: use ARRAY_SIZE in gdbstub.c"
> 
>  linux-user/elfload.c |  4 +---
>  linux-user/signal.c  |  8 ++------
>  monitor.c            |  9 +--------
>  target-ppc/cpu.h     | 20 ++++++++++++++++++++
>  target-ppc/gdbstub.c | 42 ++++++++----------------------------------
>  target-ppc/kvm.c     | 11 ++---------
>  6 files changed, 34 insertions(+), 60 deletions(-)
> 
> diff --git a/linux-user/elfload.c b/linux-user/elfload.c
> index bea803b..a7d1714 100644
> --- a/linux-user/elfload.c
> +++ b/linux-user/elfload.c
> @@ -857,9 +857,7 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUPPCState *en
>      (*regs)[36] = tswapreg(env->lr);
>      (*regs)[37] = tswapreg(env->xer);
>  
> -    for (i = 0; i < ARRAY_SIZE(env->crf); i++) {
> -        ccr |= env->crf[i] << (32 - ((i + 1) * 4));
> -    }
> +    ccr = ppc_get_cr(env);
>      (*regs)[38] = tswapreg(ccr);
>  }
>  
> diff --git a/linux-user/signal.c b/linux-user/signal.c
> index e11b208..97c3107 100644
> --- a/linux-user/signal.c
> +++ b/linux-user/signal.c
> @@ -4534,9 +4534,7 @@ static void save_user_regs(CPUPPCState *env, struct target_mcontext *frame)
>      __put_user(env->lr, &frame->mc_gregs[TARGET_PT_LNK]);
>      __put_user(env->xer, &frame->mc_gregs[TARGET_PT_XER]);
>  
> -    for (i = 0; i < ARRAY_SIZE(env->crf); i++) {
> -        ccr |= env->crf[i] << (32 - ((i + 1) * 4));
> -    }
> +    ccr = ppc_get_cr(env);
>      __put_user(ccr, &frame->mc_gregs[TARGET_PT_CCR]);
>  
>      /* Save Altivec registers if necessary.  */
> @@ -4616,9 +4614,7 @@ static void restore_user_regs(CPUPPCState *env,
>      __get_user(env->xer, &frame->mc_gregs[TARGET_PT_XER]);
>      __get_user(ccr, &frame->mc_gregs[TARGET_PT_CCR]);
>  
> -    for (i = 0; i < ARRAY_SIZE(env->crf); i++) {
> -        env->crf[i] = (ccr >> (32 - ((i + 1) * 4))) & 0xf;
> -    }
> +    ppc_set_cr(env, ccr);
>  
>      if (!sig) {
>          env->gpr[2] = save_r2;
> diff --git a/monitor.c b/monitor.c
> index ec73dd4..80acf25 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -2963,14 +2963,7 @@ static target_long monitor_get_pc (const struct MonitorDef *md, int val)
>  static target_long monitor_get_ccr (const struct MonitorDef *md, int val)
>  {
>      CPUArchState *env = mon_get_cpu();
> -    unsigned int u;
> -    int i;
> -
> -    u = 0;
> -    for (i = 0; i < 8; i++)
> -        u |= env->crf[i] << (32 - (4 * (i + 1)));
> -
> -    return u;
> +    return ppc_get_cr(env);
>  }
>  
>  static target_long monitor_get_msr (const struct MonitorDef *md, int val)
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index c29ce3b..0c0196d 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1197,6 +1197,26 @@ void ppc_tlb_invalidate_one (CPUPPCState *env, target_ulong addr);
>  
>  void store_fpscr(CPUPPCState *env, uint64_t arg, uint32_t mask);
>  
> +static inline uint32_t ppc_get_cr(const CPUPPCState *env)
> +{
> +    uint32_t cr = 0;
> +    int i;
> +
> +    for (i = 0; i < ARRAY_SIZE(env->crf); i++) {
> +        cr |= env->crf[i] << (32 - ((i + 1) * 4));
> +    }
> +    return cr;
> +}
> +
> +static inline void ppc_set_cr(CPUPPCState *env, uint32_t cr)
> +{
> +    int i;
> +
> +    for (i = 0; i < ARRAY_SIZE(env->crf); i++) {
> +        env->crf[i] = (cr >> (32 - ((i + 1) * 4))) & 0xF;
> +    }
> +}
> +
>  static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn)
>  {
>      uint64_t gprv;
> diff --git a/target-ppc/gdbstub.c b/target-ppc/gdbstub.c
> index 14675f4..11d138e 100644
> --- a/target-ppc/gdbstub.c
> +++ b/target-ppc/gdbstub.c
> @@ -135,15 +135,8 @@ int ppc_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n)
>              gdb_get_regl(mem_buf, env->msr);
>              break;
>          case 66:
> -            {
> -                uint32_t cr = 0;
> -                int i;
> -                for (i = 0; i < 8; i++) {
> -                    cr |= env->crf[i] << (32 - ((i + 1) * 4));
> -                }
> -                gdb_get_reg32(mem_buf, cr);
> -                break;
> -            }
> +            gdb_get_reg32(mem_buf, ppc_get_cr(env));
> +            break;
>          case 67:
>              gdb_get_regl(mem_buf, env->lr);
>              break;
> @@ -191,15 +184,8 @@ int ppc_cpu_gdb_read_register_apple(CPUState *cs, uint8_t *mem_buf, int n)
>              gdb_get_reg64(mem_buf, env->msr);
>              break;
>          case 66 + 32:
> -            {
> -                uint32_t cr = 0;
> -                int i;
> -                for (i = 0; i < 8; i++) {
> -                    cr |= env->crf[i] << (32 - ((i + 1) * 4));
> -                }
> -                gdb_get_reg32(mem_buf, cr);
> -                break;
> -            }
> +            gdb_get_reg32(mem_buf, ppc_get_cr(env));
> +            break;
>          case 67 + 32:
>              gdb_get_reg64(mem_buf, env->lr);
>              break;
> @@ -243,14 +229,8 @@ int ppc_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
>              ppc_store_msr(env, ldtul_p(mem_buf));
>              break;
>          case 66:
> -            {
> -                uint32_t cr = ldl_p(mem_buf);
> -                int i;
> -                for (i = 0; i < 8; i++) {
> -                    env->crf[i] = (cr >> (32 - ((i + 1) * 4))) & 0xF;
> -                }
> -                break;
> -            }
> +            ppc_set_cr(env, ldl_p(mem_buf));
> +            break;
>          case 67:
>              env->lr = ldtul_p(mem_buf);
>              break;
> @@ -293,14 +273,8 @@ int ppc_cpu_gdb_write_register_apple(CPUState *cs, uint8_t *mem_buf, int n)
>              ppc_store_msr(env, ldq_p(mem_buf));
>              break;
>          case 66 + 32:
> -            {
> -                uint32_t cr = ldl_p(mem_buf);
> -                int i;
> -                for (i = 0; i < 8; i++) {
> -                    env->crf[i] = (cr >> (32 - ((i + 1) * 4))) & 0xF;
> -                }
> -                break;
> -            }
> +            ppc_set_cr(env, ldl_p(mem_buf));
> +            break;
>          case 67 + 32:
>              env->lr = ldq_p(mem_buf);
>              break;
> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> index 9c23c6b..e541b9e 100644
> --- a/target-ppc/kvm.c
> +++ b/target-ppc/kvm.c
> @@ -831,10 +831,7 @@ int kvm_arch_put_registers(CPUState *cs, int level)
>      for (i = 0;i < 32; i++)
>          regs.gpr[i] = env->gpr[i];
>  
> -    regs.cr = 0;
> -    for (i = 0; i < 8; i++) {
> -        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
> -    }
> +    regs.cr = ppc_get_cr(env);
>  
>      ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
>      if (ret < 0)
> @@ -955,11 +952,7 @@ int kvm_arch_get_registers(CPUState *cs)
>      if (ret < 0)
>          return ret;
>  
> -    cr = regs.cr;
> -    for (i = 7; i >= 0; i--) {
> -        env->crf[i] = cr & 15;
> -        cr >>= 4;
> -    }
> +    ppc_set_cr(env, regs.cr);
>  
>      env->ctr = regs.ctr;
>      env->lr = regs.lr;
> 

One minor issue with this patch:

  CC    ppc64-softmmu/target-ppc/kvm.o
/bghome/tmusta/powerisa/qemu/qemu/target-ppc/kvm.c: In function 'kvm_arch_get_registers':
/bghome/tmusta/powerisa/qemu/qemu/target-ppc/kvm.c:948: warning: unused variable 'cr'

which, of course, can be fixed like this:

> git diff
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index e541b9e..74c1324 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -945,7 +945,6 @@ int kvm_arch_get_registers(CPUState *cs)
     CPUPPCState *env = &cpu->env;
     struct kvm_regs regs;
     struct kvm_sregs sregs;
-    uint32_t cr;
     int i, ret;

     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);


Otherwise ...
Reviewed-by: Tom Musta <tommusta@gmail.com>

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 06/14] ppc: introduce helpers for mfocrf/mtocrf
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 06/14] ppc: introduce helpers for mfocrf/mtocrf Paolo Bonzini
@ 2014-09-18 19:32   ` Tom Musta
  2014-09-18 21:01   ` Richard Henderson
  1 sibling, 0 replies; 44+ messages in thread
From: Tom Musta @ 2014-09-18 19:32 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: agraf

On 9/15/2014 10:03 AM, Paolo Bonzini wrote:
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> 	v1->v2: used ARRAY_SIZE and ppc_get_cr
> 
>  target-ppc/helper.h     |  3 +++
>  target-ppc/int_helper.c | 17 +++++++++++++++++
>  target-ppc/translate.c  | 31 ++++---------------------------
>  3 files changed, 24 insertions(+), 27 deletions(-)
> 
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index 0cfdc8a..ee748a1 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -59,6 +59,9 @@ DEF_HELPER_2(fpscr_setbit, void, env, i32)
>  DEF_HELPER_2(float64_to_float32, i32, env, i64)
>  DEF_HELPER_2(float32_to_float64, i64, env, i32)
>  
> +DEF_HELPER_1(mfocrf, tl, env)
> +DEF_HELPER_3(mtocrf, void, env, tl, i32)
> +
>  DEF_HELPER_4(fcmpo, void, env, i64, i64, i32)
>  DEF_HELPER_4(fcmpu, void, env, i64, i64, i32)
>  
> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> index 83c1ad0..54e8998 100644
> --- a/target-ppc/int_helper.c
> +++ b/target-ppc/int_helper.c
> @@ -289,6 +289,23 @@ target_ulong helper_popcntw(target_ulong val)
>  }
>  #endif
>  
> +void helper_mtocrf(CPUPPCState *env, target_ulong cr, uint32_t mask)
> +{
> +    int i;
> +    for (i = ARRAY_SIZE(env->crf); --i >= 0; ) {
> +        if (mask & 1) {
> +            env->crf[i] = cr & 0x0F;
> +        }
> +        cr >>= 4;
> +        mask >>= 1;
> +    }
> +}
> +
> +target_ulong helper_mfocrf(CPUPPCState *env)
> +{
> +    return ppc_get_cr(env);
> +}
> +
>  /*****************************************************************************/
>  /* PowerPC 601 specific instructions (POWER bridge) */
>  target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 2c9d8aa..c28bddf 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -4173,24 +4173,7 @@ static void gen_mfcr(DisasContext *ctx)
>                              cpu_gpr[rD(ctx->opcode)], crn * 4);
>          }
>      } else {
> -        TCGv_i32 t0 = tcg_temp_new_i32();
> -        tcg_gen_mov_i32(t0, cpu_crf[0]);
> -        tcg_gen_shli_i32(t0, t0, 4);
> -        tcg_gen_or_i32(t0, t0, cpu_crf[1]);
> -        tcg_gen_shli_i32(t0, t0, 4);
> -        tcg_gen_or_i32(t0, t0, cpu_crf[2]);
> -        tcg_gen_shli_i32(t0, t0, 4);
> -        tcg_gen_or_i32(t0, t0, cpu_crf[3]);
> -        tcg_gen_shli_i32(t0, t0, 4);
> -        tcg_gen_or_i32(t0, t0, cpu_crf[4]);
> -        tcg_gen_shli_i32(t0, t0, 4);
> -        tcg_gen_or_i32(t0, t0, cpu_crf[5]);
> -        tcg_gen_shli_i32(t0, t0, 4);
> -        tcg_gen_or_i32(t0, t0, cpu_crf[6]);
> -        tcg_gen_shli_i32(t0, t0, 4);
> -        tcg_gen_or_i32(t0, t0, cpu_crf[7]);
> -        tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t0);
> -        tcg_temp_free_i32(t0);
> +        gen_helper_mfocrf(cpu_gpr[rD(ctx->opcode)], cpu_env);
>      }
>  }
>  
> @@ -4285,15 +4268,9 @@ static void gen_mtcrf(DisasContext *ctx)
>              tcg_temp_free_i32(temp);
>          }
>      } else {
> -        TCGv_i32 temp = tcg_temp_new_i32();
> -        tcg_gen_trunc_tl_i32(temp, cpu_gpr[rS(ctx->opcode)]);
> -        for (crn = 0 ; crn < 8 ; crn++) {
> -            if (crm & (1 << crn)) {
> -                    tcg_gen_shri_i32(cpu_crf[7 - crn], temp, crn * 4);
> -                    tcg_gen_andi_i32(cpu_crf[7 - crn], cpu_crf[7 - crn], 0xf);
> -            }
> -        }
> -        tcg_temp_free_i32(temp);
> +        TCGv_i32 t0 = tcg_const_i32(crm);
> +        gen_helper_mtocrf(cpu_env, cpu_gpr[rS(ctx->opcode)], t0);
> +        tcg_temp_free_i32(t0);
>      }
>  }
>  
> 

Reviewed-by: Tom Musta <tommusta@gmail.com>
Tested-by: Tom Musta <tommusta@gmail.com>

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 07/14] ppc: reorganize gen_compute_fprf
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 07/14] ppc: reorganize gen_compute_fprf Paolo Bonzini
@ 2014-09-18 19:48   ` Tom Musta
  0 siblings, 0 replies; 44+ messages in thread
From: Tom Musta @ 2014-09-18 19:48 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: agraf

On 9/15/2014 10:03 AM, Paolo Bonzini wrote:
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> 	v1->v2: fixed leak of temporaries
> 
>  target-ppc/translate.c | 25 ++++++++++++-------------
>  1 file changed, 12 insertions(+), 13 deletions(-)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index c28bddf..a8b6b7c 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -252,23 +252,22 @@ static inline void gen_reset_fpstatus(void)
>  
>  static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc)
>  {
> -    TCGv_i32 t0 = tcg_temp_new_i32();
> +    TCGv_i32 t0;
>  
> -    if (set_fprf != 0) {
> -        /* This case might be optimized later */
> -        tcg_gen_movi_i32(t0, 1);
> -        gen_helper_compute_fprf(t0, cpu_env, arg, t0);
> -        if (unlikely(set_rc)) {
> -            tcg_gen_mov_i32(cpu_crf[1], t0);
> -        }
> -        gen_helper_float_check_status(cpu_env);
> -    } else if (unlikely(set_rc)) {
> -        /* We always need to compute fpcc */
> -        tcg_gen_movi_i32(t0, 0);
> -        gen_helper_compute_fprf(t0, cpu_env, arg, t0);
> +    if (set_fprf == 0 && !set_rc) {
> +        return;
> +    }
> +
> +    t0 = tcg_temp_new_i32();
> +    tcg_gen_movi_i32(t0, set_fprf != 0);
> +    gen_helper_compute_fprf(t0, cpu_env, arg, t0);
> +    if (set_rc) {
>          tcg_gen_mov_i32(cpu_crf[1], t0);
>      }
>  
> +    if (set_fprf != 0) {
> +        gen_helper_float_check_status(cpu_env);
> +    }
>      tcg_temp_free_i32(t0);
>  }
>  
> 

Reviewed-by: Tom Musta <tommusta@gmail.com>

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 08/14] ppc: introduce gen_op_mfcr/gen_op_mtcr
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 08/14] ppc: introduce gen_op_mfcr/gen_op_mtcr Paolo Bonzini
@ 2014-09-18 19:49   ` Tom Musta
  2014-09-18 21:38   ` Richard Henderson
  1 sibling, 0 replies; 44+ messages in thread
From: Tom Musta @ 2014-09-18 19:49 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: agraf

On 9/15/2014 10:03 AM, Paolo Bonzini wrote:
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> 	v1->v2: fixed TCG debug failures
> 
>  target-ppc/translate.c | 61 +++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 43 insertions(+), 18 deletions(-)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index a8b6b7c..52062a8 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -250,6 +250,21 @@ static inline void gen_reset_fpstatus(void)
>      gen_helper_reset_fpstatus(cpu_env);
>  }
>  
> +static inline void gen_op_mfcr(TCGv_i32 dest, int first_cr, int shift)
> +{
> +    tcg_gen_shli_i32(dest, cpu_crf[first_cr >> 2], shift);
> +}
> +
> +static inline void gen_op_mtcr(int first_cr, TCGv_i32 src, int shift)
> +{
> +    if (shift) {
> +        tcg_gen_shri_i32(cpu_crf[first_cr >> 2], src, shift);
> +        tcg_gen_andi_i32(cpu_crf[first_cr >> 2], cpu_crf[first_cr >> 2], 0x0F);
> +    } else {
> +        tcg_gen_andi_i32(cpu_crf[first_cr >> 2], src, 0x0F);
> +    }
> +}
> +
>  static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc)
>  {
>      TCGv_i32 t0;
> @@ -262,7 +277,7 @@ static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc)
>      tcg_gen_movi_i32(t0, set_fprf != 0);
>      gen_helper_compute_fprf(t0, cpu_env, arg, t0);
>      if (set_rc) {
> -        tcg_gen_mov_i32(cpu_crf[1], t0);
> +        gen_op_mtcr(4, t0, 0);
>      }
>  
>      if (set_fprf != 0) {
> @@ -2457,6 +2472,7 @@ static void gen_fmrgow(DisasContext *ctx)
>  static void gen_mcrfs(DisasContext *ctx)
>  {
>      TCGv tmp = tcg_temp_new();
> +    TCGv_i32 tmp32 = tcg_temp_new_i32();
>      int bfa;
>  
>      if (unlikely(!ctx->fpu_enabled)) {
> @@ -2465,10 +2481,11 @@ static void gen_mcrfs(DisasContext *ctx)
>      }
>      bfa = 4 * (7 - crfS(ctx->opcode));
>      tcg_gen_shri_tl(tmp, cpu_fpscr, bfa);
> -    tcg_gen_trunc_tl_i32(cpu_crf[crfD(ctx->opcode)], tmp);
> +    tcg_gen_trunc_tl_i32(tmp32, tmp);
>      tcg_temp_free(tmp);
> -    tcg_gen_andi_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], 0xf);
> +    gen_op_mtcr(crfD(ctx->opcode) << 2, tmp32, 0);
>      tcg_gen_andi_tl(cpu_fpscr, cpu_fpscr, ~(0xF << bfa));
> +    tcg_temp_free_i32(tmp32);
>  }
>  
>  /* mffs */
> @@ -2503,8 +2520,10 @@ static void gen_mtfsb0(DisasContext *ctx)
>          tcg_temp_free_i32(t0);
>      }
>      if (unlikely(Rc(ctx->opcode) != 0)) {
> -        tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr);
> -        tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX);
> +        TCGv_i32 tmp32 = tcg_temp_new_i32();
> +        tcg_gen_trunc_tl_i32(tmp32, cpu_fpscr);
> +        gen_op_mtcr(4, tmp32, FPSCR_OX);
> +        tcg_temp_free_i32(tmp32);
>      }
>  }
>  
> @@ -2529,8 +2548,10 @@ static void gen_mtfsb1(DisasContext *ctx)
>          tcg_temp_free_i32(t0);
>      }
>      if (unlikely(Rc(ctx->opcode) != 0)) {
> -        tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr);
> -        tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX);
> +        TCGv_i32 tmp32 = tcg_temp_new_i32();
> +        tcg_gen_trunc_tl_i32(tmp32, cpu_fpscr);
> +        gen_op_mtcr(4, tmp32, FPSCR_OX);
> +        tcg_temp_free_i32(tmp32);
>      }
>      /* We can raise a differed exception */
>      gen_helper_float_check_status(cpu_env);
> @@ -2564,8 +2585,10 @@ static void gen_mtfsf(DisasContext *ctx)
>      gen_helper_store_fpscr(cpu_env, cpu_fpr[rB(ctx->opcode)], t0);
>      tcg_temp_free_i32(t0);
>      if (unlikely(Rc(ctx->opcode) != 0)) {
> -        tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr);
> -        tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX);
> +        TCGv_i32 tmp32 = tcg_temp_new_i32();
> +        tcg_gen_trunc_tl_i32(tmp32, cpu_fpscr);
> +        gen_op_mtcr(4, tmp32, FPSCR_OX);
> +        tcg_temp_free_i32(tmp32);
>      }
>      /* We can raise a differed exception */
>      gen_helper_float_check_status(cpu_env);
> @@ -2598,8 +2621,10 @@ static void gen_mtfsfi(DisasContext *ctx)
>      tcg_temp_free_i64(t0);
>      tcg_temp_free_i32(t1);
>      if (unlikely(Rc(ctx->opcode) != 0)) {
> -        tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr);
> -        tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX);
> +        TCGv_i32 tmp32 = tcg_temp_new_i32();
> +        tcg_gen_trunc_tl_i32(tmp32, cpu_fpscr);
> +        gen_op_mtcr(4, tmp32, FPSCR_OX);
> +        tcg_temp_free_i32(tmp32);
>      }
>      /* We can raise a differed exception */
>      gen_helper_float_check_status(cpu_env);
> @@ -4166,10 +4191,11 @@ static void gen_mfcr(DisasContext *ctx)
>      if (likely(ctx->opcode & 0x00100000)) {
>          crm = CRM(ctx->opcode);
>          if (likely(crm && ((crm & (crm - 1)) == 0))) {
> +            TCGv_i32 t0 = tcg_temp_new_i32();
>              crn = ctz32 (crm);
> -            tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], cpu_crf[7 - crn]);
> -            tcg_gen_shli_tl(cpu_gpr[rD(ctx->opcode)],
> -                            cpu_gpr[rD(ctx->opcode)], crn * 4);
> +            gen_op_mfcr(t0, (7 - crn) * 4, crn * 4);
> +            tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t0);
> +            tcg_temp_free_i32(t0);
>          }
>      } else {
>          gen_helper_mfocrf(cpu_gpr[rD(ctx->opcode)], cpu_env);
> @@ -4262,8 +4288,7 @@ static void gen_mtcrf(DisasContext *ctx)
>              TCGv_i32 temp = tcg_temp_new_i32();
>              crn = ctz32 (crm);
>              tcg_gen_trunc_tl_i32(temp, cpu_gpr[rS(ctx->opcode)]);
> -            tcg_gen_shri_i32(temp, temp, crn * 4);
> -            tcg_gen_andi_i32(cpu_crf[7 - crn], temp, 0xf);
> +            gen_op_mtcr((7 - crn) * 4, temp, crn * 4);
>              tcg_temp_free_i32(temp);
>          }
>      } else {
> @@ -8188,13 +8213,13 @@ static void gen_set_cr1_from_fpscr(DisasContext *ctx)
>  {
>      TCGv_i32 tmp = tcg_temp_new_i32();
>      tcg_gen_trunc_tl_i32(tmp, cpu_fpscr);
> -    tcg_gen_shri_i32(cpu_crf[1], tmp, 28);
> +    gen_op_mtcr(4, tmp, 28);
>      tcg_temp_free_i32(tmp);
>  }
>  #else
>  static void gen_set_cr1_from_fpscr(DisasContext *ctx)
>  {
> -        tcg_gen_shri_tl(cpu_crf[1], cpu_fpscr, 28);
> +    gen_op_mtcr(4, cpu_fpscr, 28);
>  }
>  #endif
>  
> 

Reviewed-by: Tom Musta <tommusta@gmail.com>

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 09/14] ppc: introduce ppc_get_crf and ppc_set_crf
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 09/14] ppc: introduce ppc_get_crf and ppc_set_crf Paolo Bonzini
@ 2014-09-18 19:51   ` Tom Musta
  2014-09-19 14:52     ` Paolo Bonzini
  0 siblings, 1 reply; 44+ messages in thread
From: Tom Musta @ 2014-09-18 19:51 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: agraf

On 9/15/2014 10:03 AM, Paolo Bonzini wrote:
> These two functions will group together four CR bits into a single
> value, once we change the representation of condition registers.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  linux-user/main.c        |  2 +-
>  target-ppc/cpu.h         | 10 ++++++++++
>  target-ppc/excp_helper.c |  2 +-
>  target-ppc/fpu_helper.c  |  6 ++++--
>  target-ppc/int_helper.c  | 14 +++++++-------
>  target-ppc/translate.c   | 13 +++++++------
>  6 files changed, 30 insertions(+), 17 deletions(-)
> 
> diff --git a/linux-user/main.c b/linux-user/main.c
> index 472a16d..152c031 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -1550,7 +1550,7 @@ static int do_store_exclusive(CPUPPCState *env)
>                  }
>              }
>          }
> -        env->crf[0] = (stored << 1) | xer_so;
> +        ppc_set_crf(env, 0, (stored << 1) | xer_so);
>          env->reserve_addr = (target_ulong)-1;
>      }
>      if (!segv) {
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 0c0196d..91eac17 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1217,6 +1217,16 @@ static inline void ppc_set_cr(CPUPPCState *env, uint32_t cr)
>      }
>  }
>  
> +static inline uint32_t ppc_get_crf(const CPUPPCState *env, int i)
> +{
> +    return env->crf[i];
> +}
> +
> +static inline void ppc_set_crf(CPUPPCState *env, int i, uint32_t val)
> +{
> +    env->crf[i] = val;
> +}
> +
>  static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn)
>  {
>      uint64_t gprv;
> diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
> index 96ad9d7..08637c1 100644
> --- a/target-ppc/excp_helper.c
> +++ b/target-ppc/excp_helper.c
> @@ -504,7 +504,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>                           env->error_code);
>              }
>  #endif
> -            msr |= env->crf[0] << 28;
> +            msr |= ppc_get_crf(env, 0) << 28;
>              msr |= env->error_code; /* key, D/I, S/L bits */
>              /* Set way using a LRU mechanism */
>              msr |= ((env->last_way + 1) & (env->nb_ways - 1)) << 17;
> diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
> index b4e6d72..8cf321b 100644
> --- a/target-ppc/fpu_helper.c
> +++ b/target-ppc/fpu_helper.c
> @@ -1099,7 +1099,8 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
>  
>      env->fpscr &= ~(0x0F << FPSCR_FPRF);
>      env->fpscr |= (0x01 << FPSCR_FPRF) << fpcc;
> -    env->crf[crfD] = (1 << fpcc);
> +    ppc_set_crf(env, crfD, 1 << fpcc);
> +
>      if (unlikely(fpcc == CRF_SO
>                   && (float64_is_signaling_nan(farg1.d) ||
>                       float64_is_signaling_nan(farg2.d)))) {
> @@ -1130,7 +1131,8 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
>  
>      env->fpscr &= ~(0x0F << FPSCR_FPRF);
>      env->fpscr |= (0x01 << FPSCR_FPRF) << fpcc;
> -    env->crf[crfD] = (1 << fpcc);
> +    ppc_set_crf(env, crfD, 1 << fpcc);
> +
>      if (unlikely(fpcc == CRF_SO)) {
>          if (float64_is_signaling_nan(farg1.d) ||
>              float64_is_signaling_nan(farg2.d)) {
> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> index 54e8998..b76a895 100644
> --- a/target-ppc/int_helper.c
> +++ b/target-ppc/int_helper.c
> @@ -294,7 +294,7 @@ void helper_mtocrf(CPUPPCState *env, target_ulong cr, uint32_t mask)
>      int i;
>      for (i = ARRAY_SIZE(env->crf); --i >= 0; ) {
>          if (mask & 1) {
> -            env->crf[i] = cr & 0x0F;
> +            ppc_set_crf(env, i, cr & 0x0F);
>          }
>          cr >>= 4;
>          mask >>= 1;
> @@ -657,7 +657,7 @@ VCF(sx, int32_to_float32, s32)
>              none |= result;                                             \
>          }                                                               \
>          if (record) {                                                   \
> -            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
> +            ppc_set_crf(env, 6, ((all != 0) << 3) | ((none == 0) << 1)); \
>          }                                                               \
>      }
>  #define VCMP(suffix, compare, element)          \
> @@ -703,7 +703,7 @@ VCMP(gtsd, >, s64)
>              none |= result;                                             \
>          }                                                               \
>          if (record) {                                                   \
> -            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
> +            ppc_set_crf(env, 6, ((all != 0) << 3) | ((none == 0) << 1)); \
>          }                                                               \
>      }
>  #define VCMPFP(suffix, compare, order)          \
> @@ -737,7 +737,7 @@ static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
>          }
>      }
>      if (record) {
> -        env->crf[6] = (all_in == 0) << 1;
> +        ppc_set_crf(env, 6, (all_in == 0) << 1);
>      }
>  }
>  
> @@ -2558,7 +2558,7 @@ target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
>      for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
>          if ((high & mask) == 0) {
>              if (update_Rc) {
> -                env->crf[0] = 0x4;
> +                ppc_set_crf(env, 0, 0x4);
>              }
>              goto done;
>          }
> @@ -2567,7 +2567,7 @@ target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
>      for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
>          if ((low & mask) == 0) {
>              if (update_Rc) {
> -                env->crf[0] = 0x8;
> +                ppc_set_crf(env, 0, 0x8);
>              }
>              goto done;
>          }
> @@ -2575,7 +2575,7 @@ target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
>      }
>      i = 8;
>      if (update_Rc) {
> -        env->crf[0] = 0x2;
> +        ppc_set_crf(env, 0, 0x2);
>      }
>   done:
>      env->xer = (env->xer & ~0x7F) | i;
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 52062a8..9ff8763 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -11102,18 +11102,19 @@ void ppc_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
>              cpu_fprintf(f, "\n");
>      }
>      cpu_fprintf(f, "CR ");
> -    for (i = 0; i < 8; i++)
> -        cpu_fprintf(f, "%01x", env->crf[i]);
> +    for (i = 0; i < 8; i++) {
> +        cpu_fprintf(f, "%01x", ppc_get_crf(env, i));
> +    }
>      cpu_fprintf(f, "  [");
>      for (i = 0; i < 8; i++) {
>          char a = '-';
> -        if (env->crf[i] & 0x08)
> +        if (ppc_get_crf(env, i) & 0x08)
>              a = 'L';
> -        else if (env->crf[i] & 0x04)
> +        else if (ppc_get_crf(env, i) & 0x04)
>              a = 'G';
> -        else if (env->crf[i] & 0x02)
> +        else if (ppc_get_crf(env, i) & 0x02)
>              a = 'E';
> -        cpu_fprintf(f, " %c%c", a, env->crf[i] & 0x01 ? 'O' : ' ');
> +        cpu_fprintf(f, " %c%c", a, ppc_get_crf(env, i) & 0x01 ? 'O' : ' ');
>      }
>      cpu_fprintf(f, " ]             RES " TARGET_FMT_lx "\n",
>                  env->reserve_addr);
> 


Checkpatch fails:
WARNING: braces {} are necessary for all arms of this statement
#171: FILE: target-ppc/translate.c:11111:
+        if (ppc_get_crf(env, i) & 0x08)
[...]
-        else if (env->crf[i] & 0x04)
[...]
             a = 'G';
[...]

WARNING: braces {} are necessary for all arms of this statement
#174: FILE: target-ppc/translate.c:11113:
+        else if (ppc_get_crf(env, i) & 0x04)
[...]
-        else if (env->crf[i] & 0x02)
[...]

WARNING: braces {} are necessary for all arms of this statement
#177: FILE: target-ppc/translate.c:11115:
+        else if (ppc_get_crf(env, i) & 0x02)
[...]

total: 0 errors, 3 warnings, 131 lines checked

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 10/14] ppc: use movcond for isel
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 10/14] ppc: use movcond for isel Paolo Bonzini
@ 2014-09-18 20:05   ` Tom Musta
  0 siblings, 0 replies; 44+ messages in thread
From: Tom Musta @ 2014-09-18 20:05 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: agraf

On 9/15/2014 10:03 AM, Paolo Bonzini wrote:
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> 	v1->v2: fixed TCG debugging failures
> 
>  target-ppc/translate.c | 26 +++++++++++++-------------
>  1 file changed, 13 insertions(+), 13 deletions(-)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 9ff8763..0933c00 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -777,27 +777,27 @@ static void gen_cmpli(DisasContext *ctx)
>  /* isel (PowerPC 2.03 specification) */
>  static void gen_isel(DisasContext *ctx)
>  {
> -    int l1, l2;
>      uint32_t bi = rC(ctx->opcode);
>      uint32_t mask;
>      TCGv_i32 t0;
> -
> -    l1 = gen_new_label();
> -    l2 = gen_new_label();
> +    TCGv t1, true_op, zero;
>  
>      mask = 0x08 >> (bi & 0x03);
>      t0 = tcg_temp_new_i32();
>      tcg_gen_andi_i32(t0, cpu_crf[bi >> 2], mask);
> -    tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
> -    if (rA(ctx->opcode) == 0)
> -        tcg_gen_movi_tl(cpu_gpr[rD(ctx->opcode)], 0);
> -    else
> -        tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
> -    tcg_gen_br(l2);
> -    gen_set_label(l1);
> -    tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
> -    gen_set_label(l2);
> +    t1 = tcg_temp_new();
> +    tcg_gen_extu_i32_tl(t1, t0);
> +    zero = tcg_const_tl(0);
> +    if (rA(ctx->opcode) == 0) {
> +        true_op = zero;
> +    } else {
> +        true_op = cpu_gpr[rA(ctx->opcode)];
> +    }
> +    tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr[rD(ctx->opcode)], t1, zero,
> +                       true_op, cpu_gpr[rB(ctx->opcode)]);
> +    tcg_temp_free(t1);
>      tcg_temp_free_i32(t0);
> +    tcg_temp_free(zero);
>  }
>  
>  /* cmpb: PowerPC 2.05 specification */
> 

Reviewed-by: Tom Musta <tommusta@gmail.com>
Tested-by: Tom Musta <tommusta@gmail.com>

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 11/14] ppc: store CR registers in 32 1-bit registers
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 11/14] ppc: store CR registers in 32 1-bit registers Paolo Bonzini
@ 2014-09-18 20:25   ` Tom Musta
  2014-09-19 13:53     ` Paolo Bonzini
  0 siblings, 1 reply; 44+ messages in thread
From: Tom Musta @ 2014-09-18 20:25 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: agraf

On 9/15/2014 10:03 AM, Paolo Bonzini wrote:
> This makes comparisons much smaller and faster.  The speedup is
> approximately 10% on user-mode emulation on x86 host, 3-4% on PPC.
> 
> Note that CRF_* constants are flipped to match PowerPC's big
> bit-endianness.  Previously, the CR register was effectively stored
> in mixed endianness, so now there is less indirection going on.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> 	v1->v2: fixed all issues reported by Tom, notably: 1) temporary
> 	leak in gen_op_mfcr; 2) missing set of cr[so] for gen_op_cmp32;
> 	3) i32 vs. tl typing issues; 4) creqv/nand/nor/orc extra 1 bits.
> 
>  linux-user/main.c       |   4 +-
>  target-ppc/cpu.h        |  41 +++---
>  target-ppc/fpu_helper.c |  44 ++-----
>  target-ppc/helper.h     |   6 -
>  target-ppc/int_helper.c |   2 +-
>  target-ppc/machine.c    |   9 ++
>  target-ppc/translate.c  | 344 ++++++++++++++++++++++++++----------------------
>  7 files changed, 236 insertions(+), 214 deletions(-)
> 

Run checkpatch.pl.  In fairness, you are modifying code that didn't pass before ... but still ....

> diff --git a/linux-user/main.c b/linux-user/main.c
> index 152c031..b403f24 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -1929,7 +1929,7 @@ void cpu_loop(CPUPPCState *env)
>               * PPC ABI uses overflow flag in cr0 to signal an error
>               * in syscalls.
>               */
> -            env->crf[0] &= ~0x1;
> +            env->cr[CRF_SO] = 0;
>              ret = do_syscall(env, env->gpr[0], env->gpr[3], env->gpr[4],
>                               env->gpr[5], env->gpr[6], env->gpr[7],
>                               env->gpr[8], 0, 0);
> @@ -1939,7 +1939,7 @@ void cpu_loop(CPUPPCState *env)
>                  break;
>              }
>              if (ret > (target_ulong)(-515)) {
> -                env->crf[0] |= 0x1;
> +                env->cr[CRF_SO] = 1;
>                  ret = -ret;
>              }
>              env->gpr[3] = ret;
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 91eac17..41b8299 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -940,7 +940,7 @@ struct CPUPPCState {
>      /* CTR */
>      target_ulong ctr;
>      /* condition register */
> -    uint32_t crf[8];
> +    uint32_t cr[32];
>  #if defined(TARGET_PPC64)
>      /* CFAR */
>      target_ulong cfar;
> @@ -1059,6 +1059,9 @@ struct CPUPPCState {
>      uint64_t dtl_addr, dtl_size;
>  #endif /* TARGET_PPC64 */
>  
> +    /* condition register, for migration compatibility */
> +    uint32_t crf[8];
> +
>      int error_code;
>      uint32_t pending_interrupts;
>  #if !defined(CONFIG_USER_ONLY)
> @@ -1202,8 +1205,8 @@ static inline uint32_t ppc_get_cr(const CPUPPCState *env)
>      uint32_t cr = 0;
>      int i;
>  
> -    for (i = 0; i < ARRAY_SIZE(env->crf); i++) {
> -        cr |= env->crf[i] << (32 - ((i + 1) * 4));
> +    for (i = 0; i < ARRAY_SIZE(env->cr); i++) {
> +        cr |= env->cr[i] << (31 - i);
>      }
>      return cr;
>  }
> @@ -1212,19 +1215,27 @@ static inline void ppc_set_cr(CPUPPCState *env, uint32_t cr)
>  {
>      int i;
>  
> -    for (i = 0; i < ARRAY_SIZE(env->crf); i++) {
> -        env->crf[i] = (cr >> (32 - ((i + 1) * 4))) & 0xF;
> +    for (i = 0; i < ARRAY_SIZE(env->cr); i++) {
> +        env->cr[i] = (cr >> (31 - i)) & 1;
>      }
>  }
>  
>  static inline uint32_t ppc_get_crf(const CPUPPCState *env, int i)
>  {
> -    return env->crf[i];
> +    uint32_t r;
> +    r = env->cr[i * 4];
> +    r = (r << 1) | (env->cr[i * 4 + 1]);
> +    r = (r << 1) | (env->cr[i * 4 + 2]);
> +    r = (r << 1) | (env->cr[i * 4 + 3]);
> +    return r;
>  }
>  
>  static inline void ppc_set_crf(CPUPPCState *env, int i, uint32_t val)
>  {
> -    env->crf[i] = val;
> +    env->cr[i * 4 + 0] = (val & 0x08) != 0;
> +    env->cr[i * 4 + 1] = (val & 0x04) != 0;
> +    env->cr[i * 4 + 2] = (val & 0x02) != 0;
> +    env->cr[i * 4 + 3] = (val & 0x01) != 0;
>  }
>  
>  static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn)
> @@ -1271,14 +1282,14 @@ static inline int cpu_mmu_index (CPUPPCState *env)
>  
>  /*****************************************************************************/
>  /* CRF definitions */
> -#define CRF_LT        3
> -#define CRF_GT        2
> -#define CRF_EQ        1
> -#define CRF_SO        0
> -#define CRF_CH        (1 << CRF_LT)
> -#define CRF_CL        (1 << CRF_GT)
> -#define CRF_CH_OR_CL  (1 << CRF_EQ)
> -#define CRF_CH_AND_CL (1 << CRF_SO)
> +#define CRF_LT        0
> +#define CRF_GT        1
> +#define CRF_EQ        2
> +#define CRF_SO        3
> +#define CRF_CH        CRF_LT
> +#define CRF_CL        CRF_GT
> +#define CRF_CH_OR_CL  CRF_EQ
> +#define CRF_CH_AND_CL CRF_SO

This breaks what you did in patch 5, which used LE bit numbering to perform shifts.  And it breaks other code that uses the old LE convention.

Here is what I found:

diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 7894dc5..3f656e5 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -1043,7 +1043,7 @@ uint32_t helper_ftdiv(uint64_t fra, uint64_t frb)
         }
     }

-    return (1 << CRF_LT) | (fg_flag << CRF_GT) | (fe_flag << CRF_EQ);
+    return (1 << 3) | (fg_flag << 2) | (fe_flag << 1);
 }

 uint32_t helper_ftsqrt(uint64_t frb)
@@ -1074,7 +1074,7 @@ uint32_t helper_ftsqrt(uint64_t frb)
         }
     }

-    return (1 << CRF_LT) | (fg_flag << CRF_GT) | (fe_flag << CRF_EQ);
+    return (1 << 3) | (fg_flag << 2) | (fe_flag << 1);
 }

 void helper_fcmpu(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 96f2e7d..2f88854 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -2303,25 +2303,25 @@ uint32_t helper_bcdadd(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
         if (sgna == sgnb) {
             result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
             zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
-            cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
+            cr = (sgna > 0) ? 8 >> CRF_GT : 8 >> CRF_LT;
         } else if (bcd_cmp_mag(a, b) > 0) {
             result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
             zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
-            cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
+            cr = (sgna > 0) ? 8 >> CRF_GT : 8 >> CRF_LT;
         } else {
             result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
             zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
-            cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
+            cr = (sgnb > 0) ? 8 >> CRF_GT : 8 >> CRF_LT;
         }
     }

     if (unlikely(invalid)) {
         result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
-        cr = 1 << CRF_SO;
+        cr = 8 >> CRF_SO;
     } else if (overflow) {
-        cr |= 1 << CRF_SO;
+        cr |= 0x8 >> CRF_SO;
     } else if (zero) {
-        cr = 1 << CRF_EQ;
+        cr = 8 >> CRF_EQ;
     }

     *r = result;



>  
>  /* XER definitions */
>  #define XER_SO  31
> diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
> index 8cf321b..7894dc5 100644
> --- a/target-ppc/fpu_helper.c
> +++ b/target-ppc/fpu_helper.c
> @@ -1098,8 +1098,8 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
>      }
>  
>      env->fpscr &= ~(0x0F << FPSCR_FPRF);
> -    env->fpscr |= (0x01 << FPSCR_FPRF) << fpcc;
> -    ppc_set_crf(env, crfD, 1 << fpcc);
> +    env->fpscr |= (0x08 << FPSCR_FPRF) >> fpcc;
> +    ppc_set_crf(env, crfD, 0x08 >> fpcc);
>  
>      if (unlikely(fpcc == CRF_SO
>                   && (float64_is_signaling_nan(farg1.d) ||
> @@ -1130,8 +1130,8 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
>      }
>  
>      env->fpscr &= ~(0x0F << FPSCR_FPRF);
> -    env->fpscr |= (0x01 << FPSCR_FPRF) << fpcc;
> -    ppc_set_crf(env, crfD, 1 << fpcc);
> +    env->fpscr |= (0x08 << FPSCR_FPRF) >> fpcc;
> +    ppc_set_crf(env, crfD, 0x08 >> fpcc);
>  
>      if (unlikely(fpcc == CRF_SO)) {
>          if (float64_is_signaling_nan(farg1.d) ||
> @@ -1403,7 +1403,7 @@ static inline uint32_t efscmplt(CPUPPCState *env, uint32_t op1, uint32_t op2)
>  
>      u1.l = op1;
>      u2.l = op2;
> -    return float32_lt(u1.f, u2.f, &env->vec_status) ? 4 : 0;
> +    return float32_lt(u1.f, u2.f, &env->vec_status);
>  }
>  
>  static inline uint32_t efscmpgt(CPUPPCState *env, uint32_t op1, uint32_t op2)
> @@ -1412,7 +1412,7 @@ static inline uint32_t efscmpgt(CPUPPCState *env, uint32_t op1, uint32_t op2)
>  
>      u1.l = op1;
>      u2.l = op2;
> -    return float32_le(u1.f, u2.f, &env->vec_status) ? 0 : 4;
> +    return !float32_le(u1.f, u2.f, &env->vec_status);
>  }
>  
>  static inline uint32_t efscmpeq(CPUPPCState *env, uint32_t op1, uint32_t op2)
> @@ -1421,7 +1421,7 @@ static inline uint32_t efscmpeq(CPUPPCState *env, uint32_t op1, uint32_t op2)
>  
>      u1.l = op1;
>      u2.l = op2;
> -    return float32_eq(u1.f, u2.f, &env->vec_status) ? 4 : 0;
> +    return float32_eq(u1.f, u2.f, &env->vec_status);
>  }
>  
>  static inline uint32_t efststlt(CPUPPCState *env, uint32_t op1, uint32_t op2)
> @@ -1460,30 +1460,6 @@ HELPER_SINGLE_SPE_CMP(fscmpgt);
>  /* efscmpeq */
>  HELPER_SINGLE_SPE_CMP(fscmpeq);
>  
> -static inline uint32_t evcmp_merge(int t0, int t1)
> -{
> -    return (t0 << 3) | (t1 << 2) | ((t0 | t1) << 1) | (t0 & t1);
> -}
> -
> -#define HELPER_VECTOR_SPE_CMP(name)                                     \
> -    uint32_t helper_ev##name(CPUPPCState *env, uint64_t op1, uint64_t op2) \
> -    {                                                                   \
> -        return evcmp_merge(e##name(env, op1 >> 32, op2 >> 32),          \
> -                           e##name(env, op1, op2));                     \
> -    }
> -/* evfststlt */
> -HELPER_VECTOR_SPE_CMP(fststlt);
> -/* evfststgt */
> -HELPER_VECTOR_SPE_CMP(fststgt);
> -/* evfststeq */
> -HELPER_VECTOR_SPE_CMP(fststeq);
> -/* evfscmplt */
> -HELPER_VECTOR_SPE_CMP(fscmplt);
> -/* evfscmpgt */
> -HELPER_VECTOR_SPE_CMP(fscmpgt);
> -/* evfscmpeq */
> -HELPER_VECTOR_SPE_CMP(fscmpeq);
> -
>  /* Double-precision floating-point conversion */
>  uint64_t helper_efdcfsi(CPUPPCState *env, uint32_t val)
>  {
> @@ -1725,7 +1701,7 @@ uint32_t helper_efdtstlt(CPUPPCState *env, uint64_t op1, uint64_t op2)
>  
>      u1.ll = op1;
>      u2.ll = op2;
> -    return float64_lt(u1.d, u2.d, &env->vec_status) ? 4 : 0;
> +    return float64_lt(u1.d, u2.d, &env->vec_status);
>  }
>  
>  uint32_t helper_efdtstgt(CPUPPCState *env, uint64_t op1, uint64_t op2)
> @@ -1734,7 +1710,7 @@ uint32_t helper_efdtstgt(CPUPPCState *env, uint64_t op1, uint64_t op2)
>  
>      u1.ll = op1;
>      u2.ll = op2;
> -    return float64_le(u1.d, u2.d, &env->vec_status) ? 0 : 4;
> +    return !float64_le(u1.d, u2.d, &env->vec_status);
>  }
>  
>  uint32_t helper_efdtsteq(CPUPPCState *env, uint64_t op1, uint64_t op2)
> @@ -1743,7 +1719,7 @@ uint32_t helper_efdtsteq(CPUPPCState *env, uint64_t op1, uint64_t op2)
>  
>      u1.ll = op1;
>      u2.ll = op2;
> -    return float64_eq_quiet(u1.d, u2.d, &env->vec_status) ? 4 : 0;
> +    return float64_eq_quiet(u1.d, u2.d, &env->vec_status);
>  }
>  
>  uint32_t helper_efdcmplt(CPUPPCState *env, uint64_t op1, uint64_t op2)
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index ee748a1..dff7c1c 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -492,12 +492,6 @@ DEF_HELPER_3(efststeq, i32, env, i32, i32)
>  DEF_HELPER_3(efscmplt, i32, env, i32, i32)
>  DEF_HELPER_3(efscmpgt, i32, env, i32, i32)
>  DEF_HELPER_3(efscmpeq, i32, env, i32, i32)
> -DEF_HELPER_3(evfststlt, i32, env, i64, i64)
> -DEF_HELPER_3(evfststgt, i32, env, i64, i64)
> -DEF_HELPER_3(evfststeq, i32, env, i64, i64)
> -DEF_HELPER_3(evfscmplt, i32, env, i64, i64)
> -DEF_HELPER_3(evfscmpgt, i32, env, i64, i64)
> -DEF_HELPER_3(evfscmpeq, i32, env, i64, i64)
>  DEF_HELPER_2(efdcfsi, i64, env, i32)
>  DEF_HELPER_2(efdcfsid, i64, env, i64)
>  DEF_HELPER_2(efdcfui, i64, env, i32)
> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> index b76a895..96f2e7d 100644
> --- a/target-ppc/int_helper.c
> +++ b/target-ppc/int_helper.c
> @@ -2580,7 +2580,7 @@ target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
>   done:
>      env->xer = (env->xer & ~0x7F) | i;
>      if (update_Rc) {
> -        env->crf[0] |= xer_so;
> +        env->cr[CRF_SO] = xer_so;
>      }
>      return i;
>  }
> diff --git a/target-ppc/machine.c b/target-ppc/machine.c
> index c801b82..9fa309a 100644
> --- a/target-ppc/machine.c
> +++ b/target-ppc/machine.c
> @@ -132,6 +132,10 @@ static void cpu_pre_save(void *opaque)
>      CPUPPCState *env = &cpu->env;
>      int i;
>  
> +    for (i = 0; i < 8; i++) {
> +        env->crf[i] = ppc_get_crf(env, i);
> +    }
> +
>      env->spr[SPR_LR] = env->lr;
>      env->spr[SPR_CTR] = env->ctr;
>      env->spr[SPR_XER] = env->xer;
> @@ -165,6 +169,11 @@ static int cpu_post_load(void *opaque, int version_id)
>       * software has to take care of running QEMU in a compatible mode.
>       */
>      env->spr[SPR_PVR] = env->spr_cb[SPR_PVR].default_value;
> +
> +    for (i = 0; i < 8; i++) {
> +        ppc_set_crf(env, i, env->crf[i]);
> +    }
> +
>      env->lr = env->spr[SPR_LR];
>      env->ctr = env->spr[SPR_CTR];
>      env->xer = env->spr[SPR_XER];
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 0933c00..d8c9240 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -53,13 +53,13 @@ static char cpu_reg_names[10*3 + 22*4 /* GPR */
>      + 10*4 + 22*5 /* FPR */
>      + 2*(10*6 + 22*7) /* AVRh, AVRl */
>      + 10*5 + 22*6 /* VSR */
> -    + 8*5 /* CRF */];
> +    + 32*8 /* CR */];
>  static TCGv cpu_gpr[32];
>  static TCGv cpu_gprh[32];
>  static TCGv_i64 cpu_fpr[32];
>  static TCGv_i64 cpu_avrh[32], cpu_avrl[32];
>  static TCGv_i64 cpu_vsr[32];
> -static TCGv_i32 cpu_crf[8];
> +static TCGv_i32 cpu_cr[32];
>  static TCGv cpu_nip;
>  static TCGv cpu_msr;
>  static TCGv cpu_ctr;
> @@ -89,12 +89,13 @@ void ppc_translate_init(void)
>      p = cpu_reg_names;
>      cpu_reg_names_size = sizeof(cpu_reg_names);
>  
> -    for (i = 0; i < 8; i++) {
> -        snprintf(p, cpu_reg_names_size, "crf%d", i);
> -        cpu_crf[i] = tcg_global_mem_new_i32(TCG_AREG0,
> -                                            offsetof(CPUPPCState, crf[i]), p);
> -        p += 5;
> -        cpu_reg_names_size -= 5;
> +    for (i = 0; i < 32; i++) {
> +        static const char names[] = "lt\0gt\0eq\0so";
> +        snprintf(p, cpu_reg_names_size, "cr%d[%s]", i >> 2, names + (i & 3) * 3);
> +        cpu_cr[i] = tcg_global_mem_new_i32(TCG_AREG0,
> +                                           offsetof(CPUPPCState, cr[i]), p);
> +        p += 8;
> +        cpu_reg_names_size -= 8;
>      }
>  
>      for (i = 0; i < 32; i++) {
> @@ -252,17 +253,31 @@ static inline void gen_reset_fpstatus(void)
>  
>  static inline void gen_op_mfcr(TCGv_i32 dest, int first_cr, int shift)
>  {
> -    tcg_gen_shli_i32(dest, cpu_crf[first_cr >> 2], shift);
> +    TCGv_i32 t0 = tcg_temp_new_i32();
> +
> +    tcg_gen_shli_i32(dest, cpu_cr[first_cr + 3], shift);
> +    tcg_gen_shli_i32(t0, cpu_cr[first_cr + 2], shift + 1);
> +    tcg_gen_or_i32(dest, dest, t0);
> +    tcg_gen_shli_i32(t0, cpu_cr[first_cr + 1], shift + 2);
> +    tcg_gen_or_i32(dest, dest, t0);
> +    tcg_gen_shli_i32(t0, cpu_cr[first_cr], shift + 3);
> +    tcg_temp_free_i32(t0);
>  }

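You are missing one last OR: the final shift puts cpu_cr[first_cr] into t0, but t0 is freed without being merged into dest.  Something like this: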

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 03cdd05..47ee4e3 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -260,6 +260,7 @@ static inline void gen_op_mfcr(TCGv_i32 dest, int first_cr, int shift)
     tcg_gen_shli_i32(t0, cpu_cr[first_cr + 1], shift + 2);
     tcg_gen_or_i32(dest, dest, t0);
     tcg_gen_shli_i32(t0, cpu_cr[first_cr], shift + 3);
+    tcg_gen_or_i32(dest, dest, t0);
     tcg_temp_free_i32(t0);
 }


>  
>  static inline void gen_op_mtcr(int first_cr, TCGv_i32 src, int shift)
>  {
>      if (shift) {
> -        tcg_gen_shri_i32(cpu_crf[first_cr >> 2], src, shift);
> -        tcg_gen_andi_i32(cpu_crf[first_cr >> 2], cpu_crf[first_cr >> 2], 0x0F);
> +        tcg_gen_shri_i32(cpu_cr[first_cr + 3], src, shift);
> +        tcg_gen_andi_i32(cpu_cr[first_cr + 3], cpu_cr[first_cr + 3], 1);
>      } else {
> -        tcg_gen_andi_i32(cpu_crf[first_cr >> 2], src, 0x0F);
> +        tcg_gen_andi_i32(cpu_cr[first_cr + 3], src, 1);
>      }
> +    tcg_gen_shri_i32(cpu_cr[first_cr + 2], src, shift + 1);
> +    tcg_gen_andi_i32(cpu_cr[first_cr + 2], cpu_cr[first_cr + 2], 1);
> +    tcg_gen_shri_i32(cpu_cr[first_cr + 1], src, shift + 2);
> +    tcg_gen_andi_i32(cpu_cr[first_cr + 1], cpu_cr[first_cr + 1], 1);
> +    tcg_gen_shri_i32(cpu_cr[first_cr], src, shift + 3);
> +    tcg_gen_andi_i32(cpu_cr[first_cr], cpu_cr[first_cr], 1);
>  }
>  
>  static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc)
> @@ -663,27 +678,19 @@ static opc_handler_t invalid_handler = {
>  static inline void gen_op_cmp(TCGv arg0, TCGv arg1, int s, int crf)
>  {
>      TCGv t0 = tcg_temp_new();
> -    TCGv_i32 t1 = tcg_temp_new_i32();
>  
> -    tcg_gen_trunc_tl_i32(cpu_crf[crf], cpu_so);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_SO], cpu_so);
>  
>      tcg_gen_setcond_tl((s ? TCG_COND_LT: TCG_COND_LTU), t0, arg0, arg1);
> -    tcg_gen_trunc_tl_i32(t1, t0);
> -    tcg_gen_shli_i32(t1, t1, CRF_LT);
> -    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_LT], t0);
>  
>      tcg_gen_setcond_tl((s ? TCG_COND_GT: TCG_COND_GTU), t0, arg0, arg1);
> -    tcg_gen_trunc_tl_i32(t1, t0);
> -    tcg_gen_shli_i32(t1, t1, CRF_GT);
> -    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_GT], t0);
>  
>      tcg_gen_setcond_tl(TCG_COND_EQ, t0, arg0, arg1);
> -    tcg_gen_trunc_tl_i32(t1, t0);
> -    tcg_gen_shli_i32(t1, t1, CRF_EQ);
> -    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_EQ], t0);
>  
>      tcg_temp_free(t0);
> -    tcg_temp_free_i32(t1);
>  }
>  
>  static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf)
> @@ -695,19 +702,26 @@ static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf)
>  
>  static inline void gen_op_cmp32(TCGv arg0, TCGv arg1, int s, int crf)
>  {
> -    TCGv t0, t1;
> -    t0 = tcg_temp_new();
> -    t1 = tcg_temp_new();
> -    if (s) {
> -        tcg_gen_ext32s_tl(t0, arg0);
> -        tcg_gen_ext32s_tl(t1, arg1);
> -    } else {
> -        tcg_gen_ext32u_tl(t0, arg0);
> -        tcg_gen_ext32u_tl(t1, arg1);
> -    }
> -    gen_op_cmp(t0, t1, s, crf);
> -    tcg_temp_free(t1);
> -    tcg_temp_free(t0);
> +    TCGv_i32 t0, t1;
> +
> +    t0 = tcg_temp_new_i32();
> +    t1 = tcg_temp_new_i32();
> +    tcg_gen_trunc_tl_i32(t0, arg0);
> +    tcg_gen_trunc_tl_i32(t1, arg1);
> +
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_SO], cpu_so);
> +
> +    tcg_gen_setcond_i32((s ? TCG_COND_LT: TCG_COND_LTU), 
> +                        cpu_cr[crf * 4 + CRF_LT], t0, t1);
> +
> +    tcg_gen_setcond_i32((s ? TCG_COND_GT: TCG_COND_GTU), 
> +                        cpu_cr[crf * 4 + CRF_GT], t0, t1);
> +
> +    tcg_gen_setcond_i32(TCG_COND_EQ, 
> +                        cpu_cr[crf * 4 + CRF_EQ], t0, t1);
> +
> +    tcg_temp_free_i32(t1);
> +    tcg_temp_free_i32(t0);
>  }
>  
>  static inline void gen_op_cmpi32(TCGv arg0, target_ulong arg1, int s, int crf)
> @@ -778,15 +792,10 @@ static void gen_cmpli(DisasContext *ctx)
>  static void gen_isel(DisasContext *ctx)
>  {
>      uint32_t bi = rC(ctx->opcode);
> -    uint32_t mask;
> -    TCGv_i32 t0;
>      TCGv t1, true_op, zero;
>  
> -    mask = 0x08 >> (bi & 0x03);
> -    t0 = tcg_temp_new_i32();
> -    tcg_gen_andi_i32(t0, cpu_crf[bi >> 2], mask);
>      t1 = tcg_temp_new();
> -    tcg_gen_extu_i32_tl(t1, t0);
> +    tcg_gen_extu_i32_tl(t1, cpu_cr[bi]);
>      zero = tcg_const_tl(0);
>      if (rA(ctx->opcode) == 0) {
>          true_op = zero;
> @@ -796,7 +805,6 @@ static void gen_isel(DisasContext *ctx)
>      tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr[rD(ctx->opcode)], t1, zero,
>                         true_op, cpu_gpr[rB(ctx->opcode)]);
>      tcg_temp_free(t1);
> -    tcg_temp_free_i32(t0);
>      tcg_temp_free(zero);
>  }
>  
> @@ -2318,21 +2326,29 @@ GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT);
>  
>  static void gen_ftdiv(DisasContext *ctx)
>  {
> +    TCGv_i32 crf;
>      if (unlikely(!ctx->fpu_enabled)) {
>          gen_exception(ctx, POWERPC_EXCP_FPU);
>          return;
>      }
> -    gen_helper_ftdiv(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rA(ctx->opcode)],
> +    crf = tcg_temp_new_i32();
> +    gen_helper_ftdiv(crf, cpu_fpr[rA(ctx->opcode)],
>                       cpu_fpr[rB(ctx->opcode)]);
> +    gen_op_mtcr(crfD(ctx->opcode) << 2, crf, 0);
> +    tcg_temp_free_i32(crf);
>  }
>  
>  static void gen_ftsqrt(DisasContext *ctx)
>  {
> +    TCGv_i32 crf;
>      if (unlikely(!ctx->fpu_enabled)) {
>          gen_exception(ctx, POWERPC_EXCP_FPU);
>          return;
>      }
> -    gen_helper_ftsqrt(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)]);
> +    crf = tcg_temp_new_i32();
> +    gen_helper_ftsqrt(crf, cpu_fpr[rB(ctx->opcode)]);
> +    gen_op_mtcr(crfD(ctx->opcode) << 2, crf, 0);
> +    tcg_temp_free_i32(crf);
>  }
>  
>  
> @@ -3330,10 +3346,13 @@ static void gen_conditional_store(DisasContext *ctx, TCGv EA,
>  {
>      int l1;
>  
> -    tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> +    tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
> +    tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
> +    tcg_gen_movi_i32(cpu_cr[CRF_EQ], 0);
> +    tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
>      l1 = gen_new_label();
>      tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1);
> -    tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 1 << CRF_EQ);
> +    tcg_gen_movi_i32(cpu_cr[CRF_EQ], 1);
>  #if defined(TARGET_PPC64)
>      if (size == 8) {
>          gen_qemu_st64(ctx, cpu_gpr[reg], EA);
> @@ -3900,17 +3919,11 @@ static inline void gen_bcond(DisasContext *ctx, int type)
>      if ((bo & 0x10) == 0) {
>          /* Test CR */
>          uint32_t bi = BI(ctx->opcode);
> -        uint32_t mask = 0x08 >> (bi & 0x03);
> -        TCGv_i32 temp = tcg_temp_new_i32();
> -
>          if (bo & 0x8) {
> -            tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask);
> -            tcg_gen_brcondi_i32(TCG_COND_EQ, temp, 0, l1);
> +            tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[bi], 0, l1);
>          } else {
> -            tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask);
> -            tcg_gen_brcondi_i32(TCG_COND_NE, temp, 0, l1);
> +            tcg_gen_brcondi_i32(TCG_COND_NE, cpu_cr[bi], 0, l1);
>          }
> -        tcg_temp_free_i32(temp);
>      }
>      gen_update_cfar(ctx, ctx->nip);
>      if (type == BCOND_IM) {
> @@ -3959,35 +3972,21 @@ static void gen_bctar(DisasContext *ctx)
>  }
>  
>  /***                      Condition register logical                       ***/
> -#define GEN_CRLOGIC(name, tcg_op, opc)                                        \
> -static void glue(gen_, name)(DisasContext *ctx)                                       \
> -{                                                                             \
> -    uint8_t bitmask;                                                          \
> -    int sh;                                                                   \
> -    TCGv_i32 t0, t1;                                                          \
> -    sh = (crbD(ctx->opcode) & 0x03) - (crbA(ctx->opcode) & 0x03);             \
> -    t0 = tcg_temp_new_i32();                                                  \
> -    if (sh > 0)                                                               \
> -        tcg_gen_shri_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2], sh);            \
> -    else if (sh < 0)                                                          \
> -        tcg_gen_shli_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2], -sh);           \
> -    else                                                                      \
> -        tcg_gen_mov_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2]);                 \
> -    t1 = tcg_temp_new_i32();                                                  \
> -    sh = (crbD(ctx->opcode) & 0x03) - (crbB(ctx->opcode) & 0x03);             \
> -    if (sh > 0)                                                               \
> -        tcg_gen_shri_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2], sh);            \
> -    else if (sh < 0)                                                          \
> -        tcg_gen_shli_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2], -sh);           \
> -    else                                                                      \
> -        tcg_gen_mov_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2]);                 \
> -    tcg_op(t0, t0, t1);                                                       \
> -    bitmask = 0x08 >> (crbD(ctx->opcode) & 0x03);                             \
> -    tcg_gen_andi_i32(t0, t0, bitmask);                                        \
> -    tcg_gen_andi_i32(t1, cpu_crf[crbD(ctx->opcode) >> 2], ~bitmask);          \
> -    tcg_gen_or_i32(cpu_crf[crbD(ctx->opcode) >> 2], t0, t1);                  \
> -    tcg_temp_free_i32(t0);                                                    \
> -    tcg_temp_free_i32(t1);                                                    \
> +#define GEN_CRLOGIC(name, tcg_op, opc)                                         \
> +static void glue(gen_, name)(DisasContext *ctx)                                \
> +{                                                                              \
> +    tcg_op(cpu_cr[crbD(ctx->opcode)], cpu_cr[crbA(ctx->opcode)],               \
> +           cpu_cr[crbB(ctx->opcode)]);                                         \
> +}
> +
> +#define GEN_CRLOGIC_MASK(name, tcg_op, opc)                                     \
> +static void glue(gen_, name)(DisasContext *ctx)                                 \
> +{                                                                               \
> +    TCGv_i32 one = tcg_const_i32(1);                                            \
> +    tcg_op(cpu_cr[crbD(ctx->opcode)], cpu_cr[crbA(ctx->opcode)],                \
> +           cpu_cr[crbB(ctx->opcode)]);                                          \
> +    tcg_gen_and_i32(cpu_cr[crbD(ctx->opcode)], cpu_cr[crbD(ctx->opcode)], one); \
> +    tcg_temp_free_i32(one);                                                     \
>  }
>  
>  /* crand */
> @@ -3995,22 +3994,26 @@ GEN_CRLOGIC(crand, tcg_gen_and_i32, 0x08);
>  /* crandc */
>  GEN_CRLOGIC(crandc, tcg_gen_andc_i32, 0x04);
>  /* creqv */
> -GEN_CRLOGIC(creqv, tcg_gen_eqv_i32, 0x09);
> +GEN_CRLOGIC_MASK(creqv, tcg_gen_eqv_i32, 0x09);
>  /* crnand */
> -GEN_CRLOGIC(crnand, tcg_gen_nand_i32, 0x07);
> +GEN_CRLOGIC_MASK(crnand, tcg_gen_nand_i32, 0x07);
>  /* crnor */
> -GEN_CRLOGIC(crnor, tcg_gen_nor_i32, 0x01);
> +GEN_CRLOGIC_MASK(crnor, tcg_gen_nor_i32, 0x01);
>  /* cror */
>  GEN_CRLOGIC(cror, tcg_gen_or_i32, 0x0E);
>  /* crorc */
> -GEN_CRLOGIC(crorc, tcg_gen_orc_i32, 0x0D);
> +GEN_CRLOGIC_MASK(crorc, tcg_gen_orc_i32, 0x0D);
>  /* crxor */
>  GEN_CRLOGIC(crxor, tcg_gen_xor_i32, 0x06);
>  
>  /* mcrf */
>  static void gen_mcrf(DisasContext *ctx)
>  {
> -    tcg_gen_mov_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfS(ctx->opcode)]);
> +    int i;
> +    for (i = 0; i < 4; i++) {
> +        tcg_gen_mov_i32(cpu_cr[crfD(ctx->opcode) * 4 + i],
> +                        cpu_cr[crfS(ctx->opcode) * 4 + i]);
> +    }
>  }
>  
>  /***                           System linkage                              ***/
> @@ -4163,20 +4166,12 @@ static void gen_write_xer(TCGv src)
>  /* mcrxr */
>  static void gen_mcrxr(DisasContext *ctx)
>  {
> -    TCGv_i32 t0 = tcg_temp_new_i32();
> -    TCGv_i32 t1 = tcg_temp_new_i32();
> -    TCGv_i32 dst = cpu_crf[crfD(ctx->opcode)];
> -
> -    tcg_gen_trunc_tl_i32(t0, cpu_so);
> -    tcg_gen_trunc_tl_i32(t1, cpu_ov);
> -    tcg_gen_trunc_tl_i32(dst, cpu_ca);
> -    tcg_gen_shli_i32(t0, t0, 3);
> -    tcg_gen_shli_i32(t1, t1, 2);
> -    tcg_gen_shli_i32(dst, dst, 1);
> -    tcg_gen_or_i32(dst, dst, t0);
> -    tcg_gen_or_i32(dst, dst, t1);
> -    tcg_temp_free_i32(t0);
> -    tcg_temp_free_i32(t1);
> +    int crf = crfD(ctx->opcode);
> +
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_LT], cpu_so);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_GT], cpu_ov);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_EQ], cpu_ca);
> +    tcg_gen_movi_i32(cpu_cr[crf * 4 + CRF_SO], 0);
>  
>      tcg_gen_movi_tl(cpu_so, 0);
>      tcg_gen_movi_tl(cpu_ov, 0);
> @@ -6351,11 +6346,13 @@ static void gen_tlbsx_40x(DisasContext *ctx)
>      gen_helper_4xx_tlbsx(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
>      tcg_temp_free(t0);
>      if (Rc(ctx->opcode)) {
> -        int l1 = gen_new_label();
> -        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> -        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rD(ctx->opcode)], -1, l1);
> -        tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02);
> -        gen_set_label(l1);
> +        t0 = tcg_temp_new();
> +        tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
> +        tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
> +        tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
> +        tcg_gen_setcondi_tl(TCG_COND_EQ, t0, cpu_gpr[rD(ctx->opcode)], -1);
> +        tcg_gen_trunc_tl_i32(cpu_cr[CRF_EQ], t0);
> +        tcg_temp_free(t0);
>      }
>  #endif
>  }
> @@ -6432,11 +6429,13 @@ static void gen_tlbsx_440(DisasContext *ctx)
>      gen_helper_440_tlbsx(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
>      tcg_temp_free(t0);
>      if (Rc(ctx->opcode)) {
> -        int l1 = gen_new_label();
> -        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> -        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rD(ctx->opcode)], -1, l1);
> -        tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02);
> -        gen_set_label(l1);
> +        t0 = tcg_temp_new();
> +        tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
> +        tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
> +        tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
> +        tcg_gen_setcondi_tl(TCG_COND_EQ, t0, cpu_gpr[rD(ctx->opcode)], -1);
> +        tcg_gen_trunc_tl_i32(cpu_cr[CRF_EQ], t0);
> +        tcg_temp_free(t0);
>      }
>  #endif
>  }
> @@ -7402,7 +7401,7 @@ GEN_VXFORM(vpmsumd, 4, 19)
>  static void gen_##op(DisasContext *ctx)             \
>  {                                                   \
>      TCGv_ptr ra, rb, rd;                            \
> -    TCGv_i32 ps;                                    \
> +    TCGv_i32 ps, crf;                               \
>                                                      \
>      if (unlikely(!ctx->altivec_enabled)) {          \
>          gen_exception(ctx, POWERPC_EXCP_VPU);       \
> @@ -7414,13 +7413,16 @@ static void gen_##op(DisasContext *ctx)             \
>      rd = gen_avr_ptr(rD(ctx->opcode));              \
>                                                      \
>      ps = tcg_const_i32((ctx->opcode & 0x200) != 0); \
> +    crf = tcg_temp_new_i32();                       \
>                                                      \
> -    gen_helper_##op(cpu_crf[6], rd, ra, rb, ps);    \
> +    gen_helper_##op(crf, rd, ra, rb, ps);           \
> +    gen_op_mtcr(6 << 2, crf, 0);                    \
>                                                      \
>      tcg_temp_free_ptr(ra);                          \
>      tcg_temp_free_ptr(rb);                          \
>      tcg_temp_free_ptr(rd);                          \
>      tcg_temp_free_i32(ps);                          \
> +    tcg_temp_free_i32(crf);                         \
>  }
>  
>  GEN_BCD(bcdadd)
> @@ -8248,6 +8250,7 @@ static void gen_##name(DisasContext *ctx)        \
>  static void gen_##name(DisasContext *ctx)         \
>  {                                                 \
>      TCGv_ptr ra, rb;                              \
> +    TCGv_i32 tmp;                                 \
>      if (unlikely(!ctx->fpu_enabled)) {            \
>          gen_exception(ctx, POWERPC_EXCP_FPU);     \
>          return;                                   \
> @@ -8255,8 +8258,10 @@ static void gen_##name(DisasContext *ctx)         \
>      gen_update_nip(ctx, ctx->nip - 4);            \
>      ra = gen_fprp_ptr(rA(ctx->opcode));           \
>      rb = gen_fprp_ptr(rB(ctx->opcode));           \
> -    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \
> -                      cpu_env, ra, rb);           \
> +    tmp = tcg_temp_new_i32();                     \
> +    gen_helper_##name(tmp, cpu_env, ra, rb);      \
> +    gen_op_mtcr(crfD(ctx->opcode) << 2, tmp, 0);  \
> +    tcg_temp_free_i32(tmp);                       \
>      tcg_temp_free_ptr(ra);                        \
>      tcg_temp_free_ptr(rb);                        \
>  }
> @@ -8265,7 +8270,7 @@ static void gen_##name(DisasContext *ctx)         \
>  static void gen_##name(DisasContext *ctx)         \
>  {                                                 \
>      TCGv_ptr ra;                                  \
> -    TCGv_i32 dcm;                                 \
> +    TCGv_i32 dcm, tmp;                            \
>      if (unlikely(!ctx->fpu_enabled)) {            \
>          gen_exception(ctx, POWERPC_EXCP_FPU);     \
>          return;                                   \
> @@ -8273,8 +8278,10 @@ static void gen_##name(DisasContext *ctx)         \
>      gen_update_nip(ctx, ctx->nip - 4);            \
>      ra = gen_fprp_ptr(rA(ctx->opcode));           \
>      dcm = tcg_const_i32(DCM(ctx->opcode));        \
> -    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \
> -                      cpu_env, ra, dcm);          \
> +    tmp = tcg_temp_new_i32();                     \
> +    gen_helper_##name(tmp, cpu_env, ra, dcm);     \
> +    gen_op_mtcr(crfD(ctx->opcode) << 2, tmp, 0);  \
> +    tcg_temp_free_i32(tmp);                       \
>      tcg_temp_free_ptr(ra);                        \
>      tcg_temp_free_i32(dcm);                       \
>  }
> @@ -8699,37 +8706,32 @@ GEN_SPEOP_ARITH_IMM2(evsubifw, tcg_gen_subi_i32);
>  #define GEN_SPEOP_COMP(name, tcg_cond)                                        \
>  static inline void gen_##name(DisasContext *ctx)                              \
>  {                                                                             \
> +    TCGv tmp = tcg_temp_new();                                                \
> +                                                                              \
>      if (unlikely(!ctx->spe_enabled)) {                                        \
>          gen_exception(ctx, POWERPC_EXCP_SPEU);                                \
>          return;                                                               \
>      }                                                                         \
> -    int l1 = gen_new_label();                                                 \
> -    int l2 = gen_new_label();                                                 \
> -    int l3 = gen_new_label();                                                 \
> -    int l4 = gen_new_label();                                                 \
>                                                                                \
>      tcg_gen_ext32s_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);    \
>      tcg_gen_ext32s_tl(cpu_gpr[rB(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);    \
>      tcg_gen_ext32s_tl(cpu_gprh[rA(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]);  \
>      tcg_gen_ext32s_tl(cpu_gprh[rB(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]);  \
>                                                                                \
> -    tcg_gen_brcond_tl(tcg_cond, cpu_gpr[rA(ctx->opcode)],                     \
> -                       cpu_gpr[rB(ctx->opcode)], l1);                         \
> -    tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 0);                          \
> -    tcg_gen_br(l2);                                                           \
> -    gen_set_label(l1);                                                        \
> -    tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)],                              \
> -                     CRF_CL | CRF_CH_OR_CL | CRF_CH_AND_CL);                  \
> -    gen_set_label(l2);                                                        \
> -    tcg_gen_brcond_tl(tcg_cond, cpu_gprh[rA(ctx->opcode)],                    \
> -                       cpu_gprh[rB(ctx->opcode)], l3);                        \
> -    tcg_gen_andi_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)],  \
> -                     ~(CRF_CH | CRF_CH_AND_CL));                              \
> -    tcg_gen_br(l4);                                                           \
> -    gen_set_label(l3);                                                        \
> -    tcg_gen_ori_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)],   \
> -                    CRF_CH | CRF_CH_OR_CL);                                   \
> -    gen_set_label(l4);                                                        \
> +    tcg_gen_setcond_tl(tcg_cond, tmp,                                         \
> +                       cpu_gpr[rA(ctx->opcode)],                              \
> +                       cpu_gpr[rB(ctx->opcode)]);                             \
> +    tcg_gen_trunc_tl_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL], tmp);        \
> +    tcg_gen_setcond_tl(tcg_cond, tmp,                                         \
> +                       cpu_gprh[rA(ctx->opcode)],                             \
> +                       cpu_gprh[rB(ctx->opcode)]);                            \
> +    tcg_gen_trunc_tl_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], tmp);        \
> +    tcg_gen_or_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_OR_CL],              \
> +                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                    \
> +                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                   \
> +    tcg_gen_and_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_AND_CL],            \
> +                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                   \
> +                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                  \
>  }
>  GEN_SPEOP_COMP(evcmpgtu, TCG_COND_GTU);
>  GEN_SPEOP_COMP(evcmpgts, TCG_COND_GT);
> @@ -8800,22 +8802,20 @@ static inline void gen_evsel(DisasContext *ctx)
>      int l2 = gen_new_label();
>      int l3 = gen_new_label();
>      int l4 = gen_new_label();
> -    TCGv_i32 t0 = tcg_temp_local_new_i32();
> -    tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 3);
> -    tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
> +
> +    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[(ctx->opcode & 0x07) * 4], 0, l1);
>      tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]);
>      tcg_gen_br(l2);
>      gen_set_label(l1);
>      tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]);
>      gen_set_label(l2);
> -    tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 2);
> -    tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l3);
> +
> +    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[(ctx->opcode & 0x07) * 4 + 1], 0, l3);
>      tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
>      tcg_gen_br(l4);
>      gen_set_label(l3);
>      tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
>      gen_set_label(l4);
> -    tcg_temp_free_i32(t0);
>  }
>  
>  static void gen_evsel0(DisasContext *ctx)
> @@ -9397,9 +9397,12 @@ static inline void gen_##name(DisasContext *ctx)                              \
>      t0 = tcg_temp_new_i32();                                                  \
>      t1 = tcg_temp_new_i32();                                                  \
>                                                                                \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_LT], 0);              \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_GT], 0);              \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_SO], 0);              \
>      tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);                       \
>      tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);                       \
> -    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1);           \
> +    gen_helper_##name(cpu_cr[crfD(ctx->opcode) * 4 + CRF_EQ], cpu_env, t0, t1); \
>                                                                                \
>      tcg_temp_free_i32(t0);                                                    \
>      tcg_temp_free_i32(t1);                                                    \
> @@ -9416,10 +9419,39 @@ static inline void gen_##name(DisasContext *ctx)                              \
>      t1 = tcg_temp_new_i64();                                                  \
>      gen_load_gpr64(t0, rA(ctx->opcode));                                      \
>      gen_load_gpr64(t1, rB(ctx->opcode));                                      \
> -    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1);           \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_LT], 0);              \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_GT], 0);              \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_SO], 0);              \
> +    gen_helper_##name(cpu_cr[crfD(ctx->opcode) * 4 + CRF_EQ], cpu_env,        \
> +                      t0, t1);                                                \
>      tcg_temp_free_i64(t0);                                                    \
>      tcg_temp_free_i64(t1);                                                    \
>  }
> +#define GEN_SPEFPUOP_COMP_V64(name, helper)                                       \
> +static inline void gen_##name(DisasContext *ctx)                                  \
> +{                                                                                 \
> +    TCGv_i32 t0, t1;                                                              \
> +    if (unlikely(!ctx->spe_enabled)) {                                            \
> +        gen_exception(ctx, POWERPC_EXCP_SPEU);                                    \
> +        return;                                                                   \
> +    }                                                                             \
> +    t0 = tcg_temp_new_i32();                                                      \
> +    t1 = tcg_temp_new_i32();                                                      \
> +    tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);                           \
> +    tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);                           \
> +    gen_helper_##helper(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL], cpu_env, t0, t1); \
> +    tcg_gen_trunc_tl_i32(t0, cpu_gprh[rA(ctx->opcode)]);                          \
> +    tcg_gen_trunc_tl_i32(t1, cpu_gprh[rB(ctx->opcode)]);                          \
> +    gen_helper_##helper(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], cpu_env, t0, t1); \
> +    tcg_temp_free_i32(t0);                                                        \
> +    tcg_temp_free_i32(t1);                                                        \
> +    tcg_gen_or_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_OR_CL],                  \
> +                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                        \
> +                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                       \
> +    tcg_gen_and_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_AND_CL],                \
> +                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                       \
> +                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                      \
> +}
>  
>  /* Single precision floating-point vectors operations */
>  /* Arithmetic */
> @@ -9474,12 +9506,12 @@ GEN_SPEFPUOP_CONV_64_64(evfsctuiz);
>  GEN_SPEFPUOP_CONV_64_64(evfsctsiz);
>  
>  /* Comparison */
> -GEN_SPEFPUOP_COMP_64(evfscmpgt);
> -GEN_SPEFPUOP_COMP_64(evfscmplt);
> -GEN_SPEFPUOP_COMP_64(evfscmpeq);
> -GEN_SPEFPUOP_COMP_64(evfststgt);
> -GEN_SPEFPUOP_COMP_64(evfststlt);
> -GEN_SPEFPUOP_COMP_64(evfststeq);
> +GEN_SPEFPUOP_COMP_V64(evfscmpgt, efscmpgt);
> +GEN_SPEFPUOP_COMP_V64(evfscmplt, efscmplt);
> +GEN_SPEFPUOP_COMP_V64(evfscmpeq, efscmpeq);
> +GEN_SPEFPUOP_COMP_V64(evfststgt, efststgt);
> +GEN_SPEFPUOP_COMP_V64(evfststlt, efststlt);
> +GEN_SPEFPUOP_COMP_V64(evfststeq, efststeq);
>  
>  /* Opcodes definitions */
>  GEN_SPE(evfsadd,   evfssub,   0x00, 0x0A, 0x00000000, 0x00000000, PPC_SPE_SINGLE); //
> 

There are some other places in the helpers where env->crf[*] is still being set directly.  Here are the ones that I found:

diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 3f656e5..e624f97 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -2141,7 +2141,8 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)                     \
         }                                                               \
     }                                                                   \
                                                                         \
-    env->crf[BF(opcode)] = 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0); \
+    ppc_set_crf(env, BF(opcode),                                        \
+                0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0));           \
 }

 VSX_TDIV(xstdivdp, 1, float64, VsrD(0), -1022, 1023, 52)
@@ -2195,7 +2196,8 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)                     \
         }                                                               \
     }                                                                   \
                                                                         \
-    env->crf[BF(opcode)] = 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0); \
+    ppc_set_crf(env, BF(opcode),                                        \
+                0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0));           \
 }

 VSX_TSQRT(xstsqrtdp, 1, float64, VsrD(0), -1022, 52)
@@ -2358,7 +2360,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)                      \
                                                                          \
     env->fpscr &= ~(0x0F << FPSCR_FPRF);                                 \
     env->fpscr |= cc << FPSCR_FPRF;                                      \
-    env->crf[BF(opcode)] = cc;                                           \
+    ppc_set_crf(env, BF(opcode), cc);                                   \
                                                                          \
     helper_float_check_status(env);                                      \
 }
@@ -2450,7 +2452,8 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)                       \
                                                                           \
     putVSR(xT(opcode), &xt, env);                                         \
     if ((opcode >> (31-21)) & 1) {                                        \
-        env->crf[6] = (all_true ? 0x8 : 0) | (all_false ? 0x2 : 0);       \
+        ppc_set_crf(env, 6,                                               \
+                    (all_true ? 0x8 : 0) | (all_false ? 0x2 : 0));        \
     }                                                                     \
     helper_float_check_status(env);                                       \
  }



Note that I am not able to test any of the SPE instructions.

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 13/14] ppc: inline ppc_set_crf when clearer
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 13/14] ppc: inline ppc_set_crf when clearer Paolo Bonzini
@ 2014-09-18 20:33   ` Tom Musta
  2014-09-19 13:51     ` Paolo Bonzini
  0 siblings, 1 reply; 44+ messages in thread
From: Tom Musta @ 2014-09-18 20:33 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: agraf

On 9/15/2014 10:03 AM, Paolo Bonzini wrote:
> Do not go through the loop when we're setting the four CR fields to
> separate constants or conditions.  This is clearer than putting together
> a 4-bit value and passing it.

I guess "clearer" is in the eye of the beholder .... :)

In general, replacing a single line of code with four is not a simplification (IMO).

That said, I was not able to spot or identify by testing any functional problems with this patch.

> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> 	v1->v2: due to previous changes, ppc_get_crf never needs this
> 	treatment, so I adjusted the subject
> 
>  linux-user/main.c       |  5 ++++-
>  target-ppc/fpu_helper.c | 12 ++++++++++--
>  target-ppc/int_helper.c | 27 +++++++++++++++++++++------
>  3 files changed, 35 insertions(+), 9 deletions(-)
> 
> diff --git a/linux-user/main.c b/linux-user/main.c
> index b403f24..5a0b31f 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -1550,7 +1550,10 @@ static int do_store_exclusive(CPUPPCState *env)
>                  }
>              }
>          }
> -        ppc_set_crf(env, 0, (stored << 1) | xer_so);
> +        env->cr[CRF_LT] = 0;
> +        env->cr[CRF_GT] = 0;
> +        env->cr[CRF_EQ] = stored;
> +        env->cr[CRF_SO] = xer_so;
>          env->reserve_addr = (target_ulong)-1;
>      }
>      if (!segv) {
> diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
> index 7894dc5..c86320f 100644
> --- a/target-ppc/fpu_helper.c
> +++ b/target-ppc/fpu_helper.c
> @@ -1099,7 +1099,11 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
>  
>      env->fpscr &= ~(0x0F << FPSCR_FPRF);
>      env->fpscr |= (0x08 << FPSCR_FPRF) >> fpcc;
> -    ppc_set_crf(env, crfD, 0x08 >> fpcc);
> +
> +    env->cr[crfD * 4 + CRF_LT] = (fpcc == CRF_LT);
> +    env->cr[crfD * 4 + CRF_GT] = (fpcc == CRF_GT);
> +    env->cr[crfD * 4 + CRF_EQ] = (fpcc == CRF_EQ);
> +    env->cr[crfD * 4 + CRF_SO] = (fpcc == CRF_SO);
>  
>      if (unlikely(fpcc == CRF_SO
>                   && (float64_is_signaling_nan(farg1.d) ||
> @@ -1131,7 +1135,11 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
>  
>      env->fpscr &= ~(0x0F << FPSCR_FPRF);
>      env->fpscr |= (0x08 << FPSCR_FPRF) >> fpcc;
> -    ppc_set_crf(env, crfD, 0x08 >> fpcc);
> +
> +    env->cr[crfD * 4 + CRF_LT] = (fpcc == CRF_LT);
> +    env->cr[crfD * 4 + CRF_GT] = (fpcc == CRF_GT);
> +    env->cr[crfD * 4 + CRF_EQ] = (fpcc == CRF_EQ);
> +    env->cr[crfD * 4 + CRF_SO] = (fpcc == CRF_SO);
>  
>      if (unlikely(fpcc == CRF_SO)) {
>          if (float64_is_signaling_nan(farg1.d) ||
> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> index 96f2e7d..be52437 100644
> --- a/target-ppc/int_helper.c
> +++ b/target-ppc/int_helper.c
> @@ -657,7 +657,10 @@ VCF(sx, int32_to_float32, s32)
>              none |= result;                                             \
>          }                                                               \
>          if (record) {                                                   \
> -            ppc_set_crf(env, 6, ((all != 0) << 3) | ((none == 0) << 1)); \
> +            env->cr[24 + CRF_LT] = (all != 0);                          \
> +            env->cr[24 + CRF_GT] = 0;                                   \
> +            env->cr[24 + CRF_EQ] = (none == 0);                         \
> +            env->cr[24 + CRF_SO] = 0;                                   \
>          }                                                               \
>      }
>  #define VCMP(suffix, compare, element)          \
> @@ -703,7 +706,10 @@ VCMP(gtsd, >, s64)
>              none |= result;                                             \
>          }                                                               \
>          if (record) {                                                   \
> -            ppc_set_crf(env, 6, ((all != 0) << 3) | ((none == 0) << 1)); \
> +            env->cr[24 + CRF_LT] = (all != 0);                          \
> +            env->cr[24 + CRF_GT] = 0;                                   \
> +            env->cr[24 + CRF_EQ] = (none == 0);                         \
> +            env->cr[24 + CRF_SO] = 0;                                   \
>          }                                                               \
>      }
>  #define VCMPFP(suffix, compare, order)          \
> @@ -737,7 +743,10 @@ static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
>          }
>      }
>      if (record) {
> -        ppc_set_crf(env, 6, (all_in == 0) << 1);
> +        env->cr[24 + CRF_LT] = 0;
> +        env->cr[24 + CRF_GT] = 0;
> +        env->cr[24 + CRF_EQ] = (all_in == 0);
> +        env->cr[24 + CRF_SO] = 0;
>      }
>  }
>  
> @@ -2558,7 +2567,9 @@ target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
>      for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
>          if ((high & mask) == 0) {
>              if (update_Rc) {
> -                ppc_set_crf(env, 0, 0x4);
> +                env->cr[CRF_LT] = 0;
> +                env->cr[CRF_GT] = 1;
> +                env->cr[CRF_EQ] = 0;
>              }
>              goto done;
>          }
> @@ -2567,7 +2578,9 @@ target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
>      for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
>          if ((low & mask) == 0) {
>              if (update_Rc) {
> -                ppc_set_crf(env, 0, 0x8);
> +                env->cr[CRF_LT] = 1;
> +                env->cr[CRF_GT] = 0;
> +                env->cr[CRF_EQ] = 0;
>              }
>              goto done;
>          }
> @@ -2575,7 +2588,9 @@ target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
>      }
>      i = 8;
>      if (update_Rc) {
> -        ppc_set_crf(env, 0, 0x2);
> +        env->cr[CRF_LT] = 0;
> +        env->cr[CRF_GT] = 0;
> +        env->cr[CRF_EQ] = 1;
>      }
>   done:
>      env->xer = (env->xer & ~0x7F) | i;
> 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups
  2014-09-15 15:03 [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Paolo Bonzini
                   ` (13 preceding siblings ...)
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 14/14] ppc: dump all 32 CR bits Paolo Bonzini
@ 2014-09-18 20:43 ` Tom Musta
  2014-09-19 15:16   ` Paolo Bonzini
  2014-11-03 11:56 ` Alexander Graf
  15 siblings, 1 reply; 44+ messages in thread
From: Tom Musta @ 2014-09-18 20:43 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: agraf

On 9/15/2014 10:03 AM, Paolo Bonzini wrote:
> Patches 1-3 speed up softmmu emulation by avoiding TLB flushes on changes
> to IR/DR.
> 
> Patches 4-14 speed up emulation in general by rewriting the handling of
> condition registers.
> 
> Paolo Bonzini (14):
>   ppc: do not look at the MMU index to detect PR/HV mode
>   softmmu: support up to 12 MMU modes
>   target-ppc: use separate indices for various translation modes
>   ppc: introduce ppc_get_cr and ppc_set_cr
>   ppc: use CRF_* in fpu_helper.c
>   ppc: introduce helpers for mfocrf/mtocrf
>   ppc: reorganize gen_compute_fprf
>   ppc: introduce gen_op_mfcr/gen_op_mtcr
>   ppc: introduce ppc_get_crf and ppc_set_crf
>   ppc: use movcond for isel
>   ppc: store CR registers in 32 1-bit registers
>   ppc: use movcond to implement evsel
>   ppc: inline ppc_get_crf/ppc_set_crf when clearer
>   ppc: dump all 32 CR bits
> 
>  include/exec/cpu_ldst.h  | 120 ++++++++-
>  linux-user/elfload.c     |   4 +-
>  linux-user/main.c        |   9 +-
>  linux-user/signal.c      |   8 +-
>  monitor.c                |   9 +-
>  target-ppc/cpu.h         |  66 ++++-
>  target-ppc/excp_helper.c |   5 +-
>  target-ppc/fpu_helper.c  |  82 +++---
>  target-ppc/gdbstub.c     |  42 +--
>  target-ppc/helper.h      |   9 +-
>  target-ppc/helper_regs.h |  11 +-
>  target-ppc/int_helper.c  |  46 +++-
>  target-ppc/kvm.c         |  11 +-
>  target-ppc/machine.c     |   9 +
>  target-ppc/translate.c   | 686 ++++++++++++++++++++++++-----------------------
>  15 files changed, 631 insertions(+), 486 deletions(-)
> 


Paolo:  I spent some time reviewing and testing patches 4-14.  See my specific comments.

I also attempted to identify the speedup of just these patches.  My test was booting an Ubuntu 14.04 (PPC64LE) image to the login prompt, checking some of the timestamps along the way.  I was able to observe a speedup on a modest-sized laptop (x86) host
-- about 2%.  I did not see any difference on a Power7 host.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 06/14] ppc: introduce helpers for mfocrf/mtocrf
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 06/14] ppc: introduce helpers for mfocrf/mtocrf Paolo Bonzini
  2014-09-18 19:32   ` Tom Musta
@ 2014-09-18 21:01   ` Richard Henderson
  1 sibling, 0 replies; 44+ messages in thread
From: Richard Henderson @ 2014-09-18 21:01 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: tommusta, agraf

On 09/15/2014 08:03 AM, Paolo Bonzini wrote:
> @@ -4173,24 +4173,7 @@ static void gen_mfcr(DisasContext *ctx)
>                              cpu_gpr[rD(ctx->opcode)], crn * 4);
>          }
>      } else {
> -        TCGv_i32 t0 = tcg_temp_new_i32();
> -        tcg_gen_mov_i32(t0, cpu_crf[0]);
> -        tcg_gen_shli_i32(t0, t0, 4);
> -        tcg_gen_or_i32(t0, t0, cpu_crf[1]);
> -        tcg_gen_shli_i32(t0, t0, 4);
> -        tcg_gen_or_i32(t0, t0, cpu_crf[2]);
> -        tcg_gen_shli_i32(t0, t0, 4);
> -        tcg_gen_or_i32(t0, t0, cpu_crf[3]);
> -        tcg_gen_shli_i32(t0, t0, 4);
> -        tcg_gen_or_i32(t0, t0, cpu_crf[4]);
> -        tcg_gen_shli_i32(t0, t0, 4);
> -        tcg_gen_or_i32(t0, t0, cpu_crf[5]);
> -        tcg_gen_shli_i32(t0, t0, 4);
> -        tcg_gen_or_i32(t0, t0, cpu_crf[6]);
> -        tcg_gen_shli_i32(t0, t0, 4);
> -        tcg_gen_or_i32(t0, t0, cpu_crf[7]);
> -        tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t0);
> -        tcg_temp_free_i32(t0);
> +        gen_helper_mfocrf(cpu_gpr[rD(ctx->opcode)], cpu_env);

I don't like this helper name, because it isn't mfocrf.  We handled the "one"
case above.  This is the old-style "mfcr", i.e. move from all cr.

> @@ -4285,15 +4268,9 @@ static void gen_mtcrf(DisasContext *ctx)
>              tcg_temp_free_i32(temp);
>          }
>      } else {
> -        TCGv_i32 temp = tcg_temp_new_i32();
> -        tcg_gen_trunc_tl_i32(temp, cpu_gpr[rS(ctx->opcode)]);
> -        for (crn = 0 ; crn < 8 ; crn++) {
> -            if (crm & (1 << crn)) {
> -                    tcg_gen_shri_i32(cpu_crf[7 - crn], temp, crn * 4);
> -                    tcg_gen_andi_i32(cpu_crf[7 - crn], cpu_crf[7 - crn], 0xf);
> -            }
> -        }
> -        tcg_temp_free_i32(temp);
> +        TCGv_i32 t0 = tcg_const_i32(crm);
> +        gen_helper_mtocrf(cpu_env, cpu_gpr[rS(ctx->opcode)], t0);
> +        tcg_temp_free_i32(t0);

Similarly.


r~

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 08/14] ppc: introduce gen_op_mfcr/gen_op_mtcr
  2014-09-15 15:03 ` [Qemu-devel] [PATCH 08/14] ppc: introduce gen_op_mfcr/gen_op_mtcr Paolo Bonzini
  2014-09-18 19:49   ` Tom Musta
@ 2014-09-18 21:38   ` Richard Henderson
  2014-09-19 13:31     ` Paolo Bonzini
  1 sibling, 1 reply; 44+ messages in thread
From: Richard Henderson @ 2014-09-18 21:38 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: tommusta, agraf

On 09/15/2014 08:03 AM, Paolo Bonzini wrote:
> +static inline void gen_op_mfcr(TCGv_i32 dest, int first_cr, int shift)
> +{
> +    tcg_gen_shli_i32(dest, cpu_crf[first_cr >> 2], shift);
> +}
> +
> +static inline void gen_op_mtcr(int first_cr, TCGv_i32 src, int shift)
> +{
> +    if (shift) {
> +        tcg_gen_shri_i32(cpu_crf[first_cr >> 2], src, shift);
> +        tcg_gen_andi_i32(cpu_crf[first_cr >> 2], cpu_crf[first_cr >> 2], 0x0F);
> +    } else {
> +        tcg_gen_andi_i32(cpu_crf[first_cr >> 2], src, 0x0F);
> +    }
> +}

Continuing on the name nit-picking, these *are* the mfocr and mtocr operations.


r~

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 08/14] ppc: introduce gen_op_mfcr/gen_op_mtcr
  2014-09-18 21:38   ` Richard Henderson
@ 2014-09-19 13:31     ` Paolo Bonzini
  0 siblings, 0 replies; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-19 13:31 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: tommusta, agraf

Il 18/09/2014 23:38, Richard Henderson ha scritto:
> On 09/15/2014 08:03 AM, Paolo Bonzini wrote:
>> +static inline void gen_op_mfcr(TCGv_i32 dest, int first_cr, int shift)
>> +{
>> +    tcg_gen_shli_i32(dest, cpu_crf[first_cr >> 2], shift);
>> +}
>> +
>> +static inline void gen_op_mtcr(int first_cr, TCGv_i32 src, int shift)
>> +{
>> +    if (shift) {
>> +        tcg_gen_shri_i32(cpu_crf[first_cr >> 2], src, shift);
>> +        tcg_gen_andi_i32(cpu_crf[first_cr >> 2], cpu_crf[first_cr >> 2], 0x0F);
>> +    } else {
>> +        tcg_gen_andi_i32(cpu_crf[first_cr >> 2], src, 0x0F);
>> +    }
>> +}
> 
> Continuing on the name nit-picking, these *are* the mfocr and mtocr operations.

Ok, will swap.

Paolo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 13/14] ppc: inline ppc_set_crf when clearer
  2014-09-18 20:33   ` Tom Musta
@ 2014-09-19 13:51     ` Paolo Bonzini
  0 siblings, 0 replies; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-19 13:51 UTC (permalink / raw)
  To: Tom Musta, qemu-devel; +Cc: agraf

Il 18/09/2014 22:33, Tom Musta ha scritto:
>> > Do not go through the loop when we're setting the four CR fields to
>> > separate constants or conditions.  This is clearer than putting together
>> > a 4-bit value and passing it.
> I guess "clearer" is in the eye of the beholder .... :)
> 
> In general, replacing a single line of code with four is not a simplification (IMO).
> 
> That said, I was not able to spot or identify by testing any functional problems with this patch.
> 

True.  It is better to say that it avoids messing with bit endianness.

Paolo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 11/14] ppc: store CR registers in 32 1-bit registers
  2014-09-18 20:25   ` Tom Musta
@ 2014-09-19 13:53     ` Paolo Bonzini
  0 siblings, 0 replies; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-19 13:53 UTC (permalink / raw)
  To: Tom Musta, qemu-devel; +Cc: agraf

Il 18/09/2014 22:25, Tom Musta ha scritto:
> This breaks what you did in patch 5, which used LE bit numbering to
> perform shifts.

Yeah, I change "1 << x" to "8 >> x" in this patch for the fcmp
helpers, but not the others.

> And it breaks other code that uses the old LE
> convention.

I'll fix it like this:

git diff target-ppc/fpu_helper.c
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index da93d12..06e4559 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -1018,32 +1018,32 @@ uint32_t helper_ftdiv(uint64_t fra, uint64_t frb)
     if (unlikely(float64_is_infinity(fra) ||
                  float64_is_infinity(frb) ||
                  float64_is_zero(frb))) {
-        fe_flag = 1;
-        fg_flag = 1;
+        fe_flag = 8 >> CRF_EQ;
+        fg_flag = 8 >> CRF_GT;
     } else {
         int e_a = ppc_float64_get_unbiased_exp(fra);
         int e_b = ppc_float64_get_unbiased_exp(frb);
 
         if (unlikely(float64_is_any_nan(fra) ||
                      float64_is_any_nan(frb))) {
-            fe_flag = 1;
+            fe_flag = 8 >> CRF_EQ;
         } else if ((e_b <= -1022) || (e_b >= 1021)) {
-            fe_flag = 1;
+            fe_flag = 8 >> CRF_EQ;
         } else if (!float64_is_zero(fra) &&
                    (((e_a - e_b) >= 1023) ||
                     ((e_a - e_b) <= -1021) ||
                     (e_a <= -970))) {
-            fe_flag = 1;
+            fe_flag = 8 >> CRF_EQ;
         }
 
         if (unlikely(float64_is_zero_or_denormal(frb))) {
             /* XB is not zero because of the above check and */
             /* so must be denormalized.                      */
-            fg_flag = 1;
+            fg_flag = 8 >> CRF_GT;
         }
     }
 
-    return 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0);
+    return (8 >> CRF_LT) | fg_flag | fe_flag;
 }
 
 uint32_t helper_ftsqrt(uint64_t frb)

and similarly for ftsqrt.

Paolo

> There are some other places in helper where env->crf[*] was still being set.  Here are the ones that I found:
> 
> diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
> index 3f656e5..e624f97 100644
> --- a/target-ppc/fpu_helper.c
> +++ b/target-ppc/fpu_helper.c
> @@ -2141,7 +2141,8 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)                     \
>          }                                                               \
>      }                                                                   \
>                                                                          \
> -    env->crf[BF(opcode)] = 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0); \
> +    ppc_set_crf(env, BF(opcode),                                        \
> +                0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0));           \
>  }
> 
>  VSX_TDIV(xstdivdp, 1, float64, VsrD(0), -1022, 1023, 52)
> @@ -2195,7 +2196,8 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)                     \
>          }                                                               \
>      }                                                                   \
>                                                                          \
> -    env->crf[BF(opcode)] = 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0); \
> +    ppc_set_crf(env, BF(opcode),                                        \
> +                0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0));           \
>  }
> 
>  VSX_TSQRT(xstsqrtdp, 1, float64, VsrD(0), -1022, 52)
> @@ -2358,7 +2360,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)                      \
>                                                                           \
>      env->fpscr &= ~(0x0F << FPSCR_FPRF);                                 \
>      env->fpscr |= cc << FPSCR_FPRF;                                      \
> -    env->crf[BF(opcode)] = cc;                                           \
> +    ppc_set_crf(env, BF(opcode), cc);                                   \
>                                                                           \
>      helper_float_check_status(env);                                      \
>  }
> @@ -2450,7 +2452,8 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)                       \
>                                                                            \
>      putVSR(xT(opcode), &xt, env);                                         \
>      if ((opcode >> (31-21)) & 1) {                                        \
> -        env->crf[6] = (all_true ? 0x8 : 0) | (all_false ? 0x2 : 0);       \
> +        ppc_set_crf(env, 6,                                               \
> +                    (all_true ? 0x8 : 0) | (all_false ? 0x2 : 0));        \
>      }                                                                     \
>      helper_float_check_status(env);                                       \
>   }
> 
> 
> 
> Note that I do not have the capability of testing any of the SPE instructions.
> 

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH 09/14] ppc: introduce ppc_get_crf and ppc_set_crf
  2014-09-18 19:51   ` Tom Musta
@ 2014-09-19 14:52     ` Paolo Bonzini
  0 siblings, 0 replies; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-19 14:52 UTC (permalink / raw)
  To: Tom Musta, qemu-devel; +Cc: agraf

Il 18/09/2014 21:51, Tom Musta ha scritto:
> 
> Checkpatch fails:
> WARNING: braces {} are necessary for all arms of this statement
> #171: FILE: target-ppc/translate.c:11111:
> +        if (ppc_get_crf(env, i) & 0x08)
> [...]
> -        else if (env->crf[i] & 0x04)
> [...]
>              a = 'G';
> [...]
> 
> WARNING: braces {} are necessary for all arms of this statement
> #174: FILE: target-ppc/translate.c:11113:
> +        else if (ppc_get_crf(env, i) & 0x04)
> [...]
> -        else if (env->crf[i] & 0x02)
> [...]
> 
> WARNING: braces {} are necessary for all arms of this statement
> #177: FILE: target-ppc/translate.c:11115:
> +        else if (ppc_get_crf(env, i) & 0x02)
> [...]
> 
> total: 0 errors, 3 warnings, 131 lines checked
> 

This goes away with patch 14, so I made the intermediate patch lighter
by omitting the code-style cleanups.

Paolo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups
  2014-09-18 20:43 ` [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Tom Musta
@ 2014-09-19 15:16   ` Paolo Bonzini
  0 siblings, 0 replies; 44+ messages in thread
From: Paolo Bonzini @ 2014-09-19 15:16 UTC (permalink / raw)
  To: Tom Musta, qemu-devel; +Cc: agraf

Il 18/09/2014 22:43, Tom Musta ha scritto:
> I also attempted to identify the speedup of just these patches.  My
> test was booting an Ubuntu 14.04 (PPC64LE) image to the login
> prompt, checking some of the timestamps along the way.  I was able to
> observe a speedup on a modest-sized laptop (x86) host -- about 2%.  I
> did not see any difference on a Power7 host.
> 

You probably can get a higher speedup on user-mode emulation.  I saw a
much smaller difference on my old G4 laptop too, compared to x86.

Paolo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups
  2014-09-15 15:03 [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Paolo Bonzini
                   ` (14 preceding siblings ...)
  2014-09-18 20:43 ` [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Tom Musta
@ 2014-11-03 11:56 ` Alexander Graf
  15 siblings, 0 replies; 44+ messages in thread
From: Alexander Graf @ 2014-11-03 11:56 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: tommusta, qemu-ppc



On 15.09.14 17:03, Paolo Bonzini wrote:
> Patches 1-3 speed up softmmu emulation by avoiding TLB flushes on changes
> to IR/DR.
> 
> Patches 4-14 speed up emulation in general by rewriting the handling of
> condition registers.

The only change that seemed to not require a respin and was
uncontroversial is patch 1. I like the direction, so I applied that one
only for now.

Looking forward to a respin of the others :).


Alex

^ permalink raw reply	[flat|nested] 44+ messages in thread

end of thread, other threads:[~2014-11-03 11:56 UTC | newest]

Thread overview: 44+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-15 15:03 [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Paolo Bonzini
2014-09-15 15:03 ` [Qemu-devel] [PATCH 01/14] ppc: do not look at the MMU index to detect PR/HV mode Paolo Bonzini
2014-09-15 15:03 ` [Qemu-devel] [PATCH 02/14] softmmu: support up to 12 MMU modes Paolo Bonzini
2014-09-15 15:03 ` [Qemu-devel] [PATCH 03/14] target-ppc: use separate indices for various translation modes Paolo Bonzini
2014-09-16 17:20   ` Tom Musta
2014-09-16 18:02     ` Richard Henderson
2014-09-16 18:27       ` Paolo Bonzini
2014-09-16 18:41         ` Richard Henderson
2014-09-16 22:23           ` Richard Henderson
2014-09-17  6:22             ` Paolo Bonzini
2014-09-17  8:53               ` Paolo Bonzini
2014-09-17 15:33                 ` Richard Henderson
2014-09-17 15:50                   ` Paolo Bonzini
2014-09-17 15:55                     ` Richard Henderson
2014-09-16 18:49     ` Peter Maydell
2014-09-16 22:13       ` Richard Henderson
2014-09-15 15:03 ` [Qemu-devel] [PATCH 04/14] ppc: introduce ppc_get_cr and ppc_set_cr Paolo Bonzini
2014-09-18 19:24   ` Tom Musta
2014-09-15 15:03 ` [Qemu-devel] [PATCH 05/14] ppc: use CRF_* in fpu_helper.c Paolo Bonzini
2014-09-15 15:03 ` [Qemu-devel] [PATCH 06/14] ppc: introduce helpers for mfocrf/mtocrf Paolo Bonzini
2014-09-18 19:32   ` Tom Musta
2014-09-18 21:01   ` Richard Henderson
2014-09-15 15:03 ` [Qemu-devel] [PATCH 07/14] ppc: reorganize gen_compute_fprf Paolo Bonzini
2014-09-18 19:48   ` Tom Musta
2014-09-15 15:03 ` [Qemu-devel] [PATCH 08/14] ppc: introduce gen_op_mfcr/gen_op_mtcr Paolo Bonzini
2014-09-18 19:49   ` Tom Musta
2014-09-18 21:38   ` Richard Henderson
2014-09-19 13:31     ` Paolo Bonzini
2014-09-15 15:03 ` [Qemu-devel] [PATCH 09/14] ppc: introduce ppc_get_crf and ppc_set_crf Paolo Bonzini
2014-09-18 19:51   ` Tom Musta
2014-09-19 14:52     ` Paolo Bonzini
2014-09-15 15:03 ` [Qemu-devel] [PATCH 10/14] ppc: use movcond for isel Paolo Bonzini
2014-09-18 20:05   ` Tom Musta
2014-09-15 15:03 ` [Qemu-devel] [PATCH 11/14] ppc: store CR registers in 32 1-bit registers Paolo Bonzini
2014-09-18 20:25   ` Tom Musta
2014-09-19 13:53     ` Paolo Bonzini
2014-09-15 15:03 ` [Qemu-devel] [PATCH 12/14] ppc: use movcond to implement evsel Paolo Bonzini
2014-09-15 15:03 ` [Qemu-devel] [PATCH 13/14] ppc: inline ppc_set_crf when clearer Paolo Bonzini
2014-09-18 20:33   ` Tom Musta
2014-09-19 13:51     ` Paolo Bonzini
2014-09-15 15:03 ` [Qemu-devel] [PATCH 14/14] ppc: dump all 32 CR bits Paolo Bonzini
2014-09-18 20:43 ` [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups Tom Musta
2014-09-19 15:16   ` Paolo Bonzini
2014-11-03 11:56 ` Alexander Graf

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.