All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v4 01/15] target-ppc: Introduce Power9 family
       [not found] <1469534318-5549-1-git-send-email-nikunj@linux.vnet.ibm.com>
@ 2016-07-26 11:58 ` Nikunj A Dadhania
  2016-07-27  6:17   ` David Gibson
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 02/15] target-ppc: Introduce POWER ISA 3.0 flag Nikunj A Dadhania
                   ` (14 subsequent siblings)
  15 siblings, 1 reply; 23+ messages in thread
From: Nikunj A Dadhania @ 2016-07-26 11:58 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, nikunj, bharata, aneesh.kumar

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

The patch adds CPU PVR definition for POWER9 and enables QEMU to launch
guests/linux-user in TCG mode.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
[ Added POWER9 alias, POWER9 SPAPR core and dropped MMU defines ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 hw/ppc/spapr_cpu_core.c     |  5 +++
 target-ppc/cpu-models.c     |  5 +++
 target-ppc/cpu-models.h     |  1 +
 target-ppc/cpu-qom.h        |  1 +
 target-ppc/mmu_helper.c     |  2 +-
 target-ppc/translate_init.c | 86 ++++++++++++++++++++++++++++++++++++++++++++-
 6 files changed, 98 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 5a132bf..713c5f3 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -352,6 +352,7 @@ SPAPR_CPU_CORE_INITFN(POWER7+_v2.1, POWER7plus);
 SPAPR_CPU_CORE_INITFN(POWER8_v2.0, POWER8);
 SPAPR_CPU_CORE_INITFN(POWER8E_v2.1, POWER8E);
 SPAPR_CPU_CORE_INITFN(POWER8NVL_v1.0, POWER8NVL);
+SPAPR_CPU_CORE_INITFN(POWER9_v1.0, POWER9);
 
 typedef struct SPAPRCoreInfo {
     const char *name;
@@ -395,6 +396,10 @@ static const SPAPRCoreInfo spapr_cores[] = {
     { .name = "POWER8NVL_v1.0", .initfn = spapr_cpu_core_POWER8NVL_initfn },
     { .name = "POWER8NVL", .initfn = spapr_cpu_core_POWER8NVL_initfn },
 
+    /* POWER9 and aliases */
+    { .name = "POWER9_v1.0", .initfn = spapr_cpu_core_POWER9_initfn },
+    { .name = "POWER9", .initfn = spapr_cpu_core_POWER9_initfn },
+
     { .name = NULL }
 };
 
diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c
index 5209e63..901cf40 100644
--- a/target-ppc/cpu-models.c
+++ b/target-ppc/cpu-models.c
@@ -1147,6 +1147,10 @@
                 "POWER8NVL v1.0")
     POWERPC_DEF("970_v2.2",      CPU_POWERPC_970_v22,                970,
                 "PowerPC 970 v2.2")
+
+    POWERPC_DEF("POWER9_v1.0",   CPU_POWERPC_POWER9_BASE,            POWER9,
+                "POWER9 v1.0")
+
     POWERPC_DEF("970fx_v1.0",    CPU_POWERPC_970FX_v10,              970,
                 "PowerPC 970FX v1.0 (G5)")
     POWERPC_DEF("970fx_v2.0",    CPU_POWERPC_970FX_v20,              970,
@@ -1395,6 +1399,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = {
     { "POWER8E", "POWER8E_v2.1" },
     { "POWER8", "POWER8_v2.0" },
     { "POWER8NVL", "POWER8NVL_v1.0" },
+    { "POWER9", "POWER9_v1.0" },
     { "970", "970_v2.2" },
     { "970fx", "970fx_v3.1" },
     { "970mp", "970mp_v1.1" },
diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
index f21a44c..7d9e6a2 100644
--- a/target-ppc/cpu-models.h
+++ b/target-ppc/cpu-models.h
@@ -562,6 +562,7 @@ enum {
     CPU_POWERPC_POWER8_v20         = 0x004D0200,
     CPU_POWERPC_POWER8NVL_BASE     = 0x004C0000,
     CPU_POWERPC_POWER8NVL_v10      = 0x004C0100,
+    CPU_POWERPC_POWER9_BASE        = 0x004E0000,
     CPU_POWERPC_970_v22            = 0x00390202,
     CPU_POWERPC_970FX_v10          = 0x00391100,
     CPU_POWERPC_970FX_v20          = 0x003C0200,
diff --git a/target-ppc/cpu-qom.h b/target-ppc/cpu-qom.h
index 2864105..713deef 100644
--- a/target-ppc/cpu-qom.h
+++ b/target-ppc/cpu-qom.h
@@ -86,6 +86,7 @@ enum powerpc_mmu_t {
     POWERPC_MMU_2_07       = POWERPC_MMU_64 | POWERPC_MMU_1TSEG
                              | POWERPC_MMU_64K
                              | POWERPC_MMU_AMR | 0x00000004,
+    /* FIXME Add POWERPC_MMU_3_00 defines */
     /* Architecture 2.07 "degraded" (no 1T segments)           */
     POWERPC_MMU_2_07a      = POWERPC_MMU_64 | POWERPC_MMU_AMR
                              | 0x00000004,
diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index 3eb3cd7..737f338 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -1941,7 +1941,7 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
         break;
     default:
         /* XXX: TODO */
-        cpu_abort(CPU(cpu), "Unknown MMU model\n");
+        cpu_abort(CPU(cpu), "Unknown MMU model %d\n", env->mmu_model);
         break;
     }
 }
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 5f28a36..1e12d80 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7459,7 +7459,8 @@ enum BOOK3S_CPU_TYPE {
     BOOK3S_CPU_POWER5PLUS,
     BOOK3S_CPU_POWER6,
     BOOK3S_CPU_POWER7,
-    BOOK3S_CPU_POWER8
+    BOOK3S_CPU_POWER8,
+    BOOK3S_CPU_POWER9
 };
 
 static void gen_fscr_facility_check(DisasContext *ctx, int facility_sprn,
@@ -8241,6 +8242,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int version)
         break;
     case BOOK3S_CPU_POWER7:
     case BOOK3S_CPU_POWER8:
+    case BOOK3S_CPU_POWER9:
         gen_spr_book3s_ids(env);
         gen_spr_amr(env, version >= BOOK3S_CPU_POWER8);
         gen_spr_book3s_purr(env);
@@ -8293,6 +8295,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int version)
         break;
     case BOOK3S_CPU_POWER7:
     case BOOK3S_CPU_POWER8:
+    case BOOK3S_CPU_POWER9:
     default:
         env->slb_nr = 32;
         break;
@@ -8310,6 +8313,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int version)
         ppcPOWER7_irq_init(ppc_env_get_cpu(env));
         break;
     case BOOK3S_CPU_POWER8:
+    case BOOK3S_CPU_POWER9:
         init_excp_POWER8(env);
         ppcPOWER7_irq_init(ppc_env_get_cpu(env));
         break;
@@ -8772,6 +8776,86 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
     pcc->l1_icache_size = 0x8000;
     pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr;
 }
+static void init_proc_POWER9(CPUPPCState *env)
+{
+    init_proc_book3s_64(env, BOOK3S_CPU_POWER9);
+}
+
+static bool ppc_pvr_match_power9(PowerPCCPUClass *pcc, uint32_t pvr)
+{
+    if ((pvr & CPU_POWERPC_POWER_SERVER_MASK) == CPU_POWERPC_POWER9_BASE) {
+        return true;
+    }
+    return false;
+}
+
+POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
+
+    dc->fw_name = "PowerPC,POWER9";
+    dc->desc = "POWER9";
+    dc->props = powerpc_servercpu_properties;
+    pcc->pvr_match = ppc_pvr_match_power9;
+    pcc->pcr_mask = PCR_COMPAT_2_05 | PCR_COMPAT_2_06 | PCR_COMPAT_2_07;
+    pcc->init_proc = init_proc_POWER9;
+    pcc->check_pow = check_pow_nocheck;
+    pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB |
+                       PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES |
+                       PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE |
+                       PPC_FLOAT_FRSQRTES |
+                       PPC_FLOAT_STFIWX |
+                       PPC_FLOAT_EXT |
+                       PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
+                       PPC_MEM_SYNC | PPC_MEM_EIEIO |
+                       PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
+                       PPC_64B | PPC_64BX | PPC_ALTIVEC |
+                       PPC_SEGMENT_64B | PPC_SLBI |
+                       PPC_POPCNTB | PPC_POPCNTWD |
+                       PPC_CILDST;
+    pcc->insns_flags2 = PPC2_VSX | PPC2_VSX207 | PPC2_DFP | PPC2_DBRX |
+                        PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
+                        PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
+                        PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
+                        PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
+                        PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 |
+                        PPC2_TM | PPC2_PM_ISA206;
+    pcc->msr_mask = (1ull << MSR_SF) |
+                    (1ull << MSR_TM) |
+                    (1ull << MSR_VR) |
+                    (1ull << MSR_VSX) |
+                    (1ull << MSR_EE) |
+                    (1ull << MSR_PR) |
+                    (1ull << MSR_FP) |
+                    (1ull << MSR_ME) |
+                    (1ull << MSR_FE0) |
+                    (1ull << MSR_SE) |
+                    (1ull << MSR_DE) |
+                    (1ull << MSR_FE1) |
+                    (1ull << MSR_IR) |
+                    (1ull << MSR_DR) |
+                    (1ull << MSR_PMM) |
+                    (1ull << MSR_RI) |
+                    (1ull << MSR_LE);
+    /* Using 2.07 defines until new radix model is added. */
+    pcc->mmu_model = POWERPC_MMU_2_07;
+#if defined(CONFIG_SOFTMMU)
+    pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
+    /* segment page size remain the same */
+    pcc->sps = &POWER7_POWER8_sps;
+#endif
+    pcc->excp_model = POWERPC_EXCP_POWER8;
+    pcc->bus_model = PPC_FLAGS_INPUT_POWER7;
+    pcc->bfd_mach = bfd_mach_ppc64;
+    pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
+                 POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
+                 POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
+                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM;
+    pcc->l1_dcache_size = 0x8000;
+    pcc->l1_icache_size = 0x8000;
+    pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr;
+}
 
 #if !defined(CONFIG_USER_ONLY)
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH v4 02/15] target-ppc: Introduce POWER ISA 3.0 flag
       [not found] <1469534318-5549-1-git-send-email-nikunj@linux.vnet.ibm.com>
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 01/15] target-ppc: Introduce Power9 family Nikunj A Dadhania
@ 2016-07-26 11:58 ` Nikunj A Dadhania
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 03/15] target-ppc: adding addpcis instruction Nikunj A Dadhania
                   ` (13 subsequent siblings)
  15 siblings, 0 replies; 23+ messages in thread
From: Nikunj A Dadhania @ 2016-07-26 11:58 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, nikunj, bharata, aneesh.kumar

This flag will be used for POWER9 instructions.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---
 target-ppc/cpu.h            | 5 ++++-
 target-ppc/translate_init.c | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 5fce1ff..c499315 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -2094,6 +2094,8 @@ enum {
     PPC2_TM            = 0x0000000000020000ULL,
     /* Server PM instructgions (ISA 2.06, Book III)                          */
     PPC2_PM_ISA206     = 0x0000000000040000ULL,
+    /* POWER ISA 3.0                                                         */
+    PPC2_ISA300        = 0x0000000000080000ULL,
 
 #define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_VSX | PPC2_PRCNTL | PPC2_DBRX | \
                         PPC2_ISA205 | PPC2_VSX207 | PPC2_PERM_ISA206 | \
@@ -2101,7 +2103,8 @@ enum {
                         PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | \
                         PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \
                         PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP | \
-                        PPC2_FP_CVT_S64 | PPC2_TM | PPC2_PM_ISA206)
+                        PPC2_FP_CVT_S64 | PPC2_TM | PPC2_PM_ISA206 | \
+                        PPC2_ISA300)
 };
 
 /*****************************************************************************/
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 1e12d80..0d8cff1 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8820,7 +8820,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
                         PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
                         PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
                         PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 |
-                        PPC2_TM | PPC2_PM_ISA206;
+                        PPC2_TM | PPC2_PM_ISA206 | PPC2_ISA300;
     pcc->msr_mask = (1ull << MSR_SF) |
                     (1ull << MSR_TM) |
                     (1ull << MSR_VR) |
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH v4 03/15] target-ppc: adding addpcis instruction
       [not found] <1469534318-5549-1-git-send-email-nikunj@linux.vnet.ibm.com>
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 01/15] target-ppc: Introduce Power9 family Nikunj A Dadhania
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 02/15] target-ppc: Introduce POWER ISA 3.0 flag Nikunj A Dadhania
@ 2016-07-26 11:58 ` Nikunj A Dadhania
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 04/15] target-ppc: add cmprb instruction Nikunj A Dadhania
                   ` (12 subsequent siblings)
  15 siblings, 0 replies; 23+ messages in thread
From: Nikunj A Dadhania @ 2016-07-26 11:58 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, nikunj, bharata, aneesh.kumar

ISA 3.0 instruction that shifts an immediate value left by 16 bits, adds it
to the next instruction address and returns the result in the target register.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---
 target-ppc/translate.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 92030b6..ca246ea 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -432,6 +432,20 @@ static inline uint32_t name(uint32_t opcode)                                  \
     return (((opcode >> (shift1)) & ((1 << (nb1)) - 1)) << nb2) |             \
             ((opcode >> (shift2)) & ((1 << (nb2)) - 1));                      \
 }
+
+#define EXTRACT_HELPER_DXFORM(name,                                           \
+                              d0_bits, shift_op_d0, shift_d0,                 \
+                              d1_bits, shift_op_d1, shift_d1,                 \
+                              d2_bits, shift_op_d2, shift_d2)                 \
+static inline int16_t name(uint32_t opcode)                                   \
+{                                                                             \
+    return                                                                    \
+        (((opcode >> (shift_op_d0)) & ((1 << (d0_bits)) - 1)) << (shift_d0)) | \
+        (((opcode >> (shift_op_d1)) & ((1 << (d1_bits)) - 1)) << (shift_d1)) | \
+        (((opcode >> (shift_op_d2)) & ((1 << (d2_bits)) - 1)) << (shift_d2));  \
+}
+
+
 /* Opcode part 1 */
 EXTRACT_HELPER(opc1, 26, 6);
 /* Opcode part 2 */
@@ -501,6 +515,9 @@ EXTRACT_HELPER(FPL, 25, 1);
 EXTRACT_HELPER(FPFLM, 17, 8);
 EXTRACT_HELPER(FPW, 16, 1);
 
+/* addpcis */
+EXTRACT_HELPER_DXFORM(DX, 10, 6, 6, 5, 16, 1, 1, 0, 0)
+
 /***                            Jump target decoding                       ***/
 /* Immediate address */
 static inline target_ulong LI(uint32_t opcode)
@@ -984,6 +1001,14 @@ static void gen_addis(DisasContext *ctx)
     }
 }
 
+/* addpcis */
+static void gen_addpcis(DisasContext *ctx)
+{
+    target_long d = DX(ctx->opcode);
+
+    tcg_gen_movi_tl(cpu_gpr[rD(ctx->opcode)], ctx->nip + (d << 16));
+}
+
 static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1,
                                      TCGv arg2, int sign, int compute_ov)
 {
@@ -9877,6 +9902,7 @@ GEN_HANDLER(addi, 0x0E, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
 GEN_HANDLER(addic, 0x0C, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
 GEN_HANDLER2(addic_, "addic.", 0x0D, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
 GEN_HANDLER(addis, 0x0F, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
+GEN_HANDLER_E(addpcis, 0x13, 0x2, 0xFF, 0x00000000, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER(mulhw, 0x1F, 0x0B, 0x02, 0x00000400, PPC_INTEGER),
 GEN_HANDLER(mulhwu, 0x1F, 0x0B, 0x00, 0x00000400, PPC_INTEGER),
 GEN_HANDLER(mullw, 0x1F, 0x0B, 0x07, 0x00000000, PPC_INTEGER),
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH v4 04/15] target-ppc: add cmprb instruction
       [not found] <1469534318-5549-1-git-send-email-nikunj@linux.vnet.ibm.com>
                   ` (2 preceding siblings ...)
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 03/15] target-ppc: adding addpcis instruction Nikunj A Dadhania
@ 2016-07-26 11:58 ` Nikunj A Dadhania
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 05/15] target-ppc: add modulo word operations Nikunj A Dadhania
                   ` (11 subsequent siblings)
  15 siblings, 0 replies; 23+ messages in thread
From: Nikunj A Dadhania @ 2016-07-26 11:58 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, nikunj, bharata, aneesh.kumar

ISA 3.0 Compare Ranged Byte instruction, useful for
isupper/islower/isalpha kinds of operations.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-ppc/translate.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index ca246ea..7e349e8 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -817,6 +817,43 @@ static void gen_cmpli(DisasContext *ctx)
     }
 }
 
+/* cmprb - range comparison: isupper, isalpha, islower */
+static void gen_cmprb(DisasContext *ctx)
+{
+    TCGv_i32 src1 = tcg_temp_new_i32();
+    TCGv_i32 src2 = tcg_temp_new_i32();
+    TCGv_i32 src2lo = tcg_temp_new_i32();
+    TCGv_i32 src2hi = tcg_temp_new_i32();
+    TCGv_i32 crf = cpu_crf[crfD(ctx->opcode)];
+
+    tcg_gen_trunc_tl_i32(src1, cpu_gpr[rA(ctx->opcode)]);
+    tcg_gen_trunc_tl_i32(src2, cpu_gpr[rB(ctx->opcode)]);
+
+    tcg_gen_ext8u_i32(src2lo, src2);
+    tcg_gen_shri_i32(src2, src2, 8);
+    tcg_gen_ext8u_i32(src2hi, src2);
+
+    tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1);
+    tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi);
+    tcg_gen_and_i32(crf, src2lo, src2hi);
+
+    if (ctx->opcode & 0x00200000) {
+        tcg_gen_shri_i32(src2, src2, 8);
+        tcg_gen_ext8u_i32(src2lo, src2);
+        tcg_gen_shri_i32(src2, src2, 8);
+        tcg_gen_ext8u_i32(src2hi, src2);
+        tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1);
+        tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi);
+        tcg_gen_and_i32(src2lo, src2lo, src2hi);
+        tcg_gen_or_i32(crf, crf, src2lo);
+    }
+    tcg_gen_shli_i32(crf, crf, CRF_GT);
+    tcg_temp_free_i32(src1);
+    tcg_temp_free_i32(src2);
+    tcg_temp_free_i32(src2lo);
+    tcg_temp_free_i32(src2hi);
+}
+
 /* isel (PowerPC 2.03 specification) */
 static void gen_isel(DisasContext *ctx)
 {
@@ -9897,6 +9934,7 @@ GEN_HANDLER(cmpi, 0x0B, 0xFF, 0xFF, 0x00400000, PPC_INTEGER),
 GEN_HANDLER(cmpl, 0x1F, 0x00, 0x01, 0x00400000, PPC_INTEGER),
 GEN_HANDLER(cmpli, 0x0A, 0xFF, 0xFF, 0x00400000, PPC_INTEGER),
 GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x00000001, PPC_NONE, PPC2_ISA205),
+GEN_HANDLER_E(cmprb, 0x1F, 0x00, 0x06, 0x00400001, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER(isel, 0x1F, 0x0F, 0xFF, 0x00000001, PPC_ISEL),
 GEN_HANDLER(addi, 0x0E, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
 GEN_HANDLER(addic, 0x0C, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH v4 05/15] target-ppc: add modulo word operations
       [not found] <1469534318-5549-1-git-send-email-nikunj@linux.vnet.ibm.com>
                   ` (3 preceding siblings ...)
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 04/15] target-ppc: add cmprb instruction Nikunj A Dadhania
@ 2016-07-26 11:58 ` Nikunj A Dadhania
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 06/15] target-ppc: add modulo dword operations Nikunj A Dadhania
                   ` (10 subsequent siblings)
  15 siblings, 0 replies; 23+ messages in thread
From: Nikunj A Dadhania @ 2016-07-26 11:58 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, nikunj, bharata, aneesh.kumar

Add the following instructions:

moduw: Modulo Unsigned Word
modsw: Modulo Signed Word

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-ppc/translate.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 7e349e8..966e848 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1175,6 +1175,52 @@ GEN_DIVE(divde, divde, 0);
 GEN_DIVE(divdeo, divde, 1);
 #endif
 
+static inline void gen_op_arith_modw(DisasContext *ctx, TCGv ret, TCGv arg1,
+                                     TCGv arg2, int sign)
+{
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i32 t1 = tcg_temp_new_i32();
+
+    tcg_gen_trunc_tl_i32(t0, arg1);
+    tcg_gen_trunc_tl_i32(t1, arg2);
+    if (sign) {
+        TCGv_i32 t2 = tcg_temp_new_i32();
+        TCGv_i32 t3 = tcg_temp_new_i32();
+        tcg_gen_setcondi_i32(TCG_COND_EQ, t2, t0, INT_MIN);
+        tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, -1);
+        tcg_gen_and_i32(t2, t2, t3);
+        tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, 0);
+        tcg_gen_or_i32(t2, t2, t3);
+        tcg_gen_movi_i32(t3, 0);
+        tcg_gen_movcond_i32(TCG_COND_NE, t1, t2, t3, t2, t1);
+        tcg_gen_rem_i32(t3, t0, t1);
+        tcg_gen_extu_i32_tl(ret, t3);
+        tcg_temp_free_i32(t2);
+        tcg_temp_free_i32(t3);
+    } else {
+        TCGv_i32 t2 = tcg_const_i32(1);
+        TCGv_i32 t3 = tcg_const_i32(0);
+        tcg_gen_movcond_i32(TCG_COND_EQ, t1, t1, t3, t2, t1);
+        tcg_gen_remu_i32(t3, t0, t1);
+        tcg_gen_extu_i32_tl(ret, t3);
+        tcg_temp_free_i32(t2);
+        tcg_temp_free_i32(t3);
+    }
+    tcg_temp_free_i32(t0);
+    tcg_temp_free_i32(t1);
+}
+
+#define GEN_INT_ARITH_MODW(name, opc3, sign)                                \
+static void glue(gen_, name)(DisasContext *ctx)                             \
+{                                                                           \
+    gen_op_arith_modw(ctx, cpu_gpr[rD(ctx->opcode)],                        \
+                      cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)],   \
+                      sign);                                                \
+}
+
+GEN_INT_ARITH_MODW(moduw, 0x08, 0);
+GEN_INT_ARITH_MODW(modsw, 0x18, 1);
+
 /* mulhw  mulhw. */
 static void gen_mulhw(DisasContext *ctx)
 {
@@ -10241,6 +10287,8 @@ GEN_HANDLER_E(divwe, 0x1F, 0x0B, 0x0D, 0, PPC_NONE, PPC2_DIVE_ISA206),
 GEN_HANDLER_E(divweo, 0x1F, 0x0B, 0x1D, 0, PPC_NONE, PPC2_DIVE_ISA206),
 GEN_HANDLER_E(divweu, 0x1F, 0x0B, 0x0C, 0, PPC_NONE, PPC2_DIVE_ISA206),
 GEN_HANDLER_E(divweuo, 0x1F, 0x0B, 0x1C, 0, PPC_NONE, PPC2_DIVE_ISA206),
+GEN_HANDLER_E(modsw, 0x1F, 0x0B, 0x18, 0x00000001, PPC_NONE, PPC2_ISA300),
+GEN_HANDLER_E(moduw, 0x1F, 0x0B, 0x08, 0x00000001, PPC_NONE, PPC2_ISA300),
 
 #if defined(TARGET_PPC64)
 #undef GEN_INT_ARITH_DIVD
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH v4 06/15] target-ppc: add modulo dword operations
       [not found] <1469534318-5549-1-git-send-email-nikunj@linux.vnet.ibm.com>
                   ` (4 preceding siblings ...)
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 05/15] target-ppc: add modulo word operations Nikunj A Dadhania
@ 2016-07-26 11:58 ` Nikunj A Dadhania
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 07/15] target-ppc: implement branch-less divw[o][.] Nikunj A Dadhania
                   ` (9 subsequent siblings)
  15 siblings, 0 replies; 23+ messages in thread
From: Nikunj A Dadhania @ 2016-07-26 11:58 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, nikunj, bharata, aneesh.kumar

Add the following instructions for ISA 3.0 support:

modud: Modulo Unsigned Dword
modsd: Modulo Signed Dword

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-ppc/translate.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 966e848..7c7328f 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1221,6 +1221,52 @@ static void glue(gen_, name)(DisasContext *ctx)                             \
 GEN_INT_ARITH_MODW(moduw, 0x08, 0);
 GEN_INT_ARITH_MODW(modsw, 0x18, 1);
 
+#if defined(TARGET_PPC64)
+static inline void gen_op_arith_modd(DisasContext *ctx, TCGv ret, TCGv arg1,
+                                     TCGv arg2, int sign)
+{
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+
+    tcg_gen_mov_i64(t0, arg1);
+    tcg_gen_mov_i64(t1, arg2);
+    if (sign) {
+        TCGv_i64 t2 = tcg_temp_new_i64();
+        TCGv_i64 t3 = tcg_temp_new_i64();
+        tcg_gen_setcondi_i64(TCG_COND_EQ, t2, t0, INT64_MIN);
+        tcg_gen_setcondi_i64(TCG_COND_EQ, t3, t1, -1);
+        tcg_gen_and_i64(t2, t2, t3);
+        tcg_gen_setcondi_i64(TCG_COND_EQ, t3, t1, 0);
+        tcg_gen_or_i64(t2, t2, t3);
+        tcg_gen_movi_i64(t3, 0);
+        tcg_gen_movcond_i64(TCG_COND_NE, t1, t2, t3, t2, t1);
+        tcg_gen_rem_i64(ret, t0, t1);
+        tcg_temp_free_i64(t2);
+        tcg_temp_free_i64(t3);
+    } else {
+        TCGv_i64 t2 = tcg_const_i64(1);
+        TCGv_i64 t3 = tcg_const_i64(0);
+        tcg_gen_movcond_i64(TCG_COND_EQ, t1, t1, t3, t2, t1);
+        tcg_gen_remu_i64(ret, t0, t1);
+        tcg_temp_free_i64(t2);
+        tcg_temp_free_i64(t3);
+    }
+    tcg_temp_free_i64(t0);
+    tcg_temp_free_i64(t1);
+}
+
+#define GEN_INT_ARITH_MODD(name, opc3, sign)                            \
+static void glue(gen_, name)(DisasContext *ctx)                           \
+{                                                                         \
+  gen_op_arith_modd(ctx, cpu_gpr[rD(ctx->opcode)],                        \
+                    cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)],   \
+                    sign);                                                \
+}
+
+GEN_INT_ARITH_MODD(modud, 0x08, 0);
+GEN_INT_ARITH_MODD(modsd, 0x18, 1);
+#endif
+
 /* mulhw  mulhw. */
 static void gen_mulhw(DisasContext *ctx)
 {
@@ -10303,6 +10349,8 @@ GEN_HANDLER_E(divdeu, 0x1F, 0x09, 0x0C, 0, PPC_NONE, PPC2_DIVE_ISA206),
 GEN_HANDLER_E(divdeuo, 0x1F, 0x09, 0x1C, 0, PPC_NONE, PPC2_DIVE_ISA206),
 GEN_HANDLER_E(divde, 0x1F, 0x09, 0x0D, 0, PPC_NONE, PPC2_DIVE_ISA206),
 GEN_HANDLER_E(divdeo, 0x1F, 0x09, 0x1D, 0, PPC_NONE, PPC2_DIVE_ISA206),
+GEN_HANDLER_E(modsd, 0x1F, 0x09, 0x18, 0x00000001, PPC_NONE, PPC2_ISA300),
+GEN_HANDLER_E(modud, 0x1F, 0x09, 0x08, 0x00000001, PPC_NONE, PPC2_ISA300),
 
 #undef GEN_INT_ARITH_MUL_HELPER
 #define GEN_INT_ARITH_MUL_HELPER(name, opc3)                                  \
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH v4 07/15] target-ppc: implement branch-less divw[o][.]
       [not found] <1469534318-5549-1-git-send-email-nikunj@linux.vnet.ibm.com>
                   ` (5 preceding siblings ...)
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 06/15] target-ppc: add modulo dword operations Nikunj A Dadhania
@ 2016-07-26 11:58 ` Nikunj A Dadhania
  2016-07-27  5:19   ` David Gibson
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 08/15] target-ppc: implement branch-less divd[o][.] Nikunj A Dadhania
                   ` (8 subsequent siblings)
  15 siblings, 1 reply; 23+ messages in thread
From: Nikunj A Dadhania @ 2016-07-26 11:58 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, nikunj, bharata, aneesh.kumar

While implementing the modulo instructions, it became apparent that the
existing divide implementation uses many branches. Change the logic to be
branch-less. For invalid input, where the result is undefined, the result
is set to the dividend.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/translate.c | 48 +++++++++++++++++++++++-------------------------
 1 file changed, 23 insertions(+), 25 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 7c7328f..69d9ae0 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1049,41 +1049,39 @@ static void gen_addpcis(DisasContext *ctx)
 static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1,
                                      TCGv arg2, int sign, int compute_ov)
 {
-    TCGLabel *l1 = gen_new_label();
-    TCGLabel *l2 = gen_new_label();
-    TCGv_i32 t0 = tcg_temp_local_new_i32();
-    TCGv_i32 t1 = tcg_temp_local_new_i32();
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    TCGv_i32 t2 = tcg_temp_new_i32();
+    TCGv_i32 t3 = tcg_temp_new_i32();
 
     tcg_gen_trunc_tl_i32(t0, arg1);
     tcg_gen_trunc_tl_i32(t1, arg2);
-    tcg_gen_brcondi_i32(TCG_COND_EQ, t1, 0, l1);
-    if (sign) {
-        TCGLabel *l3 = gen_new_label();
-        tcg_gen_brcondi_i32(TCG_COND_NE, t1, -1, l3);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, t0, INT32_MIN, l1);
-        gen_set_label(l3);
-        tcg_gen_div_i32(t0, t0, t1);
-    } else {
-        tcg_gen_divu_i32(t0, t0, t1);
-    }
-    if (compute_ov) {
-        tcg_gen_movi_tl(cpu_ov, 0);
-    }
-    tcg_gen_br(l2);
-    gen_set_label(l1);
     if (sign) {
-        tcg_gen_sari_i32(t0, t0, 31);
+        tcg_gen_setcondi_i32(TCG_COND_EQ, t2, t0, INT_MIN);
+        tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, -1);
+        tcg_gen_and_i32(t2, t2, t3);
+        tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, 0);
+        tcg_gen_or_i32(t2, t2, t3);
+        tcg_gen_movi_i32(t3, 0);
+        tcg_gen_movcond_i32(TCG_COND_NE, t1, t2, t3, t2, t1);
+        tcg_gen_div_i32(t3, t0, t1);
+        tcg_gen_extu_i32_tl(ret, t3);
     } else {
-        tcg_gen_movi_i32(t0, 0);
+        tcg_gen_setcondi_i32(TCG_COND_EQ, t2, t1, 0);
+        tcg_gen_movi_i32(t3, 0);
+        tcg_gen_movcond_i32(TCG_COND_NE, t1, t2, t3, t2, t1);
+        tcg_gen_divu_i32(t3, t0, t1);
+        tcg_gen_extu_i32_tl(ret, t3);
     }
     if (compute_ov) {
-        tcg_gen_movi_tl(cpu_ov, 1);
-        tcg_gen_movi_tl(cpu_so, 1);
+        tcg_gen_extu_i32_tl(cpu_ov, t2);
+        tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
     }
-    gen_set_label(l2);
-    tcg_gen_extu_i32_tl(ret, t0);
     tcg_temp_free_i32(t0);
     tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t2);
+    tcg_temp_free_i32(t3);
+
     if (unlikely(Rc(ctx->opcode) != 0))
         gen_set_Rc0(ctx, ret);
 }
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH v4 08/15] target-ppc: implement branch-less divd[o][.]
       [not found] <1469534318-5549-1-git-send-email-nikunj@linux.vnet.ibm.com>
                   ` (6 preceding siblings ...)
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 07/15] target-ppc: implement branch-less divw[o][.] Nikunj A Dadhania
@ 2016-07-26 11:58 ` Nikunj A Dadhania
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 09/15] target-ppc: add cnttzd[.] instruction Nikunj A Dadhania
                   ` (7 subsequent siblings)
  15 siblings, 0 replies; 23+ messages in thread
From: Nikunj A Dadhania @ 2016-07-26 11:58 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, nikunj, bharata, aneesh.kumar

Similar to divw, implement branch-less divd.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/translate.c | 48 ++++++++++++++++++++++++++----------------------
 1 file changed, 26 insertions(+), 22 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 69d9ae0..ba22e13 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1122,37 +1122,41 @@ GEN_DIVE(divweo, divwe, 1);
 static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1,
                                      TCGv arg2, int sign, int compute_ov)
 {
-    TCGLabel *l1 = gen_new_label();
-    TCGLabel *l2 = gen_new_label();
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+    TCGv_i64 t3 = tcg_temp_new_i64();
 
-    tcg_gen_brcondi_i64(TCG_COND_EQ, arg2, 0, l1);
-    if (sign) {
-        TCGLabel *l3 = gen_new_label();
-        tcg_gen_brcondi_i64(TCG_COND_NE, arg2, -1, l3);
-        tcg_gen_brcondi_i64(TCG_COND_EQ, arg1, INT64_MIN, l1);
-        gen_set_label(l3);
-        tcg_gen_div_i64(ret, arg1, arg2);
-    } else {
-        tcg_gen_divu_i64(ret, arg1, arg2);
-    }
-    if (compute_ov) {
-        tcg_gen_movi_tl(cpu_ov, 0);
-    }
-    tcg_gen_br(l2);
-    gen_set_label(l1);
+    tcg_gen_mov_i64(t0, arg1);
+    tcg_gen_mov_i64(t1, arg2);
     if (sign) {
-        tcg_gen_sari_i64(ret, arg1, 63);
+        tcg_gen_setcondi_i64(TCG_COND_EQ, t2, t0, INT64_MIN);
+        tcg_gen_setcondi_i64(TCG_COND_EQ, t3, t1, -1);
+        tcg_gen_and_i64(t2, t2, t3);
+        tcg_gen_setcondi_i64(TCG_COND_EQ, t3, t1, 0);
+        tcg_gen_or_i64(t2, t2, t3);
+        tcg_gen_movi_i64(t3, 0);
+        tcg_gen_movcond_i64(TCG_COND_NE, t1, t2, t3, t2, t1);
+        tcg_gen_div_i64(ret, t0, t1);
     } else {
-        tcg_gen_movi_i64(ret, 0);
+        tcg_gen_setcondi_i64(TCG_COND_EQ, t2, t1, 0);
+        tcg_gen_movi_i64(t3, 0);
+        tcg_gen_movcond_i64(TCG_COND_NE, t1, t2, t3, t2, t1);
+        tcg_gen_divu_i64(ret, t0, t1);
     }
     if (compute_ov) {
-        tcg_gen_movi_tl(cpu_ov, 1);
-        tcg_gen_movi_tl(cpu_so, 1);
+        tcg_gen_mov_tl(cpu_ov, t2);
+        tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
     }
-    gen_set_label(l2);
+    tcg_temp_free_i64(t0);
+    tcg_temp_free_i64(t1);
+    tcg_temp_free_i64(t2);
+    tcg_temp_free_i64(t3);
+
     if (unlikely(Rc(ctx->opcode) != 0))
         gen_set_Rc0(ctx, ret);
 }
+
 #define GEN_INT_ARITH_DIVD(name, opc3, sign, compute_ov)                      \
 static void glue(gen_, name)(DisasContext *ctx)                                       \
 {                                                                             \
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH v4 09/15] target-ppc: add cnttzd[.] instruction
       [not found] <1469534318-5549-1-git-send-email-nikunj@linux.vnet.ibm.com>
                   ` (7 preceding siblings ...)
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 08/15] target-ppc: implement branch-less divd[o][.] Nikunj A Dadhania
@ 2016-07-26 11:58 ` Nikunj A Dadhania
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 10/15] target-ppc: add cnttzw[.] instruction Nikunj A Dadhania
                   ` (6 subsequent siblings)
  15 siblings, 0 replies; 23+ messages in thread
From: Nikunj A Dadhania @ 2016-07-26 11:58 UTC (permalink / raw)
  To: qemu-ppc, david, rth
  Cc: qemu-devel, nikunj, bharata, aneesh.kumar, Sandipan Das

From: Sandipan Das <sandipandas1990@gmail.com>

Add ISA3.0 Count trailing zeros double word

Signed-off-by: Sandipan Das <sandipandas1990@gmail.com>
[ added ISA300 flag ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-ppc/helper.h     |  1 +
 target-ppc/int_helper.c |  5 +++++
 target-ppc/translate.c  | 10 ++++++++++
 3 files changed, 16 insertions(+)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 1f5cfd0..0c29c01 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -44,6 +44,7 @@ DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 DEF_HELPER_3(sraw, tl, env, tl, tl)
 #if defined(TARGET_PPC64)
 DEF_HELPER_FLAGS_1(cntlzd, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(cnttzd, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(popcntd, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_3(srad, tl, env, tl, tl)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 7445376..93e8dfa 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -150,6 +150,11 @@ target_ulong helper_cntlzd(target_ulong t)
 {
     return clz64(t);
 }
+
+target_ulong helper_cnttzd(target_ulong t)
+{
+    return ctz64(t);
+}
 #endif
 
 #if defined(TARGET_PPC64)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index ba22e13..038106d 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1824,6 +1824,15 @@ static void gen_cntlzd(DisasContext *ctx)
     if (unlikely(Rc(ctx->opcode) != 0))
         gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
 }
+
+/* cnttzd */
+static void gen_cnttzd(DisasContext *ctx)
+{
+    gen_helper_cnttzd(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
+    if (unlikely(Rc(ctx->opcode) != 0)) {
+        gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
+    }
+}
 #endif
 
 /***                             Integer rotate                            ***/
@@ -10061,6 +10070,7 @@ GEN_HANDLER_E(prtyw, 0x1F, 0x1A, 0x04, 0x0000F801, PPC_NONE, PPC2_ISA205),
 #if defined(TARGET_PPC64)
 GEN_HANDLER(popcntd, 0x1F, 0x1A, 0x0F, 0x0000F801, PPC_POPCNTWD),
 GEN_HANDLER(cntlzd, 0x1F, 0x1A, 0x01, 0x00000000, PPC_64B),
+GEN_HANDLER_E(cnttzd, 0x1F, 0x1A, 0x11, 0x00000000, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(prtyd, 0x1F, 0x1A, 0x05, 0x0000F801, PPC_NONE, PPC2_ISA205),
 GEN_HANDLER_E(bpermd, 0x1F, 0x1C, 0x07, 0x00000001, PPC_NONE, PPC2_PERM_ISA206),
 #endif
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH v4 10/15] target-ppc: add cnttzw[.] instruction
       [not found] <1469534318-5549-1-git-send-email-nikunj@linux.vnet.ibm.com>
                   ` (8 preceding siblings ...)
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 09/15] target-ppc: add cnttzd[.] instruction Nikunj A Dadhania
@ 2016-07-26 11:58 ` Nikunj A Dadhania
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 11/15] target-ppc: add cmpeqb instruction Nikunj A Dadhania
                   ` (5 subsequent siblings)
  15 siblings, 0 replies; 23+ messages in thread
From: Nikunj A Dadhania @ 2016-07-26 11:58 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, nikunj, bharata, aneesh.kumar

Add ISA3.0: Count trailing zeros word instruction.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-ppc/helper.h     |  1 +
 target-ppc/int_helper.c |  5 +++++
 target-ppc/translate.c  | 11 +++++++++++
 3 files changed, 17 insertions(+)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 0c29c01..9c79808 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -38,6 +38,7 @@ DEF_HELPER_4(divweu, tl, env, tl, tl, i32)
 DEF_HELPER_4(divwe, tl, env, tl, tl, i32)
 
 DEF_HELPER_FLAGS_1(cntlzw, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(cnttzw, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 93e8dfa..02b6df3 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -145,6 +145,11 @@ target_ulong helper_cntlzw(target_ulong t)
     return clz32(t);
 }
 
+target_ulong helper_cnttzw(target_ulong t)
+{
+    return ctz32(t);
+}
+
 #if defined(TARGET_PPC64)
 target_ulong helper_cntlzd(target_ulong t)
 {
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 038106d..b1a875e 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1584,6 +1584,16 @@ static void gen_cntlzw(DisasContext *ctx)
     if (unlikely(Rc(ctx->opcode) != 0))
         gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
 }
+
+/* cnttzw */
+static void gen_cnttzw(DisasContext *ctx)
+{
+    gen_helper_cnttzw(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
+    if (unlikely(Rc(ctx->opcode) != 0)) {
+        gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
+    }
+}
+
 /* eqv & eqv. */
 GEN_LOGICAL2(eqv, tcg_gen_eqv_tl, 0x08, PPC_INTEGER);
 /* extsb & extsb. */
@@ -10058,6 +10068,7 @@ GEN_HANDLER(subfic, 0x08, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
 GEN_HANDLER2(andi_, "andi.", 0x1C, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
 GEN_HANDLER2(andis_, "andis.", 0x1D, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
 GEN_HANDLER(cntlzw, 0x1F, 0x1A, 0x00, 0x00000000, PPC_INTEGER),
+GEN_HANDLER_E(cnttzw, 0x1F, 0x1A, 0x10, 0x00000000, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER(or, 0x1F, 0x1C, 0x0D, 0x00000000, PPC_INTEGER),
 GEN_HANDLER(xor, 0x1F, 0x1C, 0x09, 0x00000000, PPC_INTEGER),
 GEN_HANDLER(ori, 0x18, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH v4 11/15] target-ppc: add cmpeqb instruction
       [not found] <1469534318-5549-1-git-send-email-nikunj@linux.vnet.ibm.com>
                   ` (9 preceding siblings ...)
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 10/15] target-ppc: add cnttzw[.] instruction Nikunj A Dadhania
@ 2016-07-26 11:58 ` Nikunj A Dadhania
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 12/15] target-ppc: add setb instruction Nikunj A Dadhania
                   ` (4 subsequent siblings)
  15 siblings, 0 replies; 23+ messages in thread
From: Nikunj A Dadhania @ 2016-07-26 11:58 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, nikunj, bharata, aneesh.kumar

Search for a byte in the stream of 8 bytes provided in the register

Suggested-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-ppc/helper.h     |  1 +
 target-ppc/int_helper.c | 22 ++++++++++++++++++++++
 target-ppc/translate.c  | 12 ++++++++++++
 3 files changed, 35 insertions(+)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 9c79808..9e4bb7b 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -44,6 +44,7 @@ DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 DEF_HELPER_3(sraw, tl, env, tl, tl)
 #if defined(TARGET_PPC64)
+DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl)
 DEF_HELPER_FLAGS_1(cntlzd, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(cnttzd, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(popcntd, TCG_CALL_NO_RWG_SE, tl, tl)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 02b6df3..15947ad 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -151,6 +151,28 @@ target_ulong helper_cnttzw(target_ulong t)
 }
 
 #if defined(TARGET_PPC64)
+/* if x = 0xab, returns 0xabababababababab */
+#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
+
+/* subtract 1 from each byte, AND with the inverse, check if MSB is set at each
+ * byte.
+ * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
+ *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
+ */
+#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
+
+/* When you XOR the pattern and there is a match, that byte will be zero */
+#define hasvalue(x, n)  (haszero((x) ^ pattern(n)))
+
+uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
+{
+    return hasvalue(rb, ra) ? 1 << CRF_GT : 0;
+}
+
+#undef pattern
+#undef haszero
+#undef hasvalue
+
 target_ulong helper_cntlzd(target_ulong t)
 {
     return clz64(t);
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index b1a875e..8d56da8 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -854,6 +854,15 @@ static void gen_cmprb(DisasContext *ctx)
     tcg_temp_free_i32(src2hi);
 }
 
+#if defined(TARGET_PPC64)
+/* cmpeqb */
+static void gen_cmpeqb(DisasContext *ctx)
+{
+    gen_helper_cmpeqb(cpu_crf[crfD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
+                      cpu_gpr[rB(ctx->opcode)]);
+}
+#endif
+
 /* isel (PowerPC 2.03 specification) */
 static void gen_isel(DisasContext *ctx)
 {
@@ -10046,6 +10055,9 @@ GEN_HANDLER(cmp, 0x1F, 0x00, 0x00, 0x00400000, PPC_INTEGER),
 GEN_HANDLER(cmpi, 0x0B, 0xFF, 0xFF, 0x00400000, PPC_INTEGER),
 GEN_HANDLER(cmpl, 0x1F, 0x00, 0x01, 0x00400000, PPC_INTEGER),
 GEN_HANDLER(cmpli, 0x0A, 0xFF, 0xFF, 0x00400000, PPC_INTEGER),
+#if defined(TARGET_PPC64)
+GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x00600000, PPC_NONE, PPC2_ISA300),
+#endif
 GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x00000001, PPC_NONE, PPC2_ISA205),
 GEN_HANDLER_E(cmprb, 0x1F, 0x00, 0x06, 0x00400001, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER(isel, 0x1F, 0x0F, 0xFF, 0x00000001, PPC_ISEL),
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH v4 12/15] target-ppc: add setb instruction
       [not found] <1469534318-5549-1-git-send-email-nikunj@linux.vnet.ibm.com>
                   ` (10 preceding siblings ...)
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 11/15] target-ppc: add cmpeqb instruction Nikunj A Dadhania
@ 2016-07-26 11:58 ` Nikunj A Dadhania
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 13/15] target-ppc: add maddld instruction Nikunj A Dadhania
                   ` (3 subsequent siblings)
  15 siblings, 0 replies; 23+ messages in thread
From: Nikunj A Dadhania @ 2016-07-26 11:58 UTC (permalink / raw)
  To: qemu-ppc, david, rth
  Cc: qemu-devel, nikunj, bharata, aneesh.kumar, Vivek Andrew Sha

From: Vivek Andrew Sha <vivekandrewsha@gmail.com>

The CR number is provided in the opcode as - BFA (11:13)

Returns:
  -1 if bit 0 of CR field is set
   1 if bit 1 of CR field is set
   0 otherwise.

Signed-off-by: Vivek Andrew Sha <vivekandrewsha@gmail.com>
[ reworded commit, used 32bit ops as crf is 32bits ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-ppc/translate.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 8d56da8..fa1c28c 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -4868,6 +4868,27 @@ static void gen_mtspr(DisasContext *ctx)
     }
 }
 
+#if defined(TARGET_PPC64)
+/* setb */
+static void gen_setb(DisasContext *ctx)
+{
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i32 t8 = tcg_temp_new_i32();
+    TCGv_i32 tm1 = tcg_temp_new_i32();
+    int crf = crfS(ctx->opcode);
+
+    tcg_gen_setcondi_i32(TCG_COND_GEU, t0, cpu_crf[crf], 4);
+    tcg_gen_movi_i32(t8, 8);
+    tcg_gen_movi_i32(tm1, -1);
+    tcg_gen_movcond_i32(TCG_COND_GEU, t0, cpu_crf[crf], t8, tm1, t0);
+    tcg_gen_ext_i32_tl(cpu_gpr[rD(ctx->opcode)], t0);
+
+    tcg_temp_free_i32(t0);
+    tcg_temp_free_i32(t8);
+    tcg_temp_free_i32(tm1);
+}
+#endif
+
 /***                         Cache management                              ***/
 
 /* dcbf */
@@ -10186,6 +10207,7 @@ GEN_HANDLER(mftb, 0x1F, 0x13, 0x0B, 0x00000001, PPC_MFTB),
 GEN_HANDLER(mtcrf, 0x1F, 0x10, 0x04, 0x00000801, PPC_MISC),
 #if defined(TARGET_PPC64)
 GEN_HANDLER(mtmsrd, 0x1F, 0x12, 0x05, 0x001EF801, PPC_64B),
+GEN_HANDLER_E(setb, 0x1F, 0x00, 0x04, 0x0003F801, PPC_NONE, PPC2_ISA300),
 #endif
 GEN_HANDLER(mtmsr, 0x1F, 0x12, 0x04, 0x001EF801, PPC_MISC),
 GEN_HANDLER(mtspr, 0x1F, 0x13, 0x0E, 0x00000000, PPC_MISC),
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH v4 13/15] target-ppc: add maddld instruction
       [not found] <1469534318-5549-1-git-send-email-nikunj@linux.vnet.ibm.com>
                   ` (11 preceding siblings ...)
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 12/15] target-ppc: add setb instruction Nikunj A Dadhania
@ 2016-07-26 11:58 ` Nikunj A Dadhania
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 14/15] target-ppc: add maddhd and maddhdu instruction Nikunj A Dadhania
                   ` (2 subsequent siblings)
  15 siblings, 0 replies; 23+ messages in thread
From: Nikunj A Dadhania @ 2016-07-26 11:58 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, nikunj, bharata, aneesh.kumar

maddld: Multiply-Add Low Doubleword

Multiplies two 64-bit registers (RA * RB), adds third register(RC) to
the result(quadword) and returns the lower dword in the target
register(RT).

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-ppc/translate.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index fa1c28c..81a87e9 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7741,6 +7741,17 @@ GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20)
 GEN_VAFORM_PAIRED(vsel, vperm, 21)
 GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23)
 
+#if defined(TARGET_PPC64)
+static void gen_maddld(DisasContext *ctx)
+{
+    TCGv_i64 t1 = tcg_temp_new_i64();
+
+    tcg_gen_mul_i64(t1, cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
+    tcg_gen_add_i64(cpu_gpr[rD(ctx->opcode)], t1, cpu_gpr[rC(ctx->opcode)]);
+    tcg_temp_free_i64(t1);
+}
+#endif /* defined(TARGET_PPC64) */
+
 GEN_VXFORM_NOA(vclzb, 1, 28)
 GEN_VXFORM_NOA(vclzh, 1, 29)
 GEN_VXFORM_NOA(vclzw, 1, 30)
@@ -10356,6 +10367,9 @@ GEN_HANDLER(lvsr, 0x1f, 0x06, 0x01, 0x00000001, PPC_ALTIVEC),
 GEN_HANDLER(mfvscr, 0x04, 0x2, 0x18, 0x001ff800, PPC_ALTIVEC),
 GEN_HANDLER(mtvscr, 0x04, 0x2, 0x19, 0x03ff0000, PPC_ALTIVEC),
 GEN_HANDLER(vmladduhm, 0x04, 0x11, 0xFF, 0x00000000, PPC_ALTIVEC),
+#if defined(TARGET_PPC64)
+GEN_HANDLER_E(maddld, 0x04, 0x19, 0xFF, 0x00000000, PPC_NONE, PPC2_ISA300),
+#endif
 GEN_HANDLER2(evsel0, "evsel", 0x04, 0x1c, 0x09, 0x00000000, PPC_SPE),
 GEN_HANDLER2(evsel1, "evsel", 0x04, 0x1d, 0x09, 0x00000000, PPC_SPE),
 GEN_HANDLER2(evsel2, "evsel", 0x04, 0x1e, 0x09, 0x00000000, PPC_SPE),
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH v4 14/15] target-ppc: add maddhd and maddhdu instruction
       [not found] <1469534318-5549-1-git-send-email-nikunj@linux.vnet.ibm.com>
                   ` (12 preceding siblings ...)
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 13/15] target-ppc: add maddld instruction Nikunj A Dadhania
@ 2016-07-26 11:58 ` Nikunj A Dadhania
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 15/15] target-ppc: introduce opc4 for Expanded Opcode Nikunj A Dadhania
  2016-07-27  6:23 ` [Qemu-devel] [PATCH v4 00/15] POWER9 TCG enablements - part1 David Gibson
  15 siblings, 0 replies; 23+ messages in thread
From: Nikunj A Dadhania @ 2016-07-26 11:58 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, nikunj, bharata, aneesh.kumar

maddhd: Multiply-Add High Doubleword
maddhdu: Multiply-Add High Doubleword Unsigned

The above two instructions are dual form and differ by 1 bit
(31st bit)

Multiplies two 64-bit registers (RA * RB), adds third register(RC) to
the result(quadword) and returns the higher dword in the target
register(RT).

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-ppc/translate.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 81a87e9..ec7064f 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7750,6 +7750,29 @@ static void gen_maddld(DisasContext *ctx)
     tcg_gen_add_i64(cpu_gpr[rD(ctx->opcode)], t1, cpu_gpr[rC(ctx->opcode)]);
     tcg_temp_free_i64(t1);
 }
+
+/* maddhd maddhdu */
+static void gen_maddhd_maddhdu(DisasContext *ctx)
+{
+    TCGv_i64 lo = tcg_temp_new_i64();
+    TCGv_i64 hi = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+
+    if (Rc(ctx->opcode)) {
+        tcg_gen_mulu2_i64(lo, hi, cpu_gpr[rA(ctx->opcode)],
+                          cpu_gpr[rB(ctx->opcode)]);
+        tcg_gen_movi_i64(t1, 0);
+    } else {
+        tcg_gen_muls2_i64(lo, hi, cpu_gpr[rA(ctx->opcode)],
+                          cpu_gpr[rB(ctx->opcode)]);
+        tcg_gen_sari_i64(t1, cpu_gpr[rC(ctx->opcode)], 63);
+    }
+    tcg_gen_add2_i64(t1, cpu_gpr[rD(ctx->opcode)], lo, hi,
+                     cpu_gpr[rC(ctx->opcode)], t1);
+    tcg_temp_free_i64(lo);
+    tcg_temp_free_i64(hi);
+    tcg_temp_free_i64(t1);
+}
 #endif /* defined(TARGET_PPC64) */
 
 GEN_VXFORM_NOA(vclzb, 1, 28)
@@ -10368,6 +10391,8 @@ GEN_HANDLER(mfvscr, 0x04, 0x2, 0x18, 0x001ff800, PPC_ALTIVEC),
 GEN_HANDLER(mtvscr, 0x04, 0x2, 0x19, 0x03ff0000, PPC_ALTIVEC),
 GEN_HANDLER(vmladduhm, 0x04, 0x11, 0xFF, 0x00000000, PPC_ALTIVEC),
 #if defined(TARGET_PPC64)
+GEN_HANDLER_E(maddhd_maddhdu, 0x04, 0x18, 0xFF, 0x00000000, PPC_NONE,
+              PPC2_ISA300),
 GEN_HANDLER_E(maddld, 0x04, 0x19, 0xFF, 0x00000000, PPC_NONE, PPC2_ISA300),
 #endif
 GEN_HANDLER2(evsel0, "evsel", 0x04, 0x1c, 0x09, 0x00000000, PPC_SPE),
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Qemu-devel] [PATCH v4 15/15] target-ppc: introduce opc4 for Expanded Opcode
       [not found] <1469534318-5549-1-git-send-email-nikunj@linux.vnet.ibm.com>
                   ` (13 preceding siblings ...)
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 14/15] target-ppc: add maddhd and maddhdu instruction Nikunj A Dadhania
@ 2016-07-26 11:58 ` Nikunj A Dadhania
  2016-07-27  5:31   ` David Gibson
  2016-07-27  6:23 ` [Qemu-devel] [PATCH v4 00/15] POWER9 TCG enablements - part1 David Gibson
  15 siblings, 1 reply; 23+ messages in thread
From: Nikunj A Dadhania @ 2016-07-26 11:58 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, nikunj, bharata, aneesh.kumar

ISA 3.0 has introduced EO - Expanded Opcode. Introduce third level
indirect opcode table and corresponding parsing routines.

EO (11:12) Expanded opcode field
Formats: XX1

EO (11:15) Expanded opcode field
Formats: VX, X, XX2

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/translate.c      |  82 ++++++++++++++++++++--------
 target-ppc/translate_init.c | 126 ++++++++++++++++++++++++++++++++------------
 2 files changed, 154 insertions(+), 54 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index ec7064f..d522566 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -367,12 +367,13 @@ GEN_OPCODE2(name, onam, opc1, opc2, opc3, inval, type, PPC_NONE)
 #define GEN_HANDLER2_E(name, onam, opc1, opc2, opc3, inval, type, type2)      \
 GEN_OPCODE2(name, onam, opc1, opc2, opc3, inval, type, type2)
 
+#define GEN_HANDLER_E_2(name, opc1, opc2, opc3, opc4, inval, type, type2)     \
+GEN_OPCODE3(name, opc1, opc2, opc3, opc4, inval, type, type2)
+
 typedef struct opcode_t {
-    unsigned char opc1, opc2, opc3;
+    unsigned char opc1, opc2, opc3, opc4;
 #if HOST_LONG_BITS == 64 /* Explicitly align to 64 bits */
-    unsigned char pad[5];
-#else
-    unsigned char pad[1];
+    unsigned char pad[4];
 #endif
     opc_handler_t handler;
     const char *oname;
@@ -452,6 +453,8 @@ EXTRACT_HELPER(opc1, 26, 6);
 EXTRACT_HELPER(opc2, 1, 5);
 /* Opcode part 3 */
 EXTRACT_HELPER(opc3, 6, 5);
+/* Opcode part 4 */
+EXTRACT_HELPER(opc4, 16, 5);
 /* Update Cr0 flags */
 EXTRACT_HELPER(Rc, 0, 1);
 /* Update Cr6 flags (Altivec) */
@@ -589,7 +592,7 @@ EXTRACT_HELPER(SP, 19, 2);
     .opc1 = op1,                                                              \
     .opc2 = op2,                                                              \
     .opc3 = op3,                                                              \
-    .pad  = { 0, },                                                           \
+    .opc4 = 0xff,                                                             \
     .handler = {                                                              \
         .inval1  = invl,                                                      \
         .type = _typ,                                                         \
@@ -604,7 +607,7 @@ EXTRACT_HELPER(SP, 19, 2);
     .opc1 = op1,                                                              \
     .opc2 = op2,                                                              \
     .opc3 = op3,                                                              \
-    .pad  = { 0, },                                                           \
+    .opc4 = 0xff,                                                             \
     .handler = {                                                              \
         .inval1  = invl1,                                                     \
         .inval2  = invl2,                                                     \
@@ -620,7 +623,7 @@ EXTRACT_HELPER(SP, 19, 2);
     .opc1 = op1,                                                              \
     .opc2 = op2,                                                              \
     .opc3 = op3,                                                              \
-    .pad  = { 0, },                                                           \
+    .opc4 = 0xff,                                                             \
     .handler = {                                                              \
         .inval1  = invl,                                                      \
         .type = _typ,                                                         \
@@ -630,13 +633,28 @@ EXTRACT_HELPER(SP, 19, 2);
     },                                                                        \
     .oname = onam,                                                            \
 }
+#define GEN_OPCODE3(name, op1, op2, op3, op4, invl, _typ, _typ2)              \
+{                                                                             \
+    .opc1 = op1,                                                              \
+    .opc2 = op2,                                                              \
+    .opc3 = op3,                                                              \
+    .opc4 = op4,                                                              \
+    .handler = {                                                              \
+        .inval1  = invl,                                                      \
+        .type = _typ,                                                         \
+        .type2 = _typ2,                                                       \
+        .handler = &gen_##name,                                               \
+        .oname = stringify(name),                                             \
+    },                                                                        \
+    .oname = stringify(name),                                                 \
+}
 #else
 #define GEN_OPCODE(name, op1, op2, op3, invl, _typ, _typ2)                    \
 {                                                                             \
     .opc1 = op1,                                                              \
     .opc2 = op2,                                                              \
     .opc3 = op3,                                                              \
-    .pad  = { 0, },                                                           \
+    .opc4 = 0xff,                                                             \
     .handler = {                                                              \
         .inval1  = invl,                                                      \
         .type = _typ,                                                         \
@@ -650,7 +668,7 @@ EXTRACT_HELPER(SP, 19, 2);
     .opc1 = op1,                                                              \
     .opc2 = op2,                                                              \
     .opc3 = op3,                                                              \
-    .pad  = { 0, },                                                           \
+    .opc4 = 0xff,                                                             \
     .handler = {                                                              \
         .inval1  = invl1,                                                     \
         .inval2  = invl2,                                                     \
@@ -665,7 +683,7 @@ EXTRACT_HELPER(SP, 19, 2);
     .opc1 = op1,                                                              \
     .opc2 = op2,                                                              \
     .opc3 = op3,                                                              \
-    .pad  = { 0, },                                                           \
+    .opc4 = 0xff,                                                             \
     .handler = {                                                              \
         .inval1  = invl,                                                      \
         .type = _typ,                                                         \
@@ -674,6 +692,20 @@ EXTRACT_HELPER(SP, 19, 2);
     },                                                                        \
     .oname = onam,                                                            \
 }
+#define GEN_OPCODE3(name, op1, op2, op3, op4, invl, _typ, _typ2)              \
+{                                                                             \
+    .opc1 = op1,                                                              \
+    .opc2 = op2,                                                              \
+    .opc3 = op3,                                                              \
+    .opc4 = op4,                                                              \
+    .handler = {                                                              \
+        .inval1  = invl,                                                      \
+        .type = _typ,                                                         \
+        .type2 = _typ2,                                                       \
+        .handler = &gen_##name,                                               \
+    },                                                                        \
+    .oname = stringify(name),                                                 \
+}
 #endif
 
 /* SPR load/store helpers */
@@ -11906,9 +11938,10 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
         } else {
             ctx.opcode = cpu_ldl_code(env, ctx.nip);
         }
-        LOG_DISAS("translate opcode %08x (%02x %02x %02x) (%s)\n",
-                    ctx.opcode, opc1(ctx.opcode), opc2(ctx.opcode),
-                    opc3(ctx.opcode), ctx.le_mode ? "little" : "big");
+        LOG_DISAS("translate opcode %08x (%02x %02x %02x %02x) (%s)\n",
+                  ctx.opcode, opc1(ctx.opcode), opc2(ctx.opcode),
+                  opc3(ctx.opcode), opc4(ctx.opcode),
+                  ctx.le_mode ? "little" : "big");
         ctx.nip += 4;
         table = env->opcodes;
         handler = table[opc1(ctx.opcode)];
@@ -11918,14 +11951,20 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
             if (is_indirect_opcode(handler)) {
                 table = ind_table(handler);
                 handler = table[opc3(ctx.opcode)];
+                if (is_indirect_opcode(handler)) {
+                    table = ind_table(handler);
+                    handler = table[opc4(ctx.opcode)];
+                }
             }
         }
         /* Is opcode *REALLY* valid ? */
         if (unlikely(handler->handler == &gen_invalid)) {
             qemu_log_mask(LOG_GUEST_ERROR, "invalid/unsupported opcode: "
-                          "%02x - %02x - %02x (%08x) " TARGET_FMT_lx " %d\n",
+                          "%02x - %02x - %02x - %02x (%08x) "
+                          TARGET_FMT_lx " %d\n",
                           opc1(ctx.opcode), opc2(ctx.opcode),
-                          opc3(ctx.opcode), ctx.opcode, ctx.nip - 4, (int)msr_ir);
+                          opc3(ctx.opcode), opc4(ctx.opcode),
+                          ctx.opcode, ctx.nip - 4, (int)msr_ir);
         } else {
             uint32_t inval;
 
@@ -11937,9 +11976,10 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
 
             if (unlikely((ctx.opcode & inval) != 0)) {
                 qemu_log_mask(LOG_GUEST_ERROR, "invalid bits: %08x for opcode: "
-                              "%02x - %02x - %02x (%08x) " TARGET_FMT_lx "\n",
-                              ctx.opcode & inval, opc1(ctx.opcode),
-                              opc2(ctx.opcode), opc3(ctx.opcode),
+                              "%02x - %02x - %02x - %02x (%08x) "
+                              TARGET_FMT_lx "\n", ctx.opcode & inval,
+                              opc1(ctx.opcode), opc2(ctx.opcode),
+                              opc3(ctx.opcode), opc4(ctx.opcode),
                               ctx.opcode, ctx.nip - 4);
                 gen_inval_exception(ctxp, POWERPC_EXCP_INVAL_INVAL);
                 break;
@@ -11966,9 +12006,9 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
             break;
         }
         if (tcg_check_temp_count()) {
-            fprintf(stderr, "Opcode %02x %02x %02x (%08x) leaked temporaries\n",
-                    opc1(ctx.opcode), opc2(ctx.opcode), opc3(ctx.opcode),
-                    ctx.opcode);
+            fprintf(stderr, "Opcode %02x %02x %02x %02x (%08x) leaked "
+                    "temporaries\n", opc1(ctx.opcode), opc2(ctx.opcode),
+                    opc3(ctx.opcode), opc4(ctx.opcode), ctx.opcode);
             exit(1);
         }
     }
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 0d8cff1..f627cfe 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -9253,13 +9253,47 @@ static int register_dblind_insn (opc_handler_t **ppc_opcodes,
     return 0;
 }
 
+static int register_trplind_insn(opc_handler_t **ppc_opcodes,
+                                 unsigned char idx1, unsigned char idx2,
+                                 unsigned char idx3, unsigned char idx4,
+                                 opc_handler_t *handler)
+{
+    opc_handler_t **table;
+
+    if (register_ind_in_table(ppc_opcodes, idx1, idx2, NULL) < 0) {
+        printf("*** ERROR: unable to join indirect table idx "
+               "[%02x-%02x]\n", idx1, idx2);
+        return -1;
+    }
+    table = ind_table(ppc_opcodes[idx1]);
+    if (register_ind_in_table(table, idx2, idx3, NULL) < 0) {
+        printf("*** ERROR: unable to join 2nd-level indirect table idx "
+               "[%02x-%02x-%02x]\n", idx1, idx2, idx3);
+        return -1;
+    }
+    table = ind_table(table[idx2]);
+    if (register_ind_in_table(table, idx3, idx4, handler) < 0) {
+        printf("*** ERROR: unable to insert opcode "
+               "[%02x-%02x-%02x-%02x]\n", idx1, idx2, idx3, idx4);
+        return -1;
+    }
+    return 0;
+}
 static int register_insn (opc_handler_t **ppc_opcodes, opcode_t *insn)
 {
     if (insn->opc2 != 0xFF) {
         if (insn->opc3 != 0xFF) {
-            if (register_dblind_insn(ppc_opcodes, insn->opc1, insn->opc2,
-                                     insn->opc3, &insn->handler) < 0)
-                return -1;
+            if (insn->opc4 != 0xFF) {
+                if (register_trplind_insn(ppc_opcodes, insn->opc1, insn->opc2,
+                                          insn->opc3, insn->opc4,
+                                          &insn->handler) < 0) {
+                    return -1;
+                }
+            } else {
+                if (register_dblind_insn(ppc_opcodes, insn->opc1, insn->opc2,
+                                         insn->opc3, &insn->handler) < 0)
+                    return -1;
+            }
         } else {
             if (register_ind_insn(ppc_opcodes, insn->opc1,
                                   insn->opc2, &insn->handler) < 0)
@@ -9335,7 +9369,7 @@ static void dump_ppc_insns (CPUPPCState *env)
 {
     opc_handler_t **table, *handler;
     const char *p, *q;
-    uint8_t opc1, opc2, opc3;
+    uint8_t opc1, opc2, opc3, opc4;
 
     printf("Instructions set:\n");
     /* opc1 is 6 bits long */
@@ -9355,34 +9389,50 @@ static void dump_ppc_insns (CPUPPCState *env)
                     for (opc3 = 0; opc3 < PPC_CPU_INDIRECT_OPCODES_LEN;
                             opc3++) {
                         handler = table[opc3];
-                        if (handler->handler != &gen_invalid) {
-                            /* Special hack to properly dump SPE insns */
-                            p = strchr(handler->oname, '_');
-                            if (p == NULL) {
-                                printf("INSN: %02x %02x %02x (%02d %04d) : "
-                                       "%s\n",
-                                       opc1, opc2, opc3, opc1,
-                                       (opc3 << 5) | opc2,
-                                       handler->oname);
-                            } else {
-                                q = "speundef";
-                                if ((p - handler->oname) != strlen(q) ||
-                                    memcmp(handler->oname, q, strlen(q)) != 0) {
-                                    /* First instruction */
-                                    printf("INSN: %02x %02x %02x (%02d %04d) : "
-                                           "%.*s\n",
-                                           opc1, opc2 << 1, opc3, opc1,
-                                           (opc3 << 6) | (opc2 << 1),
-                                           (int)(p - handler->oname),
+                        if (is_indirect_opcode(handler)) {
+                            table = ind_table(handler);
+                            /* opc4 is 5 bits long */
+                            for (opc4 = 0; opc4 < PPC_CPU_INDIRECT_OPCODES_LEN;
+                                 opc4++) {
+                                handler = table[opc4];
+                                if (handler->handler != &gen_invalid) {
+                                    printf("INSN: %02x %02x %02x %02x -- "
+                                           "(%02d %04d %02d) : %s\n",
+                                           opc1, opc2, opc3, opc4,
+                                           opc1, (opc3 << 5) | opc2, opc4,
                                            handler->oname);
                                 }
-                                if (strcmp(p + 1, q) != 0) {
-                                    /* Second instruction */
+                            }
+                        } else {
+                            if (handler->handler != &gen_invalid) {
+                                /* Special hack to properly dump SPE insns */
+                                p = strchr(handler->oname, '_');
+                                if (p == NULL) {
                                     printf("INSN: %02x %02x %02x (%02d %04d) : "
                                            "%s\n",
-                                           opc1, (opc2 << 1) | 1, opc3, opc1,
-                                           (opc3 << 6) | (opc2 << 1) | 1,
-                                           p + 1);
+                                           opc1, opc2, opc3, opc1,
+                                           (opc3 << 5) | opc2,
+                                           handler->oname);
+                                } else {
+                                    q = "speundef";
+                                    if ((p - handler->oname) != strlen(q) ||
+                                        memcmp(handler->oname, q, strlen(q)) != 0) {
+                                        /* First instruction */
+                                        printf("INSN: %02x %02x %02x"
+                                               "(%02d %04d) : %.*s\n",
+                                               opc1, opc2 << 1, opc3, opc1,
+                                               (opc3 << 6) | (opc2 << 1),
+                                               (int)(p - handler->oname),
+                                               handler->oname);
+                                    }
+                                    if (strcmp(p + 1, q) != 0) {
+                                        /* Second instruction */
+                                        printf("INSN: %02x %02x %02x "
+                                               "(%02d %04d) : %s\n", opc1,
+                                               (opc2 << 1) | 1, opc3, opc1,
+                                               (opc3 << 6) | (opc2 << 1) | 1,
+                                               p + 1);
+                                    }
                                 }
                             }
                         }
@@ -9858,8 +9908,8 @@ static void ppc_cpu_unrealizefn(DeviceState *dev, Error **errp)
 {
     PowerPCCPU *cpu = POWERPC_CPU(dev);
     CPUPPCState *env = &cpu->env;
-    opc_handler_t **table;
-    int i, j;
+    opc_handler_t **table, **table_2;
+    int i, j, k;
 
     cpu_exec_exit(CPU(dev));
 
@@ -9870,10 +9920,20 @@ static void ppc_cpu_unrealizefn(DeviceState *dev, Error **errp)
         if (is_indirect_opcode(env->opcodes[i])) {
             table = ind_table(env->opcodes[i]);
             for (j = 0; j < PPC_CPU_INDIRECT_OPCODES_LEN; j++) {
-                if (table[j] != &invalid_handler &&
-                        is_indirect_opcode(table[j])) {
+                if (table[j] == &invalid_handler) {
+                    continue;
+                }
+                if (is_indirect_opcode(table[j])) {
+                    table_2 = ind_table(table[j]);
+                    for (k = 0; k < PPC_CPU_INDIRECT_OPCODES_LEN; k++) {
+                        if (table_2[k] != &invalid_handler &&
+                            is_indirect_opcode(table_2[k])) {
+                            g_free((opc_handler_t *)((uintptr_t)table_2[k] &
+                                                     ~PPC_INDIRECT));
+                        }
+                    }
                     g_free((opc_handler_t *)((uintptr_t)table[j] &
-                        ~PPC_INDIRECT));
+                                             ~PPC_INDIRECT));
                 }
             }
             g_free((opc_handler_t *)((uintptr_t)env->opcodes[i] &
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH v4 07/15] target-ppc: implement branch-less divw[o][.]
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 07/15] target-ppc: implement branch-less divw[o][.] Nikunj A Dadhania
@ 2016-07-27  5:19   ` David Gibson
  2016-07-27  6:17     ` Nikunj A Dadhania
  0 siblings, 1 reply; 23+ messages in thread
From: David Gibson @ 2016-07-27  5:19 UTC (permalink / raw)
  To: Nikunj A Dadhania; +Cc: qemu-ppc, rth, qemu-devel, bharata, aneesh.kumar

[-- Attachment #1: Type: text/plain, Size: 3404 bytes --]

On Tue, Jul 26, 2016 at 05:28:30PM +0530, Nikunj A Dadhania wrote:
> While implementing the modulo instructions, I noticed that the existing
> implementation uses many branches. Change the logic to make the code
> branch-less. The undefined result is set to the dividend in case of
> invalid input.
> 
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> ---
>  target-ppc/translate.c | 48 +++++++++++++++++++++++-------------------------
>  1 file changed, 23 insertions(+), 25 deletions(-)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 7c7328f..69d9ae0 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -1049,41 +1049,39 @@ static void gen_addpcis(DisasContext *ctx)
>  static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1,
>                                       TCGv arg2, int sign, int compute_ov)
>  {
> -    TCGLabel *l1 = gen_new_label();
> -    TCGLabel *l2 = gen_new_label();
> -    TCGv_i32 t0 = tcg_temp_local_new_i32();
> -    TCGv_i32 t1 = tcg_temp_local_new_i32();
> +    TCGv_i32 t0 = tcg_temp_new_i32();
> +    TCGv_i32 t1 = tcg_temp_new_i32();
> +    TCGv_i32 t2 = tcg_temp_new_i32();
> +    TCGv_i32 t3 = tcg_temp_new_i32();
>  
>      tcg_gen_trunc_tl_i32(t0, arg1);
>      tcg_gen_trunc_tl_i32(t1, arg2);
> -    tcg_gen_brcondi_i32(TCG_COND_EQ, t1, 0, l1);
> -    if (sign) {
> -        TCGLabel *l3 = gen_new_label();
> -        tcg_gen_brcondi_i32(TCG_COND_NE, t1, -1, l3);
> -        tcg_gen_brcondi_i32(TCG_COND_EQ, t0, INT32_MIN, l1);
> -        gen_set_label(l3);
> -        tcg_gen_div_i32(t0, t0, t1);
> -    } else {
> -        tcg_gen_divu_i32(t0, t0, t1);
> -    }
> -    if (compute_ov) {
> -        tcg_gen_movi_tl(cpu_ov, 0);
> -    }
> -    tcg_gen_br(l2);
> -    gen_set_label(l1);
>      if (sign) {
> -        tcg_gen_sari_i32(t0, t0, 31);
> +        tcg_gen_setcondi_i32(TCG_COND_EQ, t2, t0, INT_MIN);
> +        tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, -1);
> +        tcg_gen_and_i32(t2, t2, t3);
> +        tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, 0);
> +        tcg_gen_or_i32(t2, t2, t3);
> +        tcg_gen_movi_i32(t3, 0);
> +        tcg_gen_movcond_i32(TCG_COND_NE, t1, t2, t3, t2, t1);
> +        tcg_gen_div_i32(t3, t0, t1);
> +        tcg_gen_extu_i32_tl(ret, t3);

Should this be a signed extend, given it's a signed divide?

>      } else {
> -        tcg_gen_movi_i32(t0, 0);
> +        tcg_gen_setcondi_i32(TCG_COND_EQ, t2, t1, 0);
> +        tcg_gen_movi_i32(t3, 0);
> +        tcg_gen_movcond_i32(TCG_COND_NE, t1, t2, t3, t2, t1);
> +        tcg_gen_divu_i32(t3, t0, t1);
> +        tcg_gen_extu_i32_tl(ret, t3);
>      }
>      if (compute_ov) {
> -        tcg_gen_movi_tl(cpu_ov, 1);
> -        tcg_gen_movi_tl(cpu_so, 1);
> +        tcg_gen_extu_i32_tl(cpu_ov, t2);
> +        tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
>      }
> -    gen_set_label(l2);
> -    tcg_gen_extu_i32_tl(ret, t0);
>      tcg_temp_free_i32(t0);
>      tcg_temp_free_i32(t1);
> +    tcg_temp_free_i32(t2);
> +    tcg_temp_free_i32(t3);
> +
>      if (unlikely(Rc(ctx->opcode) != 0))
>          gen_set_Rc0(ctx, ret);
>  }

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH v4 15/15] target-ppc: introduce opc4 for Expanded Opcode
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 15/15] target-ppc: introduce opc4 for Expanded Opcode Nikunj A Dadhania
@ 2016-07-27  5:31   ` David Gibson
  0 siblings, 0 replies; 23+ messages in thread
From: David Gibson @ 2016-07-27  5:31 UTC (permalink / raw)
  To: Nikunj A Dadhania; +Cc: qemu-ppc, rth, qemu-devel, bharata, aneesh.kumar

[-- Attachment #1: Type: text/plain, Size: 22164 bytes --]

On Tue, Jul 26, 2016 at 05:28:38PM +0530, Nikunj A Dadhania wrote:
> ISA 3.0 introduces EO - Expanded Opcode. Introduce a third-level
> indirect opcode table and the corresponding parsing routines.
> 
> EO (11:12) Expanded opcode field
> Formats: XX1
> 
> EO (11:15) Expanded opcode field
> Formats: VX, X, XX2
> 
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  target-ppc/translate.c      |  82 ++++++++++++++++++++--------
>  target-ppc/translate_init.c | 126 ++++++++++++++++++++++++++++++++------------
>  2 files changed, 154 insertions(+), 54 deletions(-)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index ec7064f..d522566 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -367,12 +367,13 @@ GEN_OPCODE2(name, onam, opc1, opc2, opc3, inval, type, PPC_NONE)
>  #define GEN_HANDLER2_E(name, onam, opc1, opc2, opc3, inval, type, type2)      \
>  GEN_OPCODE2(name, onam, opc1, opc2, opc3, inval, type, type2)
>  
> +#define GEN_HANDLER_E_2(name, opc1, opc2, opc3, opc4, inval, type, type2)     \
> +GEN_OPCODE3(name, opc1, opc2, opc3, opc4, inval, type, type2)
> +
>  typedef struct opcode_t {
> -    unsigned char opc1, opc2, opc3;
> +    unsigned char opc1, opc2, opc3, opc4;
>  #if HOST_LONG_BITS == 64 /* Explicitly align to 64 bits */
> -    unsigned char pad[5];
> -#else
> -    unsigned char pad[1];
> +    unsigned char pad[4];
>  #endif
>      opc_handler_t handler;
>      const char *oname;
> @@ -452,6 +453,8 @@ EXTRACT_HELPER(opc1, 26, 6);
>  EXTRACT_HELPER(opc2, 1, 5);
>  /* Opcode part 3 */
>  EXTRACT_HELPER(opc3, 6, 5);
> +/* Opcode part 4 */
> +EXTRACT_HELPER(opc4, 16, 5);
>  /* Update Cr0 flags */
>  EXTRACT_HELPER(Rc, 0, 1);
>  /* Update Cr6 flags (Altivec) */
> @@ -589,7 +592,7 @@ EXTRACT_HELPER(SP, 19, 2);
>      .opc1 = op1,                                                              \
>      .opc2 = op2,                                                              \
>      .opc3 = op3,                                                              \
> -    .pad  = { 0, },                                                           \
> +    .opc4 = 0xff,                                                             \
>      .handler = {                                                              \
>          .inval1  = invl,                                                      \
>          .type = _typ,                                                         \
> @@ -604,7 +607,7 @@ EXTRACT_HELPER(SP, 19, 2);
>      .opc1 = op1,                                                              \
>      .opc2 = op2,                                                              \
>      .opc3 = op3,                                                              \
> -    .pad  = { 0, },                                                           \
> +    .opc4 = 0xff,                                                             \
>      .handler = {                                                              \
>          .inval1  = invl1,                                                     \
>          .inval2  = invl2,                                                     \
> @@ -620,7 +623,7 @@ EXTRACT_HELPER(SP, 19, 2);
>      .opc1 = op1,                                                              \
>      .opc2 = op2,                                                              \
>      .opc3 = op3,                                                              \
> -    .pad  = { 0, },                                                           \
> +    .opc4 = 0xff,                                                             \
>      .handler = {                                                              \
>          .inval1  = invl,                                                      \
>          .type = _typ,                                                         \
> @@ -630,13 +633,28 @@ EXTRACT_HELPER(SP, 19, 2);
>      },                                                                        \
>      .oname = onam,                                                            \
>  }
> +#define GEN_OPCODE3(name, op1, op2, op3, op4, invl, _typ, _typ2)              \
> +{                                                                             \
> +    .opc1 = op1,                                                              \
> +    .opc2 = op2,                                                              \
> +    .opc3 = op3,                                                              \
> +    .opc4 = op4,                                                              \
> +    .handler = {                                                              \
> +        .inval1  = invl,                                                      \
> +        .type = _typ,                                                         \
> +        .type2 = _typ2,                                                       \
> +        .handler = &gen_##name,                                               \
> +        .oname = stringify(name),                                             \
> +    },                                                                        \
> +    .oname = stringify(name),                                                 \
> +}
>  #else
>  #define GEN_OPCODE(name, op1, op2, op3, invl, _typ, _typ2)                    \
>  {                                                                             \
>      .opc1 = op1,                                                              \
>      .opc2 = op2,                                                              \
>      .opc3 = op3,                                                              \
> -    .pad  = { 0, },                                                           \
> +    .opc4 = 0xff,                                                             \
>      .handler = {                                                              \
>          .inval1  = invl,                                                      \
>          .type = _typ,                                                         \
> @@ -650,7 +668,7 @@ EXTRACT_HELPER(SP, 19, 2);
>      .opc1 = op1,                                                              \
>      .opc2 = op2,                                                              \
>      .opc3 = op3,                                                              \
> -    .pad  = { 0, },                                                           \
> +    .opc4 = 0xff,                                                             \
>      .handler = {                                                              \
>          .inval1  = invl1,                                                     \
>          .inval2  = invl2,                                                     \
> @@ -665,7 +683,7 @@ EXTRACT_HELPER(SP, 19, 2);
>      .opc1 = op1,                                                              \
>      .opc2 = op2,                                                              \
>      .opc3 = op3,                                                              \
> -    .pad  = { 0, },                                                           \
> +    .opc4 = 0xff,                                                             \
>      .handler = {                                                              \
>          .inval1  = invl,                                                      \
>          .type = _typ,                                                         \
> @@ -674,6 +692,20 @@ EXTRACT_HELPER(SP, 19, 2);
>      },                                                                        \
>      .oname = onam,                                                            \
>  }
> +#define GEN_OPCODE3(name, op1, op2, op3, op4, invl, _typ, _typ2)              \
> +{                                                                             \
> +    .opc1 = op1,                                                              \
> +    .opc2 = op2,                                                              \
> +    .opc3 = op3,                                                              \
> +    .opc4 = op4,                                                              \
> +    .handler = {                                                              \
> +        .inval1  = invl,                                                      \
> +        .type = _typ,                                                         \
> +        .type2 = _typ2,                                                       \
> +        .handler = &gen_##name,                                               \
> +    },                                                                        \
> +    .oname = stringify(name),                                                 \
> +}
>  #endif
>  
>  /* SPR load/store helpers */
> @@ -11906,9 +11938,10 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
>          } else {
>              ctx.opcode = cpu_ldl_code(env, ctx.nip);
>          }
> -        LOG_DISAS("translate opcode %08x (%02x %02x %02x) (%s)\n",
> -                    ctx.opcode, opc1(ctx.opcode), opc2(ctx.opcode),
> -                    opc3(ctx.opcode), ctx.le_mode ? "little" : "big");
> +        LOG_DISAS("translate opcode %08x (%02x %02x %02x %02x) (%s)\n",
> +                  ctx.opcode, opc1(ctx.opcode), opc2(ctx.opcode),
> +                  opc3(ctx.opcode), opc4(ctx.opcode),
> +                  ctx.le_mode ? "little" : "big");
>          ctx.nip += 4;
>          table = env->opcodes;
>          handler = table[opc1(ctx.opcode)];
> @@ -11918,14 +11951,20 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
>              if (is_indirect_opcode(handler)) {
>                  table = ind_table(handler);
>                  handler = table[opc3(ctx.opcode)];
> +                if (is_indirect_opcode(handler)) {
> +                    table = ind_table(handler);
> +                    handler = table[opc4(ctx.opcode)];
> +                }
>              }
>          }
>          /* Is opcode *REALLY* valid ? */
>          if (unlikely(handler->handler == &gen_invalid)) {
>              qemu_log_mask(LOG_GUEST_ERROR, "invalid/unsupported opcode: "
> -                          "%02x - %02x - %02x (%08x) " TARGET_FMT_lx " %d\n",
> +                          "%02x - %02x - %02x - %02x (%08x) "
> +                          TARGET_FMT_lx " %d\n",
>                            opc1(ctx.opcode), opc2(ctx.opcode),
> -                          opc3(ctx.opcode), ctx.opcode, ctx.nip - 4, (int)msr_ir);
> +                          opc3(ctx.opcode), opc4(ctx.opcode),
> +                          ctx.opcode, ctx.nip - 4, (int)msr_ir);
>          } else {
>              uint32_t inval;
>  
> @@ -11937,9 +11976,10 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
>  
>              if (unlikely((ctx.opcode & inval) != 0)) {
>                  qemu_log_mask(LOG_GUEST_ERROR, "invalid bits: %08x for opcode: "
> -                              "%02x - %02x - %02x (%08x) " TARGET_FMT_lx "\n",
> -                              ctx.opcode & inval, opc1(ctx.opcode),
> -                              opc2(ctx.opcode), opc3(ctx.opcode),
> +                              "%02x - %02x - %02x - %02x (%08x) "
> +                              TARGET_FMT_lx "\n", ctx.opcode & inval,
> +                              opc1(ctx.opcode), opc2(ctx.opcode),
> +                              opc3(ctx.opcode), opc4(ctx.opcode),
>                                ctx.opcode, ctx.nip - 4);
>                  gen_inval_exception(ctxp, POWERPC_EXCP_INVAL_INVAL);
>                  break;
> @@ -11966,9 +12006,9 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
>              break;
>          }
>          if (tcg_check_temp_count()) {
> -            fprintf(stderr, "Opcode %02x %02x %02x (%08x) leaked temporaries\n",
> -                    opc1(ctx.opcode), opc2(ctx.opcode), opc3(ctx.opcode),
> -                    ctx.opcode);
> +            fprintf(stderr, "Opcode %02x %02x %02x %02x (%08x) leaked "
> +                    "temporaries\n", opc1(ctx.opcode), opc2(ctx.opcode),
> +                    opc3(ctx.opcode), opc4(ctx.opcode), ctx.opcode);
>              exit(1);
>          }
>      }
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 0d8cff1..f627cfe 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -9253,13 +9253,47 @@ static int register_dblind_insn (opc_handler_t **ppc_opcodes,
>      return 0;
>  }
>  
> +static int register_trplind_insn(opc_handler_t **ppc_opcodes,
> +                                 unsigned char idx1, unsigned char idx2,
> +                                 unsigned char idx3, unsigned char idx4,
> +                                 opc_handler_t *handler)
> +{
> +    opc_handler_t **table;
> +
> +    if (register_ind_in_table(ppc_opcodes, idx1, idx2, NULL) < 0) {
> +        printf("*** ERROR: unable to join indirect table idx "
> +               "[%02x-%02x]\n", idx1, idx2);
> +        return -1;
> +    }
> +    table = ind_table(ppc_opcodes[idx1]);
> +    if (register_ind_in_table(table, idx2, idx3, NULL) < 0) {
> +        printf("*** ERROR: unable to join 2nd-level indirect table idx "
> +               "[%02x-%02x-%02x]\n", idx1, idx2, idx3);
> +        return -1;
> +    }
> +    table = ind_table(table[idx2]);
> +    if (register_ind_in_table(table, idx3, idx4, handler) < 0) {
> +        printf("*** ERROR: unable to insert opcode "
> +               "[%02x-%02x-%02x-%02x]\n", idx1, idx2, idx3, idx4);
> +        return -1;
> +    }
> +    return 0;
> +}
>  static int register_insn (opc_handler_t **ppc_opcodes, opcode_t *insn)
>  {
>      if (insn->opc2 != 0xFF) {
>          if (insn->opc3 != 0xFF) {
> -            if (register_dblind_insn(ppc_opcodes, insn->opc1, insn->opc2,
> -                                     insn->opc3, &insn->handler) < 0)
> -                return -1;
> +            if (insn->opc4 != 0xFF) {
> +                if (register_trplind_insn(ppc_opcodes, insn->opc1, insn->opc2,
> +                                          insn->opc3, insn->opc4,
> +                                          &insn->handler) < 0) {
> +                    return -1;
> +                }
> +            } else {
> +                if (register_dblind_insn(ppc_opcodes, insn->opc1, insn->opc2,
> +                                         insn->opc3, &insn->handler) < 0)
> +                    return -1;
> +            }
>          } else {
>              if (register_ind_insn(ppc_opcodes, insn->opc1,
>                                    insn->opc2, &insn->handler) < 0)
> @@ -9335,7 +9369,7 @@ static void dump_ppc_insns (CPUPPCState *env)
>  {
>      opc_handler_t **table, *handler;
>      const char *p, *q;
> -    uint8_t opc1, opc2, opc3;
> +    uint8_t opc1, opc2, opc3, opc4;
>  
>      printf("Instructions set:\n");
>      /* opc1 is 6 bits long */
> @@ -9355,34 +9389,50 @@ static void dump_ppc_insns (CPUPPCState *env)
>                      for (opc3 = 0; opc3 < PPC_CPU_INDIRECT_OPCODES_LEN;
>                              opc3++) {
>                          handler = table[opc3];
> -                        if (handler->handler != &gen_invalid) {
> -                            /* Special hack to properly dump SPE insns */
> -                            p = strchr(handler->oname, '_');
> -                            if (p == NULL) {
> -                                printf("INSN: %02x %02x %02x (%02d %04d) : "
> -                                       "%s\n",
> -                                       opc1, opc2, opc3, opc1,
> -                                       (opc3 << 5) | opc2,
> -                                       handler->oname);
> -                            } else {
> -                                q = "speundef";
> -                                if ((p - handler->oname) != strlen(q) ||
> -                                    memcmp(handler->oname, q, strlen(q)) != 0) {
> -                                    /* First instruction */
> -                                    printf("INSN: %02x %02x %02x (%02d %04d) : "
> -                                           "%.*s\n",
> -                                           opc1, opc2 << 1, opc3, opc1,
> -                                           (opc3 << 6) | (opc2 << 1),
> -                                           (int)(p - handler->oname),
> +                        if (is_indirect_opcode(handler)) {
> +                            table = ind_table(handler);
> +                            /* opc4 is 5 bits long */
> +                            for (opc4 = 0; opc4 < PPC_CPU_INDIRECT_OPCODES_LEN;
> +                                 opc4++) {
> +                                handler = table[opc4];
> +                                if (handler->handler != &gen_invalid) {
> +                                    printf("INSN: %02x %02x %02x %02x -- "
> +                                           "(%02d %04d %02d) : %s\n",
> +                                           opc1, opc2, opc3, opc4,
> +                                           opc1, (opc3 << 5) | opc2, opc4,
>                                             handler->oname);
>                                  }
> -                                if (strcmp(p + 1, q) != 0) {
> -                                    /* Second instruction */
> +                            }
> +                        } else {
> +                            if (handler->handler != &gen_invalid) {
> +                                /* Special hack to properly dump SPE insns */
> +                                p = strchr(handler->oname, '_');
> +                                if (p == NULL) {
>                                      printf("INSN: %02x %02x %02x (%02d %04d) : "
>                                             "%s\n",
> -                                           opc1, (opc2 << 1) | 1, opc3, opc1,
> -                                           (opc3 << 6) | (opc2 << 1) | 1,
> -                                           p + 1);
> +                                           opc1, opc2, opc3, opc1,
> +                                           (opc3 << 5) | opc2,
> +                                           handler->oname);
> +                                } else {
> +                                    q = "speundef";
> +                                    if ((p - handler->oname) != strlen(q) ||
> +                                        memcmp(handler->oname, q, strlen(q)) != 0) {
> +                                        /* First instruction */
> +                                        printf("INSN: %02x %02x %02x"
> +                                               "(%02d %04d) : %.*s\n",
> +                                               opc1, opc2 << 1, opc3, opc1,
> +                                               (opc3 << 6) | (opc2 << 1),
> +                                               (int)(p - handler->oname),
> +                                               handler->oname);
> +                                    }
> +                                    if (strcmp(p + 1, q) != 0) {
> +                                        /* Second instruction */
> +                                        printf("INSN: %02x %02x %02x "
> +                                               "(%02d %04d) : %s\n", opc1,
> +                                               (opc2 << 1) | 1, opc3, opc1,
> +                                               (opc3 << 6) | (opc2 << 1) | 1,
> +                                               p + 1);
> +                                    }
>                                  }
>                              }
>                          }
> @@ -9858,8 +9908,8 @@ static void ppc_cpu_unrealizefn(DeviceState *dev, Error **errp)
>  {
>      PowerPCCPU *cpu = POWERPC_CPU(dev);
>      CPUPPCState *env = &cpu->env;
> -    opc_handler_t **table;
> -    int i, j;
> +    opc_handler_t **table, **table_2;
> +    int i, j, k;
>  
>      cpu_exec_exit(CPU(dev));
>  
> @@ -9870,10 +9920,20 @@ static void ppc_cpu_unrealizefn(DeviceState *dev, Error **errp)
>          if (is_indirect_opcode(env->opcodes[i])) {
>              table = ind_table(env->opcodes[i]);
>              for (j = 0; j < PPC_CPU_INDIRECT_OPCODES_LEN; j++) {
> -                if (table[j] != &invalid_handler &&
> -                        is_indirect_opcode(table[j])) {
> +                if (table[j] == &invalid_handler) {
> +                    continue;
> +                }
> +                if (is_indirect_opcode(table[j])) {
> +                    table_2 = ind_table(table[j]);
> +                    for (k = 0; k < PPC_CPU_INDIRECT_OPCODES_LEN; k++) {
> +                        if (table_2[k] != &invalid_handler &&
> +                            is_indirect_opcode(table_2[k])) {
> +                            g_free((opc_handler_t *)((uintptr_t)table_2[k] &
> +                                                     ~PPC_INDIRECT));
> +                        }
> +                    }
>                      g_free((opc_handler_t *)((uintptr_t)table[j] &
> -                        ~PPC_INDIRECT));
> +                                             ~PPC_INDIRECT));
>                  }
>              }
>              g_free((opc_handler_t *)((uintptr_t)env->opcodes[i] &

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH v4 01/15] target-ppc: Introduce Power9 family
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 01/15] target-ppc: Introduce Power9 family Nikunj A Dadhania
@ 2016-07-27  6:17   ` David Gibson
  0 siblings, 0 replies; 23+ messages in thread
From: David Gibson @ 2016-07-27  6:17 UTC (permalink / raw)
  To: Nikunj A Dadhania; +Cc: qemu-ppc, rth, qemu-devel, bharata, aneesh.kumar

[-- Attachment #1: Type: text/plain, Size: 10100 bytes --]

On Tue, Jul 26, 2016 at 05:28:24PM +0530, Nikunj A Dadhania wrote:
> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> 
> The patch adds CPU PVR definition for POWER9 and enables QEMU to launch
> guests/linux-user in TCG mode.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> [ Added POWER9 alias, POWER9 SPAPR core and dropped MMU defines ]
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>

I've put this into ppc-for-2.8, but I've dropped the spapr core type
again for now.  I'd prefer to wait until the POWER9 support is closer
to complete before encouraging its use for an actual machine type.

> ---
>  hw/ppc/spapr_cpu_core.c     |  5 +++
>  target-ppc/cpu-models.c     |  5 +++
>  target-ppc/cpu-models.h     |  1 +
>  target-ppc/cpu-qom.h        |  1 +
>  target-ppc/mmu_helper.c     |  2 +-
>  target-ppc/translate_init.c | 86 ++++++++++++++++++++++++++++++++++++++++++++-
>  6 files changed, 98 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> index 5a132bf..713c5f3 100644
> --- a/hw/ppc/spapr_cpu_core.c
> +++ b/hw/ppc/spapr_cpu_core.c
> @@ -352,6 +352,7 @@ SPAPR_CPU_CORE_INITFN(POWER7+_v2.1, POWER7plus);
>  SPAPR_CPU_CORE_INITFN(POWER8_v2.0, POWER8);
>  SPAPR_CPU_CORE_INITFN(POWER8E_v2.1, POWER8E);
>  SPAPR_CPU_CORE_INITFN(POWER8NVL_v1.0, POWER8NVL);
> +SPAPR_CPU_CORE_INITFN(POWER9_v1.0, POWER9);
>  
>  typedef struct SPAPRCoreInfo {
>      const char *name;
> @@ -395,6 +396,10 @@ static const SPAPRCoreInfo spapr_cores[] = {
>      { .name = "POWER8NVL_v1.0", .initfn = spapr_cpu_core_POWER8NVL_initfn },
>      { .name = "POWER8NVL", .initfn = spapr_cpu_core_POWER8NVL_initfn },
>  
> +    /* POWER9 and aliases */
> +    { .name = "POWER9_v1.0", .initfn = spapr_cpu_core_POWER9_initfn },
> +    { .name = "POWER9", .initfn = spapr_cpu_core_POWER9_initfn },
> +
>      { .name = NULL }
>  };
>  
> diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c
> index 5209e63..901cf40 100644
> --- a/target-ppc/cpu-models.c
> +++ b/target-ppc/cpu-models.c
> @@ -1147,6 +1147,10 @@
>                  "POWER8NVL v1.0")
>      POWERPC_DEF("970_v2.2",      CPU_POWERPC_970_v22,                970,
>                  "PowerPC 970 v2.2")
> +
> +    POWERPC_DEF("POWER9_v1.0",   CPU_POWERPC_POWER9_BASE,            POWER9,
> +                "POWER9 v1.0")
> +
>      POWERPC_DEF("970fx_v1.0",    CPU_POWERPC_970FX_v10,              970,
>                  "PowerPC 970FX v1.0 (G5)")
>      POWERPC_DEF("970fx_v2.0",    CPU_POWERPC_970FX_v20,              970,
> @@ -1395,6 +1399,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = {
>      { "POWER8E", "POWER8E_v2.1" },
>      { "POWER8", "POWER8_v2.0" },
>      { "POWER8NVL", "POWER8NVL_v1.0" },
> +    { "POWER9", "POWER9_v1.0" },
>      { "970", "970_v2.2" },
>      { "970fx", "970fx_v3.1" },
>      { "970mp", "970mp_v1.1" },
> diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
> index f21a44c..7d9e6a2 100644
> --- a/target-ppc/cpu-models.h
> +++ b/target-ppc/cpu-models.h
> @@ -562,6 +562,7 @@ enum {
>      CPU_POWERPC_POWER8_v20         = 0x004D0200,
>      CPU_POWERPC_POWER8NVL_BASE     = 0x004C0000,
>      CPU_POWERPC_POWER8NVL_v10      = 0x004C0100,
> +    CPU_POWERPC_POWER9_BASE        = 0x004E0000,
>      CPU_POWERPC_970_v22            = 0x00390202,
>      CPU_POWERPC_970FX_v10          = 0x00391100,
>      CPU_POWERPC_970FX_v20          = 0x003C0200,
> diff --git a/target-ppc/cpu-qom.h b/target-ppc/cpu-qom.h
> index 2864105..713deef 100644
> --- a/target-ppc/cpu-qom.h
> +++ b/target-ppc/cpu-qom.h
> @@ -86,6 +86,7 @@ enum powerpc_mmu_t {
>      POWERPC_MMU_2_07       = POWERPC_MMU_64 | POWERPC_MMU_1TSEG
>                               | POWERPC_MMU_64K
>                               | POWERPC_MMU_AMR | 0x00000004,
> +    /* FIXME Add POWERPC_MMU_3_OO defines */
>      /* Architecture 2.07 "degraded" (no 1T segments)           */
>      POWERPC_MMU_2_07a      = POWERPC_MMU_64 | POWERPC_MMU_AMR
>                               | 0x00000004,
> diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
> index 3eb3cd7..737f338 100644
> --- a/target-ppc/mmu_helper.c
> +++ b/target-ppc/mmu_helper.c
> @@ -1941,7 +1941,7 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
>          break;
>      default:
>          /* XXX: TODO */
> -        cpu_abort(CPU(cpu), "Unknown MMU model\n");
> +        cpu_abort(CPU(cpu), "Unknown MMU model %d\n", env->mmu_model);
>          break;
>      }
>  }
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 5f28a36..1e12d80 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -7459,7 +7459,8 @@ enum BOOK3S_CPU_TYPE {
>      BOOK3S_CPU_POWER5PLUS,
>      BOOK3S_CPU_POWER6,
>      BOOK3S_CPU_POWER7,
> -    BOOK3S_CPU_POWER8
> +    BOOK3S_CPU_POWER8,
> +    BOOK3S_CPU_POWER9
>  };
>  
>  static void gen_fscr_facility_check(DisasContext *ctx, int facility_sprn,
> @@ -8241,6 +8242,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int version)
>          break;
>      case BOOK3S_CPU_POWER7:
>      case BOOK3S_CPU_POWER8:
> +    case BOOK3S_CPU_POWER9:
>          gen_spr_book3s_ids(env);
>          gen_spr_amr(env, version >= BOOK3S_CPU_POWER8);
>          gen_spr_book3s_purr(env);
> @@ -8293,6 +8295,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int version)
>          break;
>      case BOOK3S_CPU_POWER7:
>      case BOOK3S_CPU_POWER8:
> +    case BOOK3S_CPU_POWER9:
>      default:
>          env->slb_nr = 32;
>          break;
> @@ -8310,6 +8313,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int version)
>          ppcPOWER7_irq_init(ppc_env_get_cpu(env));
>          break;
>      case BOOK3S_CPU_POWER8:
> +    case BOOK3S_CPU_POWER9:
>          init_excp_POWER8(env);
>          ppcPOWER7_irq_init(ppc_env_get_cpu(env));
>          break;
> @@ -8772,6 +8776,86 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
>      pcc->l1_icache_size = 0x8000;
>      pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr;
>  }
> +static void init_proc_POWER9(CPUPPCState *env)
> +{
> +    init_proc_book3s_64(env, BOOK3S_CPU_POWER9);
> +}
> +
> +static bool ppc_pvr_match_power9(PowerPCCPUClass *pcc, uint32_t pvr)
> +{
> +    if ((pvr & CPU_POWERPC_POWER_SERVER_MASK) == CPU_POWERPC_POWER9_BASE) {
> +        return true;
> +    }
> +    return false;
> +}
> +
> +POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(oc);
> +    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
> +
> +    dc->fw_name = "PowerPC,POWER9";
> +    dc->desc = "POWER9";
> +    dc->props = powerpc_servercpu_properties;
> +    pcc->pvr_match = ppc_pvr_match_power9;
> +    pcc->pcr_mask = PCR_COMPAT_2_05 | PCR_COMPAT_2_06 | PCR_COMPAT_2_07;
> +    pcc->init_proc = init_proc_POWER9;
> +    pcc->check_pow = check_pow_nocheck;
> +    pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB |
> +                       PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES |
> +                       PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE |
> +                       PPC_FLOAT_FRSQRTES |
> +                       PPC_FLOAT_STFIWX |
> +                       PPC_FLOAT_EXT |
> +                       PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
> +                       PPC_MEM_SYNC | PPC_MEM_EIEIO |
> +                       PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
> +                       PPC_64B | PPC_64BX | PPC_ALTIVEC |
> +                       PPC_SEGMENT_64B | PPC_SLBI |
> +                       PPC_POPCNTB | PPC_POPCNTWD |
> +                       PPC_CILDST;
> +    pcc->insns_flags2 = PPC2_VSX | PPC2_VSX207 | PPC2_DFP | PPC2_DBRX |
> +                        PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
> +                        PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
> +                        PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
> +                        PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
> +                        PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 |
> +                        PPC2_TM | PPC2_PM_ISA206;
> +    pcc->msr_mask = (1ull << MSR_SF) |
> +                    (1ull << MSR_TM) |
> +                    (1ull << MSR_VR) |
> +                    (1ull << MSR_VSX) |
> +                    (1ull << MSR_EE) |
> +                    (1ull << MSR_PR) |
> +                    (1ull << MSR_FP) |
> +                    (1ull << MSR_ME) |
> +                    (1ull << MSR_FE0) |
> +                    (1ull << MSR_SE) |
> +                    (1ull << MSR_DE) |
> +                    (1ull << MSR_FE1) |
> +                    (1ull << MSR_IR) |
> +                    (1ull << MSR_DR) |
> +                    (1ull << MSR_PMM) |
> +                    (1ull << MSR_RI) |
> +                    (1ull << MSR_LE);
> +    /* Using 2.07 defines until new radix model is added. */
> +    pcc->mmu_model = POWERPC_MMU_2_07;
> +#if defined(CONFIG_SOFTMMU)
> +    pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
> +    /* segment page size remain the same */
> +    pcc->sps = &POWER7_POWER8_sps;
> +#endif
> +    pcc->excp_model = POWERPC_EXCP_POWER8;
> +    pcc->bus_model = PPC_FLAGS_INPUT_POWER7;
> +    pcc->bfd_mach = bfd_mach_ppc64;
> +    pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
> +                 POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
> +                 POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
> +                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM;
> +    pcc->l1_dcache_size = 0x8000;
> +    pcc->l1_icache_size = 0x8000;
> +    pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr;
> +}
>  
>  #if !defined(CONFIG_USER_ONLY)
>  

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH v4 07/15] target-ppc: implement branch-less divw[o][.]
  2016-07-27  5:19   ` David Gibson
@ 2016-07-27  6:17     ` Nikunj A Dadhania
  2016-07-27  6:29       ` David Gibson
  0 siblings, 1 reply; 23+ messages in thread
From: Nikunj A Dadhania @ 2016-07-27  6:17 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, rth, qemu-devel, bharata, aneesh.kumar

David Gibson <david@gibson.dropbear.id.au> writes:

> [ Unknown signature status ]
> On Tue, Jul 26, 2016 at 05:28:30PM +0530, Nikunj A Dadhania wrote:
>> While implementing modulo instructions figured out that the
>> implementation uses many branches. Change the logic to achieve the
>> branch-less code. Undefined value is set to dividend in case of invalid
>> input.
>> 
>> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>> ---
>>  target-ppc/translate.c | 48 +++++++++++++++++++++++-------------------------
>>  1 file changed, 23 insertions(+), 25 deletions(-)
>> 
>> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
>> index 7c7328f..69d9ae0 100644
>> --- a/target-ppc/translate.c
>> +++ b/target-ppc/translate.c
>> @@ -1049,41 +1049,39 @@ static void gen_addpcis(DisasContext *ctx)
>>  static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1,
>>                                       TCGv arg2, int sign, int compute_ov)
>>  {
>> -    TCGLabel *l1 = gen_new_label();
>> -    TCGLabel *l2 = gen_new_label();
>> -    TCGv_i32 t0 = tcg_temp_local_new_i32();
>> -    TCGv_i32 t1 = tcg_temp_local_new_i32();
>> +    TCGv_i32 t0 = tcg_temp_new_i32();
>> +    TCGv_i32 t1 = tcg_temp_new_i32();
>> +    TCGv_i32 t2 = tcg_temp_new_i32();
>> +    TCGv_i32 t3 = tcg_temp_new_i32();
>>  
>>      tcg_gen_trunc_tl_i32(t0, arg1);
>>      tcg_gen_trunc_tl_i32(t1, arg2);
>> -    tcg_gen_brcondi_i32(TCG_COND_EQ, t1, 0, l1);
>> -    if (sign) {
>> -        TCGLabel *l3 = gen_new_label();
>> -        tcg_gen_brcondi_i32(TCG_COND_NE, t1, -1, l3);
>> -        tcg_gen_brcondi_i32(TCG_COND_EQ, t0, INT32_MIN, l1);
>> -        gen_set_label(l3);
>> -        tcg_gen_div_i32(t0, t0, t1);
>> -    } else {
>> -        tcg_gen_divu_i32(t0, t0, t1);
>> -    }
>> -    if (compute_ov) {
>> -        tcg_gen_movi_tl(cpu_ov, 0);
>> -    }
>> -    tcg_gen_br(l2);
>> -    gen_set_label(l1);
>>      if (sign) {
>> -        tcg_gen_sari_i32(t0, t0, 31);
>> +        tcg_gen_setcondi_i32(TCG_COND_EQ, t2, t0, INT_MIN);
>> +        tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, -1);
>> +        tcg_gen_and_i32(t2, t2, t3);
>> +        tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, 0);
>> +        tcg_gen_or_i32(t2, t2, t3);
>> +        tcg_gen_movi_i32(t3, 0);
>> +        tcg_gen_movcond_i32(TCG_COND_NE, t1, t2, t3, t2, t1);
>> +        tcg_gen_div_i32(t3, t0, t1);
>> +        tcg_gen_extu_i32_tl(ret, t3);
>
> Should this be a signed extend, given it's a signed divide?

I don't think so: the instruction is 32-bit, so the caller will only
look at the low 32 bits, and since div_i32 is a signed divide, it will
take care of the sign within the 32-bit boundary.

Regards
Nikunj

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH v4 00/15] POWER9 TCG enablements - part1
       [not found] <1469534318-5549-1-git-send-email-nikunj@linux.vnet.ibm.com>
                   ` (14 preceding siblings ...)
  2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 15/15] target-ppc: introduce opc4 for Expanded Opcode Nikunj A Dadhania
@ 2016-07-27  6:23 ` David Gibson
  15 siblings, 0 replies; 23+ messages in thread
From: David Gibson @ 2016-07-27  6:23 UTC (permalink / raw)
  To: Nikunj A Dadhania; +Cc: qemu-ppc, rth, qemu-devel, bharata, aneesh.kumar

[-- Attachment #1: Type: text/plain, Size: 3593 bytes --]

On Tue, Jul 26, 2016 at 05:28:23PM +0530, Nikunj A Dadhania wrote:
> This set starts adding new instructions for POWER9 described in ISA3.0.
> 
> Patches:
>   01-02: First two patches adds the required POWER9 cpu model and ISA defines.
>   03-14: Adds following instructions:
>              addpcis   : Add PC Immediate Shifted
>              cmprb     : Compare Ranged Byte
>              moduw     : Modulo Unsigned Word
>              modsw     : Modulo Signed Word
>              modud     : Modulo Unsigned Dword
>              modsd     : Modulo Signed Dword
>              cnttzd[.] : Count Trailing Zero Dword
>              cnttzw[.] : Count Trailing Zero Word
>              cmpeqb    : Compare Equal Byte
>              setb      : Set Boolean
>              maddld    : Multiply-Add Low Dword
>              maddhd    : Multiply-Add High Dword
>              maddhdu   : Multiply-Add High Dword Unsigned
>          Changes following instructions:
>              divd[u][o][.]: Divide Doubleword Signed/Unsigned
>              divw[u][o][.]: Divide Word Signed/Unsigned
>      15: Adds support for the new Expanded Opcode (EO) added in
>   ISA3.0

I've applied these to ppc-for-2.8, except for the div rework which I
have a comment on, and for which I'm hoping for an R-b from rth.  I
did make a small tweak to 1/15.

> 
> Changelog:
> v3:
> * Accumulate summary overflow in place of over-writing in div[w,d] operations
> 
> v2: 
> * Implement branchless modulo instruction
> * Change divd and divw to branchless implementation similar to modulo
>   instructions
> * Drop MMU_3_00 defines from the POWER9 define until radix support is
>   added.
> 
> v1:
> * addpcis - shift the immediate before adding
> * cmprb logic without branches
> * mod[su][wd]: use helpers
> * cmpeqb - use bit magics in the helpers
> * setb - bug fix and branchless
> * maddld - discard multiple dword calculation as we need only lower 64-bit
> * Expanded opcode - drop pad from 32-bit and free the third level indirect 
>   table in unrealize
> 
> Aneesh Kumar K.V (1):
>   target-ppc: Introduce Power9 family
> 
> Nikunj A Dadhania (12):
>   target-ppc: Introduce POWER ISA 3.0 flag
>   target-ppc: adding addpcis instruction
>   target-ppc: add cmprb instruction
>   target-ppc: add modulo word operations
>   target-ppc: add modulo dword operations
>   target-ppc: implement branch-less divw[o][.]
>   target-ppc: implement branch-less divd[o][.]
>   target-ppc: add cnttzw[.] instruction
>   target-ppc: add cmpeqb instruction
>   target-ppc: add maddld instruction
>   target-ppc: add maddhd and maddhdu instruction
>   target-ppc: introduce opc4 for Expanded Opcode
> 
> Sandipan Das (1):
>   target-ppc: add cnttzd[.] instruction
> 
> Vivek Andrew Sha (1):
>   target-ppc: add setb instruction
> 
>  hw/ppc/spapr_cpu_core.c     |   5 +
>  target-ppc/cpu-models.c     |   5 +
>  target-ppc/cpu-models.h     |   1 +
>  target-ppc/cpu-qom.h        |   1 +
>  target-ppc/cpu.h            |   5 +-
>  target-ppc/helper.h         |   3 +
>  target-ppc/int_helper.c     |  32 ++++
>  target-ppc/mmu_helper.c     |   2 +-
>  target-ppc/translate.c      | 432 +++++++++++++++++++++++++++++++++++++-------
>  target-ppc/translate_init.c | 212 ++++++++++++++++++----
>  10 files changed, 594 insertions(+), 104 deletions(-)
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH v4 07/15] target-ppc: implement branch-less divw[o][.]
  2016-07-27  6:17     ` Nikunj A Dadhania
@ 2016-07-27  6:29       ` David Gibson
  2016-07-27  6:41         ` Nikunj A Dadhania
  0 siblings, 1 reply; 23+ messages in thread
From: David Gibson @ 2016-07-27  6:29 UTC (permalink / raw)
  To: Nikunj A Dadhania; +Cc: qemu-ppc, rth, qemu-devel, bharata, aneesh.kumar

[-- Attachment #1: Type: text/plain, Size: 3267 bytes --]

On Wed, Jul 27, 2016 at 11:47:15AM +0530, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
> > [ Unknown signature status ]
> > On Tue, Jul 26, 2016 at 05:28:30PM +0530, Nikunj A Dadhania wrote:
> >> While implementing modulo instructions figured out that the
> >> implementation uses many branches. Change the logic to achieve the
> >> branch-less code. Undefined value is set to dividend in case of invalid
> >> input.
> >> 
> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> >> ---
> >>  target-ppc/translate.c | 48 +++++++++++++++++++++++-------------------------
> >>  1 file changed, 23 insertions(+), 25 deletions(-)
> >> 
> >> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> >> index 7c7328f..69d9ae0 100644
> >> --- a/target-ppc/translate.c
> >> +++ b/target-ppc/translate.c
> >> @@ -1049,41 +1049,39 @@ static void gen_addpcis(DisasContext *ctx)
> >>  static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1,
> >>                                       TCGv arg2, int sign, int compute_ov)
> >>  {
> >> -    TCGLabel *l1 = gen_new_label();
> >> -    TCGLabel *l2 = gen_new_label();
> >> -    TCGv_i32 t0 = tcg_temp_local_new_i32();
> >> -    TCGv_i32 t1 = tcg_temp_local_new_i32();
> >> +    TCGv_i32 t0 = tcg_temp_new_i32();
> >> +    TCGv_i32 t1 = tcg_temp_new_i32();
> >> +    TCGv_i32 t2 = tcg_temp_new_i32();
> >> +    TCGv_i32 t3 = tcg_temp_new_i32();
> >>  
> >>      tcg_gen_trunc_tl_i32(t0, arg1);
> >>      tcg_gen_trunc_tl_i32(t1, arg2);
> >> -    tcg_gen_brcondi_i32(TCG_COND_EQ, t1, 0, l1);
> >> -    if (sign) {
> >> -        TCGLabel *l3 = gen_new_label();
> >> -        tcg_gen_brcondi_i32(TCG_COND_NE, t1, -1, l3);
> >> -        tcg_gen_brcondi_i32(TCG_COND_EQ, t0, INT32_MIN, l1);
> >> -        gen_set_label(l3);
> >> -        tcg_gen_div_i32(t0, t0, t1);
> >> -    } else {
> >> -        tcg_gen_divu_i32(t0, t0, t1);
> >> -    }
> >> -    if (compute_ov) {
> >> -        tcg_gen_movi_tl(cpu_ov, 0);
> >> -    }
> >> -    tcg_gen_br(l2);
> >> -    gen_set_label(l1);
> >>      if (sign) {
> >> -        tcg_gen_sari_i32(t0, t0, 31);
> >> +        tcg_gen_setcondi_i32(TCG_COND_EQ, t2, t0, INT_MIN);
> >> +        tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, -1);
> >> +        tcg_gen_and_i32(t2, t2, t3);
> >> +        tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, 0);
> >> +        tcg_gen_or_i32(t2, t2, t3);
> >> +        tcg_gen_movi_i32(t3, 0);
> >> +        tcg_gen_movcond_i32(TCG_COND_NE, t1, t2, t3, t2, t1);
> >> +        tcg_gen_div_i32(t3, t0, t1);
> >> +        tcg_gen_extu_i32_tl(ret, t3);
> >
> > Should this be a signed extend, given it's a signed divide?
> 
> Don't think so, as the instruction is 32-bit, caller will only look at
> the 32bit and div_i32 is signed, it will take care of extending sign
> till 32-boundary.

Hrm.  I thought most 32-bit arithmetic operations on Power actually
set the underlying 64-bit registers to a sign extended version of the
32-bit result.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH v4 07/15] target-ppc: implement branch-less divw[o][.]
  2016-07-27  6:29       ` David Gibson
@ 2016-07-27  6:41         ` Nikunj A Dadhania
  2016-07-27  6:56           ` David Gibson
  0 siblings, 1 reply; 23+ messages in thread
From: Nikunj A Dadhania @ 2016-07-27  6:41 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, rth, qemu-devel, bharata, aneesh.kumar

David Gibson <david@gibson.dropbear.id.au> writes:

> [ Unknown signature status ]
> On Wed, Jul 27, 2016 at 11:47:15AM +0530, Nikunj A Dadhania wrote:
>> David Gibson <david@gibson.dropbear.id.au> writes:
>> 
>> > [ Unknown signature status ]
>> > On Tue, Jul 26, 2016 at 05:28:30PM +0530, Nikunj A Dadhania wrote:
>> >> While implementing modulo instructions figured out that the
>> >> implementation uses many branches. Change the logic to achieve the
>> >> branch-less code. Undefined value is set to dividend in case of invalid
>> >> input.
>> >> 
>> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>> >> ---
>> >>  target-ppc/translate.c | 48 +++++++++++++++++++++++-------------------------
>> >>  1 file changed, 23 insertions(+), 25 deletions(-)
>> >> 
>> >> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
>> >> index 7c7328f..69d9ae0 100644
>> >> --- a/target-ppc/translate.c
>> >> +++ b/target-ppc/translate.c
>> >> @@ -1049,41 +1049,39 @@ static void gen_addpcis(DisasContext *ctx)
>> >>  static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1,
>> >>                                       TCGv arg2, int sign, int compute_ov)
>> >>  {
>> >> -    TCGLabel *l1 = gen_new_label();
>> >> -    TCGLabel *l2 = gen_new_label();
>> >> -    TCGv_i32 t0 = tcg_temp_local_new_i32();
>> >> -    TCGv_i32 t1 = tcg_temp_local_new_i32();
>> >> +    TCGv_i32 t0 = tcg_temp_new_i32();
>> >> +    TCGv_i32 t1 = tcg_temp_new_i32();
>> >> +    TCGv_i32 t2 = tcg_temp_new_i32();
>> >> +    TCGv_i32 t3 = tcg_temp_new_i32();
>> >>  
>> >>      tcg_gen_trunc_tl_i32(t0, arg1);
>> >>      tcg_gen_trunc_tl_i32(t1, arg2);
>> >> -    tcg_gen_brcondi_i32(TCG_COND_EQ, t1, 0, l1);
>> >> -    if (sign) {
>> >> -        TCGLabel *l3 = gen_new_label();
>> >> -        tcg_gen_brcondi_i32(TCG_COND_NE, t1, -1, l3);
>> >> -        tcg_gen_brcondi_i32(TCG_COND_EQ, t0, INT32_MIN, l1);
>> >> -        gen_set_label(l3);
>> >> -        tcg_gen_div_i32(t0, t0, t1);
>> >> -    } else {
>> >> -        tcg_gen_divu_i32(t0, t0, t1);
>> >> -    }
>> >> -    if (compute_ov) {
>> >> -        tcg_gen_movi_tl(cpu_ov, 0);
>> >> -    }
>> >> -    tcg_gen_br(l2);
>> >> -    gen_set_label(l1);
>> >>      if (sign) {
>> >> -        tcg_gen_sari_i32(t0, t0, 31);
>> >> +        tcg_gen_setcondi_i32(TCG_COND_EQ, t2, t0, INT_MIN);
>> >> +        tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, -1);
>> >> +        tcg_gen_and_i32(t2, t2, t3);
>> >> +        tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, 0);
>> >> +        tcg_gen_or_i32(t2, t2, t3);
>> >> +        tcg_gen_movi_i32(t3, 0);
>> >> +        tcg_gen_movcond_i32(TCG_COND_NE, t1, t2, t3, t2, t1);
>> >> +        tcg_gen_div_i32(t3, t0, t1);
>> >> +        tcg_gen_extu_i32_tl(ret, t3);
>> >
>> > Should this be a signed extend, given it's a signed divide?
>> 
>> Don't think so, as the instruction is 32-bit, caller will only look at
>> the 32bit and div_i32 is signed, it will take care of extending sign
>> till 32-boundary.
>
> Hrm.  I thought most 32-bit arithmetic operations on Power actually
> set the underlying 64-bit registers to a sign extended version of the
> 32-bit result.

I think that when I want to operate on it as 64-bit, I will need sign
extension. rth can give more info on this.

Retained the behaviour as per the previous code as well:

    tcg_gen_trunc_tl_i32(t0, arg1);
    tcg_gen_trunc_tl_i32(t1, arg2);
    tcg_gen_brcondi_i32(TCG_COND_EQ, t1, 0, l1);
    if (sign) {
        TCGLabel *l3 = gen_new_label();
        tcg_gen_brcondi_i32(TCG_COND_NE, t1, -1, l3);
        tcg_gen_brcondi_i32(TCG_COND_EQ, t0, INT32_MIN, l1);
        gen_set_label(l3);
        tcg_gen_div_i32(t0, t0, t1);
    } else {
        tcg_gen_divu_i32(t0, t0, t1);
    }
    if (compute_ov) {
        tcg_gen_movi_tl(cpu_ov, 0);
    }
    tcg_gen_br(l2);
    gen_set_label(l1);
    if (sign) {
        tcg_gen_sari_i32(t0, t0, 31);
    } else {
        tcg_gen_movi_i32(t0, 0);
    }
    if (compute_ov) {
        tcg_gen_movi_tl(cpu_ov, 1);
        tcg_gen_movi_tl(cpu_so, 1);
    }
    gen_set_label(l2);
    tcg_gen_extu_i32_tl(ret, t0);                     <<<<<<<<<<<<<<<<<<<<<<<
    tcg_temp_free_i32(t0);
    tcg_temp_free_i32(t1);

IMO, that's correct.

Regards
Nikunj

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Qemu-devel] [PATCH v4 07/15] target-ppc: implement branch-less divw[o][.]
  2016-07-27  6:41         ` Nikunj A Dadhania
@ 2016-07-27  6:56           ` David Gibson
  0 siblings, 0 replies; 23+ messages in thread
From: David Gibson @ 2016-07-27  6:56 UTC (permalink / raw)
  To: Nikunj A Dadhania; +Cc: qemu-ppc, rth, qemu-devel, bharata, aneesh.kumar

[-- Attachment #1: Type: text/plain, Size: 5056 bytes --]

On Wed, Jul 27, 2016 at 12:11:08PM +0530, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
> > [ Unknown signature status ]
> > On Wed, Jul 27, 2016 at 11:47:15AM +0530, Nikunj A Dadhania wrote:
> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> 
> >> > [ Unknown signature status ]
> >> > On Tue, Jul 26, 2016 at 05:28:30PM +0530, Nikunj A Dadhania wrote:
> >> >> While implementing modulo instructions figured out that the
> >> >> implementation uses many branches. Change the logic to achieve the
> >> >> branch-less code. Undefined value is set to dividend in case of invalid
> >> >> input.
> >> >> 
> >> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> >> >> ---
> >> >>  target-ppc/translate.c | 48 +++++++++++++++++++++++-------------------------
> >> >>  1 file changed, 23 insertions(+), 25 deletions(-)
> >> >> 
> >> >> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> >> >> index 7c7328f..69d9ae0 100644
> >> >> --- a/target-ppc/translate.c
> >> >> +++ b/target-ppc/translate.c
> >> >> @@ -1049,41 +1049,39 @@ static void gen_addpcis(DisasContext *ctx)
> >> >>  static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1,
> >> >>                                       TCGv arg2, int sign, int compute_ov)
> >> >>  {
> >> >> -    TCGLabel *l1 = gen_new_label();
> >> >> -    TCGLabel *l2 = gen_new_label();
> >> >> -    TCGv_i32 t0 = tcg_temp_local_new_i32();
> >> >> -    TCGv_i32 t1 = tcg_temp_local_new_i32();
> >> >> +    TCGv_i32 t0 = tcg_temp_new_i32();
> >> >> +    TCGv_i32 t1 = tcg_temp_new_i32();
> >> >> +    TCGv_i32 t2 = tcg_temp_new_i32();
> >> >> +    TCGv_i32 t3 = tcg_temp_new_i32();
> >> >>  
> >> >>      tcg_gen_trunc_tl_i32(t0, arg1);
> >> >>      tcg_gen_trunc_tl_i32(t1, arg2);
> >> >> -    tcg_gen_brcondi_i32(TCG_COND_EQ, t1, 0, l1);
> >> >> -    if (sign) {
> >> >> -        TCGLabel *l3 = gen_new_label();
> >> >> -        tcg_gen_brcondi_i32(TCG_COND_NE, t1, -1, l3);
> >> >> -        tcg_gen_brcondi_i32(TCG_COND_EQ, t0, INT32_MIN, l1);
> >> >> -        gen_set_label(l3);
> >> >> -        tcg_gen_div_i32(t0, t0, t1);
> >> >> -    } else {
> >> >> -        tcg_gen_divu_i32(t0, t0, t1);
> >> >> -    }
> >> >> -    if (compute_ov) {
> >> >> -        tcg_gen_movi_tl(cpu_ov, 0);
> >> >> -    }
> >> >> -    tcg_gen_br(l2);
> >> >> -    gen_set_label(l1);
> >> >>      if (sign) {
> >> >> -        tcg_gen_sari_i32(t0, t0, 31);
> >> >> +        tcg_gen_setcondi_i32(TCG_COND_EQ, t2, t0, INT_MIN);
> >> >> +        tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, -1);
> >> >> +        tcg_gen_and_i32(t2, t2, t3);
> >> >> +        tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, 0);
> >> >> +        tcg_gen_or_i32(t2, t2, t3);
> >> >> +        tcg_gen_movi_i32(t3, 0);
> >> >> +        tcg_gen_movcond_i32(TCG_COND_NE, t1, t2, t3, t2, t1);
> >> >> +        tcg_gen_div_i32(t3, t0, t1);
> >> >> +        tcg_gen_extu_i32_tl(ret, t3);
> >> >
> >> > Should this be a signed extend, given it's a signed divide?
> >> 
> >> Don't think so, as the instruction is 32-bit, caller will only look at
> >> the 32bit and div_i32 is signed, it will take care of extending sign
> >> till 32-boundary.
> >
> > Hrm.  I thought most 32-bit arithmetic operations on Power actually
> > set the underlying 64-bit registers to a sign extended version of the
> > 32-bit result.
> 
> I think sign extension is only needed when I want to operate on the
> result as 64-bit. rth can give more info on this.
> 
> Retained the behaviour as per the previous code as well:

Ah, yes RT[0:31] <- undefined according to the ISA, so I guess this is
ok (though ideally we'd double check what the actual hardware does and
match that, just in case something relies on it).

> 
>     tcg_gen_trunc_tl_i32(t0, arg1);
>     tcg_gen_trunc_tl_i32(t1, arg2);
>     tcg_gen_brcondi_i32(TCG_COND_EQ, t1, 0, l1);
>     if (sign) {
>         TCGLabel *l3 = gen_new_label();
>         tcg_gen_brcondi_i32(TCG_COND_NE, t1, -1, l3);
>         tcg_gen_brcondi_i32(TCG_COND_EQ, t0, INT32_MIN, l1);
>         gen_set_label(l3);
>         tcg_gen_div_i32(t0, t0, t1);
>     } else {
>         tcg_gen_divu_i32(t0, t0, t1);
>     }
>     if (compute_ov) {
>         tcg_gen_movi_tl(cpu_ov, 0);
>     }
>     tcg_gen_br(l2);
>     gen_set_label(l1);
>     if (sign) {
>         tcg_gen_sari_i32(t0, t0, 31);
>     } else {
>         tcg_gen_movi_i32(t0, 0);
>     }
>     if (compute_ov) {
>         tcg_gen_movi_tl(cpu_ov, 1);
>         tcg_gen_movi_tl(cpu_so, 1);
>     }
>     gen_set_label(l2);
>     tcg_gen_extu_i32_tl(ret, t0);                     <<<<<<<<<<<<<<<<<<<<<<<
>     tcg_temp_free_i32(t0);
>     tcg_temp_free_i32(t1);
> 
> IMO, that's correct.
> 
> Regards
> Nikunj
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2016-07-27  7:01 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1469534318-5549-1-git-send-email-nikunj@linux.vnet.ibm.com>
2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 01/15] target-ppc: Introduce Power9 family Nikunj A Dadhania
2016-07-27  6:17   ` David Gibson
2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 02/15] target-ppc: Introduce POWER ISA 3.0 flag Nikunj A Dadhania
2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 03/15] target-ppc: adding addpcis instruction Nikunj A Dadhania
2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 04/15] target-ppc: add cmprb instruction Nikunj A Dadhania
2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 05/15] target-ppc: add modulo word operations Nikunj A Dadhania
2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 06/15] target-ppc: add modulo dword operations Nikunj A Dadhania
2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 07/15] target-ppc: implement branch-less divw[o][.] Nikunj A Dadhania
2016-07-27  5:19   ` David Gibson
2016-07-27  6:17     ` Nikunj A Dadhania
2016-07-27  6:29       ` David Gibson
2016-07-27  6:41         ` Nikunj A Dadhania
2016-07-27  6:56           ` David Gibson
2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 08/15] target-ppc: implement branch-less divd[o][.] Nikunj A Dadhania
2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 09/15] target-ppc: add cnttzd[.] instruction Nikunj A Dadhania
2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 10/15] target-ppc: add cnttzw[.] instruction Nikunj A Dadhania
2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 11/15] target-ppc: add cmpeqb instruction Nikunj A Dadhania
2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 12/15] target-ppc: add setb instruction Nikunj A Dadhania
2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 13/15] target-ppc: add maddld instruction Nikunj A Dadhania
2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 14/15] target-ppc: add maddhd and maddhdu instruction Nikunj A Dadhania
2016-07-26 11:58 ` [Qemu-devel] [PATCH v4 15/15] target-ppc: introduce opc4 for Expanded Opcode Nikunj A Dadhania
2016-07-27  5:31   ` David Gibson
2016-07-27  6:23 ` [Qemu-devel] [PATCH v4 00/15] POWER9 TCG enablements - part1 David Gibson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.