All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH ppc-for-2.9 00/13] POWER9 TCG enablements - part9
@ 2016-12-05 11:25 Nikunj A Dadhania
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 01/13] target-ppc: move ppc_vsr_t to common header Nikunj A Dadhania
                   ` (13 more replies)
  0 siblings, 14 replies; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-05 11:25 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, bharata, nikunj

This series implements 12 new instructions for POWER9 (ISA 3.0):
     A couple of consolidation patches
     VSX Vector Insert/Extract Word
     VSX Vector Permute
     VSX Load/Store with length
     VSX Scalar Quad-Precision Move Instructions

Bharata B Rao (1):
  target-ppc: Add xxperm and xxpermr instructions

Nikunj A Dadhania (12):
  target-ppc: move ppc_vsr_t to common header
  target-ppc: add mask_u128 routine
  target-ppc: implement lxvl instruction
  target-ppc: implement lxvll instruction
  target-ppc: implement stxvl instruction
  target-ppc: implement stxvll instructions
  target-ppc: implement xxextractuw instruction
  target-ppc: implement xxinsertw instruction
  target-ppc: implement stop instruction
  target-ppc: implement xsabsqp/xsnabsqp instruction
  target-ppc: implement xsnegqp instruction
  target-ppc: implement xscpsgnqp instruction

 target-ppc/fpu_helper.c             |  87 +++++++++++++++++-------------
 target-ppc/helper.h                 |   8 +++
 target-ppc/int_helper.c             |  63 ++++++++++++++++++++++
 target-ppc/internal.h               |  80 +++++++++++++++++++++++----
 target-ppc/mem_helper.c             |  92 +++++++++++++++++++++++++++++++
 target-ppc/translate.c              |   6 +++
 target-ppc/translate/vsx-impl.inc.c | 105 ++++++++++++++++++++++++++++++++++++
 target-ppc/translate/vsx-ops.inc.c  |  19 +++++++
 8 files changed, 414 insertions(+), 46 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 01/13] target-ppc: move ppc_vsr_t to common header
  2016-12-05 11:25 [Qemu-devel] [PATCH ppc-for-2.9 00/13] POWER9 TCG enablements - part9 Nikunj A Dadhania
@ 2016-12-05 11:25 ` Nikunj A Dadhania
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 02/13] target-ppc: add mask_u128 routine Nikunj A Dadhania
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-05 11:25 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, bharata, nikunj

The structure and corresponding defines and functions need to be used
outside of fpu_helper.c as well.

Add u8, u16 and Int128 (s128) members to the union (u32 was already
present), along with a VsrB() byte accessor macro.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/fpu_helper.c | 37 -------------------------------------
 target-ppc/internal.h   | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 37 deletions(-)

diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 696f537..3b867cf 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -1777,43 +1777,6 @@ uint32_t helper_efdcmpeq(CPUPPCState *env, uint64_t op1, uint64_t op2)
     return helper_efdtsteq(env, op1, op2);
 }
 
-typedef union _ppc_vsr_t {
-    uint64_t u64[2];
-    uint32_t u32[4];
-    float32 f32[4];
-    float64 f64[2];
-} ppc_vsr_t;
-
-#if defined(HOST_WORDS_BIGENDIAN)
-#define VsrW(i) u32[i]
-#define VsrD(i) u64[i]
-#else
-#define VsrW(i) u32[3-(i)]
-#define VsrD(i) u64[1-(i)]
-#endif
-
-static void getVSR(int n, ppc_vsr_t *vsr, CPUPPCState *env)
-{
-    if (n < 32) {
-        vsr->VsrD(0) = env->fpr[n];
-        vsr->VsrD(1) = env->vsr[n];
-    } else {
-        vsr->u64[0] = env->avr[n-32].u64[0];
-        vsr->u64[1] = env->avr[n-32].u64[1];
-    }
-}
-
-static void putVSR(int n, ppc_vsr_t *vsr, CPUPPCState *env)
-{
-    if (n < 32) {
-        env->fpr[n] = vsr->VsrD(0);
-        env->vsr[n] = vsr->VsrD(1);
-    } else {
-        env->avr[n-32].u64[0] = vsr->u64[0];
-        env->avr[n-32].u64[1] = vsr->u64[1];
-    }
-}
-
 #define float64_to_float64(x, env) x
 
 
diff --git a/target-ppc/internal.h b/target-ppc/internal.h
index e83ea45..66cde46 100644
--- a/target-ppc/internal.h
+++ b/target-ppc/internal.h
@@ -199,4 +199,46 @@ EXTRACT_HELPER(SHW, 8, 2);
 EXTRACT_HELPER(SP, 19, 2);
 EXTRACT_HELPER(IMM8, 11, 8);
 
+typedef union _ppc_vsr_t {
+    uint8_t u8[16];
+    uint16_t u16[8];
+    uint32_t u32[4];
+    uint64_t u64[2];
+    float32 f32[4];
+    float64 f64[2];
+    Int128  s128;
+} ppc_vsr_t;
+
+#if defined(HOST_WORDS_BIGENDIAN)
+#define VsrB(i) u8[i]
+#define VsrW(i) u32[i]
+#define VsrD(i) u64[i]
+#else
+#define VsrB(i) u8[15 - (i)]
+#define VsrW(i) u32[3 - (i)]
+#define VsrD(i) u64[1 - (i)]
+#endif
+
+static inline void getVSR(int n, ppc_vsr_t *vsr, CPUPPCState *env)
+{
+    if (n < 32) {
+        vsr->VsrD(0) = env->fpr[n];
+        vsr->VsrD(1) = env->vsr[n];
+    } else {
+        vsr->u64[0] = env->avr[n - 32].u64[0];
+        vsr->u64[1] = env->avr[n - 32].u64[1];
+    }
+}
+
+static inline void putVSR(int n, ppc_vsr_t *vsr, CPUPPCState *env)
+{
+    if (n < 32) {
+        env->fpr[n] = vsr->VsrD(0);
+        env->vsr[n] = vsr->VsrD(1);
+    } else {
+        env->avr[n - 32].u64[0] = vsr->u64[0];
+        env->avr[n - 32].u64[1] = vsr->u64[1];
+    }
+}
+
 #endif /* PPC_INTERNAL_H */
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 02/13] target-ppc: add mask_u128 routine
  2016-12-05 11:25 [Qemu-devel] [PATCH ppc-for-2.9 00/13] POWER9 TCG enablements - part9 Nikunj A Dadhania
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 01/13] target-ppc: move ppc_vsr_t to common header Nikunj A Dadhania
@ 2016-12-05 11:25 ` Nikunj A Dadhania
  2016-12-05 17:36   ` Richard Henderson
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 03/13] target-ppc: implement lxvl instruction Nikunj A Dadhania
                   ` (11 subsequent siblings)
  13 siblings, 1 reply; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-05 11:25 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, bharata, nikunj

Adjust the FUNC_MASK macro to take the input type as a separate
parameter, and add a mask_u128() routine that generates 128-bit masks.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/internal.h | 38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/target-ppc/internal.h b/target-ppc/internal.h
index 66cde46..27d956f 100644
--- a/target-ppc/internal.h
+++ b/target-ppc/internal.h
@@ -18,9 +18,9 @@
 #ifndef PPC_INTERNAL_H
 #define PPC_INTERNAL_H
 
-#define FUNC_MASK(name, ret_type, size, max_val)                  \
-static inline ret_type name(uint##size##_t start,                 \
-                              uint##size##_t end)                 \
+#define FUNC_MASK(name, ret_type, size, in_type, max_val)         \
+static inline ret_type name(in_type start,                        \
+                            in_type end)                          \
 {                                                                 \
     ret_type ret, max_bit = size - 1;                             \
                                                                   \
@@ -29,8 +29,8 @@ static inline ret_type name(uint##size##_t start,                 \
     } else if (likely(end == max_bit)) {                          \
         ret = max_val >> start;                                   \
     } else {                                                      \
-        ret = (((uint##size##_t)(-1ULL)) >> (start)) ^            \
-            (((uint##size##_t)(-1ULL) >> (end)) >> 1);            \
+        ret = (((in_type)(-1ULL)) >> (start)) ^                   \
+            (((in_type)(-1ULL) >> (end)) >> 1);                   \
         if (unlikely(start > end)) {                              \
             return ~ret;                                          \
         }                                                         \
@@ -40,12 +40,32 @@ static inline ret_type name(uint##size##_t start,                 \
 }
 
 #if defined(TARGET_PPC64)
-FUNC_MASK(MASK, target_ulong, 64, UINT64_MAX);
+FUNC_MASK(MASK, target_ulong, 64, uint64_t, UINT64_MAX);
 #else
-FUNC_MASK(MASK, target_ulong, 32, UINT32_MAX);
+FUNC_MASK(MASK, target_ulong, 32, uint32_t, UINT32_MAX);
+#endif
+FUNC_MASK(mask_u32, uint32_t, 32, uint32_t, UINT32_MAX);
+FUNC_MASK(mask_u64, uint64_t, 64, uint64_t, UINT64_MAX);
+
+#if defined(CONFIG_INT128)
+FUNC_MASK(mask_u128, Int128, 128, Int128, ~((__uint128_t)0));
+#else
+static inline Int128 mask_u128(int start, int end)
+{
+    Int128 r = {0};
+    if (start > 63) {
+        r.hi = 0;
+        r.lo = mask_u64(start - 64, end - 64);
+    } else if (end < 64) {
+        r.hi = mask_u64(start, end);
+        r.lo = 0;
+    } else {
+        r.hi = mask_u64(start, 63);
+        r.lo = mask_u64(0, end - 64);
+    }
+    return r;
+}
 #endif
-FUNC_MASK(mask_u32, uint32_t, 32, UINT32_MAX);
-FUNC_MASK(mask_u64, uint64_t, 64, UINT64_MAX);
 
 /*****************************************************************************/
 /***                           Instruction decoding                        ***/
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 03/13] target-ppc: implement lxvl instruction
  2016-12-05 11:25 [Qemu-devel] [PATCH ppc-for-2.9 00/13] POWER9 TCG enablements - part9 Nikunj A Dadhania
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 01/13] target-ppc: move ppc_vsr_t to common header Nikunj A Dadhania
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 02/13] target-ppc: add mask_u128 routine Nikunj A Dadhania
@ 2016-12-05 11:25 ` Nikunj A Dadhania
  2016-12-05 17:46   ` Richard Henderson
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 04/13] target-ppc: implement lxvll instruction Nikunj A Dadhania
                   ` (10 subsequent siblings)
  13 siblings, 1 reply; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-05 11:25 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, bharata, nikunj

lxvl: Load VSX Vector with Length

Little/Big-endian Storage:
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
|“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|FF|FF|
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+

Loading 14 bytes results in:

Vector (8-bit elements) in BE:
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
|“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|00|00|
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+

Vector (8-bit elements) in LE:
+--+--+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
|00|00|“T”|“S”|“E”|“T”|“ ”|“a”|“ ”|“s”|“i”|“ ”|“s”|“i”|“h”|“T”|
+--+--+---+---+---+---+---+---+---+---+---+---+---+---+---+---+

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/helper.h                 |  1 +
 target-ppc/mem_helper.c             | 25 +++++++++++++++++++++++++
 target-ppc/translate/vsx-impl.inc.c | 27 +++++++++++++++++++++++++++
 target-ppc/translate/vsx-ops.inc.c  |  1 +
 4 files changed, 54 insertions(+)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index bc39efb..d9ccafd 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -317,6 +317,7 @@ DEF_HELPER_3(lvewx, void, env, avr, tl)
 DEF_HELPER_3(stvebx, void, env, avr, tl)
 DEF_HELPER_3(stvehx, void, env, avr, tl)
 DEF_HELPER_3(stvewx, void, env, avr, tl)
+DEF_HELPER_4(lxvl, void, env, tl, tl, tl)
 DEF_HELPER_4(vsumsws, void, env, avr, avr, avr)
 DEF_HELPER_4(vsum2sws, void, env, avr, avr, avr)
 DEF_HELPER_4(vsum4sbs, void, env, avr, avr, avr)
diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
index 1ab8a6e..0a8ff54 100644
--- a/target-ppc/mem_helper.c
+++ b/target-ppc/mem_helper.c
@@ -24,6 +24,7 @@
 
 #include "helper_regs.h"
 #include "exec/cpu_ldst.h"
+#include "internal.h"
 
 //#define DEBUG_OP
 
@@ -284,6 +285,30 @@ STVE(stvewx, cpu_stl_data_ra, bswap32, u32)
 #undef I
 #undef LVE
 
+void helper_lxvl(CPUPPCState *env, target_ulong addr,
+                 target_ulong xt_num, target_ulong rb)
+{
+    ppc_vsr_t xt;
+
+    getVSR(xt_num, &xt, env);
+    if (unlikely((rb & 0xFF) == 0)) {
+        xt.s128 = int128_make128(0, 0);
+    } else {
+        target_ulong end = ((rb & 0xFF) * 8) - 1;
+        if (msr_le) {
+            xt.u64[HI_IDX] = bswap64(cpu_ldq_data_ra(env, addr, GETPC()));
+            addr = addr_add(env, addr, 8);
+            xt.u64[LO_IDX] = bswap64(cpu_ldq_data_ra(env, addr, GETPC()));
+        } else {
+            xt.u64[HI_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
+            addr = addr_add(env, addr, 8);
+            xt.u64[LO_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
+        }
+        xt.s128 = int128_and(xt.s128, mask_u128(0, end));
+    }
+    putVSR(xt_num, &xt, env);
+}
+
 #undef HI_IDX
 #undef LO_IDX
 
diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
index 2fbdbd2..e53f91e 100644
--- a/target-ppc/translate/vsx-impl.inc.c
+++ b/target-ppc/translate/vsx-impl.inc.c
@@ -240,6 +240,33 @@ VSX_VECTOR_LOAD_STORE(stxv, st_i64, 0)
 VSX_VECTOR_LOAD_STORE(lxvx, ld_i64, 1)
 VSX_VECTOR_LOAD_STORE(stxvx, st_i64, 1)
 
+#define VSX_VECTOR_LOAD_STORE_LENGTH(name)                      \
+static void gen_##name(DisasContext *ctx)                       \
+{                                                               \
+    TCGv EA, xt;                                                \
+                                                                \
+    if (xT(ctx->opcode) < 32) {                                 \
+        if (unlikely(!ctx->vsx_enabled)) {                      \
+            gen_exception(ctx, POWERPC_EXCP_VSXU);              \
+            return;                                             \
+        }                                                       \
+    } else {                                                    \
+        if (unlikely(!ctx->altivec_enabled)) {                  \
+            gen_exception(ctx, POWERPC_EXCP_VPU);               \
+            return;                                             \
+        }                                                       \
+    }                                                           \
+    EA = tcg_temp_new();                                        \
+    xt = tcg_const_tl(xT(ctx->opcode));                         \
+    gen_set_access_type(ctx, ACCESS_INT);                       \
+    gen_addr_register(ctx, EA);                                 \
+    gen_helper_##name(cpu_env, EA, xt, cpu_gpr[rB(ctx->opcode)]); \
+    tcg_temp_free(EA);                                          \
+    tcg_temp_free(xt);                                          \
+}
+
+VSX_VECTOR_LOAD_STORE_LENGTH(lxvl)
+
 #define VSX_LOAD_SCALAR_DS(name, operation)                       \
 static void gen_##name(DisasContext *ctx)                         \
 {                                                                 \
diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
index 8a1cbe0..3383cdd 100644
--- a/target-ppc/translate/vsx-ops.inc.c
+++ b/target-ppc/translate/vsx-ops.inc.c
@@ -10,6 +10,7 @@ GEN_HANDLER_E(lxvw4x, 0x1F, 0x0C, 0x18, 0, PPC_NONE, PPC2_VSX),
 GEN_HANDLER_E(lxvh8x, 0x1F, 0x0C, 0x19, 0, PPC_NONE,  PPC2_ISA300),
 GEN_HANDLER_E(lxvb16x, 0x1F, 0x0C, 0x1B, 0, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(lxvx, 0x1F, 0x0C, 0x08, 0x00000040, PPC_NONE, PPC2_ISA300),
+GEN_HANDLER_E(lxvl, 0x1F, 0x0D, 0x08, 0, PPC_NONE, PPC2_ISA300),
 
 GEN_HANDLER_E(stxsdx, 0x1F, 0xC, 0x16, 0, PPC_NONE, PPC2_VSX),
 GEN_HANDLER_E(stxsibx, 0x1F, 0xD, 0x1C, 0, PPC_NONE, PPC2_ISA300),
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 04/13] target-ppc: implement lxvll instruction
  2016-12-05 11:25 [Qemu-devel] [PATCH ppc-for-2.9 00/13] POWER9 TCG enablements - part9 Nikunj A Dadhania
                   ` (2 preceding siblings ...)
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 03/13] target-ppc: implement lxvl instruction Nikunj A Dadhania
@ 2016-12-05 11:25 ` Nikunj A Dadhania
  2016-12-05 17:52   ` Richard Henderson
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 05/13] target-ppc: implement stxvl instruction Nikunj A Dadhania
                   ` (9 subsequent siblings)
  13 siblings, 1 reply; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-05 11:25 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, bharata, nikunj

lxvll: Load VSX Vector Left-justified with Length

Little/Big-endian Storage:
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
|“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|FF|FF|
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+

Loading 14 bytes to vector (8-bit elements) in BE/LE:
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
|“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|00|00|
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/helper.h                 |  1 +
 target-ppc/mem_helper.c             | 25 +++++++++++++++++++++++++
 target-ppc/translate/vsx-impl.inc.c |  1 +
 target-ppc/translate/vsx-ops.inc.c  |  1 +
 4 files changed, 28 insertions(+)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index d9ccafd..67c8b71 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -318,6 +318,7 @@ DEF_HELPER_3(stvebx, void, env, avr, tl)
 DEF_HELPER_3(stvehx, void, env, avr, tl)
 DEF_HELPER_3(stvewx, void, env, avr, tl)
 DEF_HELPER_4(lxvl, void, env, tl, tl, tl)
+DEF_HELPER_4(lxvll, void, env, tl, tl, tl)
 DEF_HELPER_4(vsumsws, void, env, avr, avr, avr)
 DEF_HELPER_4(vsum2sws, void, env, avr, avr, avr)
 DEF_HELPER_4(vsum4sbs, void, env, avr, avr, avr)
diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
index 0a8ff54..c5826bc 100644
--- a/target-ppc/mem_helper.c
+++ b/target-ppc/mem_helper.c
@@ -309,6 +309,31 @@ void helper_lxvl(CPUPPCState *env, target_ulong addr,
     putVSR(xt_num, &xt, env);
 }
 
+void helper_lxvll(CPUPPCState *env, target_ulong addr,
+                  target_ulong xt_num, target_ulong rb)
+{
+    ppc_vsr_t xt;
+
+    getVSR(xt_num, &xt, env);
+    if (unlikely((rb & 0xFF) == 0)) {
+        xt.s128 = int128_make128(0, 0);
+    } else {
+        target_ulong end = ((rb & 0xFF) * 8) - 1;
+        if (msr_le) {
+            xt.u64[LO_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
+            addr = addr_add(env, addr, 8);
+            xt.u64[HI_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
+            xt.s128 = int128_and(xt.s128, mask_u128(127 - end, 127));
+        } else {
+            xt.u64[HI_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
+            addr = addr_add(env, addr, 8);
+            xt.u64[LO_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
+            xt.s128 = int128_and(xt.s128, mask_u128(0, end));
+        }
+    }
+    putVSR(xt_num, &xt, env);
+}
+
 #undef HI_IDX
 #undef LO_IDX
 
diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
index e53f91e..40f584e 100644
--- a/target-ppc/translate/vsx-impl.inc.c
+++ b/target-ppc/translate/vsx-impl.inc.c
@@ -266,6 +266,7 @@ static void gen_##name(DisasContext *ctx)                       \
 }
 
 VSX_VECTOR_LOAD_STORE_LENGTH(lxvl)
+VSX_VECTOR_LOAD_STORE_LENGTH(lxvll)
 
 #define VSX_LOAD_SCALAR_DS(name, operation)                       \
 static void gen_##name(DisasContext *ctx)                         \
diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
index 3383cdd..7751a7b 100644
--- a/target-ppc/translate/vsx-ops.inc.c
+++ b/target-ppc/translate/vsx-ops.inc.c
@@ -11,6 +11,7 @@ GEN_HANDLER_E(lxvh8x, 0x1F, 0x0C, 0x19, 0, PPC_NONE,  PPC2_ISA300),
 GEN_HANDLER_E(lxvb16x, 0x1F, 0x0C, 0x1B, 0, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(lxvx, 0x1F, 0x0C, 0x08, 0x00000040, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(lxvl, 0x1F, 0x0D, 0x08, 0, PPC_NONE, PPC2_ISA300),
+GEN_HANDLER_E(lxvll, 0x1F, 0x0D, 0x09, 0, PPC_NONE, PPC2_ISA300),
 
 GEN_HANDLER_E(stxsdx, 0x1F, 0xC, 0x16, 0, PPC_NONE, PPC2_VSX),
 GEN_HANDLER_E(stxsibx, 0x1F, 0xD, 0x1C, 0, PPC_NONE, PPC2_ISA300),
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 05/13] target-ppc: implement stxvl instruction
  2016-12-05 11:25 [Qemu-devel] [PATCH ppc-for-2.9 00/13] POWER9 TCG enablements - part9 Nikunj A Dadhania
                   ` (3 preceding siblings ...)
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 04/13] target-ppc: implement lxvll instruction Nikunj A Dadhania
@ 2016-12-05 11:25 ` Nikunj A Dadhania
  2016-12-05 17:55   ` Richard Henderson
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 06/13] target-ppc: implement stxvll instructions Nikunj A Dadhania
                   ` (8 subsequent siblings)
  13 siblings, 1 reply; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-05 11:25 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, bharata, nikunj

stxvl: Store VSX Vector with Length

Vector (8-bit elements) in BE:
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
|“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|00|00|
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+

Vector (8-bit elements) in LE:
+--+--+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
|00|00|“T”|“S”|“E”|“T”|“ ”|“a”|“ ”|“s”|“i”|“ ”|“s”|“i”|“h”|“T”|
+--+--+---+---+---+---+---+---+---+---+---+---+---+---+---+---+

Storing 14 bytes would result in the following Little/Big-endian Storage:
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
|“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|FF|FF|
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/helper.h                 |  1 +
 target-ppc/mem_helper.c             | 17 +++++++++++++++++
 target-ppc/translate/vsx-impl.inc.c |  1 +
 target-ppc/translate/vsx-ops.inc.c  |  1 +
 4 files changed, 20 insertions(+)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 67c8b71..5ddc96d 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -319,6 +319,7 @@ DEF_HELPER_3(stvehx, void, env, avr, tl)
 DEF_HELPER_3(stvewx, void, env, avr, tl)
 DEF_HELPER_4(lxvl, void, env, tl, tl, tl)
 DEF_HELPER_4(lxvll, void, env, tl, tl, tl)
+DEF_HELPER_4(stxvl, void, env, tl, tl, tl)
 DEF_HELPER_4(vsumsws, void, env, avr, avr, avr)
 DEF_HELPER_4(vsum2sws, void, env, avr, avr, avr)
 DEF_HELPER_4(vsum4sbs, void, env, avr, avr, avr)
diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
index c5826bc..6227c4d 100644
--- a/target-ppc/mem_helper.c
+++ b/target-ppc/mem_helper.c
@@ -334,6 +334,23 @@ void helper_lxvll(CPUPPCState *env, target_ulong addr,
     putVSR(xt_num, &xt, env);
 }
 
+void helper_stxvl(CPUPPCState *env, target_ulong addr,
+                 target_ulong xt_num, target_ulong rb)
+{
+    int i;
+    ppc_vsr_t xt;
+    target_ulong end = rb & 0xFF;
+
+    if (!end) {
+        return;
+    }
+    getVSR(xt_num, &xt, env);
+    for (i = 15; i > 15 - end; i--) {
+        cpu_stb_data_ra(env, addr, xt.u8[i], GETPC());
+        addr = addr_add(env, addr, 1);
+    }
+}
+
 #undef HI_IDX
 #undef LO_IDX
 
diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
index 40f584e..d8e2ab4 100644
--- a/target-ppc/translate/vsx-impl.inc.c
+++ b/target-ppc/translate/vsx-impl.inc.c
@@ -267,6 +267,7 @@ static void gen_##name(DisasContext *ctx)                       \
 
 VSX_VECTOR_LOAD_STORE_LENGTH(lxvl)
 VSX_VECTOR_LOAD_STORE_LENGTH(lxvll)
+VSX_VECTOR_LOAD_STORE_LENGTH(stxvl)
 
 #define VSX_LOAD_SCALAR_DS(name, operation)                       \
 static void gen_##name(DisasContext *ctx)                         \
diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
index 7751a7b..1a980d6 100644
--- a/target-ppc/translate/vsx-ops.inc.c
+++ b/target-ppc/translate/vsx-ops.inc.c
@@ -23,6 +23,7 @@ GEN_HANDLER_E(stxvw4x, 0x1F, 0xC, 0x1C, 0, PPC_NONE, PPC2_VSX),
 GEN_HANDLER_E(stxvh8x, 0x1F, 0x0C, 0x1D, 0, PPC_NONE,  PPC2_ISA300),
 GEN_HANDLER_E(stxvb16x, 0x1F, 0x0C, 0x1F, 0, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(stxvx, 0x1F, 0x0C, 0x0C, 0, PPC_NONE, PPC2_ISA300),
+GEN_HANDLER_E(stxvl, 0x1F, 0x0D, 0x0C, 0, PPC_NONE, PPC2_ISA300),
 
 GEN_HANDLER_E(mfvsrwz, 0x1F, 0x13, 0x03, 0x0000F800, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER_E(mtvsrwa, 0x1F, 0x13, 0x06, 0x0000F800, PPC_NONE, PPC2_VSX207),
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 06/13] target-ppc: implement stxvll instructions
  2016-12-05 11:25 [Qemu-devel] [PATCH ppc-for-2.9 00/13] POWER9 TCG enablements - part9 Nikunj A Dadhania
                   ` (4 preceding siblings ...)
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 05/13] target-ppc: implement stxvl instruction Nikunj A Dadhania
@ 2016-12-05 11:25 ` Nikunj A Dadhania
  2016-12-05 17:57   ` Richard Henderson
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 07/13] target-ppc: implement xxextractuw instruction Nikunj A Dadhania
                   ` (7 subsequent siblings)
  13 siblings, 1 reply; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-05 11:25 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, bharata, nikunj

stxvll: Store VSX Vector Left-justified with Length

Vector (8-bit elements) in LE/BE:
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
|“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|00|00|
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+

Storing 14 bytes would result in the following Little/Big-endian Storage:
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
|“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|FF|FF|
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/helper.h                 |  1 +
 target-ppc/mem_helper.c             | 25 +++++++++++++++++++++++++
 target-ppc/translate/vsx-impl.inc.c |  1 +
 target-ppc/translate/vsx-ops.inc.c  |  1 +
 4 files changed, 28 insertions(+)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 5ddc96d..91bdfc3 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -320,6 +320,7 @@ DEF_HELPER_3(stvewx, void, env, avr, tl)
 DEF_HELPER_4(lxvl, void, env, tl, tl, tl)
 DEF_HELPER_4(lxvll, void, env, tl, tl, tl)
 DEF_HELPER_4(stxvl, void, env, tl, tl, tl)
+DEF_HELPER_4(stxvll, void, env, tl, tl, tl)
 DEF_HELPER_4(vsumsws, void, env, avr, avr, avr)
 DEF_HELPER_4(vsum2sws, void, env, avr, avr, avr)
 DEF_HELPER_4(vsum4sbs, void, env, avr, avr, avr)
diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
index 6227c4d..b9885a8 100644
--- a/target-ppc/mem_helper.c
+++ b/target-ppc/mem_helper.c
@@ -351,6 +351,31 @@ void helper_stxvl(CPUPPCState *env, target_ulong addr,
     }
 }
 
+void helper_stxvll(CPUPPCState *env, target_ulong addr,
+                   target_ulong xt_num, target_ulong rb)
+{
+    int i;
+    ppc_vsr_t xt;
+    target_ulong end = rb & 0xFF;
+
+    if (!end) {
+        return;
+    }
+
+    getVSR(xt_num, &xt, env);
+    if (msr_le) {
+        for (i = 0; i < end; i++) {
+            cpu_stb_data_ra(env, addr, xt.u8[i], GETPC());
+            addr = addr_add(env, addr, 1);
+        }
+    } else {
+        for (i = 15; i > 15 - end; i--) {
+            cpu_stb_data_ra(env, addr, xt.u8[i], GETPC());
+            addr = addr_add(env, addr, 1);
+        }
+    }
+}
+
 #undef HI_IDX
 #undef LO_IDX
 
diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
index d8e2ab4..d2d931c 100644
--- a/target-ppc/translate/vsx-impl.inc.c
+++ b/target-ppc/translate/vsx-impl.inc.c
@@ -268,6 +268,7 @@ static void gen_##name(DisasContext *ctx)                       \
 VSX_VECTOR_LOAD_STORE_LENGTH(lxvl)
 VSX_VECTOR_LOAD_STORE_LENGTH(lxvll)
 VSX_VECTOR_LOAD_STORE_LENGTH(stxvl)
+VSX_VECTOR_LOAD_STORE_LENGTH(stxvll)
 
 #define VSX_LOAD_SCALAR_DS(name, operation)                       \
 static void gen_##name(DisasContext *ctx)                         \
diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
index 1a980d6..f315580 100644
--- a/target-ppc/translate/vsx-ops.inc.c
+++ b/target-ppc/translate/vsx-ops.inc.c
@@ -24,6 +24,7 @@ GEN_HANDLER_E(stxvh8x, 0x1F, 0x0C, 0x1D, 0, PPC_NONE,  PPC2_ISA300),
 GEN_HANDLER_E(stxvb16x, 0x1F, 0x0C, 0x1F, 0, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(stxvx, 0x1F, 0x0C, 0x0C, 0, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(stxvl, 0x1F, 0x0D, 0x0C, 0, PPC_NONE, PPC2_ISA300),
+GEN_HANDLER_E(stxvll, 0x1F, 0x0D, 0x0D, 0, PPC_NONE, PPC2_ISA300),
 
 GEN_HANDLER_E(mfvsrwz, 0x1F, 0x13, 0x03, 0x0000F800, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER_E(mtvsrwa, 0x1F, 0x13, 0x06, 0x0000F800, PPC_NONE, PPC2_VSX207),
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 07/13] target-ppc: implement xxextractuw instruction
  2016-12-05 11:25 [Qemu-devel] [PATCH ppc-for-2.9 00/13] POWER9 TCG enablements - part9 Nikunj A Dadhania
                   ` (5 preceding siblings ...)
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 06/13] target-ppc: implement stxvll instructions Nikunj A Dadhania
@ 2016-12-05 11:25 ` Nikunj A Dadhania
  2016-12-05 18:10   ` Richard Henderson
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 08/13] target-ppc: implement xxinsertw instruction Nikunj A Dadhania
                   ` (6 subsequent siblings)
  13 siblings, 1 reply; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-05 11:25 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, bharata, nikunj

xxextractuw: VSX Vector Extract Unsigned Word

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/helper.h                 |  1 +
 target-ppc/int_helper.c             | 33 +++++++++++++++++++++++++++++++++
 target-ppc/translate/vsx-impl.inc.c | 22 ++++++++++++++++++++++
 target-ppc/translate/vsx-ops.inc.c  |  5 +++++
 4 files changed, 61 insertions(+)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 91bdfc3..940f81c 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -536,6 +536,7 @@ DEF_HELPER_2(xvrspic, void, env, i32)
 DEF_HELPER_2(xvrspim, void, env, i32)
 DEF_HELPER_2(xvrspip, void, env, i32)
 DEF_HELPER_2(xvrspiz, void, env, i32)
+DEF_HELPER_4(xxextractuw, void, env, tl, tl, i32)
 
 DEF_HELPER_2(efscfsi, i32, env, i32)
 DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 7030f61..d7f50bd 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -2033,6 +2033,39 @@ VEXTRACT(uw, u32)
 VEXTRACT(d, u64)
 #undef VEXTRACT
 
+#if defined(HOST_WORDS_BIGENDIAN)
+#define XXEXTRACT(name, element)                                        \
+void helper_##name(CPUPPCState *env, target_ulong xtn,                  \
+                   target_ulong xbn, uint32_t index)                    \
+{                                                                       \
+    ppc_vsr_t xt, xb;                                                   \
+    uint32_t es = sizeof(xt.element[0]);                                \
+                                                                        \
+    getVSR(xbn, &xb, env);                                              \
+    memmove(&xt.u8[8 - es], &xb.u8[index], es);                         \
+    memset(&xt.u8[8], 0, 8);                                            \
+    memset(&xt.u8[0], 0, 8 - es);                                       \
+    putVSR(xtn, &xt, env);                                              \
+}
+#else
+#define XXEXTRACT(name, element)                                        \
+void helper_##name(CPUPPCState *env, target_ulong xtn,                  \
+                   target_ulong xbn, uint32_t index)                    \
+{                                                                       \
+    ppc_vsr_t xt, xb;                                                   \
+    uint32_t es = sizeof(xt.element[0]);                                \
+    uint32_t s = (16 - index) - es;                                     \
+                                                                        \
+    getVSR(xbn, &xb, env);                                              \
+    memmove(&xt.u8[8], &xb.u8[s], es);                                  \
+    memset(&xt.u8[0], 0, 8);                                            \
+    memset(&xt.u8[8 + es], 0, 8 - es);                                  \
+    putVSR(xtn, &xt, env);                                              \
+}
+#endif
+XXEXTRACT(xxextractuw, u32)
+#undef XXEXTRACT
+
 #define VEXT_SIGNED(name, element, mask, cast, recast)              \
 void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
 {                                                                   \
diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
index d2d931c..e978b7e 100644
--- a/target-ppc/translate/vsx-impl.inc.c
+++ b/target-ppc/translate/vsx-impl.inc.c
@@ -1127,6 +1127,28 @@ static void gen_xxsldwi(DisasContext *ctx)
     tcg_temp_free_i64(xtl);
 }
 
+#define VSX_EXTRACT(name)                                       \
+static void gen_##name(DisasContext *ctx)                       \
+{                                                               \
+    TCGv xt, xb;                                                \
+    TCGv_i32 t0 = tcg_temp_new_i32();                           \
+    uint8_t uimm = UIMM4(ctx->opcode);                          \
+                                                                \
+    if (unlikely(!ctx->vsx_enabled)) {                          \
+        gen_exception(ctx, POWERPC_EXCP_VSXU);                  \
+        return;                                                 \
+    }                                                           \
+    xt = tcg_const_tl(xT(ctx->opcode));                         \
+    xb = tcg_const_tl(xB(ctx->opcode));                         \
+    tcg_gen_movi_i32(t0, uimm);                                 \
+    gen_helper_##name(cpu_env, xt, xb, t0);                     \
+    tcg_temp_free(xb);                                          \
+    tcg_temp_free(xt);                                          \
+    tcg_temp_free_i32(t0);                                      \
+}
+
+VSX_EXTRACT(xxextractuw)
+
 #undef GEN_XX2FORM
 #undef GEN_XX3FORM
 #undef GEN_XX2IFORM
diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
index f315580..1f35365 100644
--- a/target-ppc/translate/vsx-ops.inc.c
+++ b/target-ppc/translate/vsx-ops.inc.c
@@ -45,6 +45,10 @@ GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2)
 GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0, PPC_NONE, fl2), \
 GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2)
 
+#define GEN_XX2FORM_EXT(name, opc2, opc3, fl2)                          \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0x00100000, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0x00100000, PPC_NONE, fl2)
+
 #define GEN_XX2FORM_EO(name, opc2, opc3, opc4, fl2)                          \
 GEN_HANDLER2_E_2(name, #name, 0x3C, opc2 | 0, opc3, opc4, 0, PPC_NONE, fl2), \
 GEN_HANDLER2_E_2(name, #name, 0x3C, opc2 | 1, opc3, opc4, 0, PPC_NONE, fl2)
@@ -267,6 +271,7 @@ GEN_XX3FORM(xxmrglw, 0x08, 0x06, PPC2_VSX),
 GEN_XX2FORM(xxspltw, 0x08, 0x0A, PPC2_VSX),
 GEN_XX1FORM(xxspltib, 0x08, 0x0B, PPC2_ISA300),
 GEN_XX3FORM_DM(xxsldwi, 0x08, 0x00),
+GEN_XX2FORM_EXT(xxextractuw, 0x0A, 0x0A, PPC2_ISA300),
 
 #define GEN_XXSEL_ROW(opc3) \
 GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x18, opc3, 0, PPC_NONE, PPC2_VSX), \
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 08/13] target-ppc: implement xxinsertw instruction
  2016-12-05 11:25 [Qemu-devel] [PATCH ppc-for-2.9 00/13] POWER9 TCG enablements - part9 Nikunj A Dadhania
                   ` (6 preceding siblings ...)
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 07/13] target-ppc: implement xxextractuw instruction Nikunj A Dadhania
@ 2016-12-05 11:25 ` Nikunj A Dadhania
  2016-12-05 18:14   ` Richard Henderson
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 09/13] target-ppc: implement stop instruction Nikunj A Dadhania
                   ` (5 subsequent siblings)
  13 siblings, 1 reply; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-05 11:25 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, bharata, nikunj

xxinsertw: VSX Vector Insert Word

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/helper.h                 |  1 +
 target-ppc/int_helper.c             | 30 ++++++++++++++++++++++++++++++
 target-ppc/translate/vsx-impl.inc.c |  5 +++--
 target-ppc/translate/vsx-ops.inc.c  |  1 +
 4 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 940f81c..9f812c8 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -537,6 +537,7 @@ DEF_HELPER_2(xvrspim, void, env, i32)
 DEF_HELPER_2(xvrspip, void, env, i32)
 DEF_HELPER_2(xvrspiz, void, env, i32)
 DEF_HELPER_4(xxextractuw, void, env, tl, tl, i32)
+DEF_HELPER_4(xxinsertw, void, env, tl, tl, i32)
 
 DEF_HELPER_2(efscfsi, i32, env, i32)
 DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index d7f50bd..876ce04 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -2066,6 +2066,36 @@ void helper_##name(CPUPPCState *env, target_ulong xtn,                  \
 XXEXTRACT(xxextractuw, u32)
 #undef XXEXTRACT
 
+#if defined(HOST_WORDS_BIGENDIAN)
+#define XXINSERT(name, element)                                         \
+void helper_##name(CPUPPCState *env, target_ulong xtn,                  \
+                   target_ulong xbn, uint32_t index)                    \
+{                                                                       \
+    ppc_vsr_t xt, xb;                                                   \
+                                                                        \
+    getVSR(xbn, &xb, env);                                              \
+    getVSR(xtn, &xt, env);                                              \
+    memmove(&xt.u8[index], &xb.u8[8 - sizeof(xt.element)],              \
+            sizeof(xt.element[0]));                                     \
+    putVSR(xtn, &xt, env);                                              \
+}
+#else
+#define XXINSERT(name, element)                                         \
+void helper_##name(CPUPPCState *env, target_ulong xtn,                  \
+                   target_ulong xbn, uint32_t index)                    \
+{                                                                       \
+    ppc_vsr_t xt, xb;                                                   \
+    uint32_t d = (16 - index) - sizeof(xt.element[0]);                  \
+                                                                        \
+    getVSR(xbn, &xb, env);                                              \
+    getVSR(xtn, &xt, env);                                              \
+    memmove(&xt.u8[d], &xb.u8[8], sizeof(xt.element[0]));               \
+    putVSR(xtn, &xt, env);                                              \
+}
+#endif
+XXINSERT(xxinsertw, u32)
+#undef XXINSERT
+
 #define VEXT_SIGNED(name, element, mask, cast, recast)              \
 void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
 {                                                                   \
diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
index e978b7e..07f1904 100644
--- a/target-ppc/translate/vsx-impl.inc.c
+++ b/target-ppc/translate/vsx-impl.inc.c
@@ -1127,7 +1127,7 @@ static void gen_xxsldwi(DisasContext *ctx)
     tcg_temp_free_i64(xtl);
 }
 
-#define VSX_EXTRACT(name)                                       \
+#define VSX_EXTRACT_INSERT(name)                                \
 static void gen_##name(DisasContext *ctx)                       \
 {                                                               \
     TCGv xt, xb;                                                \
@@ -1147,7 +1147,8 @@ static void gen_##name(DisasContext *ctx)                       \
     tcg_temp_free_i32(t0);                                      \
 }
 
-VSX_EXTRACT(xxextractuw)
+VSX_EXTRACT_INSERT(xxextractuw)
+VSX_EXTRACT_INSERT(xxinsertw)
 
 #undef GEN_XX2FORM
 #undef GEN_XX3FORM
diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
index 1f35365..1285e0b 100644
--- a/target-ppc/translate/vsx-ops.inc.c
+++ b/target-ppc/translate/vsx-ops.inc.c
@@ -272,6 +272,7 @@ GEN_XX2FORM(xxspltw, 0x08, 0x0A, PPC2_VSX),
 GEN_XX1FORM(xxspltib, 0x08, 0x0B, PPC2_ISA300),
 GEN_XX3FORM_DM(xxsldwi, 0x08, 0x00),
 GEN_XX2FORM_EXT(xxextractuw, 0x0A, 0x0A, PPC2_ISA300),
+GEN_XX2FORM_EXT(xxinsertw, 0x0A, 0x0B, PPC2_ISA300),
 
 #define GEN_XXSEL_ROW(opc3) \
 GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x18, opc3, 0, PPC_NONE, PPC2_VSX), \
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 09/13] target-ppc: implement stop instruction
  2016-12-05 11:25 [Qemu-devel] [PATCH ppc-for-2.9 00/13] POWER9 TCG enablements - part9 Nikunj A Dadhania
                   ` (7 preceding siblings ...)
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 08/13] target-ppc: implement xxinsertw instruction Nikunj A Dadhania
@ 2016-12-05 11:25 ` Nikunj A Dadhania
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 10/13] target-ppc: implement xsabsqp/xsnabsqp instruction Nikunj A Dadhania
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-05 11:25 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, bharata, nikunj

Implement the stop instruction by reusing the existing nap code:
gen_stop() simply calls gen_nap().

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/translate.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index f68f427..47349e7 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -3153,6 +3153,11 @@ static void gen_nap(DisasContext *ctx)
 #endif /* defined(CONFIG_USER_ONLY) */
 }
 
+static void gen_stop(DisasContext *ctx)
+{
+    gen_nap(ctx);
+}
+
 static void gen_sleep(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
@@ -6221,6 +6226,7 @@ GEN_HANDLER(mcrf, 0x13, 0x00, 0xFF, 0x00000001, PPC_INTEGER),
 GEN_HANDLER(rfi, 0x13, 0x12, 0x01, 0x03FF8001, PPC_FLOW),
 #if defined(TARGET_PPC64)
 GEN_HANDLER(rfid, 0x13, 0x12, 0x00, 0x03FF8001, PPC_64B),
+GEN_HANDLER_E(stop, 0x13, 0x12, 0x0b, 0x03FFF801, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(doze, 0x13, 0x12, 0x0c, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206),
 GEN_HANDLER_E(nap, 0x13, 0x12, 0x0d, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206),
 GEN_HANDLER_E(sleep, 0x13, 0x12, 0x0e, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206),
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 10/13] target-ppc: implement xsabsqp/xsnabsqp instruction
  2016-12-05 11:25 [Qemu-devel] [PATCH ppc-for-2.9 00/13] POWER9 TCG enablements - part9 Nikunj A Dadhania
                   ` (8 preceding siblings ...)
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 09/13] target-ppc: implement stop instruction Nikunj A Dadhania
@ 2016-12-05 11:25 ` Nikunj A Dadhania
  2016-12-05 18:14   ` Richard Henderson
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 11/13] target-ppc: implement xsnegqp instruction Nikunj A Dadhania
                   ` (3 subsequent siblings)
  13 siblings, 1 reply; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-05 11:25 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, bharata, nikunj

xsabsqp:  VSX Scalar Absolute Quad-Precision
xsnabsqp: VSX Scalar Negative Absolute Quad-Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/translate/vsx-impl.inc.c | 35 +++++++++++++++++++++++++++++++++++
 target-ppc/translate/vsx-ops.inc.c  |  5 +++++
 2 files changed, 40 insertions(+)

diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
index 07f1904..970d83c 100644
--- a/target-ppc/translate/vsx-impl.inc.c
+++ b/target-ppc/translate/vsx-impl.inc.c
@@ -639,6 +639,41 @@ VSX_SCALAR_MOVE(xsnabsdp, OP_NABS, SGN_MASK_DP)
 VSX_SCALAR_MOVE(xsnegdp, OP_NEG, SGN_MASK_DP)
 VSX_SCALAR_MOVE(xscpsgndp, OP_CPSGN, SGN_MASK_DP)
 
+#define VSX_SCALAR_MOVE_QP(name, op, sgn_mask)                    \
+static void glue(gen_, name)(DisasContext *ctx)                   \
+{                                                                 \
+    int xt = rD(ctx->opcode) + 32;                                \
+    int xb = rB(ctx->opcode) + 32;                                \
+    TCGv_i64 xbh, xbl, sgm;                                       \
+                                                                  \
+    if (unlikely(!ctx->vsx_enabled)) {                            \
+        gen_exception(ctx, POWERPC_EXCP_VSXU);                    \
+        return;                                                   \
+    }                                                             \
+    xbh = tcg_temp_new_i64();                                     \
+    xbl = tcg_temp_new_i64();                                     \
+    sgm = tcg_temp_new_i64();                                     \
+    tcg_gen_mov_i64(xbh, cpu_vsrh(xb));                           \
+    tcg_gen_mov_i64(xbl, cpu_vsrl(xb));                           \
+    tcg_gen_movi_i64(sgm, sgn_mask);                              \
+    switch (op) {                                                 \
+    case OP_ABS:                                                  \
+        tcg_gen_andc_i64(xbh, xbh, sgm);                          \
+        break;                                                    \
+    case OP_NABS:                                                 \
+        tcg_gen_or_i64(xbh, xbh, sgm);                            \
+        break;                                                    \
+    }                                                             \
+    tcg_gen_mov_i64(cpu_vsrh(xt), xbh);                           \
+    tcg_gen_mov_i64(cpu_vsrl(xt), xbl);                           \
+    tcg_temp_free_i64(xbl);                                       \
+    tcg_temp_free_i64(xbh);                                       \
+    tcg_temp_free_i64(sgm);                                       \
+}
+
+VSX_SCALAR_MOVE_QP(xsabsqp, OP_ABS, SGN_MASK_DP)
+VSX_SCALAR_MOVE_QP(xsnabsqp, OP_NABS, SGN_MASK_DP)
+
 #define VSX_VECTOR_MOVE(name, op, sgn_mask)                      \
 static void glue(gen_, name)(DisasContext * ctx)                 \
     {                                                            \
diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
index 1285e0b..0216efe 100644
--- a/target-ppc/translate/vsx-ops.inc.c
+++ b/target-ppc/translate/vsx-ops.inc.c
@@ -96,12 +96,17 @@ GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x03, opc3|0x0C, 0, PPC_NONE, PPC2_VSX)
 #define GEN_VSX_XFORM_300(name, opc2, opc3, inval) \
 GEN_HANDLER_E(name, 0x3F, opc2, opc3, inval, PPC_NONE, PPC2_ISA300)
 
+#define GEN_VSX_XFORM_300_EO(name, opc2, opc3, opc4, inval)             \
+GEN_HANDLER_E_2(name, 0x3F, opc2, opc3, opc4, inval, PPC_NONE, PPC2_ISA300)
 
 GEN_XX2FORM(xsabsdp, 0x12, 0x15, PPC2_VSX),
 GEN_XX2FORM(xsnabsdp, 0x12, 0x16, PPC2_VSX),
 GEN_XX2FORM(xsnegdp, 0x12, 0x17, PPC2_VSX),
 GEN_XX3FORM(xscpsgndp, 0x00, 0x16, PPC2_VSX),
 
+GEN_VSX_XFORM_300_EO(xsabsqp, 0x04, 0x19, 0x00, 0x00000001),
+GEN_VSX_XFORM_300_EO(xsnabsqp, 0x04, 0x19, 0x08, 0x00000001),
+
 GEN_XX2FORM(xvabsdp, 0x12, 0x1D, PPC2_VSX),
 GEN_XX2FORM(xvnabsdp, 0x12, 0x1E, PPC2_VSX),
 GEN_XX2FORM(xvnegdp, 0x12, 0x1F, PPC2_VSX),
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 11/13] target-ppc: implement xsnegqp instruction
  2016-12-05 11:25 [Qemu-devel] [PATCH ppc-for-2.9 00/13] POWER9 TCG enablements - part9 Nikunj A Dadhania
                   ` (9 preceding siblings ...)
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 10/13] target-ppc: implement xsabsqp/xsnabsqp instruction Nikunj A Dadhania
@ 2016-12-05 11:25 ` Nikunj A Dadhania
  2016-12-05 18:15   ` Richard Henderson
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 12/13] target-ppc: implement xscpsgnqp instruction Nikunj A Dadhania
                   ` (2 subsequent siblings)
  13 siblings, 1 reply; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-05 11:25 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, bharata, nikunj

xsnegqp: VSX Scalar Negate Quad-Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/translate/vsx-impl.inc.c | 5 +++++
 target-ppc/translate/vsx-ops.inc.c  | 1 +
 2 files changed, 6 insertions(+)

diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
index 970d83c..560deaf 100644
--- a/target-ppc/translate/vsx-impl.inc.c
+++ b/target-ppc/translate/vsx-impl.inc.c
@@ -663,6 +663,10 @@ static void glue(gen_, name)(DisasContext *ctx)                   \
     case OP_NABS:                                                 \
         tcg_gen_or_i64(xbh, xbh, sgm);                            \
         break;                                                    \
+    case OP_NEG:                                                  \
+        tcg_gen_xor_i64(xbh, xbh, sgm);                           \
+        tcg_gen_xori_i64(xbl, xbl, 0);                            \
+        break;                                                    \
     }                                                             \
     tcg_gen_mov_i64(cpu_vsrh(xt), xbh);                           \
     tcg_gen_mov_i64(cpu_vsrl(xt), xbl);                           \
@@ -673,6 +677,7 @@ static void glue(gen_, name)(DisasContext *ctx)                   \
 
 VSX_SCALAR_MOVE_QP(xsabsqp, OP_ABS, SGN_MASK_DP)
 VSX_SCALAR_MOVE_QP(xsnabsqp, OP_NABS, SGN_MASK_DP)
+VSX_SCALAR_MOVE_QP(xsnegqp, OP_NEG, SGN_MASK_DP)
 
 #define VSX_VECTOR_MOVE(name, op, sgn_mask)                      \
 static void glue(gen_, name)(DisasContext * ctx)                 \
diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
index 0216efe..d798edb 100644
--- a/target-ppc/translate/vsx-ops.inc.c
+++ b/target-ppc/translate/vsx-ops.inc.c
@@ -106,6 +106,7 @@ GEN_XX3FORM(xscpsgndp, 0x00, 0x16, PPC2_VSX),
 
 GEN_VSX_XFORM_300_EO(xsabsqp, 0x04, 0x19, 0x00, 0x00000001),
 GEN_VSX_XFORM_300_EO(xsnabsqp, 0x04, 0x19, 0x08, 0x00000001),
+GEN_VSX_XFORM_300_EO(xsnegqp, 0x04, 0x19, 0x10, 0x00000001),
 
 GEN_XX2FORM(xvabsdp, 0x12, 0x1D, PPC2_VSX),
 GEN_XX2FORM(xvnabsdp, 0x12, 0x1E, PPC2_VSX),
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 12/13] target-ppc: implement xscpsgnqp instruction
  2016-12-05 11:25 [Qemu-devel] [PATCH ppc-for-2.9 00/13] POWER9 TCG enablements - part9 Nikunj A Dadhania
                   ` (10 preceding siblings ...)
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 11/13] target-ppc: implement xsnegqp instruction Nikunj A Dadhania
@ 2016-12-05 11:25 ` Nikunj A Dadhania
  2016-12-05 18:18   ` Richard Henderson
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 13/13] target-ppc: Add xxperm and xxpermr instructions Nikunj A Dadhania
  2016-12-06  4:12 ` [Qemu-devel] [PATCH ppc-for-2.9 00/13] POWER9 TCG enablements - part9 David Gibson
  13 siblings, 1 reply; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-05 11:25 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, bharata, nikunj

xscpsgnqp: VSX Scalar Copy Sign Quad-Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/translate/vsx-impl.inc.c | 12 +++++++++++-
 target-ppc/translate/vsx-ops.inc.c  |  1 +
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
index 560deaf..77f098b 100644
--- a/target-ppc/translate/vsx-impl.inc.c
+++ b/target-ppc/translate/vsx-impl.inc.c
@@ -642,9 +642,10 @@ VSX_SCALAR_MOVE(xscpsgndp, OP_CPSGN, SGN_MASK_DP)
 #define VSX_SCALAR_MOVE_QP(name, op, sgn_mask)                    \
 static void glue(gen_, name)(DisasContext *ctx)                   \
 {                                                                 \
+    int xa;                                                       \
     int xt = rD(ctx->opcode) + 32;                                \
     int xb = rB(ctx->opcode) + 32;                                \
-    TCGv_i64 xbh, xbl, sgm;                                       \
+    TCGv_i64 xah, xbh, xbl, sgm;                                  \
                                                                   \
     if (unlikely(!ctx->vsx_enabled)) {                            \
         gen_exception(ctx, POWERPC_EXCP_VSXU);                    \
@@ -667,6 +668,14 @@ static void glue(gen_, name)(DisasContext *ctx)                   \
         tcg_gen_xor_i64(xbh, xbh, sgm);                           \
         tcg_gen_xori_i64(xbl, xbl, 0);                            \
         break;                                                    \
+    case OP_CPSGN:                                                \
+        xah = tcg_temp_new_i64();                                 \
+        xa = rA(ctx->opcode) + 32;                                \
+        tcg_gen_and_i64(xah, cpu_vsrh(xa), sgm);                  \
+        tcg_gen_andc_i64(xbh, xbh, sgm);                          \
+        tcg_gen_or_i64(xbh, xbh, xah);                            \
+        tcg_temp_free_i64(xah);                                   \
+        break;                                                    \
     }                                                             \
     tcg_gen_mov_i64(cpu_vsrh(xt), xbh);                           \
     tcg_gen_mov_i64(cpu_vsrl(xt), xbl);                           \
@@ -678,6 +687,7 @@ static void glue(gen_, name)(DisasContext *ctx)                   \
 VSX_SCALAR_MOVE_QP(xsabsqp, OP_ABS, SGN_MASK_DP)
 VSX_SCALAR_MOVE_QP(xsnabsqp, OP_NABS, SGN_MASK_DP)
 VSX_SCALAR_MOVE_QP(xsnegqp, OP_NEG, SGN_MASK_DP)
+VSX_SCALAR_MOVE_QP(xscpsgnqp, OP_CPSGN, SGN_MASK_DP)
 
 #define VSX_VECTOR_MOVE(name, op, sgn_mask)                      \
 static void glue(gen_, name)(DisasContext * ctx)                 \
diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
index d798edb..42e83d2 100644
--- a/target-ppc/translate/vsx-ops.inc.c
+++ b/target-ppc/translate/vsx-ops.inc.c
@@ -107,6 +107,7 @@ GEN_XX3FORM(xscpsgndp, 0x00, 0x16, PPC2_VSX),
 GEN_VSX_XFORM_300_EO(xsabsqp, 0x04, 0x19, 0x00, 0x00000001),
 GEN_VSX_XFORM_300_EO(xsnabsqp, 0x04, 0x19, 0x08, 0x00000001),
 GEN_VSX_XFORM_300_EO(xsnegqp, 0x04, 0x19, 0x10, 0x00000001),
+GEN_VSX_XFORM_300(xscpsgnqp, 0x04, 0x03, 0x00000001),
 
 GEN_XX2FORM(xvabsdp, 0x12, 0x1D, PPC2_VSX),
 GEN_XX2FORM(xvnabsdp, 0x12, 0x1E, PPC2_VSX),
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 13/13] target-ppc: Add xxperm and xxpermr instructions
  2016-12-05 11:25 [Qemu-devel] [PATCH ppc-for-2.9 00/13] POWER9 TCG enablements - part9 Nikunj A Dadhania
                   ` (11 preceding siblings ...)
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 12/13] target-ppc: implement xscpsgnqp instruction Nikunj A Dadhania
@ 2016-12-05 11:25 ` Nikunj A Dadhania
  2016-12-06  4:11   ` David Gibson
  2016-12-06  4:12 ` [Qemu-devel] [PATCH ppc-for-2.9 00/13] POWER9 TCG enablements - part9 David Gibson
  13 siblings, 1 reply; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-05 11:25 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, bharata, nikunj

From: Bharata B Rao <bharata@linux.vnet.ibm.com>

xxperm:  VSX Vector Permute
xxpermr: VSX Vector Permute Right-indexed

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/fpu_helper.c             | 50 +++++++++++++++++++++++++++++++++++++
 target-ppc/helper.h                 |  2 ++
 target-ppc/translate/vsx-impl.inc.c |  2 ++
 target-ppc/translate/vsx-ops.inc.c  |  2 ++
 4 files changed, 56 insertions(+)

diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 3b867cf..be552c7 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -2869,3 +2869,53 @@ uint64_t helper_xsrsp(CPUPPCState *env, uint64_t xb)
     float_check_status(env);
     return xt;
 }
+
+static void vsr_copy_256(ppc_vsr_t *xa, ppc_vsr_t *xt, int8_t *src)
+{
+#if defined(HOST_WORDS_BIGENDIAN)
+    memcpy(src, xa, sizeof(*xa));
+    memcpy(src + 16, xt, sizeof(*xt));
+#else
+    memcpy(src, xt, sizeof(*xt));
+    memcpy(src + 16, xa, sizeof(*xa));
+#endif
+}
+
+static int8_t vsr_get_byte(int8_t *src, int bound, int idx)
+{
+    if (idx >= bound) {
+        return 0xFF;
+    }
+#if defined(HOST_WORDS_BIGENDIAN)
+    return src[idx];
+#else
+    return src[bound - 1 - idx];
+#endif
+}
+
+#define VSX_XXPERM(op, indexed)                                    \
+void helper_##op(CPUPPCState *env, uint32_t opcode)                \
+{                                                                  \
+    ppc_vsr_t xt, xa, pcv;                                         \
+    int i, idx;                                                    \
+    int8_t src[32];                                                \
+                                                                   \
+    getVSR(xA(opcode), &xa, env);                                  \
+    getVSR(xT(opcode), &xt, env);                                  \
+    getVSR(xB(opcode), &pcv, env);                                 \
+                                                                   \
+    vsr_copy_256(&xa, &xt, src);                                   \
+                                                                   \
+    for (i = 0; i < 16; i++) {                                     \
+        idx = pcv.VsrB(i) & 0x1F;                                  \
+        if (indexed) {                                             \
+            xt.VsrB(i) = vsr_get_byte(src, 32, 31 - idx);          \
+        } else {                                                   \
+            xt.VsrB(i) = vsr_get_byte(src, 32, idx);               \
+        }                                                          \
+    }                                                              \
+    putVSR(xT(opcode), &xt, env);                                  \
+}
+
+VSX_XXPERM(xxperm, 0)
+VSX_XXPERM(xxpermr, 1)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 9f812c8..399cf99 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -538,6 +538,8 @@ DEF_HELPER_2(xvrspip, void, env, i32)
 DEF_HELPER_2(xvrspiz, void, env, i32)
 DEF_HELPER_4(xxextractuw, void, env, tl, tl, i32)
 DEF_HELPER_4(xxinsertw, void, env, tl, tl, i32)
+DEF_HELPER_2(xxperm, void, env, i32)
+DEF_HELPER_2(xxpermr, void, env, i32)
 
 DEF_HELPER_2(efscfsi, i32, env, i32)
 DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
index 77f098b..2ad152e 100644
--- a/target-ppc/translate/vsx-impl.inc.c
+++ b/target-ppc/translate/vsx-impl.inc.c
@@ -914,6 +914,8 @@ GEN_VSX_HELPER_2(xvrspic, 0x16, 0x0A, 0, PPC2_VSX)
 GEN_VSX_HELPER_2(xvrspim, 0x12, 0x0B, 0, PPC2_VSX)
 GEN_VSX_HELPER_2(xvrspip, 0x12, 0x0A, 0, PPC2_VSX)
 GEN_VSX_HELPER_2(xvrspiz, 0x12, 0x09, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xxperm, 0x08, 0x03, 0, PPC2_ISA300)
+GEN_VSX_HELPER_2(xxpermr, 0x08, 0x07, 0, PPC2_ISA300)
 
 static void gen_xxbrd(DisasContext *ctx)
 {
diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
index 42e83d2..93fb9b8 100644
--- a/target-ppc/translate/vsx-ops.inc.c
+++ b/target-ppc/translate/vsx-ops.inc.c
@@ -275,6 +275,8 @@ VSX_LOGICAL(xxlnand, 0x8, 0x16, PPC2_VSX207),
 VSX_LOGICAL(xxlorc, 0x8, 0x15, PPC2_VSX207),
 GEN_XX3FORM(xxmrghw, 0x08, 0x02, PPC2_VSX),
 GEN_XX3FORM(xxmrglw, 0x08, 0x06, PPC2_VSX),
+GEN_XX3FORM(xxperm, 0x08, 0x03, PPC2_ISA300),
+GEN_XX3FORM(xxpermr, 0x08, 0x07, PPC2_ISA300),
 GEN_XX2FORM(xxspltw, 0x08, 0x0A, PPC2_VSX),
 GEN_XX1FORM(xxspltib, 0x08, 0x0B, PPC2_ISA300),
 GEN_XX3FORM_DM(xxsldwi, 0x08, 0x00),
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 02/13] target-ppc: add mask_u128 routine
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 02/13] target-ppc: add mask_u128 routine Nikunj A Dadhania
@ 2016-12-05 17:36   ` Richard Henderson
  2016-12-06  5:19     ` Nikunj A Dadhania
  0 siblings, 1 reply; 35+ messages in thread
From: Richard Henderson @ 2016-12-05 17:36 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: qemu-devel, bharata

On 12/05/2016 03:25 AM, Nikunj A Dadhania wrote:
> +#if defined(CONFIG_INT128)
> +FUNC_MASK(mask_u128, Int128, 128, Int128, ~((__uint128_t)0));
> +#else
> +static inline Int128 mask_u128(int start, int end)
> +{
> +    Int128 r = {0};
> +    if (start > 63) {
> +        r.hi = 0;
> +        r.lo = mask_u64(start - 64, end - 64);
> +    } else if (end < 64) {
> +        r.hi = mask_u64(start, end);
> +        r.lo = 0;
> +    } else {
> +        r.hi = mask_u64(start, 63);
> +        r.lo = mask_u64(0, end - 64);
> +    }
> +    return r;
> +}
>  #endif

First, I would really really like you to stop adding *any* ifdefs on
CONFIG_INT128.  All that's going to do is make sure that there's code that is
almost never tested, since x86_64 (and other 64-bit hosts) does support int128.

Second, you're not using the Int128 interface correctly.  Better would be

static inline Int128 mask_u128(int start, int end)
{
    uint64_t hi, lo;
    if (start > 63) {
        hi = 0;
        lo = mask_u64(start - 64, end - 64);
    } else if (end < 64) {
        hi = mask_u64(start, end);
        lo = 0;
    } else {
        hi = mask_u64(start, 63);
        lo = mask_u64(0, end - 64);
    }
    return int128_make128(lo, hi);
}


r~

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 03/13] target-ppc: implement lxvl instruction
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 03/13] target-ppc: implement lxvl instruction Nikunj A Dadhania
@ 2016-12-05 17:46   ` Richard Henderson
  2016-12-06  5:25     ` Nikunj A Dadhania
  2016-12-06 10:11     ` Nikunj A Dadhania
  0 siblings, 2 replies; 35+ messages in thread
From: Richard Henderson @ 2016-12-05 17:46 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: qemu-devel, bharata

On 12/05/2016 03:25 AM, Nikunj A Dadhania wrote:
> lxvl: Load VSX Vector with Length
> 
> Little/Big-endian Storage:
> +---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
> |“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|FF|FF|
> +---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
> 
> Loading 14 bytes results in:
> 
> Vector (8-bit elements) in BE:
> +---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
> |“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|00|00|
> +---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
> 
> Vector (8-bit elements) in LE:
> +--+--+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
> |00|00|“T”|“S”|“E”|“T”|“ ”|“a”|“ ”|“s”|“i”|“ ”|“s”|“i”|"h"|"T"|
> +--+--+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
> 
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> ---
>  target-ppc/helper.h                 |  1 +
>  target-ppc/mem_helper.c             | 25 +++++++++++++++++++++++++
>  target-ppc/translate/vsx-impl.inc.c | 27 +++++++++++++++++++++++++++
>  target-ppc/translate/vsx-ops.inc.c  |  1 +
>  4 files changed, 54 insertions(+)
> 
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index bc39efb..d9ccafd 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -317,6 +317,7 @@ DEF_HELPER_3(lvewx, void, env, avr, tl)
>  DEF_HELPER_3(stvebx, void, env, avr, tl)
>  DEF_HELPER_3(stvehx, void, env, avr, tl)
>  DEF_HELPER_3(stvewx, void, env, avr, tl)
> +DEF_HELPER_4(lxvl, void, env, tl, tl, tl)
>  DEF_HELPER_4(vsumsws, void, env, avr, avr, avr)
>  DEF_HELPER_4(vsum2sws, void, env, avr, avr, avr)
>  DEF_HELPER_4(vsum4sbs, void, env, avr, avr, avr)
> diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
> index 1ab8a6e..0a8ff54 100644
> --- a/target-ppc/mem_helper.c
> +++ b/target-ppc/mem_helper.c
> @@ -24,6 +24,7 @@
>  
>  #include "helper_regs.h"
>  #include "exec/cpu_ldst.h"
> +#include "internal.h"
>  
>  //#define DEBUG_OP
>  
> @@ -284,6 +285,30 @@ STVE(stvewx, cpu_stl_data_ra, bswap32, u32)
>  #undef I
>  #undef LVE
>  
> +void helper_lxvl(CPUPPCState *env, target_ulong addr,
> +                 target_ulong xt_num, target_ulong rb)
> +{
> +    ppc_vsr_t xt;
> +
> +    getVSR(xt_num, &xt, env);
> +    if (unlikely((rb & 0xFF) == 0)) {
> +        xt.s128 = int128_make128(0, 0);
> +    } else {
> +        target_ulong end = ((rb & 0xFF) * 8) - 1;
> +        if (msr_le) {
> +            xt.u64[HI_IDX] = bswap64(cpu_ldq_data_ra(env, addr, GETPC()));
> +            addr = addr_add(env, addr, 8);
> +            xt.u64[LO_IDX] = bswap64(cpu_ldq_data_ra(env, addr, GETPC()));

hi/lo assignment reversed for le.

> +        } else {
> +            xt.u64[HI_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
> +            addr = addr_add(env, addr, 8);
> +            xt.u64[LO_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
> +        }
> +        xt.s128 = int128_and(xt.s128, mask_u128(0, end));

I don't think mask_u128 does the right thing for end > 127.
I think you need a check here.


r~

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 04/13] target-ppc: implement lxvll instruction
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 04/13] target-ppc: implement lxvll instruction Nikunj A Dadhania
@ 2016-12-05 17:52   ` Richard Henderson
  2016-12-05 17:59     ` Richard Henderson
  2016-12-06  5:45     ` Nikunj A Dadhania
  0 siblings, 2 replies; 35+ messages in thread
From: Richard Henderson @ 2016-12-05 17:52 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: qemu-devel, bharata

On 12/05/2016 03:25 AM, Nikunj A Dadhania wrote:
> +void helper_lxvll(CPUPPCState *env, target_ulong addr,
> +                  target_ulong xt_num, target_ulong rb)
> +{
> +    ppc_vsr_t xt;
> +
> +    getVSR(xt_num, &xt, env);
> +    if (unlikely((rb & 0xFF) == 0)) {
> +        xt.s128 = int128_make128(0, 0);

Nit: int128_zero.

> +    } else {
> +        target_ulong end = ((rb & 0xFF) * 8) - 1;
> +        if (msr_le) {
> +            xt.u64[LO_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
> +            addr = addr_add(env, addr, 8);
> +            xt.u64[HI_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
> +            xt.s128 = int128_and(xt.s128, mask_u128(127 - end, 127));

The ISA document says that this is a sequence of byte operations.  Which means
that END < 127 will not access bytes outside of the length.  Which means that
your code will trigger SIGSEGV near page boundaries when real hardware won't.

I also don't see how this does the right thing for little-endian.


r~

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 05/13] target-ppc: implement stxvl instruction
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 05/13] target-ppc: implement stxvl instruction Nikunj A Dadhania
@ 2016-12-05 17:55   ` Richard Henderson
  2016-12-06  5:46     ` Nikunj A Dadhania
  0 siblings, 1 reply; 35+ messages in thread
From: Richard Henderson @ 2016-12-05 17:55 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: qemu-devel, bharata

On 12/05/2016 03:25 AM, Nikunj A Dadhania wrote:
> +    for (i = 15; i > 15 - end; i--) {
> +        cpu_stb_data_ra(env, addr, xt.u8[i], GETPC());
> +        addr = addr_add(env, addr, 1);
> +    }

(1) you need to handle nb > 16.
(2) don't you need different byte indexing for little-endian?


r~

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 06/13] target-ppc: implement stxvll instructions
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 06/13] target-ppc: implement stxvll instructions Nikunj A Dadhania
@ 2016-12-05 17:57   ` Richard Henderson
  2016-12-06  6:03     ` Nikunj A Dadhania
  0 siblings, 1 reply; 35+ messages in thread
From: Richard Henderson @ 2016-12-05 17:57 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: qemu-devel, bharata

On 12/05/2016 03:25 AM, Nikunj A Dadhania wrote:
> +    getVSR(xt_num, &xt, env);
> +    if (msr_le) {
> +        for (i = 0; i < end; i++) {
> +            cpu_stb_data_ra(env, addr, xt.u8[i], GETPC());
> +            addr = addr_add(env, addr, 1);
> +        }
> +    } else {
> +        for (i = 15; i > 15 - end; i--) {
> +            cpu_stb_data_ra(env, addr, xt.u8[i], GETPC());
> +            addr = addr_add(env, addr, 1);
> +        }
> +    }

Have you accidentally swapped the implementations of stxvl and stxvll?


r~

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 04/13] target-ppc: implement lxvll instruction
  2016-12-05 17:52   ` Richard Henderson
@ 2016-12-05 17:59     ` Richard Henderson
  2016-12-06  5:45     ` Nikunj A Dadhania
  1 sibling, 0 replies; 35+ messages in thread
From: Richard Henderson @ 2016-12-05 17:59 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: qemu-devel, bharata

On 12/05/2016 09:52 AM, Richard Henderson wrote:
> On 12/05/2016 03:25 AM, Nikunj A Dadhania wrote:
>> +void helper_lxvll(CPUPPCState *env, target_ulong addr,
>> +                  target_ulong xt_num, target_ulong rb)
>> +{
>> +    ppc_vsr_t xt;
>> +
>> +    getVSR(xt_num, &xt, env);
>> +    if (unlikely((rb & 0xFF) == 0)) {
>> +        xt.s128 = int128_make128(0, 0);
> 
> Nit: int128_zero.
> 
>> +    } else {
>> +        target_ulong end = ((rb & 0xFF) * 8) - 1;
>> +        if (msr_le) {
>> +            xt.u64[LO_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
>> +            addr = addr_add(env, addr, 8);
>> +            xt.u64[HI_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
>> +            xt.s128 = int128_and(xt.s128, mask_u128(127 - end, 127));
> 
> The ISA document says that this is a sequence of byte operations.  Which means
> that END < 127 will not access bytes outside of the length.  Which means that
> your code will trigger SIGSEGV near page boundaries when real hardware won't.
> 
> I also don't see how this does the right thing for little-endian.

Oh, and one more thing:

Do you need to perform the permission check on all NB bytes before writing any
of them?  I suspect that real hardware does, otherwise the instruction might
not be restartable.

Are there any atomicity guarantees made by real hardware?  If so, you may need
to implement this differently.  If not, a comment to that effect would be helpful.


r~

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 07/13] target-ppc: implement xxextractuw instruction
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 07/13] target-ppc: implement xxextractuw instruction Nikunj A Dadhania
@ 2016-12-05 18:10   ` Richard Henderson
  0 siblings, 0 replies; 35+ messages in thread
From: Richard Henderson @ 2016-12-05 18:10 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: qemu-devel, bharata

On 12/05/2016 03:25 AM, Nikunj A Dadhania wrote:
> +static void gen_##name(DisasContext *ctx)                       \
> +{                                                               \
> +    TCGv xt, xb;                                                \
> +    TCGv_i32 t0 = tcg_temp_new_i32();                           \
> +    uint8_t uimm = UIMM4(ctx->opcode);                          \
> +                                                                \
> +    if (unlikely(!ctx->vsx_enabled)) {                          \
> +        gen_exception(ctx, POWERPC_EXCP_VSXU);                  \
> +        return;                                                 \
> +    }                                                           \
> +    xt = tcg_const_tl(xT(ctx->opcode));                         \
> +    xb = tcg_const_tl(xB(ctx->opcode));                         \
> +    tcg_gen_movi_i32(t0, uimm);                                 \
> +    gen_helper_##name(cpu_env, xt, xb, t0);                     \
> +    tcg_temp_free(xb);                                          \
> +    tcg_temp_free(xt);                                          \
> +    tcg_temp_free_i32(t0);                                      \

While the ISA manual says that the results are undefined if UIMM > 12, I don't
think you should allow the implementation here to read beyond the end of the XB
register.

What does real hardware do?  I would have hoped for a SIGILL, but I don't
suppose that actually happens.

> +    getVSR(xbn, &xb, env);                                              \
> +    memmove(&xt.u8[8 - es], &xb.u8[index], es);                         \
> +    memset(&xt.u8[8], 0, 8);                                            \
> +    memset(&xt.u8[0], 0, 8 - es);                                       \
> +    putVSR(xtn, &xt, env);                                              \

I think this would be simpler as

  memset(&xt, 0, sizeof(xt));
  memcpy(&xt.u8[8 - es], &xb.u8[index], es);

since xt and xb are local variables and therefore cannot overlap.


r~

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 10/13] target-ppc: implement xsabsqp/xsnabsqp instruction
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 10/13] target-ppc: implement xsabsqp/xsnabsqp instruction Nikunj A Dadhania
@ 2016-12-05 18:14   ` Richard Henderson
  0 siblings, 0 replies; 35+ messages in thread
From: Richard Henderson @ 2016-12-05 18:14 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: qemu-devel, bharata

On 12/05/2016 03:25 AM, Nikunj A Dadhania wrote:
> xsabsqp:  VSX Scalar Absolute Quad-Precision
> xsnabsqp: VSX Scalar Negative Absolute Quad-Precision
> 
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> ---
>  target-ppc/translate/vsx-impl.inc.c | 35 +++++++++++++++++++++++++++++++++++
>  target-ppc/translate/vsx-ops.inc.c  |  5 +++++
>  2 files changed, 40 insertions(+)

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 08/13] target-ppc: implement xxinsertw instruction
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 08/13] target-ppc: implement xxinsertw instruction Nikunj A Dadhania
@ 2016-12-05 18:14   ` Richard Henderson
  0 siblings, 0 replies; 35+ messages in thread
From: Richard Henderson @ 2016-12-05 18:14 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: qemu-devel, bharata

On 12/05/2016 03:25 AM, Nikunj A Dadhania wrote:
> xxinsertw: VSX Vector Insert Word
> 
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> ---
>  target-ppc/helper.h                 |  1 +
>  target-ppc/int_helper.c             | 30 ++++++++++++++++++++++++++++++
>  target-ppc/translate/vsx-impl.inc.c |  5 +++--
>  target-ppc/translate/vsx-ops.inc.c  |  1 +
>  4 files changed, 35 insertions(+), 2 deletions(-)

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 11/13] target-ppc: implement xsnegqp instruction
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 11/13] target-ppc: implement xsnegqp instruction Nikunj A Dadhania
@ 2016-12-05 18:15   ` Richard Henderson
  2016-12-06  8:45     ` Nikunj A Dadhania
  0 siblings, 1 reply; 35+ messages in thread
From: Richard Henderson @ 2016-12-05 18:15 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: qemu-devel, bharata

On 12/05/2016 03:25 AM, Nikunj A Dadhania wrote:
> +    case OP_NEG:                                                  \
> +        tcg_gen_xor_i64(xbh, xbh, sgm);                           \
> +        tcg_gen_xori_i64(xbl, xbl, 0);                            \
> +        break;                                                    \

No point in the xori.


r~

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 12/13] target-ppc: implement xscpsgnqp instruction
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 12/13] target-ppc: implement xscpsgnqp instruction Nikunj A Dadhania
@ 2016-12-05 18:18   ` Richard Henderson
  0 siblings, 0 replies; 35+ messages in thread
From: Richard Henderson @ 2016-12-05 18:18 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: qemu-devel, bharata

On 12/05/2016 03:25 AM, Nikunj A Dadhania wrote:
> xscpsgnqp: VSX Scalar Copy Sign Quad-Precision
> 
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> ---
>  target-ppc/translate/vsx-impl.inc.c | 12 +++++++++++-
>  target-ppc/translate/vsx-ops.inc.c  |  1 +
>  2 files changed, 12 insertions(+), 1 deletion(-)

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 13/13] target-ppc: Add xxperm and xxpermr instructions
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 13/13] target-ppc: Add xxperm and xxpermr instructions Nikunj A Dadhania
@ 2016-12-06  4:11   ` David Gibson
  2016-12-06  8:55     ` Bharata B Rao
  0 siblings, 1 reply; 35+ messages in thread
From: David Gibson @ 2016-12-06  4:11 UTC (permalink / raw)
  To: Nikunj A Dadhania; +Cc: qemu-ppc, rth, qemu-devel, bharata

[-- Attachment #1: Type: text/plain, Size: 5782 bytes --]

On Mon, Dec 05, 2016 at 04:55:30PM +0530, Nikunj A Dadhania wrote:
> From: Bharata B Rao <bharata@linux.vnet.ibm.com>
> 
> xxperm:  VSX Vector Permute
> xxpermr: VSX Vector Permute Right-indexed
> 
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> ---
>  target-ppc/fpu_helper.c             | 50 +++++++++++++++++++++++++++++++++++++
>  target-ppc/helper.h                 |  2 ++
>  target-ppc/translate/vsx-impl.inc.c |  2 ++
>  target-ppc/translate/vsx-ops.inc.c  |  2 ++
>  4 files changed, 56 insertions(+)
> 
> diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
> index 3b867cf..be552c7 100644
> --- a/target-ppc/fpu_helper.c
> +++ b/target-ppc/fpu_helper.c
> @@ -2869,3 +2869,53 @@ uint64_t helper_xsrsp(CPUPPCState *env, uint64_t xb)
>      float_check_status(env);
>      return xt;
>  }
> +
> +static void vsr_copy_256(ppc_vsr_t *xa, ppc_vsr_t *xt, int8_t *src)
> +{
> +#if defined(HOST_WORDS_BIGENDIAN)
> +    memcpy(src, xa, sizeof(*xa));
> +    memcpy(src + 16, xt, sizeof(*xt));
> +#else
> +    memcpy(src, xt, sizeof(*xt));
> +    memcpy(src + 16, xa, sizeof(*xa));

Is this right?  I thought the order of the bytes within each word
varied with the host endianness as well.

> +#endif
> +}
> +
> +static int8_t vsr_get_byte(int8_t *src, int bound, int idx)
> +{
> +    if (idx >= bound) {
> +        return 0xFF;
> +    }

AFAICT you don't need this check.  For both xxperm and xxpermr you're
already masking the index to 5 bits, so it can't exceed 31.

> +#if defined(HOST_WORDS_BIGENDIAN)
> +    return src[idx];
> +#else
> +    return src[bound - 1 - idx];
> +#endif
> +}
> +
> +#define VSX_XXPERM(op, indexed)                                    \
> +void helper_##op(CPUPPCState *env, uint32_t opcode)                \
> +{                                                                  \
> +    ppc_vsr_t xt, xa, pcv;                                         \
> +    int i, idx;                                                    \
> +    int8_t src[32];                                                \
> +                                                                   \
> +    getVSR(xA(opcode), &xa, env);                                  \
> +    getVSR(xT(opcode), &xt, env);                                  \
> +    getVSR(xB(opcode), &pcv, env);                                 \
> +                                                                   \
> +    vsr_copy_256(&xa, &xt, src);                                   \

You have a double copy here AFAICT - first from the actual env
structure to xt and xa, then to the src array.  That seems like it
would be good to avoid.

It seems like it would be nice in any case to avoid even the one copy.
You'd need a temporary for the output of course and to copy that, but
you should be able to combine indexed with host endianness to
translate each index to retrieve directly from the VSR values in env.

> +    for (i = 0; i < 16; i++) {                                     \
> +        idx = pcv.VsrB(i) & 0x1F;                                  \
> +        if (indexed) {                                             \
> +            xt.VsrB(i) = vsr_get_byte(src, 32, 31 - idx);          \
> +        } else {                                                   \
> +            xt.VsrB(i) = vsr_get_byte(src, 32, idx);               \
> +        }                                                          \
> +    }                                                              \
> +    putVSR(xT(opcode), &xt, env);                                  \
> +}
> +
> +VSX_XXPERM(xxperm, 0)
> +VSX_XXPERM(xxpermr, 1)
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index 9f812c8..399cf99 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -538,6 +538,8 @@ DEF_HELPER_2(xvrspip, void, env, i32)
>  DEF_HELPER_2(xvrspiz, void, env, i32)
>  DEF_HELPER_4(xxextractuw, void, env, tl, tl, i32)
>  DEF_HELPER_4(xxinsertw, void, env, tl, tl, i32)
> +DEF_HELPER_2(xxperm, void, env, i32)
> +DEF_HELPER_2(xxpermr, void, env, i32)
>  
>  DEF_HELPER_2(efscfsi, i32, env, i32)
>  DEF_HELPER_2(efscfui, i32, env, i32)
> diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
> index 77f098b..2ad152e 100644
> --- a/target-ppc/translate/vsx-impl.inc.c
> +++ b/target-ppc/translate/vsx-impl.inc.c
> @@ -914,6 +914,8 @@ GEN_VSX_HELPER_2(xvrspic, 0x16, 0x0A, 0, PPC2_VSX)
>  GEN_VSX_HELPER_2(xvrspim, 0x12, 0x0B, 0, PPC2_VSX)
>  GEN_VSX_HELPER_2(xvrspip, 0x12, 0x0A, 0, PPC2_VSX)
>  GEN_VSX_HELPER_2(xvrspiz, 0x12, 0x09, 0, PPC2_VSX)
> +GEN_VSX_HELPER_2(xxperm, 0x08, 0x03, 0, PPC2_ISA300)
> +GEN_VSX_HELPER_2(xxpermr, 0x08, 0x07, 0, PPC2_ISA300)
>  
>  static void gen_xxbrd(DisasContext *ctx)
>  {
> diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
> index 42e83d2..93fb9b8 100644
> --- a/target-ppc/translate/vsx-ops.inc.c
> +++ b/target-ppc/translate/vsx-ops.inc.c
> @@ -275,6 +275,8 @@ VSX_LOGICAL(xxlnand, 0x8, 0x16, PPC2_VSX207),
>  VSX_LOGICAL(xxlorc, 0x8, 0x15, PPC2_VSX207),
>  GEN_XX3FORM(xxmrghw, 0x08, 0x02, PPC2_VSX),
>  GEN_XX3FORM(xxmrglw, 0x08, 0x06, PPC2_VSX),
> +GEN_XX3FORM(xxperm, 0x08, 0x03, PPC2_ISA300),
> +GEN_XX3FORM(xxpermr, 0x08, 0x07, PPC2_ISA300),
>  GEN_XX2FORM(xxspltw, 0x08, 0x0A, PPC2_VSX),
>  GEN_XX1FORM(xxspltib, 0x08, 0x0B, PPC2_ISA300),
>  GEN_XX3FORM_DM(xxsldwi, 0x08, 0x00),

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH ppc-for-2.9 00/13] POWER9 TCG enablements - part9
  2016-12-05 11:25 [Qemu-devel] [PATCH ppc-for-2.9 00/13] POWER9 TCG enablements - part9 Nikunj A Dadhania
                   ` (12 preceding siblings ...)
  2016-12-05 11:25 ` [Qemu-devel] [PATCH 13/13] target-ppc: Add xxperm and xxpermr instructions Nikunj A Dadhania
@ 2016-12-06  4:12 ` David Gibson
  13 siblings, 0 replies; 35+ messages in thread
From: David Gibson @ 2016-12-06  4:12 UTC (permalink / raw)
  To: Nikunj A Dadhania; +Cc: qemu-ppc, rth, qemu-devel, bharata

[-- Attachment #1: Type: text/plain, Size: 637 bytes --]

On Mon, Dec 05, 2016 at 04:55:17PM +0530, Nikunj A Dadhania wrote:
> This series contains 12 new instructions for POWER9 ISA3.0
>      Couple of consolidation patches
>      VSX Vector Insert/Extract Word
>      VSX Vector Permute
>      VSX Load/Store with length
>      VSX Scalar Quad-Precision Move Instructions

I've merged 1, 9 & 10 to ppc-for-2.9.  The rest I've left for now
since they have comments from either me or rth.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 02/13] target-ppc: add mask_u128 routine
  2016-12-05 17:36   ` Richard Henderson
@ 2016-12-06  5:19     ` Nikunj A Dadhania
  0 siblings, 0 replies; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-06  5:19 UTC (permalink / raw)
  To: Richard Henderson, qemu-ppc, david; +Cc: qemu-devel, bharata

Richard Henderson <rth@twiddle.net> writes:

> On 12/05/2016 03:25 AM, Nikunj A Dadhania wrote:
>> +#if defined(CONFIG_INT128)
>> +FUNC_MASK(mask_u128, Int128, 128, Int128, ~((__uint128_t)0));
>> +#else
>> +static inline Int128 mask_u128(int start, int end)
>> +{
>> +    Int128 r = {0};
>> +    if (start > 63) {
>> +        r.hi = 0;
>> +        r.lo = mask_u64(start - 64, end - 64);
>> +    } else if (end < 64) {
>> +        r.hi = mask_u64(start, end);
>> +        r.lo = 0;
>> +    } else {
>> +        r.hi = mask_u64(start, 63);
>> +        r.lo = mask_u64(0, end - 64);
>> +    }
>> +    return r;
>> +}
>>  #endif
>
> First, I would really really like you to stop adding *any* ifdefs on
> CONFIG_INT128.  All that's going to do is make sure that there's code that is
> almost never tested, since x86_64 (and other 64-bit hosts) does support int128.

I did test both of the cases above by flipping the CONFIG_INT128 switch.
Initially I was planning to do this in int128.h, but the bit numbering is
different and it won't be usable for other architectures.

> Second, you're not using the Int128 interface correctly.  Better would be
>
> static inline Int128 mask_u128(int start, int end)
> {
>     uint64_t hi, lo;
>     if (start > 63) {
>         hi = 0;
>         lo = mask_u64(start - 64, end - 64);
>     } else if (end < 64) {
>         hi = mask_u64(start, end);
>         lo = 0;
>     } else {
>         hi = mask_u64(start, 63);
>         lo = mask_u64(0, end - 64);
>     }
>     return make_int128(lo, hi);
> }

Sure will use this.

Regards
Nikunj

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 03/13] target-ppc: implement lxvl instruction
  2016-12-05 17:46   ` Richard Henderson
@ 2016-12-06  5:25     ` Nikunj A Dadhania
  2016-12-06 10:11     ` Nikunj A Dadhania
  1 sibling, 0 replies; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-06  5:25 UTC (permalink / raw)
  To: Richard Henderson, qemu-ppc, david; +Cc: qemu-devel, bharata

Richard Henderson <rth@twiddle.net> writes:

> On 12/05/2016 03:25 AM, Nikunj A Dadhania wrote:
>> lxvl: Load VSX Vector with Length
>> 
>> Little/Big-endian Storage:
>> +---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
>> |“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|FF|FF|
>> +---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
>> 
>> Loading 14 bytes results in:
>> 
>> Vector (8-bit elements) in BE:
>> +---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
>> |“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|00|00|
>> +---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
>> 
>> Vector (8-bit elements) in LE:
>> +--+--+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
>> |00|00|“T”|“S”|“E”|“T”|“ ”|“a”|“ ”|“s”|“i”|“ ”|“s”|“i”|"h"|"T"|
>> +--+--+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
>> 
>> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>> ---
>>  target-ppc/helper.h                 |  1 +
>>  target-ppc/mem_helper.c             | 25 +++++++++++++++++++++++++
>>  target-ppc/translate/vsx-impl.inc.c | 27 +++++++++++++++++++++++++++
>>  target-ppc/translate/vsx-ops.inc.c  |  1 +
>>  4 files changed, 54 insertions(+)
>> 
>> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
>> index bc39efb..d9ccafd 100644
>> --- a/target-ppc/helper.h
>> +++ b/target-ppc/helper.h
>> @@ -317,6 +317,7 @@ DEF_HELPER_3(lvewx, void, env, avr, tl)
>>  DEF_HELPER_3(stvebx, void, env, avr, tl)
>>  DEF_HELPER_3(stvehx, void, env, avr, tl)
>>  DEF_HELPER_3(stvewx, void, env, avr, tl)
>> +DEF_HELPER_4(lxvl, void, env, tl, tl, tl)
>>  DEF_HELPER_4(vsumsws, void, env, avr, avr, avr)
>>  DEF_HELPER_4(vsum2sws, void, env, avr, avr, avr)
>>  DEF_HELPER_4(vsum4sbs, void, env, avr, avr, avr)
>> diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
>> index 1ab8a6e..0a8ff54 100644
>> --- a/target-ppc/mem_helper.c
>> +++ b/target-ppc/mem_helper.c
>> @@ -24,6 +24,7 @@
>>  
>>  #include "helper_regs.h"
>>  #include "exec/cpu_ldst.h"
>> +#include "internal.h"
>>  
>>  //#define DEBUG_OP
>>  
>> @@ -284,6 +285,30 @@ STVE(stvewx, cpu_stl_data_ra, bswap32, u32)
>>  #undef I
>>  #undef LVE
>>  
>> +void helper_lxvl(CPUPPCState *env, target_ulong addr,
>> +                 target_ulong xt_num, target_ulong rb)
>> +{
>> +    ppc_vsr_t xt;
>> +
>> +    getVSR(xt_num, &xt, env);
>> +    if (unlikely((rb & 0xFF) == 0)) {
>> +        xt.s128 = int128_make128(0, 0);
>> +    } else {
>> +        target_ulong end = ((rb & 0xFF) * 8) - 1;
>> +        if (msr_le) {
>> +            xt.u64[HI_IDX] = bswap64(cpu_ldq_data_ra(env, addr, GETPC()));
>> +            addr = addr_add(env, addr, 8);
>> +            xt.u64[LO_IDX] = bswap64(cpu_ldq_data_ra(env, addr, GETPC()));
>
> hi/lo assignment reversed for le.

Already taken care here:

#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#else
#define HI_IDX 1
#define LO_IDX 0
#endif

>
>> +        } else {
>> +            xt.u64[HI_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
>> +            addr = addr_add(env, addr, 8);
>> +            xt.u64[LO_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
>> +        }
>> +        xt.s128 = int128_and(xt.s128, mask_u128(0, end));
>
> I don't think mask_u128 does the right thing for end > 127.
> I think you need a check here.

Sure. Will do that.

Regards
Nikunj

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 04/13] target-ppc: implement lxvll instruction
  2016-12-05 17:52   ` Richard Henderson
  2016-12-05 17:59     ` Richard Henderson
@ 2016-12-06  5:45     ` Nikunj A Dadhania
  1 sibling, 0 replies; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-06  5:45 UTC (permalink / raw)
  To: Richard Henderson, qemu-ppc, david; +Cc: qemu-devel, bharata

Richard Henderson <rth@twiddle.net> writes:

> On 12/05/2016 03:25 AM, Nikunj A Dadhania wrote:
>> +void helper_lxvll(CPUPPCState *env, target_ulong addr,
>> +                  target_ulong xt_num, target_ulong rb)
>> +{
>> +    ppc_vsr_t xt;
>> +
>> +    getVSR(xt_num, &xt, env);
>> +    if (unlikely((rb & 0xFF) == 0)) {
>> +        xt.s128 = int128_make128(0, 0);
>
> Nit: int128_zero.

Sure.

>> +    } else {
>> +        target_ulong end = ((rb & 0xFF) * 8) - 1;
>> +        if (msr_le) {
>> +            xt.u64[LO_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
>> +            addr = addr_add(env, addr, 8);
>> +            xt.u64[HI_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
>> +            xt.s128 = int128_and(xt.s128, mask_u128(127 - end, 127));
>
> The ISA document says that this is a sequence of byte operations.  Which means
> that END < 127 will not access bytes outside of the length.  Which means that
> your code will trigger SIGSEGV near page boundaries when real hardware
> won't.

In that case, I can use cpu_ldub_data_ra()

> I also don't see how this does the right thing for little-endian.

Needs to be in big-endian order - two things
1) LO/HI swapped
2) No byte swapping

AFAIU the example given in the ISA, I see the right output in my test.

Regards
Nikunj

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 05/13] target-ppc: implement stxvl instruction
  2016-12-05 17:55   ` Richard Henderson
@ 2016-12-06  5:46     ` Nikunj A Dadhania
  0 siblings, 0 replies; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-06  5:46 UTC (permalink / raw)
  To: Richard Henderson, qemu-ppc, david; +Cc: qemu-devel, bharata

Richard Henderson <rth@twiddle.net> writes:

> On 12/05/2016 03:25 AM, Nikunj A Dadhania wrote:
>> +    for (i = 15; i > 15 - end; i--) {
>> +        cpu_stb_data_ra(env, addr, xt.u8[i], GETPC());
>> +        addr = addr_add(env, addr, 1);
>> +    }
>
> (1) you need to handle nb > 16.

Sure will handle that.

> (2) don't you need different byte indexing for little-endian?

AFAIU, no, I am going by the example of the ISA.

Regards
Nikunj

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 06/13] target-ppc: implement stxvll instructions
  2016-12-05 17:57   ` Richard Henderson
@ 2016-12-06  6:03     ` Nikunj A Dadhania
  0 siblings, 0 replies; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-06  6:03 UTC (permalink / raw)
  To: Richard Henderson, qemu-ppc, david; +Cc: qemu-devel, bharata

Richard Henderson <rth@twiddle.net> writes:

> On 12/05/2016 03:25 AM, Nikunj A Dadhania wrote:
>> +    getVSR(xt_num, &xt, env);
>> +    if (msr_le) {
>> +        for (i = 0; i < end; i++) {
>> +            cpu_stb_data_ra(env, addr, xt.u8[i], GETPC());
>> +            addr = addr_add(env, addr, 1);
>> +        }
>> +    } else {
>> +        for (i = 15; i > 15 - end; i--) {
>> +            cpu_stb_data_ra(env, addr, xt.u8[i], GETPC());
>> +            addr = addr_add(env, addr, 1);
>> +        }
>> +    }
>
> Have you accidentally swapped the implementations of stxvl and stxvll?

Now I am in doubt :-)
Let me put my understanding here.

In case of stxvl if we have following vector:
        +---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
    BE  |“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|00|00|
        +---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+         
        +--+--+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
    LE  |00|00|“T”|“S”|“E”|“T”|“ ”|“a”|“ ”|“s”|“i”|“ ”|“s”|“i”|"h"|"T"|
        +--+--+---+---+---+---+---+---+---+---+---+---+---+---+---+---+

    So for LE/BE: u[0] = "T", u[1] = "h", u[2] = "i", u[3] = "s", ....
    
    Storage should be of following order:
        +---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
        |“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|FF|FF|
        +---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+

While in case of stxvll
       +---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
BE/LE  |“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|00|00|
       +---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
    
    For LE: u[0] = 0, u[1] = 0, u[2] = "T", u[3] = "S", .... 
    For BE: u[0] = "T", u[1] = "h", u[2]="i", u[3]="s", ....
    
    Storage should be of following order:
       +---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
       |“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|FF|FF|
       +---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+

Please let me know your thoughts.

While discussing this here with Bharata, I think I will need to handle host
endianness here?

Regards,
Nikunj
    

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 11/13] target-ppc: implement xsnegqp instruction
  2016-12-05 18:15   ` Richard Henderson
@ 2016-12-06  8:45     ` Nikunj A Dadhania
  0 siblings, 0 replies; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-06  8:45 UTC (permalink / raw)
  To: Richard Henderson, qemu-ppc, david; +Cc: qemu-devel, bharata

Richard Henderson <rth@twiddle.net> writes:

> On 12/05/2016 03:25 AM, Nikunj A Dadhania wrote:
>> +    case OP_NEG:                                                  \
>> +        tcg_gen_xor_i64(xbh, xbh, sgm);                           \
>> +        tcg_gen_xori_i64(xbl, xbl, 0);                            \
>> +        break;                                                    \
>
> No point in the xori.

Yeah, right.

Regards
Nikunj

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 13/13] target-ppc: Add xxperm and xxpermr instructions
  2016-12-06  4:11   ` David Gibson
@ 2016-12-06  8:55     ` Bharata B Rao
  0 siblings, 0 replies; 35+ messages in thread
From: Bharata B Rao @ 2016-12-06  8:55 UTC (permalink / raw)
  To: David Gibson; +Cc: Nikunj A Dadhania, qemu-ppc, rth, qemu-devel

On Tue, Dec 06, 2016 at 03:11:22PM +1100, David Gibson wrote:
> On Mon, Dec 05, 2016 at 04:55:30PM +0530, Nikunj A Dadhania wrote:
> > From: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > 
> > xxperm:  VSX Vector Permute
> > xxpermr: VSX Vector Permute Right-indexed
> > 
> > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> > ---
> >  target-ppc/fpu_helper.c             | 50 +++++++++++++++++++++++++++++++++++++
> >  target-ppc/helper.h                 |  2 ++
> >  target-ppc/translate/vsx-impl.inc.c |  2 ++
> >  target-ppc/translate/vsx-ops.inc.c  |  2 ++
> >  4 files changed, 56 insertions(+)
> > 
> > diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
> > index 3b867cf..be552c7 100644
> > --- a/target-ppc/fpu_helper.c
> > +++ b/target-ppc/fpu_helper.c
> > @@ -2869,3 +2869,53 @@ uint64_t helper_xsrsp(CPUPPCState *env, uint64_t xb)
> >      float_check_status(env);
> >      return xt;
> >  }
> > +
> > +static void vsr_copy_256(ppc_vsr_t *xa, ppc_vsr_t *xt, int8_t *src)
> > +{
> > +#if defined(HOST_WORDS_BIGENDIAN)
> > +    memcpy(src, xa, sizeof(*xa));
> > +    memcpy(src + 16, xt, sizeof(*xt));
> > +#else
> > +    memcpy(src, xt, sizeof(*xt));
> > +    memcpy(src + 16, xa, sizeof(*xa));
> 
> Is this right?  I thought the order of the bytes within each word
> varied with the host endianness as well.

Since we are already working with two 16-byte vectors (xa and xt) here, I thought
we don't have to worry about the order of bytes within each vector, but can instead
construct the 32-byte vector as above based on host endianness.

> 
> > +#endif
> > +}
> > +
> > +static int8_t vsr_get_byte(int8_t *src, int bound, int idx)
> > +{
> > +    if (idx >= bound) {
> > +        return 0xFF;
> > +    }
> 
> AFAICT you don't need this check.  For both xxperm and xxpermr you're
> already masking the index to 5 bits, so it can't exceed 31.

Was thinking of making it a generic API and hence had that boundary
check but yes, no point for the check in the context of this instruction.

> 
> > +#if defined(HOST_WORDS_BIGENDIAN)
> > +    return src[idx];
> > +#else
> > +    return src[bound - 1 - idx];
> > +#endif
> > +}
> > +
> > +#define VSX_XXPERM(op, indexed)                                    \
> > +void helper_##op(CPUPPCState *env, uint32_t opcode)                \
> > +{                                                                  \
> > +    ppc_vsr_t xt, xa, pcv;                                         \
> > +    int i, idx;                                                    \
> > +    int8_t src[32];                                                \
> > +                                                                   \
> > +    getVSR(xA(opcode), &xa, env);                                  \
> > +    getVSR(xT(opcode), &xt, env);                                  \
> > +    getVSR(xB(opcode), &pcv, env);                                 \
> > +                                                                   \
> > +    vsr_copy_256(&xa, &xt, src);                                   \
> 
> You have a double copy here AFAICT - first from the actual env
> structure to xt and xa, then to the src array.  That seems like it
> would be good to avoid.
> 
> It seems like it would nice in any case to avoid even the one copy.
> You'd need a temporary for the output of course and to copy that, but
> you should be able to combine indexed with host endianness to
> translate each index to retrieve directly from the VSR values in env.

I am not sure it would be good to retrieve byte values directly from
env, as getVSR nicely abstracts out which fields
(env->[fpr, vsr, avr]) the data is fetched from, based on the register
specified in the opcode.

I can reduce one copy though by not constructing a 32 byte vector (src)
but instead retrieving the bytes directly from xa and xt based on
the index.

Regards,
Bharata.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 03/13] target-ppc: implement lxvl instruction
  2016-12-05 17:46   ` Richard Henderson
  2016-12-06  5:25     ` Nikunj A Dadhania
@ 2016-12-06 10:11     ` Nikunj A Dadhania
  1 sibling, 0 replies; 35+ messages in thread
From: Nikunj A Dadhania @ 2016-12-06 10:11 UTC (permalink / raw)
  To: Richard Henderson, qemu-ppc, david; +Cc: qemu-devel, bharata

Richard Henderson <rth@twiddle.net> writes:
>> +void helper_lxvl(CPUPPCState *env, target_ulong addr,
>> +                 target_ulong xt_num, target_ulong rb)
>> +{
>> +    ppc_vsr_t xt;
>> +
>> +    getVSR(xt_num, &xt, env);
>> +    if (unlikely((rb & 0xFF) == 0)) {
>> +        xt.s128 = int128_make128(0, 0);
>> +    } else {
>> +        target_ulong end = ((rb & 0xFF) * 8) - 1;

Found that the above is wrong in the code: the ISA extracts bits 0:7
from GPR[RB].

Regards
Nikunj

^ permalink raw reply	[flat|nested] 35+ messages in thread

end of thread, other threads:[~2016-12-06 10:11 UTC | newest]

Thread overview: 35+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-12-05 11:25 [Qemu-devel] [PATCH ppc-for-2.9 00/13] POWER9 TCG enablements - part9 Nikunj A Dadhania
2016-12-05 11:25 ` [Qemu-devel] [PATCH 01/13] target-ppc: move ppc_vsr_t to common header Nikunj A Dadhania
2016-12-05 11:25 ` [Qemu-devel] [PATCH 02/13] target-ppc: add mask_u128 routine Nikunj A Dadhania
2016-12-05 17:36   ` Richard Henderson
2016-12-06  5:19     ` Nikunj A Dadhania
2016-12-05 11:25 ` [Qemu-devel] [PATCH 03/13] target-ppc: implement lxvl instruction Nikunj A Dadhania
2016-12-05 17:46   ` Richard Henderson
2016-12-06  5:25     ` Nikunj A Dadhania
2016-12-06 10:11     ` Nikunj A Dadhania
2016-12-05 11:25 ` [Qemu-devel] [PATCH 04/13] target-ppc: implement lxvll instruction Nikunj A Dadhania
2016-12-05 17:52   ` Richard Henderson
2016-12-05 17:59     ` Richard Henderson
2016-12-06  5:45     ` Nikunj A Dadhania
2016-12-05 11:25 ` [Qemu-devel] [PATCH 05/13] target-ppc: implement stxvl instruction Nikunj A Dadhania
2016-12-05 17:55   ` Richard Henderson
2016-12-06  5:46     ` Nikunj A Dadhania
2016-12-05 11:25 ` [Qemu-devel] [PATCH 06/13] target-ppc: implement stxvll instructions Nikunj A Dadhania
2016-12-05 17:57   ` Richard Henderson
2016-12-06  6:03     ` Nikunj A Dadhania
2016-12-05 11:25 ` [Qemu-devel] [PATCH 07/13] target-ppc: implement xxextractuw instruction Nikunj A Dadhania
2016-12-05 18:10   ` Richard Henderson
2016-12-05 11:25 ` [Qemu-devel] [PATCH 08/13] target-ppc: implement xxinsertw instruction Nikunj A Dadhania
2016-12-05 18:14   ` Richard Henderson
2016-12-05 11:25 ` [Qemu-devel] [PATCH 09/13] target-ppc: implement stop instruction Nikunj A Dadhania
2016-12-05 11:25 ` [Qemu-devel] [PATCH 10/13] target-ppc: implement xsabsqp/xsnabsqp instruction Nikunj A Dadhania
2016-12-05 18:14   ` Richard Henderson
2016-12-05 11:25 ` [Qemu-devel] [PATCH 11/13] target-ppc: implement xsnegqp instruction Nikunj A Dadhania
2016-12-05 18:15   ` Richard Henderson
2016-12-06  8:45     ` Nikunj A Dadhania
2016-12-05 11:25 ` [Qemu-devel] [PATCH 12/13] target-ppc: implement xscpsgnqp instruction Nikunj A Dadhania
2016-12-05 18:18   ` Richard Henderson
2016-12-05 11:25 ` [Qemu-devel] [PATCH 13/13] target-ppc: Add xxperm and xxpermr instructions Nikunj A Dadhania
2016-12-06  4:11   ` David Gibson
2016-12-06  8:55     ` Bharata B Rao
2016-12-06  4:12 ` [Qemu-devel] [PATCH ppc-for-2.9 00/13] POWER9 TCG enablements - part9 David Gibson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.