* [Qemu-devel] [PATCH 0/6] POWER9 TCG enablement - part5
@ 2016-09-28 5:45 Rajalakshmi Srinivasaraghavan
2016-09-28 5:45 ` [Qemu-devel] [PATCH 1/6] target-ppc: add vmul10[u, eu, cu, ecu]q instructions Rajalakshmi Srinivasaraghavan
` (5 more replies)
0 siblings, 6 replies; 22+ messages in thread
From: Rajalakshmi Srinivasaraghavan @ 2016-09-28 5:45 UTC (permalink / raw)
To: qemu-ppc, david, rth
Cc: qemu-devel, nikunj, benh, Rajalakshmi Srinivasaraghavan
This series contains 15 new instructions for POWER9 described in ISA3.0.
Patches:
01: Adds vector multiply instructions.
vmul10uq : Vector Multiply-by-10 Unsigned Quadword
vmul10euq : Vector Multiply-by-10 Extended Unsigned Quadword
vmul10cuq : Vector Multiply-by-10 & write Carry Unsigned QW
vmul10ecuq: Vector Multiply-by-10 Extended & write Carry Unsigned QW
02: Adds vector extract unsigned left indexed instructions.
vextublx: Vector Extract Unsigned Byte Left-Indexed
vextuhlx: Vector Extract Unsigned Halfword Left-Indexed
vextuwlx: Vector Extract Unsigned Word Left-Indexed
03: Adds vector extract unsigned right indexed instructions.
vextubrx: Vector Extract Unsigned Byte Right-Indexed
vextuhrx: Vector Extract Unsigned Halfword Right-Indexed
vextuwrx: Vector Extract Unsigned Word Right-Indexed
04: Fix invalid mask - cmpl, bctar.
05: Adds vector compare not equal instructions.
vcmpneb - Vector Compare Not Equal Byte
vcmpneh - Vector Compare Not Equal Halfword
vcmpnew - Vector Compare Not Equal Word
06: Adds vclzlsbb/vctzlsbb instructions
vclzlsbb - Vector Count Leading Zero Least-Significant Bits Byte
vctzlsbb - Vector Count Trailing Zero Least-Significant Bits Byte
target-ppc/helper.h | 14 ++++
target-ppc/int_helper.c | 134 +++++++++++++++++++++++++++++++----
target-ppc/translate.c | 4 +-
target-ppc/translate/vmx-impl.inc.c | 123 +++++++++++++++++++++++++++++++-
target-ppc/translate/vmx-ops.inc.c | 24 ++++---
5 files changed, 274 insertions(+), 25 deletions(-)
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Qemu-devel] [PATCH 1/6] target-ppc: add vmul10[u, eu, cu, ecu]q instructions
2016-09-28 5:45 [Qemu-devel] [PATCH 0/6] POWER9 TCG enablement - part5 Rajalakshmi Srinivasaraghavan
@ 2016-09-28 5:45 ` Rajalakshmi Srinivasaraghavan
2016-09-28 16:42 ` Richard Henderson
2016-09-29 2:07 ` David Gibson
2016-09-28 5:45 ` [Qemu-devel] [PATCH 2/6] target-ppc: add vextu[bhw]lx instructions Rajalakshmi Srinivasaraghavan
` (4 subsequent siblings)
5 siblings, 2 replies; 22+ messages in thread
From: Rajalakshmi Srinivasaraghavan @ 2016-09-28 5:45 UTC (permalink / raw)
To: qemu-ppc, david, rth
Cc: qemu-devel, nikunj, benh, Vasant Hegde, Rajalakshmi Srinivasaraghavan
From: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
vmul10uq : Vector Multiply-by-10 Unsigned Quadword VX-form
vmul10euq : Vector Multiply-by-10 Extended Unsigned Quadword VX-form
vmul10cuq : Vector Multiply-by-10 & write Carry Unsigned Quadword VX-form
vmul10ecuq: Vector Multiply-by-10 Extended & write Carry Unsigned Quadword VX-form
Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
[ Add GEN_VXFORM_DUAL_EXT with invalid bit mask ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
---
target-ppc/translate/vmx-impl.inc.c | 74 +++++++++++++++++++++++++++++++++++
target-ppc/translate/vmx-ops.inc.c | 8 ++--
2 files changed, 78 insertions(+), 4 deletions(-)
diff --git a/target-ppc/translate/vmx-impl.inc.c b/target-ppc/translate/vmx-impl.inc.c
index 3ce374d..abfde27 100644
--- a/target-ppc/translate/vmx-impl.inc.c
+++ b/target-ppc/translate/vmx-impl.inc.c
@@ -182,6 +182,54 @@ static void gen_mtvscr(DisasContext *ctx)
tcg_temp_free_ptr(p);
}
+#define GEN_VX_VMUL10(name, add_cin, ret_carry) \
+static void glue(gen_, name)(DisasContext *ctx) \
+{ \
+ TCGv_i64 t0 = tcg_temp_new_i64(); \
+ TCGv_i64 t1 = tcg_temp_new_i64(); \
+ TCGv_i64 t2 = tcg_temp_new_i64(); \
+ TCGv_i64 cin = tcg_temp_new_i64(); \
+ TCGv_i64 val, z; \
+ \
+ if (unlikely(!ctx->altivec_enabled)) { \
+ gen_exception(ctx, POWERPC_EXCP_VPU); \
+ return; \
+ } \
+ \
+ val = tcg_const_i64(10); \
+ z = tcg_const_i64(0); \
+ \
+ if (add_cin) { \
+ tcg_gen_andi_i64(cin, cpu_avrl[rB(ctx->opcode)], 0xF); \
+ tcg_gen_movcond_i64(TCG_COND_LTU, cin, cin, val, cin, z); \
+ } else { \
+ tcg_gen_movi_i64(cin, 0); \
+ } \
+ \
+ tcg_gen_mulu2_i64(t0, t1, cpu_avrl[rA(ctx->opcode)], val); \
+ tcg_gen_add2_i64(cpu_avrl[rD(ctx->opcode)], t2, t0, z, cin, z); \
+ tcg_gen_add2_i64(t2, t0, t1, z, t2, z); \
+ tcg_gen_mulu2_i64(t0, t1, cpu_avrh[rA(ctx->opcode)], val); \
+ tcg_gen_add2_i64(cpu_avrh[rD(ctx->opcode)], t2, t0, z, t2, z); \
+ \
+ if (ret_carry) { \
+ tcg_gen_add2_i64(cpu_avrl[rD(ctx->opcode)], t0, t1, z, t2, z); \
+ tcg_gen_movi_i64(cpu_avrh[rD(ctx->opcode)], 0); \
+ } \
+ \
+ tcg_temp_free_i64(t0); \
+ tcg_temp_free_i64(t1); \
+ tcg_temp_free_i64(t2); \
+ tcg_temp_free_i64(val); \
+ tcg_temp_free_i64(cin); \
+ tcg_temp_free_i64(z); \
+} \
+
+GEN_VX_VMUL10(vmul10uq, 0, 0);
+GEN_VX_VMUL10(vmul10euq, 1, 0);
+GEN_VX_VMUL10(vmul10cuq, 0, 1);
+GEN_VX_VMUL10(vmul10ecuq, 1, 1);
+
/* Logical operations */
#define GEN_VX_LOGICAL(name, tcg_op, opc2, opc3) \
static void glue(gen_, name)(DisasContext *ctx) \
@@ -276,8 +324,30 @@ static void glue(gen_, name0##_##name1)(DisasContext *ctx) \
} \
}
+/* Adds support to provide invalid mask */
+#define GEN_VXFORM_DUAL_EXT(name0, flg0, flg2_0, inval0, \
+ name1, flg1, flg2_1, inval1) \
+static void glue(gen_, name0##_##name1)(DisasContext *ctx) \
+{ \
+ if ((Rc(ctx->opcode) == 0) && \
+ ((ctx->insns_flags & flg0) || (ctx->insns_flags2 & flg2_0)) && \
+ !(ctx->opcode & inval0)) { \
+ gen_##name0(ctx); \
+ } else if ((Rc(ctx->opcode) == 1) && \
+ ((ctx->insns_flags & flg1) || (ctx->insns_flags2 & flg2_1)) && \
+ !(ctx->opcode & inval1)) { \
+ gen_##name1(ctx); \
+ } else { \
+ gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \
+ } \
+}
+
GEN_VXFORM(vaddubm, 0, 0);
+GEN_VXFORM_DUAL_EXT(vaddubm, PPC_NONE, PPC2_ALTIVEC_207, 0, \
+ vmul10cuq, PPC_NONE, PPC2_ISA300, 0x0000F800)
GEN_VXFORM(vadduhm, 0, 1);
+GEN_VXFORM_DUAL(vadduhm, PPC_NONE, PPC2_ALTIVEC_207, \
+ vmul10ecuq, PPC_NONE, PPC2_ISA300)
GEN_VXFORM(vadduwm, 0, 2);
GEN_VXFORM(vaddudm, 0, 3);
GEN_VXFORM(vsububm, 0, 16);
@@ -390,7 +460,11 @@ GEN_VXFORM(vsro, 6, 17);
GEN_VXFORM(vaddcuw, 0, 6);
GEN_VXFORM(vsubcuw, 0, 22);
GEN_VXFORM_ENV(vaddubs, 0, 8);
+GEN_VXFORM_DUAL_EXT(vaddubs, PPC_NONE, PPC2_ALTIVEC_207, 0, \
+ vmul10uq, PPC_NONE, PPC2_ISA300, 0x0000F800)
GEN_VXFORM_ENV(vadduhs, 0, 9);
+GEN_VXFORM_DUAL(vadduhs, PPC_NONE, PPC2_ALTIVEC_207, \
+ vmul10euq, PPC_NONE, PPC2_ISA300)
GEN_VXFORM_ENV(vadduws, 0, 10);
GEN_VXFORM_ENV(vaddsbs, 0, 12);
GEN_VXFORM_ENV(vaddshs, 0, 13);
diff --git a/target-ppc/translate/vmx-ops.inc.c b/target-ppc/translate/vmx-ops.inc.c
index a7022a0..5d47b0f 100644
--- a/target-ppc/translate/vmx-ops.inc.c
+++ b/target-ppc/translate/vmx-ops.inc.c
@@ -55,8 +55,8 @@ GEN_HANDLER_E(name0##_##name1, 0x4, opc2, opc3, 0x00000000, type0, type1)
GEN_HANDLER_E(name0##_##name1, 0x4, opc2, opc3, 0x00000000, tp0, tp1), \
GEN_HANDLER_E(name0##_##name1, 0x4, opc2, (opc3 | 0x10), 0x00000000, tp0, tp1),
-GEN_VXFORM(vaddubm, 0, 0),
-GEN_VXFORM(vadduhm, 0, 1),
+GEN_VXFORM_DUAL(vaddubm, vmul10cuq, 0, 0, PPC_ALTIVEC, PPC_NONE),
+GEN_VXFORM_DUAL(vadduhm, vmul10ecuq, 0, 1, PPC_ALTIVEC, PPC_NONE),
GEN_VXFORM(vadduwm, 0, 2),
GEN_VXFORM_207(vaddudm, 0, 3),
GEN_VXFORM_DUAL(vsububm, bcdadd, 0, 16, PPC_ALTIVEC, PPC_NONE),
@@ -123,8 +123,8 @@ GEN_VXFORM(vslo, 6, 16),
GEN_VXFORM(vsro, 6, 17),
GEN_VXFORM(vaddcuw, 0, 6),
GEN_VXFORM(vsubcuw, 0, 22),
-GEN_VXFORM(vaddubs, 0, 8),
-GEN_VXFORM(vadduhs, 0, 9),
+GEN_VXFORM_DUAL(vaddubs, vmul10uq, 0, 8, PPC_ALTIVEC, PPC_NONE),
+GEN_VXFORM_DUAL(vadduhs, vmul10euq, 0, 9, PPC_ALTIVEC, PPC_NONE),
GEN_VXFORM(vadduws, 0, 10),
GEN_VXFORM(vaddsbs, 0, 12),
GEN_VXFORM(vaddshs, 0, 13),
--
1.7.1
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PATCH 2/6] target-ppc: add vextu[bhw]lx instructions
2016-09-28 5:45 [Qemu-devel] [PATCH 0/6] POWER9 TCG enablement - part5 Rajalakshmi Srinivasaraghavan
2016-09-28 5:45 ` [Qemu-devel] [PATCH 1/6] target-ppc: add vmul10[u, eu, cu, ecu]q instructions Rajalakshmi Srinivasaraghavan
@ 2016-09-28 5:45 ` Rajalakshmi Srinivasaraghavan
2016-09-28 16:54 ` Richard Henderson
2016-09-28 5:45 ` [Qemu-devel] [PATCH 3/6] target-ppc: add vextu[bhw]rx instructions Rajalakshmi Srinivasaraghavan
` (3 subsequent siblings)
5 siblings, 1 reply; 22+ messages in thread
From: Rajalakshmi Srinivasaraghavan @ 2016-09-28 5:45 UTC (permalink / raw)
To: qemu-ppc, david, rth
Cc: qemu-devel, nikunj, benh, Avinesh Kumar, Rajalakshmi Srinivasaraghavan
From: Avinesh Kumar <avinesku@linux.vnet.ibm.com>
vextublx: Vector Extract Unsigned Byte Left-Indexed
vextuhlx: Vector Extract Unsigned Halfword Left-Indexed
vextuwlx: Vector Extract Unsigned Word Left-Indexed
Signed-off-by: Avinesh Kumar <avinesku@linux.vnet.ibm.com>
[ Remove else part in helper ]
Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
---
target-ppc/helper.h | 3 ++
target-ppc/int_helper.c | 37 +++++++++++++++++++++++++++++++++++
target-ppc/translate/vmx-impl.inc.c | 19 ++++++++++++++++++
target-ppc/translate/vmx-ops.inc.c | 4 ++-
4 files changed, 62 insertions(+), 1 deletions(-)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index a1c2962..3041199 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -344,6 +344,9 @@ DEF_HELPER_3(vpmsumb, void, avr, avr, avr)
DEF_HELPER_3(vpmsumh, void, avr, avr, avr)
DEF_HELPER_3(vpmsumw, void, avr, avr, avr)
DEF_HELPER_3(vpmsumd, void, avr, avr, avr)
+DEF_HELPER_2(vextublx, tl, tl, avr)
+DEF_HELPER_2(vextuhlx, tl, tl, avr)
+DEF_HELPER_2(vextuwlx, tl, tl, avr)
DEF_HELPER_2(vsbox, void, avr, avr)
DEF_HELPER_3(vcipher, void, avr, avr, avr)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 51a9ac5..c24cc07 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -1705,6 +1705,43 @@ void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
}
}
+#if defined(HOST_WORDS_BIGENDIAN)
+#define VEXTULX_DO(name, elem) \
+target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
+{ \
+ target_ulong r = 0; \
+ int i; \
+ int index = a & 0xf; \
+ for (i = 0; i < elem; i++) { \
+ r = r << 8; \
+ if (index + i <= 15) { \
+ r = r | b->u8[index + i]; \
+ } \
+ } \
+ return r; \
+}
+#else
+#define VEXTULX_DO(name, elem) \
+target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
+{ \
+ target_ulong r = 0; \
+ int i; \
+ int index = 15 - (a & 0xf); \
+ for (i = 0; i < elem; i++) { \
+ r = r << 8; \
+ if (index - i >= 0) { \
+ r = r | b->u8[index - i]; \
+ } \
+ } \
+ return r; \
+}
+#endif
+
+VEXTULX_DO(vextublx, 1)
+VEXTULX_DO(vextuhlx, 2)
+VEXTULX_DO(vextuwlx, 4)
+#undef VEXTULX_DO
+
/* The specification says that the results are undefined if all of the
* shift counts are not identical. We check to make sure that they are
* to conform to what real hardware appears to do. */
diff --git a/target-ppc/translate/vmx-impl.inc.c b/target-ppc/translate/vmx-impl.inc.c
index abfde27..815ba96 100644
--- a/target-ppc/translate/vmx-impl.inc.c
+++ b/target-ppc/translate/vmx-impl.inc.c
@@ -342,6 +342,19 @@ static void glue(gen_, name0##_##name1)(DisasContext *ctx) \
} \
}
+#define GEN_VXFORM_HETRO(name, opc2, opc3) \
+static void glue(gen_, name)(DisasContext *ctx) \
+{ \
+ TCGv_ptr rb; \
+ if (unlikely(!ctx->altivec_enabled)) { \
+ gen_exception(ctx, POWERPC_EXCP_VPU); \
+ return; \
+ } \
+ rb = gen_avr_ptr(rB(ctx->opcode)); \
+ gen_helper_##name(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], rb); \
+ tcg_temp_free_ptr(rb); \
+}
+
GEN_VXFORM(vaddubm, 0, 0);
GEN_VXFORM_DUAL_EXT(vaddubm, PPC_NONE, PPC2_ALTIVEC_207, 0, \
vmul10cuq, PPC_NONE, PPC2_ISA300, 0x0000F800)
@@ -516,6 +529,12 @@ GEN_VXFORM_ENV(vsubfp, 5, 1);
GEN_VXFORM_ENV(vmaxfp, 5, 16);
GEN_VXFORM_ENV(vminfp, 5, 17);
+GEN_VXFORM_HETRO(vextublx, 6, 24)
+GEN_VXFORM_HETRO(vextuhlx, 6, 25)
+GEN_VXFORM_HETRO(vextuwlx, 6, 26)
+GEN_VXFORM_DUAL(vmrgow, PPC_NONE, PPC2_ALTIVEC_207,
+ vextuwlx, PPC_NONE, PPC2_ISA300)
+
#define GEN_VXRFORM1(opname, name, str, opc2, opc3) \
static void glue(gen_, name)(DisasContext *ctx) \
{ \
diff --git a/target-ppc/translate/vmx-ops.inc.c b/target-ppc/translate/vmx-ops.inc.c
index 5d47b0f..3e0047d 100644
--- a/target-ppc/translate/vmx-ops.inc.c
+++ b/target-ppc/translate/vmx-ops.inc.c
@@ -91,8 +91,10 @@ GEN_VXFORM(vmrghw, 6, 2),
GEN_VXFORM(vmrglb, 6, 4),
GEN_VXFORM(vmrglh, 6, 5),
GEN_VXFORM(vmrglw, 6, 6),
+GEN_VXFORM_300(vextublx, 6, 24),
+GEN_VXFORM_300(vextuhlx, 6, 25),
+GEN_VXFORM_DUAL(vmrgow, vextuwlx, 6, 26, PPC_ALTIVEC, PPC_NONE),
GEN_VXFORM_207(vmrgew, 6, 30),
-GEN_VXFORM_207(vmrgow, 6, 26),
GEN_VXFORM(vmuloub, 4, 0),
GEN_VXFORM(vmulouh, 4, 1),
GEN_VXFORM_DUAL(vmulouw, vmuluwm, 4, 2, PPC_ALTIVEC, PPC_NONE),
--
1.7.1
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PATCH 3/6] target-ppc: add vextu[bhw]rx instructions
2016-09-28 5:45 [Qemu-devel] [PATCH 0/6] POWER9 TCG enablement - part5 Rajalakshmi Srinivasaraghavan
2016-09-28 5:45 ` [Qemu-devel] [PATCH 1/6] target-ppc: add vmul10[u, eu, cu, ecu]q instructions Rajalakshmi Srinivasaraghavan
2016-09-28 5:45 ` [Qemu-devel] [PATCH 2/6] target-ppc: add vextu[bhw]lx instructions Rajalakshmi Srinivasaraghavan
@ 2016-09-28 5:45 ` Rajalakshmi Srinivasaraghavan
2016-10-25 4:32 ` Rajalakshmi Srinivasaraghavan
2016-09-28 5:45 ` [Qemu-devel] [PATCH 4/6] target-ppc: fix invalid mask - cmpl, bctar Rajalakshmi Srinivasaraghavan
` (2 subsequent siblings)
5 siblings, 1 reply; 22+ messages in thread
From: Rajalakshmi Srinivasaraghavan @ 2016-09-28 5:45 UTC (permalink / raw)
To: qemu-ppc, david, rth
Cc: qemu-devel, nikunj, benh, Hariharan T.S, Avinesh Kumar,
Rajalakshmi Srinivasaraghavan
From: Hariharan T.S <hari@linux.vnet.ibm.com>
vextubrx: Vector Extract Unsigned Byte Right-Indexed VX-form
vextuhrx: Vector Extract Unsigned Halfword Right-Indexed VX-form
vextuwrx: Vector Extract Unsigned Word Right-Indexed VX-form
Signed-off-by: Hariharan T.S. <hari@linux.vnet.ibm.com>
Signed-off-by: Avinesh Kumar <avinesku@linux.vnet.ibm.com>
Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
---
target-ppc/helper.h | 3 ++
target-ppc/int_helper.c | 38 ++++++++++++++++++++++++++++++++++-
target-ppc/translate/vmx-impl.inc.c | 5 ++++
target-ppc/translate/vmx-ops.inc.c | 4 ++-
4 files changed, 48 insertions(+), 2 deletions(-)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 3041199..aef2f30 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -347,6 +347,9 @@ DEF_HELPER_3(vpmsumd, void, avr, avr, avr)
DEF_HELPER_2(vextublx, tl, tl, avr)
DEF_HELPER_2(vextuhlx, tl, tl, avr)
DEF_HELPER_2(vextuwlx, tl, tl, avr)
+DEF_HELPER_2(vextubrx, tl, tl, avr)
+DEF_HELPER_2(vextuhrx, tl, tl, avr)
+DEF_HELPER_2(vextuwrx, tl, tl, avr)
DEF_HELPER_2(vsbox, void, avr, avr)
DEF_HELPER_3(vcipher, void, avr, avr, avr)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index c24cc07..09a1799 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -1561,7 +1561,6 @@ void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
#endif
}
-
#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
@@ -1742,6 +1741,43 @@ VEXTULX_DO(vextuhlx, 2)
VEXTULX_DO(vextuwlx, 4)
#undef VEXTULX_DO
+#if defined(HOST_WORDS_BIGENDIAN)
+#define VEXTURX_DO(name, elem) \
+target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
+{ \
+ target_ulong r = 0; \
+ int i; \
+ int index = a & 0xf; \
+ for (i = elem - 1; i >= 0; i--) { \
+ r = r << 8; \
+ if ((15 - i - index) >= 0) { \
+ r = r | b->u8[15 - i - index]; \
+ } \
+ } \
+ return r; \
+}
+#else
+#define VEXTURX_DO(name, elem) \
+target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
+{ \
+ target_ulong r = 0; \
+ int i; \
+ int index = 15 - (a & 0xf); \
+ for (i = elem - 1; i >= 0; i--) { \
+ r = r << 8; \
+ if ((15 + i - index) <= 15) { \
+ r = r | b->u8[15 + i - index]; \
+ } \
+ } \
+ return r; \
+}
+#endif
+
+VEXTURX_DO(vextubrx, 1)
+VEXTURX_DO(vextuhrx, 2)
+VEXTURX_DO(vextuwrx, 4)
+#undef VEXTURX_DO
+
/* The specification says that the results are undefined if all of the
* shift counts are not identical. We check to make sure that they are
* to conform to what real hardware appears to do. */
diff --git a/target-ppc/translate/vmx-impl.inc.c b/target-ppc/translate/vmx-impl.inc.c
index 815ba96..10641dc 100644
--- a/target-ppc/translate/vmx-impl.inc.c
+++ b/target-ppc/translate/vmx-impl.inc.c
@@ -534,6 +534,11 @@ GEN_VXFORM_HETRO(vextuhlx, 6, 25)
GEN_VXFORM_HETRO(vextuwlx, 6, 26)
GEN_VXFORM_DUAL(vmrgow, PPC_NONE, PPC2_ALTIVEC_207,
vextuwlx, PPC_NONE, PPC2_ISA300)
+GEN_VXFORM_HETRO(vextubrx, 6, 28)
+GEN_VXFORM_HETRO(vextuhrx, 6, 29)
+GEN_VXFORM_HETRO(vextuwrx, 6, 30)
+GEN_VXFORM_DUAL(vmrgew, PPC_NONE, PPC2_ALTIVEC_207, \
+ vextuwrx, PPC_NONE, PPC2_ISA300)
#define GEN_VXRFORM1(opname, name, str, opc2, opc3) \
static void glue(gen_, name)(DisasContext *ctx) \
diff --git a/target-ppc/translate/vmx-ops.inc.c b/target-ppc/translate/vmx-ops.inc.c
index 3e0047d..87be6c6 100644
--- a/target-ppc/translate/vmx-ops.inc.c
+++ b/target-ppc/translate/vmx-ops.inc.c
@@ -94,7 +94,9 @@ GEN_VXFORM(vmrglw, 6, 6),
GEN_VXFORM_300(vextublx, 6, 24),
GEN_VXFORM_300(vextuhlx, 6, 25),
GEN_VXFORM_DUAL(vmrgow, vextuwlx, 6, 26, PPC_ALTIVEC, PPC_NONE),
-GEN_VXFORM_207(vmrgew, 6, 30),
+GEN_VXFORM_300(vextubrx, 6, 28),
+GEN_VXFORM_300(vextuhrx, 6, 29),
+GEN_VXFORM_DUAL(vmrgew, vextuwrx, 6, 30, PPC_ALTIVEC, PPC_NONE),
GEN_VXFORM(vmuloub, 4, 0),
GEN_VXFORM(vmulouh, 4, 1),
GEN_VXFORM_DUAL(vmulouw, vmuluwm, 4, 2, PPC_ALTIVEC, PPC_NONE),
--
1.7.1
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PATCH 4/6] target-ppc: fix invalid mask - cmpl, bctar
2016-09-28 5:45 [Qemu-devel] [PATCH 0/6] POWER9 TCG enablement - part5 Rajalakshmi Srinivasaraghavan
` (2 preceding siblings ...)
2016-09-28 5:45 ` [Qemu-devel] [PATCH 3/6] target-ppc: add vextu[bhw]rx instructions Rajalakshmi Srinivasaraghavan
@ 2016-09-28 5:45 ` Rajalakshmi Srinivasaraghavan
2016-09-29 2:22 ` David Gibson
2016-09-28 5:45 ` [Qemu-devel] [PATCH 5/6] target-ppc: add vector compare not equal instructions Rajalakshmi Srinivasaraghavan
2016-09-28 5:45 ` [Qemu-devel] [PATCH 6/6] target-ppc: add vclzlsbb/vctzlsbb instructions Rajalakshmi Srinivasaraghavan
5 siblings, 1 reply; 22+ messages in thread
From: Rajalakshmi Srinivasaraghavan @ 2016-09-28 5:45 UTC (permalink / raw)
To: qemu-ppc, david, rth
Cc: qemu-devel, nikunj, benh, Avinesh Kumar, Rajalakshmi Srinivasaraghavan
From: Avinesh Kumar <avinesku@linux.vnet.ibm.com>
cmpl: invalid bit mask should be 0x00400001
bctar: invalid bit mask should be 0x0000E000
Signed-off-by: Avinesh Kumar <avinesku@linux.vnet.ibm.com>
Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
---
target-ppc/translate.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 8eefd82..dab8f19 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -6203,7 +6203,7 @@ static opcode_t opcodes[] = {
GEN_HANDLER(invalid, 0x00, 0x00, 0x00, 0xFFFFFFFF, PPC_NONE),
GEN_HANDLER(cmp, 0x1F, 0x00, 0x00, 0x00400000, PPC_INTEGER),
GEN_HANDLER(cmpi, 0x0B, 0xFF, 0xFF, 0x00400000, PPC_INTEGER),
-GEN_HANDLER(cmpl, 0x1F, 0x00, 0x01, 0x00400000, PPC_INTEGER),
+GEN_HANDLER(cmpl, 0x1F, 0x00, 0x01, 0x00400001, PPC_INTEGER),
GEN_HANDLER(cmpli, 0x0A, 0xFF, 0xFF, 0x00400000, PPC_INTEGER),
#if defined(TARGET_PPC64)
GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x00600000, PPC_NONE, PPC2_ISA300),
@@ -6297,7 +6297,7 @@ GEN_HANDLER(b, 0x12, 0xFF, 0xFF, 0x00000000, PPC_FLOW),
GEN_HANDLER(bc, 0x10, 0xFF, 0xFF, 0x00000000, PPC_FLOW),
GEN_HANDLER(bcctr, 0x13, 0x10, 0x10, 0x00000000, PPC_FLOW),
GEN_HANDLER(bclr, 0x13, 0x10, 0x00, 0x00000000, PPC_FLOW),
-GEN_HANDLER_E(bctar, 0x13, 0x10, 0x11, 0, PPC_NONE, PPC2_BCTAR_ISA207),
+GEN_HANDLER_E(bctar, 0x13, 0x10, 0x11, 0x0000E000, PPC_NONE, PPC2_BCTAR_ISA207),
GEN_HANDLER(mcrf, 0x13, 0x00, 0xFF, 0x00000001, PPC_INTEGER),
GEN_HANDLER(rfi, 0x13, 0x12, 0x01, 0x03FF8001, PPC_FLOW),
#if defined(TARGET_PPC64)
--
1.7.1
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PATCH 5/6] target-ppc: add vector compare not equal instructions
2016-09-28 5:45 [Qemu-devel] [PATCH 0/6] POWER9 TCG enablement - part5 Rajalakshmi Srinivasaraghavan
` (3 preceding siblings ...)
2016-09-28 5:45 ` [Qemu-devel] [PATCH 4/6] target-ppc: fix invalid mask - cmpl, bctar Rajalakshmi Srinivasaraghavan
@ 2016-09-28 5:45 ` Rajalakshmi Srinivasaraghavan
2016-09-28 17:01 ` Richard Henderson
2016-09-29 2:22 ` David Gibson
2016-09-28 5:45 ` [Qemu-devel] [PATCH 6/6] target-ppc: add vclzlsbb/vctzlsbb instructions Rajalakshmi Srinivasaraghavan
5 siblings, 2 replies; 22+ messages in thread
From: Rajalakshmi Srinivasaraghavan @ 2016-09-28 5:45 UTC (permalink / raw)
To: qemu-ppc, david, rth
Cc: qemu-devel, nikunj, benh, Rajalakshmi Srinivasaraghavan
The following vector compare not equal instructions are added from ISA 3.0.
vcmpneb - Vector Compare Not Equal Byte
vcmpneh - Vector Compare Not Equal Halfword
vcmpnew - Vector Compare Not Equal Word
Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
---
target-ppc/helper.h | 6 ++++++
target-ppc/int_helper.c | 31 +++++++++++++++++++------------
target-ppc/translate/vmx-impl.inc.c | 11 ++++++++++-
target-ppc/translate/vmx-ops.inc.c | 6 +++---
4 files changed, 38 insertions(+), 16 deletions(-)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index aef2f30..9c3095f 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -147,6 +147,9 @@ DEF_HELPER_4(vcmpequb, void, env, avr, avr, avr)
DEF_HELPER_4(vcmpequh, void, env, avr, avr, avr)
DEF_HELPER_4(vcmpequw, void, env, avr, avr, avr)
DEF_HELPER_4(vcmpequd, void, env, avr, avr, avr)
+DEF_HELPER_4(vcmpneb, void, env, avr, avr, avr)
+DEF_HELPER_4(vcmpneh, void, env, avr, avr, avr)
+DEF_HELPER_4(vcmpnew, void, env, avr, avr, avr)
DEF_HELPER_4(vcmpnezb, void, env, avr, avr, avr)
DEF_HELPER_4(vcmpnezh, void, env, avr, avr, avr)
DEF_HELPER_4(vcmpnezw, void, env, avr, avr, avr)
@@ -166,6 +169,9 @@ DEF_HELPER_4(vcmpequb_dot, void, env, avr, avr, avr)
DEF_HELPER_4(vcmpequh_dot, void, env, avr, avr, avr)
DEF_HELPER_4(vcmpequw_dot, void, env, avr, avr, avr)
DEF_HELPER_4(vcmpequd_dot, void, env, avr, avr, avr)
+DEF_HELPER_4(vcmpneb_dot, void, env, avr, avr, avr)
+DEF_HELPER_4(vcmpneh_dot, void, env, avr, avr, avr)
+DEF_HELPER_4(vcmpnew_dot, void, env, avr, avr, avr)
DEF_HELPER_4(vcmpnezb_dot, void, env, avr, avr, avr)
DEF_HELPER_4(vcmpnezh_dot, void, env, avr, avr, avr)
DEF_HELPER_4(vcmpnezw_dot, void, env, avr, avr, avr)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 09a1799..f132f7b 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -735,20 +735,24 @@ VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP
-#define VCMPNEZ_DO(suffix, element, etype, record) \
-void helper_vcmpnez##suffix(CPUPPCState *env, ppc_avr_t *r, \
+#define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
+void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
ppc_avr_t *a, ppc_avr_t *b) \
{ \
etype ones = (etype)-1; \
etype all = ones; \
- etype none = 0; \
+ etype result, none = 0; \
int i; \
\
for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
- etype result = ((a->element[i] == 0) \
+ if (cmpzero) { \
+ result = ((a->element[i] == 0) \
|| (b->element[i] == 0) \
|| (a->element[i] != b->element[i]) ? \
ones : 0x0); \
+ } else { \
+ result = (a->element[i] != b->element[i]) ? ones : 0x0; \
+ } \
r->element[i] = result; \
all &= result; \
none |= result; \
@@ -762,14 +766,17 @@ void helper_vcmpnez##suffix(CPUPPCState *env, ppc_avr_t *r, \
* suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
* element - element type to access from vector
*/
-#define VCMPNEZ(suffix, element, etype) \
- VCMPNEZ_DO(suffix, element, etype, 0) \
- VCMPNEZ_DO(suffix##_dot, element, etype, 1)
-VCMPNEZ(b, u8, uint8_t)
-VCMPNEZ(h, u16, uint16_t)
-VCMPNEZ(w, u32, uint32_t)
-#undef VCMPNEZ_DO
-#undef VCMPNEZ
+#define VCMPNE(suffix, element, etype, cmpzero) \
+ VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
+ VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
+VCMPNE(zb, u8, uint8_t, 1)
+VCMPNE(zh, u16, uint16_t, 1)
+VCMPNE(zw, u32, uint32_t, 1)
+VCMPNE(b, u8, uint8_t, 0)
+VCMPNE(h, u16, uint16_t, 0)
+VCMPNE(w, u32, uint32_t, 0)
+#undef VCMPNE_DO
+#undef VCMPNE
#define VCMPFP_DO(suffix, compare, order, record) \
void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
diff --git a/target-ppc/translate/vmx-impl.inc.c b/target-ppc/translate/vmx-impl.inc.c
index 10641dc..0bc7188 100644
--- a/target-ppc/translate/vmx-impl.inc.c
+++ b/target-ppc/translate/vmx-impl.inc.c
@@ -608,7 +608,16 @@ GEN_VXRFORM(vcmpeqfp, 3, 3)
GEN_VXRFORM(vcmpgefp, 3, 7)
GEN_VXRFORM(vcmpgtfp, 3, 11)
GEN_VXRFORM(vcmpbfp, 3, 15)
-
+GEN_VXRFORM(vcmpneb, 3, 0)
+GEN_VXRFORM(vcmpneh, 3, 1)
+GEN_VXRFORM(vcmpnew, 3, 2)
+
+GEN_VXRFORM_DUAL(vcmpequb, PPC_NONE, PPC2_ALTIVEC_207, \
+ vcmpneb, PPC_NONE, PPC2_ISA300)
+GEN_VXRFORM_DUAL(vcmpequh, PPC_NONE, PPC2_ALTIVEC_207, \
+ vcmpneh, PPC_NONE, PPC2_ISA300)
+GEN_VXRFORM_DUAL(vcmpequw, PPC_NONE, PPC2_ALTIVEC_207, \
+ vcmpnew, PPC_NONE, PPC2_ISA300)
GEN_VXRFORM_DUAL(vcmpeqfp, PPC_ALTIVEC, PPC_NONE, \
vcmpequd, PPC_NONE, PPC2_ALTIVEC_207)
GEN_VXRFORM_DUAL(vcmpbfp, PPC_ALTIVEC, PPC_NONE, \
diff --git a/target-ppc/translate/vmx-ops.inc.c b/target-ppc/translate/vmx-ops.inc.c
index 87be6c6..009e9b1 100644
--- a/target-ppc/translate/vmx-ops.inc.c
+++ b/target-ppc/translate/vmx-ops.inc.c
@@ -185,9 +185,6 @@ GEN_HANDLER2_E(name, str, 0x4, opc2, opc3, 0x00000000, PPC_NONE, PPC2_ISA300),
GEN_VXRFORM1_300(name, name, #name, opc2, opc3) \
GEN_VXRFORM1_300(name##_dot, name##_, #name ".", opc2, (opc3 | (0x1 << 4)))
-GEN_VXRFORM(vcmpequb, 3, 0)
-GEN_VXRFORM(vcmpequh, 3, 1)
-GEN_VXRFORM(vcmpequw, 3, 2)
GEN_VXRFORM_300(vcmpnezb, 3, 4)
GEN_VXRFORM_300(vcmpnezh, 3, 5)
GEN_VXRFORM_300(vcmpnezw, 3, 6)
@@ -201,6 +198,9 @@ GEN_VXRFORM_DUAL(vcmpeqfp, vcmpequd, 3, 3, PPC_ALTIVEC, PPC_NONE)
GEN_VXRFORM(vcmpgefp, 3, 7)
GEN_VXRFORM_DUAL(vcmpgtfp, vcmpgtud, 3, 11, PPC_ALTIVEC, PPC_NONE)
GEN_VXRFORM_DUAL(vcmpbfp, vcmpgtsd, 3, 15, PPC_ALTIVEC, PPC_NONE)
+GEN_VXRFORM_DUAL(vcmpequb, vcmpneb, 3, 0, PPC_NONE, PPC2_ALTIVEC_207)
+GEN_VXRFORM_DUAL(vcmpequh, vcmpneh, 3, 1, PPC_NONE, PPC2_ALTIVEC_207)
+GEN_VXRFORM_DUAL(vcmpequw, vcmpnew, 3, 2, PPC_NONE, PPC2_ALTIVEC_207)
#define GEN_VXFORM_DUAL_INV(name0, name1, opc2, opc3, inval0, inval1, type) \
GEN_OPCODE_DUAL(name0##_##name1, 0x04, opc2, opc3, inval0, inval1, type, \
--
1.7.1
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PATCH 6/6] target-ppc: add vclzlsbb/vctzlsbb instructions
2016-09-28 5:45 [Qemu-devel] [PATCH 0/6] POWER9 TCG enablement - part5 Rajalakshmi Srinivasaraghavan
` (4 preceding siblings ...)
2016-09-28 5:45 ` [Qemu-devel] [PATCH 5/6] target-ppc: add vector compare not equal instructions Rajalakshmi Srinivasaraghavan
@ 2016-09-28 5:45 ` Rajalakshmi Srinivasaraghavan
2016-09-28 17:08 ` Richard Henderson
2016-09-29 2:25 ` David Gibson
5 siblings, 2 replies; 22+ messages in thread
From: Rajalakshmi Srinivasaraghavan @ 2016-09-28 5:45 UTC (permalink / raw)
To: qemu-ppc, david, rth
Cc: qemu-devel, nikunj, benh, Rajalakshmi Srinivasaraghavan
The following vector instructions are added from ISA 3.0.
vclzlsbb - Vector Count Leading Zero Least-Significant Bits Byte
vctzlsbb - Vector Count Trailing Zero Least-Significant Bits Byte
Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
---
target-ppc/helper.h | 2 ++
target-ppc/int_helper.c | 28 ++++++++++++++++++++++++++++
target-ppc/translate/vmx-impl.inc.c | 14 ++++++++++++++
target-ppc/translate/vmx-ops.inc.c | 2 ++
4 files changed, 46 insertions(+), 0 deletions(-)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 9c3095f..30c4429 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -343,6 +343,8 @@ DEF_HELPER_2(vpopcntb, void, avr, avr)
DEF_HELPER_2(vpopcnth, void, avr, avr)
DEF_HELPER_2(vpopcntw, void, avr, avr)
DEF_HELPER_2(vpopcntd, void, avr, avr)
+DEF_HELPER_1(vclzlsbb, tl, avr)
+DEF_HELPER_1(vctzlsbb, tl, avr)
DEF_HELPER_3(vbpermd, void, avr, avr, avr)
DEF_HELPER_3(vbpermq, void, avr, avr, avr)
DEF_HELPER_2(vgbbd, void, avr, avr)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index f132f7b..759ead9 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -881,6 +881,34 @@ VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT
+target_ulong helper_vclzlsbb(ppc_avr_t *r)
+{
+ target_ulong count = 0;
+ int i;
+ VECTOR_FOR_INORDER_I(i, u8) {
+ if (r->u8[i] & 0x01) {
+ break;
+ }
+ count++;
+ }
+ return count;
+}
+target_ulong helper_vctzlsbb(ppc_avr_t *r)
+{
+ target_ulong count = 0;
+ int i;
+#if defined(HOST_WORDS_BIGENDIAN)
+ for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
+#else
+ for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
+#endif
+ if (r->u8[i] & 0x01) {
+ break;
+ }
+ count++;
+ }
+ return count;
+}
void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
ppc_avr_t *b, ppc_avr_t *c)
{
diff --git a/target-ppc/translate/vmx-impl.inc.c b/target-ppc/translate/vmx-impl.inc.c
index 0bc7188..1649b34 100644
--- a/target-ppc/translate/vmx-impl.inc.c
+++ b/target-ppc/translate/vmx-impl.inc.c
@@ -691,6 +691,18 @@ static void glue(gen_, name)(DisasContext *ctx) \
tcg_temp_free_ptr(rd); \
}
+#define GEN_VXFORM_NOA_3(name, opc2, opc3, opc4) \
+static void glue(gen_, name)(DisasContext *ctx) \
+ { \
+ TCGv_ptr rb; \
+ if (unlikely(!ctx->altivec_enabled)) { \
+ gen_exception(ctx, POWERPC_EXCP_VPU); \
+ return; \
+ } \
+ rb = gen_avr_ptr(rB(ctx->opcode)); \
+ gen_helper_##name(cpu_gpr[rD(ctx->opcode)], rb); \
+ tcg_temp_free_ptr(rb); \
+ }
GEN_VXFORM_NOA(vupkhsb, 7, 8);
GEN_VXFORM_NOA(vupkhsh, 7, 9);
GEN_VXFORM_NOA(vupkhsw, 7, 25);
@@ -905,6 +917,8 @@ GEN_VXFORM_NOA_2(vctzb, 1, 24, 28)
GEN_VXFORM_NOA_2(vctzh, 1, 24, 29)
GEN_VXFORM_NOA_2(vctzw, 1, 24, 30)
GEN_VXFORM_NOA_2(vctzd, 1, 24, 31)
+GEN_VXFORM_NOA_3(vclzlsbb, 1, 24, 0)
+GEN_VXFORM_NOA_3(vctzlsbb, 1, 24, 1)
GEN_VXFORM_NOA(vpopcntb, 1, 28)
GEN_VXFORM_NOA(vpopcnth, 1, 29)
GEN_VXFORM_NOA(vpopcntw, 1, 30)
diff --git a/target-ppc/translate/vmx-ops.inc.c b/target-ppc/translate/vmx-ops.inc.c
index 009e9b1..20d243f 100644
--- a/target-ppc/translate/vmx-ops.inc.c
+++ b/target-ppc/translate/vmx-ops.inc.c
@@ -223,6 +223,8 @@ GEN_VXFORM_300_EO(vctzb, 0x01, 0x18, 0x1C),
GEN_VXFORM_300_EO(vctzh, 0x01, 0x18, 0x1D),
GEN_VXFORM_300_EO(vctzw, 0x01, 0x18, 0x1E),
GEN_VXFORM_300_EO(vctzd, 0x01, 0x18, 0x1F),
+GEN_VXFORM_300_EO(vclzlsbb, 0x01, 0x18, 0x0),
+GEN_VXFORM_300_EO(vctzlsbb, 0x01, 0x18, 0x1),
GEN_VXFORM_300(vpermr, 0x1D, 0xFF),
#define GEN_VXFORM_NOA(name, opc2, opc3) \
--
1.7.1
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [PATCH 1/6] target-ppc: add vmul10[u, eu, cu, ecu]q instructions
2016-09-28 5:45 ` [Qemu-devel] [PATCH 1/6] target-ppc: add vmul10[u, eu, cu, ecu]q instructions Rajalakshmi Srinivasaraghavan
@ 2016-09-28 16:42 ` Richard Henderson
2016-10-05 5:23 ` Rajalakshmi Srinivasaraghavan
2016-09-29 2:07 ` David Gibson
1 sibling, 1 reply; 22+ messages in thread
From: Richard Henderson @ 2016-09-28 16:42 UTC (permalink / raw)
To: Rajalakshmi Srinivasaraghavan, qemu-ppc, david
Cc: qemu-devel, nikunj, benh, Vasant Hegde
On 09/27/2016 10:45 PM, Rajalakshmi Srinivasaraghavan wrote:
> + val = tcg_const_i64(10); \
Rename this "ten" for clarity?
> + z = tcg_const_i64(0); \
> + \
> + if (add_cin) { \
> + tcg_gen_andi_i64(cin, cpu_avrl[rB(ctx->opcode)], 0xF); \
> + tcg_gen_movcond_i64(TCG_COND_LTU, cin, cin, val, cin, z); \
What is the purpose of this movcond? The docs specifically say that values
greater than 9 are undefined.
> + } else { \
> + tcg_gen_movi_i64(cin, 0); \
> + } \
> + \
> + tcg_gen_mulu2_i64(t0, t1, cpu_avrl[rA(ctx->opcode)], val); \
> + tcg_gen_add2_i64(cpu_avrl[rD(ctx->opcode)], t2, t0, z, cin, z); \
> + tcg_gen_add2_i64(t2, t0, t1, z, t2, z); \
These two additions are unused if !add_cin, and the second appears to be
mergeable with the first -- don't use so many z's. I think this simplifies to
if (add_cin) {
tcg_gen_mulu2_i64(t0, t1, cpu_avrl[rA(ctx->opcode)], ten);
tcg_gen_andi_i64(t2, cpu_avrl[rB(ctx->opcode)], 0xF);
tcg_gen_add2_i64(cpu_avrl[rD(ctx->opcode)], t2, t0, t1, t2, z);
} else {
tcg_gen_mulu2_i64(cpu_avrl[rD(ctx->opcode)], t2,
cpu_avrl[rA(ctx->opcode)], ten);
}
> + tcg_gen_mulu2_i64(t0, t1, cpu_avrh[rA(ctx->opcode)], val); \
> + tcg_gen_add2_i64(cpu_avrh[rD(ctx->opcode)], t2, t0, z, t2, z); \
> + \
> + if (ret_carry) { \
> + tcg_gen_add2_i64(cpu_avrl[rD(ctx->opcode)], t0, t1, z, t2, z); \
> + tcg_gen_movi_i64(cpu_avrh[rD(ctx->opcode)], 0); \
Likewise simplifies to
if (ret_carry) {
tcg_gen_mulu2_i64(t0, t1, cpu_avrh[rA(ctx->opcode)], ten);
tcg_gen_add2_i64(t0, cpu_avrl[rD(ctx->opcode)], t0, t1, t2, z);
tcg_gen_movi_i64(cpu_avrh[rD(ctx->opcode)], 0);
} else {
tcg_gen_mul_i64(t0, cpu_avrh[rA(ctx->opcode)], ten);
tcg_gen_add_i64(cpu_avrh[rD(ctx->opcode)], t0, t2);
}
r~
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [PATCH 2/6] target-ppc: add vextu[bhw]lx instructions
2016-09-28 5:45 ` [Qemu-devel] [PATCH 2/6] target-ppc: add vextu[bhw]lx instructions Rajalakshmi Srinivasaraghavan
@ 2016-09-28 16:54 ` Richard Henderson
2016-10-05 5:21 ` Rajalakshmi Srinivasaraghavan
0 siblings, 1 reply; 22+ messages in thread
From: Richard Henderson @ 2016-09-28 16:54 UTC (permalink / raw)
To: Rajalakshmi Srinivasaraghavan, qemu-ppc, david
Cc: qemu-devel, nikunj, benh, Avinesh Kumar
On 09/27/2016 10:45 PM, Rajalakshmi Srinivasaraghavan wrote:
> +#if defined(HOST_WORDS_BIGENDIAN)
> +#define VEXTULX_DO(name, elem) \
> +target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
> +{ \
> + target_ulong r = 0; \
> + int i; \
> + int index = a & 0xf; \
> + for (i = 0; i < elem; i++) { \
> + r = r << 8; \
> + if (index + i <= 15) { \
> + r = r | b->u8[index + i]; \
> + } \
> + } \
> + return r; \
> +}
> +#else
> +#define VEXTULX_DO(name, elem) \
> +target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
> +{ \
> + target_ulong r = 0; \
> + int i; \
> + int index = 15 - (a & 0xf); \
> + for (i = 0; i < elem; i++) { \
> + r = r << 8; \
> + if (index - i >= 0) { \
> + r = r | b->u8[index - i]; \
> + } \
> + } \
> + return r; \
> +}
> +#endif
> +
> +VEXTULX_DO(vextublx, 1)
> +VEXTULX_DO(vextuhlx, 2)
> +VEXTULX_DO(vextuwlx, 4)
> +#undef VEXTULX_DO
Ew.
This should be one 128-bit shift and one and.
Since the shift amount is a multiple of 8, the 128-bit shift for vextub[lr]x
does not need to cross a double-word boundary, and so can be decomposed into
one 64-bit shift of (count & 64 ? hi : lo).
For vextu[hw][lr]x, you'd need to do the whole left-shift, right-shift, OR thing.
But still, fantastically better than a loop.
r~
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [PATCH 5/6] target-ppc: add vector compare not equal instructions
2016-09-28 5:45 ` [Qemu-devel] [PATCH 5/6] target-ppc: add vector compare not equal instructions Rajalakshmi Srinivasaraghavan
@ 2016-09-28 17:01 ` Richard Henderson
2016-09-29 2:22 ` David Gibson
1 sibling, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2016-09-28 17:01 UTC (permalink / raw)
To: Rajalakshmi Srinivasaraghavan, qemu-ppc, david; +Cc: qemu-devel, nikunj, benh
On 09/27/2016 10:45 PM, Rajalakshmi Srinivasaraghavan wrote:
> The following vector compare not equal instructions are added from ISA 3.0.
>
> vcmpneb - Vector Compare Not Equal Byte
> vcmpneh - Vector Compare Not Equal Halfword
> vcmpnew - Vector Compare Not Equal Word
>
> Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
> ---
> target-ppc/helper.h | 6 ++++++
> target-ppc/int_helper.c | 31 +++++++++++++++++++------------
> target-ppc/translate/vmx-impl.inc.c | 11 ++++++++++-
> target-ppc/translate/vmx-ops.inc.c | 6 +++---
> 4 files changed, 38 insertions(+), 16 deletions(-)
Reviewed-by: Richard Henderson <rth@twiddle.net>
r~
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [PATCH 6/6] target-ppc: add vclzlsbb/vctzlsbb instructions
2016-09-28 5:45 ` [Qemu-devel] [PATCH 6/6] target-ppc: add vclzlsbb/vctzlsbb instructions Rajalakshmi Srinivasaraghavan
@ 2016-09-28 17:08 ` Richard Henderson
2016-09-29 2:23 ` David Gibson
2016-09-29 2:25 ` David Gibson
1 sibling, 1 reply; 22+ messages in thread
From: Richard Henderson @ 2016-09-28 17:08 UTC (permalink / raw)
To: Rajalakshmi Srinivasaraghavan, qemu-ppc, david; +Cc: qemu-devel, nikunj, benh
On 09/27/2016 10:45 PM, Rajalakshmi Srinivasaraghavan wrote:
> + return count;
> +}
> +target_ulong helper_vctzlsbb(ppc_avr_t *r)
> +{
...
> + return count;
> +}
> void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
> ppc_avr_t *b, ppc_avr_t *c)
Watch your spacing between functions. Otherwise,
Reviewed-by: Richard Henderson <rth@twiddle.net>
r~
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [PATCH 1/6] target-ppc: add vmul10[u, eu, cu, ecu]q instructions
2016-09-28 5:45 ` [Qemu-devel] [PATCH 1/6] target-ppc: add vmul10[u, eu, cu, ecu]q instructions Rajalakshmi Srinivasaraghavan
2016-09-28 16:42 ` Richard Henderson
@ 2016-09-29 2:07 ` David Gibson
2016-09-29 4:00 ` Richard Henderson
1 sibling, 1 reply; 22+ messages in thread
From: David Gibson @ 2016-09-29 2:07 UTC (permalink / raw)
To: Rajalakshmi Srinivasaraghavan
Cc: qemu-ppc, rth, qemu-devel, nikunj, benh, Vasant Hegde
[-- Attachment #1: Type: text/plain, Size: 8816 bytes --]
On Wed, Sep 28, 2016 at 11:15:13AM +0530, Rajalakshmi Srinivasaraghavan wrote:
> From: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
>
> vmul10uq : Vector Multiply-by-10 Unsigned Quadword VX-form
> vmul10euq : Vector Multiply-by-10 Extended Unsigned Quadword VX-form
> vmul10cuq : Vector Multiply-by-10 & write Carry Unsigned Quadword VX-form
> vmul10ecuq: Vector Multiply-by-10 Extended & write Carry Unsigned Quadword VX-form
>
> Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
> [ Add GEN_VXFORM_DUAL_EXT with invalid bit mask ]
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
> ---
> target-ppc/translate/vmx-impl.inc.c | 74 +++++++++++++++++++++++++++++++++++
> target-ppc/translate/vmx-ops.inc.c | 8 ++--
> 2 files changed, 78 insertions(+), 4 deletions(-)
>
> diff --git a/target-ppc/translate/vmx-impl.inc.c b/target-ppc/translate/vmx-impl.inc.c
> index 3ce374d..abfde27 100644
> --- a/target-ppc/translate/vmx-impl.inc.c
> +++ b/target-ppc/translate/vmx-impl.inc.c
> @@ -182,6 +182,54 @@ static void gen_mtvscr(DisasContext *ctx)
> tcg_temp_free_ptr(p);
> }
>
> +#define GEN_VX_VMUL10(name, add_cin, ret_carry) \
> +static void glue(gen_, name)(DisasContext *ctx) \
> +{ \
> + TCGv_i64 t0 = tcg_temp_new_i64(); \
> + TCGv_i64 t1 = tcg_temp_new_i64(); \
> + TCGv_i64 t2 = tcg_temp_new_i64(); \
> + TCGv_i64 cin = tcg_temp_new_i64(); \
> + TCGv_i64 val, z; \
> + \
> + if (unlikely(!ctx->altivec_enabled)) { \
> + gen_exception(ctx, POWERPC_EXCP_VPU); \
> + return; \
> + } \
> + \
> + val = tcg_const_i64(10); \
> + z = tcg_const_i64(0); \
> + \
> + if (add_cin) { \
> + tcg_gen_andi_i64(cin, cpu_avrl[rB(ctx->opcode)], 0xF); \
> + tcg_gen_movcond_i64(TCG_COND_LTU, cin, cin, val, cin, z); \
> + } else { \
> + tcg_gen_movi_i64(cin, 0); \
> + } \
> + \
> + tcg_gen_mulu2_i64(t0, t1, cpu_avrl[rA(ctx->opcode)], val); \
Do you really want to be using an actual mul op, rather than (in << 3)
+ (in << 1)? Obviously working out all the carries correctly will be a
bit fiddly.
> + tcg_gen_add2_i64(cpu_avrl[rD(ctx->opcode)], t2, t0, z, cin, z); \
> + tcg_gen_add2_i64(t2, t0, t1, z, t2, z); \
> + tcg_gen_mulu2_i64(t0, t1, cpu_avrh[rA(ctx->opcode)], val); \
> + tcg_gen_add2_i64(cpu_avrh[rD(ctx->opcode)], t2, t0, z, t2, z); \
> + \
> + if (ret_carry) { \
> + tcg_gen_add2_i64(cpu_avrl[rD(ctx->opcode)], t0, t1, z, t2, z); \
> + tcg_gen_movi_i64(cpu_avrh[rD(ctx->opcode)], 0); \
> + } \
> + \
> + tcg_temp_free_i64(t0); \
> + tcg_temp_free_i64(t1); \
> + tcg_temp_free_i64(t2); \
> + tcg_temp_free_i64(val); \
> + tcg_temp_free_i64(cin); \
> + tcg_temp_free_i64(z); \
> +} \
> +
> +GEN_VX_VMUL10(vmul10uq, 0, 0);
> +GEN_VX_VMUL10(vmul10euq, 1, 0);
> +GEN_VX_VMUL10(vmul10cuq, 0, 1);
> +GEN_VX_VMUL10(vmul10ecuq, 1, 1);
> +
> /* Logical operations */
> #define GEN_VX_LOGICAL(name, tcg_op, opc2, opc3) \
> static void glue(gen_, name)(DisasContext *ctx) \
> @@ -276,8 +324,30 @@ static void glue(gen_, name0##_##name1)(DisasContext *ctx) \
> } \
> }
>
> +/* Adds support to provide invalid mask */
> +#define GEN_VXFORM_DUAL_EXT(name0, flg0, flg2_0, inval0, \
> + name1, flg1, flg2_1, inval1) \
> +static void glue(gen_, name0##_##name1)(DisasContext *ctx) \
> +{ \
> + if ((Rc(ctx->opcode) == 0) && \
> + ((ctx->insns_flags & flg0) || (ctx->insns_flags2 & flg2_0)) && \
> + !(ctx->opcode & inval0)) { \
> + gen_##name0(ctx); \
> + } else if ((Rc(ctx->opcode) == 1) && \
> + ((ctx->insns_flags & flg1) || (ctx->insns_flags2 & flg2_1)) && \
> + !(ctx->opcode & inval1)) { \
> + gen_##name1(ctx); \
> + } else { \
> + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \
> + } \
> +}
> +
> GEN_VXFORM(vaddubm, 0, 0);
> +GEN_VXFORM_DUAL_EXT(vaddubm, PPC_NONE, PPC2_ALTIVEC_207, 0, \
> + vmul10cuq, PPC_NONE, PPC2_ISA300, 0x0000F800)
> GEN_VXFORM(vadduhm, 0, 1);
> +GEN_VXFORM_DUAL(vadduhm, PPC_NONE, PPC2_ALTIVEC_207, \
> + vmul10ecuq, PPC_NONE, PPC2_ISA300)
> GEN_VXFORM(vadduwm, 0, 2);
> GEN_VXFORM(vaddudm, 0, 3);
> GEN_VXFORM(vsububm, 0, 16);
> @@ -390,7 +460,11 @@ GEN_VXFORM(vsro, 6, 17);
> GEN_VXFORM(vaddcuw, 0, 6);
> GEN_VXFORM(vsubcuw, 0, 22);
> GEN_VXFORM_ENV(vaddubs, 0, 8);
> +GEN_VXFORM_DUAL_EXT(vaddubs, PPC_NONE, PPC2_ALTIVEC_207, 0, \
> + vmul10uq, PPC_NONE, PPC2_ISA300, 0x0000F800)
> GEN_VXFORM_ENV(vadduhs, 0, 9);
> +GEN_VXFORM_DUAL(vadduhs, PPC_NONE, PPC2_ALTIVEC_207, \
> + vmul10euq, PPC_NONE, PPC2_ISA300)
> GEN_VXFORM_ENV(vadduws, 0, 10);
> GEN_VXFORM_ENV(vaddsbs, 0, 12);
> GEN_VXFORM_ENV(vaddshs, 0, 13);
> diff --git a/target-ppc/translate/vmx-ops.inc.c b/target-ppc/translate/vmx-ops.inc.c
> index a7022a0..5d47b0f 100644
> --- a/target-ppc/translate/vmx-ops.inc.c
> +++ b/target-ppc/translate/vmx-ops.inc.c
> @@ -55,8 +55,8 @@ GEN_HANDLER_E(name0##_##name1, 0x4, opc2, opc3, 0x00000000, type0, type1)
> GEN_HANDLER_E(name0##_##name1, 0x4, opc2, opc3, 0x00000000, tp0, tp1), \
> GEN_HANDLER_E(name0##_##name1, 0x4, opc2, (opc3 | 0x10), 0x00000000, tp0, tp1),
>
> -GEN_VXFORM(vaddubm, 0, 0),
> -GEN_VXFORM(vadduhm, 0, 1),
> +GEN_VXFORM_DUAL(vaddubm, vmul10cuq, 0, 0, PPC_ALTIVEC, PPC_NONE),
> +GEN_VXFORM_DUAL(vadduhm, vmul10ecuq, 0, 1, PPC_ALTIVEC, PPC_NONE),
> GEN_VXFORM(vadduwm, 0, 2),
> GEN_VXFORM_207(vaddudm, 0, 3),
> GEN_VXFORM_DUAL(vsububm, bcdadd, 0, 16, PPC_ALTIVEC, PPC_NONE),
> @@ -123,8 +123,8 @@ GEN_VXFORM(vslo, 6, 16),
> GEN_VXFORM(vsro, 6, 17),
> GEN_VXFORM(vaddcuw, 0, 6),
> GEN_VXFORM(vsubcuw, 0, 22),
> -GEN_VXFORM(vaddubs, 0, 8),
> -GEN_VXFORM(vadduhs, 0, 9),
> +GEN_VXFORM_DUAL(vaddubs, vmul10uq, 0, 8, PPC_ALTIVEC, PPC_NONE),
> +GEN_VXFORM_DUAL(vadduhs, vmul10euq, 0, 9, PPC_ALTIVEC, PPC_NONE),
> GEN_VXFORM(vadduws, 0, 10),
> GEN_VXFORM(vaddsbs, 0, 12),
> GEN_VXFORM(vaddshs, 0, 13),
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [PATCH 4/6] target-ppc: fix invalid mask - cmpl, bctar
2016-09-28 5:45 ` [Qemu-devel] [PATCH 4/6] target-ppc: fix invalid mask - cmpl, bctar Rajalakshmi Srinivasaraghavan
@ 2016-09-29 2:22 ` David Gibson
0 siblings, 0 replies; 22+ messages in thread
From: David Gibson @ 2016-09-29 2:22 UTC (permalink / raw)
To: Rajalakshmi Srinivasaraghavan
Cc: qemu-ppc, rth, qemu-devel, nikunj, benh, Avinesh Kumar
[-- Attachment #1: Type: text/plain, Size: 2014 bytes --]
On Wed, Sep 28, 2016 at 11:15:16AM +0530, Rajalakshmi Srinivasaraghavan wrote:
> From: Avinesh Kumar <avinesku@linux.vnet.ibm.com>
>
> cmpl: invalid bit mask should be 0x00400001
> bctar: invalid bit mask should be 0x0000E000
>
> Signed-off-by: Avinesh Kumar <avinesku@linux.vnet.ibm.com>
> Signed-off-by: Rajalakshmi Srinivasaraghavan
> <raji@linux.vnet.ibm.com>
Applied to ppc-for-2.8.
> ---
> target-ppc/translate.c | 4 ++--
> 1 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 8eefd82..dab8f19 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -6203,7 +6203,7 @@ static opcode_t opcodes[] = {
> GEN_HANDLER(invalid, 0x00, 0x00, 0x00, 0xFFFFFFFF, PPC_NONE),
> GEN_HANDLER(cmp, 0x1F, 0x00, 0x00, 0x00400000, PPC_INTEGER),
> GEN_HANDLER(cmpi, 0x0B, 0xFF, 0xFF, 0x00400000, PPC_INTEGER),
> -GEN_HANDLER(cmpl, 0x1F, 0x00, 0x01, 0x00400000, PPC_INTEGER),
> +GEN_HANDLER(cmpl, 0x1F, 0x00, 0x01, 0x00400001, PPC_INTEGER),
> GEN_HANDLER(cmpli, 0x0A, 0xFF, 0xFF, 0x00400000, PPC_INTEGER),
> #if defined(TARGET_PPC64)
> GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x00600000, PPC_NONE, PPC2_ISA300),
> @@ -6297,7 +6297,7 @@ GEN_HANDLER(b, 0x12, 0xFF, 0xFF, 0x00000000, PPC_FLOW),
> GEN_HANDLER(bc, 0x10, 0xFF, 0xFF, 0x00000000, PPC_FLOW),
> GEN_HANDLER(bcctr, 0x13, 0x10, 0x10, 0x00000000, PPC_FLOW),
> GEN_HANDLER(bclr, 0x13, 0x10, 0x00, 0x00000000, PPC_FLOW),
> -GEN_HANDLER_E(bctar, 0x13, 0x10, 0x11, 0, PPC_NONE, PPC2_BCTAR_ISA207),
> +GEN_HANDLER_E(bctar, 0x13, 0x10, 0x11, 0x0000E000, PPC_NONE, PPC2_BCTAR_ISA207),
> GEN_HANDLER(mcrf, 0x13, 0x00, 0xFF, 0x00000001, PPC_INTEGER),
> GEN_HANDLER(rfi, 0x13, 0x12, 0x01, 0x03FF8001, PPC_FLOW),
> #if defined(TARGET_PPC64)
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [PATCH 5/6] target-ppc: add vector compare not equal instructions
2016-09-28 5:45 ` [Qemu-devel] [PATCH 5/6] target-ppc: add vector compare not equal instructions Rajalakshmi Srinivasaraghavan
2016-09-28 17:01 ` Richard Henderson
@ 2016-09-29 2:22 ` David Gibson
1 sibling, 0 replies; 22+ messages in thread
From: David Gibson @ 2016-09-29 2:22 UTC (permalink / raw)
To: Rajalakshmi Srinivasaraghavan; +Cc: qemu-ppc, rth, qemu-devel, nikunj, benh
[-- Attachment #1: Type: text/plain, Size: 7802 bytes --]
On Wed, Sep 28, 2016 at 11:15:17AM +0530, Rajalakshmi Srinivasaraghavan wrote:
> The following vector compare not equal instructions are added from ISA 3.0.
>
> vcmpneb - Vector Compare Not Equal Byte
> vcmpneh - Vector Compare Not Equal Halfword
> vcmpnew - Vector Compare Not Equal Word
>
> Signed-off-by: Rajalakshmi Srinivasaraghavan
> <raji@linux.vnet.ibm.com>
Applied to ppc-for-2.8.
> ---
> target-ppc/helper.h | 6 ++++++
> target-ppc/int_helper.c | 31 +++++++++++++++++++------------
> target-ppc/translate/vmx-impl.inc.c | 11 ++++++++++-
> target-ppc/translate/vmx-ops.inc.c | 6 +++---
> 4 files changed, 38 insertions(+), 16 deletions(-)
>
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index aef2f30..9c3095f 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -147,6 +147,9 @@ DEF_HELPER_4(vcmpequb, void, env, avr, avr, avr)
> DEF_HELPER_4(vcmpequh, void, env, avr, avr, avr)
> DEF_HELPER_4(vcmpequw, void, env, avr, avr, avr)
> DEF_HELPER_4(vcmpequd, void, env, avr, avr, avr)
> +DEF_HELPER_4(vcmpneb, void, env, avr, avr, avr)
> +DEF_HELPER_4(vcmpneh, void, env, avr, avr, avr)
> +DEF_HELPER_4(vcmpnew, void, env, avr, avr, avr)
> DEF_HELPER_4(vcmpnezb, void, env, avr, avr, avr)
> DEF_HELPER_4(vcmpnezh, void, env, avr, avr, avr)
> DEF_HELPER_4(vcmpnezw, void, env, avr, avr, avr)
> @@ -166,6 +169,9 @@ DEF_HELPER_4(vcmpequb_dot, void, env, avr, avr, avr)
> DEF_HELPER_4(vcmpequh_dot, void, env, avr, avr, avr)
> DEF_HELPER_4(vcmpequw_dot, void, env, avr, avr, avr)
> DEF_HELPER_4(vcmpequd_dot, void, env, avr, avr, avr)
> +DEF_HELPER_4(vcmpneb_dot, void, env, avr, avr, avr)
> +DEF_HELPER_4(vcmpneh_dot, void, env, avr, avr, avr)
> +DEF_HELPER_4(vcmpnew_dot, void, env, avr, avr, avr)
> DEF_HELPER_4(vcmpnezb_dot, void, env, avr, avr, avr)
> DEF_HELPER_4(vcmpnezh_dot, void, env, avr, avr, avr)
> DEF_HELPER_4(vcmpnezw_dot, void, env, avr, avr, avr)
> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> index 09a1799..f132f7b 100644
> --- a/target-ppc/int_helper.c
> +++ b/target-ppc/int_helper.c
> @@ -735,20 +735,24 @@ VCMP(gtsd, >, s64)
> #undef VCMP_DO
> #undef VCMP
>
> -#define VCMPNEZ_DO(suffix, element, etype, record) \
> -void helper_vcmpnez##suffix(CPUPPCState *env, ppc_avr_t *r, \
> +#define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
> +void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
> ppc_avr_t *a, ppc_avr_t *b) \
> { \
> etype ones = (etype)-1; \
> etype all = ones; \
> - etype none = 0; \
> + etype result, none = 0; \
> int i; \
> \
> for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
> - etype result = ((a->element[i] == 0) \
> + if (cmpzero) { \
> + result = ((a->element[i] == 0) \
> || (b->element[i] == 0) \
> || (a->element[i] != b->element[i]) ? \
> ones : 0x0); \
> + } else { \
> + result = (a->element[i] != b->element[i]) ? ones : 0x0; \
> + } \
> r->element[i] = result; \
> all &= result; \
> none |= result; \
> @@ -762,14 +766,17 @@ void helper_vcmpnez##suffix(CPUPPCState *env, ppc_avr_t *r, \
> * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
> * element - element type to access from vector
> */
> -#define VCMPNEZ(suffix, element, etype) \
> - VCMPNEZ_DO(suffix, element, etype, 0) \
> - VCMPNEZ_DO(suffix##_dot, element, etype, 1)
> -VCMPNEZ(b, u8, uint8_t)
> -VCMPNEZ(h, u16, uint16_t)
> -VCMPNEZ(w, u32, uint32_t)
> -#undef VCMPNEZ_DO
> -#undef VCMPNEZ
> +#define VCMPNE(suffix, element, etype, cmpzero) \
> + VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
> + VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
> +VCMPNE(zb, u8, uint8_t, 1)
> +VCMPNE(zh, u16, uint16_t, 1)
> +VCMPNE(zw, u32, uint32_t, 1)
> +VCMPNE(b, u8, uint8_t, 0)
> +VCMPNE(h, u16, uint16_t, 0)
> +VCMPNE(w, u32, uint32_t, 0)
> +#undef VCMPNE_DO
> +#undef VCMPNE
>
> #define VCMPFP_DO(suffix, compare, order, record) \
> void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
> diff --git a/target-ppc/translate/vmx-impl.inc.c b/target-ppc/translate/vmx-impl.inc.c
> index 10641dc..0bc7188 100644
> --- a/target-ppc/translate/vmx-impl.inc.c
> +++ b/target-ppc/translate/vmx-impl.inc.c
> @@ -608,7 +608,16 @@ GEN_VXRFORM(vcmpeqfp, 3, 3)
> GEN_VXRFORM(vcmpgefp, 3, 7)
> GEN_VXRFORM(vcmpgtfp, 3, 11)
> GEN_VXRFORM(vcmpbfp, 3, 15)
> -
> +GEN_VXRFORM(vcmpneb, 3, 0)
> +GEN_VXRFORM(vcmpneh, 3, 1)
> +GEN_VXRFORM(vcmpnew, 3, 2)
> +
> +GEN_VXRFORM_DUAL(vcmpequb, PPC_NONE, PPC2_ALTIVEC_207, \
> + vcmpneb, PPC_NONE, PPC2_ISA300)
> +GEN_VXRFORM_DUAL(vcmpequh, PPC_NONE, PPC2_ALTIVEC_207, \
> + vcmpneh, PPC_NONE, PPC2_ISA300)
> +GEN_VXRFORM_DUAL(vcmpequw, PPC_NONE, PPC2_ALTIVEC_207, \
> + vcmpnew, PPC_NONE, PPC2_ISA300)
> GEN_VXRFORM_DUAL(vcmpeqfp, PPC_ALTIVEC, PPC_NONE, \
> vcmpequd, PPC_NONE, PPC2_ALTIVEC_207)
> GEN_VXRFORM_DUAL(vcmpbfp, PPC_ALTIVEC, PPC_NONE, \
> diff --git a/target-ppc/translate/vmx-ops.inc.c b/target-ppc/translate/vmx-ops.inc.c
> index 87be6c6..009e9b1 100644
> --- a/target-ppc/translate/vmx-ops.inc.c
> +++ b/target-ppc/translate/vmx-ops.inc.c
> @@ -185,9 +185,6 @@ GEN_HANDLER2_E(name, str, 0x4, opc2, opc3, 0x00000000, PPC_NONE, PPC2_ISA300),
> GEN_VXRFORM1_300(name, name, #name, opc2, opc3) \
> GEN_VXRFORM1_300(name##_dot, name##_, #name ".", opc2, (opc3 | (0x1 << 4)))
>
> -GEN_VXRFORM(vcmpequb, 3, 0)
> -GEN_VXRFORM(vcmpequh, 3, 1)
> -GEN_VXRFORM(vcmpequw, 3, 2)
> GEN_VXRFORM_300(vcmpnezb, 3, 4)
> GEN_VXRFORM_300(vcmpnezh, 3, 5)
> GEN_VXRFORM_300(vcmpnezw, 3, 6)
> @@ -201,6 +198,9 @@ GEN_VXRFORM_DUAL(vcmpeqfp, vcmpequd, 3, 3, PPC_ALTIVEC, PPC_NONE)
> GEN_VXRFORM(vcmpgefp, 3, 7)
> GEN_VXRFORM_DUAL(vcmpgtfp, vcmpgtud, 3, 11, PPC_ALTIVEC, PPC_NONE)
> GEN_VXRFORM_DUAL(vcmpbfp, vcmpgtsd, 3, 15, PPC_ALTIVEC, PPC_NONE)
> +GEN_VXRFORM_DUAL(vcmpequb, vcmpneb, 3, 0, PPC_NONE, PPC2_ALTIVEC_207)
> +GEN_VXRFORM_DUAL(vcmpequh, vcmpneh, 3, 1, PPC_NONE, PPC2_ALTIVEC_207)
> +GEN_VXRFORM_DUAL(vcmpequw, vcmpnew, 3, 2, PPC_NONE, PPC2_ALTIVEC_207)
>
> #define GEN_VXFORM_DUAL_INV(name0, name1, opc2, opc3, inval0, inval1, type) \
> GEN_OPCODE_DUAL(name0##_##name1, 0x04, opc2, opc3, inval0, inval1, type, \
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [PATCH 6/6] target-ppc: add vclzlsbb/vctzlsbb instructions
2016-09-28 17:08 ` Richard Henderson
@ 2016-09-29 2:23 ` David Gibson
0 siblings, 0 replies; 22+ messages in thread
From: David Gibson @ 2016-09-29 2:23 UTC (permalink / raw)
To: Richard Henderson
Cc: Rajalakshmi Srinivasaraghavan, qemu-ppc, qemu-devel, nikunj, benh
[-- Attachment #1: Type: text/plain, Size: 779 bytes --]
On Wed, Sep 28, 2016 at 10:08:06AM -0700, Richard Henderson wrote:
> On 09/27/2016 10:45 PM, Rajalakshmi Srinivasaraghavan wrote:
> > + return count;
> > +}
> > +target_ulong helper_vctzlsbb(ppc_avr_t *r)
> > +{
>
> ...
>
> > + return count;
> > +}
> > void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
> > ppc_avr_t *b, ppc_avr_t *c)
>
> Watch your spacing between functions. Otherwise,
>
> Reviewed-by: Richard Henderson <rth@twiddle.net>
I've added a couple of extra line breaks and applied to ppc-for-2.8.
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [PATCH 6/6] target-ppc: add vclzlsbb/vctzlsbb instructions
2016-09-28 5:45 ` [Qemu-devel] [PATCH 6/6] target-ppc: add vclzlsbb/vctzlsbb instructions Rajalakshmi Srinivasaraghavan
2016-09-28 17:08 ` Richard Henderson
@ 2016-09-29 2:25 ` David Gibson
1 sibling, 0 replies; 22+ messages in thread
From: David Gibson @ 2016-09-29 2:25 UTC (permalink / raw)
To: Rajalakshmi Srinivasaraghavan; +Cc: qemu-ppc, rth, qemu-devel, nikunj, benh
[-- Attachment #1: Type: text/plain, Size: 5088 bytes --]
On Wed, Sep 28, 2016 at 11:15:18AM +0530, Rajalakshmi Srinivasaraghavan wrote:
> The following vector instructions are added from ISA 3.0.
>
> vclzlsbb - Vector Count Leading Zero Least-Significant Bits Byte
> vctzlsbb - Vector Count Trailing Zero Least-Significant Bits Byte
>
> Signed-off-by: Rajalakshmi Srinivasaraghavan
> <raji@linux.vnet.ibm.com>
This will do for now, but I think you could do better than the loop.
Something like:
tmp = vector & 0x0101010101010101;
count = (clz(tmp) + 1) >> 3;
I think would do it (obviously more care would be needed with edge cases).
> ---
> target-ppc/helper.h | 2 ++
> target-ppc/int_helper.c | 28 ++++++++++++++++++++++++++++
> target-ppc/translate/vmx-impl.inc.c | 14 ++++++++++++++
> target-ppc/translate/vmx-ops.inc.c | 2 ++
> 4 files changed, 46 insertions(+), 0 deletions(-)
>
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index 9c3095f..30c4429 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -343,6 +343,8 @@ DEF_HELPER_2(vpopcntb, void, avr, avr)
> DEF_HELPER_2(vpopcnth, void, avr, avr)
> DEF_HELPER_2(vpopcntw, void, avr, avr)
> DEF_HELPER_2(vpopcntd, void, avr, avr)
> +DEF_HELPER_1(vclzlsbb, tl, avr)
> +DEF_HELPER_1(vctzlsbb, tl, avr)
> DEF_HELPER_3(vbpermd, void, avr, avr, avr)
> DEF_HELPER_3(vbpermq, void, avr, avr, avr)
> DEF_HELPER_2(vgbbd, void, avr, avr)
> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> index f132f7b..759ead9 100644
> --- a/target-ppc/int_helper.c
> +++ b/target-ppc/int_helper.c
> @@ -881,6 +881,34 @@ VCT(uxs, cvtsduw, u32)
> VCT(sxs, cvtsdsw, s32)
> #undef VCT
>
> +target_ulong helper_vclzlsbb(ppc_avr_t *r)
> +{
> + target_ulong count = 0;
> + int i;
> + VECTOR_FOR_INORDER_I(i, u8) {
> + if (r->u8[i] & 0x01) {
> + break;
> + }
> + count++;
> + }
> + return count;
> +}
> +target_ulong helper_vctzlsbb(ppc_avr_t *r)
> +{
> + target_ulong count = 0;
> + int i;
> +#if defined(HOST_WORDS_BIGENDIAN)
> + for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
> +#else
> + for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
> +#endif
> + if (r->u8[i] & 0x01) {
> + break;
> + }
> + count++;
> + }
> + return count;
> +}
> void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
> ppc_avr_t *b, ppc_avr_t *c)
> {
> diff --git a/target-ppc/translate/vmx-impl.inc.c b/target-ppc/translate/vmx-impl.inc.c
> index 0bc7188..1649b34 100644
> --- a/target-ppc/translate/vmx-impl.inc.c
> +++ b/target-ppc/translate/vmx-impl.inc.c
> @@ -691,6 +691,18 @@ static void glue(gen_, name)(DisasContext *ctx) \
> tcg_temp_free_ptr(rd); \
> }
>
> +#define GEN_VXFORM_NOA_3(name, opc2, opc3, opc4) \
> +static void glue(gen_, name)(DisasContext *ctx) \
> + { \
> + TCGv_ptr rb; \
> + if (unlikely(!ctx->altivec_enabled)) { \
> + gen_exception(ctx, POWERPC_EXCP_VPU); \
> + return; \
> + } \
> + rb = gen_avr_ptr(rB(ctx->opcode)); \
> + gen_helper_##name(cpu_gpr[rD(ctx->opcode)], rb); \
> + tcg_temp_free_ptr(rb); \
> + }
> GEN_VXFORM_NOA(vupkhsb, 7, 8);
> GEN_VXFORM_NOA(vupkhsh, 7, 9);
> GEN_VXFORM_NOA(vupkhsw, 7, 25);
> @@ -905,6 +917,8 @@ GEN_VXFORM_NOA_2(vctzb, 1, 24, 28)
> GEN_VXFORM_NOA_2(vctzh, 1, 24, 29)
> GEN_VXFORM_NOA_2(vctzw, 1, 24, 30)
> GEN_VXFORM_NOA_2(vctzd, 1, 24, 31)
> +GEN_VXFORM_NOA_3(vclzlsbb, 1, 24, 0)
> +GEN_VXFORM_NOA_3(vctzlsbb, 1, 24, 1)
> GEN_VXFORM_NOA(vpopcntb, 1, 28)
> GEN_VXFORM_NOA(vpopcnth, 1, 29)
> GEN_VXFORM_NOA(vpopcntw, 1, 30)
> diff --git a/target-ppc/translate/vmx-ops.inc.c b/target-ppc/translate/vmx-ops.inc.c
> index 009e9b1..20d243f 100644
> --- a/target-ppc/translate/vmx-ops.inc.c
> +++ b/target-ppc/translate/vmx-ops.inc.c
> @@ -223,6 +223,8 @@ GEN_VXFORM_300_EO(vctzb, 0x01, 0x18, 0x1C),
> GEN_VXFORM_300_EO(vctzh, 0x01, 0x18, 0x1D),
> GEN_VXFORM_300_EO(vctzw, 0x01, 0x18, 0x1E),
> GEN_VXFORM_300_EO(vctzd, 0x01, 0x18, 0x1F),
> +GEN_VXFORM_300_EO(vclzlsbb, 0x01, 0x18, 0x0),
> +GEN_VXFORM_300_EO(vctzlsbb, 0x01, 0x18, 0x1),
> GEN_VXFORM_300(vpermr, 0x1D, 0xFF),
>
> #define GEN_VXFORM_NOA(name, opc2, opc3) \
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [PATCH 1/6] target-ppc: add vmul10[u, eu, cu, ecu]q instructions
2016-09-29 2:07 ` David Gibson
@ 2016-09-29 4:00 ` Richard Henderson
2016-09-29 4:24 ` [Qemu-devel] [Qemu-ppc] " David Gibson
0 siblings, 1 reply; 22+ messages in thread
From: Richard Henderson @ 2016-09-29 4:00 UTC (permalink / raw)
To: David Gibson, Rajalakshmi Srinivasaraghavan
Cc: qemu-ppc, qemu-devel, nikunj, benh, Vasant Hegde
On 09/28/2016 07:07 PM, David Gibson wrote:
>> + tcg_gen_mulu2_i64(t0, t1, cpu_avrl[rA(ctx->opcode)], val); \
>
> Do you really want to be using an actual mul op, rather than (in << 3)
> + (in << 1)? Obviously working out all the carries correctly will be a
> bit fiddly.
I think it's fine. Modern hardware will do the double-word multiply in 3-5
cycles, which is probably equal to what we could do by hand with shifts.
r~
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [Qemu-ppc] [PATCH 1/6] target-ppc: add vmul10[u, eu, cu, ecu]q instructions
2016-09-29 4:00 ` Richard Henderson
@ 2016-09-29 4:24 ` David Gibson
0 siblings, 0 replies; 22+ messages in thread
From: David Gibson @ 2016-09-29 4:24 UTC (permalink / raw)
To: Richard Henderson
Cc: Rajalakshmi Srinivasaraghavan, Vasant Hegde, qemu-ppc, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 822 bytes --]
On Wed, Sep 28, 2016 at 09:00:51PM -0700, Richard Henderson wrote:
> On 09/28/2016 07:07 PM, David Gibson wrote:
> > > + tcg_gen_mulu2_i64(t0, t1, cpu_avrl[rA(ctx->opcode)], val); \
> >
> > Do you really want to be using an actual mul op, rather than (in << 3)
> > + (in << 1)? Obviously working out all the carries correctly will be a
> > bit fiddly.
>
> I think it's fine. Modern hardware will do the double-word multiply in 3-5
> cycles, which is probably equal to what we could do by hand with
> shifts.
Fair enough. And it will make for less dicking around with the carry
in and carry out.
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [PATCH 2/6] target-ppc: add vextu[bhw]lx instructions
2016-09-28 16:54 ` Richard Henderson
@ 2016-10-05 5:21 ` Rajalakshmi Srinivasaraghavan
2016-10-25 4:31 ` Rajalakshmi Srinivasaraghavan
0 siblings, 1 reply; 22+ messages in thread
From: Rajalakshmi Srinivasaraghavan @ 2016-10-05 5:21 UTC (permalink / raw)
To: Richard Henderson, qemu-ppc, david
Cc: qemu-devel, nikunj, benh, Avinesh Kumar
On 09/28/2016 10:24 PM, Richard Henderson wrote:
> On 09/27/2016 10:45 PM, Rajalakshmi Srinivasaraghavan wrote:
>> +#if defined(HOST_WORDS_BIGENDIAN)
>> +#define VEXTULX_DO(name, elem) \
>> +target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
>> +{ \
>> + target_ulong r = 0; \
>> + int i; \
>> + int index = a & 0xf; \
>> + for (i = 0; i < elem; i++) { \
>> + r = r << 8; \
>> + if (index + i <= 15) { \
>> + r = r | b->u8[index + i]; \
>> + } \
>> + } \
>> + return r; \
>> +}
>> +#else
>> +#define VEXTULX_DO(name, elem) \
>> +target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
>> +{ \
>> + target_ulong r = 0; \
>> + int i; \
>> + int index = 15 - (a & 0xf); \
>> + for (i = 0; i < elem; i++) { \
>> + r = r << 8; \
>> + if (index - i >= 0) { \
>> + r = r | b->u8[index - i]; \
>> + } \
>> + } \
>> + return r; \
>> +}
>> +#endif
>> +
>> +VEXTULX_DO(vextublx, 1)
>> +VEXTULX_DO(vextuhlx, 2)
>> +VEXTULX_DO(vextuwlx, 4)
>> +#undef VEXTULX_DO
> Ew.
>
> This should be one 128-bit shift and one and.
>
> Since the shift amount is a multiple of 8, the 128-bit shift for vextub[lr]x
> does not need to cross a double-word boundary, and so can be decomposed into
> one 64-bit shift of (count & 64 ? hi : lo).
>
> For vextu[hw][lr]x, you'd need to do the whole left-shift, right-shift, OR thing.
>
> But still, fantastically better than a loop.
Ack. Will send an updated patch.
>
>
> r~
>
>
--
Thanks
Rajalakshmi S
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [PATCH 1/6] target-ppc: add vmul10[u, eu, cu, ecu]q instructions
2016-09-28 16:42 ` Richard Henderson
@ 2016-10-05 5:23 ` Rajalakshmi Srinivasaraghavan
0 siblings, 0 replies; 22+ messages in thread
From: Rajalakshmi Srinivasaraghavan @ 2016-10-05 5:23 UTC (permalink / raw)
To: Richard Henderson, qemu-ppc, david; +Cc: Vasant Hegde, qemu-devel, nikunj
On 09/28/2016 10:12 PM, Richard Henderson wrote:
> On 09/27/2016 10:45 PM, Rajalakshmi Srinivasaraghavan wrote:
>> + val = tcg_const_i64(10); \
> Rename this "ten" for clarity?
>
>> + z = tcg_const_i64(0); \
>> + \
>> + if (add_cin) { \
>> + tcg_gen_andi_i64(cin, cpu_avrl[rB(ctx->opcode)], 0xF); \
>> + tcg_gen_movcond_i64(TCG_COND_LTU, cin, cin, val, cin, z); \
> What is the purpose of this movcond? The docs specifically say that values
> greater than 9 are undefined.
>
>> + } else { \
>> + tcg_gen_movi_i64(cin, 0); \
>> + } \
>> + \
>> + tcg_gen_mulu2_i64(t0, t1, cpu_avrl[rA(ctx->opcode)], val); \
>> + tcg_gen_add2_i64(cpu_avrl[rD(ctx->opcode)], t2, t0, z, cin, z); \
>> + tcg_gen_add2_i64(t2, t0, t1, z, t2, z); \
> These two additions are unused if !add_cin, and the second appears to be
> mergeable with the first -- don't use so many z's. I think this simplifies to
>
> if (add_cin) {
> tcg_gen_mulu2_i64(t0, t1, cpu_avrl[rA(ctx->opcode)], ten);
> tcg_gen_andi_i64(t2, cpu_avrl[rB(ctx->opcode)], 0xF);
> tcg_gen_add2_i64(cpu_avrl[rD(ctx->opcode)], t2, t0, t1, t2, z);
> } else {
> tcg_gen_mulu2_i64(cpu_avrl[rD(ctx->opcode)], t2,
> cpu_avrl[rA(ctx->opcode)], ten);
> }
>
>> + tcg_gen_mulu2_i64(t0, t1, cpu_avrh[rA(ctx->opcode)], val); \
>> + tcg_gen_add2_i64(cpu_avrh[rD(ctx->opcode)], t2, t0, z, t2, z); \
>> + \
>> + if (ret_carry) { \
>> + tcg_gen_add2_i64(cpu_avrl[rD(ctx->opcode)], t0, t1, z, t2, z); \
>> + tcg_gen_movi_i64(cpu_avrh[rD(ctx->opcode)], 0); \
> Likewise simplifies to
>
> if (ret_carry) {
> tcg_gen_mulu2_i64(t0, t1, cpu_avrh[rA(ctx->opcode)], ten);
> tcg_gen_add2_i64(t0, cpu_avrl[rD(ctx->opcode)], t0, t1, t2, z);
> tcg_gen_movi_i64(cpu_avrh[rD(ctx->opcode)], 0);
> } else {
> tcg_gen_mul_i64(t0, cpu_avrh[rA(ctx->opcode)], ten);
> tcg_gen_add_i64(cpu_avrh[rD(ctx->opcode)], t0, t2);
> }
>
Will check and send updated patch.
> r~
>
>
--
Thanks
Rajalakshmi S
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [PATCH 2/6] target-ppc: add vextu[bhw]lx instructions
2016-10-05 5:21 ` Rajalakshmi Srinivasaraghavan
@ 2016-10-25 4:31 ` Rajalakshmi Srinivasaraghavan
0 siblings, 0 replies; 22+ messages in thread
From: Rajalakshmi Srinivasaraghavan @ 2016-10-25 4:31 UTC (permalink / raw)
To: Richard Henderson, qemu-ppc, david; +Cc: qemu-devel, nikunj, Avinesh Kumar
[-- Attachment #1: Type: text/plain, Size: 2725 bytes --]
On 10/05/2016 10:51 AM, Rajalakshmi Srinivasaraghavan wrote:
>
>
> On 09/28/2016 10:24 PM, Richard Henderson wrote:
>> On 09/27/2016 10:45 PM, Rajalakshmi Srinivasaraghavan wrote:
>>> +#if defined(HOST_WORDS_BIGENDIAN)
>>> +#define VEXTULX_DO(name, elem) \
>>> +target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
>>> +{ \
>>> + target_ulong r = 0; \
>>> + int i; \
>>> + int index = a & 0xf; \
>>> + for (i = 0; i < elem; i++) { \
>>> + r = r << 8; \
>>> + if (index + i <= 15) { \
>>> + r = r | b->u8[index + i]; \
>>> + } \
>>> + } \
>>> + return r; \
>>> +}
>>> +#else
>>> +#define VEXTULX_DO(name, elem) \
>>> +target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
>>> +{ \
>>> + target_ulong r = 0; \
>>> + int i; \
>>> + int index = 15 - (a & 0xf); \
>>> + for (i = 0; i < elem; i++) { \
>>> + r = r << 8; \
>>> + if (index - i >= 0) { \
>>> + r = r | b->u8[index - i]; \
>>> + } \
>>> + } \
>>> + return r; \
>>> +}
>>> +#endif
>>> +
>>> +VEXTULX_DO(vextublx, 1)
>>> +VEXTULX_DO(vextuhlx, 2)
>>> +VEXTULX_DO(vextuwlx, 4)
>>> +#undef VEXTULX_DO
>> Ew.
>>
>> This should be one 128-bit shift and one and.
>>
>> Since the shift amount is a multiple of 8, the 128-bit shift for
>> vextub[lr]x
>> does not need to cross a double-word boundary, and so can be
>> decomposed into
>> one 64-bit shift of (count & 64 ? hi : lo).
>>
>> For vextu[hw][lr]x, you'd need to do the whole left-shift,
>> right-shift, or thing.
>>
>> But still, fantastically better than a loop.
> Ack. Will send an updated patch.
Attached updated patch.
>>
>>
>> r~
>>
>>
>
--
Thanks
Rajalakshmi S
[-- Attachment #2: 0001-target-ppc-add-vextu-bhw-lx-instructions.patch --]
[-- Type: text/x-patch, Size: 7387 bytes --]
>From 59b96e11dd4c649ba9dbf0435439f717b931530f Mon Sep 17 00:00:00 2001
From: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
Date: Mon, 24 Oct 2016 11:36:33 +0530
Subject: [PATCH 1/2] target-ppc: add vextu[bhw]lx instructions
vextublx: Vector Extract Unsigned Byte Left
vextuhlx: Vector Extract Unsigned Halfword Left
vextuwlx: Vector Extract Unsigned Word Left
Signed-off-by: Avinesh Kumar <avinesku@linux.vnet.ibm.com>
Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
---
target-ppc/helper.h | 3 ++
target-ppc/int_helper.c | 63 +++++++++++++++++++++++++++++++++++
target-ppc/translate/vmx-impl.inc.c | 18 ++++++++++
target-ppc/translate/vmx-ops.inc.c | 4 ++-
4 files changed, 87 insertions(+), 1 deletions(-)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 04c6421..8551568 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -357,6 +357,9 @@ DEF_HELPER_3(vpmsumb, void, avr, avr, avr)
DEF_HELPER_3(vpmsumh, void, avr, avr, avr)
DEF_HELPER_3(vpmsumw, void, avr, avr, avr)
DEF_HELPER_3(vpmsumd, void, avr, avr, avr)
+DEF_HELPER_2(vextublx, tl, tl, avr)
+DEF_HELPER_2(vextuhlx, tl, tl, avr)
+DEF_HELPER_2(vextuwlx, tl, tl, avr)
DEF_HELPER_2(vsbox, void, avr, avr)
DEF_HELPER_3(vcipher, void, avr, avr, avr)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 5aee0a8..2b28848 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -1742,6 +1742,69 @@ void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
}
}
+#define EXTRACT128(value, start, length) \
+ ((value >> start) & (~(__uint128_t)0 >> (128 - length)))
+
+#if defined(HOST_WORDS_BIGENDIAN)
+# if defined (CONFIG_INT128) \
+# define VEXTULX_DO(name, elem) \
+target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
+{ \
+ target_ulong r = 0; \
+ int index = (a & 0xf) * 8; \
+ r = EXTRACT128(b->u128, index, elem * 8); \
+ return r; \
+}
+# else
+# define VEXTULX_DO(name, elem) \
+target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
+{ \
+ target_ulong r = 0; \
+ int i; \
+ int index = a & 0xf; \
+ for (i = 0; i < elem; i++) { \
+ r = r << 8; \
+ if (index + i <= 15) { \
+ r = r | b->u8[index + i]; \
+ } \
+ } \
+ return r; \
+}
+# endif
+#else
+# if defined (CONFIG_INT128)
+# define VEXTULX_DO(name, elem) \
+target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
+{ \
+ target_ulong r = 0; \
+ int size = elem * 8; \
+ int index = (15 - (a & 0xf) + 1) * 8; \
+ r = EXTRACT128(b->u128, (index - size), size); \
+ return r; \
+}
+# else
+# define VEXTULX_DO(name, elem) \
+target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
+{ \
+ target_ulong r = 0; \
+ int i; \
+ int index = 15 - (a & 0xf); \
+ for (i = 0; i < elem; i++) { \
+ r = r << 8; \
+ if (index - i >= 0) { \
+ r = r | b->u8[index - i]; \
+ } \
+ } \
+ return r; \
+}
+# endif
+#endif
+
+VEXTULX_DO(vextublx, 1)
+VEXTULX_DO(vextuhlx, 2)
+VEXTULX_DO(vextuwlx, 4)
+#undef VEXTULX_DO
+
/* The specification says that the results are undefined if all of the
* shift counts are not identical. We check to make sure that they are
* to conform to what real hardware appears to do. */
diff --git a/target-ppc/translate/vmx-impl.inc.c b/target-ppc/translate/vmx-impl.inc.c
index c8998f3..0a9d609 100644
--- a/target-ppc/translate/vmx-impl.inc.c
+++ b/target-ppc/translate/vmx-impl.inc.c
@@ -276,6 +276,19 @@ static void glue(gen_, name0##_##name1)(DisasContext *ctx) \
} \
}
+#define GEN_VXFORM_HETRO(name, opc2, opc3) \
+static void glue(gen_, name)(DisasContext *ctx) \
+{ \
+ TCGv_ptr rb; \
+ if (unlikely(!ctx->altivec_enabled)) { \
+ gen_exception(ctx, POWERPC_EXCP_VPU); \
+ return; \
+ } \
+ rb = gen_avr_ptr(rB(ctx->opcode)); \
+ gen_helper_##name(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], rb); \
+ tcg_temp_free_ptr(rb); \
+}
+
GEN_VXFORM(vaddubm, 0, 0);
GEN_VXFORM(vadduhm, 0, 1);
GEN_VXFORM(vadduwm, 0, 2);
@@ -441,6 +454,11 @@ GEN_VXFORM_ENV(vaddfp, 5, 0);
GEN_VXFORM_ENV(vsubfp, 5, 1);
GEN_VXFORM_ENV(vmaxfp, 5, 16);
GEN_VXFORM_ENV(vminfp, 5, 17);
+GEN_VXFORM_HETRO(vextublx, 6, 24)
+GEN_VXFORM_HETRO(vextuhlx, 6, 25)
+GEN_VXFORM_HETRO(vextuwlx, 6, 26)
+GEN_VXFORM_DUAL(vmrgow, PPC_NONE, PPC2_ALTIVEC_207,
+ vextuwlx, PPC_NONE, PPC2_ISA300)
#define GEN_VXRFORM1(opname, name, str, opc2, opc3) \
static void glue(gen_, name)(DisasContext *ctx) \
diff --git a/target-ppc/translate/vmx-ops.inc.c b/target-ppc/translate/vmx-ops.inc.c
index 68cba3e..70dc250 100644
--- a/target-ppc/translate/vmx-ops.inc.c
+++ b/target-ppc/translate/vmx-ops.inc.c
@@ -91,8 +91,10 @@ GEN_VXFORM(vmrghw, 6, 2),
GEN_VXFORM(vmrglb, 6, 4),
GEN_VXFORM(vmrglh, 6, 5),
GEN_VXFORM(vmrglw, 6, 6),
+GEN_VXFORM_300(vextublx, 6, 24),
+GEN_VXFORM_300(vextuhlx, 6, 25),
+GEN_VXFORM_DUAL(vmrgow, vextuwlx, 6, 26, PPC_NONE, PPC2_ALTIVEC_207),
GEN_VXFORM_207(vmrgew, 6, 30),
-GEN_VXFORM_207(vmrgow, 6, 26),
GEN_VXFORM(vmuloub, 4, 0),
GEN_VXFORM(vmulouh, 4, 1),
GEN_VXFORM_DUAL(vmulouw, vmuluwm, 4, 2, PPC_ALTIVEC, PPC_NONE),
--
1.7.1
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [Qemu-devel] [PATCH 3/6] target-ppc: add vextu[bhw]rx instructions
2016-09-28 5:45 ` [Qemu-devel] [PATCH 3/6] target-ppc: add vextu[bhw]rx instructions Rajalakshmi Srinivasaraghavan
@ 2016-10-25 4:32 ` Rajalakshmi Srinivasaraghavan
0 siblings, 0 replies; 22+ messages in thread
From: Rajalakshmi Srinivasaraghavan @ 2016-10-25 4:32 UTC (permalink / raw)
To: qemu-ppc, david, rth; +Cc: Hariharan T.S, nikunj, qemu-devel, Avinesh Kumar
[-- Attachment #1: Type: text/plain, Size: 198 bytes --]
On 09/28/2016 11:15 AM, Rajalakshmi Srinivasaraghavan wrote:
> From: Hariharan T.S <hari@linux.vnet.ibm.com>
>
Attached updated patch based on comments on vextu[bhw]lx.
--
Thanks
Rajalakshmi S
[-- Attachment #2: 0002-target-ppc-add-vextu-bhw-rx-instructions.patch --]
[-- Type: text/x-patch, Size: 6527 bytes --]
>From f027eb4903b89720634423c335e3688cf1e8632d Mon Sep 17 00:00:00 2001
From: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
Date: Mon, 24 Oct 2016 12:23:31 +0530
Subject: [PATCH 2/2] target-ppc: add vextu[bhw]rx instructions
vextubrx: Vector Extract Unsigned Byte Right-Indexed VX-form
vextuhrx: Vector Extract Unsigned Halfword Right-Indexed VX-form
vextuwrx: Vector Extract Unsigned Word Right-Indexed VX-form
Signed-off-by: Hariharan T.S. <hari@linux.vnet.ibm.com>
Signed-off-by: Avinesh Kumar <avinesku@linux.vnet.ibm.com>
Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
---
target-ppc/helper.h | 3 ++
target-ppc/int_helper.c | 60 +++++++++++++++++++++++++++++++++++
target-ppc/translate/vmx-impl.inc.c | 5 +++
target-ppc/translate/vmx-ops.inc.c | 4 ++-
4 files changed, 71 insertions(+), 1 deletions(-)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 8551568..f532977 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -360,6 +360,9 @@ DEF_HELPER_3(vpmsumd, void, avr, avr, avr)
DEF_HELPER_2(vextublx, tl, tl, avr)
DEF_HELPER_2(vextuhlx, tl, tl, avr)
DEF_HELPER_2(vextuwlx, tl, tl, avr)
+DEF_HELPER_2(vextubrx, tl, tl, avr)
+DEF_HELPER_2(vextuhrx, tl, tl, avr)
+DEF_HELPER_2(vextuwrx, tl, tl, avr)
DEF_HELPER_2(vsbox, void, avr, avr)
DEF_HELPER_3(vcipher, void, avr, avr, avr)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 2b28848..17f0613 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -1805,6 +1805,66 @@ VEXTULX_DO(vextuhlx, 2)
VEXTULX_DO(vextuwlx, 4)
#undef VEXTULX_DO
+#if defined(HOST_WORDS_BIGENDIAN)
+# if defined (CONFIG_INT128) \
+# define VEXTURX_DO(name, elem) \
+target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
+{ \
+ target_ulong r = 0; \
+ int size = elem * 8; \
+ int index = (15 - (a & 0xf) + 1) * 8; \
+ r = EXTRACT128(b->u128, (index - size), size); \
+ return r; \
+}
+# else
+# define VEXTURX_DO(name, elem) \
+target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
+{ \
+ target_ulong r = 0; \
+ int i; \
+ int index = a & 0xf; \
+ for (i = elem - 1; i >= 0; i--) { \
+ r = r << 8; \
+ if ((15 - i - index) >= 0) { \
+ r = r | b->u8[15 - i - index]; \
+ } \
+ } \
+ return r; \
+}
+# endif
+#else
+# if defined (CONFIG_INT128)
+# define VEXTURX_DO(name, elem) \
+target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
+{ \
+ target_ulong r = 0; \
+ int index = (a & 0xf) * 8; \
+ r = EXTRACT128(b->u128, index, elem * 8); \
+ return r; \
+}
+# else
+# define VEXTURX_DO(name, elem) \
+target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
+{ \
+ target_ulong r = 0; \
+ int i; \
+ int index = 15 - (a & 0xf); \
+ for (i = elem - 1; i >= 0; i--) { \
+ r = r << 8; \
+ if ((15 + i - index) <= 15) { \
+ r = r | b->u8[15 + i - index]; \
+ } \
+ } \
+ return r; \
+}
+# endif
+#endif
+
+VEXTURX_DO(vextubrx, 1)
+VEXTURX_DO(vextuhrx, 2)
+VEXTURX_DO(vextuwrx, 4)
+#undef VEXTURX_DO
+
/* The specification says that the results are undefined if all of the
* shift counts are not identical. We check to make sure that they are
* to conform to what real hardware appears to do. */
diff --git a/target-ppc/translate/vmx-impl.inc.c b/target-ppc/translate/vmx-impl.inc.c
index 0a9d609..6b51592 100644
--- a/target-ppc/translate/vmx-impl.inc.c
+++ b/target-ppc/translate/vmx-impl.inc.c
@@ -459,6 +459,11 @@ GEN_VXFORM_HETRO(vextuhlx, 6, 25)
GEN_VXFORM_HETRO(vextuwlx, 6, 26)
GEN_VXFORM_DUAL(vmrgow, PPC_NONE, PPC2_ALTIVEC_207,
vextuwlx, PPC_NONE, PPC2_ISA300)
+GEN_VXFORM_HETRO(vextubrx, 6, 28)
+GEN_VXFORM_HETRO(vextuhrx, 6, 29)
+GEN_VXFORM_HETRO(vextuwrx, 6, 30)
+GEN_VXFORM_DUAL(vmrgew, PPC_NONE, PPC2_ALTIVEC_207, \
+ vextuwrx, PPC_NONE, PPC2_ISA300)
#define GEN_VXRFORM1(opname, name, str, opc2, opc3) \
static void glue(gen_, name)(DisasContext *ctx) \
diff --git a/target-ppc/translate/vmx-ops.inc.c b/target-ppc/translate/vmx-ops.inc.c
index 70dc250..3f909fa 100644
--- a/target-ppc/translate/vmx-ops.inc.c
+++ b/target-ppc/translate/vmx-ops.inc.c
@@ -94,7 +94,9 @@ GEN_VXFORM(vmrglw, 6, 6),
GEN_VXFORM_300(vextublx, 6, 24),
GEN_VXFORM_300(vextuhlx, 6, 25),
GEN_VXFORM_DUAL(vmrgow, vextuwlx, 6, 26, PPC_NONE, PPC2_ALTIVEC_207),
-GEN_VXFORM_207(vmrgew, 6, 30),
+GEN_VXFORM_300(vextubrx, 6, 28),
+GEN_VXFORM_300(vextuhrx, 6, 29),
+GEN_VXFORM_DUAL(vmrgew, vextuwrx, 6, 30, PPC_NONE, PPC2_ALTIVEC_207),
GEN_VXFORM(vmuloub, 4, 0),
GEN_VXFORM(vmulouh, 4, 1),
GEN_VXFORM_DUAL(vmulouw, vmuluwm, 4, 2, PPC_ALTIVEC, PPC_NONE),
--
1.7.1
^ permalink raw reply related [flat|nested] 22+ messages in thread
end of thread, other threads:[~2016-10-25 4:32 UTC | newest]
Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-28 5:45 [Qemu-devel] [PATCH 0/6] POWER9 TCG enablement - part5 Rajalakshmi Srinivasaraghavan
2016-09-28 5:45 ` [Qemu-devel] [PATCH 1/6] target-ppc: add vmul10[u, eu, cu, ecu]q instructions Rajalakshmi Srinivasaraghavan
2016-09-28 16:42 ` Richard Henderson
2016-10-05 5:23 ` Rajalakshmi Srinivasaraghavan
2016-09-29 2:07 ` David Gibson
2016-09-29 4:00 ` Richard Henderson
2016-09-29 4:24 ` [Qemu-devel] [Qemu-ppc] " David Gibson
2016-09-28 5:45 ` [Qemu-devel] [PATCH 2/6] target-ppc: add vextu[bhw]lx instructions Rajalakshmi Srinivasaraghavan
2016-09-28 16:54 ` Richard Henderson
2016-10-05 5:21 ` Rajalakshmi Srinivasaraghavan
2016-10-25 4:31 ` Rajalakshmi Srinivasaraghavan
2016-09-28 5:45 ` [Qemu-devel] [PATCH 3/6] target-ppc: add vextu[bhw]rx instructions Rajalakshmi Srinivasaraghavan
2016-10-25 4:32 ` Rajalakshmi Srinivasaraghavan
2016-09-28 5:45 ` [Qemu-devel] [PATCH 4/6] target-ppc: fix invalid mask - cmpl, bctar Rajalakshmi Srinivasaraghavan
2016-09-29 2:22 ` David Gibson
2016-09-28 5:45 ` [Qemu-devel] [PATCH 5/6] target-ppc: add vector compare not equal instructions Rajalakshmi Srinivasaraghavan
2016-09-28 17:01 ` Richard Henderson
2016-09-29 2:22 ` David Gibson
2016-09-28 5:45 ` [Qemu-devel] [PATCH 6/6] target-ppc: add vclzlsbb/vctzlsbb instructions Rajalakshmi Srinivasaraghavan
2016-09-28 17:08 ` Richard Henderson
2016-09-29 2:23 ` David Gibson
2016-09-29 2:25 ` David Gibson
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.