qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: mark.cave-ayland@ilande.co.uk, amarkovic@wavecomp.com,
	hsp.cat7@gmail.com
Subject: [PATCH v7 17/22] tcg/ppc: Update vector support for v2.07 Altivec
Date: Mon, 30 Sep 2019 13:21:20 -0700	[thread overview]
Message-ID: <20190930202125.21064-18-richard.henderson@linaro.org> (raw)
In-Reply-To: <20190930202125.21064-1-richard.henderson@linaro.org>

These new instructions are conditional only on MSR.VEC and
are thus part of the Altivec instruction set, and not VSX.
This includes lots of double-word arithmetic and a few extra
logical operations.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.h     |  4 +-
 tcg/ppc/tcg-target.inc.c | 85 ++++++++++++++++++++++++++++++----------
 2 files changed, 67 insertions(+), 22 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index c974ca274a..13197eddce 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -61,6 +61,7 @@ typedef enum {
 typedef enum {
     tcg_isa_base,
     tcg_isa_2_06,
+    tcg_isa_2_07,
     tcg_isa_3_00,
 } TCGPowerISA;
 
@@ -69,6 +70,7 @@ extern bool have_altivec;
 extern bool have_vsx;
 
 #define have_isa_2_06  (have_isa >= tcg_isa_2_06)
+#define have_isa_2_07  (have_isa >= tcg_isa_2_07)
 #define have_isa_3_00  (have_isa >= tcg_isa_3_00)
 
 /* optional instructions automatically implemented */
@@ -155,7 +157,7 @@ extern bool have_vsx;
 #define TCG_TARGET_HAS_v256             0
 
 #define TCG_TARGET_HAS_andc_vec         1
-#define TCG_TARGET_HAS_orc_vec          0
+#define TCG_TARGET_HAS_orc_vec          have_isa_2_07
 #define TCG_TARGET_HAS_not_vec          1
 #define TCG_TARGET_HAS_neg_vec          0
 #define TCG_TARGET_HAS_abs_vec          0
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index 2388958405..bc3a669cb4 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -484,6 +484,7 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define VADDSWS    VX4(896)
 #define VADDUWS    VX4(640)
 #define VADDUWM    VX4(128)
+#define VADDUDM    VX4(192)       /* v2.07 */
 
 #define VSUBSBS    VX4(1792)
 #define VSUBUBS    VX4(1536)
@@ -494,47 +495,62 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define VSUBSWS    VX4(1920)
 #define VSUBUWS    VX4(1664)
 #define VSUBUWM    VX4(1152)
+#define VSUBUDM    VX4(1216)      /* v2.07 */
 
 #define VMAXSB     VX4(258)
 #define VMAXSH     VX4(322)
 #define VMAXSW     VX4(386)
+#define VMAXSD     VX4(450)       /* v2.07 */
 #define VMAXUB     VX4(2)
 #define VMAXUH     VX4(66)
 #define VMAXUW     VX4(130)
+#define VMAXUD     VX4(194)       /* v2.07 */
 #define VMINSB     VX4(770)
 #define VMINSH     VX4(834)
 #define VMINSW     VX4(898)
+#define VMINSD     VX4(962)       /* v2.07 */
 #define VMINUB     VX4(514)
 #define VMINUH     VX4(578)
 #define VMINUW     VX4(642)
+#define VMINUD     VX4(706)       /* v2.07 */
 
 #define VCMPEQUB   VX4(6)
 #define VCMPEQUH   VX4(70)
 #define VCMPEQUW   VX4(134)
+#define VCMPEQUD   VX4(199)       /* v2.07 */
 #define VCMPGTSB   VX4(774)
 #define VCMPGTSH   VX4(838)
 #define VCMPGTSW   VX4(902)
+#define VCMPGTSD   VX4(967)       /* v2.07 */
 #define VCMPGTUB   VX4(518)
 #define VCMPGTUH   VX4(582)
 #define VCMPGTUW   VX4(646)
+#define VCMPGTUD   VX4(711)       /* v2.07 */
 
 #define VSLB       VX4(260)
 #define VSLH       VX4(324)
 #define VSLW       VX4(388)
+#define VSLD       VX4(1476)      /* v2.07 */
 #define VSRB       VX4(516)
 #define VSRH       VX4(580)
 #define VSRW       VX4(644)
+#define VSRD       VX4(1732)      /* v2.07 */
 #define VSRAB      VX4(772)
 #define VSRAH      VX4(836)
 #define VSRAW      VX4(900)
+#define VSRAD      VX4(964)       /* v2.07 */
 #define VRLB       VX4(4)
 #define VRLH       VX4(68)
 #define VRLW       VX4(132)
+#define VRLD       VX4(196)       /* v2.07 */
 
 #define VMULEUB    VX4(520)
 #define VMULEUH    VX4(584)
+#define VMULEUW    VX4(648)       /* v2.07 */
 #define VMULOUB    VX4(8)
 #define VMULOUH    VX4(72)
+#define VMULOUW    VX4(136)       /* v2.07 */
+#define VMULUWM    VX4(137)       /* v2.07 */
 #define VMSUMUHM   VX4(38)
 
 #define VMRGHB     VX4(12)
@@ -552,6 +568,9 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define VNOR       VX4(1284)
 #define VOR        VX4(1156)
 #define VXOR       VX4(1220)
+#define VEQV       VX4(1668)      /* v2.07 */
+#define VNAND      VX4(1412)      /* v2.07 */
+#define VORC       VX4(1348)      /* v2.07 */
 
 #define VSPLTB     VX4(524)
 #define VSPLTH     VX4(588)
@@ -2904,26 +2923,37 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
     case INDEX_op_andc_vec:
     case INDEX_op_not_vec:
         return 1;
+    case INDEX_op_orc_vec:
+        return have_isa_2_07;
     case INDEX_op_add_vec:
     case INDEX_op_sub_vec:
     case INDEX_op_smax_vec:
     case INDEX_op_smin_vec:
     case INDEX_op_umax_vec:
     case INDEX_op_umin_vec:
+    case INDEX_op_shlv_vec:
+    case INDEX_op_shrv_vec:
+    case INDEX_op_sarv_vec:
+        return vece <= MO_32 || have_isa_2_07;
     case INDEX_op_ssadd_vec:
     case INDEX_op_sssub_vec:
     case INDEX_op_usadd_vec:
     case INDEX_op_ussub_vec:
-    case INDEX_op_shlv_vec:
-    case INDEX_op_shrv_vec:
-    case INDEX_op_sarv_vec:
         return vece <= MO_32;
     case INDEX_op_cmp_vec:
-    case INDEX_op_mul_vec:
     case INDEX_op_shli_vec:
     case INDEX_op_shri_vec:
     case INDEX_op_sari_vec:
-        return vece <= MO_32 ? -1 : 0;
+        return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
+    case INDEX_op_mul_vec:
+        switch (vece) {
+        case MO_8:
+        case MO_16:
+            return -1;
+        case MO_32:
+            return have_isa_2_07 ? 1 : -1;
+        }
+        return 0;
     case INDEX_op_bitsel_vec:
         return have_vsx;
     default:
@@ -3027,28 +3057,28 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                            const TCGArg *args, const int *const_args)
 {
     static const uint32_t
-        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 },
-        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 },
-        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
-        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
-        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
+        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
+        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
+        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
+        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
+        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
         ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
         usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
         sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
         ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
-        umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
-        smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
-        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
-        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 },
-        shlv_op[4] = { VSLB, VSLH, VSLW, 0 },
-        shrv_op[4] = { VSRB, VSRH, VSRW, 0 },
-        sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 },
+        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
+        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
+        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
+        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
+        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
+        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
+        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
         mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
         mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
-        muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 },
-        mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 },
+        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
+        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
         pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
-        rotl_op[4] = { VRLB, VRLH, VRLW, 0 };
+        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
 
     TCGType type = vecl + TCG_TYPE_V64;
     TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
@@ -3071,6 +3101,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_sub_vec:
         insn = sub_op[vece];
         break;
+    case INDEX_op_mul_vec:
+        tcg_debug_assert(vece == MO_32 && have_isa_2_07);
+        insn = VMULUWM;
+        break;
     case INDEX_op_ssadd_vec:
         insn = ssadd_op[vece];
         break;
@@ -3120,6 +3154,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         insn = VNOR;
         a2 = a1;
         break;
+    case INDEX_op_orc_vec:
+        insn = VORC;
+        break;
 
     case INDEX_op_cmp_vec:
         switch (args[3]) {
@@ -3200,7 +3237,7 @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
 {
     bool need_swap = false, need_inv = false;
 
-    tcg_debug_assert(vece <= MO_32);
+    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
 
     switch (cond) {
     case TCG_COND_EQ:
@@ -3264,6 +3301,7 @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
 	break;
 
     case MO_32:
+        tcg_debug_assert(!have_isa_2_07);
         t3 = tcg_temp_new_vec(type);
         t4 = tcg_temp_new_vec(type);
         tcg_gen_dupi_vec(MO_8, t4, -16);
@@ -3554,6 +3592,11 @@ static void tcg_target_init(TCGContext *s)
     if (hwcap & PPC_FEATURE_ARCH_2_06) {
         have_isa = tcg_isa_2_06;
     }
+#ifdef PPC_FEATURE2_ARCH_2_07
+    if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
+        have_isa = tcg_isa_2_07;
+    }
+#endif
 #ifdef PPC_FEATURE2_ARCH_3_00
     if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
         have_isa = tcg_isa_3_00;
-- 
2.17.1



  parent reply	other threads:[~2019-09-30 20:43 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-30 20:21 [PATCH v7 00/22] tcg/ppc: Add vector opcodes Richard Henderson
2019-09-30 20:21 ` [PATCH v7 01/22] tcg/ppc: Introduce Altivec registers Richard Henderson
2019-09-30 20:21 ` [PATCH v7 02/22] tcg/ppc: Introduce macro VX4() Richard Henderson
2019-09-30 20:21 ` [PATCH v7 03/22] tcg/ppc: Introduce macros VRT(), VRA(), VRB(), VRC() Richard Henderson
2019-09-30 20:21 ` [PATCH v7 04/22] tcg/ppc: Create TCGPowerISA and have_isa Richard Henderson
2019-09-30 20:21 ` [PATCH v7 05/22] tcg/ppc: Replace HAVE_ISA_2_06 Richard Henderson
2019-09-30 20:21 ` [PATCH v7 06/22] tcg/ppc: Replace HAVE_ISEL macro with a variable Richard Henderson
2019-09-30 20:21 ` [PATCH v7 07/22] tcg/ppc: Enable tcg backend vector compilation Richard Henderson
2019-09-30 20:21 ` [PATCH v7 08/22] tcg/ppc: Add support for load/store/logic/comparison Richard Henderson
2019-09-30 20:21 ` [PATCH v7 09/22] tcg/ppc: Add support for vector maximum/minimum Richard Henderson
2019-09-30 20:21 ` [PATCH v7 10/22] tcg/ppc: Add support for vector add/subtract Richard Henderson
2019-09-30 20:21 ` [PATCH v7 11/22] tcg/ppc: Add support for vector saturated add/subtract Richard Henderson
2019-09-30 20:21 ` [PATCH v7 12/22] tcg/ppc: Support vector shift by immediate Richard Henderson
2019-09-30 20:21 ` [PATCH v7 13/22] tcg/ppc: Support vector multiply Richard Henderson
2019-09-30 20:21 ` [PATCH v7 14/22] tcg/ppc: Support vector dup2 Richard Henderson
2019-09-30 20:21 ` [PATCH v7 15/22] tcg/ppc: Enable Altivec detection Richard Henderson
2019-09-30 20:21 ` [PATCH v7 16/22] tcg/ppc: Update vector support for VSX Richard Henderson
2019-09-30 20:21 ` Richard Henderson [this message]
2019-09-30 20:21 ` [PATCH v7 18/22] tcg/ppc: Update vector support for v2.07 VSX Richard Henderson
2019-09-30 20:21 ` [PATCH v7 19/22] tcg/ppc: Update vector support for v2.07 FP Richard Henderson
2019-09-30 20:21 ` [PATCH v7 20/22] tcg/ppc: Update vector support for v3.00 Altivec Richard Henderson
2019-09-30 20:21 ` [PATCH v7 21/22] tcg/ppc: Update vector support for v3.00 load/store Richard Henderson
2019-09-30 20:21 ` [PATCH v7 22/22] tcg/ppc: Update vector support for v3.00 dup/dupi Richard Henderson
2019-09-30 21:46 ` [PATCH v7 00/22] tcg/ppc: Add vector opcodes no-reply
2019-10-01 20:33 ` Mark Cave-Ayland
2019-10-02  9:34   ` Aleksandar Markovic
2019-10-14  0:25 ` David Gibson
2019-10-14  3:31   ` Richard Henderson
2019-10-14  3:54     ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190930202125.21064-18-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=amarkovic@wavecomp.com \
    --cc=hsp.cat7@gmail.com \
    --cc=mark.cave-ayland@ilande.co.uk \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).