All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: mark.cave-ayland@ilande.co.uk, amarkovic@wavecomp.com,
	hsp.cat7@gmail.com
Subject: [Qemu-devel] [PATCH v6 14/16] tcg/ppc: Update vector support to v2.06
Date: Sat, 29 Jun 2019 15:00:15 +0200	[thread overview]
Message-ID: <20190629130017.2973-15-richard.henderson@linaro.org> (raw)
In-Reply-To: <20190629130017.2973-1-richard.henderson@linaro.org>

This includes double-word loads and stores, double-word load and splat,
double-word permute, and bit select.  All of which require multiple
operations in the base Altivec instruction set.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
 tcg/ppc/tcg-target.h     |  5 ++--
 tcg/ppc/tcg-target.inc.c | 51 ++++++++++++++++++++++++++++++++++++----
 2 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index a130192cbd..40544f996d 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -60,6 +60,7 @@ typedef enum {
 
 extern bool have_isa_altivec;
 extern bool have_isa_2_06;
+extern bool have_isa_2_06_vsx;
 extern bool have_isa_3_00;
 
 /* optional instructions automatically implemented */
@@ -141,7 +142,7 @@ extern bool have_isa_3_00;
  * instruction and substituting two 32-bit stores makes the generated
  * code quite large.
  */
-#define TCG_TARGET_HAS_v64              0
+#define TCG_TARGET_HAS_v64              have_isa_2_06_vsx
 #define TCG_TARGET_HAS_v128             have_isa_altivec
 #define TCG_TARGET_HAS_v256             0
 
@@ -157,7 +158,7 @@ extern bool have_isa_3_00;
 #define TCG_TARGET_HAS_mul_vec          1
 #define TCG_TARGET_HAS_sat_vec          1
 #define TCG_TARGET_HAS_minmax_vec       1
-#define TCG_TARGET_HAS_bitsel_vec       0
+#define TCG_TARGET_HAS_bitsel_vec       have_isa_2_06_vsx
 #define TCG_TARGET_HAS_cmpsel_vec       0
 
 void flush_icache_range(uintptr_t start, uintptr_t stop);
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index c6defd4df7..50d1b5612c 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -66,6 +66,7 @@ static tcg_insn_unit *tb_ret_addr;
 
 bool have_isa_altivec;
 bool have_isa_2_06;
+bool have_isa_2_06_vsx;
 bool have_isa_3_00;
 
 #define HAVE_ISA_2_06  have_isa_2_06
@@ -470,9 +471,12 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define LVEBX      XO31(7)
 #define LVEHX      XO31(39)
 #define LVEWX      XO31(71)
+#define LXSDX      XO31(588)      /* v2.06 */
+#define LXVDSX     XO31(332)      /* v2.06 */
 
 #define STVX       XO31(231)
 #define STVEWX     XO31(199)
+#define STXSDX     XO31(716)      /* v2.06 */
 
 #define VADDSBS    VX4(768)
 #define VADDUBS    VX4(512)
@@ -561,6 +565,9 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 
 #define VSLDOI     VX4(44)
 
+#define XXPERMDI   (OPCD(60) | (10 << 3))   /* v2.06 */
+#define XXSEL      (OPCD(60) | (3 << 4))    /* v2.06 */
+
 #define RT(r) ((r)<<21)
 #define RS(r) ((r)<<21)
 #define RA(r) ((r)<<16)
@@ -887,11 +894,21 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
         add = 0;
     }
 
-    load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
-    if (TCG_TARGET_REG_BITS == 64) {
-        new_pool_l2(s, rel, s->code_ptr, add, val, val);
+    if (have_isa_2_06_vsx) {
+        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
+        load_insn |= VRT(ret) | RB(TCG_REG_TMP1) | 1;
+        if (TCG_TARGET_REG_BITS == 64) {
+            new_pool_label(s, val, rel, s->code_ptr, add);
+        } else {
+            new_pool_l2(s, rel, s->code_ptr, add, val, val);
+        }
     } else {
-        new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
+        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
+        if (TCG_TARGET_REG_BITS == 64) {
+            new_pool_l2(s, rel, s->code_ptr, add, val, val);
+        } else {
+            new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
+        }
     }
 
     if (USE_REG_TB) {
@@ -1139,6 +1156,10 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
         /* fallthru */
     case TCG_TYPE_V64:
         tcg_debug_assert(ret >= TCG_REG_V0);
+        if (have_isa_2_06_vsx) {
+            tcg_out_mem_long(s, 0, LXSDX | 1, ret, base, offset);
+            break;
+        }
         assert((offset & 7) == 0);
         tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
         if (offset & 8) {
@@ -1183,6 +1204,10 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
         /* fallthru */
     case TCG_TYPE_V64:
         tcg_debug_assert(arg >= TCG_REG_V0);
+        if (have_isa_2_06_vsx) {
+            tcg_out_mem_long(s, 0, STXSDX | 1, arg, base, offset);
+            break;
+        }
         assert((offset & 7) == 0);
         if (offset & 8) {
             tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
@@ -2902,6 +2927,8 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
     case INDEX_op_shri_vec:
     case INDEX_op_sari_vec:
         return vece <= MO_32 ? -1 : 0;
+    case INDEX_op_bitsel_vec:
+        return have_isa_2_06_vsx;
     default:
         return 0;
     }
@@ -2928,6 +2955,10 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
         tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
         break;
     case MO_64:
+        if (have_isa_2_06_vsx) {
+            tcg_out32(s, XXPERMDI | 7 | VRT(dst) | VRA(src) | VRB(src));
+            break;
+        }
         tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
         tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
         break;
@@ -2971,6 +3002,10 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
         tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
         break;
     case MO_64:
+        if (have_isa_2_06_vsx) {
+            tcg_out_mem_long(s, 0, LXVDSX | 1, out, base, offset);
+            break;
+        }
         assert((offset & 7) == 0);
         tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
         tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
@@ -3105,6 +3140,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         }
         break;
 
+    case INDEX_op_bitsel_vec:
+        tcg_out32(s, XXSEL | 0xf | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
+        return;
+
     case INDEX_op_dup2_vec:
         assert(TCG_TARGET_REG_BITS == 32);
         /* With inputs a1 = xLxx, a2 = xHxx  */
@@ -3500,6 +3539,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
     case INDEX_op_st_vec:
     case INDEX_op_dupm_vec:
         return &v_r;
+    case INDEX_op_bitsel_vec:
     case INDEX_op_ppc_msum_vec:
         return &v_v_v_v;
 
@@ -3518,6 +3558,9 @@ static void tcg_target_init(TCGContext *s)
     }
     if (hwcap & PPC_FEATURE_ARCH_2_06) {
         have_isa_2_06 = true;
+        if (hwcap & PPC_FEATURE_HAS_VSX) {
+            have_isa_2_06_vsx = true;
+        }
     }
 #ifdef PPC_FEATURE2_ARCH_3_00
     if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
-- 
2.17.1



  parent reply	other threads:[~2019-06-29 13:16 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-29 13:00 [Qemu-devel] [PATCH v6 00/16] tcg/ppc: Add vector opcodes Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 01/16] tcg/ppc: Introduce Altivec registers Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 02/16] tcg/ppc: Introduce macro VX4() Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 03/16] tcg/ppc: Introduce macros VRT(), VRA(), VRB(), VRC() Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 04/16] tcg/ppc: Enable tcg backend vector compilation Richard Henderson
2019-06-30  9:46   ` Aleksandar Markovic
2019-06-30 10:48     ` Richard Henderson
2019-06-30 11:45       ` Aleksandar Markovic
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 05/16] tcg/ppc: Add support for load/store/logic/comparison Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 06/16] tcg/ppc: Add support for vector maximum/minimum Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 07/16] tcg/ppc: Add support for vector add/subtract Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 08/16] tcg/ppc: Add support for vector saturated add/subtract Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 09/16] tcg/ppc: Prepare case for vector multiply Richard Henderson
2019-06-30  9:52   ` Aleksandar Markovic
2019-06-30 10:49     ` Richard Henderson
2019-06-30 11:35       ` Aleksandar Markovic
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 10/16] tcg/ppc: Support vector shift by immediate Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 11/16] tcg/ppc: Support vector multiply Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 12/16] tcg/ppc: Support vector dup2 Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 13/16] tcg/ppc: Enable Altivec detection Richard Henderson
2019-06-29 13:00 ` Richard Henderson [this message]
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 15/16] tcg/ppc: Update vector support to v2.07 Richard Henderson
2019-06-30 11:50   ` Aleksandar Markovic
2019-06-30 13:37   ` Aleksandar Markovic
2019-06-30 15:12     ` Richard Henderson
2019-07-01  3:57       ` Aleksandar Markovic
2019-07-01 10:29         ` Richard Henderson
2019-07-01 11:41           ` Aleksandar Markovic
2019-07-02 14:25             ` Richard Henderson
2019-07-10 10:52               ` Aleksandar Markovic
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 16/16] tcg/ppc: Update vector support to v3.00 Richard Henderson
2019-06-29 13:37 ` [Qemu-devel] [PATCH v6 00/16] tcg/ppc: Add vector opcodes no-reply
2019-06-30 17:58 ` Mark Cave-Ayland
2019-07-01 10:30   ` Richard Henderson
2019-07-01 18:34     ` Howard Spoelstra
2019-09-03 17:02       ` Mark Cave-Ayland
2019-09-03 17:37         ` Aleksandar Markovic
2019-09-03 18:32           ` Mark Cave-Ayland
2019-09-05 11:43             ` Aleksandar Markovic
2019-09-27 12:13               ` Aleksandar Markovic

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190629130017.2973-15-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=amarkovic@wavecomp.com \
    --cc=hsp.cat7@gmail.com \
    --cc=mark.cave-ayland@ilande.co.uk \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.