All of lore.kernel.org
 help / color / mirror / Atom feed
From: Aleksandar Markovic <aleksandar.m.mail@gmail.com>
To: Richard Henderson <richard.henderson@linaro.org>
Cc: mark.cave-ayland@ilande.co.uk, qemu-devel@nongnu.org,
	amarkovic@wavecomp.com, hsp.cat7@gmail.com
Subject: Re: [Qemu-devel] [PATCH v6 15/16] tcg/ppc: Update vector support to v2.07
Date: Sun, 30 Jun 2019 15:37:53 +0200	[thread overview]
Message-ID: <CAL1e-=ghMjUtEyqKX+s15RcyQp6uQO_hJ_u4RTSJ5ZyPvFnymw@mail.gmail.com> (raw)
In-Reply-To: <20190629130017.2973-16-richard.henderson@linaro.org>

On Jun 29, 2019 3:14 PM, "Richard Henderson" <richard.henderson@linaro.org>
wrote:
>
> This includes single-word loads and stores, lots of double-word
> arithmetic, and a few extra logical operations.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
> ---
>  tcg/ppc/tcg-target.h     |   3 +-
>  tcg/ppc/tcg-target.inc.c | 128 ++++++++++++++++++++++++++++++---------
>  2 files changed, 103 insertions(+), 28 deletions(-)
>
> diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
> index 40544f996d..b8355d0a56 100644
> --- a/tcg/ppc/tcg-target.h
> +++ b/tcg/ppc/tcg-target.h
> @@ -61,6 +61,7 @@ typedef enum {
>  extern bool have_isa_altivec;
>  extern bool have_isa_2_06;
>  extern bool have_isa_2_06_vsx;
> +extern bool have_isa_2_07_vsx;
>  extern bool have_isa_3_00;
>
>  /* optional instructions automatically implemented */
> @@ -147,7 +148,7 @@ extern bool have_isa_3_00;
>  #define TCG_TARGET_HAS_v256             0
>
>  #define TCG_TARGET_HAS_andc_vec         1
> -#define TCG_TARGET_HAS_orc_vec          0
> +#define TCG_TARGET_HAS_orc_vec          have_isa_2_07_vsx
>  #define TCG_TARGET_HAS_not_vec          1
>  #define TCG_TARGET_HAS_neg_vec          0
>  #define TCG_TARGET_HAS_abs_vec          0
> diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
> index 50d1b5612c..af86ab07dd 100644
> --- a/tcg/ppc/tcg-target.inc.c
> +++ b/tcg/ppc/tcg-target.inc.c
> @@ -67,6 +67,7 @@ static tcg_insn_unit *tb_ret_addr;
>  bool have_isa_altivec;
>  bool have_isa_2_06;
>  bool have_isa_2_06_vsx;
> +bool have_isa_2_07_vsx;

Does this flag indicate support for PowerISA 2.07 or VSX?

If VSX support is implied by PowerISA 2.07, then the “_vsx” suffix is really
not needed. If not, why are there two flavors of “2_06” flags (with and
without _vsx), but only one flavor of 2.07 flag variable (with _vsx)?

>  bool have_isa_3_00;
>
>  #define HAVE_ISA_2_06  have_isa_2_06
> @@ -473,10 +474,12 @@ static int tcg_target_const_match(tcg_target_long
val, TCGType type,
>  #define LVEWX      XO31(71)
>  #define LXSDX      XO31(588)      /* v2.06 */
>  #define LXVDSX     XO31(332)      /* v2.06 */
> +#define LXSIWZX    XO31(12)       /* v2.07 */
>
>  #define STVX       XO31(231)
>  #define STVEWX     XO31(199)
>  #define STXSDX     XO31(716)      /* v2.06 */
> +#define STXSIWX    XO31(140)      /* v2.07 */
>
>  #define VADDSBS    VX4(768)
>  #define VADDUBS    VX4(512)
> @@ -487,6 +490,7 @@ static int tcg_target_const_match(tcg_target_long
val, TCGType type,
>  #define VADDSWS    VX4(896)
>  #define VADDUWS    VX4(640)
>  #define VADDUWM    VX4(128)
> +#define VADDUDM    VX4(192)       /* v2.07 */
>
>  #define VSUBSBS    VX4(1792)
>  #define VSUBUBS    VX4(1536)
> @@ -497,47 +501,62 @@ static int tcg_target_const_match(tcg_target_long
val, TCGType type,
>  #define VSUBSWS    VX4(1920)
>  #define VSUBUWS    VX4(1664)
>  #define VSUBUWM    VX4(1152)
> +#define VSUBUDM    VX4(1216)      /* v2.07 */
>
>  #define VMAXSB     VX4(258)
>  #define VMAXSH     VX4(322)
>  #define VMAXSW     VX4(386)
> +#define VMAXSD     VX4(450)       /* v2.07 */
>  #define VMAXUB     VX4(2)
>  #define VMAXUH     VX4(66)
>  #define VMAXUW     VX4(130)
> +#define VMAXUD     VX4(194)       /* v2.07 */
>  #define VMINSB     VX4(770)
>  #define VMINSH     VX4(834)
>  #define VMINSW     VX4(898)
> +#define VMINSD     VX4(962)       /* v2.07 */
>  #define VMINUB     VX4(514)
>  #define VMINUH     VX4(578)
>  #define VMINUW     VX4(642)
> +#define VMINUD     VX4(706)       /* v2.07 */
>
>  #define VCMPEQUB   VX4(6)
>  #define VCMPEQUH   VX4(70)
>  #define VCMPEQUW   VX4(134)
> +#define VCMPEQUD   VX4(199)       /* v2.07 */
>  #define VCMPGTSB   VX4(774)
>  #define VCMPGTSH   VX4(838)
>  #define VCMPGTSW   VX4(902)
> +#define VCMPGTSD   VX4(967)       /* v2.07 */
>  #define VCMPGTUB   VX4(518)
>  #define VCMPGTUH   VX4(582)
>  #define VCMPGTUW   VX4(646)
> +#define VCMPGTUD   VX4(711)       /* v2.07 */
>
>  #define VSLB       VX4(260)
>  #define VSLH       VX4(324)
>  #define VSLW       VX4(388)
> +#define VSLD       VX4(1476)      /* v2.07 */
>  #define VSRB       VX4(516)
>  #define VSRH       VX4(580)
>  #define VSRW       VX4(644)
> +#define VSRD       VX4(1732)      /* v2.07 */
>  #define VSRAB      VX4(772)
>  #define VSRAH      VX4(836)
>  #define VSRAW      VX4(900)
> +#define VSRAD      VX4(964)       /* v2.07 */
>  #define VRLB       VX4(4)
>  #define VRLH       VX4(68)
>  #define VRLW       VX4(132)
> +#define VRLD       VX4(196)       /* v2.07 */
>
>  #define VMULEUB    VX4(520)
>  #define VMULEUH    VX4(584)
> +#define VMULEUW    VX4(648)       /* v2.07 */
>  #define VMULOUB    VX4(8)
>  #define VMULOUH    VX4(72)
> +#define VMULOUW    VX4(136)       /* v2.07 */
> +#define VMULUWM    VX4(137)       /* v2.07 */
>  #define VMSUMUHM   VX4(38)
>
>  #define VMRGHB     VX4(12)
> @@ -555,6 +574,9 @@ static int tcg_target_const_match(tcg_target_long
val, TCGType type,
>  #define VNOR       VX4(1284)
>  #define VOR        VX4(1156)
>  #define VXOR       VX4(1220)
> +#define VEQV       VX4(1668)      /* v2.07 */
> +#define VNAND      VX4(1412)      /* v2.07 */
> +#define VORC       VX4(1348)      /* v2.07 */
>
>  #define VSPLTB     VX4(524)
>  #define VSPLTH     VX4(588)
> @@ -568,6 +590,11 @@ static int tcg_target_const_match(tcg_target_long
val, TCGType type,
>  #define XXPERMDI   (OPCD(60) | (10 << 3))   /* v2.06 */
>  #define XXSEL      (OPCD(60) | (3 << 4))    /* v2.06 */
>
> +#define MFVSRD     XO31(51)       /* v2.07 */
> +#define MFVSRWZ    XO31(115)      /* v2.07 */
> +#define MTVSRD     XO31(179)      /* v2.07 */
> +#define MTVSRWZ    XO31(179)      /* v2.07 */
> +
>  #define RT(r) ((r)<<21)
>  #define RS(r) ((r)<<21)
>  #define RA(r) ((r)<<16)
> @@ -697,12 +724,27 @@ static bool tcg_out_mov(TCGContext *s, TCGType
type, TCGReg ret, TCGReg arg)
>          tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
>          /* fallthru */
>      case TCG_TYPE_I32:
> -        if (ret < TCG_REG_V0 && arg < TCG_REG_V0) {
> -            tcg_out32(s, OR | SAB(arg, ret, arg));
> -            break;
> -        } else if (ret < TCG_REG_V0 || arg < TCG_REG_V0) {
> -            /* Altivec does not support vector/integer moves.  */
> -            return false;
> +        if (ret < TCG_REG_V0) {
> +            if (arg < TCG_REG_V0) {
> +                tcg_out32(s, OR | SAB(arg, ret, arg));
> +                break;
> +            } else if (have_isa_2_07_vsx) {
> +                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
> +                          | VRT(arg) | RA(ret) | 1);
> +                break;
> +            } else {
> +                /* Altivec does not support vector->integer moves.  */
> +                return false;
> +            }
> +        } else if (arg < TCG_REG_V0) {
> +            if (have_isa_2_07_vsx) {
> +                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
> +                          | VRT(ret) | RA(arg) | 1);
> +                break;
> +            } else {
> +                /* Altivec does not support integer->vector moves.  */
> +                return false;
> +            }
>          }
>          /* fallthru */
>      case TCG_TYPE_V64:
> @@ -1140,6 +1182,10 @@ static void tcg_out_ld(TCGContext *s, TCGType
type, TCGReg ret,
>              tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
>              break;
>          }
> +        if (have_isa_2_07_vsx) {
> +            tcg_out_mem_long(s, 0, LXSIWZX | 1, ret, base, offset);
> +            break;
> +        }
>          assert((offset & 3) == 0);
>          tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
>          shift = (offset - 4) & 0xc;
> @@ -1187,6 +1233,10 @@ static void tcg_out_st(TCGContext *s, TCGType
type, TCGReg arg,
>              tcg_out_mem_long(s, STW, STWX, arg, base, offset);
>              break;
>          }
> +        if (have_isa_2_07_vsx) {
> +            tcg_out_mem_long(s, 0, STXSIWX | 1, arg, base, offset);
> +            break;
> +        }
>          assert((offset & 3) == 0);
>          shift = (offset - 4) & 0xc;
>          if (shift) {
> @@ -2907,26 +2957,37 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType
type, unsigned vece)
>      case INDEX_op_andc_vec:
>      case INDEX_op_not_vec:
>          return 1;
> +    case INDEX_op_orc_vec:
> +        return have_isa_2_07_vsx;
>      case INDEX_op_add_vec:
>      case INDEX_op_sub_vec:
>      case INDEX_op_smax_vec:
>      case INDEX_op_smin_vec:
>      case INDEX_op_umax_vec:
>      case INDEX_op_umin_vec:
> +    case INDEX_op_shlv_vec:
> +    case INDEX_op_shrv_vec:
> +    case INDEX_op_sarv_vec:
> +        return vece <= MO_32 || have_isa_2_07_vsx;
>      case INDEX_op_ssadd_vec:
>      case INDEX_op_sssub_vec:
>      case INDEX_op_usadd_vec:
>      case INDEX_op_ussub_vec:
> -    case INDEX_op_shlv_vec:
> -    case INDEX_op_shrv_vec:
> -    case INDEX_op_sarv_vec:
>          return vece <= MO_32;
>      case INDEX_op_cmp_vec:
> -    case INDEX_op_mul_vec:
>      case INDEX_op_shli_vec:
>      case INDEX_op_shri_vec:
>      case INDEX_op_sari_vec:
> -        return vece <= MO_32 ? -1 : 0;
> +        return vece <= MO_32 || have_isa_2_07_vsx ? -1 : 0;
> +    case INDEX_op_mul_vec:
> +        switch (vece) {
> +        case MO_8:
> +        case MO_16:
> +            return -1;
> +        case MO_32:
> +            return have_isa_2_07_vsx ? 1 : -1;
> +        }
> +        return 0;
>      case INDEX_op_bitsel_vec:
>          return have_isa_2_06_vsx;
>      default:
> @@ -3030,28 +3091,28 @@ static void tcg_out_vec_op(TCGContext *s,
TCGOpcode opc,
>                             const TCGArg *args, const int *const_args)
>  {
>      static const uint32_t
> -        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 },
> -        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 },
> -        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
> -        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
> -        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
> +        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
> +        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
> +        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
> +        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
> +        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
>          ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
>          usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
>          sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
>          ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
> -        umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
> -        smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
> -        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
> -        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 },
> -        shlv_op[4] = { VSLB, VSLH, VSLW, 0 },
> -        shrv_op[4] = { VSRB, VSRH, VSRW, 0 },
> -        sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 },
> +        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
> +        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
> +        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
> +        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
> +        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
> +        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
> +        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
>          mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
>          mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
> -        muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 },
> -        mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 },
> +        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
> +        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
>          pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
> -        rotl_op[4] = { VRLB, VRLH, VRLW, 0 };
> +        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
>
>      TCGType type = vecl + TCG_TYPE_V64;
>      TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
> @@ -3074,6 +3135,10 @@ static void tcg_out_vec_op(TCGContext *s,
TCGOpcode opc,
>      case INDEX_op_sub_vec:
>          insn = sub_op[vece];
>          break;
> +    case INDEX_op_mul_vec:
> +        tcg_debug_assert(vece == MO_32 && have_isa_2_07_vsx);
> +        insn = VMULUWM;
> +        break;
>      case INDEX_op_ssadd_vec:
>          insn = ssadd_op[vece];
>          break;
> @@ -3123,6 +3188,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode
opc,
>          insn = VNOR;
>          a2 = a1;
>          break;
> +    case INDEX_op_orc_vec:
> +        insn = VORC;
> +        break;
>
>      case INDEX_op_cmp_vec:
>          switch (args[3]) {
> @@ -3203,7 +3271,7 @@ static void expand_vec_cmp(TCGType type, unsigned
vece, TCGv_vec v0,
>  {
>      bool need_swap = false, need_inv = false;
>
> -    tcg_debug_assert(vece <= MO_32);
> +    tcg_debug_assert(vece <= MO_32 || have_isa_2_07_vsx);
>
>      switch (cond) {
>      case TCG_COND_EQ:
> @@ -3267,6 +3335,7 @@ static void expand_vec_mul(TCGType type, unsigned
vece, TCGv_vec v0,
>         break;
>
>      case MO_32:
> +        tcg_debug_assert(!have_isa_2_07_vsx);
>          t3 = tcg_temp_new_vec(type);
>          t4 = tcg_temp_new_vec(type);
>          tcg_gen_dupi_vec(MO_8, t4, -16);
> @@ -3562,6 +3631,11 @@ static void tcg_target_init(TCGContext *s)
>              have_isa_2_06_vsx = true;
>          }
>      }
> +    if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
> +        if (hwcap & PPC_FEATURE_HAS_VSX) {
> +            have_isa_2_07_vsx = true;
> +        }
> +    }
>  #ifdef PPC_FEATURE2_ARCH_3_00
>      if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
>          have_isa_3_00 = true;
> --
> 2.17.1
>
>

  parent reply	other threads:[~2019-06-30 13:39 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-29 13:00 [Qemu-devel] [PATCH v6 00/16] tcg/ppc: Add vector opcodes Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 01/16] tcg/ppc: Introduce Altivec registers Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 02/16] tcg/ppc: Introduce macro VX4() Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 03/16] tcg/ppc: Introduce macros VRT(), VRA(), VRB(), VRC() Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 04/16] tcg/ppc: Enable tcg backend vector compilation Richard Henderson
2019-06-30  9:46   ` Aleksandar Markovic
2019-06-30 10:48     ` Richard Henderson
2019-06-30 11:45       ` Aleksandar Markovic
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 05/16] tcg/ppc: Add support for load/store/logic/comparison Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 06/16] tcg/ppc: Add support for vector maximum/minimum Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 07/16] tcg/ppc: Add support for vector add/subtract Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 08/16] tcg/ppc: Add support for vector saturated add/subtract Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 09/16] tcg/ppc: Prepare case for vector multiply Richard Henderson
2019-06-30  9:52   ` Aleksandar Markovic
2019-06-30 10:49     ` Richard Henderson
2019-06-30 11:35       ` Aleksandar Markovic
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 10/16] tcg/ppc: Support vector shift by immediate Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 11/16] tcg/ppc: Support vector multiply Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 12/16] tcg/ppc: Support vector dup2 Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 13/16] tcg/ppc: Enable Altivec detection Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 14/16] tcg/ppc: Update vector support to v2.06 Richard Henderson
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 15/16] tcg/ppc: Update vector support to v2.07 Richard Henderson
2019-06-30 11:50   ` Aleksandar Markovic
2019-06-30 13:37   ` Aleksandar Markovic [this message]
2019-06-30 15:12     ` Richard Henderson
2019-07-01  3:57       ` Aleksandar Markovic
2019-07-01 10:29         ` Richard Henderson
2019-07-01 11:41           ` Aleksandar Markovic
2019-07-02 14:25             ` Richard Henderson
2019-07-10 10:52               ` Aleksandar Markovic
2019-06-29 13:00 ` [Qemu-devel] [PATCH v6 16/16] tcg/ppc: Update vector support to v3.00 Richard Henderson
2019-06-29 13:37 ` [Qemu-devel] [PATCH v6 00/16] tcg/ppc: Add vector opcodes no-reply
2019-06-30 17:58 ` Mark Cave-Ayland
2019-07-01 10:30   ` Richard Henderson
2019-07-01 18:34     ` Howard Spoelstra
2019-09-03 17:02       ` Mark Cave-Ayland
2019-09-03 17:37         ` Aleksandar Markovic
2019-09-03 18:32           ` Mark Cave-Ayland
2019-09-05 11:43             ` Aleksandar Markovic
2019-09-27 12:13               ` Aleksandar Markovic

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAL1e-=ghMjUtEyqKX+s15RcyQp6uQO_hJ_u4RTSJ5ZyPvFnymw@mail.gmail.com' \
    --to=aleksandar.m.mail@gmail.com \
    --cc=amarkovic@wavecomp.com \
    --cc=hsp.cat7@gmail.com \
    --cc=mark.cave-ayland@ilande.co.uk \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.