Re: [Qemu-devel] [PATCH v6 15/16] tcg/ppc: Update vector support to v2.07

From: Aleksandar Markovic <aleksandar.m.mail@gmail.com>
To: Richard Henderson <richard.henderson@linaro.org>
Cc: mark.cave-ayland@ilande.co.uk, qemu-devel@nongnu.org,
	amarkovic@wavecomp.com, hsp.cat7@gmail.com
Subject: Re: [Qemu-devel] [PATCH v6 15/16] tcg/ppc: Update vector support to v2.07
Date: Sun, 30 Jun 2019 15:37:53 +0200	[thread overview]
Message-ID: <CAL1e-=ghMjUtEyqKX+s15RcyQp6uQO_hJ_u4RTSJ5ZyPvFnymw@mail.gmail.com> (raw)
In-Reply-To: <20190629130017.2973-16-richard.henderson@linaro.org>

On Jun 29, 2019 3:14 PM, "Richard Henderson" <richard.henderson@linaro.org>
wrote:
>
> This includes single-word loads and stores, lots of double-word
> arithmetic, and a few extra logical operations.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
> ---
>  tcg/ppc/tcg-target.h     |   3 +-
>  tcg/ppc/tcg-target.inc.c | 128 ++++++++++++++++++++++++++++++---------
>  2 files changed, 103 insertions(+), 28 deletions(-)
>
> diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
> index 40544f996d..b8355d0a56 100644
> --- a/tcg/ppc/tcg-target.h
> +++ b/tcg/ppc/tcg-target.h
> @@ -61,6 +61,7 @@ typedef enum {
>  extern bool have_isa_altivec;
>  extern bool have_isa_2_06;
>  extern bool have_isa_2_06_vsx;
> +extern bool have_isa_2_07_vsx;
>  extern bool have_isa_3_00;
>
>  /* optional instructions automatically implemented */
> @@ -147,7 +148,7 @@ extern bool have_isa_3_00;
>  #define TCG_TARGET_HAS_v256             0
>
>  #define TCG_TARGET_HAS_andc_vec         1
> -#define TCG_TARGET_HAS_orc_vec          0
> +#define TCG_TARGET_HAS_orc_vec          have_isa_2_07_vsx
>  #define TCG_TARGET_HAS_not_vec          1
>  #define TCG_TARGET_HAS_neg_vec          0
>  #define TCG_TARGET_HAS_abs_vec          0
> diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
> index 50d1b5612c..af86ab07dd 100644
> --- a/tcg/ppc/tcg-target.inc.c
> +++ b/tcg/ppc/tcg-target.inc.c
> @@ -67,6 +67,7 @@ static tcg_insn_unit *tb_ret_addr;
>  bool have_isa_altivec;
>  bool have_isa_2_06;
>  bool have_isa_2_06_vsx;
> +bool have_isa_2_07_vsx;

Does this flag indicate support for PowerISA 2.07 or VSX?

If VSX support is implied by PowerISA 2.07, then the “_vsx” suffix is really
not needed. If not, why are there two flavors of “2_06” flag variables (with
and without _vsx), but only one flavor of “2_07” flag variable (with _vsx)?

>  bool have_isa_3_00;
>
>  #define HAVE_ISA_2_06  have_isa_2_06
> @@ -473,10 +474,12 @@ static int tcg_target_const_match(tcg_target_long
val, TCGType type,
>  #define LVEWX      XO31(71)
>  #define LXSDX      XO31(588)      /* v2.06 */
>  #define LXVDSX     XO31(332)      /* v2.06 */
> +#define LXSIWZX    XO31(12)       /* v2.07 */
>
>  #define STVX       XO31(231)
>  #define STVEWX     XO31(199)
>  #define STXSDX     XO31(716)      /* v2.06 */
> +#define STXSIWX    XO31(140)      /* v2.07 */
>
>  #define VADDSBS    VX4(768)
>  #define VADDUBS    VX4(512)
> @@ -487,6 +490,7 @@ static int tcg_target_const_match(tcg_target_long
val, TCGType type,
>  #define VADDSWS    VX4(896)
>  #define VADDUWS    VX4(640)
>  #define VADDUWM    VX4(128)
> +#define VADDUDM    VX4(192)       /* v2.07 */
>
>  #define VSUBSBS    VX4(1792)
>  #define VSUBUBS    VX4(1536)
> @@ -497,47 +501,62 @@ static int tcg_target_const_match(tcg_target_long
val, TCGType type,
>  #define VSUBSWS    VX4(1920)
>  #define VSUBUWS    VX4(1664)
>  #define VSUBUWM    VX4(1152)
> +#define VSUBUDM    VX4(1216)      /* v2.07 */
>
>  #define VMAXSB     VX4(258)
>  #define VMAXSH     VX4(322)
>  #define VMAXSW     VX4(386)
> +#define VMAXSD     VX4(450)       /* v2.07 */
>  #define VMAXUB     VX4(2)
>  #define VMAXUH     VX4(66)
>  #define VMAXUW     VX4(130)
> +#define VMAXUD     VX4(194)       /* v2.07 */
>  #define VMINSB     VX4(770)
>  #define VMINSH     VX4(834)
>  #define VMINSW     VX4(898)
> +#define VMINSD     VX4(962)       /* v2.07 */
>  #define VMINUB     VX4(514)
>  #define VMINUH     VX4(578)
>  #define VMINUW     VX4(642)
> +#define VMINUD     VX4(706)       /* v2.07 */
>
>  #define VCMPEQUB   VX4(6)
>  #define VCMPEQUH   VX4(70)
>  #define VCMPEQUW   VX4(134)
> +#define VCMPEQUD   VX4(199)       /* v2.07 */
>  #define VCMPGTSB   VX4(774)
>  #define VCMPGTSH   VX4(838)
>  #define VCMPGTSW   VX4(902)
> +#define VCMPGTSD   VX4(967)       /* v2.07 */
>  #define VCMPGTUB   VX4(518)
>  #define VCMPGTUH   VX4(582)
>  #define VCMPGTUW   VX4(646)
> +#define VCMPGTUD   VX4(711)       /* v2.07 */
>
>  #define VSLB       VX4(260)
>  #define VSLH       VX4(324)
>  #define VSLW       VX4(388)
> +#define VSLD       VX4(1476)      /* v2.07 */
>  #define VSRB       VX4(516)
>  #define VSRH       VX4(580)
>  #define VSRW       VX4(644)
> +#define VSRD       VX4(1732)      /* v2.07 */
>  #define VSRAB      VX4(772)
>  #define VSRAH      VX4(836)
>  #define VSRAW      VX4(900)
> +#define VSRAD      VX4(964)       /* v2.07 */
>  #define VRLB       VX4(4)
>  #define VRLH       VX4(68)
>  #define VRLW       VX4(132)
> +#define VRLD       VX4(196)       /* v2.07 */
>
>  #define VMULEUB    VX4(520)
>  #define VMULEUH    VX4(584)
> +#define VMULEUW    VX4(648)       /* v2.07 */
>  #define VMULOUB    VX4(8)
>  #define VMULOUH    VX4(72)
> +#define VMULOUW    VX4(136)       /* v2.07 */
> +#define VMULUWM    VX4(137)       /* v2.07 */
>  #define VMSUMUHM   VX4(38)
>
>  #define VMRGHB     VX4(12)
> @@ -555,6 +574,9 @@ static int tcg_target_const_match(tcg_target_long
val, TCGType type,
>  #define VNOR       VX4(1284)
>  #define VOR        VX4(1156)
>  #define VXOR       VX4(1220)
> +#define VEQV       VX4(1668)      /* v2.07 */
> +#define VNAND      VX4(1412)      /* v2.07 */
> +#define VORC       VX4(1348)      /* v2.07 */
>
>  #define VSPLTB     VX4(524)
>  #define VSPLTH     VX4(588)
> @@ -568,6 +590,11 @@ static int tcg_target_const_match(tcg_target_long
val, TCGType type,
>  #define XXPERMDI   (OPCD(60) | (10 << 3))   /* v2.06 */
>  #define XXSEL      (OPCD(60) | (3 << 4))    /* v2.06 */
>
> +#define MFVSRD     XO31(51)       /* v2.07 */
> +#define MFVSRWZ    XO31(115)      /* v2.07 */
> +#define MTVSRD     XO31(179)      /* v2.07 */
> +#define MTVSRWZ    XO31(179)      /* v2.07 */
> +
>  #define RT(r) ((r)<<21)
>  #define RS(r) ((r)<<21)
>  #define RA(r) ((r)<<16)
> @@ -697,12 +724,27 @@ static bool tcg_out_mov(TCGContext *s, TCGType
type, TCGReg ret, TCGReg arg)
>          tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
>          /* fallthru */
>      case TCG_TYPE_I32:
> -        if (ret < TCG_REG_V0 && arg < TCG_REG_V0) {
> -            tcg_out32(s, OR | SAB(arg, ret, arg));
> -            break;
> -        } else if (ret < TCG_REG_V0 || arg < TCG_REG_V0) {
> -            /* Altivec does not support vector/integer moves.  */
> -            return false;
> +        if (ret < TCG_REG_V0) {
> +            if (arg < TCG_REG_V0) {
> +                tcg_out32(s, OR | SAB(arg, ret, arg));
> +                break;
> +            } else if (have_isa_2_07_vsx) {
> +                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
> +                          | VRT(arg) | RA(ret) | 1);
> +                break;
> +            } else {
> +                /* Altivec does not support vector->integer moves.  */
> +                return false;
> +            }
> +        } else if (arg < TCG_REG_V0) {
> +            if (have_isa_2_07_vsx) {
> +                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
> +                          | VRT(ret) | RA(arg) | 1);
> +                break;
> +            } else {
> +                /* Altivec does not support integer->vector moves.  */
> +                return false;
> +            }
>          }
>          /* fallthru */
>      case TCG_TYPE_V64:
> @@ -1140,6 +1182,10 @@ static void tcg_out_ld(TCGContext *s, TCGType
type, TCGReg ret,
>              tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
>              break;
>          }
> +        if (have_isa_2_07_vsx) {
> +            tcg_out_mem_long(s, 0, LXSIWZX | 1, ret, base, offset);
> +            break;
> +        }
>          assert((offset & 3) == 0);
>          tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
>          shift = (offset - 4) & 0xc;
> @@ -1187,6 +1233,10 @@ static void tcg_out_st(TCGContext *s, TCGType
type, TCGReg arg,
>              tcg_out_mem_long(s, STW, STWX, arg, base, offset);
>              break;
>          }
> +        if (have_isa_2_07_vsx) {
> +            tcg_out_mem_long(s, 0, STXSIWX | 1, arg, base, offset);
> +            break;
> +        }
>          assert((offset & 3) == 0);
>          shift = (offset - 4) & 0xc;
>          if (shift) {
> @@ -2907,26 +2957,37 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType
type, unsigned vece)
>      case INDEX_op_andc_vec:
>      case INDEX_op_not_vec:
>          return 1;
> +    case INDEX_op_orc_vec:
> +        return have_isa_2_07_vsx;
>      case INDEX_op_add_vec:
>      case INDEX_op_sub_vec:
>      case INDEX_op_smax_vec:
>      case INDEX_op_smin_vec:
>      case INDEX_op_umax_vec:
>      case INDEX_op_umin_vec:
> +    case INDEX_op_shlv_vec:
> +    case INDEX_op_shrv_vec:
> +    case INDEX_op_sarv_vec:
> +        return vece <= MO_32 || have_isa_2_07_vsx;
>      case INDEX_op_ssadd_vec:
>      case INDEX_op_sssub_vec:
>      case INDEX_op_usadd_vec:
>      case INDEX_op_ussub_vec:
> -    case INDEX_op_shlv_vec:
> -    case INDEX_op_shrv_vec:
> -    case INDEX_op_sarv_vec:
>          return vece <= MO_32;
>      case INDEX_op_cmp_vec:
> -    case INDEX_op_mul_vec:
>      case INDEX_op_shli_vec:
>      case INDEX_op_shri_vec:
>      case INDEX_op_sari_vec:
> -        return vece <= MO_32 ? -1 : 0;
> +        return vece <= MO_32 || have_isa_2_07_vsx ? -1 : 0;
> +    case INDEX_op_mul_vec:
> +        switch (vece) {
> +        case MO_8:
> +        case MO_16:
> +            return -1;
> +        case MO_32:
> +            return have_isa_2_07_vsx ? 1 : -1;
> +        }
> +        return 0;
>      case INDEX_op_bitsel_vec:
>          return have_isa_2_06_vsx;
>      default:
> @@ -3030,28 +3091,28 @@ static void tcg_out_vec_op(TCGContext *s,
TCGOpcode opc,
>                             const TCGArg *args, const int *const_args)
>  {
>      static const uint32_t
> -        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 },
> -        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 },
> -        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
> -        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
> -        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
> +        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
> +        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
> +        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
> +        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
> +        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
>          ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
>          usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
>          sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
>          ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
> -        umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
> -        smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
> -        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
> -        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 },
> -        shlv_op[4] = { VSLB, VSLH, VSLW, 0 },
> -        shrv_op[4] = { VSRB, VSRH, VSRW, 0 },
> -        sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 },
> +        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
> +        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
> +        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
> +        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
> +        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
> +        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
> +        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
>          mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
>          mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
> -        muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 },
> -        mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 },
> +        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
> +        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
>          pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
> -        rotl_op[4] = { VRLB, VRLH, VRLW, 0 };
> +        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
>
>      TCGType type = vecl + TCG_TYPE_V64;
>      TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
> @@ -3074,6 +3135,10 @@ static void tcg_out_vec_op(TCGContext *s,
TCGOpcode opc,
>      case INDEX_op_sub_vec:
>          insn = sub_op[vece];
>          break;
> +    case INDEX_op_mul_vec:
> +        tcg_debug_assert(vece == MO_32 && have_isa_2_07_vsx);
> +        insn = VMULUWM;
> +        break;
>      case INDEX_op_ssadd_vec:
>          insn = ssadd_op[vece];
>          break;
> @@ -3123,6 +3188,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode
opc,
>          insn = VNOR;
>          a2 = a1;
>          break;
> +    case INDEX_op_orc_vec:
> +        insn = VORC;
> +        break;
>
>      case INDEX_op_cmp_vec:
>          switch (args[3]) {
> @@ -3203,7 +3271,7 @@ static void expand_vec_cmp(TCGType type, unsigned
vece, TCGv_vec v0,
>  {
>      bool need_swap = false, need_inv = false;
>
> -    tcg_debug_assert(vece <= MO_32);
> +    tcg_debug_assert(vece <= MO_32 || have_isa_2_07_vsx);
>
>      switch (cond) {
>      case TCG_COND_EQ:
> @@ -3267,6 +3335,7 @@ static void expand_vec_mul(TCGType type, unsigned
vece, TCGv_vec v0,
>         break;
>
>      case MO_32:
> +        tcg_debug_assert(!have_isa_2_07_vsx);
>          t3 = tcg_temp_new_vec(type);
>          t4 = tcg_temp_new_vec(type);
>          tcg_gen_dupi_vec(MO_8, t4, -16);
> @@ -3562,6 +3631,11 @@ static void tcg_target_init(TCGContext *s)
>              have_isa_2_06_vsx = true;
>          }
>      }
> +    if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
> +        if (hwcap & PPC_FEATURE_HAS_VSX) {
> +            have_isa_2_07_vsx = true;
> +        }
> +    }
>  #ifdef PPC_FEATURE2_ARCH_3_00
>      if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
>          have_isa_3_00 = true;
> --
> 2.17.1
>
>