All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Alex Bennée" <alex.bennee@linaro.org>
To: Richard Henderson <richard.henderson@linaro.org>
Cc: peter.maydell@linaro.org, qemu-devel@nongnu.org
Subject: Re: [PATCH 1/3] tcg: Improve vector tail clearing
Date: Mon, 20 Apr 2020 16:25:17 +0100	[thread overview]
Message-ID: <87imhudvs2.fsf@linaro.org> (raw)
In-Reply-To: <20200418155651.3901-2-richard.henderson@linaro.org>


Richard Henderson <richard.henderson@linaro.org> writes:

> Better handling of non-power-of-2 tails as seen with Arm 8-byte
> vector operations.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  tcg/tcg-op-gvec.c | 82 ++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 63 insertions(+), 19 deletions(-)
>
> diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
> index 5a6cc19812..43cac1a0bf 100644
> --- a/tcg/tcg-op-gvec.c
> +++ b/tcg/tcg-op-gvec.c
> @@ -326,11 +326,34 @@ void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
>     in units of LNSZ.  This limits the expansion of inline code.  */
>  static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz)
>  {
> -    if (oprsz % lnsz == 0) {
> -        uint32_t lnct = oprsz / lnsz;
> -        return lnct >= 1 && lnct <= MAX_UNROLL;
> +    uint32_t q, r;
> +
> +    if (oprsz < lnsz) {
> +        return false;
>      }
> -    return false;
> +
> +    q = oprsz / lnsz;
> +    r = oprsz % lnsz;
> +    tcg_debug_assert((r & 7) == 0);
> +
> +    if (lnsz < 16) {
> +        /* For sizes below 16, accept no remainder. */
> +        if (r != 0) {
> +            return false;
> +        }
> +    } else {
> +        /*
> +         * Recall that ARM SVE allows vector sizes that are not a
> +         * power of 2, but always a multiple of 16.  The intent is
> +         * that e.g. size == 80 would be expanded with 2x32 + 1x16.
> +         * In addition, expand_clr needs to handle a multiple of 8.
> +         * Thus we can handle the tail with one more operation per
> +         * diminishing power of 2.
> +         */
> +        q += ctpop32(r);
> +    }
> +
> +    return q <= MAX_UNROLL;
>  }
>  
>  static void expand_clr(uint32_t dofs, uint32_t maxsz);
> @@ -402,22 +425,31 @@ static void gen_dup_i64(unsigned vece, TCGv_i64 out, TCGv_i64 in)
>  static TCGType choose_vector_type(const TCGOpcode *list, unsigned vece,
>                                    uint32_t size, bool prefer_i64)
>  {
> -    if (TCG_TARGET_HAS_v256 && check_size_impl(size, 32)) {
> -        /*
> -         * Recall that ARM SVE allows vector sizes that are not a
> -         * power of 2, but always a multiple of 16.  The intent is
> -         * that e.g. size == 80 would be expanded with 2x32 + 1x16.
> -         * It is hard to imagine a case in which v256 is supported
> -         * but v128 is not, but check anyway.
> -         */
> -        if (tcg_can_emit_vecop_list(list, TCG_TYPE_V256, vece)
> -            && (size % 32 == 0
> -                || tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece))) {
> -            return TCG_TYPE_V256;
> -        }
> +    /*
> +     * Recall that ARM SVE allows vector sizes that are not a
> +     * power of 2, but always a multiple of 16.  The intent is
> +     * that e.g. size == 80 would be expanded with 2x32 + 1x16.
> +     * It is hard to imagine a case in which v256 is supported
> +     * but v128 is not, but check anyway.
> +     * In addition, expand_clr needs to handle a multiple of 8.
> +     */
> +    if (TCG_TARGET_HAS_v256 &&
> +        check_size_impl(size, 32) &&
> +        tcg_can_emit_vecop_list(list, TCG_TYPE_V256, vece) &&
> +        (!(size & 16) ||
> +         (TCG_TARGET_HAS_v128 &&
> +          tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece))) &&
> +        (!(size & 8) ||
> +         (TCG_TARGET_HAS_v64 &&
> +          tcg_can_emit_vecop_list(list, TCG_TYPE_V64, vece)))) {
> +        return TCG_TYPE_V256;
>      }
> -    if (TCG_TARGET_HAS_v128 && check_size_impl(size, 16)
> -        && tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece)) {
> +    if (TCG_TARGET_HAS_v128 &&
> +        check_size_impl(size, 16) &&
> +        tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece) &&
> +        (!(size & 8) ||
> +         (TCG_TARGET_HAS_v64 &&
> +          tcg_can_emit_vecop_list(list, TCG_TYPE_V64, vece)))) {
>          return TCG_TYPE_V128;
>      }
>      if (TCG_TARGET_HAS_v64 && !prefer_i64 && check_size_impl(size, 8)
> @@ -432,6 +464,18 @@ static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz,
>  {
>      uint32_t i = 0;
>  
> +    tcg_debug_assert(oprsz >= 8);
> +
> +    /*
> +     * This may be expand_clr for the tail of an operation, e.g.
> +     * oprsz == 8 && maxsz == 64.  The first 8 bytes of this store
> +     * are misaligned wrt the maximum vector size, so do that first.
> +     */
> +    if (dofs & 8) {
> +        tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64);
> +        i += 8;
> +    }
> +
>      switch (type) {
>      case TCG_TYPE_V256:
>          /*


-- 
Alex Bennée


  reply	other threads:[~2020-04-20 15:26 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-18 15:56 [PATCH 0/3] tcg: Improve vector tail clearing Richard Henderson
2020-04-18 15:56 ` [PATCH 1/3] " Richard Henderson
2020-04-20 15:25   ` Alex Bennée [this message]
2020-04-18 15:56 ` [PATCH 2/3] target/arm: Use tcg_gen_gvec_mov for clear_vec_high Richard Henderson
2020-04-20 15:29   ` Alex Bennée
2020-04-18 15:56 ` [PATCH 3/3] target/arm: Use clear_vec_high more effectively Richard Henderson
2020-04-20 15:32   ` Alex Bennée

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87imhudvs2.fsf@linaro.org \
    --to=alex.bennee@linaro.org \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.