Re: [Qemu-devel] [PATCH 1/8] target-arm: Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL, VRSRA.

From: Aurelien Jarno <aurelien@aurel32.net>
To: christophe.lyon@st.com
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 1/8] target-arm: Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL, VRSRA.
Date: Mon, 31 Jan 2011 09:20:50 +0100	[thread overview]
Message-ID: <20110131082050.GA4304@volta.aurel32.net> (raw)
In-Reply-To: <1296229866-32011-2-git-send-email-christophe.lyon@st.com>

On Fri, Jan 28, 2011 at 04:50:59PM +0100, christophe.lyon@st.com wrote:
> From: Christophe Lyon <christophe.lyon@st.com>
> 
> Handle corner cases where the addition of the rounding constant could
> cause overflows.

After applying this patch, I get the following gcc warning:
  CC    translate.o
cc1: warnings being treated as errors
qemu/target-arm/translate.c: In function ‘disas_neon_data_insn’:
qemu/target-arm/translate.c:4212: error: ‘imm’ may be used uninitialized in this function
make: *** [translate.o] Error 1

> Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
> ---
>  target-arm/neon_helper.c |   61 ++++++++++++++++++++++++++++++++++++++-------
>  target-arm/translate.c   |   17 ++++++++++--
>  2 files changed, 65 insertions(+), 13 deletions(-)
> 
> diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
> index bf29bbe..5971275 100644
> --- a/target-arm/neon_helper.c
> +++ b/target-arm/neon_helper.c
> @@ -540,6 +540,9 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
>      return val;
>  }
>  
> +/* The addition of the rounding constant may overflow, so we use an
> + * intermediate 64 bits accumulator, which is really needed only when
> + * dealing with 32 bits input values.  */
>  #define NEON_FN(dest, src1, src2) do { \
>      int8_t tmp; \
>      tmp = (int8_t)src2; \
> @@ -548,11 +551,12 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
>      } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \
>          dest = src1 >> (sizeof(src1) * 8 - 1); \
>      } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
> -        dest = src1 >> (tmp - 1); \
> +        dest = src1 >> (-tmp - 1); \
>          dest++; \
>          dest >>= 1; \
>      } else if (tmp < 0) { \
> -        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
> +        int64_t big_dest = ((int64_t)src1 + (1 << (-1 - tmp))); \
> +        dest = big_dest >> -tmp; \
>      } else { \
>          dest = src1 << tmp; \
>      }} while (0)
> @@ -561,6 +565,8 @@ NEON_VOP(rshl_s16, neon_s16, 2)
>  NEON_VOP(rshl_s32, neon_s32, 1)
>  #undef NEON_FN
>  
> +/* Handling addition overflow with 64 bits inputs values is more
> + * tricky than with 32 bits values.  */
>  uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
>  {
>      int8_t shift = (int8_t)shiftop;
> @@ -569,18 +575,37 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
>          val = 0;
>      } else if (shift < -64) {
>          val >>= 63;
> -    } else if (shift == -63) {
> +    } else if (shift == -64) {
>          val >>= 63;
>          val++;
>          val >>= 1;
>      } else if (shift < 0) {
> -        val = (val + ((int64_t)1 << (-1 - shift))) >> -shift;
> +        int64_t round = (int64_t)1 << (-1 - shift);
> +        /* Reduce the range as long as the addition overflows.  It's
> +         * sufficient to check if (val+round) is < 0 and val > 0
> +         * because round is > 0.  */
> +        while ((val > 0) && ((val + round) < 0) && round > 1) {
> +            shift++;
> +            round >>= 1;
> +            val >>= 1;
> +        }
> +        if ((val > 0) && (val + round) < 0) {
> +            /* If addition still overflows at this point, it means
> +             * that round==1, thus shift==-1, and also that
> +             * val==0x7FFFFFFFFFFFFFFF.  */
> +            val = 0x4000000000000000LL;
> +        } else {
> +            val = (val + round) >> -shift;
> +        }
>      } else {
>          val <<= shift;
>      }
>      return val;
>  }
>  
> +/* The addition of the rounding constant may overflow, so we use an
> + * intermediate 64 bits accumulator, which is really needed only when
> + * dealing with 32 bits input values.  */
>  #define NEON_FN(dest, src1, src2) do { \
>      int8_t tmp; \
>      tmp = (int8_t)src2; \
> @@ -588,9 +613,10 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
>          tmp < -(ssize_t)sizeof(src1) * 8) { \
>          dest = 0; \
>      } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
> -        dest = src1 >> (tmp - 1); \
> +        dest = src1 >> (-tmp - 1); \
>      } else if (tmp < 0) { \
> -        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
> +        uint64_t big_dest = ((uint64_t)src1 + (1 << (-1 - tmp))); \
> +        dest = big_dest >> -tmp; \
>      } else { \
>          dest = src1 << tmp; \
>      }} while (0)
> @@ -602,14 +628,29 @@ NEON_VOP(rshl_u32, neon_u32, 1)
>  uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
>  {
>      int8_t shift = (uint8_t)shiftop;
> -    if (shift >= 64 || shift < 64) {
> +    if (shift >= 64 || shift < -64) {
>          val = 0;
>      } else if (shift == -64) {
>          /* Rounding a 1-bit result just preserves that bit.  */
>          val >>= 63;
> -    } if (shift < 0) {
> -        val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift;
> -        val >>= -shift;
> +    } else if (shift < 0) {
> +        uint64_t round = (uint64_t)1 << (-1 - shift);
> +        /* Reduce the range as long as the addition overflows.  It's
> +         * sufficient to check if (val+round) is < val
> +         * because val and round are > 0.  */
> +        while (((val + round) < val) && round > 1) {
> +            shift++;
> +            round >>= 1;
> +            val >>= 1;
> +        }
> +        if ((val + round) < val) {
> +            /* If addition still overflows at this point, it means
> +             * that round==1, thus shift==-1, and also that
> +             * val==0x&FFFFFFFFFFFFFFF.  */
> +            val = 0x8000000000000000LL;
> +        } else {
> +            val = (val + round) >> -shift;
> +        }
>      } else {
>          val <<= shift;
>      }
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 4cf2ecd..b14fa4b 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -4885,13 +4885,24 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
>                          tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
>                          if (size < 2 || !u) {
>                              uint64_t imm64;
> -                            if (size == 0) {
> +                            switch(size) {
> +                            case 0:
>                                  imm = (0xffu >> (8 - shift));
>                                  imm |= imm << 16;
> -                            } else {
> +                                break;
> +                            case 1:
>                                  imm = 0xffff >> (16 - shift);
> +                                break;
> +                            case 2:
> +                                imm = 0xffffffff >> (32 - shift);
> +                                break;
> +                            }
> +                            if (size < 2) {
> +                                imm64 = imm | (((uint64_t)imm) << 32);
> +                            } else {
> +                                imm64 = imm;
>                              }
> -                            imm64 = imm | (((uint64_t)imm) << 32);
> +                            imm64 = ~imm64;
>                              tcg_gen_andi_i64(cpu_V0, cpu_V0, imm64);
>                          }
>                      }
> -- 
> 1.7.2.3
> 
> 
> 

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net