All of lore.kernel.org
 help / color / mirror / Atom feed
From: <christophe.lyon@st.com>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH 1/6] target-arm: Fix rounding constant addition for Neon shift instructions.
Date: Fri, 11 Feb 2011 16:10:57 +0100	[thread overview]
Message-ID: <1297437062-6118-2-git-send-email-christophe.lyon@st.com> (raw)
In-Reply-To: <1297437062-6118-1-git-send-email-christophe.lyon@st.com>

From: Christophe Lyon <christophe.lyon@st.com>

Handle cases where adding the rounding constant could overflow in Neon
shift instructions: VRSHR, VRSRA, VQRSHRN, VQRSHRUN, VRSHRN.

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/neon_helper.c |  149 ++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 137 insertions(+), 12 deletions(-)

diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index cf82072..3f1f3d4 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -558,9 +558,34 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
     }} while (0)
 NEON_VOP(rshl_s8, neon_s8, 4)
 NEON_VOP(rshl_s16, neon_s16, 2)
-NEON_VOP(rshl_s32, neon_s32, 1)
 #undef NEON_FN
 
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64-bit accumulator.  */
+uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop)
+{
+    int32_t dest;
+    int32_t val = (int32_t)valop;
+    int8_t shift = (int8_t)shiftop;
+    if (shift >= 32) {
+        dest = 0;
+    } else if (shift < -32) {
+        dest = val >> 31;
+    } else if (shift == -32) {
+        dest = val >> 31;
+        dest++;
+        dest >>= 1;
+    } else if (shift < 0) {
+        int64_t big_dest = ((int64_t)val + (1 << (-1 - shift)));
+        dest = big_dest >> -shift;
+    } else {
+        dest = val << shift;
+    }
+    return dest;
+}
+
+/* Handling addition overflow with 64-bit input values is
+ * trickier than with 32-bit values.  */
 uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
 {
     int8_t shift = (int8_t)shiftop;
@@ -574,7 +599,16 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
         val++;
         val >>= 1;
     } else if (shift < 0) {
-        val = (val + ((int64_t)1 << (-1 - shift))) >> -shift;
+        val >>= (-shift - 1);
+        if (val == INT64_MAX) {
+            /* In this case, it means that the rounding constant is 1,
+             * and the addition would overflow. Return the actual
+             * result directly.  */
+            val = 0x4000000000000000LL;
+        } else {
+            val++;
+            val >>= 1;
+        }
     } else {
         val <<= shift;
     }
@@ -596,9 +630,29 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
     }} while (0)
 NEON_VOP(rshl_u8, neon_u8, 4)
 NEON_VOP(rshl_u16, neon_u16, 2)
-NEON_VOP(rshl_u32, neon_u32, 1)
 #undef NEON_FN
 
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64-bit accumulator.  */
+uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shiftop)
+{
+    uint32_t dest;
+    int8_t shift = (int8_t)shiftop;
+    if (shift >= 32 || shift < -32) {
+        dest = 0;
+    } else if (shift == -32) {
+        dest = val >> 31;
+    } else if (shift < 0) {
+        uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift)));
+        dest = big_dest >> -shift;
+    } else {
+        dest = val << shift;
+    }
+    return dest;
+}
+
+/* Handling addition overflow with 64-bit input values is
+ * trickier than with 32-bit values.  */
 uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
 {
     int8_t shift = (uint8_t)shiftop;
@@ -607,9 +661,17 @@ uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
     } else if (shift == -64) {
         /* Rounding a 1-bit result just preserves that bit.  */
         val >>= 63;
-    } if (shift < 0) {
-        val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift;
-        val >>= -shift;
+    } else if (shift < 0) {
+        val >>= (-shift - 1);
+        if (val == UINT64_MAX) {
+            /* In this case, it means that the rounding constant is 1,
+             * and the addition would overflow. Return the actual
+             * result directly.  */
+            val = 0x8000000000000000ULL;
+        } else {
+            val++;
+            val >>= 1;
+        }
     } else {
         val <<= shift;
     }
@@ -784,14 +846,43 @@ uint64_t HELPER(neon_qshlu_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
     }} while (0)
 NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
 NEON_VOP_ENV(qrshl_u16, neon_u16, 2)
-NEON_VOP_ENV(qrshl_u32, neon_u32, 1)
 #undef NEON_FN
 
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64-bit accumulator.  */
+uint32_t HELPER(neon_qrshl_u32)(CPUState *env, uint32_t val, uint32_t shiftop)
+{
+    uint32_t dest;
+    int8_t shift = (int8_t)shiftop;
+    if (shift < 0) {
+        uint64_t big_dest = ((uint64_t)val + ( 1 << (-1 - shift)));
+        dest = big_dest >> -shift;
+    } else {
+        dest = val << shift;
+        if ((dest >> shift) != val) {
+            SET_QC();
+            dest = ~0;
+        }
+    }
+    return dest;
+}
+
+/* Handling addition overflow with 64-bit input values is
+ * trickier than with 32-bit values.  */
 uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
 {
     int8_t shift = (int8_t)shiftop;
     if (shift < 0) {
-        val = (val + (1 << (-1 - shift))) >> -shift;
+        val >>= (-shift - 1);
+        if (val == UINT64_MAX) {
+            /* In this case, it means that the rounding constant is 1,
+             * and the addition would overflow. Return the actual
+             * result directly.  */
+            val = 0x8000000000000000ULL;
+        } else {
+            val++;
+            val >>= 1;
+        }
     } else { \
         uint64_t tmp = val;
         val <<= shift;
@@ -817,22 +908,56 @@ uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
     }} while (0)
 NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
 NEON_VOP_ENV(qrshl_s16, neon_s16, 2)
-NEON_VOP_ENV(qrshl_s32, neon_s32, 1)
 #undef NEON_FN
 
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64-bit accumulator.  */
+uint32_t HELPER(neon_qrshl_s32)(CPUState *env, uint32_t valop, uint32_t shiftop)
+{
+    int32_t dest;
+    int32_t val = (int32_t)valop;
+    int8_t shift = (int8_t)shiftop;
+    if (shift < 0) {
+        int64_t big_dest = ((int64_t)val + (1 << (-1 - shift)));
+        dest = big_dest >> -shift;
+    } else {
+        dest = val << shift;
+        if ((dest >> shift) != val) {
+            SET_QC();
+            dest = (uint32_t)(1 << (sizeof(val) * 8 - 1)) - (val > 0 ? 1 : 0);
+        }
+    }
+    return dest;
+}
+
+/* Handling addition overflow with 64-bit input values is
+ * trickier than with 32-bit values.  */
 uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
 {
     int8_t shift = (uint8_t)shiftop;
     int64_t val = valop;
 
     if (shift < 0) {
-        val = (val + (1 << (-1 - shift))) >> -shift;
+        val >>= (-shift - 1);
+        if (val == INT64_MAX) {
+            /* In this case, it means that the rounding constant is 1,
+             * and the addition would overflow. Return the actual
+             * result directly.  */
+            val = 0x4000000000000000ULL;
+        } else {
+            val++;
+            val >>= 1;
+        }
     } else {
-        int64_t tmp = val;;
+        int64_t tmp = val;
         val <<= shift;
         if ((val >> shift) != tmp) {
             SET_QC();
-            val = tmp >> 31;
+            if (tmp < 0) {
+                val = INT64_MIN;
+            } else {
+                val = INT64_MAX;
+            }
         }
     }
     return val;
-- 
1.7.2.3

  reply	other threads:[~2011-02-11 15:11 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-02-11 15:10 [Qemu-devel] [PATCH v3 0/6] target-arm: Fix Neon shift instructions christophe.lyon
2011-02-11 15:10 ` christophe.lyon [this message]
2011-02-14 18:12   ` [Qemu-devel] [PATCH 1/6] target-arm: Fix rounding constant addition for " Peter Maydell
2011-02-15 10:07     ` Christophe Lyon
2011-02-11 15:10 ` [Qemu-devel] [PATCH 2/6] target-arm: fix Neon right shifts with shift amount == input width christophe.lyon
2011-02-14 18:16   ` Peter Maydell
2011-02-15 13:47     ` Christophe Lyon
2011-02-11 15:10 ` [Qemu-devel] [PATCH 3/6] target-arm: fix unsigned 64 bit right shifts christophe.lyon
2011-02-14 17:40   ` Peter Maydell
2011-02-11 15:11 ` [Qemu-devel] [PATCH 4/6] target-arm: fix saturated values for Neon " christophe.lyon
2011-02-14 17:46   ` Peter Maydell
2011-02-14 17:49     ` Peter Maydell
2011-02-15 14:06     ` Christophe Lyon
2011-02-11 15:11 ` [Qemu-devel] [PATCH 5/6] target-arm: fix Neon VQSHRN and VSHRN christophe.lyon
2011-02-14 17:41   ` Peter Maydell
2011-02-11 15:11 ` [Qemu-devel] [PATCH 6/6] target-arm: fix decoding of Neon 64 bit shifts christophe.lyon
2011-02-14 17:53   ` Peter Maydell
2011-02-14 18:18 ` [Qemu-devel] [PATCH v3 0/6] target-arm: Fix Neon shift instructions Peter Maydell
2011-02-15 14:03   ` Christophe Lyon
2011-02-15 14:21     ` Peter Maydell
2011-02-15 14:49       ` Christophe Lyon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1297437062-6118-2-git-send-email-christophe.lyon@st.com \
    --to=christophe.lyon@st.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.