[Qemu-devel] [PATCH v3 0/6] target-arm: Fix Neon shift instructions.

All of lore.kernel.org
 help / color / mirror / Atom feed

* [Qemu-devel] [PATCH v3 0/6] target-arm: Fix Neon shift instructions.
@ 2011-02-11 15:10 christophe.lyon
  2011-02-11 15:10 ` [Qemu-devel] [PATCH 1/6] target-arm: Fix rounding constant addition for " christophe.lyon
                   ` (6 more replies)
  0 siblings, 7 replies; 21+ messages in thread
From: christophe.lyon @ 2011-02-11 15:10 UTC (permalink / raw)
  To: qemu-devel

From: Christophe Lyon <christophe.lyon@st.com>

This patch series provides fixes such that ARM Neon instructions
VRSHR, VRSRA, VQRSHRN, VQRSHRUN, VRSHRN, VQSHRN, VSHRN, VQSHRUN now
pass all my tests.

I have reworked all these patches and I hope they are now easier to
review.

Christophe Lyon (6):
  target-arm: Fix rounding constant addition for Neon shift
    instructions.
  target-arm: fix Neon right shifts with shift amount == input width.
  target-arm: fix unsigned 64 bit right shifts.
  target-arm: fix saturated values for Neon right shifts.
  target-arm: fix Neon VQSHRN and VSHRN.
  target-arm: fix decoding of Neon 64 bit shifts.

 target-arm/neon_helper.c |  163 +++++++++++++++++++++++++++++++++++++++++-----
 target-arm/translate.c   |   47 +++++++++-----
 2 files changed, 176 insertions(+), 34 deletions(-)

-- 
1.7.2.3

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 1/6] target-arm: Fix rounding constant addition for Neon shift instructions.
  2011-02-11 15:10 [Qemu-devel] [PATCH v3 0/6] target-arm: Fix Neon shift instructions christophe.lyon
@ 2011-02-11 15:10 ` christophe.lyon
  2011-02-14 18:12   ` Peter Maydell
  2011-02-11 15:10 ` [Qemu-devel] [PATCH 2/6] target-arm: fix Neon right shifts with shift amount == input width christophe.lyon
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 21+ messages in thread
From: christophe.lyon @ 2011-02-11 15:10 UTC (permalink / raw)
  To: qemu-devel

From: Christophe Lyon <christophe.lyon@st.com>

Handle cases where adding the rounding constant could overflow in Neon
shift instructions: VRSHR, VRSRA, VQRSHRN, VQRSHRUN, VRSHRN.

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/neon_helper.c |  149 ++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 137 insertions(+), 12 deletions(-)

diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index cf82072..3f1f3d4 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -558,9 +558,34 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
     }} while (0)
 NEON_VOP(rshl_s8, neon_s8, 4)
 NEON_VOP(rshl_s16, neon_s16, 2)
-NEON_VOP(rshl_s32, neon_s32, 1)
 #undef NEON_FN
 
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64 bits accumulator.  */
+uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop)
+{
+    int32_t dest;
+    int32_t val = (int32_t)valop;
+    int8_t shift = (int8_t)shiftop;
+    if (shift >= 32) {
+        dest = 0;
+    } else if (shift < -32) {
+        dest = val >> 31;
+    } else if (shift == -32) {
+        dest = val >> 31;
+        dest++;
+        dest >>= 1;
+    } else if (shift < 0) {
+        int64_t big_dest = ((int64_t)val + (1 << (-1 - shift)));
+        dest = big_dest >> -shift;
+    } else {
+        dest = val << shift;
+    }
+    return dest;
+}
+
+/* Handling addition overflow with 64 bits inputs values is more
+ * tricky than with 32 bits values.  */
 uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
 {
     int8_t shift = (int8_t)shiftop;
@@ -574,7 +599,16 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
         val++;
         val >>= 1;
     } else if (shift < 0) {
-        val = (val + ((int64_t)1 << (-1 - shift))) >> -shift;
+        val >>= (-shift - 1);
+        if (val == INT64_MAX) {
+            /* In this case, it means that the rounding constant is 1,
+             * and the addition would overflow. Return the actual
+             * result directly.  */
+            val = 0x4000000000000000LL;
+        } else {
+            val++;
+            val >>= 1;
+        }
     } else {
         val <<= shift;
     }
@@ -596,9 +630,29 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
     }} while (0)
 NEON_VOP(rshl_u8, neon_u8, 4)
 NEON_VOP(rshl_u16, neon_u16, 2)
-NEON_VOP(rshl_u32, neon_u32, 1)
 #undef NEON_FN
 
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64 bits accumulator.  */
+uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shiftop)
+{
+    uint32_t dest;
+    int8_t shift = (int8_t)shiftop;
+    if (shift >= 32 || shift < -32) {
+        dest = 0;
+    } else if (shift == -32) {
+        dest = val >> 31;
+    } else if (shift < 0) {
+        uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift)));
+        dest = big_dest >> -shift;
+    } else {
+        dest = val << shift;
+    }
+    return dest;
+}
+
+/* Handling addition overflow with 64 bits inputs values is more
+ * tricky than with 32 bits values.  */
 uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
 {
     int8_t shift = (uint8_t)shiftop;
@@ -607,9 +661,17 @@ uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
     } else if (shift == -64) {
         /* Rounding a 1-bit result just preserves that bit.  */
         val >>= 63;
-    } if (shift < 0) {
-        val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift;
-        val >>= -shift;
+    } else if (shift < 0) {
+        val >>= (-shift - 1);
+        if (val == UINT64_MAX) {
+            /* In this case, it means that the rounding constant is 1,
+             * and the addition would overflow. Return the actual
+             * result directly.  */
+            val = 0x8000000000000000ULL;
+        } else {
+            val++;
+            val >>= 1;
+        }
     } else {
         val <<= shift;
     }
@@ -784,14 +846,43 @@ uint64_t HELPER(neon_qshlu_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
     }} while (0)
 NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
 NEON_VOP_ENV(qrshl_u16, neon_u16, 2)
-NEON_VOP_ENV(qrshl_u32, neon_u32, 1)
 #undef NEON_FN
 
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64 bits accumulator.  */
+uint32_t HELPER(neon_qrshl_u32)(CPUState *env, uint32_t val, uint32_t shiftop)
+{
+    uint32_t dest;
+    int8_t shift = (int8_t)shiftop;
+    if (shift < 0) {
+        uint64_t big_dest = ((uint64_t)val + ( 1 << (-1 - shift)));
+        dest = big_dest >> -shift;
+    } else {
+        dest = val << shift;
+        if ((dest >> shift) != val) {
+            SET_QC();
+            dest = ~0;
+        }
+    }
+    return dest;
+}
+
+/* Handling addition overflow with 64 bits inputs values is more
+ * tricky than with 32 bits values.  */
 uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
 {
     int8_t shift = (int8_t)shiftop;
     if (shift < 0) {
-        val = (val + (1 << (-1 - shift))) >> -shift;
+        val >>= (-shift - 1);
+        if (val == UINT64_MAX) {
+            /* In this case, it means that the rounding constant is 1,
+             * and the addition would overflow. Return the actual
+             * result directly.  */
+            val = 0x8000000000000000ULL;
+        } else {
+            val++;
+            val >>= 1;
+        }
     } else { \
         uint64_t tmp = val;
         val <<= shift;
@@ -817,22 +908,56 @@ uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
     }} while (0)
 NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
 NEON_VOP_ENV(qrshl_s16, neon_s16, 2)
-NEON_VOP_ENV(qrshl_s32, neon_s32, 1)
 #undef NEON_FN
 
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64 bits accumulator.  */
+uint32_t HELPER(neon_qrshl_s32)(CPUState *env, uint32_t valop, uint32_t shiftop)
+{
+    int32_t dest;
+    int32_t val = (int32_t)valop;
+    int8_t shift = (int8_t)shiftop;
+    if (shift < 0) {
+        int64_t big_dest = ((int64_t)val + (1 << (-1 - shift)));
+        dest = big_dest >> -shift;
+    } else {
+        dest = val << shift;
+        if ((dest >> shift) != val) {
+            SET_QC();
+            dest = (uint32_t)(1 << (sizeof(val) * 8 - 1)) - (val > 0 ? 1 : 0);
+        }
+    }
+    return dest;
+}
+
+/* Handling addition overflow with 64 bits inputs values is more
+ * tricky than with 32 bits values.  */
 uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
 {
     int8_t shift = (uint8_t)shiftop;
     int64_t val = valop;
 
     if (shift < 0) {
-        val = (val + (1 << (-1 - shift))) >> -shift;
+        val >>= (-shift - 1);
+        if (val == INT64_MAX) {
+            /* In this case, it means that the rounding constant is 1,
+             * and the addition would overflow. Return the actual
+             * result directly.  */
+            val = 0x4000000000000000ULL;
+        } else {
+            val++;
+            val >>= 1;
+        }
     } else {
-        int64_t tmp = val;;
+        int64_t tmp = val;
         val <<= shift;
         if ((val >> shift) != tmp) {
             SET_QC();
-            val = tmp >> 31;
+            if (tmp < 0) {
+                val = INT64_MIN;
+            } else {
+                val = INT64_MAX;
+            }
         }
     }
     return val;
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 2/6] target-arm: fix Neon right shifts with shift amount == input width.
  2011-02-11 15:10 [Qemu-devel] [PATCH v3 0/6] target-arm: Fix Neon shift instructions christophe.lyon
  2011-02-11 15:10 ` [Qemu-devel] [PATCH 1/6] target-arm: Fix rounding constant addition for " christophe.lyon
@ 2011-02-11 15:10 ` christophe.lyon
  2011-02-14 18:16   ` Peter Maydell
  2011-02-11 15:10 ` [Qemu-devel] [PATCH 3/6] target-arm: fix unsigned 64 bit right shifts christophe.lyon
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 21+ messages in thread
From: christophe.lyon @ 2011-02-11 15:10 UTC (permalink / raw)
  To: qemu-devel

From: Christophe Lyon <christophe.lyon@st.com>

Fix rshl helpers (s8, s16, s64, u8, u16)

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/neon_helper.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index 3f1f3d4..1ac362f 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -548,7 +548,7 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
     } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \
         dest = src1 >> (sizeof(src1) * 8 - 1); \
     } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
-        dest = src1 >> (tmp - 1); \
+        dest = src1 >> (-tmp - 1); \
         dest++; \
         dest >>= 1; \
     } else if (tmp < 0) { \
@@ -594,7 +594,7 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
         val = 0;
     } else if (shift < -64) {
         val >>= 63;
-    } else if (shift == -63) {
+    } else if (shift == -64) {
         val >>= 63;
         val++;
         val >>= 1;
@@ -622,7 +622,7 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
         tmp < -(ssize_t)sizeof(src1) * 8) { \
         dest = 0; \
     } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
-        dest = src1 >> (tmp - 1); \
+        dest = src1 >> (-tmp - 1); \
     } else if (tmp < 0) { \
         dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
     } else { \
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 3/6] target-arm: fix unsigned 64 bit right shifts.
  2011-02-11 15:10 [Qemu-devel] [PATCH v3 0/6] target-arm: Fix Neon shift instructions christophe.lyon
  2011-02-11 15:10 ` [Qemu-devel] [PATCH 1/6] target-arm: Fix rounding constant addition for " christophe.lyon
  2011-02-11 15:10 ` [Qemu-devel] [PATCH 2/6] target-arm: fix Neon right shifts with shift amount == input width christophe.lyon
@ 2011-02-11 15:10 ` christophe.lyon
  2011-02-14 17:40   ` Peter Maydell
  2011-02-11 15:11 ` [Qemu-devel] [PATCH 4/6] target-arm: fix saturated values for Neon " christophe.lyon
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 21+ messages in thread
From: christophe.lyon @ 2011-02-11 15:10 UTC (permalink / raw)
  To: qemu-devel

From: Christophe Lyon <christophe.lyon@st.com>

Fix range of shift amounts which always give 0 as result.

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/neon_helper.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index 1ac362f..907f7b7 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -656,7 +656,7 @@ uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shiftop)
 uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
 {
     int8_t shift = (uint8_t)shiftop;
-    if (shift >= 64 || shift < 64) {
+    if (shift >= 64 || shift < -64) {
         val = 0;
     } else if (shift == -64) {
         /* Rounding a 1-bit result just preserves that bit.  */
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 4/6] target-arm: fix saturated values for Neon right shifts.
  2011-02-11 15:10 [Qemu-devel] [PATCH v3 0/6] target-arm: Fix Neon shift instructions christophe.lyon
                   ` (2 preceding siblings ...)
  2011-02-11 15:10 ` [Qemu-devel] [PATCH 3/6] target-arm: fix unsigned 64 bit right shifts christophe.lyon
@ 2011-02-11 15:11 ` christophe.lyon
  2011-02-14 17:46   ` Peter Maydell
  2011-02-11 15:11 ` [Qemu-devel] [PATCH 5/6] target-arm: fix Neon VQSHRN and VSHRN christophe.lyon
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 21+ messages in thread
From: christophe.lyon @ 2011-02-11 15:11 UTC (permalink / raw)
  To: qemu-devel

From: Christophe Lyon <christophe.lyon@st.com>

Fix value returned by signed qrshl helpers (8, 16 and 32 bits).

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/neon_helper.c |    8 ++++++--
 1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index 907f7b7..83d610a 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -903,7 +903,7 @@ uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
         dest = src1 << tmp; \
         if ((dest >> tmp) != src1) { \
             SET_QC(); \
-            dest = src1 >> 31; \
+            dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)) - (src1 > 0 ? 1 : 0); \
         } \
     }} while (0)
 NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
@@ -924,7 +924,11 @@ uint32_t HELPER(neon_qrshl_s32)(CPUState *env, uint32_t valop, uint32_t shiftop)
         dest = val << shift;
         if ((dest >> shift) != val) {
             SET_QC();
-            dest = (uint32_t)(1 << (sizeof(val) * 8 - 1)) - (val > 0 ? 1 : 0);
+            if (val < 0) {
+                dest = INT32_MIN;
+            } else {
+                dest = INT32_MAX;
+            }
         }
     }
     return dest;
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 5/6] target-arm: fix Neon VQSHRN and VSHRN.
  2011-02-11 15:10 [Qemu-devel] [PATCH v3 0/6] target-arm: Fix Neon shift instructions christophe.lyon
                   ` (3 preceding siblings ...)
  2011-02-11 15:11 ` [Qemu-devel] [PATCH 4/6] target-arm: fix saturated values for Neon " christophe.lyon
@ 2011-02-11 15:11 ` christophe.lyon
  2011-02-14 17:41   ` Peter Maydell
  2011-02-11 15:11 ` [Qemu-devel] [PATCH 6/6] target-arm: fix decoding of Neon 64 bit shifts christophe.lyon
  2011-02-14 18:18 ` [Qemu-devel] [PATCH v3 0/6] target-arm: Fix Neon shift instructions Peter Maydell
  6 siblings, 1 reply; 21+ messages in thread
From: christophe.lyon @ 2011-02-11 15:11 UTC (permalink / raw)
  To: qemu-devel

From: Christophe Lyon <christophe.lyon@st.com>

Call the normal shift helpers instead of the rounding ones.

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/translate.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 8791bc5..ace533f 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4095,8 +4095,8 @@ static inline void gen_neon_shift_narrow(int size, TCGv var, TCGv shift,
     } else {
         if (u) {
             switch (size) {
-            case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
-            case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
+            case 1: gen_helper_neon_shl_u16(var, var, shift); break;
+            case 2: gen_helper_neon_shl_u32(var, var, shift); break;
             default: abort();
             }
         } else {
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 6/6] target-arm: fix decoding of Neon 64 bit shifts.
  2011-02-11 15:10 [Qemu-devel] [PATCH v3 0/6] target-arm: Fix Neon shift instructions christophe.lyon
                   ` (4 preceding siblings ...)
  2011-02-11 15:11 ` [Qemu-devel] [PATCH 5/6] target-arm: fix Neon VQSHRN and VSHRN christophe.lyon
@ 2011-02-11 15:11 ` christophe.lyon
  2011-02-14 17:53   ` Peter Maydell
  2011-02-14 18:18 ` [Qemu-devel] [PATCH v3 0/6] target-arm: Fix Neon shift instructions Peter Maydell
  6 siblings, 1 reply; 21+ messages in thread
From: christophe.lyon @ 2011-02-11 15:11 UTC (permalink / raw)
  To: qemu-devel

From: Christophe Lyon <christophe.lyon@st.com>

Fix decoding of 64 bits variants of VSHRN, VRSHRN, VQSHRN, VQSHRUN, VQRSHRN, VQRSHRUN, taking into account whether inputs are unsigned or not.

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/translate.c |   43 ++++++++++++++++++++++++++++---------------
 1 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/target-arm/translate.c b/target-arm/translate.c
index ace533f..10b8c5f 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4815,6 +4815,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
             } else if (op < 10) {
                 /* Shift by immediate and narrow:
                    VSHRN, VRSHRN, VQSHRN, VQRSHRN.  */
+                int input_unsigned = (op == 8) ? !u : u;
+
                 shift = shift - (1 << (size + 3));
                 size++;
                 switch (size) {
@@ -4841,33 +4843,44 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     if (size == 3) {
                         neon_load_reg64(cpu_V0, rm + pass);
                         if (q) {
-                          if (u)
-                            gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, tmp64);
-                          else
-                            gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, tmp64);
+                            if (input_unsigned) {
+                                gen_helper_neon_rshl_u64(cpu_V0, cpu_V0,
+                                                         tmp64);
+                            } else {
+                                gen_helper_neon_rshl_s64(cpu_V0, cpu_V0,
+                                                         tmp64);
+                            }
                         } else {
-                          if (u)
-                            gen_helper_neon_shl_u64(cpu_V0, cpu_V0, tmp64);
-                          else
-                            gen_helper_neon_shl_s64(cpu_V0, cpu_V0, tmp64);
+                            if (input_unsigned) {
+                                gen_helper_neon_shl_u64(cpu_V0, cpu_V0,
+                                                        tmp64);
+                            } else {
+                                gen_helper_neon_shl_s64(cpu_V0, cpu_V0,
+                                                        tmp64);
+                            }
                         }
                     } else {
                         tmp = neon_load_reg(rm + pass, 0);
-                        gen_neon_shift_narrow(size, tmp, tmp2, q, u);
+                        gen_neon_shift_narrow(size, tmp, tmp2, q, input_unsigned);
                         tmp3 = neon_load_reg(rm + pass, 1);
-                        gen_neon_shift_narrow(size, tmp3, tmp2, q, u);
+                        gen_neon_shift_narrow(size, tmp3, tmp2, q, input_unsigned);
                         tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
                         dead_tmp(tmp);
                         dead_tmp(tmp3);
                     }
                     tmp = new_tmp();
-                    if (op == 8 && !u) {
-                        gen_neon_narrow(size - 1, tmp, cpu_V0);
+                    if (op == 8) {
+                        if (u) { /* VQSHRUN / VQRSHRUN */
+                            gen_neon_unarrow_sats(size - 1, tmp, cpu_V0);
+                        } else { /* VSHRN / VRSHRN */
+                            gen_neon_narrow(size - 1, tmp, cpu_V0);
+                        }
                     } else {
-                        if (op == 8)
+                        if (u) { /* VQSHRN / VQRSHRN */
+                        gen_neon_narrow_satu(size - 1, tmp, cpu_V0);
+                        } else { /* VQSHRN / VQRSHRN */
                             gen_neon_narrow_sats(size - 1, tmp, cpu_V0);
-                        else
-                            gen_neon_narrow_satu(size - 1, tmp, cpu_V0);
+                        }
                     }
                     neon_store_reg(rd, pass, tmp);
                 } /* for pass */
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 3/6] target-arm: fix unsigned 64 bit right shifts.
  2011-02-11 15:10 ` [Qemu-devel] [PATCH 3/6] target-arm: fix unsigned 64 bit right shifts christophe.lyon
@ 2011-02-14 17:40   ` Peter Maydell
  0 siblings, 0 replies; 21+ messages in thread
From: Peter Maydell @ 2011-02-14 17:40 UTC (permalink / raw)
  To: christophe.lyon; +Cc: qemu-devel

On 11 February 2011 15:10,  <christophe.lyon@st.com> wrote:
> From: Christophe Lyon <christophe.lyon@st.com>
>
> Fix range of shift amounts which always give 0 as result.
>
> Signed-off-by: Christophe Lyon <christophe.lyon@st.com>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 5/6] target-arm: fix Neon VQSHRN and VSHRN.
  2011-02-11 15:11 ` [Qemu-devel] [PATCH 5/6] target-arm: fix Neon VQSHRN and VSHRN christophe.lyon
@ 2011-02-14 17:41   ` Peter Maydell
  0 siblings, 0 replies; 21+ messages in thread
From: Peter Maydell @ 2011-02-14 17:41 UTC (permalink / raw)
  To: christophe.lyon; +Cc: qemu-devel

On 11 February 2011 15:11,  <christophe.lyon@st.com> wrote:
> From: Christophe Lyon <christophe.lyon@st.com>
>
> Call the normal shift helpers instead of the rounding ones.
>
> Signed-off-by: Christophe Lyon <christophe.lyon@st.com>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 4/6] target-arm: fix saturated values for Neon right shifts.
  2011-02-11 15:11 ` [Qemu-devel] [PATCH 4/6] target-arm: fix saturated values for Neon " christophe.lyon
@ 2011-02-14 17:46   ` Peter Maydell
  2011-02-14 17:49     ` Peter Maydell
  2011-02-15 14:06     ` Christophe Lyon
  0 siblings, 2 replies; 21+ messages in thread
From: Peter Maydell @ 2011-02-14 17:46 UTC (permalink / raw)
  To: christophe.lyon; +Cc: qemu-devel

On 11 February 2011 15:11,  <christophe.lyon@st.com> wrote:

> --- a/target-arm/neon_helper.c
> +++ b/target-arm/neon_helper.c
> @@ -903,7 +903,7 @@ uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
>         dest = src1 << tmp; \
>         if ((dest >> tmp) != src1) { \
>             SET_QC(); \
> -            dest = src1 >> 31; \
> +            dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)) - (src1 > 0 ? 1 : 0); \
>         } \
>     }} while (0)

This gives the right values but is a pretty long expression (among
other things it is more than 80 chars and breaks the QEMU coding
style). I'd rather do the same as the existing qshl_s* helper:

            dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \
            if (src1 > 0) { \
                dest--; \
            } \

>  NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
> @@ -924,7 +924,11 @@ uint32_t HELPER(neon_qrshl_s32)(CPUState *env, uint32_t valop, uint32_t shiftop)
>         dest = val << shift;
>         if ((dest >> shift) != val) {
>             SET_QC();
> -            dest = (uint32_t)(1 << (sizeof(val) * 8 - 1)) - (val > 0 ? 1 : 0);
> +            if (val < 0) {
> +                dest = INT32_MIN;
> +            } else {
> +                dest = INT32_MAX;
> +            }

Again, right answers but the way most of the rest of the code
forces a 32 bit value to signed saturation is
   dest = (val >> 31) ^ ~SIGNBIT;

-- PMM

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 4/6] target-arm: fix saturated values for Neon right shifts.
  2011-02-14 17:46   ` Peter Maydell
@ 2011-02-14 17:49     ` Peter Maydell
  2011-02-15 14:06     ` Christophe Lyon
  1 sibling, 0 replies; 21+ messages in thread
From: Peter Maydell @ 2011-02-14 17:49 UTC (permalink / raw)
  To: christophe.lyon; +Cc: qemu-devel

On 14 February 2011 17:46, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 11 February 2011 15:11,  <christophe.lyon@st.com> wrote:
>>  NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
>> @@ -924,7 +924,11 @@ uint32_t HELPER(neon_qrshl_s32)(CPUState *env, uint32_t valop, uint32_t shiftop)
>>         dest = val << shift;
>>         if ((dest >> shift) != val) {
>>             SET_QC();
>> -            dest = (uint32_t)(1 << (sizeof(val) * 8 - 1)) - (val > 0 ? 1 : 0);
>> +            if (val < 0) {
>> +                dest = INT32_MIN;
>> +            } else {
>> +                dest = INT32_MAX;
>> +            }
>
> Again, right answers but the way most of the rest of the code
> forces a 32 bit value to signed saturation is
>   dest = (val >> 31) ^ ~SIGNBIT;

...and also this is patching a function newly introduced in
patch 1/6 -- better to just have 1/6 have the correct code.

-- PMM

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 6/6] target-arm: fix decoding of Neon 64 bit shifts.
  2011-02-11 15:11 ` [Qemu-devel] [PATCH 6/6] target-arm: fix decoding of Neon 64 bit shifts christophe.lyon
@ 2011-02-14 17:53   ` Peter Maydell
  0 siblings, 0 replies; 21+ messages in thread
From: Peter Maydell @ 2011-02-14 17:53 UTC (permalink / raw)
  To: christophe.lyon; +Cc: qemu-devel

On 11 February 2011 15:11,  <christophe.lyon@st.com> wrote:
> From: Christophe Lyon <christophe.lyon@st.com>
>
> Fix decoding of 64 bits variants of VSHRN, VRSHRN, VQSHRN, VQSHRUN, VQRSHRN, VQRSHRUN, taking into account whether inputs are unsigned or not.
>
> Signed-off-by: Christophe Lyon <christophe.lyon@st.com>

Mostly OK (gives correct answers). Style issues:

>                         tmp = neon_load_reg(rm + pass, 0);
> -                        gen_neon_shift_narrow(size, tmp, tmp2, q, u);
> +                        gen_neon_shift_narrow(size, tmp, tmp2, q, input_unsigned);
>                         tmp3 = neon_load_reg(rm + pass, 1);
> -                        gen_neon_shift_narrow(size, tmp3, tmp2, q, u);
> +                        gen_neon_shift_narrow(size, tmp3, tmp2, q, input_unsigned);

These lines are >80 chars now.

>                     } else {
> -                        if (op == 8)
> +                        if (u) { /* VQSHRN / VQRSHRN */
> +                        gen_neon_narrow_satu(size - 1, tmp, cpu_V0);
> +                        } else { /* VQSHRN / VQRSHRN */

Missing indentation.

The other problem with this area of the code is that it is not
correctly handling the case where pass 1 wants to read a
register which is the target for pass 2. I have a patch to fix
this which I'll post in a moment.

-- PMM

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 1/6] target-arm: Fix rounding constant addition for Neon shift instructions.
  2011-02-11 15:10 ` [Qemu-devel] [PATCH 1/6] target-arm: Fix rounding constant addition for " christophe.lyon
@ 2011-02-14 18:12   ` Peter Maydell
  2011-02-15 10:07     ` Christophe Lyon
  0 siblings, 1 reply; 21+ messages in thread
From: Peter Maydell @ 2011-02-14 18:12 UTC (permalink / raw)
  To: christophe.lyon; +Cc: qemu-devel

On 11 February 2011 15:10,  <christophe.lyon@st.com> wrote:
> +uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop)
> +{
> +    int32_t dest;
> +    int32_t val = (int32_t)valop;
> +    int8_t shift = (int8_t)shiftop;
> +    if (shift >= 32) {
> +        dest = 0;
> +    } else if (shift < -32) {
> +        dest = val >> 31;

This is the wrong answer: large rounding right shifts give zero.

> +    } else if (shift == -32) {
> +        dest = val >> 31;
> +        dest++;
> +        dest >>= 1;

These three lines will always result in dest becoming
0 regardless of the input value.

I'm going to post a patch which fixes these as part
of getting the answers right for VRSHL by large shift
counts in general.

-- PMM

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 2/6] target-arm: fix Neon right shifts with shift amount == input width.
  2011-02-11 15:10 ` [Qemu-devel] [PATCH 2/6] target-arm: fix Neon right shifts with shift amount == input width christophe.lyon
@ 2011-02-14 18:16   ` Peter Maydell
  2011-02-15 13:47     ` Christophe Lyon
  0 siblings, 1 reply; 21+ messages in thread
From: Peter Maydell @ 2011-02-14 18:16 UTC (permalink / raw)
  To: christophe.lyon; +Cc: qemu-devel

On 11 February 2011 15:10,  <christophe.lyon@st.com> wrote:
> From: Christophe Lyon <christophe.lyon@st.com>
>
> Fix rshl helpers (s8, s16, s64, u8, u16)
>
> Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
> ---
>  target-arm/neon_helper.c |    6 +++---
>  1 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
> index 3f1f3d4..1ac362f 100644
> --- a/target-arm/neon_helper.c
> +++ b/target-arm/neon_helper.c
> @@ -548,7 +548,7 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
>     } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \
>         dest = src1 >> (sizeof(src1) * 8 - 1); \
>     } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
> -        dest = src1 >> (tmp - 1); \
> +        dest = src1 >> (-tmp - 1); \
>         dest++; \
>         dest >>= 1; \

Again, these three lines have the same effect as dest = 0,
so we can fold into the previous if().

>     } else if (tmp < 0) { \
> @@ -594,7 +594,7 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
>         val = 0;
>     } else if (shift < -64) {
>         val >>= 63;

You didn't change this case, but it is the wrong answer:
should be 0.

> -    } else if (shift == -63) {
> +    } else if (shift == -64) {
>         val >>= 63;
>         val++;
>         val >>= 1;

Always results in 0.

-- PMM

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH v3 0/6] target-arm: Fix Neon shift instructions.
  2011-02-11 15:10 [Qemu-devel] [PATCH v3 0/6] target-arm: Fix Neon shift instructions christophe.lyon
                   ` (5 preceding siblings ...)
  2011-02-11 15:11 ` [Qemu-devel] [PATCH 6/6] target-arm: fix decoding of Neon 64 bit shifts christophe.lyon
@ 2011-02-14 18:18 ` Peter Maydell
  2011-02-15 14:03   ` Christophe Lyon
  6 siblings, 1 reply; 21+ messages in thread
From: Peter Maydell @ 2011-02-14 18:18 UTC (permalink / raw)
  To: christophe.lyon; +Cc: qemu-devel

On 11 February 2011 15:10,  <christophe.lyon@st.com> wrote:
> From: Christophe Lyon <christophe.lyon@st.com>
>
> This patch series provides fixes such that ARM Neon instructions
> VRSHR, VRSRA, VQRSHRN, VQRSHRUN, VRSHRN, VQSHRN, VSHRN, VQSHRUN now
> pass all my tests.
>
> I have reworked all these patches and I hope they are now easier to
> review.

Thanks; this was indeed a lot easier to review. Mostly
this is OK, there are a few things:
 * minor style issues
 * handling of very large shift counts is not right
(both in code you wrote and existing routines)
 * the shift-and-narrow loop doesn't handle the case
where pass 1 reads registers pass 0 writes

I have patches which (sitting on top of your 6) fix all
these and give a clean pass on the random instruction
set testing for the shift instructions.

Unless you object, I think the simplest thing will be for
me to just fix the minor nits I identified in your patches
and then post a combined series of your patches and
mine.

-- PMM

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 1/6] target-arm: Fix rounding constant addition for Neon shift instructions.
  2011-02-14 18:12   ` Peter Maydell
@ 2011-02-15 10:07     ` Christophe Lyon
  0 siblings, 0 replies; 21+ messages in thread
From: Christophe Lyon @ 2011-02-15 10:07 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-devel

On 14.02.2011 19:12, Peter Maydell wrote:
> On 11 February 2011 15:10,  <christophe.lyon@st.com> wrote:
>> +uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop)
>> +{
>> +    int32_t dest;
>> +    int32_t val = (int32_t)valop;
>> +    int8_t shift = (int8_t)shiftop;
>> +    if (shift >= 32) {
>> +        dest = 0;
>> +    } else if (shift < -32) {
>> +        dest = val >> 31;
> 
> This is the wrong answer: large rounding right shifts give zero.
> 
>> +    } else if (shift == -32) {
>> +        dest = val >> 31;
>> +        dest++;
>> +        dest >>= 1;
> 
> These three lines will always result in dest becoming
> 0 regardless of the input value.
> 

You are right. Actually, I just intended to fix the case where
-32 < shift < 0, and merely re-instanciated the preceding macro with a known size of 32.

You comments also apply to the 8 and 16 bits variants in that macro.

I am too respectful of existing code :-)

Christophe.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 2/6] target-arm: fix Neon right shifts with shift amount == input width.
  2011-02-14 18:16   ` Peter Maydell
@ 2011-02-15 13:47     ` Christophe Lyon
  0 siblings, 0 replies; 21+ messages in thread
From: Christophe Lyon @ 2011-02-15 13:47 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-devel


>> -        dest = src1 >> (tmp - 1); \
>> +        dest = src1 >> (-tmp - 1); \
>>         dest++; \
>>         dest >>= 1; \
> 
> Again, these three lines have the same effect as dest = 0,
> so we can fold into the previous if().
> 
>>     } else if (tmp < 0) { \
>> @@ -594,7 +594,7 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
>>         val = 0;
>>     } else if (shift < -64) {
>>         val >>= 63;
> 
> You didn't change this case, but it is the wrong answer:
> should be 0.
> 
>> -    } else if (shift == -63) {
>> +    } else if (shift == -64) {
>>         val >>= 63;
>>         val++;
>>         val >>= 1;
> 
> Always results in 0.
> 


Oops sorry, in these 3 cases, I just fixed obvious typos but didn't question the actual code.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH v3 0/6] target-arm: Fix Neon shift instructions.
  2011-02-14 18:18 ` [Qemu-devel] [PATCH v3 0/6] target-arm: Fix Neon shift instructions Peter Maydell
@ 2011-02-15 14:03   ` Christophe Lyon
  2011-02-15 14:21     ` Peter Maydell
  0 siblings, 1 reply; 21+ messages in thread
From: Christophe Lyon @ 2011-02-15 14:03 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-devel

On 14.02.2011 19:18, Peter Maydell wrote:
> On 11 February 2011 15:10,  <christophe.lyon@st.com> wrote:
>> From: Christophe Lyon <christophe.lyon@st.com>
>>
>> This patch series provides fixes such that ARM Neon instructions
>> VRSHR, VRSRA, VQRSHRN, VQRSHRUN, VRSHRN, VQSHRN, VSHRN, VQSHRUN now
>> pass all my tests.
>>
>> I have reworked all these patches and I hope they are now easier to
>> review.
> 
> Thanks; this was indeed a lot easier to review. Mostly
> this is OK, there are a few things:
>  * minor style issues
>  * handling of very large shift counts is not right
> (both in code you wrote and existing routines)

Yes, I mostly copied existing code to fix the parts I identified as wrong, but indeed very large shift amounts are not part of my tests.

>  * the shift-and-narrow loop doesn't handle the case
> where pass 1 reads registers pass 0 writes
> 
> I have patches which (sitting on top of your 6) fix all
> these and give a clean pass on the random instruction
> set testing for the shift instructions.
> 
> Unless you object, I think the simplest thing will be for
> me to just fix the minor nits I identified in your patches
> and then post a combined series of your patches and
> mine.
OK, thanks.

It also seems that the neon_shl_s* helpers don't handle correctly large negative shift amounts.

Christophe.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 4/6] target-arm: fix saturated values for Neon right shifts.
  2011-02-14 17:46   ` Peter Maydell
  2011-02-14 17:49     ` Peter Maydell
@ 2011-02-15 14:06     ` Christophe Lyon
  1 sibling, 0 replies; 21+ messages in thread
From: Christophe Lyon @ 2011-02-15 14:06 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-devel


>> @@ -924,7 +924,11 @@ uint32_t HELPER(neon_qrshl_s32)(CPUState *env, uint32_t valop, uint32_t shiftop)
>>         dest = val << shift;
>>         if ((dest >> shift) != val) {
>>             SET_QC();
>> -            dest = (uint32_t)(1 << (sizeof(val) * 8 - 1)) - (val > 0 ? 1 : 0);
>> +            if (val < 0) {
>> +                dest = INT32_MIN;
>> +            } else {
>> +                dest = INT32_MAX;
>> +            }
> 
> Again, right answers but the way most of the rest of the code
> forces a 32 bit value to signed saturation is
>    dest = (val >> 31) ^ ~SIGNBIT;
> 

Indeed; I hadn't given a close look at how saturation is performed elsewhere.
Anyway I find my version more readable ;-)

Quite a few times, I have wondered whether there are rules in QEmu development helping decide between performance of the code vs readability/maintainability?

Christophe.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH v3 0/6] target-arm: Fix Neon shift instructions.
  2011-02-15 14:03   ` Christophe Lyon
@ 2011-02-15 14:21     ` Peter Maydell
  2011-02-15 14:49       ` Christophe Lyon
  0 siblings, 1 reply; 21+ messages in thread
From: Peter Maydell @ 2011-02-15 14:21 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: qemu-devel

On 15 February 2011 14:03, Christophe Lyon <christophe.lyon@st.com> wrote:
> It also seems that the neon_shl_s* helpers don't handle correctly large negative shift amounts.

They look OK to me (large -ve shift is a huge right shift,
which for signed values should propagate the sign bit
to all bit positions, so we shift-right by typesize - 1),
and I didn't see any problems with them when I was
doing my testing. What do you think they get wrong?

-- PMM

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH v3 0/6] target-arm: Fix Neon shift instructions.
  2011-02-15 14:21     ` Peter Maydell
@ 2011-02-15 14:49       ` Christophe Lyon
  0 siblings, 0 replies; 21+ messages in thread
From: Christophe Lyon @ 2011-02-15 14:49 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-devel

On 15.02.2011 15:21, Peter Maydell wrote:
> On 15 February 2011 14:03, Christophe Lyon <christophe.lyon@st.com> wrote:
>> It also seems that the neon_shl_s* helpers don't handle correctly large negative shift amounts.
> 
> They look OK to me (large -ve shift is a huge right shift,
> which for signed values should propagate the sign bit
> to all bit positions, so we shift-right by typesize - 1),
> and I didn't see any problems with them when I was
> doing my testing. What do you think they get wrong?
> 

Sorry, you are right. I was confused by comparing with the shifts with rounding. Maybe we should add a few comments in a few places... :-)

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2011-02-15 14:50 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-02-11 15:10 [Qemu-devel] [PATCH v3 0/6] target-arm: Fix Neon shift instructions christophe.lyon
2011-02-11 15:10 ` [Qemu-devel] [PATCH 1/6] target-arm: Fix rounding constant addition for " christophe.lyon
2011-02-14 18:12   ` Peter Maydell
2011-02-15 10:07     ` Christophe Lyon
2011-02-11 15:10 ` [Qemu-devel] [PATCH 2/6] target-arm: fix Neon right shifts with shift amount == input width christophe.lyon
2011-02-14 18:16   ` Peter Maydell
2011-02-15 13:47     ` Christophe Lyon
2011-02-11 15:10 ` [Qemu-devel] [PATCH 3/6] target-arm: fix unsigned 64 bit right shifts christophe.lyon
2011-02-14 17:40   ` Peter Maydell
2011-02-11 15:11 ` [Qemu-devel] [PATCH 4/6] target-arm: fix saturated values for Neon " christophe.lyon
2011-02-14 17:46   ` Peter Maydell
2011-02-14 17:49     ` Peter Maydell
2011-02-15 14:06     ` Christophe Lyon
2011-02-11 15:11 ` [Qemu-devel] [PATCH 5/6] target-arm: fix Neon VQSHRN and VSHRN christophe.lyon
2011-02-14 17:41   ` Peter Maydell
2011-02-11 15:11 ` [Qemu-devel] [PATCH 6/6] target-arm: fix decoding of Neon 64 bit shifts christophe.lyon
2011-02-14 17:53   ` Peter Maydell
2011-02-14 18:18 ` [Qemu-devel] [PATCH v3 0/6] target-arm: Fix Neon shift instructions Peter Maydell
2011-02-15 14:03   ` Christophe Lyon
2011-02-15 14:21     ` Peter Maydell
2011-02-15 14:49       ` Christophe Lyon

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.