[Qemu-devel] [PATCH v2 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI

All of lore.kernel.org
 help / color / mirror / Atom feed

* [Qemu-devel] [PATCH v2 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI
@ 2011-01-31 18:06 christophe.lyon
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 1/8] target-arm: Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL, VRSRA christophe.lyon
                   ` (7 more replies)
  0 siblings, 8 replies; 19+ messages in thread
From: christophe.lyon @ 2011-01-31 18:06 UTC (permalink / raw)
  To: qemu-devel

From: Christophe Lyon <christophe.lyon@st.com>

This patchset combines fixes from the Meego tree (Peter Maydell, Juha
Riihimäki) and my own fixes such that ARM Neon instructions VQMOVUN
VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI now pass all my
tests.

Christophe Lyon (3):
  Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL,
    VRSRA.
  target-arm: Fix Neon VQ(R)SHRN instructions.
  target-arm: Fix VQRSHL Neon instructions (signed/unsigned 64 bits and
        signed 32 bits variants).

Juha Riihimäki (1):
  target-arm: fix neon vqrshl instruction

Meego (4):
  Create and use neon_unarrow_sat* helpers
  VQRSHRN related changes
  fiddle decoding of 64 bit shift by imm and narrow
  implement vsli.64, vsri.64

 target-arm/helpers.h     |    3 +
 target-arm/neon_helper.c |  195 ++++++++++++++++++++++++++++++++++++++++++----
 target-arm/translate.c   |   98 +++++++++++++++++------
 3 files changed, 253 insertions(+), 43 deletions(-)

-- 
1.7.2.3

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 1/8] target-arm: Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL, VRSRA.
  2011-01-31 18:06 [Qemu-devel] [PATCH v2 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI christophe.lyon
@ 2011-01-31 18:06 ` christophe.lyon
  2011-02-07 15:57   ` Peter Maydell
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 2/8] target-arm: Create and use neon_unarrow_sat* helpers christophe.lyon
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 19+ messages in thread
From: christophe.lyon @ 2011-01-31 18:06 UTC (permalink / raw)
  To: qemu-devel

From: Christophe Lyon <christophe.lyon@st.com>

For variants with rounding, fix cases where adding the rounding
constant could overflow.

For VSHLL, fix bit mask.

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/neon_helper.c |   61 ++++++++++++++++++++++++++++++++++++++-------
 target-arm/translate.c   |   12 +++++++-
 2 files changed, 61 insertions(+), 12 deletions(-)

diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index fead152..6c832b4 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -451,6 +451,9 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
     return val;
 }
 
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64-bit accumulator, which is really needed only when
+ * dealing with 32-bit input values.  */
 #define NEON_FN(dest, src1, src2) do { \
     int8_t tmp; \
     tmp = (int8_t)src2; \
@@ -459,11 +462,12 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
     } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \
         dest = src1 >> (sizeof(src1) * 8 - 1); \
     } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
-        dest = src1 >> (tmp - 1); \
+        dest = src1 >> (-tmp - 1); \
         dest++; \
         dest >>= 1; \
     } else if (tmp < 0) { \
-        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
+        int64_t big_dest = ((int64_t)src1 + (1 << (-1 - tmp))); \
+        dest = big_dest >> -tmp; \
     } else { \
         dest = src1 << tmp; \
     }} while (0)
@@ -472,6 +476,8 @@ NEON_VOP(rshl_s16, neon_s16, 2)
 NEON_VOP(rshl_s32, neon_s32, 1)
 #undef NEON_FN
 
+/* Handling addition overflow with 64-bit input values is trickier
+ * than with 32-bit values.  */
 uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
 {
     int8_t shift = (int8_t)shiftop;
@@ -480,18 +486,37 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
         val = 0;
     } else if (shift < -64) {
         val >>= 63;
-    } else if (shift == -63) {
+    } else if (shift == -64) {
         val >>= 63;
         val++;
         val >>= 1;
     } else if (shift < 0) {
-        val = (val + ((int64_t)1 << (-1 - shift))) >> -shift;
+        int64_t round = (int64_t)1 << (-1 - shift);
+        /* Reduce the range as long as the addition overflows.  It's
+         * sufficient to check if (val+round) is < 0 and val > 0
+         * because round is > 0.  */
+        while ((val > 0) && ((val + round) < 0) && round > 1) {
+            shift++;
+            round >>= 1;
+            val >>= 1;
+        }
+        if ((val > 0) && (val + round) < 0) {
+            /* If addition still overflows at this point, it means
+             * that round==1, thus shift==-1, and also that
+             * val==0x7FFFFFFFFFFFFFFF.  */
+            val = 0x4000000000000000LL;
+        } else {
+            val = (val + round) >> -shift;
+        }
     } else {
         val <<= shift;
     }
     return val;
 }
 
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64-bit accumulator, which is really needed only when
+ * dealing with 32-bit input values.  */
 #define NEON_FN(dest, src1, src2) do { \
     int8_t tmp; \
     tmp = (int8_t)src2; \
@@ -499,9 +524,10 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
         tmp < -(ssize_t)sizeof(src1) * 8) { \
         dest = 0; \
     } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
-        dest = src1 >> (tmp - 1); \
+        dest = src1 >> (-tmp - 1); \
     } else if (tmp < 0) { \
-        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
+        uint64_t big_dest = ((uint64_t)src1 + (1 << (-1 - tmp))); \
+        dest = big_dest >> -tmp; \
     } else { \
         dest = src1 << tmp; \
     }} while (0)
@@ -513,14 +539,29 @@ NEON_VOP(rshl_u32, neon_u32, 1)
 uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
 {
     int8_t shift = (uint8_t)shiftop;
-    if (shift >= 64 || shift < 64) {
+    if (shift >= 64 || shift < -64) {
         val = 0;
     } else if (shift == -64) {
         /* Rounding a 1-bit result just preserves that bit.  */
         val >>= 63;
-    } if (shift < 0) {
-        val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift;
-        val >>= -shift;
+    } else if (shift < 0) {
+        uint64_t round = (uint64_t)1 << (-1 - shift);
+        /* Reduce the range as long as the addition overflows.  It's
+         * sufficient to check if (val+round) is < val
+         * because val and round are > 0.  */
+        while (((val + round) < val) && round > 1) {
+            shift++;
+            round >>= 1;
+            val >>= 1;
+        }
+        if ((val + round) < val) {
+            /* If addition still overflows at this point, it means
+             * that round==1, thus shift==-1, and also that
+             * val==0xFFFFFFFFFFFFFFFF.  */
+            val = 0x8000000000000000LL;
+        } else {
+            val = (val + round) >> -shift;
+        }
     } else {
         val <<= shift;
     }
diff --git a/target-arm/translate.c b/target-arm/translate.c
index d95133f..b44f7a1 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4877,10 +4877,18 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             if (size == 0) {
                                 imm = (0xffu >> (8 - shift));
                                 imm |= imm << 16;
-                            } else {
+                            } else if (size == 1) {
                                 imm = 0xffff >> (16 - shift);
+                            } else {
+                                /* size == 2 */
+                                imm = 0xffffffff >> (32 - shift);
+                            }
+                            if (size < 2) {
+                                imm64 = imm | (((uint64_t)imm) << 32);
+                            } else {
+                                imm64 = imm;
                             }
-                            imm64 = imm | (((uint64_t)imm) << 32);
+                            imm64 = ~imm64;
                             tcg_gen_andi_i64(cpu_V0, cpu_V0, imm64);
                         }
                     }
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 1/8] target-arm: Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL, VRSRA.
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 1/8] target-arm: Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL, VRSRA christophe.lyon
@ 2011-02-07 15:57   ` Peter Maydell
  2011-02-09 12:16     ` Christophe Lyon
  0 siblings, 1 reply; 19+ messages in thread
From: Peter Maydell @ 2011-02-07 15:57 UTC (permalink / raw)
  To: christophe.lyon; +Cc: qemu-devel

On 31 January 2011 18:06,  <christophe.lyon@st.com> wrote:
> For variants with rounding, fix cases where adding the rounding
> constant could overflow.
>
> For VSHLL, fix bit mask.

These two things are completely distinct -- please put them
in separate patches.

-- PMM

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 1/8] target-arm: Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL, VRSRA.
  2011-02-07 15:57   ` Peter Maydell
@ 2011-02-09 12:16     ` Christophe Lyon
  0 siblings, 0 replies; 19+ messages in thread
From: Christophe Lyon @ 2011-02-09 12:16 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-devel

On 07.02.2011 16:57, Peter Maydell wrote:
> On 31 January 2011 18:06,  <christophe.lyon@st.com> wrote:
>> For variants with rounding, fix cases where adding the rounding
>> constant could overflow.
>>
>> For VSHLL, fix bit mask.
> 
> These two things are completely distinct -- please put them
> in separate patches.
> 

OK, I am going to re-submit this whole series of patches.

Thanks

Christophe.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 2/8] target-arm: Create and use neon_unarrow_sat* helpers
  2011-01-31 18:06 [Qemu-devel] [PATCH v2 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI christophe.lyon
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 1/8] target-arm: Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL, VRSRA christophe.lyon
@ 2011-01-31 18:06 ` christophe.lyon
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 3/8] target-arm: VQRSHRN related changes christophe.lyon
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 19+ messages in thread
From: christophe.lyon @ 2011-01-31 18:06 UTC (permalink / raw)
  To: qemu-devel

From: Christophe Lyon <christophe.lyon@st.com>

Fix VQMOVUN, improve VQSHRUN and VQRSHRUN.

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/helpers.h     |    3 ++
 target-arm/neon_helper.c |   63 ++++++++++++++++++++++++++++++++++++++++++++++
 target-arm/translate.c   |   43 ++++++++++++++++++++++---------
 3 files changed, 96 insertions(+), 13 deletions(-)

diff --git a/target-arm/helpers.h b/target-arm/helpers.h
index b88ebae..8cc6a44 100644
--- a/target-arm/helpers.h
+++ b/target-arm/helpers.h
@@ -295,10 +295,13 @@ DEF_HELPER_3(neon_qrdmulh_s32, i32, env, i32, i32)
 
 DEF_HELPER_1(neon_narrow_u8, i32, i64)
 DEF_HELPER_1(neon_narrow_u16, i32, i64)
+DEF_HELPER_2(neon_unarrow_sat8, i32, env, i64)
 DEF_HELPER_2(neon_narrow_sat_u8, i32, env, i64)
 DEF_HELPER_2(neon_narrow_sat_s8, i32, env, i64)
+DEF_HELPER_2(neon_unarrow_sat16, i32, env, i64)
 DEF_HELPER_2(neon_narrow_sat_u16, i32, env, i64)
 DEF_HELPER_2(neon_narrow_sat_s16, i32, env, i64)
+DEF_HELPER_2(neon_unarrow_sat32, i32, env, i64)
 DEF_HELPER_2(neon_narrow_sat_u32, i32, env, i64)
 DEF_HELPER_2(neon_narrow_sat_s32, i32, env, i64)
 DEF_HELPER_1(neon_narrow_high_u8, i32, i64)
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index 6c832b4..891b812 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -1005,6 +1005,33 @@ uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x)
     return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000);
 }
 
+uint32_t HELPER(neon_unarrow_sat8)(CPUState *env, uint64_t x)
+{
+    uint16_t s;
+    uint8_t d;
+    uint32_t res = 0;
+#define SAT8(n) \
+    s = x >> n; \
+    if (s & 0x8000) { \
+        SET_QC(); \
+    } else { \
+        if (s > 0xff) { \
+            d = 0xff; \
+            SET_QC(); \
+        } else  { \
+            d = s; \
+        } \
+        res |= (uint32_t)d << (n / 2); \
+    }
+    
+    SAT8(0);
+    SAT8(16);
+    SAT8(32);
+    SAT8(48);
+#undef SAT8
+    return res;
+}
+
 uint32_t HELPER(neon_narrow_sat_u8)(CPUState *env, uint64_t x)
 {
     uint16_t s;
@@ -1051,6 +1078,29 @@ uint32_t HELPER(neon_narrow_sat_s8)(CPUState *env, uint64_t x)
     return res;
 }
 
+uint32_t HELPER(neon_unarrow_sat16)(CPUState *env, uint64_t x)
+{
+    uint32_t high;
+    uint32_t low;
+    low = x;
+    if (low & 0x80000000) {
+        low = 0;
+        SET_QC();
+    } else if (low > 0xffff) {
+        low = 0xffff;
+        SET_QC();
+    }
+    high = x >> 32;
+    if (high & 0x80000000) {
+        high = 0;
+        SET_QC();
+    } else if (high > 0xffff) {
+        high = 0xffff;
+        SET_QC();
+    }
+    return low | (high << 16);
+}
+
 uint32_t HELPER(neon_narrow_sat_u16)(CPUState *env, uint64_t x)
 {
     uint32_t high;
@@ -1085,6 +1135,19 @@ uint32_t HELPER(neon_narrow_sat_s16)(CPUState *env, uint64_t x)
     return (uint16_t)low | (high << 16);
 }
 
+uint32_t HELPER(neon_unarrow_sat32)(CPUState *env, uint64_t x)
+{
+    if (x & 0x8000000000000000ull) {
+        SET_QC();
+        return 0;
+    }
+    if (x > 0xffffffffu) {
+        SET_QC();
+        return 0xffffffffu;
+    }
+    return x;
+}
+
 uint32_t HELPER(neon_narrow_sat_u32)(CPUState *env, uint64_t x)
 {
     if (x > 0xffffffffu) {
diff --git a/target-arm/translate.c b/target-arm/translate.c
index b44f7a1..6dd024d 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4071,6 +4071,16 @@ static inline void gen_neon_narrow_satu(int size, TCGv dest, TCGv_i64 src)
     }
 }
 
+static inline void gen_neon_unarrow_sats(int size, TCGv dest, TCGv_i64 src)
+{
+    switch(size) {
+    case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
+    case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
+    case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
+    default: abort();
+    }
+}
+
 static inline void gen_neon_shift_narrow(int size, TCGv var, TCGv shift,
                                          int q, int u)
 {
@@ -4841,13 +4851,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         dead_tmp(tmp3);
                     }
                     tmp = new_tmp();
-                    if (op == 8 && !u) {
-                        gen_neon_narrow(size - 1, tmp, cpu_V0);
-                    } else {
-                        if (op == 8)
-                            gen_neon_narrow_sats(size - 1, tmp, cpu_V0);
-                        else
-                            gen_neon_narrow_satu(size - 1, tmp, cpu_V0);
+                    if (op == 8) {
+                        if (u) { /* VQSHRUN / VQRSHRUN */
+                            gen_neon_unarrow_sats(size - 1, tmp, cpu_V0);
+                        } else { /* VSHRN / VRSHRN */
+                            gen_neon_narrow(size - 1, tmp, cpu_V0);
+                        }
+                    } else { /* VQSHRN / VQRSHRN */
+                        gen_neon_narrow_satu(size - 1, tmp, cpu_V0);
                     }
                     neon_store_reg(rd, pass, tmp);
                 } /* for pass */
@@ -5471,12 +5482,18 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     for (pass = 0; pass < 2; pass++) {
                         neon_load_reg64(cpu_V0, rm + pass);
                         tmp = new_tmp();
-                        if (op == 36 && q == 0) {
-                            gen_neon_narrow(size, tmp, cpu_V0);
-                        } else if (q) {
-                            gen_neon_narrow_satu(size, tmp, cpu_V0);
-                        } else {
-                            gen_neon_narrow_sats(size, tmp, cpu_V0);
+                        if (op == 36) {
+                            if (q) { /* VQMOVUN */
+                                gen_neon_unarrow_sats(size, tmp, cpu_V0);
+                            } else { /* VMOVN */
+                                gen_neon_narrow(size, tmp, cpu_V0);
+                            }
+                        } else { /* VQMOVN */
+                            if (q) {
+                                gen_neon_narrow_satu(size, tmp, cpu_V0);
+                            } else {
+                                gen_neon_narrow_sats(size, tmp, cpu_V0);
+                            }
                         }
                         if (pass == 0) {
                             tmp2 = tmp;
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 3/8] target-arm: VQRSHRN related changes
  2011-01-31 18:06 [Qemu-devel] [PATCH v2 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI christophe.lyon
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 1/8] target-arm: Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL, VRSRA christophe.lyon
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 2/8] target-arm: Create and use neon_unarrow_sat* helpers christophe.lyon
@ 2011-01-31 18:06 ` christophe.lyon
  2011-02-08 13:41   ` Peter Maydell
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 4/8] target-arm: fiddle decoding of 64 bit shift by imm and narrow christophe.lyon
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 19+ messages in thread
From: christophe.lyon @ 2011-01-31 18:06 UTC (permalink / raw)
  To: qemu-devel

From: Christophe Lyon <christophe.lyon@st.com>

More fixes for VQSHRN and VQSHRUN.

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/translate.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 6dd024d..9ca5b82 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4101,8 +4101,8 @@ static inline void gen_neon_shift_narrow(int size, TCGv var, TCGv shift,
     } else {
         if (u) {
             switch (size) {
-            case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
-            case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
+            case 1: gen_helper_neon_shl_u16(var, var, shift); break;
+            case 2: gen_helper_neon_shl_u32(var, var, shift); break;
             default: abort();
             }
         } else {
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 3/8] target-arm: VQRSHRN related changes
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 3/8] target-arm: VQRSHRN related changes christophe.lyon
@ 2011-02-08 13:41   ` Peter Maydell
  0 siblings, 0 replies; 19+ messages in thread
From: Peter Maydell @ 2011-02-08 13:41 UTC (permalink / raw)
  To: christophe.lyon; +Cc: qemu-devel

On 31 January 2011 18:06,  <christophe.lyon@st.com> wrote:
> From: Christophe Lyon <christophe.lyon@st.com>
>
> More fixes for VQSHRN and VQSHRUN.
>
> Signed-off-by: Christophe Lyon <christophe.lyon@st.com>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
as far as the code goes, but the commit message is
excessively vague.

-- PMM

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 4/8] target-arm: fiddle decoding of 64 bit shift by imm and narrow
  2011-01-31 18:06 [Qemu-devel] [PATCH v2 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI christophe.lyon
                   ` (2 preceding siblings ...)
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 3/8] target-arm: VQRSHRN related changes christophe.lyon
@ 2011-01-31 18:06 ` christophe.lyon
  2011-02-08 13:49   ` Peter Maydell
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 5/8] target-arm: fix neon vqrshl instruction christophe.lyon
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 19+ messages in thread
From: christophe.lyon @ 2011-01-31 18:06 UTC (permalink / raw)
  To: qemu-devel

From: Christophe Lyon <christophe.lyon@st.com>

Tweak decoding of the shift-by-imm and narrow 64 bit insns
(VSHRN, VRSHRN, VQSHRN, VQSHRUN, VQRSHRN, VQRSHRUN).

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/translate.c |   28 ++++++++++++++++++----------
 1 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 9ca5b82..a614e34 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4831,21 +4831,29 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     if (size == 3) {
                         neon_load_reg64(cpu_V0, rm + pass);
                         if (q) {
-                          if (u)
-                            gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, tmp64);
-                          else
-                            gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, tmp64);
+                            if ((op == 8 && !u) || (op == 9 && u)) {
+                                gen_helper_neon_rshl_u64(cpu_V0, cpu_V0,
+                                                         tmp64);
+                            } else {
+                                gen_helper_neon_rshl_s64(cpu_V0, cpu_V0,
+                                                         tmp64);
+                            }
                         } else {
-                          if (u)
-                            gen_helper_neon_shl_u64(cpu_V0, cpu_V0, tmp64);
-                          else
-                            gen_helper_neon_shl_s64(cpu_V0, cpu_V0, tmp64);
+                            if ((op == 8 && !u) || (op == 9 && u)) {
+                                gen_helper_neon_shl_u64(cpu_V0, cpu_V0,
+                                                        tmp64);
+                            } else {
+                                gen_helper_neon_shl_s64(cpu_V0, cpu_V0,
+                                                        tmp64);
+                            }
                         }
                     } else {
                         tmp = neon_load_reg(rm + pass, 0);
-                        gen_neon_shift_narrow(size, tmp, tmp2, q, u);
+                        gen_neon_shift_narrow(size, tmp, tmp2, q,
+                                              (op == 8) ? !u : u);
                         tmp3 = neon_load_reg(rm + pass, 1);
-                        gen_neon_shift_narrow(size, tmp3, tmp2, q, u);
+                        gen_neon_shift_narrow(size, tmp3, tmp2, q,
+                                              (op == 8) ? !u : u);
                         tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
                         dead_tmp(tmp);
                         dead_tmp(tmp3);
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 4/8] target-arm: fiddle decoding of 64 bit shift by imm and narrow
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 4/8] target-arm: fiddle decoding of 64 bit shift by imm and narrow christophe.lyon
@ 2011-02-08 13:49   ` Peter Maydell
  0 siblings, 0 replies; 19+ messages in thread
From: Peter Maydell @ 2011-02-08 13:49 UTC (permalink / raw)
  To: christophe.lyon; +Cc: qemu-devel

On 31 January 2011 18:06,  <christophe.lyon@st.com> wrote:
> From: Christophe Lyon <christophe.lyon@st.com>
>
> Tweak decoding of the shift-by-imm and narrow 64 bit insns
> (VSHRN, VRSHRN, VQSHRN, VQSHRUN, VQRSHRN, VQRSHRUN).
>
> Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
> ---
>  target-arm/translate.c |   28 ++++++++++++++++++----------
>  1 files changed, 18 insertions(+), 10 deletions(-)
>
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 9ca5b82..a614e34 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -4831,21 +4831,29 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
>                     if (size == 3) {
>                         neon_load_reg64(cpu_V0, rm + pass);
>                         if (q) {
> -                          if (u)
> -                            gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, tmp64);
> -                          else
> -                            gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, tmp64);
> +                            if ((op == 8 && !u) || (op == 9 && u)) {

This patch would be improved if you started with:
                int input_unsigned = (op == 8) ? !u : u;
and then used that here and:

> +                            if ((op == 8 && !u) || (op == 9 && u)) {

here...

> +                        gen_neon_shift_narrow(size, tmp, tmp2, q,
> +                                              (op == 8) ? !u : u);

here...

> +                        gen_neon_shift_narrow(size, tmp3, tmp2, q,
> +                                              (op == 8) ? !u : u);

and here.

(What all these things are trying to check is whether the
input elements for the operations are signed or unsigned;
this isn't the same as the u bit, which is whether the output
is unsigned.)

-- PMM

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 5/8] target-arm: fix neon vqrshl instruction
  2011-01-31 18:06 [Qemu-devel] [PATCH v2 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI christophe.lyon
                   ` (3 preceding siblings ...)
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 4/8] target-arm: fiddle decoding of 64 bit shift by imm and narrow christophe.lyon
@ 2011-01-31 18:06 ` christophe.lyon
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 6/8] target-arm: Fix Neon VQ(R)SHRN instructions christophe.lyon
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 19+ messages in thread
From: christophe.lyon @ 2011-01-31 18:06 UTC (permalink / raw)
  To: qemu-devel

From: Christophe Lyon <christophe.lyon@st.com>

Signed-off-by: Juha Riihimäki <juha.riihimaki@nokia.com>
Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/neon_helper.c |   21 ++++++++++++++++++---
 1 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index 891b812..46fcdc4 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -736,9 +736,24 @@ uint64_t HELPER(neon_qshlu_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
     }} while (0)
 NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
 NEON_VOP_ENV(qrshl_u16, neon_u16, 2)
-NEON_VOP_ENV(qrshl_u32, neon_u32, 1)
 #undef NEON_FN
 
+uint32_t HELPER(neon_qrshl_u32)(CPUState *env, uint32_t val, uint32_t shiftop)
+{
+    int8_t shift = (int8_t)shiftop;
+    if (shift < 0) {
+        val = ((uint64_t)val + (1 << (-1 - shift))) >> -shift;
+    } else {
+        uint32_t tmp = val;
+        val <<= shift;
+        if ((val >> shift) != tmp) {
+            SET_QC();
+            val = ~0;
+        }
+    }
+    return val;
+}
+
 uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
 {
     int8_t shift = (int8_t)shiftop;
@@ -764,7 +779,7 @@ uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
         dest = src1 << tmp; \
         if ((dest >> tmp) != src1) { \
             SET_QC(); \
-            dest = src1 >> 31; \
+            dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)) - (src1 > 0 ? 1 : 0); \
         } \
     }} while (0)
 NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
@@ -780,7 +795,7 @@ uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
     if (shift < 0) {
         val = (val + (1 << (-1 - shift))) >> -shift;
     } else {
-        int64_t tmp = val;;
+        int64_t tmp = val;
         val <<= shift;
         if ((val >> shift) != tmp) {
             SET_QC();
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 6/8] target-arm: Fix Neon VQ(R)SHRN instructions.
  2011-01-31 18:06 [Qemu-devel] [PATCH v2 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI christophe.lyon
                   ` (4 preceding siblings ...)
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 5/8] target-arm: fix neon vqrshl instruction christophe.lyon
@ 2011-01-31 18:06 ` christophe.lyon
  2011-02-07 16:08   ` Peter Maydell
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 7/8] target-arm: implement vsli.64, vsri.64 christophe.lyon
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 8/8] target-arm: Fix VQRSHL Neon instructions (signed/unsigned 64 bits and signed 32 bits variants) christophe.lyon
  7 siblings, 1 reply; 19+ messages in thread
From: christophe.lyon @ 2011-01-31 18:06 UTC (permalink / raw)
  To: qemu-devel

From: Christophe Lyon <christophe.lyon@st.com>

Handle unsigned variant of VQ(R)SHRN instructions.

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/translate.c |    8 ++++++--
 1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/target-arm/translate.c b/target-arm/translate.c
index a614e34..61d4c4c 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4865,8 +4865,12 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         } else { /* VSHRN / VRSHRN */
                             gen_neon_narrow(size - 1, tmp, cpu_V0);
                         }
-                    } else { /* VQSHRN / VQRSHRN */
-                        gen_neon_narrow_satu(size - 1, tmp, cpu_V0);
+                    } else {
+                        if (u) { /* VQSHRUN / VQRSHRUN */
+                            gen_neon_narrow_satu(size - 1, tmp, cpu_V0);
+                        } else { /* VQSHRN / VQRSHRN */
+                            gen_neon_narrow_sats(size - 1, tmp, cpu_V0);
+                        }
                     }
                     neon_store_reg(rd, pass, tmp);
                 } /* for pass */
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 6/8] target-arm: Fix Neon VQ(R)SHRN instructions.
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 6/8] target-arm: Fix Neon VQ(R)SHRN instructions christophe.lyon
@ 2011-02-07 16:08   ` Peter Maydell
  2011-02-07 16:50     ` Christophe Lyon
  0 siblings, 1 reply; 19+ messages in thread
From: Peter Maydell @ 2011-02-07 16:08 UTC (permalink / raw)
  To: christophe.lyon; +Cc: qemu-devel

On 31 January 2011 18:06,  <christophe.lyon@st.com> wrote:
> From: Christophe Lyon <christophe.lyon@st.com>
>
> Handle unsigned variant of VQ(R)SHRN instructions.

This patch appears to be modifying a section of code
that was already patched by 2/8 in this series. That's
too confusing to review -- please combine them into
one patch.

-- PMM

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 6/8] target-arm: Fix Neon VQ(R)SHRN instructions.
  2011-02-07 16:08   ` Peter Maydell
@ 2011-02-07 16:50     ` Christophe Lyon
  2011-02-07 17:01       ` Peter Maydell
  0 siblings, 1 reply; 19+ messages in thread
From: Christophe Lyon @ 2011-02-07 16:50 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-devel

On 07.02.2011 17:08, Peter Maydell wrote:
> On 31 January 2011 18:06,  <christophe.lyon@st.com> wrote:
>> From: Christophe Lyon <christophe.lyon@st.com>
>>
>> Handle unsigned variant of VQ(R)SHRN instructions.
> 
> This patch appears to be modifying a section of code
> that was already patched by 2/8 in this series. That's
> too confusing to review -- please combine them into
> one patch.
> 

That's because I borrowed patch 2/8 from your meego tree. It was incomplete, but as I preferred to keep it as-is, I completed it in a separate patch.

From your other comments, it looks like I would have been better off ignoring the patches from Meego and rewriting the patches on my own ;-)

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 6/8] target-arm: Fix Neon VQ(R)SHRN instructions.
  2011-02-07 16:50     ` Christophe Lyon
@ 2011-02-07 17:01       ` Peter Maydell
  0 siblings, 0 replies; 19+ messages in thread
From: Peter Maydell @ 2011-02-07 17:01 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: qemu-devel

On 7 February 2011 16:50, Christophe Lyon <christophe.lyon@st.com> wrote:
> On 07.02.2011 17:08, Peter Maydell wrote:
>> On 31 January 2011 18:06,  <christophe.lyon@st.com> wrote:
>>> From: Christophe Lyon <christophe.lyon@st.com>
>>>
>>> Handle unsigned variant of VQ(R)SHRN instructions.
>>
>> This patch appears to be modifying a section of code
>> that was already patched by 2/8 in this series. That's
>> too confusing to review -- please combine them into
>> one patch.
>>
>
> That's because I borrowed patch 2/8 from your meego tree.
> It was incomplete, but as I preferred to keep it as-is, I
> completed it in a separate patch.
>
> From your other comments, it looks like I would have been better off
> ignoring the patches from Meego and rewriting the patches on my own ;-)

Well, you shouldn't ignore them, but part of the process of getting
those patches into upstream is cleaning them up: testing whether
they work, adding the cases where they don't, providing better
commit messages, splitting and combining them so that each
patch submitted upstream is a single easy to review logical change,
and so on. (It's exactly because this isn't a totally trivial process
that there's still a queue of non-upstreamed patches in that tree.)

-- PMM

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 7/8] target-arm: implement vsli.64, vsri.64
  2011-01-31 18:06 [Qemu-devel] [PATCH v2 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI christophe.lyon
                   ` (5 preceding siblings ...)
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 6/8] target-arm: Fix Neon VQ(R)SHRN instructions christophe.lyon
@ 2011-01-31 18:06 ` christophe.lyon
  2011-02-07 15:55   ` Peter Maydell
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 8/8] target-arm: Fix VQRSHL Neon instructions (signed/unsigned 64 bits and signed 32 bits variants) christophe.lyon
  7 siblings, 1 reply; 19+ messages in thread
From: christophe.lyon @ 2011-01-31 18:06 UTC (permalink / raw)
  To: qemu-devel

From: Christophe Lyon <christophe.lyon@st.com>

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/translate.c |   11 ++++++++++-
 1 files changed, 10 insertions(+), 1 deletions(-)

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 61d4c4c..9150242 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4700,7 +4700,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
                         } else if (op == 4 || (op == 5 && u)) {
                             /* Insert */
-                            cpu_abort(env, "VS[LR]I.64 not implemented");
+                            neon_load_reg64(cpu_V1, rd + pass);
+                            uint64_t mask;
+                            if (op == 4) {
+                                mask = 0xffffffffffffffffull >> -shift;
+                            } else {
+                                mask = 0xffffffffffffffffull << shift;
+                            }
+                            tcg_gen_andi_i64(cpu_V0, cpu_V0, mask);
+                            tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask);
+                            tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
                         }
                         neon_store_reg64(cpu_V0, rd + pass);
                     } else { /* size < 3 */
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 7/8] target-arm: implement vsli.64, vsri.64
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 7/8] target-arm: implement vsli.64, vsri.64 christophe.lyon
@ 2011-02-07 15:55   ` Peter Maydell
  2011-02-07 16:14     ` Peter Maydell
  0 siblings, 1 reply; 19+ messages in thread
From: Peter Maydell @ 2011-02-07 15:55 UTC (permalink / raw)
  To: christophe.lyon; +Cc: qemu-devel

On 31 January 2011 18:06,  <christophe.lyon@st.com> wrote:
> From: Christophe Lyon <christophe.lyon@st.com>
>
> Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
> ---
>  target-arm/translate.c |   11 ++++++++++-
>  1 files changed, 10 insertions(+), 1 deletions(-)
>
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 61d4c4c..9150242 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -4700,7 +4700,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
>                             tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
>                         } else if (op == 4 || (op == 5 && u)) {
>                             /* Insert */
> -                            cpu_abort(env, "VS[LR]I.64 not implemented");
> +                            neon_load_reg64(cpu_V1, rd + pass);
> +                            uint64_t mask;
> +                            if (op == 4) {
> +                                mask = 0xffffffffffffffffull >> -shift;
> +                            } else {
> +                                mask = 0xffffffffffffffffull << shift;
> +                            }

If shift is 64 or -64 then the result of this shift is undefined (and for
an x86 host we get the wrong results for eg "vsri.64 q5,q5,64").

You want to add an
  if (shift < -63 || shift > 63) {
    mask = 0;
  } else ...

clause (compare the 32 bit case.)

> +                            tcg_gen_andi_i64(cpu_V0, cpu_V0, mask);

This AND is harmless but unnecessary (and not specified in the
ARM ARM.)

-- PMM

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 7/8] target-arm: implement vsli.64, vsri.64
  2011-02-07 15:55   ` Peter Maydell
@ 2011-02-07 16:14     ` Peter Maydell
  0 siblings, 0 replies; 19+ messages in thread
From: Peter Maydell @ 2011-02-07 16:14 UTC (permalink / raw)
  To: christophe.lyon; +Cc: qemu-devel

On 7 February 2011 15:55, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 31 January 2011 18:06,  <christophe.lyon@st.com> wrote:
>> From: Christophe Lyon <christophe.lyon@st.com>
>>
>> Signed-off-by: Christophe Lyon <christophe.lyon@st.com>

Incidentally, this patch works fine (when corrected) as a standalone
fix for these instructions, so you could just submit the reworked
version as a patch on its own if you don't want it to get tangled
up in review of the other patches in this series.

-- PMM

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 8/8] target-arm: Fix VQRSHL Neon instructions (signed/unsigned 64 bits and signed 32 bits variants).
  2011-01-31 18:06 [Qemu-devel] [PATCH v2 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI christophe.lyon
                   ` (6 preceding siblings ...)
  2011-01-31 18:06 ` [Qemu-devel] [PATCH 7/8] target-arm: implement vsli.64, vsri.64 christophe.lyon
@ 2011-01-31 18:06 ` christophe.lyon
  7 siblings, 0 replies; 19+ messages in thread
From: christophe.lyon @ 2011-01-31 18:06 UTC (permalink / raw)
  To: qemu-devel

From: Christophe Lyon <christophe.lyon@st.com>

The addition of the rounding constant could cause overflows.

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/neon_helper.c |   50 ++++++++++++++++++++++++++++++++++++++++++---
 1 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index 46fcdc4..2f96575 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -758,7 +758,23 @@ uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
 {
     int8_t shift = (int8_t)shiftop;
     if (shift < 0) {
-        val = (val + (1 << (-1 - shift))) >> -shift;
+        uint64_t round = (uint64_t)1 << (-1 - shift);
+        /* Reduce the range as long as the addition overflows.  It's
+         * sufficient to check if (val+round) is < val
+         * because val and round are > 0.  */
+        while (((val + round) < val) && round > 1) {
+            shift++;
+            round >>= 1;
+            val >>= 1;
+        }
+        if ((val + round) < val) {
+            /* If addition still overflows at this point, it means
+             * that round==1, thus shift==-1, and also that
+             * val==0xFFFFFFFFFFFFFFFF.  */
+            val = 0x8000000000000000LL;
+        } else {
+            val = (val + round) >> -shift;
+        }
     } else { \
         uint64_t tmp = val;
         val <<= shift;
@@ -770,11 +786,15 @@ uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
     return val;
 }
 
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64 bits accumulator, which is really needed only when
+ * dealing with 32 bits input values.  */
 #define NEON_FN(dest, src1, src2) do { \
     int8_t tmp; \
     tmp = (int8_t)src2; \
     if (tmp < 0) { \
-        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
+        int64_t big_dest = ((int64_t)src1 + (1 << (-1 - tmp))); \
+        dest = big_dest >> -tmp; \
     } else { \
         dest = src1 << tmp; \
         if ((dest >> tmp) != src1) { \
@@ -787,19 +807,41 @@ NEON_VOP_ENV(qrshl_s16, neon_s16, 2)
 NEON_VOP_ENV(qrshl_s32, neon_s32, 1)
 #undef NEON_FN
 
+/* Handling addition overflow with 64 bits inputs values is more
+ * tricky than with 32 bits values.  */
 uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
 {
     int8_t shift = (uint8_t)shiftop;
     int64_t val = valop;
 
     if (shift < 0) {
-        val = (val + (1 << (-1 - shift))) >> -shift;
+        int64_t round = (int64_t)1 << (-1 - shift);
+        /* Reduce the range as long as the addition overflows.  It's
+         * sufficient to check if (val+round) is < 0 and val > 0
+         * because round is > 0.  */
+        while ((val > 0) && ((val + round) < 0) && round > 1) {
+            shift++;
+            round >>= 1;
+            val >>= 1;
+        }
+        if ((val > 0) && (val + round) < 0) {
+            /* If addition still overflows at this point, it means
+             * that round==1, thus shift==-1, and also that
+             * val==0x7FFFFFFFFFFFFFFF.  */
+            val = 0x4000000000000000LL;
+        } else {
+            val = (val + round) >> -shift;
+        }
     } else {
         int64_t tmp = val;
         val <<= shift;
         if ((val >> shift) != tmp) {
             SET_QC();
-            val = tmp >> 31;
+            if (tmp < 0) {
+                val = INT64_MIN;
+            } else {
+                val = INT64_MAX;
+            }
         }
     }
     return val;
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI
@ 2011-01-28 15:50 christophe.lyon
  2011-01-28 15:51 ` [Qemu-devel] [PATCH 6/8] target-arm: Fix Neon VQ(R)SHRN instructions christophe.lyon
  0 siblings, 1 reply; 19+ messages in thread
From: christophe.lyon @ 2011-01-28 15:50 UTC (permalink / raw)
  To: qemu-devel

From: Christophe Lyon <christophe.lyon@st.com>

This patchset combines fixes from the Meego tree (Peter Maydell, Juha
Riihimäki) and my own fixes such that ARM Neon instructions VQMOVUN
VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI now pass all my
tests.

Christophe Lyon (3):
  Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL,
    VRSRA.
  target-arm: Fix Neon VQ(R)SHRN instructions.
  target-arm: Fix VQRSHL Neon instructions (signed/unsigned 64 bits and
        signed 32 bits variants).

Juha Riihimäki (1):
  target-arm: fix neon vqrshl instruction

Peter Maydell (4):
  Create and use neon_unarrow_sat* helpers
  VQRSHRN related changes
  fiddle decoding of 64 bit shift by imm and narrow
  implement vsli.64, vsri.64

 target-arm/helpers.h     |    3 +
 target-arm/neon_helper.c |  195 ++++++++++++++++++++++++++++++++++++++++++----
 target-arm/translate.c   |  103 ++++++++++++++++++-------
 3 files changed, 257 insertions(+), 44 deletions(-)

-- 
1.7.2.3

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 6/8] target-arm: Fix Neon VQ(R)SHRN instructions.
  2011-01-28 15:50 [Qemu-devel] [PATCH 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI christophe.lyon
@ 2011-01-28 15:51 ` christophe.lyon
  0 siblings, 0 replies; 19+ messages in thread
From: christophe.lyon @ 2011-01-28 15:51 UTC (permalink / raw)
  To: qemu-devel

From: Christophe Lyon <christophe.lyon@st.com>

Handle unsigned variant of VQ(R)SHRN instructions.

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/translate.c |    8 ++++++--
 1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 452cb71..3b14b8f 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4876,8 +4876,12 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         } else { /* VSHRN / VRSHRN */
                             gen_neon_narrow(size - 1, tmp, cpu_V0);
                         }
-                    } else { /* VQSHRN / VQRSHRN */
-                        gen_neon_narrow_satu(size - 1, tmp, cpu_V0);
+                    } else {
+                        if (u) { /* VQSHRUN / VQRSHRUN */
+                            gen_neon_narrow_satu(size - 1, tmp, cpu_V0);
+                        } else { /* VQSHRN / VQRSHRN */
+                            gen_neon_narrow_sats(size - 1, tmp, cpu_V0);
+                        }
                     }
                     neon_store_reg(rd, pass, tmp);
                 } /* for pass */
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2011-02-09 12:16 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-01-31 18:06 [Qemu-devel] [PATCH v2 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI christophe.lyon
2011-01-31 18:06 ` [Qemu-devel] [PATCH 1/8] target-arm: Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL, VRSRA christophe.lyon
2011-02-07 15:57   ` Peter Maydell
2011-02-09 12:16     ` Christophe Lyon
2011-01-31 18:06 ` [Qemu-devel] [PATCH 2/8] target-arm: Create and use neon_unarrow_sat* helpers christophe.lyon
2011-01-31 18:06 ` [Qemu-devel] [PATCH 3/8] target-arm: VQRSHRN related changes christophe.lyon
2011-02-08 13:41   ` Peter Maydell
2011-01-31 18:06 ` [Qemu-devel] [PATCH 4/8] target-arm: fiddle decoding of 64 bit shift by imm and narrow christophe.lyon
2011-02-08 13:49   ` Peter Maydell
2011-01-31 18:06 ` [Qemu-devel] [PATCH 5/8] target-arm: fix neon vqrshl instruction christophe.lyon
2011-01-31 18:06 ` [Qemu-devel] [PATCH 6/8] target-arm: Fix Neon VQ(R)SHRN instructions christophe.lyon
2011-02-07 16:08   ` Peter Maydell
2011-02-07 16:50     ` Christophe Lyon
2011-02-07 17:01       ` Peter Maydell
2011-01-31 18:06 ` [Qemu-devel] [PATCH 7/8] target-arm: implement vsli.64, vsri.64 christophe.lyon
2011-02-07 15:55   ` Peter Maydell
2011-02-07 16:14     ` Peter Maydell
2011-01-31 18:06 ` [Qemu-devel] [PATCH 8/8] target-arm: Fix VQRSHL Neon instructions (signed/unsigned 64 bits and signed 32 bits variants) christophe.lyon
  -- strict thread matches above, loose matches on Subject: below --
2011-01-28 15:50 [Qemu-devel] [PATCH 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI christophe.lyon
2011-01-28 15:51 ` [Qemu-devel] [PATCH 6/8] target-arm: Fix Neon VQ(R)SHRN instructions christophe.lyon

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.