* [Qemu-devel] [PATCH 1/8] target-arm: Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL, VRSRA.
2011-01-31 18:06 [Qemu-devel] [PATCH v2 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI christophe.lyon
@ 2011-01-31 18:06 ` christophe.lyon
2011-02-07 15:57 ` Peter Maydell
2011-01-31 18:06 ` [Qemu-devel] [PATCH 2/8] target-arm: Create and use neon_unarrow_sat* helpers christophe.lyon
` (6 subsequent siblings)
7 siblings, 1 reply; 19+ messages in thread
From: christophe.lyon @ 2011-01-31 18:06 UTC (permalink / raw)
To: qemu-devel
From: Christophe Lyon <christophe.lyon@st.com>
For variants with rounding, fix cases where adding the rounding
constant could overflow.
For VSHLL, fix bit mask.
Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
target-arm/neon_helper.c | 61 ++++++++++++++++++++++++++++++++++++++-------
target-arm/translate.c | 12 +++++++-
2 files changed, 61 insertions(+), 12 deletions(-)
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index fead152..6c832b4 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -451,6 +451,9 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
return val;
}
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64 bits accumulator, which is really needed only when
+ * dealing with 32 bits input values. */
#define NEON_FN(dest, src1, src2) do { \
int8_t tmp; \
tmp = (int8_t)src2; \
@@ -459,11 +462,12 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
} else if (tmp < -(ssize_t)sizeof(src1) * 8) { \
dest = src1 >> (sizeof(src1) * 8 - 1); \
} else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
- dest = src1 >> (tmp - 1); \
+ dest = src1 >> (-tmp - 1); \
dest++; \
dest >>= 1; \
} else if (tmp < 0) { \
- dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
+ int64_t big_dest = ((int64_t)src1 + (1 << (-1 - tmp))); \
+ dest = big_dest >> -tmp; \
} else { \
dest = src1 << tmp; \
}} while (0)
@@ -472,6 +476,8 @@ NEON_VOP(rshl_s16, neon_s16, 2)
NEON_VOP(rshl_s32, neon_s32, 1)
#undef NEON_FN
+/* Handling addition overflow with 64 bits inputs values is more
+ * tricky than with 32 bits values. */
uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
{
int8_t shift = (int8_t)shiftop;
@@ -480,18 +486,37 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
val = 0;
} else if (shift < -64) {
val >>= 63;
- } else if (shift == -63) {
+ } else if (shift == -64) {
val >>= 63;
val++;
val >>= 1;
} else if (shift < 0) {
- val = (val + ((int64_t)1 << (-1 - shift))) >> -shift;
+ int64_t round = (int64_t)1 << (-1 - shift);
+ /* Reduce the range as long as the addition overflows. It's
+ * sufficient to check if (val+round) is < 0 and val > 0
+ * because round is > 0. */
+ while ((val > 0) && ((val + round) < 0) && round > 1) {
+ shift++;
+ round >>= 1;
+ val >>= 1;
+ }
+ if ((val > 0) && (val + round) < 0) {
+ /* If addition still overflows at this point, it means
+ * that round==1, thus shift==-1, and also that
+ * val==0x7FFFFFFFFFFFFFFF. */
+ val = 0x4000000000000000LL;
+ } else {
+ val = (val + round) >> -shift;
+ }
} else {
val <<= shift;
}
return val;
}
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64 bits accumulator, which is really needed only when
+ * dealing with 32 bits input values. */
#define NEON_FN(dest, src1, src2) do { \
int8_t tmp; \
tmp = (int8_t)src2; \
@@ -499,9 +524,10 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
tmp < -(ssize_t)sizeof(src1) * 8) { \
dest = 0; \
} else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
- dest = src1 >> (tmp - 1); \
+ dest = src1 >> (-tmp - 1); \
} else if (tmp < 0) { \
- dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
+ uint64_t big_dest = ((uint64_t)src1 + (1 << (-1 - tmp))); \
+ dest = big_dest >> -tmp; \
} else { \
dest = src1 << tmp; \
}} while (0)
@@ -513,14 +539,29 @@ NEON_VOP(rshl_u32, neon_u32, 1)
uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
{
int8_t shift = (uint8_t)shiftop;
- if (shift >= 64 || shift < 64) {
+ if (shift >= 64 || shift < -64) {
val = 0;
} else if (shift == -64) {
/* Rounding a 1-bit result just preserves that bit. */
val >>= 63;
- } if (shift < 0) {
- val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift;
- val >>= -shift;
+ } else if (shift < 0) {
+ uint64_t round = (uint64_t)1 << (-1 - shift);
+ /* Reduce the range as long as the addition overflows. It's
+ * sufficient to check if (val+round) is < val
+ * because val and round are > 0. */
+ while (((val + round) < val) && round > 1) {
+ shift++;
+ round >>= 1;
+ val >>= 1;
+ }
+ if ((val + round) < val) {
+ /* If addition still overflows at this point, it means
+ * that round==1, thus shift==-1, and also that
+ * val==0x&FFFFFFFFFFFFFFF. */
+ val = 0x8000000000000000LL;
+ } else {
+ val = (val + round) >> -shift;
+ }
} else {
val <<= shift;
}
diff --git a/target-arm/translate.c b/target-arm/translate.c
index d95133f..b44f7a1 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4877,10 +4877,18 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
if (size == 0) {
imm = (0xffu >> (8 - shift));
imm |= imm << 16;
- } else {
+ } else if (size == 1) {
imm = 0xffff >> (16 - shift);
+ } else {
+ /* size == 2 */
+ imm = 0xffffffff >> (32 - shift);
+ }
+ if (size < 2) {
+ imm64 = imm | (((uint64_t)imm) << 32);
+ } else {
+ imm64 = imm;
}
- imm64 = imm | (((uint64_t)imm) << 32);
+ imm64 = ~imm64;
tcg_gen_andi_i64(cpu_V0, cpu_V0, imm64);
}
}
--
1.7.2.3
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH 1/8] target-arm: Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL, VRSRA.
2011-01-31 18:06 ` [Qemu-devel] [PATCH 1/8] target-arm: Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL, VRSRA christophe.lyon
@ 2011-02-07 15:57 ` Peter Maydell
2011-02-09 12:16 ` Christophe Lyon
0 siblings, 1 reply; 19+ messages in thread
From: Peter Maydell @ 2011-02-07 15:57 UTC (permalink / raw)
To: christophe.lyon; +Cc: qemu-devel
On 31 January 2011 18:06, <christophe.lyon@st.com> wrote:
> For variants with rounding, fix cases where adding the rounding
> constant could overflow.
>
> For VSHLL, fix bit mask.
These two things are completely distinct -- please put them
in separate patches.
-- PMM
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH 1/8] target-arm: Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL, VRSRA.
2011-02-07 15:57 ` Peter Maydell
@ 2011-02-09 12:16 ` Christophe Lyon
0 siblings, 0 replies; 19+ messages in thread
From: Christophe Lyon @ 2011-02-09 12:16 UTC (permalink / raw)
To: Peter Maydell; +Cc: qemu-devel
On 07.02.2011 16:57, Peter Maydell wrote:
> On 31 January 2011 18:06, <christophe.lyon@st.com> wrote:
>> For variants with rounding, fix cases where adding the rounding
>> constant could overflow.
>>
>> For VSHLL, fix bit mask.
>
> These two things are completely distinct -- please put them
> in separate patches.
>
OK, I am going to re-submit this whole series of patches.
Thanks
Christophe.
^ permalink raw reply [flat|nested] 19+ messages in thread
* [Qemu-devel] [PATCH 2/8] target-arm: Create and use neon_unarrow_sat* helpers
2011-01-31 18:06 [Qemu-devel] [PATCH v2 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI christophe.lyon
2011-01-31 18:06 ` [Qemu-devel] [PATCH 1/8] target-arm: Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL, VRSRA christophe.lyon
@ 2011-01-31 18:06 ` christophe.lyon
2011-01-31 18:06 ` [Qemu-devel] [PATCH 3/8] target-arm: VQRSHRN related changes christophe.lyon
` (5 subsequent siblings)
7 siblings, 0 replies; 19+ messages in thread
From: christophe.lyon @ 2011-01-31 18:06 UTC (permalink / raw)
To: qemu-devel
From: Christophe Lyon <christophe.lyon@st.com>
Fix VQMOVUN, improve VQSHRUN and VQRSHRUN.
Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
target-arm/helpers.h | 3 ++
target-arm/neon_helper.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++
target-arm/translate.c | 43 ++++++++++++++++++++++---------
3 files changed, 96 insertions(+), 13 deletions(-)
diff --git a/target-arm/helpers.h b/target-arm/helpers.h
index b88ebae..8cc6a44 100644
--- a/target-arm/helpers.h
+++ b/target-arm/helpers.h
@@ -295,10 +295,13 @@ DEF_HELPER_3(neon_qrdmulh_s32, i32, env, i32, i32)
DEF_HELPER_1(neon_narrow_u8, i32, i64)
DEF_HELPER_1(neon_narrow_u16, i32, i64)
+DEF_HELPER_2(neon_unarrow_sat8, i32, env, i64)
DEF_HELPER_2(neon_narrow_sat_u8, i32, env, i64)
DEF_HELPER_2(neon_narrow_sat_s8, i32, env, i64)
+DEF_HELPER_2(neon_unarrow_sat16, i32, env, i64)
DEF_HELPER_2(neon_narrow_sat_u16, i32, env, i64)
DEF_HELPER_2(neon_narrow_sat_s16, i32, env, i64)
+DEF_HELPER_2(neon_unarrow_sat32, i32, env, i64)
DEF_HELPER_2(neon_narrow_sat_u32, i32, env, i64)
DEF_HELPER_2(neon_narrow_sat_s32, i32, env, i64)
DEF_HELPER_1(neon_narrow_high_u8, i32, i64)
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index 6c832b4..891b812 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -1005,6 +1005,33 @@ uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x)
return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000);
}
+uint32_t HELPER(neon_unarrow_sat8)(CPUState *env, uint64_t x)
+{
+ uint16_t s;
+ uint8_t d;
+ uint32_t res = 0;
+#define SAT8(n) \
+ s = x >> n; \
+ if (s & 0x8000) { \
+ SET_QC(); \
+ } else { \
+ if (s > 0xff) { \
+ d = 0xff; \
+ SET_QC(); \
+ } else { \
+ d = s; \
+ } \
+ res |= (uint32_t)d << (n / 2); \
+ }
+
+ SAT8(0);
+ SAT8(16);
+ SAT8(32);
+ SAT8(48);
+#undef SAT8
+ return res;
+}
+
uint32_t HELPER(neon_narrow_sat_u8)(CPUState *env, uint64_t x)
{
uint16_t s;
@@ -1051,6 +1078,29 @@ uint32_t HELPER(neon_narrow_sat_s8)(CPUState *env, uint64_t x)
return res;
}
+uint32_t HELPER(neon_unarrow_sat16)(CPUState *env, uint64_t x)
+{
+ uint32_t high;
+ uint32_t low;
+ low = x;
+ if (low & 0x80000000) {
+ low = 0;
+ SET_QC();
+ } else if (low > 0xffff) {
+ low = 0xffff;
+ SET_QC();
+ }
+ high = x >> 32;
+ if (high & 0x80000000) {
+ high = 0;
+ SET_QC();
+ } else if (high > 0xffff) {
+ high = 0xffff;
+ SET_QC();
+ }
+ return low | (high << 16);
+}
+
uint32_t HELPER(neon_narrow_sat_u16)(CPUState *env, uint64_t x)
{
uint32_t high;
@@ -1085,6 +1135,19 @@ uint32_t HELPER(neon_narrow_sat_s16)(CPUState *env, uint64_t x)
return (uint16_t)low | (high << 16);
}
+uint32_t HELPER(neon_unarrow_sat32)(CPUState *env, uint64_t x)
+{
+ if (x & 0x8000000000000000ull) {
+ SET_QC();
+ return 0;
+ }
+ if (x > 0xffffffffu) {
+ SET_QC();
+ return 0xffffffffu;
+ }
+ return x;
+}
+
uint32_t HELPER(neon_narrow_sat_u32)(CPUState *env, uint64_t x)
{
if (x > 0xffffffffu) {
diff --git a/target-arm/translate.c b/target-arm/translate.c
index b44f7a1..6dd024d 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4071,6 +4071,16 @@ static inline void gen_neon_narrow_satu(int size, TCGv dest, TCGv_i64 src)
}
}
+static inline void gen_neon_unarrow_sats(int size, TCGv dest, TCGv_i64 src)
+{
+ switch(size) {
+ case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
+ case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
+ case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
+ default: abort();
+ }
+}
+
static inline void gen_neon_shift_narrow(int size, TCGv var, TCGv shift,
int q, int u)
{
@@ -4841,13 +4851,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
dead_tmp(tmp3);
}
tmp = new_tmp();
- if (op == 8 && !u) {
- gen_neon_narrow(size - 1, tmp, cpu_V0);
- } else {
- if (op == 8)
- gen_neon_narrow_sats(size - 1, tmp, cpu_V0);
- else
- gen_neon_narrow_satu(size - 1, tmp, cpu_V0);
+ if (op == 8) {
+ if (u) { /* VQSHRUN / VQRSHRUN */
+ gen_neon_unarrow_sats(size - 1, tmp, cpu_V0);
+ } else { /* VSHRN / VRSHRN */
+ gen_neon_narrow(size - 1, tmp, cpu_V0);
+ }
+ } else { /* VQSHRN / VQRSHRN */
+ gen_neon_narrow_satu(size - 1, tmp, cpu_V0);
}
neon_store_reg(rd, pass, tmp);
} /* for pass */
@@ -5471,12 +5482,18 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
for (pass = 0; pass < 2; pass++) {
neon_load_reg64(cpu_V0, rm + pass);
tmp = new_tmp();
- if (op == 36 && q == 0) {
- gen_neon_narrow(size, tmp, cpu_V0);
- } else if (q) {
- gen_neon_narrow_satu(size, tmp, cpu_V0);
- } else {
- gen_neon_narrow_sats(size, tmp, cpu_V0);
+ if (op == 36) {
+ if (q) { /* VQMOVUN */
+ gen_neon_unarrow_sats(size, tmp, cpu_V0);
+ } else { /* VMOVN */
+ gen_neon_narrow(size, tmp, cpu_V0);
+ }
+ } else { /* VQMOVN */
+ if (q) {
+ gen_neon_narrow_satu(size, tmp, cpu_V0);
+ } else {
+ gen_neon_narrow_sats(size, tmp, cpu_V0);
+ }
}
if (pass == 0) {
tmp2 = tmp;
--
1.7.2.3
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [Qemu-devel] [PATCH 3/8] target-arm: VQRSHRN related changes
2011-01-31 18:06 [Qemu-devel] [PATCH v2 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI christophe.lyon
2011-01-31 18:06 ` [Qemu-devel] [PATCH 1/8] target-arm: Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL, VRSRA christophe.lyon
2011-01-31 18:06 ` [Qemu-devel] [PATCH 2/8] target-arm: Create and use neon_unarrow_sat* helpers christophe.lyon
@ 2011-01-31 18:06 ` christophe.lyon
2011-02-08 13:41 ` Peter Maydell
2011-01-31 18:06 ` [Qemu-devel] [PATCH 4/8] target-arm: fiddle decoding of 64 bit shift by imm and narrow christophe.lyon
` (4 subsequent siblings)
7 siblings, 1 reply; 19+ messages in thread
From: christophe.lyon @ 2011-01-31 18:06 UTC (permalink / raw)
To: qemu-devel
From: Christophe Lyon <christophe.lyon@st.com>
More fixes for VQSHRN and VQSHRUN.
Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
target-arm/translate.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 6dd024d..9ca5b82 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4101,8 +4101,8 @@ static inline void gen_neon_shift_narrow(int size, TCGv var, TCGv shift,
} else {
if (u) {
switch (size) {
- case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
- case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
+ case 1: gen_helper_neon_shl_u16(var, var, shift); break;
+ case 2: gen_helper_neon_shl_u32(var, var, shift); break;
default: abort();
}
} else {
--
1.7.2.3
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [Qemu-devel] [PATCH 4/8] target-arm: fiddle decoding of 64 bit shift by imm and narrow
2011-01-31 18:06 [Qemu-devel] [PATCH v2 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI christophe.lyon
` (2 preceding siblings ...)
2011-01-31 18:06 ` [Qemu-devel] [PATCH 3/8] target-arm: VQRSHRN related changes christophe.lyon
@ 2011-01-31 18:06 ` christophe.lyon
2011-02-08 13:49 ` Peter Maydell
2011-01-31 18:06 ` [Qemu-devel] [PATCH 5/8] target-arm: fix neon vqrshl instruction christophe.lyon
` (3 subsequent siblings)
7 siblings, 1 reply; 19+ messages in thread
From: christophe.lyon @ 2011-01-31 18:06 UTC (permalink / raw)
To: qemu-devel
From: Christophe Lyon <christophe.lyon@st.com>
Tweak decoding of the shift-by-imm and narrow 64 bit insns
(VSHRN, VRSHRN, VQSHRN, VQSHRUN, VQRSHRN, VQRSHRUN).
Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
target-arm/translate.c | 28 ++++++++++++++++++----------
1 files changed, 18 insertions(+), 10 deletions(-)
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 9ca5b82..a614e34 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4831,21 +4831,29 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
if (size == 3) {
neon_load_reg64(cpu_V0, rm + pass);
if (q) {
- if (u)
- gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, tmp64);
- else
- gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, tmp64);
+ if ((op == 8 && !u) || (op == 9 && u)) {
+ gen_helper_neon_rshl_u64(cpu_V0, cpu_V0,
+ tmp64);
+ } else {
+ gen_helper_neon_rshl_s64(cpu_V0, cpu_V0,
+ tmp64);
+ }
} else {
- if (u)
- gen_helper_neon_shl_u64(cpu_V0, cpu_V0, tmp64);
- else
- gen_helper_neon_shl_s64(cpu_V0, cpu_V0, tmp64);
+ if ((op == 8 && !u) || (op == 9 && u)) {
+ gen_helper_neon_shl_u64(cpu_V0, cpu_V0,
+ tmp64);
+ } else {
+ gen_helper_neon_shl_s64(cpu_V0, cpu_V0,
+ tmp64);
+ }
}
} else {
tmp = neon_load_reg(rm + pass, 0);
- gen_neon_shift_narrow(size, tmp, tmp2, q, u);
+ gen_neon_shift_narrow(size, tmp, tmp2, q,
+ (op == 8) ? !u : u);
tmp3 = neon_load_reg(rm + pass, 1);
- gen_neon_shift_narrow(size, tmp3, tmp2, q, u);
+ gen_neon_shift_narrow(size, tmp3, tmp2, q,
+ (op == 8) ? !u : u);
tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
dead_tmp(tmp);
dead_tmp(tmp3);
--
1.7.2.3
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH 4/8] target-arm: fiddle decoding of 64 bit shift by imm and narrow
2011-01-31 18:06 ` [Qemu-devel] [PATCH 4/8] target-arm: fiddle decoding of 64 bit shift by imm and narrow christophe.lyon
@ 2011-02-08 13:49 ` Peter Maydell
0 siblings, 0 replies; 19+ messages in thread
From: Peter Maydell @ 2011-02-08 13:49 UTC (permalink / raw)
To: christophe.lyon; +Cc: qemu-devel
On 31 January 2011 18:06, <christophe.lyon@st.com> wrote:
> From: Christophe Lyon <christophe.lyon@st.com>
>
> Tweak decoding of the shift-by-imm and narrow 64 bit insns
> (VSHRN, VRSHRN, VQSHRN, VQSHRUN, VQRSHRN, VQRSHRUN).
>
> Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
> ---
> target-arm/translate.c | 28 ++++++++++++++++++----------
> 1 files changed, 18 insertions(+), 10 deletions(-)
>
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 9ca5b82..a614e34 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -4831,21 +4831,29 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
> if (size == 3) {
> neon_load_reg64(cpu_V0, rm + pass);
> if (q) {
> - if (u)
> - gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, tmp64);
> - else
> - gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, tmp64);
> + if ((op == 8 && !u) || (op == 9 && u)) {
This patch would improved if you started with:
int input_unsigned = (op == 8) ? !u : u;
and then used that here and:
> + if ((op == 8 && !u) || (op == 9 && u)) {
here...
> + gen_neon_shift_narrow(size, tmp, tmp2, q,
> + (op == 8) ? !u : u);
here...
> + gen_neon_shift_narrow(size, tmp3, tmp2, q,
> + (op == 8) ? !u : u);
and here.
(What all these things are trying to check is whether the
input elements for the operations are signed or unsigned;
this isn't the same as the u bit, which is whether the output
is unsigned.)
-- PMM
^ permalink raw reply [flat|nested] 19+ messages in thread
* [Qemu-devel] [PATCH 5/8] target-arm: fix neon vqrshl instruction
2011-01-31 18:06 [Qemu-devel] [PATCH v2 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI christophe.lyon
` (3 preceding siblings ...)
2011-01-31 18:06 ` [Qemu-devel] [PATCH 4/8] target-arm: fiddle decoding of 64 bit shift by imm and narrow christophe.lyon
@ 2011-01-31 18:06 ` christophe.lyon
2011-01-31 18:06 ` [Qemu-devel] [PATCH 6/8] target-arm: Fix Neon VQ(R)SHRN instructions christophe.lyon
` (2 subsequent siblings)
7 siblings, 0 replies; 19+ messages in thread
From: christophe.lyon @ 2011-01-31 18:06 UTC (permalink / raw)
To: qemu-devel
From: Christophe Lyon <christophe.lyon@st.com>
Signed-off-by: Juha Riihimäki <juha.riihimaki@nokia.com>
Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
target-arm/neon_helper.c | 21 ++++++++++++++++++---
1 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index 891b812..46fcdc4 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -736,9 +736,24 @@ uint64_t HELPER(neon_qshlu_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
}} while (0)
NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
NEON_VOP_ENV(qrshl_u16, neon_u16, 2)
-NEON_VOP_ENV(qrshl_u32, neon_u32, 1)
#undef NEON_FN
+uint32_t HELPER(neon_qrshl_u32)(CPUState *env, uint32_t val, uint32_t shiftop)
+{
+ int8_t shift = (int8_t)shiftop;
+ if (shift < 0) {
+ val = ((uint64_t)val + (1 << (-1 - shift))) >> -shift;
+ } else {
+ uint32_t tmp = val;
+ val <<= shift;
+ if ((val >> shift) != tmp) {
+ SET_QC();
+ val = ~0;
+ }
+ }
+ return val;
+}
+
uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
{
int8_t shift = (int8_t)shiftop;
@@ -764,7 +779,7 @@ uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
dest = src1 << tmp; \
if ((dest >> tmp) != src1) { \
SET_QC(); \
- dest = src1 >> 31; \
+ dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)) - (src1 > 0 ? 1 : 0); \
} \
}} while (0)
NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
@@ -780,7 +795,7 @@ uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
if (shift < 0) {
val = (val + (1 << (-1 - shift))) >> -shift;
} else {
- int64_t tmp = val;;
+ int64_t tmp = val;
val <<= shift;
if ((val >> shift) != tmp) {
SET_QC();
--
1.7.2.3
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [Qemu-devel] [PATCH 6/8] target-arm: Fix Neon VQ(R)SHRN instructions.
2011-01-31 18:06 [Qemu-devel] [PATCH v2 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI christophe.lyon
` (4 preceding siblings ...)
2011-01-31 18:06 ` [Qemu-devel] [PATCH 5/8] target-arm: fix neon vqrshl instruction christophe.lyon
@ 2011-01-31 18:06 ` christophe.lyon
2011-02-07 16:08 ` Peter Maydell
2011-01-31 18:06 ` [Qemu-devel] [PATCH 7/8] target-arm: implement vsli.64, vsri.64 christophe.lyon
2011-01-31 18:06 ` [Qemu-devel] [PATCH 8/8] target-arm: Fix VQRSHL Neon instructions (signed/unsigned 64 bits and signed 32 bits variants) christophe.lyon
7 siblings, 1 reply; 19+ messages in thread
From: christophe.lyon @ 2011-01-31 18:06 UTC (permalink / raw)
To: qemu-devel
From: Christophe Lyon <christophe.lyon@st.com>
Handle unsigned variant of VQ(R)SHRN instructions.
Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
target-arm/translate.c | 8 ++++++--
1 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/target-arm/translate.c b/target-arm/translate.c
index a614e34..61d4c4c 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4865,8 +4865,12 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
} else { /* VSHRN / VRSHRN */
gen_neon_narrow(size - 1, tmp, cpu_V0);
}
- } else { /* VQSHRN / VQRSHRN */
- gen_neon_narrow_satu(size - 1, tmp, cpu_V0);
+ } else {
+ if (u) { /* VQSHRUN / VQRSHRUN */
+ gen_neon_narrow_satu(size - 1, tmp, cpu_V0);
+ } else { /* VQSHRN / VQRSHRN */
+ gen_neon_narrow_sats(size - 1, tmp, cpu_V0);
+ }
}
neon_store_reg(rd, pass, tmp);
} /* for pass */
--
1.7.2.3
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH 6/8] target-arm: Fix Neon VQ(R)SHRN instructions.
2011-01-31 18:06 ` [Qemu-devel] [PATCH 6/8] target-arm: Fix Neon VQ(R)SHRN instructions christophe.lyon
@ 2011-02-07 16:08 ` Peter Maydell
2011-02-07 16:50 ` Christophe Lyon
0 siblings, 1 reply; 19+ messages in thread
From: Peter Maydell @ 2011-02-07 16:08 UTC (permalink / raw)
To: christophe.lyon; +Cc: qemu-devel
On 31 January 2011 18:06, <christophe.lyon@st.com> wrote:
> From: Christophe Lyon <christophe.lyon@st.com>
>
> Handle unsigned variant of VQ(R)SHRN instructions.
This patch appears to be modifying a section of code
that was already patched by 2/8 in this series. That's
too confusing to review -- please combine them into
one patch.
-- PMM
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH 6/8] target-arm: Fix Neon VQ(R)SHRN instructions.
2011-02-07 16:08 ` Peter Maydell
@ 2011-02-07 16:50 ` Christophe Lyon
2011-02-07 17:01 ` Peter Maydell
0 siblings, 1 reply; 19+ messages in thread
From: Christophe Lyon @ 2011-02-07 16:50 UTC (permalink / raw)
To: Peter Maydell; +Cc: qemu-devel
On 07.02.2011 17:08, Peter Maydell wrote:
> On 31 January 2011 18:06, <christophe.lyon@st.com> wrote:
>> From: Christophe Lyon <christophe.lyon@st.com>
>>
>> Handle unsigned variant of VQ(R)SHRN instructions.
>
> This patch appears to be modifying a section of code
> that was already patched by 2/8 in this series. That's
> too confusing to review -- please combine them into
> one patch.
>
That's because I borrowed patch 2/8 from your meego tree. It was incomplete, but as I preferred to keep it as-is, I completed it in a separate patch.
>From your other comments it looks like I'd have had better ignore the patches from Meego and rewrite the patches on my own ;-)
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH 6/8] target-arm: Fix Neon VQ(R)SHRN instructions.
2011-02-07 16:50 ` Christophe Lyon
@ 2011-02-07 17:01 ` Peter Maydell
0 siblings, 0 replies; 19+ messages in thread
From: Peter Maydell @ 2011-02-07 17:01 UTC (permalink / raw)
To: Christophe Lyon; +Cc: qemu-devel
On 7 February 2011 16:50, Christophe Lyon <christophe.lyon@st.com> wrote:
> On 07.02.2011 17:08, Peter Maydell wrote:
>> On 31 January 2011 18:06, <christophe.lyon@st.com> wrote:
>>> From: Christophe Lyon <christophe.lyon@st.com>
>>>
>>> Handle unsigned variant of VQ(R)SHRN instructions.
>>
>> This patch appears to be modifying a section of code
>> that was already patched by 2/8 in this series. That's
>> too confusing to review -- please combine them into
>> one patch.
>>
>
> That's because I borrowed patch 2/8 from your meego tree.
> It was incomplete, but as I preferred to keep it as-is, I
> completed it in a separate patch.
>
> From your other comments it looks like I'd have had better ignore
> the patches from Meego and rewrite the patches on my own ;-)
Well, you shouldn't ignore them, but part of the process of getting
those patches into upstream is cleaning them up: testing whether
they work, adding the cases where they don't, providing better
commit messages, splitting and combining them so that each
patch submitted upstream is a single easy to review logical change,
and so on. (It's exactly because this isn't a totally trivial process
that there's still a queue of non-upstreamed patches in that tree.)
-- PMM
^ permalink raw reply [flat|nested] 19+ messages in thread
* [Qemu-devel] [PATCH 7/8] target-arm: implement vsli.64, vsri.64
2011-01-31 18:06 [Qemu-devel] [PATCH v2 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI christophe.lyon
` (5 preceding siblings ...)
2011-01-31 18:06 ` [Qemu-devel] [PATCH 6/8] target-arm: Fix Neon VQ(R)SHRN instructions christophe.lyon
@ 2011-01-31 18:06 ` christophe.lyon
2011-02-07 15:55 ` Peter Maydell
2011-01-31 18:06 ` [Qemu-devel] [PATCH 8/8] target-arm: Fix VQRSHL Neon instructions (signed/unsigned 64 bits and signed 32 bits variants) christophe.lyon
7 siblings, 1 reply; 19+ messages in thread
From: christophe.lyon @ 2011-01-31 18:06 UTC (permalink / raw)
To: qemu-devel
From: Christophe Lyon <christophe.lyon@st.com>
Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
target-arm/translate.c | 11 ++++++++++-
1 files changed, 10 insertions(+), 1 deletions(-)
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 61d4c4c..9150242 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4700,7 +4700,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
} else if (op == 4 || (op == 5 && u)) {
/* Insert */
- cpu_abort(env, "VS[LR]I.64 not implemented");
+ neon_load_reg64(cpu_V1, rd + pass);
+ uint64_t mask;
+ if (op == 4) {
+ mask = 0xffffffffffffffffull >> -shift;
+ } else {
+ mask = 0xffffffffffffffffull << shift;
+ }
+ tcg_gen_andi_i64(cpu_V0, cpu_V0, mask);
+ tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask);
+ tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
}
neon_store_reg64(cpu_V0, rd + pass);
} else { /* size < 3 */
--
1.7.2.3
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH 7/8] target-arm: implement vsli.64, vsri.64
2011-01-31 18:06 ` [Qemu-devel] [PATCH 7/8] target-arm: implement vsli.64, vsri.64 christophe.lyon
@ 2011-02-07 15:55 ` Peter Maydell
2011-02-07 16:14 ` Peter Maydell
0 siblings, 1 reply; 19+ messages in thread
From: Peter Maydell @ 2011-02-07 15:55 UTC (permalink / raw)
To: christophe.lyon; +Cc: qemu-devel
On 31 January 2011 18:06, <christophe.lyon@st.com> wrote:
> From: Christophe Lyon <christophe.lyon@st.com>
>
> Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
> ---
> target-arm/translate.c | 11 ++++++++++-
> 1 files changed, 10 insertions(+), 1 deletions(-)
>
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 61d4c4c..9150242 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -4700,7 +4700,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
> tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
> } else if (op == 4 || (op == 5 && u)) {
> /* Insert */
> - cpu_abort(env, "VS[LR]I.64 not implemented");
> + neon_load_reg64(cpu_V1, rd + pass);
> + uint64_t mask;
> + if (op == 4) {
> + mask = 0xffffffffffffffffull >> -shift;
> + } else {
> + mask = 0xffffffffffffffffull << shift;
> + }
If shift is 64 or -64 then the result of this shift is undefined (and for
an x86 host we get the wrong results for eg "vsri.64 q5,q5,64").
You want to add an
if (shift < -63 || shift > 63) {
mask = 0;
} else ...
clause (compare the 32 bit case.)
> + tcg_gen_andi_i64(cpu_V0, cpu_V0, mask);
This AND is harmless but unnecessary (and not specified in the
ARM ARM.)
-- PMM
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH 7/8] target-arm: implement vsli.64, vsri.64
2011-02-07 15:55 ` Peter Maydell
@ 2011-02-07 16:14 ` Peter Maydell
0 siblings, 0 replies; 19+ messages in thread
From: Peter Maydell @ 2011-02-07 16:14 UTC (permalink / raw)
To: christophe.lyon; +Cc: qemu-devel
On 7 February 2011 15:55, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 31 January 2011 18:06, <christophe.lyon@st.com> wrote:
>> From: Christophe Lyon <christophe.lyon@st.com>
>>
>> Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
Incidentally, this patch works fine (when corrected) as a standalone
fix for these instructions, so you could just submit the reworked
version as a patch on its own if you don't want it to get tangled
up in review of the other patches in this series.
-- PMM
^ permalink raw reply [flat|nested] 19+ messages in thread
* [Qemu-devel] [PATCH 8/8] target-arm: Fix VQRSHL Neon instructions (signed/unsigned 64 bits and signed 32 bits variants).
2011-01-31 18:06 [Qemu-devel] [PATCH v2 0/8] target-arm: Fix Neon instructions VQMOVUN VQRSHL VQRSHRN VQRSHRUN VQSHRN VQSHRUN VSLI VSRI christophe.lyon
` (6 preceding siblings ...)
2011-01-31 18:06 ` [Qemu-devel] [PATCH 7/8] target-arm: implement vsli.64, vsri.64 christophe.lyon
@ 2011-01-31 18:06 ` christophe.lyon
7 siblings, 0 replies; 19+ messages in thread
From: christophe.lyon @ 2011-01-31 18:06 UTC (permalink / raw)
To: qemu-devel
From: Christophe Lyon <christophe.lyon@st.com>
The addition of the rounding constant could cause overflows.
Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
target-arm/neon_helper.c | 50 ++++++++++++++++++++++++++++++++++++++++++---
1 files changed, 46 insertions(+), 4 deletions(-)
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index 46fcdc4..2f96575 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -758,7 +758,23 @@ uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
{
int8_t shift = (int8_t)shiftop;
if (shift < 0) {
- val = (val + (1 << (-1 - shift))) >> -shift;
+ uint64_t round = (uint64_t)1 << (-1 - shift);
+ /* Reduce the range as long as the addition overflows. It's
+ * sufficient to check if (val+round) is < val
+ * because val and round are > 0. */
+ while (((val + round) < val) && round > 1) {
+ shift++;
+ round >>= 1;
+ val >>= 1;
+ }
+ if ((val + round) < val) {
+ /* If addition still overflows at this point, it means
+ * that round==1, thus shift==-1, and also that
+ * val==0x&FFFFFFFFFFFFFFF. */
+ val = 0x8000000000000000LL;
+ } else {
+ val = (val + round) >> -shift;
+ }
} else { \
uint64_t tmp = val;
val <<= shift;
@@ -770,11 +786,15 @@ uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
return val;
}
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64 bits accumulator, which is really needed only when
+ * dealing with 32 bits input values. */
#define NEON_FN(dest, src1, src2) do { \
int8_t tmp; \
tmp = (int8_t)src2; \
if (tmp < 0) { \
- dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
+ int64_t big_dest = ((int64_t)src1 + (1 << (-1 - tmp))); \
+ dest = big_dest >> -tmp; \
} else { \
dest = src1 << tmp; \
if ((dest >> tmp) != src1) { \
@@ -787,19 +807,41 @@ NEON_VOP_ENV(qrshl_s16, neon_s16, 2)
NEON_VOP_ENV(qrshl_s32, neon_s32, 1)
#undef NEON_FN
+/* Handling addition overflow with 64 bits inputs values is more
+ * tricky than with 32 bits values. */
uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
{
int8_t shift = (uint8_t)shiftop;
int64_t val = valop;
if (shift < 0) {
- val = (val + (1 << (-1 - shift))) >> -shift;
+ int64_t round = (int64_t)1 << (-1 - shift);
+ /* Reduce the range as long as the addition overflows. It's
+ * sufficient to check if (val+round) is < 0 and val > 0
+ * because round is > 0. */
+ while ((val > 0) && ((val + round) < 0) && round > 1) {
+ shift++;
+ round >>= 1;
+ val >>= 1;
+ }
+ if ((val > 0) && (val + round) < 0) {
+ /* If addition still overflows at this point, it means
+ * that round==1, thus shift==-1, and also that
+ * val==0x7FFFFFFFFFFFFFFF. */
+ val = 0x4000000000000000LL;
+ } else {
+ val = (val + round) >> -shift;
+ }
} else {
int64_t tmp = val;
val <<= shift;
if ((val >> shift) != tmp) {
SET_QC();
- val = tmp >> 31;
+ if (tmp < 0) {
+ val = INT64_MIN;
+ } else {
+ val = INT64_MAX;
+ }
}
}
return val;
--
1.7.2.3
^ permalink raw reply related [flat|nested] 19+ messages in thread