* [PATCH 1/3] target/arm: Convert Neon 3-same-fp size field to MO_* in decode
2020-09-03 13:32 [PATCH 0/3] target/arm: Decode Neon fp sizes in decodetree Peter Maydell
@ 2020-09-03 13:32 ` Peter Maydell
2020-09-03 16:25 ` Richard Henderson
2020-09-03 13:32 ` [PATCH 2/3] target/arm: Convert Neon VCVT fp " Peter Maydell
2020-09-03 13:32 ` [PATCH 3/3] target/arm: Convert VCMLA, VCADD " Peter Maydell
2 siblings, 1 reply; 8+ messages in thread
From: Peter Maydell @ 2020-09-03 13:32 UTC (permalink / raw)
To: qemu-arm, qemu-devel; +Cc: Richard Henderson
In the Neon instructions, some instruction formats have a 2-bit size
field which corresponds exactly to QEMU's MO_8/16/32/64. However the
floating-point insns in the 3-same group have a 1-bit size field
which is "0 for 32-bit float and 1 for 16-bit float". Currently we
pass these values directly through to trans_ functions, which means
that when reading a particular trans_ function you need to know if
that insn uses a 2-bit size or a 1-bit size.
Move the handling of the 1-bit size to the decodetree file, so that
all these insns consistently pass a size to the trans_ function which
is an MO_8/16/32/64 value.
In this commit we switch over the insns using the 3same_fp and
3same_fp_q0 formats.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
I wanted to call the field %3same_fp_size, but decodetree
doesn't seem to allow a field starting with a digit, even
though it does allow a format that starts with a digit.
So it's %fp_3same_size...
---
target/arm/neon-dp.decode | 15 ++++++++++-----
target/arm/translate-neon.c.inc | 16 +++++++++++-----
2 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index 1e9e8592917..f453833396f 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -45,11 +45,16 @@
@3same_q0 .... ... . . . size:2 .... .... .... . 0 . . .... \
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
-# For FP insns the high bit of 'size' is used as part of opcode decode
-@3same_fp .... ... . . . . size:1 .... .... .... . q:1 . . .... \
- &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp
-@3same_fp_q0 .... ... . . . . size:1 .... .... .... . 0 . . .... \
- &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
+# For FP insns the high bit of 'size' is used as part of opcode decode,
+# and the 'size' bit is 0 for 32-bit float and 1 for 16-bit float.
+# This converts this encoding to the same MO_8/16/32/64 values that the
+# integer neon insns use.
+%fp_3same_size 20:1 !function=neon_3same_fp_size
+
+@3same_fp .... ... . . . . . .... .... .... . q:1 . . .... \
+ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=%fp_3same_size
+@3same_fp_q0 .... ... . . . . . .... .... .... . 0 . . .... \
+ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0 size=%fp_3same_size
VHADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 0 .... @3same
VHADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 0 .... @3same
diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc
index 2d4926316a4..255c1cf8a2a 100644
--- a/target/arm/translate-neon.c.inc
+++ b/target/arm/translate-neon.c.inc
@@ -49,6 +49,12 @@ static inline int rsub_8(DisasContext *s, int x)
return 8 - x;
}
+static inline int neon_3same_fp_size(DisasContext *s, int x)
+{
+ /* Convert 0==fp32, 1==fp16 into a MO_* value */
+ return MO_32 - x;
+}
+
/* Include the generated Neon decoder */
#include "decode-neon-dp.c.inc"
#include "decode-neon-ls.c.inc"
@@ -1049,7 +1055,7 @@ DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC) \
static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
{ \
- if (a->size != 0) { \
+ if (a->size == MO_16) { \
if (!dc_isar_feature(aa32_fp16_arith, s)) { \
return false; \
} \
@@ -1088,7 +1094,7 @@ static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
return false;
}
- if (a->size != 0) {
+ if (a->size == MO_16) {
if (!dc_isar_feature(aa32_fp16_arith, s)) {
return false;
}
@@ -1103,7 +1109,7 @@ static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
return false;
}
- if (a->size != 0) {
+ if (a->size == MO_16) {
if (!dc_isar_feature(aa32_fp16_arith, s)) {
return false;
}
@@ -1135,7 +1141,7 @@ static bool do_3same_fp_pair(DisasContext *s, arg_3same *a,
assert(a->q == 0); /* enforced by decode patterns */
- fpstatus = fpstatus_ptr(a->size != 0 ? FPST_STD_F16 : FPST_STD);
+ fpstatus = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
vfp_reg_offset(1, a->vn),
vfp_reg_offset(1, a->vm),
@@ -1152,7 +1158,7 @@ static bool do_3same_fp_pair(DisasContext *s, arg_3same *a,
#define DO_3S_FP_PAIR(INSN,FUNC) \
static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
{ \
- if (a->size != 0) { \
+ if (a->size == MO_16) { \
if (!dc_isar_feature(aa32_fp16_arith, s)) { \
return false; \
} \
--
2.20.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 1/3] target/arm: Convert Neon 3-same-fp size field to MO_* in decode
2020-09-03 13:32 ` [PATCH 1/3] target/arm: Convert Neon 3-same-fp size field to MO_* in decode Peter Maydell
@ 2020-09-03 16:25 ` Richard Henderson
2020-09-03 18:14 ` Peter Maydell
0 siblings, 1 reply; 8+ messages in thread
From: Richard Henderson @ 2020-09-03 16:25 UTC (permalink / raw)
To: Peter Maydell, qemu-arm, qemu-devel
On 9/3/20 6:32 AM, Peter Maydell wrote:
> In the Neon instructions, some instruction formats have a 2-bit size
> field which corresponds exactly to QEMU's MO_8/16/32/64. However the
> floating-point insns in the 3-same group have a 1-bit size field
> which is "0 for 32-bit float and 1 for 16-bit float". Currently we
> pass these values directly through to trans_ functions, which means
> that when reading a particular trans_ function you need to know if
> that insn uses a 2-bit size or a 1-bit size.
>
> Move the handling of the 1-bit size to the decodetree file, so that
> all these insns consistently pass a size to the trans_ function which
> is an MO_8/16/32/64 value.
>
> In this commit we switch over the insns using the 3same_fp and
> 3same_fp_q0 formats.
>
> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> I wanted to call the field %3same_fp_size, but decodetree
> doesn't seem to allow a field starting with a digit, even
> though it does allow a format that starts with a digit.
> So it's %fp_3same_size...
Odd. All of the names get prefixed, so we don't have a problem of a digit
beginning a C identifier... I can look at fixing this if you want.
r~
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/3] target/arm: Convert Neon 3-same-fp size field to MO_* in decode
2020-09-03 16:25 ` Richard Henderson
@ 2020-09-03 18:14 ` Peter Maydell
0 siblings, 0 replies; 8+ messages in thread
From: Peter Maydell @ 2020-09-03 18:14 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-arm, QEMU Developers
On Thu, 3 Sep 2020 at 17:25, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 9/3/20 6:32 AM, Peter Maydell wrote:
> > I wanted to call the field %3same_fp_size, but decodetree
> > doesn't seem to allow a field starting with a digit, even
> > though it does allow a format that starts with a digit.
> > So it's %fp_3same_size...
>
> Odd. All of the names get prefixed, so we don't have a problem of a digit
> beginning a C identifier... I can look at fixing this if you want.
If it's not too involved a fix it would be nice. The failure
is that it doesn't manage to parse the file, rather than
that it produces bad code.
thanks
-- PMM
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 2/3] target/arm: Convert Neon VCVT fp size field to MO_* in decode
2020-09-03 13:32 [PATCH 0/3] target/arm: Decode Neon fp sizes in decodetree Peter Maydell
2020-09-03 13:32 ` [PATCH 1/3] target/arm: Convert Neon 3-same-fp size field to MO_* in decode Peter Maydell
@ 2020-09-03 13:32 ` Peter Maydell
2020-09-03 16:27 ` Richard Henderson
2020-09-03 13:32 ` [PATCH 3/3] target/arm: Convert VCMLA, VCADD " Peter Maydell
2 siblings, 1 reply; 8+ messages in thread
From: Peter Maydell @ 2020-09-03 13:32 UTC (permalink / raw)
To: qemu-arm, qemu-devel; +Cc: Richard Henderson
Convert the insns using the 2reg_vcvt and 2reg_vcvt_f16 formats
to pass the size through to the trans function as a MO_* value
rather than the '0==f32, 1==f16' used in the fp 3-same encodings.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/neon-dp.decode | 3 +--
target/arm/translate-neon.c.inc | 4 ++--
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index f453833396f..d7da2e7db3c 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -256,9 +256,8 @@ VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
@2reg_shll_b .... ... . . . 001 shift:3 .... .... 0 . . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=0 q=0
-# We use size=0 for fp32 and size=1 for fp16 to match the 3-same encodings.
@2reg_vcvt .... ... . . . 1 ..... .... .... . q:1 . . .... \
- &2reg_shift vm=%vm_dp vd=%vd_dp size=0 shift=%neon_rshift_i5
+ &2reg_shift vm=%vm_dp vd=%vd_dp size=2 shift=%neon_rshift_i5
@2reg_vcvt_f16 .... ... . . . 11 .... .... .... . q:1 . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=1 shift=%neon_rshift_i4
diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc
index 255c1cf8a2a..213c1c2174a 100644
--- a/target/arm/translate-neon.c.inc
+++ b/target/arm/translate-neon.c.inc
@@ -1626,7 +1626,7 @@ static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
return false;
}
- if (a->size != 0) {
+ if (a->size == MO_16) {
if (!dc_isar_feature(aa32_fp16_arith, s)) {
return false;
}
@@ -1646,7 +1646,7 @@ static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
return true;
}
- fpst = fpstatus_ptr(a->size ? FPST_STD_F16 : FPST_STD);
+ fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn);
tcg_temp_free_ptr(fpst);
return true;
--
2.20.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 2/3] target/arm: Convert Neon VCVT fp size field to MO_* in decode
2020-09-03 13:32 ` [PATCH 2/3] target/arm: Convert Neon VCVT fp " Peter Maydell
@ 2020-09-03 16:27 ` Richard Henderson
0 siblings, 0 replies; 8+ messages in thread
From: Richard Henderson @ 2020-09-03 16:27 UTC (permalink / raw)
To: Peter Maydell, qemu-arm, qemu-devel
On 9/3/20 6:32 AM, Peter Maydell wrote:
> Convert the insns using the 2reg_vcvt and 2reg_vcvt_f16 formats
> to pass the size through to the trans function as a MO_* value
> rather than the '0==f32, 1==f16' used in the fp 3-same encodings.
>
> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
> ---
> target/arm/neon-dp.decode | 3 +--
> target/arm/translate-neon.c.inc | 4 ++--
> 2 files changed, 3 insertions(+), 4 deletions(-)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 3/3] target/arm: Convert VCMLA, VCADD size field to MO_* in decode
2020-09-03 13:32 [PATCH 0/3] target/arm: Decode Neon fp sizes in decodetree Peter Maydell
2020-09-03 13:32 ` [PATCH 1/3] target/arm: Convert Neon 3-same-fp size field to MO_* in decode Peter Maydell
2020-09-03 13:32 ` [PATCH 2/3] target/arm: Convert Neon VCVT fp " Peter Maydell
@ 2020-09-03 13:32 ` Peter Maydell
2020-09-03 16:32 ` Richard Henderson
2 siblings, 1 reply; 8+ messages in thread
From: Peter Maydell @ 2020-09-03 13:32 UTC (permalink / raw)
To: qemu-arm, qemu-devel; +Cc: Richard Henderson
The VCMLA and VCADD insns have a size field which is 0 for fp16
and 1 for fp32 (note that this is the reverse of the Neon 3-same
encoding!). Convert it to MO_* values in decode for consistency.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/neon-shared.decode | 18 ++++++++++++------
target/arm/translate-neon.c.inc | 22 ++++++++++++----------
2 files changed, 24 insertions(+), 16 deletions(-)
diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode
index f297ba8cdfc..a9d010880d4 100644
--- a/target/arm/neon-shared.decode
+++ b/target/arm/neon-shared.decode
@@ -34,11 +34,17 @@
%vd_dp 22:1 12:4
%vd_sp 12:4 22:1
-VCMLA 1111 110 rot:2 . 1 size:1 .... .... 1000 . q:1 . 0 .... \
- vm=%vm_dp vn=%vn_dp vd=%vd_dp
+# For VCMLA/VCADD insns, convert the single-bit size field
+# which is 0 for fp16 and 1 for fp32 into a MO_* constant.
+# (Note that this is the reverse of the sense of the 1-bit size
+# field in the 3same_fp Neon insns.)
+%vcadd_size 20:1 !function=plus1
-VCADD 1111 110 rot:1 1 . 0 size:1 .... .... 1000 . q:1 . 0 .... \
- vm=%vm_dp vn=%vn_dp vd=%vd_dp
+VCMLA 1111 110 rot:2 . 1 . .... .... 1000 . q:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp size=%vcadd_size
+
+VCADD 1111 110 rot:1 1 . 0 . .... .... 1000 . q:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp size=%vcadd_size
# VUDOT and VSDOT
VDOT 1111 110 00 . 10 .... .... 1101 . q:1 . u:1 .... \
@@ -51,9 +57,9 @@ VFML 1111 110 0 s:1 . 10 .... .... 1000 . 1 . 1 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp q=1
VCMLA_scalar 1111 1110 0 . rot:2 .... .... 1000 . q:1 index:1 0 vm:4 \
- vn=%vn_dp vd=%vd_dp size=0
+ vn=%vn_dp vd=%vd_dp size=1
VCMLA_scalar 1111 1110 1 . rot:2 .... .... 1000 . q:1 . 0 .... \
- vm=%vm_dp vn=%vn_dp vd=%vd_dp size=1 index=0
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp size=2 index=0
VDOT_scalar 1111 1110 0 . 10 .... .... 1101 . q:1 index:1 u:1 rm:4 \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc
index 213c1c2174a..4d1a292981b 100644
--- a/target/arm/translate-neon.c.inc
+++ b/target/arm/translate-neon.c.inc
@@ -168,7 +168,7 @@ static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
gen_helper_gvec_3_ptr *fn_gvec_ptr;
if (!dc_isar_feature(aa32_vcma, s)
- || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) {
+ || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) {
return false;
}
@@ -187,8 +187,9 @@ static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
}
opr_sz = (1 + a->q) * 8;
- fpst = fpstatus_ptr(a->size == 0 ? FPST_STD_F16 : FPST_STD);
- fn_gvec_ptr = a->size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
+ fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
+ fn_gvec_ptr = (a->size == MO_16) ?
+ gen_helper_gvec_fcmlah : gen_helper_gvec_fcmlas;
tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
vfp_reg_offset(1, a->vn),
vfp_reg_offset(1, a->vm),
@@ -205,7 +206,7 @@ static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
gen_helper_gvec_3_ptr *fn_gvec_ptr;
if (!dc_isar_feature(aa32_vcma, s)
- || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) {
+ || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) {
return false;
}
@@ -224,8 +225,9 @@ static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
}
opr_sz = (1 + a->q) * 8;
- fpst = fpstatus_ptr(a->size == 0 ? FPST_STD_F16 : FPST_STD);
- fn_gvec_ptr = a->size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
+ fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
+ fn_gvec_ptr = (a->size == MO_16) ?
+ gen_helper_gvec_fcaddh : gen_helper_gvec_fcadds;
tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
vfp_reg_offset(1, a->vn),
vfp_reg_offset(1, a->vm),
@@ -307,7 +309,7 @@ static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
if (!dc_isar_feature(aa32_vcma, s)) {
return false;
}
- if (a->size == 0 && !dc_isar_feature(aa32_fp16_arith, s)) {
+ if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) {
return false;
}
@@ -325,10 +327,10 @@ static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
return true;
}
- fn_gvec_ptr = (a->size ? gen_helper_gvec_fcmlas_idx
- : gen_helper_gvec_fcmlah_idx);
+ fn_gvec_ptr = (a->size == MO_16) ?
+ gen_helper_gvec_fcmlah_idx : gen_helper_gvec_fcmlas_idx;
opr_sz = (1 + a->q) * 8;
- fpst = fpstatus_ptr(a->size == 0 ? FPST_STD_F16 : FPST_STD);
+ fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
vfp_reg_offset(1, a->vn),
vfp_reg_offset(1, a->vm),
--
2.20.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 3/3] target/arm: Convert VCMLA, VCADD size field to MO_* in decode
2020-09-03 13:32 ` [PATCH 3/3] target/arm: Convert VCMLA, VCADD " Peter Maydell
@ 2020-09-03 16:32 ` Richard Henderson
0 siblings, 0 replies; 8+ messages in thread
From: Richard Henderson @ 2020-09-03 16:32 UTC (permalink / raw)
To: Peter Maydell, qemu-arm, qemu-devel
On 9/3/20 6:32 AM, Peter Maydell wrote:
> The VCMLA and VCADD insns have a size field which is 0 for fp16
> and 1 for fp32 (note that this is the reverse of the Neon 3-same
> encoding!). Convert it to MO_* values in decode for consistency.
>
> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
> ---
> target/arm/neon-shared.decode | 18 ++++++++++++------
> target/arm/translate-neon.c.inc | 22 ++++++++++++----------
> 2 files changed, 24 insertions(+), 16 deletions(-)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 8+ messages in thread