* [Qemu-devel] [PATCH 0/5] target/arm: More SVE prep work
@ 2018-02-10 23:05 Richard Henderson
2018-02-10 23:05 ` [Qemu-devel] [PATCH 1/5] target/arm: Remove ARM_CP_64BIT from ZCR_EL registers Richard Henderson
` (4 more replies)
0 siblings, 5 replies; 6+ messages in thread
From: Richard Henderson @ 2018-02-10 23:05 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, qemu-arm
First, we had noted that ARM_CP_64BIT needed to be removed from
the ZCR_EL registers, but the patch set was applied without
actually fixing that.
Second, there's an existing bug by which the FPCR/FPSR registers
are not properly trapped when FP is disabled. Fix that with a
translation-time check.
Third, my attempt at using .accessfn for ZCR_EL fails to take
into account the two different exception syndromes that must be
raised. Although they probably aren't as important as FPCR/FPSR,
handle them at translation time too.
Fourth, when writing to an AdvSIMD register, zero the rest of
the SVE register.
r~
Richard Henderson (5):
target/arm: Remove ARM_CP_64BIT from ZCR_EL registers
target/arm: Enforce FP access to FPCR/FPSR
target/arm: Suppress TB end for FPCR/FPSR
target/arm: Enforce access to ZCR_EL at translation
target/arm: Handle SVE registers when using clear_vec_high
target/arm/cpu.h | 36 ++++-----
target/arm/internals.h | 6 ++
target/arm/helper.c | 28 ++-----
target/arm/translate-a64.c | 181 ++++++++++++++++++++-------------------------
4 files changed, 114 insertions(+), 137 deletions(-)
--
2.14.3
^ permalink raw reply [flat|nested] 6+ messages in thread
* [Qemu-devel] [PATCH 1/5] target/arm: Remove ARM_CP_64BIT from ZCR_EL registers
2018-02-10 23:05 [Qemu-devel] [PATCH 0/5] target/arm: More SVE prep work Richard Henderson
@ 2018-02-10 23:05 ` Richard Henderson
2018-02-10 23:05 ` [Qemu-devel] [PATCH 2/5] target/arm: Enforce FP access to FPCR/FPSR Richard Henderson
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2018-02-10 23:05 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, qemu-arm
Because they are ARM_CP_STATE_AA64, ARM_CP_64BIT is implied.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 180ab75458..4b102ec356 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -4357,7 +4357,7 @@ static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
static const ARMCPRegInfo zcr_el1_reginfo = {
.name = "ZCR_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 1, .crm = 2, .opc2 = 0,
- .access = PL1_RW, .accessfn = zcr_access, .type = ARM_CP_64BIT,
+ .access = PL1_RW, .accessfn = zcr_access,
.fieldoffset = offsetof(CPUARMState, vfp.zcr_el[1]),
.writefn = zcr_write, .raw_writefn = raw_write
};
@@ -4365,7 +4365,7 @@ static const ARMCPRegInfo zcr_el1_reginfo = {
static const ARMCPRegInfo zcr_el2_reginfo = {
.name = "ZCR_EL2", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 0,
- .access = PL2_RW, .accessfn = zcr_access, .type = ARM_CP_64BIT,
+ .access = PL2_RW, .accessfn = zcr_access,
.fieldoffset = offsetof(CPUARMState, vfp.zcr_el[2]),
.writefn = zcr_write, .raw_writefn = raw_write
};
@@ -4373,14 +4373,14 @@ static const ARMCPRegInfo zcr_el2_reginfo = {
static const ARMCPRegInfo zcr_no_el2_reginfo = {
.name = "ZCR_EL2", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 0,
- .access = PL2_RW, .type = ARM_CP_64BIT,
+ .access = PL2_RW,
.readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore
};
static const ARMCPRegInfo zcr_el3_reginfo = {
.name = "ZCR_EL3", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 6, .crn = 1, .crm = 2, .opc2 = 0,
- .access = PL3_RW, .accessfn = zcr_access, .type = ARM_CP_64BIT,
+ .access = PL3_RW, .accessfn = zcr_access,
.fieldoffset = offsetof(CPUARMState, vfp.zcr_el[3]),
.writefn = zcr_write, .raw_writefn = raw_write
};
--
2.14.3
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Qemu-devel] [PATCH 2/5] target/arm: Enforce FP access to FPCR/FPSR
2018-02-10 23:05 [Qemu-devel] [PATCH 0/5] target/arm: More SVE prep work Richard Henderson
2018-02-10 23:05 ` [Qemu-devel] [PATCH 1/5] target/arm: Remove ARM_CP_64BIT from ZCR_EL registers Richard Henderson
@ 2018-02-10 23:05 ` Richard Henderson
2018-02-10 23:05 ` [Qemu-devel] [PATCH 3/5] target/arm: Suppress TB end for FPCR/FPSR Richard Henderson
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2018-02-10 23:05 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, qemu-arm
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu.h | 35 ++++++++++++++++++-----------------
target/arm/helper.c | 6 ++++--
target/arm/translate-a64.c | 3 +++
3 files changed, 25 insertions(+), 19 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 521444a5a1..e966a57f8a 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1714,7 +1714,7 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
}
/* ARMCPRegInfo type field bits. If the SPECIAL bit is set this is a
- * special-behaviour cp reg and bits [15..8] indicate what behaviour
+ * special-behaviour cp reg and bits [11..8] indicate what behaviour
* it has. Otherwise it is a simple cp reg, where CONST indicates that
* TCG can assume the value to be constant (ie load at translate time)
* and 64BIT indicates a 64 bit wide coprocessor register. SUPPRESS_TB_END
@@ -1735,24 +1735,25 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
* need to be surrounded by gen_io_start()/gen_io_end(). In particular,
* registers which implement clocks or timers require this.
*/
-#define ARM_CP_SPECIAL 1
-#define ARM_CP_CONST 2
-#define ARM_CP_64BIT 4
-#define ARM_CP_SUPPRESS_TB_END 8
-#define ARM_CP_OVERRIDE 16
-#define ARM_CP_ALIAS 32
-#define ARM_CP_IO 64
-#define ARM_CP_NO_RAW 128
-#define ARM_CP_NOP (ARM_CP_SPECIAL | (1 << 8))
-#define ARM_CP_WFI (ARM_CP_SPECIAL | (2 << 8))
-#define ARM_CP_NZCV (ARM_CP_SPECIAL | (3 << 8))
-#define ARM_CP_CURRENTEL (ARM_CP_SPECIAL | (4 << 8))
-#define ARM_CP_DC_ZVA (ARM_CP_SPECIAL | (5 << 8))
-#define ARM_LAST_SPECIAL ARM_CP_DC_ZVA
+#define ARM_CP_SPECIAL 0x0001
+#define ARM_CP_CONST 0x0002
+#define ARM_CP_64BIT 0x0004
+#define ARM_CP_SUPPRESS_TB_END 0x0008
+#define ARM_CP_OVERRIDE 0x0010
+#define ARM_CP_ALIAS 0x0020
+#define ARM_CP_IO 0x0040
+#define ARM_CP_NO_RAW 0x0080
+#define ARM_CP_NOP (ARM_CP_SPECIAL | 0x0100)
+#define ARM_CP_WFI (ARM_CP_SPECIAL | 0x0200)
+#define ARM_CP_NZCV (ARM_CP_SPECIAL | 0x0300)
+#define ARM_CP_CURRENTEL (ARM_CP_SPECIAL | 0x0400)
+#define ARM_CP_DC_ZVA (ARM_CP_SPECIAL | 0x0500)
+#define ARM_LAST_SPECIAL ARM_CP_DC_ZVA
+#define ARM_CP_FPU 0x1000
/* Used only as a terminator for ARMCPRegInfo lists */
-#define ARM_CP_SENTINEL 0xffff
+#define ARM_CP_SENTINEL 0xffff
/* Mask of only the flag bits in a type field */
-#define ARM_CP_FLAG_MASK 0xff
+#define ARM_CP_FLAG_MASK 0x10ff
/* Valid values for ARMCPRegInfo state field, indicating which of
* the AArch32 and AArch64 execution states this register is visible in.
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 4b102ec356..d41fb8371f 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -3356,10 +3356,12 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
.writefn = aa64_daif_write, .resetfn = arm_cp_reset_ignore },
{ .name = "FPCR", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 4,
- .access = PL0_RW, .readfn = aa64_fpcr_read, .writefn = aa64_fpcr_write },
+ .access = PL0_RW, .type = ARM_CP_FPU,
+ .readfn = aa64_fpcr_read, .writefn = aa64_fpcr_write },
{ .name = "FPSR", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4,
- .access = PL0_RW, .readfn = aa64_fpsr_read, .writefn = aa64_fpsr_write },
+ .access = PL0_RW, .type = ARM_CP_FPU,
+ .readfn = aa64_fpsr_read, .writefn = aa64_fpsr_write },
{ .name = "DCZID_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .opc2 = 7, .crn = 0, .crm = 0,
.access = PL0_R, .type = ARM_CP_NO_RAW,
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index fb1a4cb532..89f50558a7 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -1631,6 +1631,9 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
default:
break;
}
+ if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
+ return;
+ }
if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
gen_io_start();
--
2.14.3
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Qemu-devel] [PATCH 3/5] target/arm: Suppress TB end for FPCR/FPSR
2018-02-10 23:05 [Qemu-devel] [PATCH 0/5] target/arm: More SVE prep work Richard Henderson
2018-02-10 23:05 ` [Qemu-devel] [PATCH 1/5] target/arm: Remove ARM_CP_64BIT from ZCR_EL registers Richard Henderson
2018-02-10 23:05 ` [Qemu-devel] [PATCH 2/5] target/arm: Enforce FP access to FPCR/FPSR Richard Henderson
@ 2018-02-10 23:05 ` Richard Henderson
2018-02-10 23:05 ` [Qemu-devel] [PATCH 4/5] target/arm: Enforce access to ZCR_EL at translation Richard Henderson
2018-02-10 23:05 ` [Qemu-devel] [PATCH 5/5] target/arm: Handle SVE registers when using clear_vec_high Richard Henderson
4 siblings, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2018-02-10 23:05 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, qemu-arm
Nothing in either register affects the TB.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index d41fb8371f..e0184c7162 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -3356,11 +3356,11 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
.writefn = aa64_daif_write, .resetfn = arm_cp_reset_ignore },
{ .name = "FPCR", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 4,
- .access = PL0_RW, .type = ARM_CP_FPU,
+ .access = PL0_RW, .type = ARM_CP_FPU | ARM_CP_SUPPRESS_TB_END,
.readfn = aa64_fpcr_read, .writefn = aa64_fpcr_write },
{ .name = "FPSR", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4,
- .access = PL0_RW, .type = ARM_CP_FPU,
+ .access = PL0_RW, .type = ARM_CP_FPU | ARM_CP_SUPPRESS_TB_END,
.readfn = aa64_fpsr_read, .writefn = aa64_fpsr_write },
{ .name = "DCZID_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .opc2 = 7, .crn = 0, .crm = 0,
--
2.14.3
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Qemu-devel] [PATCH 4/5] target/arm: Enforce access to ZCR_EL at translation
2018-02-10 23:05 [Qemu-devel] [PATCH 0/5] target/arm: More SVE prep work Richard Henderson
` (2 preceding siblings ...)
2018-02-10 23:05 ` [Qemu-devel] [PATCH 3/5] target/arm: Suppress TB end for FPCR/FPSR Richard Henderson
@ 2018-02-10 23:05 ` Richard Henderson
2018-02-10 23:05 ` [Qemu-devel] [PATCH 5/5] target/arm: Handle SVE registers when using clear_vec_high Richard Henderson
4 siblings, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2018-02-10 23:05 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, qemu-arm
This also makes sure that we get the correct ordering of
SVE vs FP exceptions.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu.h | 3 ++-
target/arm/internals.h | 6 ++++++
target/arm/helper.c | 22 ++++------------------
target/arm/translate-a64.c | 16 ++++++++++++++++
4 files changed, 28 insertions(+), 19 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index e966a57f8a..51a3e16275 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1750,10 +1750,11 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
#define ARM_CP_DC_ZVA (ARM_CP_SPECIAL | 0x0500)
#define ARM_LAST_SPECIAL ARM_CP_DC_ZVA
#define ARM_CP_FPU 0x1000
+#define ARM_CP_SVE 0x2000
/* Used only as a terminator for ARMCPRegInfo lists */
#define ARM_CP_SENTINEL 0xffff
/* Mask of only the flag bits in a type field */
-#define ARM_CP_FLAG_MASK 0x10ff
+#define ARM_CP_FLAG_MASK 0x30ff
/* Valid values for ARMCPRegInfo state field, indicating which of
* the AArch32 and AArch64 execution states this register is visible in.
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 89f5d2fe12..47cc224a46 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -243,6 +243,7 @@ enum arm_exception_class {
EC_AA64_HVC = 0x16,
EC_AA64_SMC = 0x17,
EC_SYSTEMREGISTERTRAP = 0x18,
+ EC_SVEACCESSTRAP = 0x19,
EC_INSNABORT = 0x20,
EC_INSNABORT_SAME_EL = 0x21,
EC_PCALIGNMENT = 0x22,
@@ -381,6 +382,11 @@ static inline uint32_t syn_fp_access_trap(int cv, int cond, bool is_16bit)
| (cv << 24) | (cond << 20);
}
+static inline uint32_t syn_sve_access_trap(void)
+{
+ return EC_SVEACCESSTRAP << ARM_EL_EC_SHIFT;
+}
+
static inline uint32_t syn_insn_abort(int same_el, int ea, int s1ptw, int fsc)
{
return (EC_INSNABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index e0184c7162..550dc3d290 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -4335,20 +4335,6 @@ static int sve_exception_el(CPUARMState *env)
return 0;
}
-static CPAccessResult zcr_access(CPUARMState *env, const ARMCPRegInfo *ri,
- bool isread)
-{
- switch (sve_exception_el(env)) {
- case 3:
- return CP_ACCESS_TRAP_EL3;
- case 2:
- return CP_ACCESS_TRAP_EL2;
- case 1:
- return CP_ACCESS_TRAP;
- }
- return CP_ACCESS_OK;
-}
-
static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
@@ -4359,7 +4345,7 @@ static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
static const ARMCPRegInfo zcr_el1_reginfo = {
.name = "ZCR_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 1, .crm = 2, .opc2 = 0,
- .access = PL1_RW, .accessfn = zcr_access,
+ .access = PL1_RW, .type = ARM_CP_SVE | ARM_CP_FPU,
.fieldoffset = offsetof(CPUARMState, vfp.zcr_el[1]),
.writefn = zcr_write, .raw_writefn = raw_write
};
@@ -4367,7 +4353,7 @@ static const ARMCPRegInfo zcr_el1_reginfo = {
static const ARMCPRegInfo zcr_el2_reginfo = {
.name = "ZCR_EL2", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 0,
- .access = PL2_RW, .accessfn = zcr_access,
+ .access = PL2_RW, .type = ARM_CP_SVE | ARM_CP_FPU,
.fieldoffset = offsetof(CPUARMState, vfp.zcr_el[2]),
.writefn = zcr_write, .raw_writefn = raw_write
};
@@ -4375,14 +4361,14 @@ static const ARMCPRegInfo zcr_el2_reginfo = {
static const ARMCPRegInfo zcr_no_el2_reginfo = {
.name = "ZCR_EL2", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 0,
- .access = PL2_RW,
+ .access = PL2_RW, .type = ARM_CP_SVE | ARM_CP_FPU,
.readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore
};
static const ARMCPRegInfo zcr_el3_reginfo = {
.name = "ZCR_EL3", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 6, .crn = 1, .crm = 2, .opc2 = 0,
- .access = PL3_RW, .accessfn = zcr_access,
+ .access = PL3_RW, .type = ARM_CP_SVE | ARM_CP_FPU,
.fieldoffset = offsetof(CPUARMState, vfp.zcr_el[3]),
.writefn = zcr_write, .raw_writefn = raw_write
};
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 89f50558a7..e3881d4999 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -1182,6 +1182,19 @@ static inline bool fp_access_check(DisasContext *s)
return false;
}
+/* Check that SVE access is enabled. If it is, return true.
+ * If not, emit code to generate an appropriate exception and return false.
+ */
+static inline bool sve_access_check(DisasContext *s)
+{
+ if (s->sve_excp_el) {
+ gen_exception_insn(s, 4, EXCP_UDEF, syn_sve_access_trap(),
+ s->sve_excp_el);
+ return false;
+ }
+ return true;
+}
+
/*
* This utility function is for doing register extension with an
* optional shift. You will likely want to pass a temporary for the
@@ -1631,6 +1644,9 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
default:
break;
}
+ if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
+ return;
+ }
if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
return;
}
--
2.14.3
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Qemu-devel] [PATCH 5/5] target/arm: Handle SVE registers when using clear_vec_high
2018-02-10 23:05 [Qemu-devel] [PATCH 0/5] target/arm: More SVE prep work Richard Henderson
` (3 preceding siblings ...)
2018-02-10 23:05 ` [Qemu-devel] [PATCH 4/5] target/arm: Enforce access to ZCR_EL at translation Richard Henderson
@ 2018-02-10 23:05 ` Richard Henderson
4 siblings, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2018-02-10 23:05 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, qemu-arm
When storing to an AdvSIMD FP register, all of the high
bits of the SVE register are zeroed. Therefore, call it
more often with is_q as a parameter.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/translate-a64.c | 162 +++++++++++++++++----------------------------
1 file changed, 62 insertions(+), 100 deletions(-)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index e3881d4999..1c88539d62 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -602,13 +602,30 @@ static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
return v;
}
+/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
+ * If SVE is not enabled, then there are only 128 bits in the vector.
+ */
+static void clear_vec_high(DisasContext *s, bool is_q, int rd)
+{
+ unsigned ofs = fp_reg_offset(s, rd, MO_64);
+ unsigned vsz = vec_full_reg_size(s);
+
+ if (!is_q) {
+ TCGv_i64 tcg_zero = tcg_const_i64(0);
+ tcg_gen_st_i64(tcg_zero, cpu_env, ofs + 8);
+ tcg_temp_free_i64(tcg_zero);
+ }
+ if (vsz > 16) {
+ tcg_gen_gvec_dup8i(ofs + 16, vsz - 16, vsz - 16, 0);
+ }
+}
+
static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
- TCGv_i64 tcg_zero = tcg_const_i64(0);
+ unsigned ofs = fp_reg_offset(s, reg, MO_64);
- tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
- tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
- tcg_temp_free_i64(tcg_zero);
+ tcg_gen_st_i64(v, cpu_env, ofs);
+ clear_vec_high(s, false, reg);
}
static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
@@ -1009,6 +1026,8 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
tcg_temp_free_i64(tmplo);
tcg_temp_free_i64(tmphi);
+
+ clear_vec_high(s, true, destidx);
}
/*
@@ -1124,17 +1143,6 @@ static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
}
}
-/* Clear the high 64 bits of a 128 bit vector (in general non-quad
- * vector ops all need to do this).
- */
-static void clear_vec_high(DisasContext *s, int rd)
-{
- TCGv_i64 tcg_zero = tcg_const_i64(0);
-
- write_vec_element(s, tcg_zero, rd, 1, MO_64);
- tcg_temp_free_i64(tcg_zero);
-}
-
/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
TCGv_i64 tcg_addr, int size)
@@ -2794,12 +2802,13 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
/* For non-quad operations, setting a slice of the low
* 64 bits of the register clears the high 64 bits (in
* the ARM ARM pseudocode this is implicit in the fact
- * that 'rval' is a 64 bit wide variable). We optimize
- * by noticing that we only need to do this the first
- * time we touch a register.
+ * that 'rval' is a 64 bit wide variable).
+ * For quad operations, we might still need to zero the
+ * high bits of SVE. We optimize by noticing that we only
+ * need to do this the first time we touch a register.
*/
- if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
- clear_vec_high(s, tt);
+ if (e == 0 && (r == 0 || xs == selem - 1)) {
+ clear_vec_high(s, is_q, tt);
}
}
tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
@@ -2942,10 +2951,9 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
write_vec_element(s, tcg_tmp, rt, 0, MO_64);
if (is_q) {
write_vec_element(s, tcg_tmp, rt, 1, MO_64);
- } else {
- clear_vec_high(s, rt);
}
tcg_temp_free_i64(tcg_tmp);
+ clear_vec_high(s, is_q, rt);
} else {
/* Load/store one element per register */
if (is_load) {
@@ -6718,7 +6726,6 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
}
if (!is_q) {
- clear_vec_high(s, rd);
write_vec_element(s, tcg_final, rd, 0, MO_64);
} else {
write_vec_element(s, tcg_final, rd, 1, MO_64);
@@ -6731,7 +6738,8 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
tcg_temp_free_i64(tcg_rd);
tcg_temp_free_i32(tcg_rd_narrowed);
tcg_temp_free_i64(tcg_final);
- return;
+
+ clear_vec_high(s, is_q, rd);
}
/* SQSHLU, UQSHL, SQSHL: saturating left shifts */
@@ -6795,10 +6803,7 @@ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
tcg_temp_free_i64(tcg_op);
}
tcg_temp_free_i64(tcg_shift);
-
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, is_q, rd);
} else {
TCGv_i32 tcg_shift = tcg_const_i32(shift);
static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
@@ -6847,8 +6852,8 @@ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
}
tcg_temp_free_i32(tcg_shift);
- if (!is_q && !scalar) {
- clear_vec_high(s, rd);
+ if (!scalar) {
+ clear_vec_high(s, is_q, rd);
}
}
}
@@ -6901,13 +6906,11 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
}
}
- if (!is_double && elements == 2) {
- clear_vec_high(s, rd);
- }
-
tcg_temp_free_i64(tcg_int);
tcg_temp_free_ptr(tcg_fpst);
tcg_temp_free_i32(tcg_shift);
+
+ clear_vec_high(s, elements << size == 16, rd);
}
/* UCVTF/SCVTF - Integer to FP conversion */
@@ -6995,9 +6998,7 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
write_vec_element(s, tcg_op, rd, pass, MO_64);
tcg_temp_free_i64(tcg_op);
}
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, is_q, rd);
} else {
int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
for (pass = 0; pass < maxpass; pass++) {
@@ -7016,8 +7017,8 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
}
tcg_temp_free_i32(tcg_op);
}
- if (!is_q && !is_scalar) {
- clear_vec_high(s, rd);
+ if (!is_scalar) {
+ clear_vec_high(s, is_q, rd);
}
}
@@ -7502,10 +7503,7 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
tcg_temp_free_ptr(fpst);
- if ((elements << size) < 4) {
- /* scalar, or non-quad vector op */
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
}
/* AdvSIMD scalar three same
@@ -7831,13 +7829,11 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
}
write_vec_element(s, tcg_res, rd, pass, MO_64);
}
- if (is_scalar) {
- clear_vec_high(s, rd);
- }
-
tcg_temp_free_i64(tcg_res);
tcg_temp_free_i64(tcg_zero);
tcg_temp_free_i64(tcg_op);
+
+ clear_vec_high(s, !is_scalar, rd);
} else {
TCGv_i32 tcg_op = tcg_temp_new_i32();
TCGv_i32 tcg_zero = tcg_const_i32(0);
@@ -7888,8 +7884,8 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
tcg_temp_free_i32(tcg_res);
tcg_temp_free_i32(tcg_zero);
tcg_temp_free_i32(tcg_op);
- if (!is_q && !is_scalar) {
- clear_vec_high(s, rd);
+ if (!is_scalar) {
+ clear_vec_high(s, is_q, rd);
}
}
@@ -7925,12 +7921,9 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode,
}
write_vec_element(s, tcg_res, rd, pass, MO_64);
}
- if (is_scalar) {
- clear_vec_high(s, rd);
- }
-
tcg_temp_free_i64(tcg_res);
tcg_temp_free_i64(tcg_op);
+ clear_vec_high(s, !is_scalar, rd);
} else {
TCGv_i32 tcg_op = tcg_temp_new_i32();
TCGv_i32 tcg_res = tcg_temp_new_i32();
@@ -7970,8 +7963,8 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode,
}
tcg_temp_free_i32(tcg_res);
tcg_temp_free_i32(tcg_op);
- if (!is_q && !is_scalar) {
- clear_vec_high(s, rd);
+ if (!is_scalar) {
+ clear_vec_high(s, is_q, rd);
}
}
tcg_temp_free_ptr(fpst);
@@ -8077,9 +8070,7 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar,
write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
tcg_temp_free_i32(tcg_res[pass]);
}
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, is_q, rd);
}
/* Remaining saturating accumulating ops */
@@ -8104,12 +8095,9 @@ static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
}
write_vec_element(s, tcg_rd, rd, pass, MO_64);
}
- if (is_scalar) {
- clear_vec_high(s, rd);
- }
-
tcg_temp_free_i64(tcg_rd);
tcg_temp_free_i64(tcg_rn);
+ clear_vec_high(s, !is_scalar, rd);
} else {
TCGv_i32 tcg_rn = tcg_temp_new_i32();
TCGv_i32 tcg_rd = tcg_temp_new_i32();
@@ -8167,13 +8155,9 @@ static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
}
write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
}
-
- if (!is_q) {
- clear_vec_high(s, rd);
- }
-
tcg_temp_free_i32(tcg_rd);
tcg_temp_free_i32(tcg_rn);
+ clear_vec_high(s, is_q, rd);
}
}
@@ -8664,9 +8648,7 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
tcg_temp_free_i64(tcg_round);
done:
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, is_q, rd);
}
static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
@@ -8855,19 +8837,18 @@ static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
}
if (!is_q) {
- clear_vec_high(s, rd);
write_vec_element(s, tcg_final, rd, 0, MO_64);
} else {
write_vec_element(s, tcg_final, rd, 1, MO_64);
}
-
if (round) {
tcg_temp_free_i64(tcg_round);
}
tcg_temp_free_i64(tcg_rn);
tcg_temp_free_i64(tcg_rd);
tcg_temp_free_i64(tcg_final);
- return;
+
+ clear_vec_high(s, is_q, rd);
}
@@ -9261,9 +9242,7 @@ static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
tcg_temp_free_i32(tcg_res[pass]);
}
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, is_q, rd);
}
static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
@@ -9671,9 +9650,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
tcg_temp_free_i32(tcg_res[pass]);
}
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, is_q, rd);
}
if (fpst) {
@@ -10161,10 +10138,7 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
tcg_temp_free_i32(tcg_op2);
}
}
-
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, is_q, rd);
}
/* AdvSIMD three same
@@ -10303,9 +10277,7 @@ static void handle_rev(DisasContext *s, int opcode, bool u,
write_vec_element(s, tcg_tmp, rd, i, grp_size);
tcg_temp_free_i64(tcg_tmp);
}
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, is_q, rd);
} else {
int revmask = (1 << grp_size) - 1;
int esize = 8 << size;
@@ -10949,9 +10921,7 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
tcg_temp_free_i32(tcg_op);
}
}
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, is_q, rd);
if (need_rmode) {
gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
@@ -11130,11 +11100,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
tcg_temp_free_i64(tcg_res);
}
- if (is_scalar) {
- clear_vec_high(s, rd);
- }
-
tcg_temp_free_i64(tcg_idx);
+ clear_vec_high(s, !is_scalar, rd);
} else if (!is_long) {
/* 32 bit floating point, or 16 or 32 bit integer.
* For the 16 bit scalar case we use the usual Neon helpers and
@@ -11238,10 +11205,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
}
tcg_temp_free_i32(tcg_idx);
-
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, is_q, rd);
} else {
/* long ops: 16x16->32 or 32x32->64 */
TCGv_i64 tcg_res[2];
@@ -11318,9 +11282,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
}
tcg_temp_free_i64(tcg_idx);
- if (is_scalar) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, !is_scalar, rd);
} else {
TCGv_i32 tcg_idx = tcg_temp_new_i32();
--
2.14.3
^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2018-02-10 23:05 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-10 23:05 [Qemu-devel] [PATCH 0/5] target/arm: More SVE prep work Richard Henderson
2018-02-10 23:05 ` [Qemu-devel] [PATCH 1/5] target/arm: Remove ARM_CP_64BIT from ZCR_EL registers Richard Henderson
2018-02-10 23:05 ` [Qemu-devel] [PATCH 2/5] target/arm: Enforce FP access to FPCR/FPSR Richard Henderson
2018-02-10 23:05 ` [Qemu-devel] [PATCH 3/5] target/arm: Suppress TB end for FPCR/FPSR Richard Henderson
2018-02-10 23:05 ` [Qemu-devel] [PATCH 4/5] target/arm: Enforce access to ZCR_EL at translation Richard Henderson
2018-02-10 23:05 ` [Qemu-devel] [PATCH 5/5] target/arm: Handle SVE registers when using clear_vec_high Richard Henderson
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.