From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:59703) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1bk6xB-0004VT-Hz for qemu-devel@nongnu.org; Wed, 14 Sep 2016 05:58:06 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1bk6x7-0000D8-IV for qemu-devel@nongnu.org; Wed, 14 Sep 2016 05:58:05 -0400 Received: from mail-wm0-f65.google.com ([74.125.82.65]:33328) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1bk6x7-0000D0-91 for qemu-devel@nongnu.org; Wed, 14 Sep 2016 05:58:01 -0400 Received: by mail-wm0-f65.google.com with SMTP id b187so1724141wme.0 for ; Wed, 14 Sep 2016 02:58:01 -0700 (PDT) Received: from donizetti.lan (94-39-176-182.adsl-ull.clienti.tiscali.it. [94.39.176.182]) by smtp.gmail.com with ESMTPSA id v73sm9876453wmf.19.2016.09.14.02.56.59 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Wed, 14 Sep 2016 02:56:59 -0700 (PDT) Sender: Paolo Bonzini From: Paolo Bonzini Date: Wed, 14 Sep 2016 11:56:52 +0200 Message-Id: <1473847013-20191-3-git-send-email-pbonzini@redhat.com> In-Reply-To: <1473847013-20191-1-git-send-email-pbonzini@redhat.com> References: <1473847013-20191-1-git-send-email-pbonzini@redhat.com> Subject: [Qemu-devel] [PATCH 2/3] target-arm: add env->tbflags List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Computing TranslationBlock flags is pretty expensive on ARM, especially 32-bit. In order to limit the cost, we want to cache as many of them as possible. Therefore, the flags are split into two parts. Static flags come directly from a new CPUARMState field env->tbflags, and are updated whenever EL changes (i.e. on exceptions and exception returns) or after MSR instructions. Dynamic flags are computed on the fly by cpu_get_tb_cpu_state (which calls cpu_dynamic_tb_cpu_flags to retrieve them), same as before. 
As of this patch, all flags are dynamic and env->tbflags is always 0, so this patch adds the infrastructure but does not do any caching yet. Signed-off-by: Paolo Bonzini --- target-arm/cpu.c | 2 ++ target-arm/cpu.h | 10 +++++++++- target-arm/helper.c | 2 ++ target-arm/helper.h | 1 + target-arm/op_helper.c | 7 +++++++ target-arm/translate-a64.c | 4 ++++ target-arm/translate.c | 12 ++++++++++-- target-arm/translate.h | 1 + 8 files changed, 36 insertions(+), 3 deletions(-) diff --git a/target-arm/cpu.c b/target-arm/cpu.c index ce8b8f4..189ceab 100644 --- a/target-arm/cpu.c +++ b/target-arm/cpu.c @@ -225,6 +225,8 @@ static void arm_cpu_reset(CPUState *s) &env->vfp.fp_status); set_float_detect_tininess(float_tininess_before_rounding, &env->vfp.standard_fp_status); + + env->tbflags = cpu_get_tb_cpu_flags(env); tlb_flush(s, 1); #ifndef CONFIG_USER_ONLY diff --git a/target-arm/cpu.h b/target-arm/cpu.h index ef195bd..5918df5 100644 --- a/target-arm/cpu.h +++ b/target-arm/cpu.h @@ -155,6 +155,7 @@ typedef struct CPUARMState { */ uint32_t pstate; uint32_t aarch64; /* 1 if CPU is in aarch64 state; inverse of PSTATE.nRW */ + uint32_t tbflags; /* Frequently accessed CPSR bits are stored separately for efficiency. This contains all the other bits. 
Use cpsr_{read,write} to access @@ -2370,10 +2371,17 @@ static inline uint32_t cpu_dynamic_tb_cpu_flags(CPUARMState *env) return flags; } +static inline uint32_t cpu_get_tb_cpu_flags(CPUARMState *env) +{ + uint32_t flags = 0; + + return flags; +} + static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, target_ulong *cs_base, uint32_t *flags) { - *flags = cpu_dynamic_tb_cpu_flags(env); + *flags = env->tbflags | cpu_dynamic_tb_cpu_flags(env); if (is_a64(env)) { *pc = env->pc; diff --git a/target-arm/helper.c b/target-arm/helper.c index bdb842c..0b4f6de 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -6068,6 +6068,7 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs) addr = ldl_phys(cs->as, env->v7m.vecbase + env->v7m.exception * 4); env->regs[15] = addr & 0xfffffffe; env->thumb = addr & 1; + env->tbflags = cpu_get_tb_cpu_flags(env); } /* Function used to synchronize QEMU's AArch64 register set with AArch32 @@ -6642,6 +6643,7 @@ void arm_cpu_do_interrupt(CPUState *cs) arm_cpu_do_interrupt_aarch32(cs); } + env->tbflags = cpu_get_tb_cpu_flags(env); arm_call_el_change_hook(cpu); if (!kvm_enabled()) { diff --git a/target-arm/helper.h b/target-arm/helper.h index 84aa637..21a0e35 100644 --- a/target-arm/helper.h +++ b/target-arm/helper.h @@ -73,6 +73,7 @@ DEF_HELPER_2(get_cp_reg64, i64, env, ptr) DEF_HELPER_3(msr_i_pstate, void, env, i32, i32) DEF_HELPER_1(clear_pstate_ss, void, env) DEF_HELPER_1(exception_return, void, env) +DEF_HELPER_FLAGS_1(compute_tbflags, TCG_CALL_NO_WG_SE, i32, env) DEF_HELPER_2(get_r13_banked, i32, env, i32) DEF_HELPER_3(set_r13_banked, void, env, i32, i32) diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c index be27b21..e5d7cfc 100644 --- a/target-arm/op_helper.c +++ b/target-arm/op_helper.c @@ -479,6 +479,7 @@ void HELPER(cpsr_write_eret)(CPUARMState *env, uint32_t val) { cpsr_write(env, val, CPSR_ERET_MASK, CPSRWriteExceptionReturn); + env->tbflags = cpu_get_tb_cpu_flags(env); 
arm_call_el_change_hook(arm_env_get_cpu(env)); } @@ -975,6 +976,7 @@ void HELPER(exception_return)(CPUARMState *env) env->pc = env->elr_el[cur_el]; } + env->tbflags = cpu_get_tb_cpu_flags(env); arm_call_el_change_hook(arm_env_get_cpu(env)); return; @@ -1326,3 +1328,8 @@ uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i) return ((uint32_t)x >> shift) | (x << (32 - shift)); } } + +uint32_t HELPER(compute_tbflags)(CPUARMState *env) +{ + return cpu_get_tb_cpu_flags(env); +} diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index f5e29d2..7c355b0 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -1507,6 +1507,10 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, } } + if (!isread) { + gen_helper_compute_tbflags(cpu_tbflags, cpu_env); + } + if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) { /* I/O operations must end the TB here (whether read or write) */ gen_io_end(); diff --git a/target-arm/translate.c b/target-arm/translate.c index bd5d5cb..b4d8fe7 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -68,6 +68,7 @@ TCGv_i64 cpu_exclusive_val; TCGv_i64 cpu_exclusive_test; TCGv_i32 cpu_exclusive_info; #endif +TCGv_i32 cpu_tbflags; /* FIXME: These should be removed. */ static TCGv_i32 cpu_F0s, cpu_F1s; @@ -107,6 +108,8 @@ void arm_translate_init(void) cpu_exclusive_info = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, exclusive_info), "exclusive_info"); #endif + cpu_tbflags = tcg_global_mem_new_i32(cpu_env, + offsetof(CPUARMState, tbflags), "tbflags"); a64_translate_init(); } @@ -1135,6 +1138,7 @@ static void gen_exception_insn(DisasContext *s, int offset, int excp, /* Force a TB lookup after an instruction that changes the CPU state. 
*/ static inline void gen_lookup_tb(DisasContext *s) { + gen_helper_compute_tbflags(cpu_tbflags, cpu_env); tcg_gen_movi_i32(cpu_R[15], s->pc & ~1); s->is_jmp = DISAS_JUMP; } @@ -7630,12 +7634,16 @@ static int disas_coproc_insn(DisasContext *s, uint32_t insn) /* I/O operations must end the TB here (whether read or write) */ gen_io_end(); gen_lookup_tb(s); - } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { + } else if (!isread) { /* We default to ending the TB on a coprocessor register write, * but allow this to be suppressed by the register definition * (usually only necessary to work around guest bugs). */ - gen_lookup_tb(s); + if (ri->type & ARM_CP_SUPPRESS_TB_END) { + gen_helper_compute_tbflags(cpu_tbflags, cpu_env); + } else { + gen_lookup_tb(s); + } } return 0; diff --git a/target-arm/translate.h b/target-arm/translate.h index dbd7ac8..d269f4c 100644 --- a/target-arm/translate.h +++ b/target-arm/translate.h @@ -81,6 +81,7 @@ extern TCGv_i64 cpu_exclusive_val; extern TCGv_i64 cpu_exclusive_test; extern TCGv_i32 cpu_exclusive_info; #endif +extern TCGv_i32 cpu_tbflags; static inline int arm_dc_feature(DisasContext *dc, int feature) { -- 2.7.4