* [Qemu-devel] [PATCH 1/5] ppc: Remove MMU_MODEn_SUFFIX definitions
From: Benjamin Herrenschmidt @ 2015-08-17  7:34 UTC
  To: qemu-devel; +Cc: Paolo Bonzini, qemu-ppc, Alexander Graf, Aurelien Jarno

We don't use the resulting accessors, and these definitions get in the
way of the split I/D TLB work.
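
For context, these suffix macros exist only to stamp out per-mode
load/store accessors via the templates in include/exec/cpu_ldst.h. As a
rough sketch (signatures abridged, for illustration only), defining
MMU_MODE1_SUFFIX as _kernel generated accessors along the lines of:

    uint32_t cpu_ldl_kernel(CPUPPCState *env, target_ulong ptr);
    void cpu_stl_kernel(CPUPPCState *env, target_ulong ptr, uint32_t v);

Nothing in target-ppc uses these, so the definitions can go.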

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 6f76674..5dfd195 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1246,9 +1246,6 @@ int ppc_dcr_write (ppc_dcr_t *dcr_env, int dcrn, uint32_t val);
 #define cpu_list ppc_cpu_list
 
 /* MMU modes definitions */
-#define MMU_MODE0_SUFFIX _user
-#define MMU_MODE1_SUFFIX _kernel
-#define MMU_MODE2_SUFFIX _hypv
 #define MMU_USER_IDX 0
 static inline int cpu_mmu_index (CPUPPCState *env)
 {
-- 
2.4.3


* [Qemu-devel] [PATCH 2/5] tlb: Add "ifetch" argument to cpu_mmu_index()
From: Benjamin Herrenschmidt @ 2015-08-17  7:34 UTC
  To: qemu-devel; +Cc: Paolo Bonzini, qemu-ppc, Alexander Graf, Aurelien Jarno

This is set to true when the index is for an instruction fetch
translation.

The core get_page_addr_code() sets it, as do the SOFTMMU_CODE_ACCESS
accessors.

All targets ignore it for now, and all other callers pass "false".

This will allow targets that wish to split the mmu index between
instruction and data accesses to do so. A subsequent patch will
do just that for PowerPC.
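
As a preview of what that looks like, patch 3 uses the two precomputed
per-env indices it introduces for PowerPC:

    static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch)
    {
        return ifetch ? env->immu_idx : env->dmmu_idx;
    }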

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 cputlb.c                      |  2 +-
 include/exec/cpu_ldst.h       |  4 ++--
 target-alpha/cpu.h            |  2 +-
 target-alpha/translate.c      |  2 +-
 target-arm/cpu.h              |  4 ++--
 target-arm/helper.c           |  4 ++--
 target-cris/cpu.h             |  2 +-
 target-cris/translate.c       |  6 +++---
 target-cris/translate_v10.c   |  2 +-
 target-i386/cpu.h             |  2 +-
 target-i386/translate.c       |  2 +-
 target-lm32/cpu.h             |  2 +-
 target-m68k/cpu.h             |  2 +-
 target-microblaze/cpu.h       |  2 +-
 target-microblaze/mmu.c       |  2 +-
 target-microblaze/translate.c | 16 ++++++++--------
 target-mips/cpu.h             |  2 +-
 target-mips/op_helper.c       |  4 ++--
 target-moxie/cpu.h            |  2 +-
 target-openrisc/cpu.h         |  2 +-
 target-openrisc/translate.c   |  2 +-
 target-ppc/cpu.h              |  2 +-
 target-s390x/cpu.h            |  2 +-
 target-s390x/mem_helper.c     |  4 ++--
 target-sh4/cpu.h              |  2 +-
 target-sparc/cpu.h            |  2 +-
 target-sparc/mmu_helper.c     |  2 +-
 target-sparc/translate.c      |  2 +-
 target-tricore/cpu.h          |  2 +-
 target-tricore/translate.c    |  2 +-
 target-unicore32/cpu.h        |  2 +-
 target-xtensa/cpu.h           |  2 +-
 32 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/cputlb.c b/cputlb.c
index a506086..f7ccc1d 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -355,7 +355,7 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
     CPUState *cpu = ENV_GET_CPU(env1);
 
     page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-    mmu_idx = cpu_mmu_index(env1);
+    mmu_idx = cpu_mmu_index(env1, true);
     if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
                  (addr & TARGET_PAGE_MASK))) {
         cpu_ldub_code(env1, addr);
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index 1239c60..8a28818 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -363,7 +363,7 @@ uint64_t helper_ldq_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
 #endif /* (NB_MMU_MODES > 12) */
 
 /* these access are slower, they must be as rare as possible */
-#define CPU_MMU_INDEX (cpu_mmu_index(env))
+#define CPU_MMU_INDEX (cpu_mmu_index(env, false))
 #define MEMSUFFIX _data
 #define DATA_SIZE 1
 #include "exec/cpu_ldst_template.h"
@@ -379,7 +379,7 @@ uint64_t helper_ldq_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
 #undef CPU_MMU_INDEX
 #undef MEMSUFFIX
 
-#define CPU_MMU_INDEX (cpu_mmu_index(env))
+#define CPU_MMU_INDEX (cpu_mmu_index(env, true))
 #define MEMSUFFIX _code
 #define SOFTMMU_CODE_ACCESS
 
diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
index 91c56d6..ba7daa5 100644
--- a/target-alpha/cpu.h
+++ b/target-alpha/cpu.h
@@ -376,7 +376,7 @@ enum {
     PS_USER_MODE = 8
 };
 
-static inline int cpu_mmu_index(CPUAlphaState *env)
+static inline int cpu_mmu_index(CPUAlphaState *env, bool ifetch)
 {
     if (env->pal_mode) {
         return MMU_KERNEL_IDX;
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 81d4ff8..5b9b554 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -2813,7 +2813,7 @@ static inline void gen_intermediate_code_internal(AlphaCPU *cpu,
 
     ctx.tb = tb;
     ctx.pc = pc_start;
-    ctx.mem_idx = cpu_mmu_index(env);
+    ctx.mem_idx = cpu_mmu_index(env, false);
     ctx.implver = env->implver;
     ctx.singlestep_enabled = cs->singlestep_enabled;
 
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 2e680da..a4c02b7 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -1674,7 +1674,7 @@ static inline int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx)
 }
 
 /* Determine the current mmu_idx to use for normal loads/stores */
-static inline int cpu_mmu_index(CPUARMState *env)
+static inline int cpu_mmu_index(CPUARMState *env, bool ifetch)
 {
     int el = arm_current_el(env);
 
@@ -1907,7 +1907,7 @@ static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
                    << ARM_TBFLAG_XSCALE_CPAR_SHIFT);
     }
 
-    *flags |= (cpu_mmu_index(env) << ARM_TBFLAG_MMUIDX_SHIFT);
+    *flags |= (cpu_mmu_index(env, false) << ARM_TBFLAG_MMUIDX_SHIFT);
     /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
      * states defined in the ARM ARM for software singlestep:
      *  SS_ACTIVE   PSTATE.SS   State
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 1568aa6..b1d55b6 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -6507,7 +6507,7 @@ hwaddr arm_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
     uint32_t fsr;
     MemTxAttrs attrs = {};
 
-    ret = get_phys_addr(env, addr, 0, cpu_mmu_index(env), &phys_addr,
+    ret = get_phys_addr(env, addr, 0, cpu_mmu_index(env, false), &phys_addr,
                         &attrs, &prot, &page_size, &fsr);
 
     if (ret) {
@@ -6672,7 +6672,7 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
         int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
         void *hostaddr[maxidx];
         int try, i;
-        unsigned mmu_idx = cpu_mmu_index(env);
+        unsigned mmu_idx = cpu_mmu_index(env, false);
         TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
 
         for (try = 0; try < 2; try++) {
diff --git a/target-cris/cpu.h b/target-cris/cpu.h
index d422e35..2c804b1 100644
--- a/target-cris/cpu.h
+++ b/target-cris/cpu.h
@@ -233,7 +233,7 @@ enum {
 #define MMU_MODE0_SUFFIX _kernel
 #define MMU_MODE1_SUFFIX _user
 #define MMU_USER_IDX 1
-static inline int cpu_mmu_index (CPUCRISState *env)
+static inline int cpu_mmu_index (CPUCRISState *env, bool ifetch)
 {
 	return !!(env->pregs[PR_CCS] & U_FLAG);
 }
diff --git a/target-cris/translate.c b/target-cris/translate.c
index 3e59601..f56aa45 100644
--- a/target-cris/translate.c
+++ b/target-cris/translate.c
@@ -1089,7 +1089,7 @@ static inline void cris_prepare_jmp (DisasContext *dc, unsigned int type)
 
 static void gen_load64(DisasContext *dc, TCGv_i64 dst, TCGv addr)
 {
-    int mem_index = cpu_mmu_index(&dc->cpu->env);
+    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
 
     /* If we get a fault on a delayslot we must keep the jmp state in
        the cpu-state to be able to re-execute the jmp.  */
@@ -1103,7 +1103,7 @@ static void gen_load64(DisasContext *dc, TCGv_i64 dst, TCGv addr)
 static void gen_load(DisasContext *dc, TCGv dst, TCGv addr, 
              unsigned int size, int sign)
 {
-    int mem_index = cpu_mmu_index(&dc->cpu->env);
+    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
 
     /* If we get a fault on a delayslot we must keep the jmp state in
        the cpu-state to be able to re-execute the jmp.  */
@@ -1118,7 +1118,7 @@ static void gen_load(DisasContext *dc, TCGv dst, TCGv addr,
 static void gen_store (DisasContext *dc, TCGv addr, TCGv val,
                unsigned int size)
 {
-    int mem_index = cpu_mmu_index(&dc->cpu->env);
+    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
 
     /* If we get a fault on a delayslot we must keep the jmp state in
        the cpu-state to be able to re-execute the jmp.  */
diff --git a/target-cris/translate_v10.c b/target-cris/translate_v10.c
index b742c4c..fe980e0 100644
--- a/target-cris/translate_v10.c
+++ b/target-cris/translate_v10.c
@@ -96,7 +96,7 @@ static void gen_store_v10_conditional(DisasContext *dc, TCGv addr, TCGv val,
 static void gen_store_v10(DisasContext *dc, TCGv addr, TCGv val,
                        unsigned int size)
 {
-    int mem_index = cpu_mmu_index(&dc->cpu->env);
+    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
 
     /* If we get a fault on a delayslot we must keep the jmp state in
        the cpu-state to be able to re-execute the jmp.  */
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 74b674d..151f1b9 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -1199,7 +1199,7 @@ uint64_t cpu_get_tsc(CPUX86State *env);
 #define MMU_KSMAP_IDX   0
 #define MMU_USER_IDX    1
 #define MMU_KNOSMAP_IDX 2
-static inline int cpu_mmu_index(CPUX86State *env)
+static inline int cpu_mmu_index(CPUX86State *env, bool ifetch)
 {
     return (env->hflags & HF_CPL_MASK) == 3 ? MMU_USER_IDX :
         (!(env->hflags & HF_SMAP_MASK) || (env->eflags & AC_MASK))
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 82e2245..0b345b1 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -7943,7 +7943,7 @@ static inline void gen_intermediate_code_internal(X86CPU *cpu,
     /* select memory access functions */
     dc->mem_index = 0;
     if (flags & HF_SOFTMMU_MASK) {
-        dc->mem_index = cpu_mmu_index(env);
+        dc->mem_index = cpu_mmu_index(env, false);
     }
     dc->cpuid_features = env->features[FEAT_1_EDX];
     dc->cpuid_ext_features = env->features[FEAT_1_ECX];
diff --git a/target-lm32/cpu.h b/target-lm32/cpu.h
index 944777d..cc77263 100644
--- a/target-lm32/cpu.h
+++ b/target-lm32/cpu.h
@@ -34,7 +34,7 @@ typedef struct CPULM32State CPULM32State;
 
 #define NB_MMU_MODES 1
 #define TARGET_PAGE_BITS 12
-static inline int cpu_mmu_index(CPULM32State *env)
+static inline int cpu_mmu_index(CPULM32State *env, bool ifetch)
 {
     return 0;
 }
diff --git a/target-m68k/cpu.h b/target-m68k/cpu.h
index 9a62f6c..43a9a1c 100644
--- a/target-m68k/cpu.h
+++ b/target-m68k/cpu.h
@@ -223,7 +223,7 @@ void register_m68k_insns (CPUM68KState *env);
 #define MMU_MODE0_SUFFIX _kernel
 #define MMU_MODE1_SUFFIX _user
 #define MMU_USER_IDX 1
-static inline int cpu_mmu_index (CPUM68KState *env)
+static inline int cpu_mmu_index (CPUM68KState *env, bool ifetch)
 {
     return (env->sr & SR_S) == 0 ? 1 : 0;
 }
diff --git a/target-microblaze/cpu.h b/target-microblaze/cpu.h
index 7e20e59..402124a 100644
--- a/target-microblaze/cpu.h
+++ b/target-microblaze/cpu.h
@@ -309,7 +309,7 @@ int cpu_mb_signal_handler(int host_signum, void *pinfo,
 #define MMU_USER_IDX    2
 /* See NB_MMU_MODES further up the file.  */
 
-static inline int cpu_mmu_index (CPUMBState *env)
+static inline int cpu_mmu_index (CPUMBState *env, bool ifetch)
 {
         /* Are we in nommu mode?.  */
         if (!(env->sregs[SR_MSR] & MSR_VM))
diff --git a/target-microblaze/mmu.c b/target-microblaze/mmu.c
index 728da13..2ef1dc2 100644
--- a/target-microblaze/mmu.c
+++ b/target-microblaze/mmu.c
@@ -279,7 +279,7 @@ void mmu_write(CPUMBState *env, uint32_t rn, uint32_t v)
             }
 
             hit = mmu_translate(&env->mmu, &lu,
-                                v & TLB_EPN_MASK, 0, cpu_mmu_index(env));
+                                v & TLB_EPN_MASK, 0, cpu_mmu_index(env, false));
             if (hit) {
                 env->mmu.regs[MMU_R_TLBX] = lu.idx;
             } else
diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c
index f4e969b..2310f0d 100644
--- a/target-microblaze/translate.c
+++ b/target-microblaze/translate.c
@@ -433,7 +433,7 @@ static void dec_msr(DisasContext *dc)
     CPUState *cs = CPU(dc->cpu);
     TCGv t0, t1;
     unsigned int sr, to, rn;
-    int mem_index = cpu_mmu_index(&dc->cpu->env);
+    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
 
     sr = dc->imm & ((1 << 14) - 1);
     to = dc->imm & (1 << 14);
@@ -745,7 +745,7 @@ static void dec_bit(DisasContext *dc)
     CPUState *cs = CPU(dc->cpu);
     TCGv t0;
     unsigned int op;
-    int mem_index = cpu_mmu_index(&dc->cpu->env);
+    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
 
     op = dc->ir & ((1 << 9) - 1);
     switch (op) {
@@ -1009,7 +1009,7 @@ static void dec_load(DisasContext *dc)
      * address and if that succeeds we write into the destination reg.
      */
     v = tcg_temp_new();
-    tcg_gen_qemu_ld_tl(v, *addr, cpu_mmu_index(&dc->cpu->env), mop);
+    tcg_gen_qemu_ld_tl(v, *addr, cpu_mmu_index(&dc->cpu->env, false), mop);
 
     if ((dc->cpu->env.pvr.regs[2] & PVR2_UNALIGNED_EXC_MASK) && size > 1) {
         tcg_gen_movi_tl(cpu_SR[SR_PC], dc->pc);
@@ -1087,7 +1087,7 @@ static void dec_store(DisasContext *dc)
            this compare and the following write to be atomic. For user
            emulation we need to add atomicity between threads.  */
         tval = tcg_temp_new();
-        tcg_gen_qemu_ld_tl(tval, swx_addr, cpu_mmu_index(&dc->cpu->env),
+        tcg_gen_qemu_ld_tl(tval, swx_addr, cpu_mmu_index(&dc->cpu->env, false),
                            MO_TEUL);
         tcg_gen_brcond_tl(TCG_COND_NE, env_res_val, tval, swx_skip);
         write_carryi(dc, 0);
@@ -1138,7 +1138,7 @@ static void dec_store(DisasContext *dc)
                 break;
         }
     }
-    tcg_gen_qemu_st_tl(cpu_R[dc->rd], *addr, cpu_mmu_index(&dc->cpu->env), mop);
+    tcg_gen_qemu_st_tl(cpu_R[dc->rd], *addr, cpu_mmu_index(&dc->cpu->env, false), mop);
 
     /* Verify alignment if needed.  */
     if ((dc->cpu->env.pvr.regs[2] & PVR2_UNALIGNED_EXC_MASK) && size > 1) {
@@ -1234,7 +1234,7 @@ static void dec_bcc(DisasContext *dc)
 static void dec_br(DisasContext *dc)
 {
     unsigned int dslot, link, abs, mbar;
-    int mem_index = cpu_mmu_index(&dc->cpu->env);
+    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
 
     dslot = dc->ir & (1 << 20);
     abs = dc->ir & (1 << 19);
@@ -1366,7 +1366,7 @@ static inline void do_rte(DisasContext *dc)
 static void dec_rts(DisasContext *dc)
 {
     unsigned int b_bit, i_bit, e_bit;
-    int mem_index = cpu_mmu_index(&dc->cpu->env);
+    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
 
     i_bit = dc->ir & (1 << 21);
     b_bit = dc->ir & (1 << 22);
@@ -1538,7 +1538,7 @@ static void dec_null(DisasContext *dc)
 /* Insns connected to FSL or AXI stream attached devices.  */
 static void dec_stream(DisasContext *dc)
 {
-    int mem_index = cpu_mmu_index(&dc->cpu->env);
+    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
     TCGv_i32 t_id, t_ctrl;
     int ctrl;
 
diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index c91883d..2acc4b3 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -634,7 +634,7 @@ extern uint32_t cpu_rddsp(uint32_t mask_num, CPUMIPSState *env);
 #define MMU_MODE1_SUFFIX _super
 #define MMU_MODE2_SUFFIX _user
 #define MMU_USER_IDX 2
-static inline int cpu_mmu_index (CPUMIPSState *env)
+static inline int cpu_mmu_index (CPUMIPSState *env, bool ifetch)
 {
     return env->hflags & MIPS_HFLAG_KSU;
 }
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index 809a061..1aa9e3c 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -3629,7 +3629,7 @@ FOP_CONDN_S(sne,  (float32_lt(fst1, fst0, &env->active_fpu.fp_status)
 #if !defined(CONFIG_USER_ONLY)
 #define MEMOP_IDX(DF)                                           \
         TCGMemOpIdx oi = make_memop_idx(MO_TE | DF | MO_UNALN,  \
-                                        cpu_mmu_index(env));
+                                        cpu_mmu_index(env, false));
 #else
 #define MEMOP_IDX(DF)
 #endif
@@ -3685,7 +3685,7 @@ void helper_msa_st_ ## TYPE(CPUMIPSState *env, uint32_t wd,             \
                             target_ulong addr)                          \
 {                                                                       \
     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
-    int mmu_idx = cpu_mmu_index(env);                                   \
+    int mmu_idx = cpu_mmu_index(env, false);                           \
     int i;                                                              \
     MEMOP_IDX(DF)                                                       \
     ensure_writable_pages(env, addr, mmu_idx, GETRA());                 \
diff --git a/target-moxie/cpu.h b/target-moxie/cpu.h
index 29572aa..15ca15b 100644
--- a/target-moxie/cpu.h
+++ b/target-moxie/cpu.h
@@ -127,7 +127,7 @@ int cpu_moxie_signal_handler(int host_signum, void *pinfo,
 #define cpu_gen_code cpu_moxie_gen_code
 #define cpu_signal_handler cpu_moxie_signal_handler
 
-static inline int cpu_mmu_index(CPUMoxieState *env)
+static inline int cpu_mmu_index(CPUMoxieState *env, bool ifetch)
 {
     return 0;
 }
diff --git a/target-openrisc/cpu.h b/target-openrisc/cpu.h
index 36c4f20..560210d9 100644
--- a/target-openrisc/cpu.h
+++ b/target-openrisc/cpu.h
@@ -403,7 +403,7 @@ static inline void cpu_get_tb_cpu_state(CPUOpenRISCState *env,
     *flags = (env->flags & D_FLAG);
 }
 
-static inline int cpu_mmu_index(CPUOpenRISCState *env)
+static inline int cpu_mmu_index(CPUOpenRISCState *env, bool ifetch)
 {
     if (!(env->sr & SR_IME)) {
         return MMU_NOMMU_IDX;
diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c
index a62cbf4..58f6e8b 100644
--- a/target-openrisc/translate.c
+++ b/target-openrisc/translate.c
@@ -1653,7 +1653,7 @@ static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
     dc->ppc = pc_start;
     dc->pc = pc_start;
     dc->flags = cpu->env.cpucfgr;
-    dc->mem_idx = cpu_mmu_index(&cpu->env);
+    dc->mem_idx = cpu_mmu_index(&cpu->env, false);
     dc->synced_flags = dc->tb_flags = tb->flags;
     dc->delayed_branch = !!(dc->tb_flags & D_FLAG);
     dc->singlestep_enabled = cs->singlestep_enabled;
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 5dfd195..0a0c47e 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1247,7 +1247,7 @@ int ppc_dcr_write (ppc_dcr_t *dcr_env, int dcrn, uint32_t val);
 
 /* MMU modes definitions */
 #define MMU_USER_IDX 0
-static inline int cpu_mmu_index (CPUPPCState *env)
+static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 {
     return env->mmu_idx;
 }
diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index 63aebf4..6bbad92 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -310,7 +310,7 @@ static inline CPU_DoubleU *get_freg(CPUS390XState *cs, int nr)
 #define MMU_SECONDARY_IDX       1
 #define MMU_HOME_IDX            2
 
-static inline int cpu_mmu_index (CPUS390XState *env)
+static inline int cpu_mmu_index (CPUS390XState *env, bool ifetch)
 {
     switch (env->psw.mask & PSW_MASK_ASC) {
     case PSW_ASC_PRIMARY:
diff --git a/target-s390x/mem_helper.c b/target-s390x/mem_helper.c
index 6f8bd79..51b5b04 100644
--- a/target-s390x/mem_helper.c
+++ b/target-s390x/mem_helper.c
@@ -68,7 +68,7 @@ static inline uint64_t adj_len_to_page(uint64_t len, uint64_t addr)
 static void fast_memset(CPUS390XState *env, uint64_t dest, uint8_t byte,
                         uint32_t l)
 {
-    int mmu_idx = cpu_mmu_index(env);
+    int mmu_idx = cpu_mmu_index(env, false);
 
     while (l > 0) {
         void *p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, mmu_idx);
@@ -91,7 +91,7 @@ static void fast_memset(CPUS390XState *env, uint64_t dest, uint8_t byte,
 static void fast_memmove(CPUS390XState *env, uint64_t dest, uint64_t src,
                          uint32_t l)
 {
-    int mmu_idx = cpu_mmu_index(env);
+    int mmu_idx = cpu_mmu_index(env, false);
 
     while (l > 0) {
         void *src_p = tlb_vaddr_to_host(env, src, MMU_DATA_LOAD, mmu_idx);
diff --git a/target-sh4/cpu.h b/target-sh4/cpu.h
index 34bb3d7..1f68b27 100644
--- a/target-sh4/cpu.h
+++ b/target-sh4/cpu.h
@@ -235,7 +235,7 @@ void cpu_load_tlb(CPUSH4State * env);
 #define MMU_MODE0_SUFFIX _kernel
 #define MMU_MODE1_SUFFIX _user
 #define MMU_USER_IDX 1
-static inline int cpu_mmu_index (CPUSH4State *env)
+static inline int cpu_mmu_index (CPUSH4State *env, bool ifetch)
 {
     return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0;
 }
diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
index 0522b65..72ea171 100644
--- a/target-sparc/cpu.h
+++ b/target-sparc/cpu.h
@@ -642,7 +642,7 @@ static inline int cpu_supervisor_mode(CPUSPARCState *env1)
 }
 #endif
 
-static inline int cpu_mmu_index(CPUSPARCState *env1)
+static inline int cpu_mmu_index(CPUSPARCState *env1, bool ifetch)
 {
 #if defined(CONFIG_USER_ONLY)
     return MMU_USER_IDX;
diff --git a/target-sparc/mmu_helper.c b/target-sparc/mmu_helper.c
index 2a0c6f0..7495406 100644
--- a/target-sparc/mmu_helper.c
+++ b/target-sparc/mmu_helper.c
@@ -849,7 +849,7 @@ hwaddr sparc_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
     SPARCCPU *cpu = SPARC_CPU(cs);
     CPUSPARCState *env = &cpu->env;
     hwaddr phys_addr;
-    int mmu_idx = cpu_mmu_index(env);
+    int mmu_idx = cpu_mmu_index(env, false);
     MemoryRegionSection section;
 
     if (cpu_sparc_get_phys_page(env, &phys_addr, addr, 2, mmu_idx) != 0) {
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index c58dd4e..e1579be 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -5234,7 +5234,7 @@ static inline void gen_intermediate_code_internal(SPARCCPU *cpu,
     last_pc = dc->pc;
     dc->npc = (target_ulong) tb->cs_base;
     dc->cc_op = CC_OP_DYNAMIC;
-    dc->mem_idx = cpu_mmu_index(env);
+    dc->mem_idx = cpu_mmu_index(env, false);
     dc->def = env->def;
     dc->fpu_enabled = tb_fpu_enabled(tb->flags);
     dc->address_mask_32bit = tb_am_enabled(tb->flags);
diff --git a/target-tricore/cpu.h b/target-tricore/cpu.h
index 916ee27..42751e8 100644
--- a/target-tricore/cpu.h
+++ b/target-tricore/cpu.h
@@ -350,7 +350,7 @@ void tricore_cpu_list(FILE *f, fprintf_function cpu_fprintf);
 #define cpu_signal_handler cpu_tricore_signal_handler
 #define cpu_list tricore_cpu_list
 
-static inline int cpu_mmu_index(CPUTriCoreState *env)
+static inline int cpu_mmu_index(CPUTriCoreState *env, bool ifetch)
 {
     return 0;
 }
diff --git a/target-tricore/translate.c b/target-tricore/translate.c
index 7dc7a32..966ac5d 100644
--- a/target-tricore/translate.c
+++ b/target-tricore/translate.c
@@ -8287,7 +8287,7 @@ gen_intermediate_code_internal(TriCoreCPU *cpu, struct TranslationBlock *tb,
     ctx.tb = tb;
     ctx.singlestep_enabled = cs->singlestep_enabled;
     ctx.bstate = BS_NONE;
-    ctx.mem_idx = cpu_mmu_index(env);
+    ctx.mem_idx = cpu_mmu_index(env, false);
 
     tcg_clear_temp_count();
     gen_tb_start(tb);
diff --git a/target-unicore32/cpu.h b/target-unicore32/cpu.h
index 45e31e5..121e528 100644
--- a/target-unicore32/cpu.h
+++ b/target-unicore32/cpu.h
@@ -131,7 +131,7 @@ int uc32_cpu_signal_handler(int host_signum, void *pinfo, void *puc);
 #define MMU_MODE0_SUFFIX _kernel
 #define MMU_MODE1_SUFFIX _user
 #define MMU_USER_IDX 1
-static inline int cpu_mmu_index(CPUUniCore32State *env)
+static inline int cpu_mmu_index(CPUUniCore32State *env, bool ifetch)
 {
     return (env->uncached_asr & ASR_M) == ASR_MODE_USER ? 1 : 0;
 }
diff --git a/target-xtensa/cpu.h b/target-xtensa/cpu.h
index 96bfc82..dbd2c9c 100644
--- a/target-xtensa/cpu.h
+++ b/target-xtensa/cpu.h
@@ -492,7 +492,7 @@ static inline uint32_t xtensa_replicate_windowstart(CPUXtensaState *env)
 #define MMU_MODE2_SUFFIX _ring2
 #define MMU_MODE3_SUFFIX _ring3
 
-static inline int cpu_mmu_index(CPUXtensaState *env)
+static inline int cpu_mmu_index(CPUXtensaState *env, bool ifetch)
 {
     return xtensa_get_cring(env);
 }
-- 
2.4.3


* [Qemu-devel] [PATCH 3/5] ppc: Use split I/D mmu modes to avoid flushes on interrupts
From: Benjamin Herrenschmidt @ 2015-08-17  7:34 UTC
  To: qemu-devel; +Cc: Paolo Bonzini, qemu-ppc, Alexander Graf, Aurelien Jarno

We rework the way the MMU indices are calculated, providing separate
indices for the I and D sides based on MSR:IR and MSR:DR respectively,
and thus no longer need to flush the TLB on context changes. This also
adds correct support for HV as a separate address space.
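
As a worked example of the new server-side encoding (a standalone
sketch of what hreg_compute_mem_idx() computes below; the helper name
is made up for illustration):

    #include <stdbool.h>

    /* 0..2 = guest user / kernel virtual / kernel real; +3 = HV range */
    static int server_mmu_idx(bool pr, bool hv, bool relocate)
    {
        int idx = pr ? 0 : (relocate ? 1 : 2);
        return hv ? idx + 3 : idx;
    }

An HV kernel running with MSR:IR=1 and MSR:DR=0 thus gets
server_mmu_idx(false, true, true) == 4 for instructions and
server_mmu_idx(false, true, false) == 5 for data, matching the mode
table in the patch.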

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu.h         | 11 +++++++---
 target-ppc/excp_helper.c | 11 ----------
 target-ppc/helper_regs.h | 54 +++++++++++++++++++++++++++++++++++++++++-------
 target-ppc/machine.c     |  4 +++-
 target-ppc/translate.c   |  7 ++++---
 5 files changed, 62 insertions(+), 25 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 0a0c47e..fb678a0 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -462,6 +462,8 @@ struct ppc_slb_t {
 #define MSR_EP   6  /* Exception prefix on 601                               */
 #define MSR_IR   5  /* Instruction relocate                                  */
 #define MSR_DR   4  /* Data relocate                                         */
+#define MSR_IS   5  /* Instruction address space (BookE)                     */
+#define MSR_DS   4  /* Data address space (BookE)                            */
 #define MSR_PE   3  /* Protection enable on 403                              */
 #define MSR_PX   2  /* Protection exclusive on 403                  x        */
 #define MSR_PMM  2  /* Performance monitor mark on POWER            x        */
@@ -505,6 +507,8 @@ struct ppc_slb_t {
 #define msr_ep   ((env->msr >> MSR_EP)   & 1)
 #define msr_ir   ((env->msr >> MSR_IR)   & 1)
 #define msr_dr   ((env->msr >> MSR_DR)   & 1)
+#define msr_is   ((env->msr >> MSR_IS)   & 1)
+#define msr_ds   ((env->msr >> MSR_DS)   & 1)
 #define msr_pe   ((env->msr >> MSR_PE)   & 1)
 #define msr_px   ((env->msr >> MSR_PX)   & 1)
 #define msr_pmm  ((env->msr >> MSR_PMM)  & 1)
@@ -944,7 +948,7 @@ struct ppc_segment_page_sizes {
 
 /*****************************************************************************/
 /* The whole PowerPC CPU context */
-#define NB_MMU_MODES 3
+#define NB_MMU_MODES    8
 
 #define PPC_CPU_OPCODES_LEN          0x40
 #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20
@@ -1107,7 +1111,8 @@ struct CPUPPCState {
     /* Those resources are used only in QEMU core */
     target_ulong hflags;      /* hflags is a MSR & HFLAGS_MASK         */
     target_ulong hflags_nmsr; /* specific hflags, not coming from MSR */
-    int mmu_idx;         /* precomputed MMU index to speed up mem accesses */
+    int immu_idx;         /* precomputed MMU index to speed up insn access */
+    int dmmu_idx;         /* precomputed MMU index to speed up data accesses */
 
     /* Power management */
     int (*check_pow)(CPUPPCState *env);
@@ -1249,7 +1254,7 @@ int ppc_dcr_write (ppc_dcr_t *dcr_env, int dcrn, uint32_t val);
 #define MMU_USER_IDX 0
 static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 {
-    return env->mmu_idx;
+    return ifetch ? env->immu_idx : env->dmmu_idx;
 }
 
 #include "exec/cpu-all.h"
diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index b803475..6f4ea28 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -623,9 +623,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
 
     if (env->spr[SPR_LPCR] & LPCR_AIL) {
         new_msr |= (1 << MSR_IR) | (1 << MSR_DR);
-    } else if (msr & ((1 << MSR_IR) | (1 << MSR_DR))) {
-        /* If we disactivated any translation, flush TLBs */
-        tlb_flush(cs, 1);
     }
 
 #ifdef TARGET_PPC64
@@ -674,14 +671,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
     /* Reset exception state */
     cs->exception_index = POWERPC_EXCP_NONE;
     env->error_code = 0;
-
-    if ((env->mmu_model == POWERPC_MMU_BOOKE) ||
-        (env->mmu_model == POWERPC_MMU_BOOKE206)) {
-        /* XXX: The BookE changes address space when switching modes,
-                we should probably implement that as different MMU indexes,
-                but for the moment we do it the slow way and flush all.  */
-        tlb_flush(cs, 1);
-    }
 }
 
 void ppc_cpu_do_interrupt(CPUState *cs)
diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
index 271fddf..f7edd5b 100644
--- a/target-ppc/helper_regs.h
+++ b/target-ppc/helper_regs.h
@@ -41,11 +41,50 @@ static inline void hreg_swap_gpr_tgpr(CPUPPCState *env)
 
 static inline void hreg_compute_mem_idx(CPUPPCState *env)
 {
-    /* Precompute MMU index */
-    if (msr_pr == 0 && msr_hv != 0) {
-        env->mmu_idx = 2;
+    /* This is our encoding for server processors
+     *
+     *   0 = Guest User space virtual mode
+     *   1 = Guest Kernel space virtual mode
+     *   2 = Guest Kernel space real mode
+     *   3 = HV User space virtual mode
+     *   4 = HV Kernel space virtual mode
+     *   5 = HV Kernel space real mode
+     *
+     * The combination PR=1 IR&DR=0 is invalid; we will treat
+     * it as IR=DR=1.
+     *
+     * For BookE, we need 8 MMU modes as follows:
+     *
+     *  0 = AS 0 HV User space
+     *  1 = AS 0 HV Kernel space
+     *  2 = AS 1 HV User space
+     *  3 = AS 1 HV Kernel space
+     *  4 = AS 0 Guest User space
+     *  5 = AS 0 Guest Kernel space
+     *  6 = AS 1 Guest User space
+     *  7 = AS 1 Guest Kernel space
+     */
+    if (env->mmu_model & POWERPC_MMU_BOOKE) {
+        env->immu_idx = env->dmmu_idx = msr_pr ? 0 : 1;
+        env->immu_idx += msr_is ? 2 : 0;
+        env->dmmu_idx += msr_ds ? 2 : 0;
+        env->immu_idx += msr_gs ? 4 : 0;
+        env->dmmu_idx += msr_gs ? 4 : 0;
     } else {
-        env->mmu_idx = 1 - msr_pr;
+        /* First calculate a base value independent of HV */
+        if (msr_pr != 0) {
+            /* User space, ignore IR and DR */
+            env->immu_idx = env->dmmu_idx = 0;
+        } else {
+            /* Kernel, setup a base I/D value */
+            env->immu_idx = msr_ir ? 1 : 2;
+            env->dmmu_idx = msr_dr ? 1 : 2;
+        }
+        /* Then offset it for HV */
+        if (msr_hv) {
+            env->immu_idx += 3;
+            env->dmmu_idx += 3;
+        }
     }
 }
 
@@ -82,9 +121,10 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
     }
     if (((value >> MSR_IR) & 1) != msr_ir ||
         ((value >> MSR_DR) & 1) != msr_dr) {
-        /* Flush all tlb when changing translation mode */
-        tlb_flush(cs, 1);
-        excp = POWERPC_EXCP_NONE;
+        cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
+    }
+    if ((env->mmu_model & POWERPC_MMU_BOOKE) &&
+        ((value >> MSR_GS) & 1) != msr_gs) {
         cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
     }
     if (unlikely((env->flags & POWERPC_FLAG_TGPR) &&
diff --git a/target-ppc/machine.c b/target-ppc/machine.c
index f4ac761..b969492 100644
--- a/target-ppc/machine.c
+++ b/target-ppc/machine.c
@@ -90,9 +90,11 @@ static int cpu_load_old(QEMUFile *f, void *opaque, int version_id)
     qemu_get_betls(f, &env->nip);
     qemu_get_betls(f, &env->hflags);
     qemu_get_betls(f, &env->hflags_nmsr);
-    qemu_get_sbe32s(f, &env->mmu_idx);
     qemu_get_sbe32(f); /* Discard unused power_mode */
 
+    /* Ignore saved mmu_idx, recompute */
+    hreg_compute_mem_idx(env);
+
     return 0;
 }
 
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 84c5cea..dee0cc8 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -11213,8 +11213,9 @@ void ppc_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
                 env->nip, env->lr, env->ctr, cpu_read_xer(env),
                 cs->cpu_index);
     cpu_fprintf(f, "MSR " TARGET_FMT_lx " HID0 " TARGET_FMT_lx "  HF "
-                TARGET_FMT_lx " idx %d\n", env->msr, env->spr[SPR_HID0],
-                env->hflags, env->mmu_idx);
+                TARGET_FMT_lx " iidx %d didx %d\n",
+                env->msr, env->spr[SPR_HID0],
+                env->hflags, env->immu_idx, env->dmmu_idx);
 #if !defined(NO_TIMER_DUMP)
     cpu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64
 #if !defined(CONFIG_USER_ONLY)
@@ -11423,7 +11424,7 @@ static inline void gen_intermediate_code_internal(PowerPCCPU *cpu,
     ctx.spr_cb = env->spr_cb;
     ctx.pr = msr_pr;
     ctx.hv = !msr_pr && msr_hv;
-    ctx.mem_idx = env->mmu_idx;
+    ctx.mem_idx = env->dmmu_idx;
     ctx.insns_flags = env->insns_flags;
     ctx.insns_flags2 = env->insns_flags2;
     ctx.access_type = -1;
-- 
2.4.3


* [Qemu-devel] [PATCH 4/5] ppc: Do some batching of TCG tlb flushes
From: Benjamin Herrenschmidt @ 2015-08-17  7:34 UTC
  To: qemu-devel; +Cc: Paolo Bonzini, qemu-ppc, Alexander Graf, Aurelien Jarno

On ppc64 especially, we flush the TLB on any slbie or tlbie instruction.

However, those instructions often come in bursts of 3 or more (a context
switch will favor a series of slbie's over an slbia if the SLB has fewer
than a certain number of entries in it, for example), and tlbie's can
happen in series as well; with PAPR, H_BULK_REMOVE can remove up to 4
entries at a time.

Doing a tlb_flush() each time is a waste of time. We end up doing a memset
of the whole TLB, reloading it for the next instruction, memset'ing again,
etc...

Those instructions don't have to take effect immediately: slbie can
wait for the next context-synchronizing event, and tlbie for the next
tlbsync.

This implements batching by keeping a flag that indicates that we have a
TLB in need of flushing. We check it on interrupts, rfi's, isync's and
tlbsync and flush the TLB if needed.
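
Schematically, the deferral is a flag with cheap producers and a few
consumers (a sketch of the pattern, not extra patch content):

    /* slbie/slbia/tlbie paths only mark the TLB as stale: */
    env->tlb_need_flush = 1;

    /* interrupts, rfi, isync and ptesync/tlbsync then do: */
    if (env->tlb_need_flush) {
        env->tlb_need_flush = 0;
        tlb_flush(cs, 1);
    }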

This reduces the number of tlb_flush() calls during a boot to the first
dialog screen of the Ubuntu installer from roughly 360K down to 36K.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/ppc/spapr_hcall.c     | 12 +++++++++---
 target-ppc/cpu.h         |  2 ++
 target-ppc/excp_helper.c |  9 +++++++++
 target-ppc/helper.h      |  1 +
 target-ppc/helper_regs.h | 13 +++++++++++++
 target-ppc/mmu-hash64.c  | 12 +++---------
 target-ppc/mmu_helper.c  |  9 ++++++++-
 target-ppc/translate.c   | 39 ++++++++++++++++++++++++++++++++++++---
 8 files changed, 81 insertions(+), 16 deletions(-)

diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 652ddf6..3f4e275 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -220,6 +220,7 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 
     switch (ret) {
     case REMOVE_SUCCESS:
+        check_tlb_flush(env);
         return H_SUCCESS;
 
     case REMOVE_NOT_FOUND:
@@ -257,6 +258,7 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                   target_ulong opcode, target_ulong *args)
 {
     CPUPPCState *env = &cpu->env;
+    target_ulong rc = H_SUCCESS;
     int i;
 
     for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
@@ -290,14 +292,18 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
             break;
 
         case REMOVE_PARM:
-            return H_PARAMETER;
+            rc = H_PARAMETER;
+            goto exit;
 
         case REMOVE_HW:
-            return H_HARDWARE;
+            rc = H_HARDWARE;
+            goto exit;
         }
     }
+ exit:
+    check_tlb_flush(env);
 
-    return H_SUCCESS;
+    return rc;
 }
 
 static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index fb678a0..b68d30f 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1013,6 +1013,8 @@ struct CPUPPCState {
     /* PowerPC 64 SLB area */
     ppc_slb_t slb[MAX_SLB_ENTRIES];
     int32_t slb_nr;
+    /* tcg TLB needs flush (deferred slb inval instruction typically) */
+    uint32_t tlb_need_flush;
 #endif
     /* segment registers */
     hwaddr htab_base;
diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index 6f4ea28..2a5f4a2 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -671,6 +671,11 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
     /* Reset exception state */
     cs->exception_index = POWERPC_EXCP_NONE;
     env->error_code = 0;
+
+    /* Any interrupt is context synchronizing, check if TCG TLB
+     * needs a delayed flush on ppc64
+     */
+    check_tlb_flush(env);
 }
 
 void ppc_cpu_do_interrupt(CPUState *cs)
@@ -692,6 +697,7 @@ static void ppc_hw_interrupt(CPUPPCState *env)
                   __func__, env, env->pending_interrupts,
                   cs->interrupt_request, (int)msr_me, (int)msr_ee);
 #endif
+
     /* External reset */
     if (env->pending_interrupts & (1 << PPC_INTERRUPT_RESET)) {
         env->pending_interrupts &= ~(1 << PPC_INTERRUPT_RESET);
@@ -896,6 +902,9 @@ static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr,
      * as rfi is always the last insn of a TB
      */
     cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
+
+    /* Context synchronizing: check if TCG TLB needs flush */
+    check_tlb_flush(env);
 }
 
 void helper_rfi(CPUPPCState *env)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 869be15..ff2d50b 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -16,6 +16,7 @@ DEF_HELPER_1(rfmci, void, env)
 DEF_HELPER_1(rfid, void, env)
 DEF_HELPER_1(hrfid, void, env)
 #endif
+DEF_HELPER_1(check_tlb_flush, void, env)
 #endif
 
 DEF_HELPER_3(lmw, void, env, tl, i32)
diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
index f7edd5b..57da931 100644
--- a/target-ppc/helper_regs.h
+++ b/target-ppc/helper_regs.h
@@ -151,4 +151,17 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
     return excp;
 }
 
+#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
+static inline void check_tlb_flush(CPUPPCState *env)
+{
+    CPUState *cs = CPU(ppc_env_get_cpu(env));
+    if (env->tlb_need_flush) {
+        env->tlb_need_flush = 0;
+        tlb_flush(cs, 1);
+    }
+}
+#else
+static inline void check_tlb_flush(CPUPPCState *env) { }
+#endif
+
 #endif /* !defined(__HELPER_REGS_H__) */
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index 7df6ede..71e1d14 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -97,10 +97,8 @@ void dump_slb(FILE *f, fprintf_function cpu_fprintf, CPUPPCState *env)
 
 void helper_slbia(CPUPPCState *env)
 {
-    PowerPCCPU *cpu = ppc_env_get_cpu(env);
-    int n, do_invalidate;
+    int n;
 
-    do_invalidate = 0;
     /* XXX: Warning: slbia never invalidates the first segment */
     for (n = 1; n < env->slb_nr; n++) {
         ppc_slb_t *slb = &env->slb[n];
@@ -111,17 +109,13 @@ void helper_slbia(CPUPPCState *env)
              *      and we still don't have a tlb_flush_mask(env, n, mask)
              *      in QEMU, we just invalidate all TLBs
              */
-            do_invalidate = 1;
+            env->tlb_need_flush = true;
         }
     }
-    if (do_invalidate) {
-        tlb_flush(CPU(cpu), 1);
-    }
 }
 
 void helper_slbie(CPUPPCState *env, target_ulong addr)
 {
-    PowerPCCPU *cpu = ppc_env_get_cpu(env);
     ppc_slb_t *slb;
 
     slb = slb_lookup(env, addr);
@@ -136,7 +130,7 @@ void helper_slbie(CPUPPCState *env, target_ulong addr)
          *      and we still don't have a tlb_flush_mask(env, n, mask)
          *      in QEMU, we just invalidate all TLBs
          */
-        tlb_flush(CPU(cpu), 1);
+        env->tlb_need_flush = true;
     }
 }
 
diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index 527c6ad..0f054ca 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -23,6 +23,7 @@
 #include "mmu-hash64.h"
 #include "mmu-hash32.h"
 #include "exec/cpu_ldst.h"
+#include "helper_regs.h"
 
 //#define DEBUG_MMU
 //#define DEBUG_BATS
@@ -1940,6 +1941,7 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
     case POWERPC_MMU_2_06:
     case POWERPC_MMU_2_06a:
     case POWERPC_MMU_2_06d:
+        env->tlb_need_flush = 0;
 #endif /* defined(TARGET_PPC64) */
         tlb_flush(CPU(cpu), 1);
         break;
@@ -2019,7 +2021,7 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
          *      and we still don't have a tlb_flush_mask(env, n, mask) in QEMU,
          *      we just invalidate all TLBs
          */
-        tlb_flush(CPU(cpu), 1);
+        env->tlb_need_flush = 1;
         break;
 #endif /* defined(TARGET_PPC64) */
     default:
@@ -2904,6 +2906,11 @@ void helper_booke206_tlbflush(CPUPPCState *env, target_ulong type)
 }
 
 
+void helper_check_tlb_flush(CPUPPCState *env)
+{
+    check_tlb_flush(env);
+}
+
 /*****************************************************************************/
 
 /* try to fill the TLB and return an exception if error. If retaddr is
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index dee0cc8..f4391d8 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -3299,9 +3299,32 @@ static void gen_eieio(DisasContext *ctx)
 {
 }
 
+#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
+static inline void gen_check_tlb_flush(DisasContext *ctx)
+{
+    TCGv_i32 t = tcg_temp_new_i32();
+    TCGLabel *l = gen_new_label();
+
+    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
+    tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l);
+    gen_helper_check_tlb_flush(cpu_env);
+    gen_set_label(l);
+    tcg_temp_free_i32(t);
+}
+#else
+static inline void gen_check_tlb_flush(DisasContext *ctx) { }
+#endif
+
 /* isync */
 static void gen_isync(DisasContext *ctx)
 {
+    /*
+     * We need to check for a pending TLB flush. This can only happen in
+     * kernel mode, however, so check MSR_PR.
+     */
+    if (!ctx->pr) {
+        gen_check_tlb_flush(ctx);
+    }
     gen_stop_exception(ctx);
 }
 
@@ -3458,6 +3481,15 @@ STCX(stqcx_, 16);
 /* sync */
 static void gen_sync(DisasContext *ctx)
 {
+    uint32_t l = (ctx->opcode >> 21) & 3;
+
+    /*
+     * For l == 2, it's a ptesync; we need to check for a pending TLB flush.
+     * This can only happen in kernel mode, however, so check MSR_PR as well.
+     */
+    if (l == 2 && !ctx->pr) {
+        gen_check_tlb_flush(ctx);
+    }
 }
 
 /* wait */
@@ -4851,10 +4883,11 @@ static void gen_tlbsync(DisasContext *ctx)
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
-    /* This has no effect: it should ensure that all previous
-     * tlbie have completed
+    /* tlbsync is a nop for server, where ptesync handles the delayed
+     * TLB flush; embedded, however, needs to deal with tlbsync. We don't
+     * try to be fancy and just swallow the overhead of checking for both.
      */
-    gen_stop_exception(ctx);
+    gen_check_tlb_flush(ctx);
 #endif
 }
 
-- 
2.4.3


* [Qemu-devel] [PATCH 5/5] tcg/ppc: Improve unaligned load/store handling on 64-bit backend
From: Benjamin Herrenschmidt @ 2015-08-17  7:34 UTC
  To: qemu-devel; +Cc: Paolo Bonzini, qemu-ppc, Alexander Graf, Aurelien Jarno

Currently, we get to the slow path for any unaligned access in the
backend, because we effectively preserve the bottom address bits
below the alignment requirement when comparing with the TLB entry,
so any non-0 bit there will cause the compare to fail.

For the same number of instructions, we can instead add the access
size - 1 to the address and stick to clearing all the bottom bits.

That means that normal unaligned accesses will not fall back (the HW
will handle them fine). Only when crossing a page boundary will we end
up with a mismatch, because we'll end up pointing to the next page,
which cannot possibly be in the same TLB entry.
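
A small numeric sketch of the trick, assuming 4K pages
(TARGET_PAGE_BITS = 12) and an 8-byte access (s_bits = 3); the helper
name is illustrative, not part of the backend:

    static uint64_t tlb_cmp_page(uint64_t addr, unsigned s_bits)
    {
        /* add access_size - 1, then clear the low page bits */
        return (addr + (1ULL << s_bits) - 1) & ~0xfffULL;
    }

    /* tlb_cmp_page(0x1004, 3) == 0x1000: unaligned, same page, can hit
     * tlb_cmp_page(0x1ff9, 3) == 0x2000: crosses the page, forced miss */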

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 tcg/ppc/tcg-target.c | 41 +++++++++++++++++++++++++++++++----------
 1 file changed, 31 insertions(+), 10 deletions(-)

diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 2b6eafa..ce8d546 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -1361,7 +1361,7 @@ static void * const qemu_st_helpers[16] = {
    in CR7, loads the addend of the TLB into R3, and returns the register
    containing the guest address (zero-extended into R4).  Clobbers R0 and R2. */
 
-static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits,
+static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc,
                                TCGReg addrlo, TCGReg addrhi,
                                int mem_index, bool is_read)
 {
@@ -1371,6 +1371,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits,
            : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
     int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
     TCGReg base = TCG_AREG0;
+    TCGMemOp s_bits = opc & MO_SIZE;
 
     /* Extract the page index, shifted into place for tlb index.  */
     if (TCG_TARGET_REG_BITS == 64) {
@@ -1422,17 +1423,37 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits,
        to minimize any load use delay.  */
     tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, add_off);
 
-    /* Clear the non-page, non-alignment bits from the address.  */
+    /* Clear the non-page, non-alignment bits from the address */
     if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) {
+        /* We don't support unaligned accesses on 32-bit; preserve
+         * the bottom bits and thus trigger a comparison failure on
+         * unaligned accesses.
+         */
         tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
                     (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS);
-    } else if (!s_bits) {
-        tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo,
-                    0, 63 - TARGET_PAGE_BITS);
+    } else if (s_bits) {
+        /* > byte access, we need to handle alignment */
+        if ((opc & MO_AMASK) == MO_ALIGN) {
+            /* Alignment required by the front-end, same as 32-bits */
+            tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo,
+                        64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits);
+            tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
+        } else {
+            /* We support unaligned accesses; we need to make sure we fail
+             * if we cross a page boundary. The trick is to add the
+             * access_size-1 to the address before masking the low bits.
+             * That will make the address overflow to the next page if we
+             * cross a page boundary, which will then force a mismatch of
+             * the TLB compare, since the next page cannot possibly be in
+             * the same TLB index.
+             */
+            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, (1 << s_bits) - 1));
+            tcg_out_rld(s, RLDICR, TCG_REG_R0, TCG_REG_R0,
+                        0, 63 - TARGET_PAGE_BITS);
+        }
     } else {
-        tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo,
-                    64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits);
-        tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
+        /* Byte access, just chop off the bits below the page index */
+        tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo, 0, 63 - TARGET_PAGE_BITS);
     }
 
     if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
@@ -1592,7 +1613,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
 
 #ifdef CONFIG_SOFTMMU
     mem_index = get_mmuidx(oi);
-    addrlo = tcg_out_tlb_read(s, s_bits, addrlo, addrhi, mem_index, true);
+    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true);
 
     /* Load a pointer into the current opcode w/conditional branch-link. */
     label_ptr = s->code_ptr;
@@ -1667,7 +1688,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
 
 #ifdef CONFIG_SOFTMMU
     mem_index = get_mmuidx(oi);
-    addrlo = tcg_out_tlb_read(s, s_bits, addrlo, addrhi, mem_index, false);
+    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false);
 
     /* Load a pointer into the current opcode w/conditional branch-link. */
     label_ptr = s->code_ptr;
-- 
2.4.3


* Re: [Qemu-devel] [PATCH 5/5] tcg/ppc: Improve unaligned load/store handling on 64-bit backend
From: Benjamin Herrenschmidt @ 2015-08-17  8:16 UTC
  To: qemu-devel; +Cc: Paolo Bonzini, qemu-ppc, Alexander Graf, Aurelien Jarno

On Mon, 2015-08-17 at 17:34 +1000, Benjamin Herrenschmidt wrote:
> Currently, we get to the slow path for any unaligned access in the
> backend, because we effectively preserve the bottom address bits
> below the alignment requirement when comparing with the TLB entry,
> so any non-0 bit there will cause the compare to fail.

Forget about this one; it was already picked up by Richard, and I forgot
about that when I did git send-email. The other 4, however, are
candidates for review/merge.

Cheers.
Ben.
