From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:34748) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1a1H5i-0007YF-LA for qemu-devel@nongnu.org; Tue, 24 Nov 2015 12:09:20 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1a1H5g-0006Jw-Lw for qemu-devel@nongnu.org; Tue, 24 Nov 2015 12:09:18 -0500 Received: from roura.ac.upc.edu ([147.83.33.10]:50148 helo=roura.ac.upc.es) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1a1H5g-0006Jl-1N for qemu-devel@nongnu.org; Tue, 24 Nov 2015 12:09:16 -0500 From: =?utf-8?b?TGx1w61z?= Vilanova Date: Tue, 24 Nov 2015 18:09:14 +0100 Message-Id: <144838495465.3052.12976986743758707300.stgit@localhost> In-Reply-To: <144838492534.3052.2948919558518613064.stgit@localhost> References: <144838492534.3052.2948919558518613064.stgit@localhost> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Subject: [Qemu-devel] =?utf-8?b?W1BBVENIIHYyIDA1LzEwXSBleGVjOiBbxadjZ10g?= =?utf-8?q?Use_multiple_physical_TB_caches?= List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: Stefan Hajnoczi , Eduardo Habkost , =?UTF-8?q?Andreas=20F=C3=A4rber?= The physical translation block cache is split into as many caches as wanted, and the virtual TB cache on each guest CPU uses a (potentially) different physical TB cache. This is later exploited to support different tracing event states on a per-vCPU basis.
Signed-off-by: Llu=C3=ADs Vilanova --- cpu-exec.c | 17 +++++ include/exec/exec-all.h | 10 +++ include/qom/cpu.h | 5 ++ qom/cpu.c | 9 +++ translate-all.c | 146 +++++++++++++++++++++++++++++++++++++++++= ------ translate-all.h | 49 ++++++++++++++++ 6 files changed, 214 insertions(+), 22 deletions(-) diff --git a/cpu-exec.c b/cpu-exec.c index c88d0ff..c258f16 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -27,6 +27,7 @@ #include "exec/address-spaces.h" #include "qemu/rcu.h" #include "exec/tb-hash.h" +#include "translate-all.h" #if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY) #include "hw/i386/apic.h" #endif @@ -224,7 +225,7 @@ static TranslationBlock *tb_find_physical(CPUState *c= pu, phys_pc =3D get_page_addr_code(env, pc); phys_page1 =3D phys_pc & TARGET_PAGE_MASK; h =3D tb_phys_hash_func(phys_pc); - ptb1 =3D &tcg_ctx.tb_ctx.tb_phys_hash[h]; + ptb1 =3D &tcg_ctx.tb_ctx.tb_phys_hash[cpu->tb_phys_idx][h]; for(;;) { tb =3D *ptb1; if (!tb) { @@ -253,8 +254,8 @@ static TranslationBlock *tb_find_physical(CPUState *c= pu, =20 /* Move the TB to the head of the list */ *ptb1 =3D tb->phys_hash_next; - tb->phys_hash_next =3D tcg_ctx.tb_ctx.tb_phys_hash[h]; - tcg_ctx.tb_ctx.tb_phys_hash[h] =3D tb; + tb->phys_hash_next =3D tcg_ctx.tb_ctx.tb_phys_hash[cpu->tb_phys_idx]= [h]; + tcg_ctx.tb_ctx.tb_phys_hash[cpu->tb_phys_idx][h] =3D tb; return tb; } =20 @@ -488,6 +489,16 @@ int cpu_exec(CPUState *cpu) cpu->exception_index =3D EXCP_INTERRUPT; cpu_loop_exit(cpu); } + if (unlikely(tcg_ctx.tb_ctx.tb_phys_hash_size_req !=3D + tcg_ctx.tb_ctx.tb_phys_hash_size)) { + if (tb_caches_apply() < 0) { + next_tb =3D 0; + } + } + if (unlikely(cpu->tb_phys_idx !=3D cpu->tb_phys_idx_req)= ) { + cpu_tb_cache_apply(cpu); + next_tb =3D 0; + } tb_lock(); tb =3D tb_find_fast(cpu); /* Note: we do it here to avoid a gcc bug on Mac OS X wh= en diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 0859873..ba8b15c 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -177,6 
+177,10 @@ static inline void tlb_flush_by_mmuidx(CPUState *cpu= , ...) #define USE_DIRECT_JUMP #endif =20 +/** + * TranslationBlock: + * @phys_idx: Index of physical TB cache where this TB has been allocate= d. + */ struct TranslationBlock { target_ulong pc; /* simulated PC corresponding to this block (EIP = + CS base) */ target_ulong cs_base; /* CS base for this block */ @@ -216,6 +220,8 @@ struct TranslationBlock { jmp_first */ struct TranslationBlock *jmp_next[2]; struct TranslationBlock *jmp_first; + + unsigned int phys_idx; }; =20 #include "qemu/thread.h" @@ -225,7 +231,9 @@ typedef struct TBContext TBContext; struct TBContext { =20 TranslationBlock *tbs; - TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE]; + TranslationBlock ***tb_phys_hash; + size_t tb_phys_hash_size; + size_t tb_phys_hash_size_req; int nb_tbs; /* any access to the tbs or the page table must use this lock */ QemuMutex tb_lock; diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 51a1323..40962e0 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -250,6 +250,8 @@ struct kvm_run; * @kvm_fd: vCPU file descriptor for KVM. * @work_mutex: Lock to prevent multiple access to queued_work_*. * @queued_work_first: First asynchronous work pending. + * @tb_phys_idx: Index of current phsyical TB cache. + * @tb_phys_idx_req: Index of requested phsyical TB cache. * * State of one CPU core or thread. */ @@ -314,6 +316,9 @@ struct CPUState { struct KVMState *kvm_state; struct kvm_run *kvm_run; =20 + unsigned int tb_phys_idx; + unsigned int tb_phys_idx_req; + /* TODO Move common fields from CPUArchState here. 
*/ int cpu_index; /* used by alpha TCG */ uint32_t halted; /* used by alpha, cris, ppc TCG */ diff --git a/qom/cpu.c b/qom/cpu.c index fb80d13..bb7a618 100644 --- a/qom/cpu.c +++ b/qom/cpu.c @@ -363,6 +363,14 @@ static void cpu_class_init(ObjectClass *klass, void = *data) dc->cannot_instantiate_with_device_add_yet =3D true; } =20 +static void cpu_init(Object *obj) +{ + CPUState *cpu =3D CPU(obj); + + cpu->tb_phys_idx =3D 0; + cpu->tb_phys_idx_req =3D 0; +} + static const TypeInfo cpu_type_info =3D { .name =3D TYPE_CPU, .parent =3D TYPE_DEVICE, @@ -372,6 +380,7 @@ static const TypeInfo cpu_type_info =3D { .abstract =3D true, .class_size =3D sizeof(CPUClass), .class_init =3D cpu_class_init, + .instance_init =3D cpu_init, }; =20 static void cpu_register_types(void) diff --git a/translate-all.c b/translate-all.c index ca4ea7f..9704efa 100644 --- a/translate-all.c +++ b/translate-all.c @@ -163,9 +163,22 @@ static void tb_link_page(TranslationBlock *tb, tb_pa= ge_addr_t phys_pc, tb_page_addr_t phys_page2); static TranslationBlock *tb_find_pc(uintptr_t tc_ptr); =20 +static void tb_phys_cache_alloc(unsigned int idx) +{ + size_t size =3D sizeof(tcg_ctx.tb_ctx.tb_phys_hash[0][0]) * + CODE_GEN_PHYS_HASH_SIZE; + tcg_ctx.tb_ctx.tb_phys_hash[idx] =3D malloc(size); + memset(tcg_ctx.tb_ctx.tb_phys_hash[idx], 0, size); +} + void cpu_gen_init(void) { tcg_context_init(&tcg_ctx);=20 + + tcg_ctx.tb_ctx.tb_phys_hash_size =3D 0; + tcg_ctx.tb_ctx.tb_phys_hash_size_req =3D 1; + tcg_ctx.tb_ctx.tb_phys_hash =3D NULL; + tb_caches_apply(); } =20 /* Encode VAL as a signed leb128 sequence at P. 
@@ -849,7 +862,12 @@ void tb_flush(CPUState *cpu) tb_flush_jmp_cache_all(cpu); } =20 - memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys= _hash)); + unsigned int cache; + for (cache =3D 0; cache < tb_caches_get(); cache++) { + memset(tcg_ctx.tb_ctx.tb_phys_hash[cache], 0, + (sizeof(tcg_ctx.tb_ctx.tb_phys_hash[0][0]) * + CODE_GEN_PHYS_HASH_SIZE)); + } page_flush_tb(); =20 tcg_ctx.code_gen_ptr =3D tcg_ctx.code_gen_buffer; @@ -863,16 +881,21 @@ void tb_flush(CPUState *cpu) static void tb_invalidate_check(target_ulong address) { TranslationBlock *tb; + unsigned int cache; int i; =20 address &=3D TARGET_PAGE_MASK; - for (i =3D 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) { - for (tb =3D tb_ctx.tb_phys_hash[i]; tb !=3D NULL; tb =3D tb->phy= s_hash_next) { - if (!(address + TARGET_PAGE_SIZE <=3D tb->pc || - address >=3D tb->pc + tb->size)) { - printf("ERROR invalidate: address=3D" TARGET_FMT_lx - " PC=3D%08lx size=3D%04x\n", - address, (long)tb->pc, tb->size); + for (cache =3D 0; cache < tb_caches_get(); cache++) { + for (i =3D 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) { + for (tb =3D tb_phys_hash[cache][i]; + tb !=3D NULL; + tb =3D tb->phys_hash_next) { + if (!(address + TARGET_PAGE_SIZE <=3D tb->pc || + address >=3D tb->pc + tb->size)) { + printf("ERROR invalidate: address=3D" TARGET_FMT_lx + " PC=3D%08lx size=3D%04x\n", + address, (long)tb->pc, tb->size); + } } } } @@ -882,16 +905,20 @@ static void tb_invalidate_check(target_ulong addres= s) static void tb_page_check(void) { TranslationBlock *tb; + unsigned int cache; int i, flags1, flags2; =20 - for (i =3D 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) { - for (tb =3D tcg_ctx.tb_ctx.tb_phys_hash[i]; tb !=3D NULL; - tb =3D tb->phys_hash_next) { - flags1 =3D page_get_flags(tb->pc); - flags2 =3D page_get_flags(tb->pc + tb->size - 1); - if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) { - printf("ERROR page flags: PC=3D%08lx size=3D%04x f1=3D%x= f2=3D%x\n", - (long)tb->pc, tb->size, flags1, flags2); + for (cache =3D 0; 
cache < tb_caches_get(); cache++) { + for (i =3D 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) { + for (tb =3D tb_phys_hash[cache][i]; + tb !=3D NULL; + tb =3D tb->phys_hash_next) { + flags1 =3D page_get_flags(tb->pc); + flags2 =3D page_get_flags(tb->pc + tb->size - 1); + if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) { + printf("ERROR page flags: PC=3D%08lx size=3D%04x f1=3D= %x f2=3D%x\n", + (long)tb->pc, tb->size, flags1, flags2); + } } } } @@ -978,7 +1005,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_pag= e_addr_t page_addr) /* remove the TB from the hash list */ phys_pc =3D tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); h =3D tb_phys_hash_func(phys_pc); - tb_hash_remove(&tcg_ctx.tb_ctx.tb_phys_hash[h], tb); + tb_hash_remove(&tcg_ctx.tb_ctx.tb_phys_hash[tb->phys_idx][h], tb); =20 /* remove the TB from the page list */ if (tb->page_addr[0] !=3D page_addr) { @@ -1053,6 +1080,86 @@ static void build_page_bitmap(PageDesc *p) } } =20 +size_t tb_caches_get(void) +{ + return tcg_ctx.tb_ctx.tb_phys_hash_size; +} + +void tb_caches_set(size_t count) +{ + assert(count > 0); +#ifndef NDEBUG + /* ensure no CPU is going to switch/stay in one of the removed cache= s */ + CPUState *cpu; + CPU_FOREACH(cpu) { + assert(cpu->tb_phys_idx_req < count); + } +#endif + tcg_ctx.tb_ctx.tb_phys_hash_size_req =3D count; +} + +int tb_caches_apply(void) +{ + struct TBContext *tb_ctx =3D &tcg_ctx.tb_ctx; + + if (likely(tb_ctx->tb_phys_hash_size_req =3D=3D tb_ctx->tb_phys_hash= _size)) { + return 0; + } + + int res =3D tb_ctx->tb_phys_hash_size_req < tb_ctx->tb_phys_hash_siz= e ? 
+ -1 : 1; + + if (res < 0) { + int i; + for (i =3D tb_ctx->tb_phys_hash_size_req; + i < tb_ctx->tb_phys_hash_size; + i++) { + free(tb_ctx->tb_phys_hash[i]); + } + + CPUState *cpu; + CPU_FOREACH(cpu) { + if (cpu->tb_phys_idx >=3D tb_ctx->tb_phys_hash_size_req) { + fprintf(stderr, + "CPU %d is using a deleted TB cache\n", cpu->cpu= _index); + exit(1); + } + } + } + + size_t size =3D sizeof(tb_ctx->tb_phys_hash[0]) * + tb_ctx->tb_phys_hash_size_req; + tb_ctx->tb_phys_hash =3D realloc(tb_ctx->tb_phys_hash, size); + int i; + for (i =3D tb_ctx->tb_phys_hash_size; + i < tb_ctx->tb_phys_hash_size_req; + i++) { + tb_phys_cache_alloc(i); + } + + tb_ctx->tb_phys_hash_size =3D tb_ctx->tb_phys_hash_size_req; + return res; +} + +unsigned int cpu_tb_cache_get(CPUState *cpu) +{ + return cpu->tb_phys_idx; +} + +void cpu_tb_cache_set(CPUState *cpu, unsigned int index) +{ + assert(index < tcg_ctx.tb_ctx.tb_phys_hash_size_req); + cpu->tb_phys_idx_req =3D index; + cpu->tcg_exit_req =3D true; +} + +void cpu_tb_cache_apply(CPUState *cpu) +{ + cpu->tb_phys_idx =3D cpu->tb_phys_idx_req; + tb_flush_jmp_cache_all(cpu); +} + + /* Called with mmap_lock held for user mode emulation. 
*/ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, target_ulong cs_base, @@ -1090,6 +1197,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb->cs_base =3D cs_base; tb->flags =3D flags; tb->cflags =3D cflags; + tb->phys_idx =3D ENV_GET_CPU(env)->tb_phys_idx; =20 #ifdef CONFIG_PROFILER tcg_ctx.tb_count1++; /* includes aborted translations because of @@ -1480,7 +1588,7 @@ static void tb_link_page(TranslationBlock *tb, tb_p= age_addr_t phys_pc, =20 /* add in the physical hash table */ h =3D tb_phys_hash_func(phys_pc); - ptb =3D &tcg_ctx.tb_ctx.tb_phys_hash[h]; + ptb =3D &tcg_ctx.tb_ctx.tb_phys_hash[tb->phys_idx][h]; tb->phys_hash_next =3D *ptb; *ptb =3D tb; =20 @@ -1643,6 +1751,8 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t reta= ddr) pc =3D tb->pc; cs_base =3D tb->cs_base; flags =3D tb->flags; + /* XXX: It is OK to invalidate only this TB, as this is the one trig= gering + * the memory access */ tb_phys_invalidate(tb, -1); if (tb->cflags & CF_NOCACHE) { if (tb->orig_tb) { diff --git a/translate-all.h b/translate-all.h index 0384640..d7ad063 100644 --- a/translate-all.h +++ b/translate-all.h @@ -19,6 +19,55 @@ #ifndef TRANSLATE_ALL_H #define TRANSLATE_ALL_H =20 + +/** + * tb_caches_get: + * + * Number of physical TB caches. + */ +size_t tb_caches_get(void); +/** + * tb_caches_set: + * + * Request a new number of physical TB caches. + */ +void tb_caches_set(size_t count); +/** + * tb_caches_apply: + * + * Apply the changes for a tb_caches_set() request. + * + * Returns: -1/1 if the number of caches has been shrinked/grown; 0 othe= rwise. + * + * Note: All TBs of eliminated caches are invalidated. + * + * Precondition: No vCPU uses any of the caches that will be removed (if= any; + * see cpu_tb_cache_set() and tb_caches_set()). + */ +int tb_caches_apply(void); +/** + * cpu_tb_cache_get: + * + * Get the physical TB cache index for the given CPU. 
+ */ +unsigned int cpu_tb_cache_get(CPUState *cpu); +/** + * cpu_tb_cache_set: + * + * Set the physical TB cache index for the given CPU. + * + * Will have effect at the beginning of the next executed TB. + */ +void cpu_tb_cache_set(CPUState *cpu, unsigned int index); +/** + * cpu_tb_cache_apply: + * + * Apply the changes for a cpu_tb_cache_set() request. + * + * Note: Invalidates the jump cache of the given CPU. + */ +void cpu_tb_cache_apply(CPUState *env); + /* translate-all.c */ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len); void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t = end,