From: "Lluís Vilanova" <vilanova@ac.upc.edu>
To: qemu-devel@nongnu.org
Cc: "Eduardo Habkost" <ehabkost@redhat.com>,
	"Peter Crosthwaite" <crosthwaite.peter@gmail.com>,
	"Stefan Hajnoczi" <stefanha@gmail.com>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Andreas Färber" <afaerber@suse.de>,
	"Richard Henderson" <rth@twiddle.net>
Subject: [Qemu-devel] [PATCH v4 5/9] exec: [tcg] Use multiple physical TB caches
Date: Fri, 15 Jan 2016 17:38:20 +0100
Message-ID: <145287589960.11400.13905635039886380360.stgit@localhost>
In-Reply-To: <145287587081.11400.4178335509020334684.stgit@localhost>

The physical translation block (TB) cache is split into a configurable number
of caches, and the virtual TB cache on each guest CPU can point to a
(potentially) different physical TB cache.

This is exploited later in the series to support per-vCPU tracing event
states.

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
---
 cpu-exec.c              |   17 +++++
 include/exec/exec-all.h |   10 +++
 include/qom/cpu.h       |    5 ++
 qom/cpu.c               |    9 +++
 translate-all.c         |  146 +++++++++++++++++++++++++++++++++++++++++------
 translate-all.h         |   49 ++++++++++++++++
 6 files changed, 214 insertions(+), 22 deletions(-)
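
A brief usage sketch (illustration only, not part of the patch): growing the
set of physical TB caches and moving one vCPU onto the new cache. The helper
name example_use_new_cache is hypothetical; the calls are the ones added by
this patch, and both requests are applied lazily from the execution loop in
cpu_exec():

    static void example_use_new_cache(CPUState *cpu)
    {
        /* Index the new cache will get once the request is applied. */
        size_t new_idx = tb_caches_get();

        /* Request one more physical TB cache... */
        tb_caches_set(new_idx + 1);

        /* ...and ask this vCPU to switch to it.  Neither change is
         * immediate: cpu_exec() calls tb_caches_apply() and
         * cpu_tb_cache_apply() before looking up the next TB. */
        cpu_tb_cache_set(cpu, new_idx);
    }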

diff --git a/cpu-exec.c b/cpu-exec.c
index c88d0ff..c258f16 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -27,6 +27,7 @@
 #include "exec/address-spaces.h"
 #include "qemu/rcu.h"
 #include "exec/tb-hash.h"
+#include "translate-all.h"
 #if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
 #include "hw/i386/apic.h"
 #endif
@@ -224,7 +225,7 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
     phys_pc = get_page_addr_code(env, pc);
     phys_page1 = phys_pc & TARGET_PAGE_MASK;
     h = tb_phys_hash_func(phys_pc);
-    ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
+    ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[cpu->tb_phys_idx][h];
     for(;;) {
         tb = *ptb1;
         if (!tb) {
@@ -253,8 +254,8 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
 
     /* Move the TB to the head of the list */
     *ptb1 = tb->phys_hash_next;
-    tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
-    tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
+    tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[cpu->tb_phys_idx][h];
+    tcg_ctx.tb_ctx.tb_phys_hash[cpu->tb_phys_idx][h] = tb;
     return tb;
 }
 
@@ -488,6 +489,16 @@ int cpu_exec(CPUState *cpu)
                     cpu->exception_index = EXCP_INTERRUPT;
                     cpu_loop_exit(cpu);
                 }
+                if (unlikely(tcg_ctx.tb_ctx.tb_phys_hash_size_req !=
+                             tcg_ctx.tb_ctx.tb_phys_hash_size)) {
+                    if (tb_caches_apply() < 0) {
+                        next_tb = 0;
+                    }
+                }
+                if (unlikely(cpu->tb_phys_idx != cpu->tb_phys_idx_req)) {
+                    cpu_tb_cache_apply(cpu);
+                    next_tb = 0;
+                }
                 tb_lock();
                 tb = tb_find_fast(cpu);
                 /* Note: we do it here to avoid a gcc bug on Mac OS X when
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 0859873..ba8b15c 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -177,6 +177,10 @@ static inline void tlb_flush_by_mmuidx(CPUState *cpu, ...)
 #define USE_DIRECT_JUMP
 #endif
 
+/**
+ * TranslationBlock:
+ * @phys_idx: Index of physical TB cache where this TB has been allocated.
+ */
 struct TranslationBlock {
     target_ulong pc;   /* simulated PC corresponding to this block (EIP + CS base) */
     target_ulong cs_base; /* CS base for this block */
@@ -216,6 +220,8 @@ struct TranslationBlock {
        jmp_first */
     struct TranslationBlock *jmp_next[2];
     struct TranslationBlock *jmp_first;
+
+    unsigned int phys_idx;
 };
 
 #include "qemu/thread.h"
@@ -225,7 +231,9 @@ typedef struct TBContext TBContext;
 struct TBContext {
 
     TranslationBlock *tbs;
-    TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
+    TranslationBlock ***tb_phys_hash;
+    size_t tb_phys_hash_size;
+    size_t tb_phys_hash_size_req;
     int nb_tbs;
     /* any access to the tbs or the page table must use this lock */
     QemuMutex tb_lock;
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 51a1323..40962e0 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -250,6 +250,8 @@ struct kvm_run;
  * @kvm_fd: vCPU file descriptor for KVM.
  * @work_mutex: Lock to prevent multiple access to queued_work_*.
  * @queued_work_first: First asynchronous work pending.
+ * @tb_phys_idx: Index of the current physical TB cache.
+ * @tb_phys_idx_req: Index of the requested physical TB cache.
  *
  * State of one CPU core or thread.
  */
@@ -314,6 +316,9 @@ struct CPUState {
     struct KVMState *kvm_state;
     struct kvm_run *kvm_run;
 
+    unsigned int tb_phys_idx;
+    unsigned int tb_phys_idx_req;
+
     /* TODO Move common fields from CPUArchState here. */
     int cpu_index; /* used by alpha TCG */
     uint32_t halted; /* used by alpha, cris, ppc TCG */
diff --git a/qom/cpu.c b/qom/cpu.c
index fb80d13..bb7a618 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -363,6 +363,14 @@ static void cpu_class_init(ObjectClass *klass, void *data)
     dc->cannot_instantiate_with_device_add_yet = true;
 }
 
+static void cpu_init(Object *obj)
+{
+    CPUState *cpu = CPU(obj);
+
+    cpu->tb_phys_idx = 0;
+    cpu->tb_phys_idx_req = 0;
+}
+
 static const TypeInfo cpu_type_info = {
     .name = TYPE_CPU,
     .parent = TYPE_DEVICE,
@@ -372,6 +380,7 @@ static const TypeInfo cpu_type_info = {
     .abstract = true,
     .class_size = sizeof(CPUClass),
     .class_init = cpu_class_init,
+    .instance_init = cpu_init,
 };
 
 static void cpu_register_types(void)
diff --git a/translate-all.c b/translate-all.c
index 56899ee..ecadb66 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -163,9 +163,22 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
                          tb_page_addr_t phys_page2);
 static TranslationBlock *tb_find_pc(uintptr_t tc_ptr);
 
+static void tb_phys_cache_alloc(unsigned int idx)
+{
+    size_t size = sizeof(tcg_ctx.tb_ctx.tb_phys_hash[0][0]) *
+        CODE_GEN_PHYS_HASH_SIZE;
+    tcg_ctx.tb_ctx.tb_phys_hash[idx] = malloc(size);
+    memset(tcg_ctx.tb_ctx.tb_phys_hash[idx], 0, size);
+}
+
 void cpu_gen_init(void)
 {
     tcg_context_init(&tcg_ctx); 
+
+    tcg_ctx.tb_ctx.tb_phys_hash_size = 0;
+    tcg_ctx.tb_ctx.tb_phys_hash_size_req = 1;
+    tcg_ctx.tb_ctx.tb_phys_hash = NULL;
+    tb_caches_apply();
 }
 
 /* Encode VAL as a signed leb128 sequence at P.
@@ -849,7 +862,12 @@ void tb_flush(CPUState *cpu)
         tb_flush_jmp_cache_all(cpu);
     }
 
-    memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys_hash));
+    unsigned int cache;
+    for (cache = 0; cache < tb_caches_get(); cache++) {
+        memset(tcg_ctx.tb_ctx.tb_phys_hash[cache], 0,
+               (sizeof(tcg_ctx.tb_ctx.tb_phys_hash[0][0]) *
+                CODE_GEN_PHYS_HASH_SIZE));
+    }
     page_flush_tb();
 
     tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
@@ -863,16 +881,21 @@ void tb_flush(CPUState *cpu)
 static void tb_invalidate_check(target_ulong address)
 {
     TranslationBlock *tb;
+    unsigned int cache;
     int i;
 
     address &= TARGET_PAGE_MASK;
-    for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
-        for (tb = tb_ctx.tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
-            if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
-                  address >= tb->pc + tb->size)) {
-                printf("ERROR invalidate: address=" TARGET_FMT_lx
-                       " PC=%08lx size=%04x\n",
-                       address, (long)tb->pc, tb->size);
+    for (cache = 0; cache < tb_caches_get(); cache++) {
+        for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
+            for (tb = tcg_ctx.tb_ctx.tb_phys_hash[cache][i];
+                 tb != NULL;
+                 tb = tb->phys_hash_next) {
+                if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
+                      address >= tb->pc + tb->size)) {
+                    printf("ERROR invalidate: address=" TARGET_FMT_lx
+                           " PC=%08lx size=%04x\n",
+                           address, (long)tb->pc, tb->size);
+                }
             }
         }
     }
@@ -882,16 +905,20 @@ static void tb_invalidate_check(target_ulong address)
 static void tb_page_check(void)
 {
     TranslationBlock *tb;
+    unsigned int cache;
     int i, flags1, flags2;
 
-    for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
-        for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL;
-                tb = tb->phys_hash_next) {
-            flags1 = page_get_flags(tb->pc);
-            flags2 = page_get_flags(tb->pc + tb->size - 1);
-            if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
-                printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
-                       (long)tb->pc, tb->size, flags1, flags2);
+    for (cache = 0; cache < tb_caches_get(); cache++) {
+        for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
+            for (tb = tcg_ctx.tb_ctx.tb_phys_hash[cache][i];
+                 tb != NULL;
+                 tb = tb->phys_hash_next) {
+                flags1 = page_get_flags(tb->pc);
+                flags2 = page_get_flags(tb->pc + tb->size - 1);
+                if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
+                    printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
+                           (long)tb->pc, tb->size, flags1, flags2);
+                }
             }
         }
     }
@@ -978,7 +1005,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
     /* remove the TB from the hash list */
     phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
     h = tb_phys_hash_func(phys_pc);
-    tb_hash_remove(&tcg_ctx.tb_ctx.tb_phys_hash[h], tb);
+    tb_hash_remove(&tcg_ctx.tb_ctx.tb_phys_hash[tb->phys_idx][h], tb);
 
     /* remove the TB from the page list */
     if (tb->page_addr[0] != page_addr) {
@@ -1053,6 +1080,85 @@ static void build_page_bitmap(PageDesc *p)
     }
 }
 
+size_t tb_caches_get(void)
+{
+    return tcg_ctx.tb_ctx.tb_phys_hash_size;
+}
+
+void tb_caches_set(size_t count)
+{
+    assert(count > 0);
+#ifndef NDEBUG
+    /* ensure no CPU is going to switch/stay in one of the removed caches */
+    CPUState *cpu;
+    CPU_FOREACH(cpu) {
+        assert(cpu->tb_phys_idx_req < count);
+    }
+#endif
+    tcg_ctx.tb_ctx.tb_phys_hash_size_req = count;
+}
+
+int tb_caches_apply(void)
+{
+    struct TBContext *tb_ctx = &tcg_ctx.tb_ctx;
+
+    if (likely(tb_ctx->tb_phys_hash_size_req == tb_ctx->tb_phys_hash_size)) {
+        return 0;
+    }
+
+    int res = tb_ctx->tb_phys_hash_size_req < tb_ctx->tb_phys_hash_size ?
+        -1 : 1;
+
+    if (res < 0) {
+        int i;
+        for (i = tb_ctx->tb_phys_hash_size_req;
+             i < tb_ctx->tb_phys_hash_size;
+             i++) {
+            free(tb_ctx->tb_phys_hash[i]);
+        }
+
+        CPUState *cpu;
+        CPU_FOREACH(cpu) {
+            if (cpu->tb_phys_idx >= tb_ctx->tb_phys_hash_size_req) {
+                fprintf(stderr,
+                        "CPU %d is using a deleted TB cache\n", cpu->cpu_index);
+                exit(1);
+            }
+        }
+    }
+
+    size_t size = sizeof(tb_ctx->tb_phys_hash[0]) *
+        tb_ctx->tb_phys_hash_size_req;
+    tb_ctx->tb_phys_hash = realloc(tb_ctx->tb_phys_hash, size);
+    int i;
+    for (i = tb_ctx->tb_phys_hash_size;
+         i < tb_ctx->tb_phys_hash_size_req;
+         i++) {
+        tb_phys_cache_alloc(i);
+    }
+
+    tb_ctx->tb_phys_hash_size = tb_ctx->tb_phys_hash_size_req;
+    return res;
+}
+
+unsigned int cpu_tb_cache_get(CPUState *cpu)
+{
+    return cpu->tb_phys_idx;
+}
+
+void cpu_tb_cache_set(CPUState *cpu, unsigned int index)
+{
+    assert(index < tcg_ctx.tb_ctx.tb_phys_hash_size_req);
+    cpu->tb_phys_idx_req = index;
+    cpu->tcg_exit_req = true;
+}
+
+void cpu_tb_cache_apply(CPUState *cpu)
+{
+    cpu->tb_phys_idx = cpu->tb_phys_idx_req;
+    tb_flush_jmp_cache_all(cpu);
+}
+
 /* Called with mmap_lock held for user mode emulation.  */
 TranslationBlock *tb_gen_code(CPUState *cpu,
                               target_ulong pc, target_ulong cs_base,
@@ -1090,6 +1197,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     tb->cs_base = cs_base;
     tb->flags = flags;
     tb->cflags = cflags;
+    tb->phys_idx = ENV_GET_CPU(env)->tb_phys_idx;
 
 #ifdef CONFIG_PROFILER
     tcg_ctx.tb_count1++; /* includes aborted translations because of
@@ -1480,7 +1588,7 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
 
     /* add in the physical hash table */
     h = tb_phys_hash_func(phys_pc);
-    ptb = &tcg_ctx.tb_ctx.tb_phys_hash[h];
+    ptb = &tcg_ctx.tb_ctx.tb_phys_hash[tb->phys_idx][h];
     tb->phys_hash_next = *ptb;
     *ptb = tb;
 
@@ -1643,6 +1751,8 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
     pc = tb->pc;
     cs_base = tb->cs_base;
     flags = tb->flags;
+    /* XXX: It is OK to invalidate only this TB, as this is the one triggering
+     * the memory access */
     tb_phys_invalidate(tb, -1);
     if (tb->cflags & CF_NOCACHE) {
         if (tb->orig_tb) {
diff --git a/translate-all.h b/translate-all.h
index 0384640..d7ad063 100644
--- a/translate-all.h
+++ b/translate-all.h
@@ -19,6 +19,55 @@
 #ifndef TRANSLATE_ALL_H
 #define TRANSLATE_ALL_H
 
+
+/**
+ * tb_caches_get:
+ *
+ * Number of physical TB caches.
+ */
+size_t tb_caches_get(void);
+/**
+ * tb_caches_set:
+ *
+ * Request a new number of physical TB caches.
+ */
+void tb_caches_set(size_t count);
+/**
+ * tb_caches_apply:
+ *
+ * Apply the changes for a tb_caches_set() request.
+ *
+ * Returns: -1/1 if the number of caches has been shrunk/grown; 0 otherwise.
+ *
+ * Note: All TBs of eliminated caches are invalidated.
+ *
+ * Precondition: No vCPU uses any of the caches that will be removed (if any;
+ *               see cpu_tb_cache_set() and tb_caches_set()).
+ */
+int tb_caches_apply(void);
+/**
+ * cpu_tb_cache_get:
+ *
+ * Get the physical TB cache index for the given CPU.
+ */
+unsigned int cpu_tb_cache_get(CPUState *cpu);
+/**
+ * cpu_tb_cache_set:
+ *
+ * Set the physical TB cache index for the given CPU.
+ *
+ * Takes effect at the beginning of the next executed TB.
+ */
+void cpu_tb_cache_set(CPUState *cpu, unsigned int index);
+/**
+ * cpu_tb_cache_apply:
+ *
+ * Apply the changes for a cpu_tb_cache_set() request.
+ *
+ * Note: Invalidates the jump cache of the given CPU.
+ */
+void cpu_tb_cache_apply(CPUState *cpu);
+
 /* translate-all.c */
 void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len);
 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,

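As context for the stated goal (per-vCPU tracing event states, patches 8-9),
here is a hypothetical sketch of how a consumer could key the physical TB
cache off a per-vCPU state value; example_set_trace_state and the state
mapping are illustrative assumptions, not code from this series:

    static void example_set_trace_state(CPUState *cpu, unsigned int state)
    {
        /* Grow the cache array on demand so index 'state' exists. */
        if (state >= tb_caches_get()) {
            tb_caches_set(state + 1);
        }

        /* The switch takes effect at the next executed TB; code
         * translated under other state values stays in its own
         * physical cache instead of being flushed and retranslated. */
        cpu_tb_cache_set(cpu, state);
    }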