From: "Lluís Vilanova" <vilanova@ac.upc.edu>
To: qemu-devel@nongnu.org
Cc: Eric Blake <eblake@redhat.com>,
	Eduardo Habkost <ehabkost@redhat.com>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Peter Crosthwaite <crosthwaite.peter@gmail.com>,
	Richard Henderson <rth@twiddle.net>
Subject: [Qemu-devel] [PATCH 2/4] exec: [tcg] Use multiple physical TB caches
Date: Wed, 14 Sep 2016 23:23:28 +0200
Message-ID: <147388820802.17002.12187474866416310198.stgit@fimbulvetr.bsc.es>
In-Reply-To: <147388819720.17002.17020698136656908126.stgit@fimbulvetr.bsc.es>

The physical TB cache is split into 2^E caches, where E is the number of
trace events that have the "vcpu" property and do not have the "disable"
property.

Each vCPU's virtual TB cache then maps into one of these physical TB
caches, so different vCPUs can (potentially) use different physical caches.

This is exploited by later patches in the series to support different
tracing event states on a per-vCPU basis.
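
As an illustration of the indexing scheme, here is a minimal standalone
sketch (not part of the patch): a per-vCPU bitmap of E event-enable bits,
read as an integer, selects one of the 2^E physical caches. The event
count of 2 and all names below are made up for the example; the patch
itself uses TRACE_VCPU_EVENT_COUNT and the tb_caches_get() helper.

#include <stdio.h>

#define VCPU_EVENT_COUNT 2                        /* E: hypothetical count */
#define TB_CACHE_COUNT   (1UL << VCPU_EVENT_COUNT) /* 2^E physical caches */

int main(void)
{
    /* Stand-ins for the qht hash tables allocated in tb_htable_init();
     * one per combination of enabled vCPU events. */
    const char *htables[TB_CACHE_COUNT] = {
        "cache[0b00]", "cache[0b01]", "cache[0b10]", "cache[0b11]",
    };

    /* Per-vCPU state: bit i set <=> vCPU event i enabled.  Enabling
     * event 1 while event 0 stays disabled gives index 0b10 == 2. */
    unsigned long tb_cache_idx = 1UL << 1;

    /* The same selection tb_caches_get() performs on tb_ctx->htables. */
    printf("this vCPU looks up TBs in %s\n", htables[tb_cache_idx]);
    return 0;
}

Because the patch checks at compile time that the bitmap fits in a single
long, it can be read as one unsigned long and used directly as the index.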

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
---
 cpu-exec.c                |    5 ++++
 include/exec/exec-all.h   |    6 +++++
 include/exec/tb-context.h |    2 +-
 include/qom/cpu.h         |    4 +++-
 qom/cpu.c                 |    1 +
 translate-all.c           |   51 +++++++++++++++++++++++++++++++++++++--------
 translate-all.h           |   17 +++++++++++++++
 translate-all.inc.h       |   13 +++++++++++
 8 files changed, 87 insertions(+), 12 deletions(-)
 create mode 100644 translate-all.inc.h

diff --git a/cpu-exec.c b/cpu-exec.c
index 5d9710a..7b2d8c6 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -33,6 +33,7 @@
 #include "hw/i386/apic.h"
 #endif
 #include "sysemu/replay.h"
+#include "translate-all.h"
 
 /* -icount align implementation. */
 
@@ -267,6 +268,7 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
     tb_page_addr_t phys_pc;
     struct tb_desc desc;
     uint32_t h;
+    struct qht *qht;
 
     desc.env = (CPUArchState *)cpu->env_ptr;
     desc.cs_base = cs_base;
@@ -275,7 +277,8 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
     phys_pc = get_page_addr_code(desc.env, pc);
     desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
     h = tb_hash_func(phys_pc, pc, flags);
-    return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &desc, h);
+    qht = tb_caches_get(&tcg_ctx.tb_ctx, cpu->tb_cache_idx);
+    return qht_lookup(qht, tb_cmp, &desc, h);
 }
 
 static TranslationBlock *tb_find_slow(CPUState *cpu,
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index e2124dc..4ae04f6 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -211,6 +211,10 @@ static inline void tlb_flush_by_mmuidx(CPUState *cpu, ...)
 #define USE_DIRECT_JUMP
 #endif
 
+/**
+ * TranslationBlock:
+ * @tb_cache_idx: Index of physical TB cache where this TB has been allocated.
+ */
 struct TranslationBlock {
     target_ulong pc;   /* simulated PC corresponding to this block (EIP + CS base) */
     target_ulong cs_base; /* CS base for this block */
@@ -262,6 +266,8 @@ struct TranslationBlock {
      */
     uintptr_t jmp_list_next[2];
     uintptr_t jmp_list_first;
+
+    DECLARE_BITMAP(tb_cache_idx, TRACE_VCPU_EVENT_COUNT);
 };
 
 void tb_free(TranslationBlock *tb);
diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
index dce95d9..7728904 100644
--- a/include/exec/tb-context.h
+++ b/include/exec/tb-context.h
@@ -32,7 +32,7 @@ typedef struct TBContext TBContext;
 struct TBContext {
 
     TranslationBlock *tbs;
-    struct qht htable;
+    struct qht *htables;
     int nb_tbs;
     /* any access to the tbs or the page table must use this lock */
     QemuMutex tb_lock;
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index ce0c406..d870810 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -282,6 +282,7 @@ struct qemu_work_item {
  * @kvm_fd: vCPU file descriptor for KVM.
  * @work_mutex: Lock to prevent multiple access to queued_work_*.
  * @queued_work_first: First asynchronous work pending.
+ * @tb_cache_idx: Index of current TB cache.
  * @trace_dstate: Dynamic tracing state of events for this vCPU (bitmask).
  *
  * State of one CPU core or thread.
@@ -350,7 +351,8 @@ struct CPUState {
     struct KVMState *kvm_state;
     struct kvm_run *kvm_run;
 
-    /* Used for events with 'vcpu' and *without* the 'disabled' properties */
+    /* Used for events with 'vcpu' and *without* the 'disable' properties */
+    DECLARE_BITMAP(tb_cache_idx, TRACE_VCPU_EVENT_COUNT);
     DECLARE_BITMAP(trace_dstate, TRACE_VCPU_EVENT_COUNT);
 
     /* TODO Move common fields from CPUArchState here. */
diff --git a/qom/cpu.c b/qom/cpu.c
index 2553247..2225103 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -345,6 +345,7 @@ static void cpu_common_initfn(Object *obj)
     qemu_mutex_init(&cpu->work_mutex);
     QTAILQ_INIT(&cpu->breakpoints);
     QTAILQ_INIT(&cpu->watchpoints);
+    bitmap_zero(cpu->tb_cache_idx, TRACE_VCPU_EVENT_COUNT);
     bitmap_zero(cpu->trace_dstate, TRACE_VCPU_EVENT_COUNT);
 }
 
diff --git a/translate-all.c b/translate-all.c
index ebd9fa0..c864eee 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -733,11 +733,22 @@ static inline void code_gen_alloc(size_t tb_size)
     qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
 }
 
+/*
+ * Ensure bitmaps can be used as indexes.
+ */
+void *__error__too_many_vcpu_events[
+    (TRACE_VCPU_EVENT_COUNT + 1) <= BITS_PER_LONG ? 0 : -1];
+
 static void tb_htable_init(void)
 {
+    int cache;
     unsigned int mode = QHT_MODE_AUTO_RESIZE;
 
-    qht_init(&tcg_ctx.tb_ctx.htable, CODE_GEN_HTABLE_SIZE, mode);
+    tcg_ctx.tb_ctx.htables = g_malloc(
+        sizeof(tcg_ctx.tb_ctx.htables[0]) * tb_caches_count());
+    for (cache = 0; cache < tb_caches_count(); cache++) {
+        qht_init(&tcg_ctx.tb_ctx.htables[cache], CODE_GEN_HTABLE_SIZE, mode);
+    }
 }
 
 /* Must be called before using the QEMU cpus. 'tb_size' is the size
@@ -834,6 +845,8 @@ static void page_flush_tb(void)
 /* XXX: tb_flush is currently not thread safe */
 void tb_flush(CPUState *cpu)
 {
+    int i;
+
     if (!tcg_enabled()) {
         return;
     }
@@ -854,7 +867,9 @@ void tb_flush(CPUState *cpu)
         tb_flush_jmp_cache_all(cpu);
     }
 
-    qht_reset_size(&tcg_ctx.tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
+    for (i = 0; i < tb_caches_count(); i++) {
+        qht_reset_size(&tcg_ctx.tb_ctx.htables[i], CODE_GEN_HTABLE_SIZE);
+    }
     page_flush_tb();
 
     tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
@@ -879,8 +894,12 @@ do_tb_invalidate_check(struct qht *ht, void *p, uint32_t hash, void *userp)
 
 static void tb_invalidate_check(target_ulong address)
 {
+    int i;
+
     address &= TARGET_PAGE_MASK;
-    qht_iter(&tcg_ctx.tb_ctx.htable, do_tb_invalidate_check, &address);
+    for (i = 0; i < tb_caches_count(); i++) {
+        qht_iter(&tcg_ctx.tb_ctx.htables[i], do_tb_invalidate_check, &address);
+    }
 }
 
 static void
@@ -900,7 +919,10 @@ do_tb_page_check(struct qht *ht, void *p, uint32_t hash, void *userp)
 /* verify that all the pages have correct rights for code */
 static void tb_page_check(void)
 {
-    qht_iter(&tcg_ctx.tb_ctx.htable, do_tb_page_check, NULL);
+    int i;
+    for (i = 0; i < tb_caches_count(); i++) {
+        qht_iter(&tcg_ctx.tb_ctx.htables[i], do_tb_page_check, NULL);
+    }
 }
 
 #endif
@@ -987,12 +1009,14 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
     CPUState *cpu;
     PageDesc *p;
     uint32_t h;
+    struct qht *qht;
     tb_page_addr_t phys_pc;
 
     /* remove the TB from the hash list */
     phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
     h = tb_hash_func(phys_pc, tb->pc, tb->flags);
-    qht_remove(&tcg_ctx.tb_ctx.htable, tb, h);
+    qht = tb_caches_get(&tcg_ctx.tb_ctx, tb->tb_cache_idx);
+    qht_remove(qht, tb, h);
 
     /* remove the TB from the page list */
     if (tb->page_addr[0] != page_addr) {
@@ -1122,10 +1146,12 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
                          tb_page_addr_t phys_page2)
 {
     uint32_t h;
+    struct qht *qht;
 
     /* add in the hash table */
     h = tb_hash_func(phys_pc, tb->pc, tb->flags);
-    qht_insert(&tcg_ctx.tb_ctx.htable, tb, h);
+    qht = tb_caches_get(&tcg_ctx.tb_ctx, tb->tb_cache_idx);
+    qht_insert(qht, tb, h);
 
     /* add in the page list */
     tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
@@ -1175,6 +1201,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     tb->cs_base = cs_base;
     tb->flags = flags;
     tb->cflags = cflags;
+    bitmap_copy(tb->tb_cache_idx, ENV_GET_CPU(env)->tb_cache_idx,
+                TRACE_VCPU_EVENT_COUNT);
 
 #ifdef CONFIG_PROFILER
     tcg_ctx.tb_count1++; /* includes aborted translations because of
@@ -1636,6 +1664,8 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
     pc = tb->pc;
     cs_base = tb->cs_base;
     flags = tb->flags;
+    /* XXX: It is OK to invalidate only this TB, as this is the one triggering
+     * the memory access */
     tb_phys_invalidate(tb, -1);
     if (tb->cflags & CF_NOCACHE) {
         if (tb->orig_tb) {
@@ -1715,6 +1745,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
     int direct_jmp_count, direct_jmp2_count, cross_page;
     TranslationBlock *tb;
     struct qht_stats hst;
+    int cache;
 
     target_code_size = 0;
     max_target_code_size = 0;
@@ -1766,9 +1797,11 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
                 tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp2_count * 100) /
                         tcg_ctx.tb_ctx.nb_tbs : 0);
 
-    qht_statistics_init(&tcg_ctx.tb_ctx.htable, &hst);
-    print_qht_statistics(f, cpu_fprintf, hst);
-    qht_statistics_destroy(&hst);
+    for (cache = 0; cache < tb_caches_count(); cache++) {
+        qht_statistics_init(&tcg_ctx.tb_ctx.htables[cache], &hst);
+        print_qht_statistics(f, cpu_fprintf, hst);
+        qht_statistics_destroy(&hst);
+    }
 
     cpu_fprintf(f, "\nStatistics:\n");
     cpu_fprintf(f, "TB flush count      %d\n", tcg_ctx.tb_ctx.tb_flush_count);
diff --git a/translate-all.h b/translate-all.h
index ba8e4d6..d39bf32 100644
--- a/translate-all.h
+++ b/translate-all.h
@@ -20,7 +20,21 @@
 #define TRANSLATE_ALL_H
 
 #include "exec/exec-all.h"
+#include "qemu/typedefs.h"
 
+/**
+ * tb_caches_count:
+ *
+ * Number of TB caches.
+ */
+static size_t tb_caches_count(void);
+
+/**
+ * tb_caches_get:
+ *
+ * Get the TB cache for the given bitmap index.
+ */
+static struct qht *tb_caches_get(TBContext *tb_ctx, unsigned long *bitmap);
 
 /* translate-all.c */
 void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len);
@@ -33,4 +47,7 @@ void tb_check_watchpoint(CPUState *cpu);
 int page_unprotect(target_ulong address, uintptr_t pc);
 #endif
 
+
+#include "translate-all.inc.h"
+
 #endif /* TRANSLATE_ALL_H */
diff --git a/translate-all.inc.h b/translate-all.inc.h
new file mode 100644
index 0000000..c60a48e
--- /dev/null
+++ b/translate-all.inc.h
@@ -0,0 +1,13 @@
+/* Inline implementations for translate-all.h */
+
+static inline size_t tb_caches_count(void)
+{
+    return 1ULL << TRACE_VCPU_EVENT_COUNT;
+}
+
+static inline struct qht *tb_caches_get(TBContext *tb_ctx,
+                                        unsigned long *bitmap)
+{
+    unsigned long idx = *bitmap;
+    return &tb_ctx->htables[idx];
+}
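
Side note on the __error__too_many_vcpu_events declaration added to
translate-all.c above: it is the classic pre-C11 negative-array-size idiom
for compile-time assertions, here guaranteeing that the event bitmap fits
in a single long (so tb_caches_get() may dereference it as one). A
standalone sketch of the idiom, with made-up names:

/* Compile-time assertion: when COND is false the typedef'd array gets a
 * negative size, which the compiler rejects; when true it is harmless. */
#define BUILD_ASSERT(cond, name) \
    typedef char build_assert_##name[(cond) ? 1 : -1]

/* Holds on any host QEMU supports, so this compiles fine. */
BUILD_ASSERT(sizeof(unsigned long) >= 4, ulong_at_least_32_bits);

/* A false condition such as BUILD_ASSERT(1 == 2, never) would abort the
 * build with an "array size is negative" style error. */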
