[Qemu-devel] [PATCH v3 1/6] accel/tcg: adding structure to store TB statistics

* [Qemu-devel] [PATCH v3 1/6] accel/tcg: adding structure to store TB statistics
@ 2019-07-02 21:00 vandersonmr
  2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 2/6] accel/tcg: Adding an optional tb execution counter vandersonmr
                   ` (6 more replies)
  0 siblings, 7 replies; 20+ messages in thread
From: vandersonmr @ 2019-07-02 21:00 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini, vandersonmr, Richard Henderson

We want to store statistics for each TB even after flushes.
We do not want to modify or grow the TB struct.
So we create a new struct to contain this statistics and
we link one of it to each TB as they are generated.

Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
---
 accel/tcg/translate-all.c | 60 +++++++++++++++++++++++++++++++++++++++
 include/exec/exec-all.h   | 46 ++++++++++++++++++++++++++++++
 include/exec/tb-context.h |  1 +
 include/exec/tb-hash.h    |  7 +++++
 include/qemu/log.h        |  1 +
 5 files changed, 115 insertions(+)

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 5d1e08b169..d05803a142 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1132,11 +1132,31 @@ static bool tb_cmp(const void *ap, const void *bp)
         a->page_addr[1] == b->page_addr[1];
 }
 
+/*
+ * This is the more or less the same compare, but the data persists
+ * over tb_flush. We also aggregate the various variations of cflags
+ * under one record and ignore the details of page overlap (although
+ * we can count it).
+ */
+bool tb_stats_cmp(const void *ap, const void *bp)
+{
+    const TBStatistics *a = ap;
+    const TBStatistics *b = bp;
+
+    return a->phys_pc == b->phys_pc &&
+        a->pc == b->pc &&
+        a->cs_base == b->cs_base &&
+        a->flags == b->flags;
+}
+
 static void tb_htable_init(void)
 {
     unsigned int mode = QHT_MODE_AUTO_RESIZE;
 
     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
+    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
+        qht_init(&tb_ctx.tb_stats, tb_stats_cmp, CODE_GEN_HTABLE_SIZE, mode);
+    }
 }
 
 /* Must be called before using the QEMU cpus. 'tb_size' is the size
@@ -1586,6 +1606,31 @@ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
 #endif
 }
 
+static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
+                                  target_ulong cs_base, uint32_t flags)
+{
+    TBStatistics *new_stats = g_new0(TBStatistics, 1);
+    uint32_t hash = tb_stats_hash_func(phys_pc, pc, flags);
+    void *existing_stats = NULL;
+    new_stats->phys_pc = phys_pc;
+    new_stats->pc = pc;
+    new_stats->cs_base = cs_base;
+    new_stats->flags = flags;
+
+    qht_insert(&tb_ctx.tb_stats, new_stats, hash, &existing_stats);
+
+    if (unlikely(existing_stats)) {
+        /*
+         * If there is already a TBStatistic for this TB from a previous flush
+         * then just make the new TB point to the older TBStatistic
+         */
+        g_free(new_stats);
+        return existing_stats;
+    } else {
+        return new_stats;
+    }
+}
+
 /* add a new TB and link it to the physical page tables. phys_page2 is
  * (-1) to indicate that only one page contains the TB.
  *
@@ -1732,6 +1777,21 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     ti = profile_getclock();
 #endif
 
+    /*
+     * We want to fetch the stats structure before we start code
+     * generation so we can count interesting things about this
+     * generation.
+     *
+     * XXX: using loglevel is fugly - we should have a general flag
+     */
+    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS) && qemu_log_in_addr_range(tb->pc)) {
+        tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
+        /* XXX: should we lock and update in bulk? */
+        atomic_inc(&tb->tb_stats->translations.total);
+    } else {
+        tb->tb_stats = NULL;
+    }
+
     tcg_func_start(tcg_ctx);
 
     tcg_ctx->cpu = env_cpu(env);
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 16034ee651..a4bcd9b6df 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -324,6 +324,49 @@ static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
 #define CODE_GEN_AVG_BLOCK_SIZE 150
 #endif
 
+typedef struct TBStatistics TBStatistics;
+
+/*
+ * This struct stores statistics such as execution count of the
+ * TranslationBlocks. Each sets of TBs for a given phys_pc/pc/flags
+ * has its own TBStatistics which will persist over tb_flush.
+ *
+ * We include additional counters to track number of translations as
+ * well as variants for compile flags.
+ */
+struct TBStatistics {
+    tb_page_addr_t phys_pc;
+    target_ulong pc;
+    uint32_t     flags;
+    /* cs_base isn't included in the hash but we do check for matches */
+    target_ulong cs_base;
+
+    /* Translation stats */
+    struct {
+        unsigned long total;
+        unsigned long uncached;
+        unsigned long spanning;
+        /* XXX: count invalidation? */
+    } translations;
+
+    /* Execution stats */
+    struct {
+        unsigned long total;
+        unsigned long atomic;
+    } executions;
+
+    struct {
+        unsigned num_guest_inst;
+        unsigned num_host_inst;
+        unsigned num_tcg_inst;
+    } code;
+
+    /* HMP information - used for referring to previous search */
+    int display_id;
+};
+
+bool tb_stats_cmp(const void *ap, const void *bp);
+
 /*
  * Translation Cache-related fields of a TB.
  * This struct exists just for convenience; we keep track of TB's in a binary
@@ -403,6 +446,9 @@ struct TranslationBlock {
     uintptr_t jmp_list_head;
     uintptr_t jmp_list_next[2];
     uintptr_t jmp_dest[2];
+
+    /* Pointer to a struct where statistics from the TB is stored */
+    TBStatistics *tb_stats;
 };
 
 extern bool parallel_cpus;
diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
index feb585e0a7..a4decb5d1f 100644
--- a/include/exec/tb-context.h
+++ b/include/exec/tb-context.h
@@ -35,6 +35,7 @@ struct TBContext {
 
     /* statistics */
     unsigned tb_flush_count;
+    struct qht tb_stats;
 };
 
 extern TBContext tb_ctx;
diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h
index 4f3a37d927..54c477fe79 100644
--- a/include/exec/tb-hash.h
+++ b/include/exec/tb-hash.h
@@ -64,4 +64,11 @@ uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags,
     return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
 }
 
+static inline
+uint32_t tb_stats_hash_func(tb_page_addr_t phys_pc, target_ulong pc,
+                            uint32_t flags)
+{
+    return qemu_xxhash5(phys_pc, pc, flags);
+}
+
 #endif
diff --git a/include/qemu/log.h b/include/qemu/log.h
index b097a6cae1..2fca65dd01 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -45,6 +45,7 @@ static inline bool qemu_log_separate(void)
 /* LOG_TRACE (1 << 15) is defined in log-for-trace.h */
 #define CPU_LOG_TB_OP_IND  (1 << 16)
 #define CPU_LOG_TB_FPU     (1 << 17)
+#define CPU_LOG_HOT_TBS    (1 << 18)
 
 /* Lock output for a series of related logs.  Since this is not needed
  * for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we
-- 
2.22.0



^ permalink raw reply related	[flat|nested] 20+ messages in thread