qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v3 1/6] accel/tcg: adding structure to store TB statistics
@ 2019-07-02 21:00 vandersonmr
  2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 2/6] accel/tcg: Adding an optional tb execution counter vandersonmr
                   ` (6 more replies)
  0 siblings, 7 replies; 20+ messages in thread
From: vandersonmr @ 2019-07-02 21:00 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini, vandersonmr, Richard Henderson

We want to store statistics for each TB even after flushes.
We do not want to modify or grow the TB struct.
So we create a new struct to contain this statistics and
we link one of it to each TB as they are generated.

Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
---
 accel/tcg/translate-all.c | 60 +++++++++++++++++++++++++++++++++++++++
 include/exec/exec-all.h   | 46 ++++++++++++++++++++++++++++++
 include/exec/tb-context.h |  1 +
 include/exec/tb-hash.h    |  7 +++++
 include/qemu/log.h        |  1 +
 5 files changed, 115 insertions(+)

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 5d1e08b169..d05803a142 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1132,11 +1132,31 @@ static bool tb_cmp(const void *ap, const void *bp)
         a->page_addr[1] == b->page_addr[1];
 }
 
+/*
+ * This is the more or less the same compare, but the data persists
+ * over tb_flush. We also aggregate the various variations of cflags
+ * under one record and ignore the details of page overlap (although
+ * we can count it).
+ */
+bool tb_stats_cmp(const void *ap, const void *bp)
+{
+    const TBStatistics *a = ap;
+    const TBStatistics *b = bp;
+
+    return a->phys_pc == b->phys_pc &&
+        a->pc == b->pc &&
+        a->cs_base == b->cs_base &&
+        a->flags == b->flags;
+}
+
 static void tb_htable_init(void)
 {
     unsigned int mode = QHT_MODE_AUTO_RESIZE;
 
     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
+    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
+        qht_init(&tb_ctx.tb_stats, tb_stats_cmp, CODE_GEN_HTABLE_SIZE, mode);
+    }
 }
 
 /* Must be called before using the QEMU cpus. 'tb_size' is the size
@@ -1586,6 +1606,31 @@ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
 #endif
 }
 
+static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
+                                  target_ulong cs_base, uint32_t flags)
+{
+    TBStatistics *new_stats = g_new0(TBStatistics, 1);
+    uint32_t hash = tb_stats_hash_func(phys_pc, pc, flags);
+    void *existing_stats = NULL;
+    new_stats->phys_pc = phys_pc;
+    new_stats->pc = pc;
+    new_stats->cs_base = cs_base;
+    new_stats->flags = flags;
+
+    qht_insert(&tb_ctx.tb_stats, new_stats, hash, &existing_stats);
+
+    if (unlikely(existing_stats)) {
+        /*
+         * If there is already a TBStatistic for this TB from a previous flush
+         * then just make the new TB point to the older TBStatistic
+         */
+        g_free(new_stats);
+        return existing_stats;
+    } else {
+        return new_stats;
+    }
+}
+
 /* add a new TB and link it to the physical page tables. phys_page2 is
  * (-1) to indicate that only one page contains the TB.
  *
@@ -1732,6 +1777,21 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     ti = profile_getclock();
 #endif
 
+    /*
+     * We want to fetch the stats structure before we start code
+     * generation so we can count interesting things about this
+     * generation.
+     *
+     * XXX: using loglevel is fugly - we should have a general flag
+     */
+    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS) && qemu_log_in_addr_range(tb->pc)) {
+        tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
+        /* XXX: should we lock and update in bulk? */
+        atomic_inc(&tb->tb_stats->translations.total);
+    } else {
+        tb->tb_stats = NULL;
+    }
+
     tcg_func_start(tcg_ctx);
 
     tcg_ctx->cpu = env_cpu(env);
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 16034ee651..a4bcd9b6df 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -324,6 +324,49 @@ static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
 #define CODE_GEN_AVG_BLOCK_SIZE 150
 #endif
 
+typedef struct TBStatistics TBStatistics;
+
+/*
+ * This struct stores statistics such as execution count of the
+ * TranslationBlocks. Each sets of TBs for a given phys_pc/pc/flags
+ * has its own TBStatistics which will persist over tb_flush.
+ *
+ * We include additional counters to track number of translations as
+ * well as variants for compile flags.
+ */
+struct TBStatistics {
+    tb_page_addr_t phys_pc;
+    target_ulong pc;
+    uint32_t     flags;
+    /* cs_base isn't included in the hash but we do check for matches */
+    target_ulong cs_base;
+
+    /* Translation stats */
+    struct {
+        unsigned long total;
+        unsigned long uncached;
+        unsigned long spanning;
+        /* XXX: count invalidation? */
+    } translations;
+
+    /* Execution stats */
+    struct {
+        unsigned long total;
+        unsigned long atomic;
+    } executions;
+
+    struct {
+        unsigned num_guest_inst;
+        unsigned num_host_inst;
+        unsigned num_tcg_inst;
+    } code;
+
+    /* HMP information - used for referring to previous search */
+    int display_id;
+};
+
+bool tb_stats_cmp(const void *ap, const void *bp);
+
 /*
  * Translation Cache-related fields of a TB.
  * This struct exists just for convenience; we keep track of TB's in a binary
@@ -403,6 +446,9 @@ struct TranslationBlock {
     uintptr_t jmp_list_head;
     uintptr_t jmp_list_next[2];
     uintptr_t jmp_dest[2];
+
+    /* Pointer to a struct where statistics from the TB is stored */
+    TBStatistics *tb_stats;
 };
 
 extern bool parallel_cpus;
diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
index feb585e0a7..a4decb5d1f 100644
--- a/include/exec/tb-context.h
+++ b/include/exec/tb-context.h
@@ -35,6 +35,7 @@ struct TBContext {
 
     /* statistics */
     unsigned tb_flush_count;
+    struct qht tb_stats;
 };
 
 extern TBContext tb_ctx;
diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h
index 4f3a37d927..54c477fe79 100644
--- a/include/exec/tb-hash.h
+++ b/include/exec/tb-hash.h
@@ -64,4 +64,11 @@ uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags,
     return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
 }
 
+static inline
+uint32_t tb_stats_hash_func(tb_page_addr_t phys_pc, target_ulong pc,
+                            uint32_t flags)
+{
+    return qemu_xxhash5(phys_pc, pc, flags);
+}
+
 #endif
diff --git a/include/qemu/log.h b/include/qemu/log.h
index b097a6cae1..2fca65dd01 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -45,6 +45,7 @@ static inline bool qemu_log_separate(void)
 /* LOG_TRACE (1 << 15) is defined in log-for-trace.h */
 #define CPU_LOG_TB_OP_IND  (1 << 16)
 #define CPU_LOG_TB_FPU     (1 << 17)
+#define CPU_LOG_HOT_TBS    (1 << 18)
 
 /* Lock output for a series of related logs.  Since this is not needed
  * for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we
-- 
2.22.0



^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH v3 2/6] accel/tcg: Adding an optional tb execution counter
  2019-07-02 21:00 [Qemu-devel] [PATCH v3 1/6] accel/tcg: adding structure to store TB statistics vandersonmr
@ 2019-07-02 21:00 ` vandersonmr
  2019-07-04 15:22   ` Alex Bennée
  2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 3/6] accel/tcg: Collecting translation/code quality measurements vandersonmr
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 20+ messages in thread
From: vandersonmr @ 2019-07-02 21:00 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini, vandersonmr, Richard Henderson

We add the option to instrument each TB to
count the number of times it is executed and
store this in the its TBStatistics struct.

Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
---
 accel/tcg/tcg-runtime.c   | 7 +++++++
 accel/tcg/tcg-runtime.h   | 2 ++
 accel/tcg/translator.c    | 1 +
 include/exec/gen-icount.h | 9 +++++++++
 4 files changed, 19 insertions(+)

diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index 8a1e408e31..f332eae334 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -167,3 +167,10 @@ void HELPER(exit_atomic)(CPUArchState *env)
 {
     cpu_loop_exit_atomic(env_cpu(env), GETPC());
 }
+
+void HELPER(inc_exec_freq)(void *ptr)
+{
+    TBStatistics *stats = (TBStatistics *) ptr;
+    g_assert(stats);
+    atomic_inc(&stats->executions.total);
+}
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index 4fa61b49b4..bf0b75dbe8 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -28,6 +28,8 @@ DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env)
 
 DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
 
+DEF_HELPER_FLAGS_1(inc_exec_freq, TCG_CALL_NO_RWG, void, ptr)
+
 #ifdef CONFIG_SOFTMMU
 
 DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 9226a348a3..cc06070e7e 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -54,6 +54,7 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
     gen_tb_start(db->tb);
     ops->tb_start(db, cpu);
     tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
+    gen_tb_exec_count(tb);
 
     while (true) {
         db->num_insns++;
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
index f7669b6841..6701ab70c0 100644
--- a/include/exec/gen-icount.h
+++ b/include/exec/gen-icount.h
@@ -7,6 +7,15 @@
 
 static TCGOp *icount_start_insn;
 
+static inline void gen_tb_exec_count(TranslationBlock *tb)
+{
+    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS) && tb->tb_stats) {
+        TCGv_ptr ptr = tcg_const_ptr(tb->tb_stats);
+        gen_helper_inc_exec_freq(ptr);
+        tcg_temp_free_ptr(ptr);
+    }
+}
+
 static inline void gen_tb_start(TranslationBlock *tb)
 {
     TCGv_i32 count, imm;
-- 
2.22.0



^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH v3 3/6] accel/tcg: Collecting translation/code quality measurements
  2019-07-02 21:00 [Qemu-devel] [PATCH v3 1/6] accel/tcg: adding structure to store TB statistics vandersonmr
  2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 2/6] accel/tcg: Adding an optional tb execution counter vandersonmr
@ 2019-07-02 21:00 ` vandersonmr
  2019-07-04 15:39   ` Alex Bennée
  2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 4/6] util/log: introduce dump of tbstats and -d hot_tbs:limit vandersonmr
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 20+ messages in thread
From: vandersonmr @ 2019-07-02 21:00 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini, vandersonmr, Richard Henderson

Filling other tb statistics such as number of times the
tb is compiled, its number of guest/host/IR instructions...

Signed-off-by: vandersonmr <vandersonmr2@gmail.com>
---
 accel/tcg/translate-all.c |  14 +++++
 accel/tcg/translator.c    |   4 ++
 disas.c                   | 107 ++++++++++++++++++++++++++++++++++++++
 include/disas/disas.h     |   1 +
 tcg/tcg.c                 |   8 +++
 5 files changed, 134 insertions(+)

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index d05803a142..9ee7232bb8 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1865,6 +1865,17 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     atomic_set(&prof->search_out_len, prof->search_out_len + search_size);
 #endif
 
+    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS) && qemu_log_in_addr_range(tb->pc)) {
+        size_t code_size = gen_code_size;
+        if (tcg_ctx->data_gen_ptr) {
+            code_size = tcg_ctx->data_gen_ptr - tb->tc.ptr;
+        }
+        qemu_log_lock();
+        atomic_set(&tb->tb_stats->code.num_host_inst,
+                    get_num_insts(tb->tc.ptr, code_size));
+        qemu_log_unlock();
+    }
+
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
         qemu_log_in_addr_range(tb->pc)) {
@@ -1922,6 +1933,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     phys_page2 = -1;
     if ((pc & TARGET_PAGE_MASK) != virt_page2) {
         phys_page2 = get_page_addr_code(env, virt_page2);
+        if (tb->tb_stats) {
+            atomic_inc(&tb->tb_stats->translations.spanning);
+        }
     }
     /*
      * No explicit memory barrier is required -- tb_link_page() makes the
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index cc06070e7e..d2529ca97d 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -117,6 +117,10 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
     db->tb->size = db->pc_next - db->pc_first;
     db->tb->icount = db->num_insns;
 
+    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS) && qemu_log_in_addr_range(tb->pc)) {
+        db->tb->tb_stats->code.num_guest_inst = db->num_insns;
+    }
+
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
         && qemu_log_in_addr_range(db->pc_first)) {
diff --git a/disas.c b/disas.c
index 3e2bfa572b..f5ae9c009a 100644
--- a/disas.c
+++ b/disas.c
@@ -475,6 +475,113 @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code,
     }
 }
 
+
+static int fprintf_fake(struct _IO_FILE *a, const char *b, ...)
+{
+    return 1;
+}
+
+/*
+ * This is a work around to get the number of host instructions with
+ * a small effort. It reuses the disas function with a fake printf to
+ * print nothing but count the number of instructions.
+ *
+ */
+unsigned get_num_insts(void *code, unsigned long size)
+{
+    uintptr_t pc;
+    int count;
+    CPUDebug s;
+    int (*print_insn)(bfd_vma pc, disassemble_info *info) = NULL;
+
+    INIT_DISASSEMBLE_INFO(s.info, NULL, fprintf_fake);
+    s.info.print_address_func = generic_print_host_address;
+
+    s.info.buffer = code;
+    s.info.buffer_vma = (uintptr_t)code;
+    s.info.buffer_length = size;
+    s.info.cap_arch = -1;
+    s.info.cap_mode = 0;
+    s.info.cap_insn_unit = 4;
+    s.info.cap_insn_split = 4;
+
+#ifdef HOST_WORDS_BIGENDIAN
+    s.info.endian = BFD_ENDIAN_BIG;
+#else
+    s.info.endian = BFD_ENDIAN_LITTLE;
+#endif
+#if defined(CONFIG_TCG_INTERPRETER)
+    print_insn = print_insn_tci;
+#elif defined(__i386__)
+    s.info.mach = bfd_mach_i386_i386;
+    print_insn = print_insn_i386;
+    s.info.cap_arch = CS_ARCH_X86;
+    s.info.cap_mode = CS_MODE_32;
+    s.info.cap_insn_unit = 1;
+    s.info.cap_insn_split = 8;
+#elif defined(__x86_64__)
+    s.info.mach = bfd_mach_x86_64;
+    print_insn = print_insn_i386;
+    s.info.cap_arch = CS_ARCH_X86;
+    s.info.cap_mode = CS_MODE_64;
+    s.info.cap_insn_unit = 1;
+    s.info.cap_insn_split = 8;
+#elif defined(_ARCH_PPC)
+    s.info.disassembler_options = (char *)"any";
+    print_insn = print_insn_ppc;
+    s.info.cap_arch = CS_ARCH_PPC;
+# ifdef _ARCH_PPC64
+    s.info.cap_mode = CS_MODE_64;
+# endif
+#elif defined(__riscv) && defined(CONFIG_RISCV_DIS)
+#if defined(_ILP32) || (__riscv_xlen == 32)
+    print_insn = print_insn_riscv32;
+#elif defined(_LP64)
+    print_insn = print_insn_riscv64;
+#else
+#error unsupported RISC-V ABI
+#endif
+#elif defined(__aarch64__) && defined(CONFIG_ARM_A64_DIS)
+    print_insn = print_insn_arm_a64;
+    s.info.cap_arch = CS_ARCH_ARM64;
+#elif defined(__alpha__)
+    print_insn = print_insn_alpha;
+#elif defined(__sparc__)
+    print_insn = print_insn_sparc;
+    s.info.mach = bfd_mach_sparc_v9b;
+#elif defined(__arm__)
+    print_insn = print_insn_arm;
+    s.info.cap_arch = CS_ARCH_ARM;
+    /* TCG only generates code for arm mode.  */
+#elif defined(__MIPSEB__)
+    print_insn = print_insn_big_mips;
+#elif defined(__MIPSEL__)
+    print_insn = print_insn_little_mips;
+#elif defined(__m68k__)
+    print_insn = print_insn_m68k;
+#elif defined(__s390__)
+    print_insn = print_insn_s390;
+#elif defined(__hppa__)
+    print_insn = print_insn_hppa;
+#endif
+
+    if (print_insn == NULL) {
+        print_insn = print_insn_od_host;
+    }
+
+    s.info.fprintf_func = fprintf_fake;
+    unsigned num_insts = 0;
+    for (pc = (uintptr_t)code; size > 0; pc += count, size -= count) {
+        num_insts++;
+        count = print_insn(pc, &s.info);
+        if (count < 0) {
+            break;
+        }
+    }
+    return num_insts;
+}
+
+
 /* Disassemble this for me please... (debugging). */
 void disas(FILE *out, void *code, unsigned long size)
 {
diff --git a/include/disas/disas.h b/include/disas/disas.h
index 15da511f49..9797ae7cfa 100644
--- a/include/disas/disas.h
+++ b/include/disas/disas.h
@@ -7,6 +7,7 @@
 
 /* Disassemble this for me please... (debugging). */
 void disas(FILE *out, void *code, unsigned long size);
+unsigned get_num_insts(void *code, unsigned long size);
 void target_disas(FILE *out, CPUState *cpu, target_ulong code,
                   target_ulong size);
 
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 02a2680169..bd57bb642b 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -4072,6 +4072,14 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
     atomic_set(&prof->la_time, prof->la_time + profile_getclock());
 #endif
 
+    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS) && qemu_log_in_addr_range(tb->pc)) {
+        int n = 0;
+        QTAILQ_FOREACH(op, &s->ops, link) {
+            n++;
+        }
+        tb->tb_stats->code.num_tcg_inst = n;
+    }
+
 #ifdef DEBUG_DISAS
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                  && qemu_log_in_addr_range(tb->pc))) {
-- 
2.22.0



^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH v3 4/6] util/log: introduce dump of tbstats and -d hot_tbs:limit
  2019-07-02 21:00 [Qemu-devel] [PATCH v3 1/6] accel/tcg: adding structure to store TB statistics vandersonmr
  2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 2/6] accel/tcg: Adding an optional tb execution counter vandersonmr
  2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 3/6] accel/tcg: Collecting translation/code quality measurements vandersonmr
@ 2019-07-02 21:00 ` vandersonmr
  2019-07-04 15:40   ` Alex Bennée
  2019-07-04 16:34   ` Alex Bennée
  2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 5/6] monitor: adding info tb and tbs to monitor vandersonmr
                   ` (3 subsequent siblings)
  6 siblings, 2 replies; 20+ messages in thread
From: vandersonmr @ 2019-07-02 21:00 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Riku Voipio, vandersonmr, Laurent Vivier,
	Richard Henderson

add option to dump the N most hot TB blocks.
-d hot_tbs:N
and also add all tbstats dump functions.

Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
---
 accel/tcg/Makefile.objs      |   1 +
 accel/tcg/tb-stats.c         | 293 +++++++++++++++++++++++++++++++++++
 include/exec/cpu-all.h       |  43 +++++
 include/qemu/log-for-trace.h |   2 +
 include/qemu/log.h           |   1 +
 linux-user/exit.c            |   3 +
 util/log.c                   |  35 ++++-
 7 files changed, 370 insertions(+), 8 deletions(-)
 create mode 100644 accel/tcg/tb-stats.c

diff --git a/accel/tcg/Makefile.objs b/accel/tcg/Makefile.objs
index d381a02f34..59d50d2dc5 100644
--- a/accel/tcg/Makefile.objs
+++ b/accel/tcg/Makefile.objs
@@ -3,6 +3,7 @@ obj-$(CONFIG_SOFTMMU) += cputlb.o
 obj-y += tcg-runtime.o tcg-runtime-gvec.o
 obj-y += cpu-exec.o cpu-exec-common.o translate-all.o
 obj-y += translator.o
+obj-y += tb-stats.o
 
 obj-$(CONFIG_USER_ONLY) += user-exec.o
 obj-$(call lnot,$(CONFIG_SOFTMMU)) += user-exec-stub.o
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
new file mode 100644
index 0000000000..922023f29d
--- /dev/null
+++ b/accel/tcg/tb-stats.c
@@ -0,0 +1,293 @@
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+/* XXX: I'm not sure what includes could be safely removed */
+#define NO_CPU_IO_DEFS
+#include "cpu.h"
+#include "trace.h"
+#include "disas/disas.h"
+#include "exec/exec-all.h"
+#include "tcg.h"
+#if defined(CONFIG_USER_ONLY)
+#include "qemu.h"
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+#include <sys/param.h>
+#if __FreeBSD_version >= 700104
+#define HAVE_KINFO_GETVMMAP
+#define sigqueue sigqueue_freebsd  /* avoid redefinition */
+#include <sys/proc.h>
+#include <machine/profile.h>
+#define _KERNEL
+#include <sys/user.h>
+#undef _KERNEL
+#undef sigqueue
+#include <libutil.h>
+#endif
+#endif
+#else
+#include "exec/ram_addr.h"
+#endif
+
+#include "qemu/qemu-print.h"
+
+
+/* only accessed in safe work */
+static GList *last_search;
+
+static void collect_tb_stats(void *p, uint32_t hash, void *userp)
+{
+    last_search = g_list_prepend(last_search, p);
+}
+
+static void dump_tb_header(TBStatistics *tbs)
+{
+    qemu_log("TB%d: phys:0x"TB_PAGE_ADDR_FMT" virt:0x"TARGET_FMT_lx
+             " flags:%#08x (trans:%lu uncached:%lu exec:%lu ints: g:%u op:%u h:%u h/g: %f)\n",
+             tbs->display_id,
+             tbs->phys_pc, tbs->pc, tbs->flags,
+             tbs->translations.total, tbs->translations.uncached,
+             tbs->executions.total,
+             tbs->code.num_guest_inst,
+             tbs->code.num_tcg_inst,
+             tbs->code.num_host_inst,
+             tbs->code.num_guest_inst ?
+                ((float) tbs->code.num_host_inst / tbs->code.num_guest_inst) :
+                0);
+}
+
+static gint
+inverse_sort_tbs(gconstpointer p1, gconstpointer p2, gpointer psort_by)
+{
+    const TBStatistics *tbs1 = (TBStatistics *) p1;
+    const TBStatistics *tbs2 = (TBStatistics *) p2;
+    int sort_by = *((int *) psort_by);
+    unsigned long c1 = 0;
+    unsigned long c2 = 0;
+
+    if (likely(sort_by == SORT_BY_HOTNESS)) {
+        c1 = tbs1->executions.total;
+        c2 = tbs2->executions.total;
+    } else if (likely(sort_by == SORT_BY_HG)) {
+        if (tbs1->code.num_guest_inst == 0) {
+            return -1;
+        }
+        if (tbs2->code.num_guest_inst == 0) {
+            return 1;
+        }
+
+        float a = (float) tbs1->code.num_host_inst / tbs1->code.num_guest_inst;
+        float b = (float) tbs2->code.num_host_inst / tbs2->code.num_guest_inst;
+        c1 = a <= b ? 0 : 1;
+        c2 = a <= b ? 1 : 0;
+    }
+
+
+    return c1 < c2 ? 1 : c1 == c2 ? 0 : -1;
+}
+
+
+static void do_dump_coverset_info(int percentage)
+{
+    uint64_t total_exec_count = 0;
+    uint64_t covered_exec_count = 0;
+    unsigned coverset_size = 0;
+    int id = 1;
+    GList *i;
+
+    g_list_free(last_search);
+    last_search = NULL;
+
+    /* XXX: we could pass user data to collect_tb_stats to filter */
+    qht_iter(&tb_ctx.tb_stats, collect_tb_stats, NULL);
+
+    last_search = g_list_sort_with_data(last_search, inverse_sort_tbs,
+                                        SORT_BY_HOTNESS);
+
+    /* Compute total execution count for all tbs */
+    for (i = last_search; i; i = i->next) {
+        TBStatistics *tbs = (TBStatistics *) i->data;
+        total_exec_count += tbs->executions.total;
+    }
+
+    for (i = last_search; i; i = i->next) {
+        TBStatistics *tbs = (TBStatistics *) i->data;
+        covered_exec_count += tbs->executions.total;
+        tbs->display_id = id++;
+        coverset_size++;
+        dump_tb_header(tbs);
+
+        /* Iterate and display tbs until reach the percentage count cover */
+        if (((double) covered_exec_count / total_exec_count) >
+                ((double) percentage / 100)) {
+            break;
+        }
+    }
+
+    qemu_log("\n------------------------------\n");
+    qemu_log("# of TBs to reach %d%% of the total exec count: %u\t",
+                percentage, coverset_size);
+    qemu_log("Total exec count: %lu\n", total_exec_count);
+    qemu_log("\n------------------------------\n");
+
+    /* free the unused bits */
+    i->next->prev = NULL;
+    g_list_free(i->next);
+    i->next = NULL;
+}
+
+
+static void do_dump_tbs_info(int count, int sort_by)
+{
+    int id = 1;
+    GList *i;
+
+    g_list_free(last_search);
+    last_search = NULL;
+
+    /* XXX: we could pass user data to collect_tb_stats to filter */
+    qht_iter(&tb_ctx.tb_stats, collect_tb_stats, NULL);
+
+    last_search = g_list_sort_with_data(last_search, inverse_sort_tbs,
+                                        &sort_by);
+
+    for (i = last_search; i && count--; i = i->next) {
+        TBStatistics *tbs = (TBStatistics *) i->data;
+        tbs->display_id = id++;
+        dump_tb_header(tbs);
+    }
+
+    /* free the unused bits */
+    if (i && i->next) {
+        i->next->prev = NULL;
+    }
+    g_list_free(i->next);
+    i->next = NULL;
+}
+
+static void
+do_dump_coverset_info_safe(CPUState *cpu, run_on_cpu_data percentage)
+{
+    qemu_log_to_monitor(true);
+    do_dump_coverset_info(percentage.host_int);
+    qemu_log_to_monitor(false);
+}
+
+struct tbs_dump_info {
+    int count;
+    int sort_by;
+};
+
+static void do_dump_tbs_info_safe(CPUState *cpu, run_on_cpu_data tbdi)
+{
+    struct tbs_dump_info *info = tbdi.host_ptr;
+    qemu_log_to_monitor(true);
+    do_dump_tbs_info(info->count, info->sort_by);
+    qemu_log_to_monitor(false);
+    g_free(info);
+}
+
+/*
+ * When we dump_tbs_info on a live system via the HMP we want to
+ * ensure the system is quiessent before we start outputting stuff.
+ * Otherwise we could pollute the output with other logging output.
+ */
+void dump_coverset_info(int percentage, bool use_monitor)
+{
+    if (use_monitor) {
+        async_safe_run_on_cpu(first_cpu, do_dump_coverset_info_safe,
+                              RUN_ON_CPU_HOST_INT(percentage));
+    } else {
+        do_dump_coverset_info(percentage);
+    }
+}
+
+void dump_tbs_info(int count, int sort_by, bool use_monitor)
+{
+    if (use_monitor) {
+        struct tbs_dump_info *tbdi = g_new(struct tbs_dump_info, 1);
+        tbdi->count = count;
+        tbdi->sort_by = sort_by;
+        async_safe_run_on_cpu(first_cpu, do_dump_tbs_info_safe,
+                              RUN_ON_CPU_HOST_PTR(tbdi));
+    } else {
+        do_dump_tbs_info(count, sort_by);
+    }
+}
+
+static void do_tb_dump_with_statistics(TBStatistics *tbs, int log_flags)
+{
+    CPUState *cpu = current_cpu;
+    uint32_t cflags = curr_cflags() | CF_NOCACHE;
+    int old_log_flags = qemu_loglevel;
+    TranslationBlock *tb = NULL;
+
+    qemu_set_log(log_flags);
+
+    qemu_log("\n------------------------------\n");
+    dump_tb_header(tbs);
+
+    if (sigsetjmp(cpu->jmp_env, 0) == 0) {
+        mmap_lock();
+        tb = tb_gen_code(cpu, tbs->pc, tbs->cs_base, tbs->flags, cflags);
+        tb_phys_invalidate(tb, -1);
+        mmap_unlock();
+    } else {
+        /*
+         * The mmap_lock is dropped by tb_gen_code if it runs out of
+         * memory.
+         */
+        fprintf(stderr, "%s: dbg failed!\n", __func__);
+        assert_no_pages_locked();
+    }
+
+    qemu_set_log(old_log_flags);
+
+    tcg_tb_remove(tb);
+}
+
+struct tb_dump_info {
+    int id;
+    int log_flags;
+    bool use_monitor;
+};
+
+static void do_dump_tb_info_safe(CPUState *cpu, run_on_cpu_data info)
+{
+    struct tb_dump_info *tbdi = (struct tb_dump_info *) info.host_ptr;
+    GList *iter;
+
+    if (!last_search) {
+        qemu_printf("no search on record");
+        return;
+    }
+    qemu_log_to_monitor(tbdi->use_monitor);
+
+    for (iter = last_search; iter; iter = g_list_next(iter)) {
+        TBStatistics *tbs = iter->data;
+        if (tbs->display_id == tbdi->id) {
+            do_tb_dump_with_statistics(tbs, tbdi->log_flags);
+        }
+    }
+    qemu_log_to_monitor(false);
+    g_free(tbdi);
+}
+
+/* XXX: only from monitor? */
+void dump_tb_info(int id, int log_mask, bool use_monitor)
+{
+    struct tb_dump_info *tbdi = g_new(struct tb_dump_info, 1);
+
+    tbdi->id = id;
+    tbdi->log_flags = log_mask;
+    tbdi->use_monitor = use_monitor;
+
+    async_safe_run_on_cpu(first_cpu, do_dump_tb_info_safe,
+                          RUN_ON_CPU_HOST_PTR(tbdi));
+
+    /* tbdi free'd by do_dump_tb_info_safe */
+}
+
+void clean_tbstats_info(void)
+{
+/* TODO: remove all tb_stats */
+}
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 536ea58f81..c4bfad75d3 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -365,6 +365,49 @@ void dump_exec_info(void);
 void dump_opcount_info(void);
 #endif /* !CONFIG_USER_ONLY */
 
+/**
+ * dump_coverset_info: report the hottest blocks to cover n% of execution
+ *
+ * @percentage: cover set percentage
+ * @use_monitor: redirect output to monitor
+ *
+ * Report the hottest blocks to either the log or monitor
+ */
+void dump_coverset_info(int percentage, bool use_monitor);
+
+#define SORT_BY_HOTNESS 0
+#define SORT_BY_HG 1
+
+/**
+ * dump_tbs_info: report the hottest blocks
+ *
+ * @count: the limit of hotblocks
+ * @sort_by: property in which the dump will be sorted
+ * @use_monitor: redirect output to monitor
+ *
+ * Report the hottest blocks to either the log or monitor
+ */
+void dump_tbs_info(int count, int sort_by, bool use_monitor);
+
+/**
+ * dump_tb_info: dump information about one TB
+ *
+ * @id: the display id of the block (from previous search)
+ * @mask: the temporary logging mask
+ * @Use_monitor: redirect output to monitor
+ *
+ * Re-run a translation of a block at addr for the purposes of debug output
+ */
+void dump_tb_info(int id, int log_mask, bool use_monitor);
+
+/**
+ * clean_tbstats_info: remove all tb_stats information
+ *
+ */
+void clean_tbstats_info(void);
+
+
+
 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                         uint8_t *buf, target_ulong len, int is_write);
 
diff --git a/include/qemu/log-for-trace.h b/include/qemu/log-for-trace.h
index 2f0a5b080e..d65eb83037 100644
--- a/include/qemu/log-for-trace.h
+++ b/include/qemu/log-for-trace.h
@@ -21,6 +21,8 @@
 /* Private global variable, don't use */
 extern int qemu_loglevel;
 
+extern int32_t max_num_hot_tbs_to_dump;
+
 #define LOG_TRACE          (1 << 15)
 
 /* Returns true if a bit is set in the current loglevel mask */
diff --git a/include/qemu/log.h b/include/qemu/log.h
index 2fca65dd01..240b71f66a 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -114,6 +114,7 @@ typedef struct QEMULogItem {
 extern const QEMULogItem qemu_log_items[];
 
 void qemu_set_log(int log_flags);
+void qemu_log_to_monitor(bool enable);
 void qemu_log_needs_buffers(void);
 void qemu_set_log_filename(const char *filename, Error **errp);
 void qemu_set_dfilter_ranges(const char *ranges, Error **errp);
diff --git a/linux-user/exit.c b/linux-user/exit.c
index bdda720553..295d3f4cad 100644
--- a/linux-user/exit.c
+++ b/linux-user/exit.c
@@ -28,6 +28,9 @@ extern void __gcov_dump(void);
 
 void preexit_cleanup(CPUArchState *env, int code)
 {
+    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
+        dump_tbs_info(max_num_hot_tbs_to_dump, SORT_BY_HOTNESS, false);
+    }
 #ifdef TARGET_GPROF
         _mcleanup();
 #endif
diff --git a/util/log.c b/util/log.c
index 1d1b33f7d9..c0f1e9980f 100644
--- a/util/log.c
+++ b/util/log.c
@@ -19,6 +19,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/log.h"
+#include "qemu/qemu-print.h"
 #include "qemu/range.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
@@ -30,21 +31,26 @@ FILE *qemu_logfile;
 int qemu_loglevel;
 static int log_append = 0;
 static GArray *debug_regions;
+int32_t max_num_hot_tbs_to_dump;
+static bool to_monitor;
 
 /* Return the number of characters emitted.  */
 int qemu_log(const char *fmt, ...)
 {
     int ret = 0;
-    if (qemu_logfile) {
-        va_list ap;
-        va_start(ap, fmt);
+    va_list ap;
+    va_start(ap, fmt);
+
+    if (to_monitor) {
+        ret = qemu_vprintf(fmt, ap);
+    } else if (qemu_logfile) {
         ret = vfprintf(qemu_logfile, fmt, ap);
-        va_end(ap);
+    }
+    va_end(ap);
 
-        /* Don't pass back error results.  */
-        if (ret < 0) {
-            ret = 0;
-        }
+    /* Don't pass back error results.  */
+    if (ret < 0) {
+        ret = 0;
     }
     return ret;
 }
@@ -99,6 +105,11 @@ void qemu_set_log(int log_flags)
     }
 }
 
+void qemu_log_to_monitor(bool enable)
+{
+    to_monitor = enable;
+}
+
 void qemu_log_needs_buffers(void)
 {
     log_uses_own_buffers = true;
@@ -273,6 +284,9 @@ const QEMULogItem qemu_log_items[] = {
     { CPU_LOG_TB_NOCHAIN, "nochain",
       "do not chain compiled TBs so that \"exec\" and \"cpu\" show\n"
       "complete traces" },
+    { CPU_LOG_HOT_TBS, "hot_tbs(:limit)",
+      "show TBs (until given a limit) ordered by their hotness.\n"
+      "(if no limit is given, show all)" },
     { 0, NULL, NULL },
 };
 
@@ -294,6 +308,11 @@ int qemu_str_to_log_mask(const char *str)
             trace_enable_events((*tmp) + 6);
             mask |= LOG_TRACE;
 #endif
+        } else if (g_str_has_prefix(*tmp, "hot_tbs")) {
+            if (g_str_has_prefix(*tmp, "hot_tbs:") && (*tmp)[8] != '\0') {
+                max_num_hot_tbs_to_dump = atoi((*tmp) + 8);
+            }
+            mask |= CPU_LOG_HOT_TBS;
         } else {
             for (item = qemu_log_items; item->mask != 0; item++) {
                 if (g_str_equal(*tmp, item->name)) {
-- 
2.22.0



^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH v3 5/6] monitor: adding info tb and tbs to monitor
  2019-07-02 21:00 [Qemu-devel] [PATCH v3 1/6] accel/tcg: adding structure to store TB statistics vandersonmr
                   ` (2 preceding siblings ...)
  2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 4/6] util/log: introduce dump of tbstats and -d hot_tbs:limit vandersonmr
@ 2019-07-02 21:00 ` vandersonmr
  2019-07-04 16:36   ` Alex Bennée
                     ` (2 more replies)
  2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 6/6] monitor: adding start_stats " vandersonmr
                   ` (2 subsequent siblings)
  6 siblings, 3 replies; 20+ messages in thread
From: vandersonmr @ 2019-07-02 21:00 UTC (permalink / raw)
  To: qemu-devel; +Cc: vandersonmr, Dr. David Alan Gilbert, Markus Armbruster

adding options to list tbs by some metric and
investigate their code.

Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
---
 hmp-commands-info.hx | 22 ++++++++++++++
 monitor/misc.c       | 69 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+)

diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index c59444c461..0b8c0de95d 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -288,6 +288,28 @@ ETEXI
         .params     = "",
         .help       = "show dynamic compiler info",
         .cmd        = hmp_info_jit,
+    {
+        .name       = "tbs",
+        .args_type  = "number:i?,sortedby:s?",
+        .params     = "[number sortedby]",
+        .help       = "show a [number] translated blocks sorted by [sortedby]"
+                      "sortedby opts: hotness hg",
+        .cmd        = hmp_info_tbs,
+    },
+    {
+        .name       = "tb",
+        .args_type  = "id:i,flags:s?",
+        .params     = "id [log1[,...] flags]",
+        .help       = "show information about one translated block by id",
+        .cmd        = hmp_info_tb,
+    },
+    {
+        .name       = "coverset",
+        .args_type  = "number:i?",
+        .params     = "[number]",
+        .help       = "show hottest translated blocks neccesary to cover"
+                      "[number]% of the execution count",
+        .cmd        = hmp_info_coverset,
     },
 #endif
 
diff --git a/monitor/misc.c b/monitor/misc.c
index bf9faceb86..1fb4d75871 100644
--- a/monitor/misc.c
+++ b/monitor/misc.c
@@ -469,6 +469,75 @@ static void hmp_info_jit(Monitor *mon, const QDict *qdict)
     dump_drift_info();
 }
 
+static void hmp_info_tbs(Monitor *mon, const QDict *qdict)
+{
+    int n;
+    const char *s = NULL;
+    if (!tcg_enabled()) {
+        error_report("TB information is only available with accel=tcg");
+        return;
+    }
+    if (!tb_ctx.tb_stats.map) {
+        error_report("no TB information recorded");
+        return;
+    }
+
+    n = qdict_get_try_int(qdict, "number", 10);
+    s = qdict_get_try_str(qdict, "sortedby");
+
+    int sortedby = 0;
+    if (s == NULL || strcmp(s, "hotness") == 0) {
+        sortedby = SORT_BY_HOTNESS;
+    } else if (strcmp(s, "hg") == 0) {
+        sortedby = SORT_BY_HG;
+    }
+
+    dump_tbs_info(n, sortedby, true);
+}
+
+static void hmp_info_tb(Monitor *mon, const QDict *qdict)
+{
+    const int id = qdict_get_int(qdict, "id");
+    const char *flags = qdict_get_try_str(qdict, "flags");
+    int mask;
+
+    if (!tcg_enabled()) {
+        error_report("TB information is only available with accel=tcg");
+        return;
+    }
+
+    mask = flags ? qemu_str_to_log_mask(flags) : CPU_LOG_TB_IN_ASM;
+
+    if (!mask) {
+        help_cmd(mon, "log");
+        return;
+    }
+
+    dump_tb_info(id, mask, true);
+}
+
+static void hmp_info_coverset(Monitor *mon, const QDict *qdict)
+{
+    int n;
+    if (!tcg_enabled()) {
+        error_report("TB information is only available with accel=tcg");
+        return;
+    }
+    if (!qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
+        error_report("TB information not being recorded");
+        return;
+    }
+
+    n = qdict_get_try_int(qdict, "number", 90);
+
+    if (n < 0 || n > 100) {
+        error_report("Coverset percentage should be between 0 and 100");
+        return;
+    }
+
+    dump_coverset_info(n, true);
+}
+
 static void hmp_info_opcount(Monitor *mon, const QDict *qdict)
 {
     dump_opcount_info();
-- 
2.22.0



^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH v3 6/6] monitor: adding start_stats to monitor
  2019-07-02 21:00 [Qemu-devel] [PATCH v3 1/6] accel/tcg: adding structure to store TB statistics vandersonmr
                   ` (3 preceding siblings ...)
  2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 5/6] monitor: adding info tb and tbs to monitor vandersonmr
@ 2019-07-02 21:00 ` vandersonmr
  2019-07-04 16:43   ` Alex Bennée
  2019-07-04 14:05 ` [Qemu-devel] [PATCH v3 1/6] accel/tcg: adding structure to store TB statistics Alex Bennée
  2019-07-04 14:05 ` Alex Bennée
  6 siblings, 1 reply; 20+ messages in thread
From: vandersonmr @ 2019-07-02 21:00 UTC (permalink / raw)
  To: qemu-devel; +Cc: vandersonmr, Dr. David Alan Gilbert, Markus Armbruster

adding the option to start collecting the tb
statistics later using the start_stats command.

Signed-off-by: vandersonmr <vandersonmr2@gmail.com>
---
 hmp-commands.hx | 15 +++++++++++++++
 monitor/misc.c  | 15 +++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/hmp-commands.hx b/hmp-commands.hx
index bfa5681dd2..616b9f7388 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1885,6 +1885,21 @@ STEXI
 @findex qemu-io
 Executes a qemu-io command on the given block device.
 
+ETEXI
+
+    {
+        .name       = "start_stats",
+        .args_type  = "",
+        .params     = "",
+        .help       = "(re)start recording tb statistics",
+        .cmd        = hmp_tbstats_start,
+    },
+
+STEXI
+@item start_stats
+@findex
+(Re)start recording tb statistics
+
 ETEXI
 
     {
diff --git a/monitor/misc.c b/monitor/misc.c
index 1fb4d75871..d39a048fd7 100644
--- a/monitor/misc.c
+++ b/monitor/misc.c
@@ -469,6 +469,21 @@ static void hmp_info_jit(Monitor *mon, const QDict *qdict)
     dump_drift_info();
 }
 
+static void hmp_tbstats_start(Monitor *mon, const QDict *qdict)
+{
+    if (!tcg_enabled()) {
+        error_report("TB information is only available with accel=tcg");
+        return;
+    }
+    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
+        error_report("TB information already being recorded");
+        return;
+    }
+    qht_init(&tb_ctx.tb_stats, tb_stats_cmp, CODE_GEN_HTABLE_SIZE,
+                QHT_MODE_AUTO_RESIZE);
+    qemu_set_log(qemu_loglevel | CPU_LOG_HOT_TBS);
+}
+
 static void hmp_info_tbs(Monitor *mon, const QDict *qdict)
 {
     int n;
-- 
2.22.0



^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [PATCH v3 1/6] accel/tcg: adding structure to store TB statistics
  2019-07-02 21:00 [Qemu-devel] [PATCH v3 1/6] accel/tcg: adding structure to store TB statistics vandersonmr
                   ` (4 preceding siblings ...)
  2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 6/6] monitor: adding start_stats " vandersonmr
@ 2019-07-04 14:05 ` Alex Bennée
  2019-07-04 14:05 ` Alex Bennée
  6 siblings, 0 replies; 20+ messages in thread
From: Alex Bennée @ 2019-07-04 14:05 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini, vandersonmr, Richard Henderson


vandersonmr <vandersonmr2@gmail.com> writes:

> We want to store statistics for each TB even after flushes.
> We do not want to modify or grow the TB struct.
> So we create a new struct to contain this statistics and
> we link one of it to each TB as they are generated.
>
> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
> ---
>  accel/tcg/translate-all.c | 60 +++++++++++++++++++++++++++++++++++++++
>  include/exec/exec-all.h   | 46 ++++++++++++++++++++++++++++++
>  include/exec/tb-context.h |  1 +
>  include/exec/tb-hash.h    |  7 +++++
>  include/qemu/log.h        |  1 +
>  5 files changed, 115 insertions(+)
>
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index 5d1e08b169..d05803a142 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1132,11 +1132,31 @@ static bool tb_cmp(const void *ap, const void *bp)
>          a->page_addr[1] == b->page_addr[1];
>  }
>
> +/*
> + * This is the more or less the same compare, but the data persists
> + * over tb_flush. We also aggregate the various variations of cflags
> + * under one record and ignore the details of page overlap (although
> + * we can count it).
> + */
> +bool tb_stats_cmp(const void *ap, const void *bp)
> +{
> +    const TBStatistics *a = ap;
> +    const TBStatistics *b = bp;
> +
> +    return a->phys_pc == b->phys_pc &&
> +        a->pc == b->pc &&
> +        a->cs_base == b->cs_base &&
> +        a->flags == b->flags;
> +}
> +
>  static void tb_htable_init(void)
>  {
>      unsigned int mode = QHT_MODE_AUTO_RESIZE;
>
>      qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
> +    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {

So this should be a separate flag, not using qemu_loglevel_mask to
check. Something like tcg_collect_tbstats? Then the various things that
might need stats can just enable it.

> +        qht_init(&tb_ctx.tb_stats, tb_stats_cmp, CODE_GEN_HTABLE_SIZE, mode);
> +    }
>  }
>
>  /* Must be called before using the QEMU cpus. 'tb_size' is the size
> @@ -1586,6 +1606,31 @@ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
>  #endif
>  }
>
> +static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
> +                                  target_ulong cs_base, uint32_t flags)
> +{
> +    TBStatistics *new_stats = g_new0(TBStatistics, 1);
> +    uint32_t hash = tb_stats_hash_func(phys_pc, pc, flags);
> +    void *existing_stats = NULL;
> +    new_stats->phys_pc = phys_pc;
> +    new_stats->pc = pc;
> +    new_stats->cs_base = cs_base;
> +    new_stats->flags = flags;
> +
> +    qht_insert(&tb_ctx.tb_stats, new_stats, hash, &existing_stats);
> +
> +    if (unlikely(existing_stats)) {
> +        /*
> +         * If there is already a TBStatistic for this TB from a previous flush
> +         * then just make the new TB point to the older TBStatistic
> +         */
> +        g_free(new_stats);
> +        return existing_stats;
> +    } else {
> +        return new_stats;
> +    }
> +}
> +
>  /* add a new TB and link it to the physical page tables. phys_page2 is
>   * (-1) to indicate that only one page contains the TB.
>   *
> @@ -1732,6 +1777,21 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>      ti = profile_getclock();
>  #endif
>
> +    /*
> +     * We want to fetch the stats structure before we start code
> +     * generation so we can count interesting things about this
> +     * generation.
> +     *
> +     * XXX: using loglevel is fugly - we should have a general flag
> +     */
> +    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS) && qemu_log_in_addr_range(tb->pc)) {

As the XXX says use a common flag. So something like:


  if (tcg_collect_tb_stats) {
     tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
     ...
  } else {
     tb->tb_stats = NULL;
  }

And then later inside that:

   if (qemu_loglevel_mask(CPU_LOG_HOT_TBS) && qemu_log_in_addr_range(tb->pc))
   {
        tb->tb_stats.stats_enabled |= TB_EXEC_STATS;
   }


> +        tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
> +        /* XXX: should we lock and update in bulk? */
> +        atomic_inc(&tb->tb_stats->translations.total);

XXX is a todo note for later... make a decision and remove the comment.

> +    } else {
> +        tb->tb_stats = NULL;
> +    }
> +
>      tcg_func_start(tcg_ctx);
>
>      tcg_ctx->cpu = env_cpu(env);
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 16034ee651..a4bcd9b6df 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -324,6 +324,49 @@ static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
>  #define CODE_GEN_AVG_BLOCK_SIZE 150
>  #endif
>
> +typedef struct TBStatistics TBStatistics;
> +
> +/*
> + * This struct stores statistics such as execution count of the
> + * TranslationBlocks. Each sets of TBs for a given phys_pc/pc/flags
> + * has its own TBStatistics which will persist over tb_flush.
> + *
> + * We include additional counters to track number of translations as
> + * well as variants for compile flags.
> + */
> +struct TBStatistics {
> +    tb_page_addr_t phys_pc;
> +    target_ulong pc;
> +    uint32_t     flags;
> +    /* cs_base isn't included in the hash but we do check for matches */
> +    target_ulong cs_base;
> +
> +    /* Translation stats */
> +    struct {
> +        unsigned long total;
> +        unsigned long uncached;
> +        unsigned long spanning;
> +        /* XXX: count invalidation? */
> +    } translations;
> +
> +    /* Execution stats */
> +    struct {
> +        unsigned long total;
> +        unsigned long atomic;
> +    } executions;
> +
> +    struct {
> +        unsigned num_guest_inst;
> +        unsigned num_host_inst;
> +        unsigned num_tcg_inst;
> +    } code;
> +
> +    /* HMP information - used for referring to previous search */
> +    int display_id;

Usually we introduce the fields into the structure in the patches that
need them. Otherwise you run the risk of adding a bunch of fields that
you planned to add but never quite got around to the initial code. Of
course usually you write code and then start splitting the commits up
when you are cleaning up your branch.


> +};
> +
> +bool tb_stats_cmp(const void *ap, const void *bp);
> +
>  /*
>   * Translation Cache-related fields of a TB.
>   * This struct exists just for convenience; we keep track of TB's in a binary
> @@ -403,6 +446,9 @@ struct TranslationBlock {
>      uintptr_t jmp_list_head;
>      uintptr_t jmp_list_next[2];
>      uintptr_t jmp_dest[2];
> +
> +    /* Pointer to a struct where statistics from the TB is stored */
> +    TBStatistics *tb_stats;
>  };
>
>  extern bool parallel_cpus;
> diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
> index feb585e0a7..a4decb5d1f 100644
> --- a/include/exec/tb-context.h
> +++ b/include/exec/tb-context.h
> @@ -35,6 +35,7 @@ struct TBContext {
>
>      /* statistics */
>      unsigned tb_flush_count;
> +    struct qht tb_stats;
>  };
>
>  extern TBContext tb_ctx;
> diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h
> index 4f3a37d927..54c477fe79 100644
> --- a/include/exec/tb-hash.h
> +++ b/include/exec/tb-hash.h
> @@ -64,4 +64,11 @@ uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags,
>      return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
>  }
>
> +static inline
> +uint32_t tb_stats_hash_func(tb_page_addr_t phys_pc, target_ulong pc,
> +                            uint32_t flags)
> +{
> +    return qemu_xxhash5(phys_pc, pc, flags);
> +}
> +
>  #endif
> diff --git a/include/qemu/log.h b/include/qemu/log.h
> index b097a6cae1..2fca65dd01 100644
> --- a/include/qemu/log.h
> +++ b/include/qemu/log.h
> @@ -45,6 +45,7 @@ static inline bool qemu_log_separate(void)
>  /* LOG_TRACE (1 << 15) is defined in log-for-trace.h */
>  #define CPU_LOG_TB_OP_IND  (1 << 16)
>  #define CPU_LOG_TB_FPU     (1 << 17)
> +#define CPU_LOG_HOT_TBS    (1 << 18)

this flag can be punted to when we introduce the actual tracking because
we are going to have a specific (internal) flag for initialising the
tracking machinery.

>
>  /* Lock output for a series of related logs.  Since this is not needed
>   * for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we


--
Alex Bennée


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [PATCH v3 1/6] accel/tcg: adding structure to store TB statistics
  2019-07-02 21:00 [Qemu-devel] [PATCH v3 1/6] accel/tcg: adding structure to store TB statistics vandersonmr
                   ` (5 preceding siblings ...)
  2019-07-04 14:05 ` [Qemu-devel] [PATCH v3 1/6] accel/tcg: adding structure to store TB statistics Alex Bennée
@ 2019-07-04 14:05 ` Alex Bennée
  6 siblings, 0 replies; 20+ messages in thread
From: Alex Bennée @ 2019-07-04 14:05 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini, vandersonmr, Richard Henderson


vandersonmr <vandersonmr2@gmail.com> writes:

> We want to store statistics for each TB even after flushes.
> We do not want to modify or grow the TB struct.
> So we create a new struct to contain this statistics and
> we link one of it to each TB as they are generated.

Mini note, what happened to the cover letter?

--
Alex Bennée


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [PATCH v3 2/6] accel/tcg: Adding an optional tb execution counter
  2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 2/6] accel/tcg: Adding an optional tb execution counter vandersonmr
@ 2019-07-04 15:22   ` Alex Bennée
  0 siblings, 0 replies; 20+ messages in thread
From: Alex Bennée @ 2019-07-04 15:22 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini, vandersonmr, Richard Henderson


vandersonmr <vandersonmr2@gmail.com> writes:

> We add the option to instrument each TB to
> count the number of times it is executed and
> store this in the its TBStatistics struct.
>
> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
> ---
>  accel/tcg/tcg-runtime.c   | 7 +++++++
>  accel/tcg/tcg-runtime.h   | 2 ++
>  accel/tcg/translator.c    | 1 +
>  include/exec/gen-icount.h | 9 +++++++++
>  4 files changed, 19 insertions(+)
>
> diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
> index 8a1e408e31..f332eae334 100644
> --- a/accel/tcg/tcg-runtime.c
> +++ b/accel/tcg/tcg-runtime.c
> @@ -167,3 +167,10 @@ void HELPER(exit_atomic)(CPUArchState *env)
>  {
>      cpu_loop_exit_atomic(env_cpu(env), GETPC());
>  }
> +
> +void HELPER(inc_exec_freq)(void *ptr)
> +{
> +    TBStatistics *stats = (TBStatistics *) ptr;
> +    g_assert(stats);
> +    atomic_inc(&stats->executions.total);
> +}
> diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
> index 4fa61b49b4..bf0b75dbe8 100644
> --- a/accel/tcg/tcg-runtime.h
> +++ b/accel/tcg/tcg-runtime.h
> @@ -28,6 +28,8 @@ DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env)
>
>  DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
>
> +DEF_HELPER_FLAGS_1(inc_exec_freq, TCG_CALL_NO_RWG, void, ptr)
> +
>  #ifdef CONFIG_SOFTMMU
>
>  DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
> diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
> index 9226a348a3..cc06070e7e 100644
> --- a/accel/tcg/translator.c
> +++ b/accel/tcg/translator.c
> @@ -54,6 +54,7 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
>      gen_tb_start(db->tb);
>      ops->tb_start(db, cpu);
>      tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
> +    gen_tb_exec_count(tb);
>
>      while (true) {
>          db->num_insns++;
> diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
> index f7669b6841..6701ab70c0 100644
> --- a/include/exec/gen-icount.h
> +++ b/include/exec/gen-icount.h
> @@ -7,6 +7,15 @@
>
>  static TCGOp *icount_start_insn;
>
> +static inline void gen_tb_exec_count(TranslationBlock *tb)
> +{
> +    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS) && tb->tb_stats) {

so with the changes suggested in 1/6 it would become:

 if (tb->tb_stats && tb->tb_stats.stats_enabled & TB_EXEC_STATS) {

 }

which would eventually make the granularity controllable on a per-TB
basis. You could even hide the above check in a inline helper in the
headers (tb-stats.h?).

> +        TCGv_ptr ptr = tcg_const_ptr(tb->tb_stats);
> +        gen_helper_inc_exec_freq(ptr);
> +        tcg_temp_free_ptr(ptr);
> +    }
> +}
> +
>  static inline void gen_tb_start(TranslationBlock *tb)
>  {
>      TCGv_i32 count, imm;


--
Alex Bennée


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [PATCH v3 3/6] accel/tcg: Collecting translation/code quality measurements
  2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 3/6] accel/tcg: Collecting translation/code quality measurements vandersonmr
@ 2019-07-04 15:39   ` Alex Bennée
  0 siblings, 0 replies; 20+ messages in thread
From: Alex Bennée @ 2019-07-04 15:39 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini, vandersonmr, Richard Henderson


vandersonmr <vandersonmr2@gmail.com> writes:

> Filling other tb statistics such as number of times the
> tb is compiled, its number of guest/host/IR instructions...
>
> Signed-off-by: vandersonmr <vandersonmr2@gmail.com>
> ---
>  accel/tcg/translate-all.c |  14 +++++
>  accel/tcg/translator.c    |   4 ++
>  disas.c                   | 107 ++++++++++++++++++++++++++++++++++++++
>  include/disas/disas.h     |   1 +
>  tcg/tcg.c                 |   8 +++
>  5 files changed, 134 insertions(+)
>
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index d05803a142..9ee7232bb8 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1865,6 +1865,17 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>      atomic_set(&prof->search_out_len, prof->search_out_len + search_size);
>  #endif
>
> +    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS) &&
> qemu_log_in_addr_range(tb->pc)) {

This should be a different flag - CPU_LOG_JIT_STATS? Also enable on the
TBstats creation and check the per-TB flag (tb_stats_enabled(tb,
JIT_STATS)).

> +        size_t code_size = gen_code_size;
> +        if (tcg_ctx->data_gen_ptr) {
> +            code_size = tcg_ctx->data_gen_ptr - tb->tc.ptr;
> +        }
> +        qemu_log_lock();

Wrong lock, and not needed if you are using atomics. If you did want a
lock you can move get_num_insts out of the lock as it doesn't need
protection and reduce the contention on the lock.

> +        atomic_set(&tb->tb_stats->code.num_host_inst,
> +                    get_num_insts(tb->tc.ptr, code_size));

atomic_add and then later when we present the data / by the number of translations?

> +        qemu_log_unlock();
> +    }
> +
>  #ifdef DEBUG_DISAS
>      if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
>          qemu_log_in_addr_range(tb->pc)) {
> @@ -1922,6 +1933,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>      phys_page2 = -1;
>      if ((pc & TARGET_PAGE_MASK) != virt_page2) {
>          phys_page2 = get_page_addr_code(env, virt_page2);
> +        if (tb->tb_stats) {
> +            atomic_inc(&tb->tb_stats->translations.spanning);
> +        }
>      }
>      /*
>       * No explicit memory barrier is required -- tb_link_page() makes the
> diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
> index cc06070e7e..d2529ca97d 100644
> --- a/accel/tcg/translator.c
> +++ b/accel/tcg/translator.c
> @@ -117,6 +117,10 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
>      db->tb->size = db->pc_next - db->pc_first;
>      db->tb->icount = db->num_insns;
>
> +    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS) && qemu_log_in_addr_range(tb->pc)) {
> +        db->tb->tb_stats->code.num_guest_inst = db->num_insns;
> +    }

Leave the in_addr range check to the TBStats creation, use
tb_stats_enabled(tb, JIT_STATS).

> +
>  #ifdef DEBUG_DISAS
>      if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
>          && qemu_log_in_addr_range(db->pc_first)) {
> diff --git a/disas.c b/disas.c
> index 3e2bfa572b..f5ae9c009a 100644
> --- a/disas.c
> +++ b/disas.c
> @@ -475,6 +475,113 @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code,
>      }
>  }
>
> +
> +static int fprintf_fake(struct _IO_FILE *a, const char *b, ...)
> +{
> +    return 1;
> +}
> +
> +/*
> + * This is a work around to get the number of host instructions with
> + * a small effort. It reuses the disas function with a fake printf to
> + * print nothing but count the number of instructions.
> + *
> + */
> +unsigned get_num_insts(void *code, unsigned long size)
> +{
> +    uintptr_t pc;
> +    int count;
> +    CPUDebug s;
> +    int (*print_insn)(bfd_vma pc, disassemble_info *info) = NULL;
> +
> +    INIT_DISASSEMBLE_INFO(s.info, NULL, fprintf_fake);
> +    s.info.print_address_func = generic_print_host_address;
> +
> +    s.info.buffer = code;
> +    s.info.buffer_vma = (uintptr_t)code;
> +    s.info.buffer_length = size;
> +    s.info.cap_arch = -1;
> +    s.info.cap_mode = 0;
> +    s.info.cap_insn_unit = 4;
> +    s.info.cap_insn_split = 4;
> +
> +#ifdef HOST_WORDS_BIGENDIAN
> +    s.info.endian = BFD_ENDIAN_BIG;
> +#else
> +    s.info.endian = BFD_ENDIAN_LITTLE;
> +#endif
> +#if defined(CONFIG_TCG_INTERPRETER)
> +    print_insn = print_insn_tci;
> +#elif defined(__i386__)
> +    s.info.mach = bfd_mach_i386_i386;
> +    print_insn = print_insn_i386;
> +    s.info.cap_arch = CS_ARCH_X86;
> +    s.info.cap_mode = CS_MODE_32;
> +    s.info.cap_insn_unit = 1;
> +    s.info.cap_insn_split = 8;
> +#elif defined(__x86_64__)
> +    s.info.mach = bfd_mach_x86_64;
> +    print_insn = print_insn_i386;
> +    s.info.cap_arch = CS_ARCH_X86;
> +    s.info.cap_mode = CS_MODE_64;
> +    s.info.cap_insn_unit = 1;
> +    s.info.cap_insn_split = 8;
> +#elif defined(_ARCH_PPC)
> +    s.info.disassembler_options = (char *)"any";
> +    print_insn = print_insn_ppc;
> +    s.info.cap_arch = CS_ARCH_PPC;
> +# ifdef _ARCH_PPC64
> +    s.info.cap_mode = CS_MODE_64;
> +# endif
> +#elif defined(__riscv) && defined(CONFIG_RISCV_DIS)
> +#if defined(_ILP32) || (__riscv_xlen == 32)
> +    print_insn = print_insn_riscv32;
> +#elif defined(_LP64)
> +    print_insn = print_insn_riscv64;
> +#else
> +#error unsupported RISC-V ABI
> +#endif
> +#elif defined(__aarch64__) && defined(CONFIG_ARM_A64_DIS)
> +    print_insn = print_insn_arm_a64;
> +    s.info.cap_arch = CS_ARCH_ARM64;
> +#elif defined(__alpha__)
> +    print_insn = print_insn_alpha;
> +#elif defined(__sparc__)
> +    print_insn = print_insn_sparc;
> +    s.info.mach = bfd_mach_sparc_v9b;
> +#elif defined(__arm__)
> +    print_insn = print_insn_arm;
> +    s.info.cap_arch = CS_ARCH_ARM;
> +    /* TCG only generates code for arm mode.  */
> +#elif defined(__MIPSEB__)
> +    print_insn = print_insn_big_mips;
> +#elif defined(__MIPSEL__)
> +    print_insn = print_insn_little_mips;
> +#elif defined(__m68k__)
> +    print_insn = print_insn_m68k;
> +#elif defined(__s390__)
> +    print_insn = print_insn_s390;
> +#elif defined(__hppa__)
> +    print_insn = print_insn_hppa;
> +#endif
> +
> +    if (print_insn == NULL) {
> +        print_insn = print_insn_od_host;
> +    }

I feel the above should probably be wrapped in a function and called
from the various disas functions to initialise the data. DRY.

> +
> +    s.info.fprintf_func = fprintf_fake;
> +    unsigned num_insts = 0;
> +    for (pc = (uintptr_t)code; size > 0; pc += count, size -= count) {
> +        num_insts++;
> +        count = print_insn(pc, &s.info);
> +        if (count < 0) {
> +            break;
> +        }
> +    }
> +    return num_insts;
> +}
> +
> +
>  /* Disassemble this for me please... (debugging). */
>  void disas(FILE *out, void *code, unsigned long size)
>  {
> diff --git a/include/disas/disas.h b/include/disas/disas.h
> index 15da511f49..9797ae7cfa 100644
> --- a/include/disas/disas.h
> +++ b/include/disas/disas.h
> @@ -7,6 +7,7 @@
>
>  /* Disassemble this for me please... (debugging). */
>  void disas(FILE *out, void *code, unsigned long size);
> +unsigned get_num_insts(void *code, unsigned long size);
>  void target_disas(FILE *out, CPUState *cpu, target_ulong code,
>                    target_ulong size);
>
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 02a2680169..bd57bb642b 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -4072,6 +4072,14 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
>      atomic_set(&prof->la_time, prof->la_time + profile_getclock());
>  #endif
>
> +    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS) &&
> qemu_log_in_addr_range(tb->pc)) {

local tb flag check

> +        int n = 0;
> +        QTAILQ_FOREACH(op, &s->ops, link) {
> +            n++;
> +        }
> +        tb->tb_stats->code.num_tcg_inst = n;

atomic_add and maybe num_tcg_ops? It would probably be worth checking
the op count before and after optimisation.

> +    }
> +
>  #ifdef DEBUG_DISAS
>      if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
>                   && qemu_log_in_addr_range(tb->pc))) {


--
Alex Bennée


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [PATCH v3 4/6] util/log: introduce dump of tbstats and -d hot_tbs:limit
  2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 4/6] util/log: introduce dump of tbstats and -d hot_tbs:limit vandersonmr
@ 2019-07-04 15:40   ` Alex Bennée
  2019-07-04 16:34   ` Alex Bennée
  1 sibling, 0 replies; 20+ messages in thread
From: Alex Bennée @ 2019-07-04 15:40 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Riku Voipio, vandersonmr, Laurent Vivier,
	Richard Henderson


vandersonmr <vandersonmr2@gmail.com> writes:

> add option to dump the N most hot TB blocks.
> -d hot_tbs:N
> and also add all tbstats dump functions.
>
> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
> ---
>  accel/tcg/Makefile.objs      |   1 +
>  accel/tcg/tb-stats.c         | 293 +++++++++++++++++++++++++++++++++++
>  include/exec/cpu-all.h       |  43 +++++
>  include/qemu/log-for-trace.h |   2 +
>  include/qemu/log.h           |   1 +
>  linux-user/exit.c            |   3 +
>  util/log.c                   |  35 ++++-
>  7 files changed, 370 insertions(+), 8 deletions(-)
>  create mode 100644 accel/tcg/tb-stats.c
>
> diff --git a/accel/tcg/Makefile.objs b/accel/tcg/Makefile.objs
> index d381a02f34..59d50d2dc5 100644
> --- a/accel/tcg/Makefile.objs
> +++ b/accel/tcg/Makefile.objs
> @@ -3,6 +3,7 @@ obj-$(CONFIG_SOFTMMU) += cputlb.o
>  obj-y += tcg-runtime.o tcg-runtime-gvec.o
>  obj-y += cpu-exec.o cpu-exec-common.o translate-all.o
>  obj-y += translator.o
> +obj-y += tb-stats.o
>
>  obj-$(CONFIG_USER_ONLY) += user-exec.o
>  obj-$(call lnot,$(CONFIG_SOFTMMU)) += user-exec-stub.o
> diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
> new file mode 100644
> index 0000000000..922023f29d
> --- /dev/null
> +++ b/accel/tcg/tb-stats.c
> @@ -0,0 +1,293 @@
> +#include "qemu/osdep.h"
> +#include "qemu-common.h"
> +
> +/* XXX: I'm not sure what includes could be safely removed */

In general start with osdep and maybe qemu-common.h and only add what
you need.

> +#define NO_CPU_IO_DEFS
> +#include "cpu.h"
> +#include "trace.h"
> +#include "disas/disas.h"
> +#include "exec/exec-all.h"
> +#include "tcg.h"
> +#if defined(CONFIG_USER_ONLY)
> +#include "qemu.h"
> +#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
> +#include <sys/param.h>
> +#if __FreeBSD_version >= 700104
> +#define HAVE_KINFO_GETVMMAP
> +#define sigqueue sigqueue_freebsd  /* avoid redefinition */
> +#include <sys/proc.h>
> +#include <machine/profile.h>
> +#define _KERNEL
> +#include <sys/user.h>
> +#undef _KERNEL
> +#undef sigqueue
> +#include <libutil.h>
> +#endif
> +#endif
> +#else
> +#include "exec/ram_addr.h"
> +#endif
> +
> +#include "qemu/qemu-print.h"
> +
> +
> +/* only accessed in safe work */
> +static GList *last_search;
> +
> +static void collect_tb_stats(void *p, uint32_t hash, void *userp)
> +{
> +    last_search = g_list_prepend(last_search, p);
> +}
> +
> +static void dump_tb_header(TBStatistics *tbs)
> +{
> +    qemu_log("TB%d: phys:0x"TB_PAGE_ADDR_FMT" virt:0x"TARGET_FMT_lx
> +             " flags:%#08x (trans:%lu uncached:%lu exec:%lu ints: g:%u op:%u h:%u h/g: %f)\n",
> +             tbs->display_id,
> +             tbs->phys_pc, tbs->pc, tbs->flags,
> +             tbs->translations.total, tbs->translations.uncached,
> +             tbs->executions.total,
> +             tbs->code.num_guest_inst,
> +             tbs->code.num_tcg_inst,
> +             tbs->code.num_host_inst,
> +             tbs->code.num_guest_inst ?
> +                ((float) tbs->code.num_host_inst / tbs->code.num_guest_inst) :
> +                0);
> +}
> +
> +static gint
> +inverse_sort_tbs(gconstpointer p1, gconstpointer p2, gpointer psort_by)
> +{
> +    const TBStatistics *tbs1 = (TBStatistics *) p1;
> +    const TBStatistics *tbs2 = (TBStatistics *) p2;
> +    int sort_by = *((int *) psort_by);
> +    unsigned long c1 = 0;
> +    unsigned long c2 = 0;
> +
> +    if (likely(sort_by == SORT_BY_HOTNESS)) {
> +        c1 = tbs1->executions.total;
> +        c2 = tbs2->executions.total;
> +    } else if (likely(sort_by == SORT_BY_HG)) {
> +        if (tbs1->code.num_guest_inst == 0) {
> +            return -1;
> +        }
> +        if (tbs2->code.num_guest_inst == 0) {
> +            return 1;
> +        }
> +
> +        float a = (float) tbs1->code.num_host_inst / tbs1->code.num_guest_inst;
> +        float b = (float) tbs2->code.num_host_inst / tbs2->code.num_guest_inst;
> +        c1 = a <= b ? 0 : 1;
> +        c2 = a <= b ? 1 : 0;
> +    }
> +
> +
> +    return c1 < c2 ? 1 : c1 == c2 ? 0 : -1;
> +}
> +
> +
> +static void do_dump_coverset_info(int percentage)
> +{
> +    uint64_t total_exec_count = 0;
> +    uint64_t covered_exec_count = 0;
> +    unsigned coverset_size = 0;
> +    int id = 1;
> +    GList *i;
> +
> +    g_list_free(last_search);
> +    last_search = NULL;
> +
> +    /* XXX: we could pass user data to collect_tb_stats to filter */
> +    qht_iter(&tb_ctx.tb_stats, collect_tb_stats, NULL);
> +
> +    last_search = g_list_sort_with_data(last_search, inverse_sort_tbs,
> +                                        SORT_BY_HOTNESS);
> +
> +    /* Compute total execution count for all tbs */
> +    for (i = last_search; i; i = i->next) {
> +        TBStatistics *tbs = (TBStatistics *) i->data;
> +        total_exec_count += tbs->executions.total;
> +    }
> +
> +    for (i = last_search; i; i = i->next) {
> +        TBStatistics *tbs = (TBStatistics *) i->data;
> +        covered_exec_count += tbs->executions.total;
> +        tbs->display_id = id++;
> +        coverset_size++;
> +        dump_tb_header(tbs);
> +
> +        /* Iterate and display tbs until reach the percentage count cover */
> +        if (((double) covered_exec_count / total_exec_count) >
> +                ((double) percentage / 100)) {
> +            break;
> +        }
> +    }
> +
> +    qemu_log("\n------------------------------\n");
> +    qemu_log("# of TBs to reach %d%% of the total exec count: %u\t",
> +                percentage, coverset_size);
> +    qemu_log("Total exec count: %lu\n", total_exec_count);
> +    qemu_log("\n------------------------------\n");
> +
> +    /* free the unused bits */
> +    i->next->prev = NULL;
> +    g_list_free(i->next);
> +    i->next = NULL;
> +}
> +
> +
> +static void do_dump_tbs_info(int count, int sort_by)
> +{
> +    int id = 1;
> +    GList *i;
> +
> +    g_list_free(last_search);
> +    last_search = NULL;
> +
> +    /* XXX: we could pass user data to collect_tb_stats to filter */
> +    qht_iter(&tb_ctx.tb_stats, collect_tb_stats, NULL);
> +
> +    last_search = g_list_sort_with_data(last_search, inverse_sort_tbs,
> +                                        &sort_by);
> +
> +    for (i = last_search; i && count--; i = i->next) {
> +        TBStatistics *tbs = (TBStatistics *) i->data;
> +        tbs->display_id = id++;
> +        dump_tb_header(tbs);
> +    }
> +
> +    /* free the unused bits */
> +    if (i && i->next) {
> +        i->next->prev = NULL;
> +    }
> +    g_list_free(i->next);
> +    i->next = NULL;
> +}
> +
> +static void
> +do_dump_coverset_info_safe(CPUState *cpu, run_on_cpu_data percentage)
> +{
> +    qemu_log_to_monitor(true);
> +    do_dump_coverset_info(percentage.host_int);
> +    qemu_log_to_monitor(false);
> +}
> +
> +struct tbs_dump_info {
> +    int count;
> +    int sort_by;
> +};
> +
> +static void do_dump_tbs_info_safe(CPUState *cpu, run_on_cpu_data tbdi)
> +{
> +    struct tbs_dump_info *info = tbdi.host_ptr;
> +    qemu_log_to_monitor(true);
> +    do_dump_tbs_info(info->count, info->sort_by);
> +    qemu_log_to_monitor(false);
> +    g_free(info);
> +}
> +
> +/*
> + * When we dump_tbs_info on a live system via the HMP we want to
> + * ensure the system is quiessent before we start outputting stuff.
> + * Otherwise we could pollute the output with other logging output.
> + */
> +void dump_coverset_info(int percentage, bool use_monitor)
> +{
> +    if (use_monitor) {
> +        async_safe_run_on_cpu(first_cpu, do_dump_coverset_info_safe,
> +                              RUN_ON_CPU_HOST_INT(percentage));
> +    } else {
> +        do_dump_coverset_info(percentage);
> +    }
> +}
> +
> +void dump_tbs_info(int count, int sort_by, bool use_monitor)
> +{
> +    if (use_monitor) {
> +        struct tbs_dump_info *tbdi = g_new(struct tbs_dump_info, 1);
> +        tbdi->count = count;
> +        tbdi->sort_by = sort_by;
> +        async_safe_run_on_cpu(first_cpu, do_dump_tbs_info_safe,
> +                              RUN_ON_CPU_HOST_PTR(tbdi));
> +    } else {
> +        do_dump_tbs_info(count, sort_by);
> +    }
> +}
> +
> +static void do_tb_dump_with_statistics(TBStatistics *tbs, int log_flags)
> +{
> +    CPUState *cpu = current_cpu;
> +    uint32_t cflags = curr_cflags() | CF_NOCACHE;
> +    int old_log_flags = qemu_loglevel;
> +    TranslationBlock *tb = NULL;
> +
> +    qemu_set_log(log_flags);
> +
> +    qemu_log("\n------------------------------\n");
> +    dump_tb_header(tbs);
> +
> +    if (sigsetjmp(cpu->jmp_env, 0) == 0) {
> +        mmap_lock();
> +        tb = tb_gen_code(cpu, tbs->pc, tbs->cs_base, tbs->flags, cflags);
> +        tb_phys_invalidate(tb, -1);
> +        mmap_unlock();
> +    } else {
> +        /*
> +         * The mmap_lock is dropped by tb_gen_code if it runs out of
> +         * memory.
> +         */
> +        fprintf(stderr, "%s: dbg failed!\n", __func__);
> +        assert_no_pages_locked();
> +    }
> +
> +    qemu_set_log(old_log_flags);
> +
> +    tcg_tb_remove(tb);
> +}
> +
> +struct tb_dump_info {
> +    int id;
> +    int log_flags;
> +    bool use_monitor;
> +};
> +
> +static void do_dump_tb_info_safe(CPUState *cpu, run_on_cpu_data info)
> +{
> +    struct tb_dump_info *tbdi = (struct tb_dump_info *) info.host_ptr;
> +    GList *iter;
> +
> +    if (!last_search) {
> +        qemu_printf("no search on record");
> +        return;
> +    }
> +    qemu_log_to_monitor(tbdi->use_monitor);
> +
> +    for (iter = last_search; iter; iter = g_list_next(iter)) {
> +        TBStatistics *tbs = iter->data;
> +        if (tbs->display_id == tbdi->id) {
> +            do_tb_dump_with_statistics(tbs, tbdi->log_flags);
> +        }
> +    }
> +    qemu_log_to_monitor(false);
> +    g_free(tbdi);
> +}
> +
> +/* XXX: only from monitor? */
> +void dump_tb_info(int id, int log_mask, bool use_monitor)
> +{
> +    struct tb_dump_info *tbdi = g_new(struct tb_dump_info, 1);
> +
> +    tbdi->id = id;
> +    tbdi->log_flags = log_mask;
> +    tbdi->use_monitor = use_monitor;
> +
> +    async_safe_run_on_cpu(first_cpu, do_dump_tb_info_safe,
> +                          RUN_ON_CPU_HOST_PTR(tbdi));
> +
> +    /* tbdi free'd by do_dump_tb_info_safe */
> +}
> +
> +void clean_tbstats_info(void)
> +{
> +/* TODO: remove all tb_stats */
> +}
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index 536ea58f81..c4bfad75d3 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -365,6 +365,49 @@ void dump_exec_info(void);
>  void dump_opcount_info(void);
>  #endif /* !CONFIG_USER_ONLY */
>
> +/**
> + * dump_coverset_info: report the hottest blocks to cover n% of execution
> + *
> + * @percentage: cover set percentage
> + * @use_monitor: redirect output to monitor
> + *
> + * Report the hottest blocks to either the log or monitor
> + */
> +void dump_coverset_info(int percentage, bool use_monitor);
> +
> +#define SORT_BY_HOTNESS 0
> +#define SORT_BY_HG 1
> +
> +/**
> + * dump_tbs_info: report the hottest blocks
> + *
> + * @count: the limit of hotblocks
> + * @sort_by: property in which the dump will be sorted
> + * @use_monitor: redirect output to monitor
> + *
> + * Report the hottest blocks to either the log or monitor
> + */
> +void dump_tbs_info(int count, int sort_by, bool use_monitor);
> +
> +/**
> + * dump_tb_info: dump information about one TB
> + *
> + * @id: the display id of the block (from previous search)
> + * @mask: the temporary logging mask
> + * @Use_monitor: redirect output to monitor
> + *
> + * Re-run a translation of a block at addr for the purposes of debug output
> + */
> +void dump_tb_info(int id, int log_mask, bool use_monitor);
> +
> +/**
> + * clean_tbstats_info: remove all tb_stats information
> + *
> + */
> +void clean_tbstats_info(void);
> +
> +
> +
>  int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
>                          uint8_t *buf, target_ulong len, int is_write);
>
> diff --git a/include/qemu/log-for-trace.h b/include/qemu/log-for-trace.h
> index 2f0a5b080e..d65eb83037 100644
> --- a/include/qemu/log-for-trace.h
> +++ b/include/qemu/log-for-trace.h
> @@ -21,6 +21,8 @@
>  /* Private global variable, don't use */
>  extern int qemu_loglevel;
>
> +extern int32_t max_num_hot_tbs_to_dump;
> +
>  #define LOG_TRACE          (1 << 15)
>
>  /* Returns true if a bit is set in the current loglevel mask */
> diff --git a/include/qemu/log.h b/include/qemu/log.h
> index 2fca65dd01..240b71f66a 100644
> --- a/include/qemu/log.h
> +++ b/include/qemu/log.h
> @@ -114,6 +114,7 @@ typedef struct QEMULogItem {
>  extern const QEMULogItem qemu_log_items[];
>
>  void qemu_set_log(int log_flags);
> +void qemu_log_to_monitor(bool enable);
>  void qemu_log_needs_buffers(void);
>  void qemu_set_log_filename(const char *filename, Error **errp);
>  void qemu_set_dfilter_ranges(const char *ranges, Error **errp);
> diff --git a/linux-user/exit.c b/linux-user/exit.c
> index bdda720553..295d3f4cad 100644
> --- a/linux-user/exit.c
> +++ b/linux-user/exit.c
> @@ -28,6 +28,9 @@ extern void __gcov_dump(void);
>
>  void preexit_cleanup(CPUArchState *env, int code)
>  {
> +    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
> +        dump_tbs_info(max_num_hot_tbs_to_dump, SORT_BY_HOTNESS, false);
> +    }
>  #ifdef TARGET_GPROF
>          _mcleanup();
>  #endif
> diff --git a/util/log.c b/util/log.c
> index 1d1b33f7d9..c0f1e9980f 100644
> --- a/util/log.c
> +++ b/util/log.c
> @@ -19,6 +19,7 @@
>
>  #include "qemu/osdep.h"
>  #include "qemu/log.h"
> +#include "qemu/qemu-print.h"
>  #include "qemu/range.h"
>  #include "qemu/error-report.h"
>  #include "qapi/error.h"
> @@ -30,21 +31,26 @@ FILE *qemu_logfile;
>  int qemu_loglevel;
>  static int log_append = 0;
>  static GArray *debug_regions;
> +int32_t max_num_hot_tbs_to_dump;
> +static bool to_monitor;
>
>  /* Return the number of characters emitted.  */
>  int qemu_log(const char *fmt, ...)
>  {
>      int ret = 0;
> -    if (qemu_logfile) {
> -        va_list ap;
> -        va_start(ap, fmt);
> +    va_list ap;
> +    va_start(ap, fmt);
> +
> +    if (to_monitor) {
> +        ret = qemu_vprintf(fmt, ap);
> +    } else if (qemu_logfile) {
>          ret = vfprintf(qemu_logfile, fmt, ap);
> -        va_end(ap);
> +    }
> +    va_end(ap);
>
> -        /* Don't pass back error results.  */
> -        if (ret < 0) {
> -            ret = 0;
> -        }
> +    /* Don't pass back error results.  */
> +    if (ret < 0) {
> +        ret = 0;
>      }
>      return ret;
>  }
> @@ -99,6 +105,11 @@ void qemu_set_log(int log_flags)
>      }
>  }
>
> +void qemu_log_to_monitor(bool enable)
> +{
> +    to_monitor = enable;
> +}
> +
>  void qemu_log_needs_buffers(void)
>  {
>      log_uses_own_buffers = true;
> @@ -273,6 +284,9 @@ const QEMULogItem qemu_log_items[] = {
>      { CPU_LOG_TB_NOCHAIN, "nochain",
>        "do not chain compiled TBs so that \"exec\" and \"cpu\" show\n"
>        "complete traces" },
> +    { CPU_LOG_HOT_TBS, "hot_tbs(:limit)",
> +      "show TBs (until given a limit) ordered by their hotness.\n"
> +      "(if no limit is given, show all)" },
>      { 0, NULL, NULL },
>  };
>
> @@ -294,6 +308,11 @@ int qemu_str_to_log_mask(const char *str)
>              trace_enable_events((*tmp) + 6);
>              mask |= LOG_TRACE;
>  #endif
> +        } else if (g_str_has_prefix(*tmp, "hot_tbs")) {
> +            if (g_str_has_prefix(*tmp, "hot_tbs:") && (*tmp)[8] != '\0') {
> +                max_num_hot_tbs_to_dump = atoi((*tmp) + 8);
> +            }
> +            mask |= CPU_LOG_HOT_TBS;
>          } else {
>              for (item = qemu_log_items; item->mask != 0; item++) {
>                  if (g_str_equal(*tmp, item->name)) {


--
Alex Bennée


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [PATCH v3 4/6] util/log: introduce dump of tbstats and -d hot_tbs:limit
  2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 4/6] util/log: introduce dump of tbstats and -d hot_tbs:limit vandersonmr
  2019-07-04 15:40   ` Alex Bennée
@ 2019-07-04 16:34   ` Alex Bennée
  1 sibling, 0 replies; 20+ messages in thread
From: Alex Bennée @ 2019-07-04 16:34 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Riku Voipio, vandersonmr, Laurent Vivier,
	Richard Henderson


vandersonmr <vandersonmr2@gmail.com> writes:

> add option to dump the N most hot TB blocks.
> -d hot_tbs:N
> and also add all tbstats dump functions.
>
> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
> ---
>  accel/tcg/Makefile.objs      |   1 +
>  accel/tcg/tb-stats.c         | 293 +++++++++++++++++++++++++++++++++++
>  include/exec/cpu-all.h       |  43 +++++
>  include/qemu/log-for-trace.h |   2 +
>  include/qemu/log.h           |   1 +
>  linux-user/exit.c            |   3 +
>  util/log.c                   |  35 ++++-
>  7 files changed, 370 insertions(+), 8 deletions(-)
>  create mode 100644 accel/tcg/tb-stats.c
>
> diff --git a/accel/tcg/Makefile.objs b/accel/tcg/Makefile.objs
> index d381a02f34..59d50d2dc5 100644
> --- a/accel/tcg/Makefile.objs
> +++ b/accel/tcg/Makefile.objs
> @@ -3,6 +3,7 @@ obj-$(CONFIG_SOFTMMU) += cputlb.o
>  obj-y += tcg-runtime.o tcg-runtime-gvec.o
>  obj-y += cpu-exec.o cpu-exec-common.o translate-all.o
>  obj-y += translator.o
> +obj-y += tb-stats.o
>
>  obj-$(CONFIG_USER_ONLY) += user-exec.o
>  obj-$(call lnot,$(CONFIG_SOFTMMU)) += user-exec-stub.o
> diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
> new file mode 100644
> index 0000000000..922023f29d
> --- /dev/null
> +++ b/accel/tcg/tb-stats.c
> @@ -0,0 +1,293 @@
> +#include "qemu/osdep.h"
> +#include "qemu-common.h"
> +
> +/* XXX: I'm not sure what includes could be safely removed */
> +#define NO_CPU_IO_DEFS
> +#include "cpu.h"
> +#include "trace.h"
> +#include "disas/disas.h"
> +#include "exec/exec-all.h"
> +#include "tcg.h"
> +#if defined(CONFIG_USER_ONLY)
> +#include "qemu.h"
> +#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
> +#include <sys/param.h>
> +#if __FreeBSD_version >= 700104
> +#define HAVE_KINFO_GETVMMAP
> +#define sigqueue sigqueue_freebsd  /* avoid redefinition */
> +#include <sys/proc.h>
> +#include <machine/profile.h>
> +#define _KERNEL
> +#include <sys/user.h>
> +#undef _KERNEL
> +#undef sigqueue
> +#include <libutil.h>
> +#endif
> +#endif
> +#else
> +#include "exec/ram_addr.h"
> +#endif
> +
> +#include "qemu/qemu-print.h"
> +
> +
> +/* only accessed in safe work */
> +static GList *last_search;
> +
> +static void collect_tb_stats(void *p, uint32_t hash, void *userp)
> +{
> +    last_search = g_list_prepend(last_search, p);
> +}
> +
> +static void dump_tb_header(TBStatistics *tbs)
> +{
> +    qemu_log("TB%d: phys:0x"TB_PAGE_ADDR_FMT" virt:0x"TARGET_FMT_lx
> +             " flags:%#08x (trans:%lu uncached:%lu exec:%lu ints: g:%u op:%u h:%u h/g: %f)\n",
> +             tbs->display_id,
> +             tbs->phys_pc, tbs->pc, tbs->flags,
> +             tbs->translations.total, tbs->translations.uncached,
> +             tbs->executions.total,
> +             tbs->code.num_guest_inst,
> +             tbs->code.num_tcg_inst,
> +             tbs->code.num_host_inst,
> +             tbs->code.num_guest_inst ?
> +                ((float) tbs->code.num_host_inst / tbs->code.num_guest_inst) :
> +                0);
> +}
> +
> +static gint
> +inverse_sort_tbs(gconstpointer p1, gconstpointer p2, gpointer psort_by)
> +{
> +    const TBStatistics *tbs1 = (TBStatistics *) p1;
> +    const TBStatistics *tbs2 = (TBStatistics *) p2;
> +    int sort_by = *((int *) psort_by);
> +    unsigned long c1 = 0;
> +    unsigned long c2 = 0;
> +
> +    if (likely(sort_by == SORT_BY_HOTNESS)) {
> +        c1 = tbs1->executions.total;
> +        c2 = tbs2->executions.total;
> +    } else if (likely(sort_by == SORT_BY_HG)) {
> +        if (tbs1->code.num_guest_inst == 0) {
> +            return -1;
> +        }
> +        if (tbs2->code.num_guest_inst == 0) {
> +            return 1;
> +        }
> +
> +        float a = (float) tbs1->code.num_host_inst / tbs1->code.num_guest_inst;
> +        float b = (float) tbs2->code.num_host_inst / tbs2->code.num_guest_inst;
> +        c1 = a <= b ? 0 : 1;
> +        c2 = a <= b ? 1 : 0;
> +    }
> +
> +
> +    return c1 < c2 ? 1 : c1 == c2 ? 0 : -1;
> +}
> +
> +
> +static void do_dump_coverset_info(int percentage)
> +{
> +    uint64_t total_exec_count = 0;
> +    uint64_t covered_exec_count = 0;
> +    unsigned coverset_size = 0;
> +    int id = 1;
> +    GList *i;
> +
> +    g_list_free(last_search);
> +    last_search = NULL;
> +
> +    /* XXX: we could pass user data to collect_tb_stats to filter */
> +    qht_iter(&tb_ctx.tb_stats, collect_tb_stats, NULL);
> +
> +    last_search = g_list_sort_with_data(last_search, inverse_sort_tbs,
> +                                        SORT_BY_HOTNESS);
> +
> +    /* Compute total execution count for all tbs */
> +    for (i = last_search; i; i = i->next) {
> +        TBStatistics *tbs = (TBStatistics *) i->data;
> +        total_exec_count += tbs->executions.total;
> +    }
> +
> +    for (i = last_search; i; i = i->next) {
> +        TBStatistics *tbs = (TBStatistics *) i->data;
> +        covered_exec_count += tbs->executions.total;
> +        tbs->display_id = id++;
> +        coverset_size++;
> +        dump_tb_header(tbs);
> +
> +        /* Iterate and display tbs until reach the percentage count cover */
> +        if (((double) covered_exec_count / total_exec_count) >
> +                ((double) percentage / 100)) {
> +            break;
> +        }
> +    }
> +
> +    qemu_log("\n------------------------------\n");
> +    qemu_log("# of TBs to reach %d%% of the total exec count: %u\t",
> +                percentage, coverset_size);
> +    qemu_log("Total exec count: %lu\n", total_exec_count);
> +    qemu_log("\n------------------------------\n");
> +
> +    /* free the unused bits */
> +    i->next->prev = NULL;
> +    g_list_free(i->next);
> +    i->next = NULL;
> +}
> +
> +
> +static void do_dump_tbs_info(int count, int sort_by)
> +{
> +    int id = 1;
> +    GList *i;
> +
> +    g_list_free(last_search);
> +    last_search = NULL;
> +
> +    /* XXX: we could pass user data to collect_tb_stats to filter */
> +    qht_iter(&tb_ctx.tb_stats, collect_tb_stats, NULL);
> +
> +    last_search = g_list_sort_with_data(last_search, inverse_sort_tbs,
> +                                        &sort_by);
> +
> +    for (i = last_search; i && count--; i = i->next) {
> +        TBStatistics *tbs = (TBStatistics *) i->data;
> +        tbs->display_id = id++;
> +        dump_tb_header(tbs);
> +    }
> +
> +    /* free the unused bits */
> +    if (i && i->next) {
> +        i->next->prev = NULL;
> +    }
> +    g_list_free(i->next);
> +    i->next = NULL;
> +}
> +
> +static void
> +do_dump_coverset_info_safe(CPUState *cpu, run_on_cpu_data percentage)
> +{
> +    qemu_log_to_monitor(true);
> +    do_dump_coverset_info(percentage.host_int);
> +    qemu_log_to_monitor(false);
> +}
> +
> +struct tbs_dump_info {
> +    int count;
> +    int sort_by;
> +};
> +
> +static void do_dump_tbs_info_safe(CPUState *cpu, run_on_cpu_data tbdi)
> +{
> +    struct tbs_dump_info *info = tbdi.host_ptr;
> +    qemu_log_to_monitor(true);
> +    do_dump_tbs_info(info->count, info->sort_by);
> +    qemu_log_to_monitor(false);
> +    g_free(info);
> +}
> +
> +/*
> + * When we dump_tbs_info on a live system via the HMP we want to
> + * ensure the system is quiessent before we start outputting stuff.
> + * Otherwise we could pollute the output with other logging output.
> + */
> +void dump_coverset_info(int percentage, bool use_monitor)
> +{
> +    if (use_monitor) {
> +        async_safe_run_on_cpu(first_cpu, do_dump_coverset_info_safe,
> +                              RUN_ON_CPU_HOST_INT(percentage));
> +    } else {
> +        do_dump_coverset_info(percentage);
> +    }
> +}
> +
> +void dump_tbs_info(int count, int sort_by, bool use_monitor)
> +{
> +    if (use_monitor) {
> +        struct tbs_dump_info *tbdi = g_new(struct tbs_dump_info, 1);
> +        tbdi->count = count;
> +        tbdi->sort_by = sort_by;
> +        async_safe_run_on_cpu(first_cpu, do_dump_tbs_info_safe,
> +                              RUN_ON_CPU_HOST_PTR(tbdi));
> +    } else {
> +        do_dump_tbs_info(count, sort_by);
> +    }
> +}
> +
> +static void do_tb_dump_with_statistics(TBStatistics *tbs, int log_flags)
> +{
> +    CPUState *cpu = current_cpu;
> +    uint32_t cflags = curr_cflags() | CF_NOCACHE;
> +    int old_log_flags = qemu_loglevel;
> +    TranslationBlock *tb = NULL;
> +
> +    qemu_set_log(log_flags);
> +
> +    qemu_log("\n------------------------------\n");
> +    dump_tb_header(tbs);
> +
> +    if (sigsetjmp(cpu->jmp_env, 0) == 0) {
> +        mmap_lock();
> +        tb = tb_gen_code(cpu, tbs->pc, tbs->cs_base, tbs->flags, cflags);
> +        tb_phys_invalidate(tb, -1);
> +        mmap_unlock();
> +    } else {
> +        /*
> +         * The mmap_lock is dropped by tb_gen_code if it runs out of
> +         * memory.
> +         */
> +        fprintf(stderr, "%s: dbg failed!\n", __func__);
> +        assert_no_pages_locked();
> +    }
> +
> +    qemu_set_log(old_log_flags);
> +
> +    tcg_tb_remove(tb);
> +}
> +
> +struct tb_dump_info {
> +    int id;
> +    int log_flags;
> +    bool use_monitor;
> +};
> +
> +static void do_dump_tb_info_safe(CPUState *cpu, run_on_cpu_data info)
> +{
> +    struct tb_dump_info *tbdi = (struct tb_dump_info *) info.host_ptr;
> +    GList *iter;
> +
> +    if (!last_search) {
> +        qemu_printf("no search on record");
> +        return;
> +    }
> +    qemu_log_to_monitor(tbdi->use_monitor);
> +
> +    for (iter = last_search; iter; iter = g_list_next(iter)) {
> +        TBStatistics *tbs = iter->data;
> +        if (tbs->display_id == tbdi->id) {
> +            do_tb_dump_with_statistics(tbs, tbdi->log_flags);
> +        }
> +    }
> +    qemu_log_to_monitor(false);
> +    g_free(tbdi);
> +}
> +
> +/* XXX: only from monitor? */
> +void dump_tb_info(int id, int log_mask, bool use_monitor)
> +{
> +    struct tb_dump_info *tbdi = g_new(struct tb_dump_info, 1);
> +
> +    tbdi->id = id;
> +    tbdi->log_flags = log_mask;
> +    tbdi->use_monitor = use_monitor;
> +
> +    async_safe_run_on_cpu(first_cpu, do_dump_tb_info_safe,
> +                          RUN_ON_CPU_HOST_PTR(tbdi));
> +
> +    /* tbdi free'd by do_dump_tb_info_safe */
> +}
> +
> +void clean_tbstats_info(void)
> +{
> +/* TODO: remove all tb_stats */
> +}
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index 536ea58f81..c4bfad75d3 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -365,6 +365,49 @@ void dump_exec_info(void);
>  void dump_opcount_info(void);
>  #endif /* !CONFIG_USER_ONLY */

Lets move these external functions along with inline helpers and the
TBStatsistics structure into their own file (tb-stats.h). exec-allh and
cpu-all.h end up as dumping grounds and this stuff is all self contained.

>
> +/**
> + * dump_coverset_info: report the hottest blocks to cover n% of execution
> + *
> + * @percentage: cover set percentage
> + * @use_monitor: redirect output to monitor
> + *
> + * Report the hottest blocks to either the log or monitor
> + */
> +void dump_coverset_info(int percentage, bool use_monitor);
> +
> +#define SORT_BY_HOTNESS 0
> +#define SORT_BY_HG 1

Use an enum for these - they are internal anyway.

> +
> +/**
> + * dump_tbs_info: report the hottest blocks
> + *
> + * @count: the limit of hotblocks
> + * @sort_by: property in which the dump will be sorted
> + * @use_monitor: redirect output to monitor
> + *
> + * Report the hottest blocks to either the log or monitor
> + */
> +void dump_tbs_info(int count, int sort_by, bool use_monitor);
> +
> +/**
> + * dump_tb_info: dump information about one TB
> + *
> + * @id: the display id of the block (from previous search)
> + * @mask: the temporary logging mask
> + * @Use_monitor: redirect output to monitor
> + *
> + * Re-run a translation of a block at addr for the purposes of debug output
> + */
> +void dump_tb_info(int id, int log_mask, bool use_monitor);
> +
> +/**
> + * clean_tbstats_info: remove all tb_stats information
> + *
> + */
> +void clean_tbstats_info(void);
> +
> +
> +
>  int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
>                          uint8_t *buf, target_ulong len, int is_write);
>
> diff --git a/include/qemu/log-for-trace.h b/include/qemu/log-for-trace.h
> index 2f0a5b080e..d65eb83037 100644
> --- a/include/qemu/log-for-trace.h
> +++ b/include/qemu/log-for-trace.h
> @@ -21,6 +21,8 @@
>  /* Private global variable, don't use */
>  extern int qemu_loglevel;
>
> +extern int32_t max_num_hot_tbs_to_dump;
> +
>  #define LOG_TRACE          (1 << 15)
>
>  /* Returns true if a bit is set in the current loglevel mask */
> diff --git a/include/qemu/log.h b/include/qemu/log.h
> index 2fca65dd01..240b71f66a 100644
> --- a/include/qemu/log.h
> +++ b/include/qemu/log.h
> @@ -114,6 +114,7 @@ typedef struct QEMULogItem {
>  extern const QEMULogItem qemu_log_items[];
>
>  void qemu_set_log(int log_flags);
> +void qemu_log_to_monitor(bool enable);
>  void qemu_log_needs_buffers(void);
>  void qemu_set_log_filename(const char *filename, Error **errp);
>  void qemu_set_dfilter_ranges(const char *ranges, Error **errp);
> diff --git a/linux-user/exit.c b/linux-user/exit.c
> index bdda720553..295d3f4cad 100644
> --- a/linux-user/exit.c
> +++ b/linux-user/exit.c
> @@ -28,6 +28,9 @@ extern void __gcov_dump(void);
>
>  void preexit_cleanup(CPUArchState *env, int code)
>  {
> +    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
> +        dump_tbs_info(max_num_hot_tbs_to_dump, SORT_BY_HOTNESS, false);
> +    }
>  #ifdef TARGET_GPROF
>          _mcleanup();
>  #endif
> diff --git a/util/log.c b/util/log.c
> index 1d1b33f7d9..c0f1e9980f 100644
> --- a/util/log.c
> +++ b/util/log.c
> @@ -19,6 +19,7 @@
>
>  #include "qemu/osdep.h"
>  #include "qemu/log.h"
> +#include "qemu/qemu-print.h"
>  #include "qemu/range.h"
>  #include "qemu/error-report.h"
>  #include "qapi/error.h"
> @@ -30,21 +31,26 @@ FILE *qemu_logfile;
>  int qemu_loglevel;
>  static int log_append = 0;
>  static GArray *debug_regions;
> +int32_t max_num_hot_tbs_to_dump;
> +static bool to_monitor;
>
>  /* Return the number of characters emitted.  */
>  int qemu_log(const char *fmt, ...)
>  {
>      int ret = 0;
> -    if (qemu_logfile) {
> -        va_list ap;
> -        va_start(ap, fmt);
> +    va_list ap;
> +    va_start(ap, fmt);
> +
> +    if (to_monitor) {
> +        ret = qemu_vprintf(fmt, ap);
> +    } else if (qemu_logfile) {
>          ret = vfprintf(qemu_logfile, fmt, ap);
> -        va_end(ap);
> +    }
> +    va_end(ap);
>
> -        /* Don't pass back error results.  */
> -        if (ret < 0) {
> -            ret = 0;
> -        }
> +    /* Don't pass back error results.  */
> +    if (ret < 0) {
> +        ret = 0;
>      }
>      return ret;
>  }
> @@ -99,6 +105,11 @@ void qemu_set_log(int log_flags)
>      }
>  }
>
> +void qemu_log_to_monitor(bool enable)
> +{
> +    to_monitor = enable;
> +}
> +
>  void qemu_log_needs_buffers(void)
>  {
>      log_uses_own_buffers = true;
> @@ -273,6 +284,9 @@ const QEMULogItem qemu_log_items[] = {
>      { CPU_LOG_TB_NOCHAIN, "nochain",
>        "do not chain compiled TBs so that \"exec\" and \"cpu\" show\n"
>        "complete traces" },
> +    { CPU_LOG_HOT_TBS, "hot_tbs(:limit)",
> +      "show TBs (until given a limit) ordered by their hotness.\n"
> +      "(if no limit is given, show all)" },
>      { 0, NULL, NULL },
>  };
>
> @@ -294,6 +308,11 @@ int qemu_str_to_log_mask(const char *str)
>              trace_enable_events((*tmp) + 6);
>              mask |= LOG_TRACE;
>  #endif
> +        } else if (g_str_has_prefix(*tmp, "hot_tbs")) {
> +            if (g_str_has_prefix(*tmp, "hot_tbs:") && (*tmp)[8] != '\0') {
> +                max_num_hot_tbs_to_dump = atoi((*tmp) + 8);
> +            }
> +            mask |= CPU_LOG_HOT_TBS;
>          } else {
>              for (item = qemu_log_items; item->mask != 0; item++) {
>                  if (g_str_equal(*tmp, item->name)) {


--
Alex Bennée


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [PATCH v3 5/6] monitor: adding info tb and tbs to monitor
  2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 5/6] monitor: adding info tb and tbs to monitor vandersonmr
@ 2019-07-04 16:36   ` Alex Bennée
  2019-07-05 13:54   ` Markus Armbruster
  2019-07-09  9:57   ` Dr. David Alan Gilbert
  2 siblings, 0 replies; 20+ messages in thread
From: Alex Bennée @ 2019-07-04 16:36 UTC (permalink / raw)
  To: qemu-devel; +Cc: vandersonmr, Dr. David Alan Gilbert, Markus Armbruster


vandersonmr <vandersonmr2@gmail.com> writes:

> adding options to list tbs by some metric and
> investigate their code.
>
> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
> ---
>  hmp-commands-info.hx | 22 ++++++++++++++
>  monitor/misc.c       | 69 ++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 91 insertions(+)
>
> diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
> index c59444c461..0b8c0de95d 100644
> --- a/hmp-commands-info.hx
> +++ b/hmp-commands-info.hx
> @@ -288,6 +288,28 @@ ETEXI
>          .params     = "",
>          .help       = "show dynamic compiler info",
>          .cmd        = hmp_info_jit,
> +    {

Ahh this is the breakage:

},
{

> +        .name       = "tbs",
> +        .args_type  = "number:i?,sortedby:s?",
> +        .params     = "[number sortedby]",
> +        .help       = "show a [number] translated blocks sorted by [sortedby]"
> +                      "sortedby opts: hotness hg",
> +        .cmd        = hmp_info_tbs,
> +    },
> +    {
> +        .name       = "tb",
> +        .args_type  = "id:i,flags:s?",
> +        .params     = "id [log1[,...] flags]",
> +        .help       = "show information about one translated block by id",
> +        .cmd        = hmp_info_tb,
> +    },
> +    {
> +        .name       = "coverset",
> +        .args_type  = "number:i?",
> +        .params     = "[number]",
> +        .help       = "show hottest translated blocks neccesary to cover"
> +                      "[number]% of the execution count",
> +        .cmd        = hmp_info_coverset,
>      },
>  #endif
>
> diff --git a/monitor/misc.c b/monitor/misc.c
> index bf9faceb86..1fb4d75871 100644
> --- a/monitor/misc.c
> +++ b/monitor/misc.c
> @@ -469,6 +469,75 @@ static void hmp_info_jit(Monitor *mon, const QDict *qdict)
>      dump_drift_info();
>  }
>
> +static void hmp_info_tbs(Monitor *mon, const QDict *qdict)
> +{
> +    int n;
> +    const char *s = NULL;
> +    if (!tcg_enabled()) {
> +        error_report("TB information is only available with accel=tcg");
> +        return;
> +    }
> +    if (!tb_ctx.tb_stats.map) {
> +        error_report("no TB information recorded");
> +        return;
> +    }
> +
> +    n = qdict_get_try_int(qdict, "number", 10);
> +    s = qdict_get_try_str(qdict, "sortedby");
> +
> +    int sortedby = 0;
> +    if (s == NULL || strcmp(s, "hotness") == 0) {
> +        sortedby = SORT_BY_HOTNESS;
> +    } else if (strcmp(s, "hg") == 0) {
> +        sortedby = SORT_BY_HG;
> +    }
> +
> +    dump_tbs_info(n, sortedby, true);
> +}
> +
> +static void hmp_info_tb(Monitor *mon, const QDict *qdict)
> +{
> +    const int id = qdict_get_int(qdict, "id");
> +    const char *flags = qdict_get_try_str(qdict, "flags");
> +    int mask;
> +
> +    if (!tcg_enabled()) {
> +        error_report("TB information is only available with accel=tcg");
> +        return;
> +    }
> +
> +    mask = flags ? qemu_str_to_log_mask(flags) : CPU_LOG_TB_IN_ASM;
> +
> +    if (!mask) {
> +        help_cmd(mon, "log");
> +        return;
> +    }
> +
> +    dump_tb_info(id, mask, true);
> +}
> +
> +static void hmp_info_coverset(Monitor *mon, const QDict *qdict)
> +{
> +    int n;
> +    if (!tcg_enabled()) {
> +        error_report("TB information is only available with accel=tcg");
> +        return;
> +    }
> +    if (!qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
> +        error_report("TB information not being recorded");
> +        return;
> +    }
> +
> +    n = qdict_get_try_int(qdict, "number", 90);
> +
> +    if (n < 0 || n > 100) {
> +        error_report("Coverset percentage should be between 0 and 100");
> +        return;
> +    }
> +
> +    dump_coverset_info(n, true);
> +}
> +
>  static void hmp_info_opcount(Monitor *mon, const QDict *qdict)
>  {
>      dump_opcount_info();


--
Alex Bennée


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [PATCH v3 6/6] monitor: adding start_stats to monitor
  2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 6/6] monitor: adding start_stats " vandersonmr
@ 2019-07-04 16:43   ` Alex Bennée
  0 siblings, 0 replies; 20+ messages in thread
From: Alex Bennée @ 2019-07-04 16:43 UTC (permalink / raw)
  To: qemu-devel; +Cc: vandersonmr, Dr. David Alan Gilbert, Markus Armbruster


vandersonmr <vandersonmr2@gmail.com> writes:

> adding the option to start collecting the tb
> statistics later using the start_stats command.
>
> Signed-off-by: vandersonmr <vandersonmr2@gmail.com>
> ---
>  hmp-commands.hx | 15 +++++++++++++++
>  monitor/misc.c  | 15 +++++++++++++++
>  2 files changed, 30 insertions(+)
>
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index bfa5681dd2..616b9f7388 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -1885,6 +1885,21 @@ STEXI
>  @findex qemu-io
>  Executes a qemu-io command on the given block device.
>
> +ETEXI
> +
> +    {
> +        .name       = "start_stats",

Maybe tb_stats? with an inferred "start" or a "reset" option? And then
an extensible set of options to expand what we record.

> +        .args_type  = "",
> +        .params     = "",
> +        .help       = "(re)start recording tb statistics",
> +        .cmd        = hmp_tbstats_start,
> +    },
> +
> +STEXI
> +@item start_stats
> +@findex
> +(Re)start recording tb statistics
> +
>  ETEXI
>
>      {
> diff --git a/monitor/misc.c b/monitor/misc.c
> index 1fb4d75871..d39a048fd7 100644
> --- a/monitor/misc.c
> +++ b/monitor/misc.c
> @@ -469,6 +469,21 @@ static void hmp_info_jit(Monitor *mon, const QDict *qdict)
>      dump_drift_info();
>  }
>
> +static void hmp_tbstats_start(Monitor *mon, const QDict *qdict)
> +{
> +    if (!tcg_enabled()) {
> +        error_report("TB information is only available with accel=tcg");
> +        return;
> +    }
> +    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
> +        error_report("TB information already being recorded");
> +        return;
> +    }

As mentioned before lets have an internal flags for this.

> +    qht_init(&tb_ctx.tb_stats, tb_stats_cmp, CODE_GEN_HTABLE_SIZE,
> +                QHT_MODE_AUTO_RESIZE);
> +    qemu_set_log(qemu_loglevel | CPU_LOG_HOT_TBS);

I suspect we want to safe work this so we can a) flush existing tb stats
for a reset and b) ensure we do a tb_flush() when we enable stats
(otherwise we won't collect anything).

> +}
> +
>  static void hmp_info_tbs(Monitor *mon, const QDict *qdict)
>  {
>      int n;


--
Alex Bennée


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [PATCH v3 5/6] monitor: adding info tb and tbs to monitor
  2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 5/6] monitor: adding info tb and tbs to monitor vandersonmr
  2019-07-04 16:36   ` Alex Bennée
@ 2019-07-05 13:54   ` Markus Armbruster
  2019-07-05 14:36     ` Alex Bennée
  2019-07-09  9:57   ` Dr. David Alan Gilbert
  2 siblings, 1 reply; 20+ messages in thread
From: Markus Armbruster @ 2019-07-05 13:54 UTC (permalink / raw)
  To: vandersonmr; +Cc: qemu-devel, Dr. David Alan Gilbert

vandersonmr <vandersonmr2@gmail.com> writes:

> adding options to list tbs by some metric and
> investigate their code.

What's "tbs"?

Why is listing them useful?

What do you mean by "some metric"?

What do you mean by "and investigate their code?"

> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
> ---
>  hmp-commands-info.hx | 22 ++++++++++++++
>  monitor/misc.c       | 69 ++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 91 insertions(+)
>
> diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
> index c59444c461..0b8c0de95d 100644
> --- a/hmp-commands-info.hx
> +++ b/hmp-commands-info.hx
> @@ -288,6 +288,28 @@ ETEXI
>          .params     = "",
>          .help       = "show dynamic compiler info",
>          .cmd        = hmp_info_jit,
> +    {
> +        .name       = "tbs",
> +        .args_type  = "number:i?,sortedby:s?",
> +        .params     = "[number sortedby]",
> +        .help       = "show a [number] translated blocks sorted by [sortedby]"
> +                      "sortedby opts: hotness hg",

Your use of [square brackets] in .help is unusual.  Please try to have
your commands' help blend into the existing help.

> +        .cmd        = hmp_info_tbs,
> +    },
> +    {
> +        .name       = "tb",
> +        .args_type  = "id:i,flags:s?",
> +        .params     = "id [log1[,...] flags]",
> +        .help       = "show information about one translated block by id",
> +        .cmd        = hmp_info_tb,
> +    },
> +    {
> +        .name       = "coverset",
> +        .args_type  = "number:i?",
> +        .params     = "[number]",
> +        .help       = "show hottest translated blocks neccesary to cover"
> +                      "[number]% of the execution count",

When a parameter takes a percentage, "number" is a suboptimal name :)

> +        .cmd        = hmp_info_coverset,
>      },
>  #endif
>  

Standard request for new HMP commands without corresponding QMP
commands: please state in the commit message why the QMP command is not
worthwhile.

HMP commands without a QMP equivalent are okay if their functionality
makes no sense in QMP, or is of use only for human users.

Example for "makes no sense in QMP": setting the current CPU, because a
QMP monitor doesn't have a current CPU.

Examples for "is of use only for human users": HMP command "help", the
integrated pocket calculator.

Debugging commands are kind of borderline.  Debugging is commonly a
human activity, where HMP is just fine.  However, humans create tools to
assist with their activities, and then QMP is useful.  While I wouldn't
encourage HMP-only for the debugging use case, I wouldn't veto it.

Your (overly terse!) commit message and help texts make me guess the
commands are for gathering statistics.  Statistics can have debugging
uses.  But they often have non-debugging uses as well.  What use cases
can you imagine for these commands?

[...]


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [PATCH v3 5/6] monitor: adding info tb and tbs to monitor
  2019-07-05 13:54   ` Markus Armbruster
@ 2019-07-05 14:36     ` Alex Bennée
  2019-07-06  6:09       ` Markus Armbruster
  0 siblings, 1 reply; 20+ messages in thread
From: Alex Bennée @ 2019-07-05 14:36 UTC (permalink / raw)
  To: qemu-devel; +Cc: vandersonmr, Dr. David Alan Gilbert


Markus Armbruster <armbru@redhat.com> writes:

> vandersonmr <vandersonmr2@gmail.com> writes:
>
<snip>

I'll leave Vanderson to address your other comments.

>
> Debugging commands are kind of borderline.  Debugging is commonly a
> human activity, where HMP is just fine.  However, humans create tools to
> assist with their activities, and then QMP is useful.  While I wouldn't
> encourage HMP-only for the debugging use case, I wouldn't veto it.
>
> Your (overly terse!) commit message and help texts make me guess the
> commands are for gathering statistics.  Statistics can have debugging
> uses.  But they often have non-debugging uses as well.  What use cases
> can you imagine for these commands?

So this is all really aimed at making TCG go faster - but before we can
make it go faster we need better tools for seeing where the time is
being spent and examining the code that we generate. So I expect the
main users of this functionality will be QEMU developers.

That said I can see a good rationale for supporting QMP because it is
more amenable to automation. However this is early days so I would
caution about exposing this stuff too early least we bake in a woolly
API.

The other wrinkle is we do have to take control of the emulator to
safely calculate some of the numbers we output. This essentially means
the HMP commands are asynchronous - we kick of safe work which waits
until all vCPU threads are stopped before we go through the records and
add up numbers. This is fine for the HMP because we just output to the
monitor FD when we are ready. I assume for QMP commands there is more
housekeeping to do? Can QMP commands wait for a response to be
calculated by another thread? Are there any existing commands that have
to support this sort of pattern?

>
> [...]


--
Alex Bennée


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [PATCH v3 5/6] monitor: adding info tb and tbs to monitor
  2019-07-05 14:36     ` Alex Bennée
@ 2019-07-06  6:09       ` Markus Armbruster
  2019-07-08  2:51         ` Vanderson Martins do Rosario
  2019-07-08  4:49         ` Marc-André Lureau
  0 siblings, 2 replies; 20+ messages in thread
From: Markus Armbruster @ 2019-07-06  6:09 UTC (permalink / raw)
  To: Alex Bennée
  Cc: Marc-André Lureau, vandersonmr, qemu-devel, Dr. David Alan Gilbert

Cc: Marc-André, who has patches that might be useful here.

Alex Bennée <alex.bennee@linaro.org> writes:

> Markus Armbruster <armbru@redhat.com> writes:
>
>> vandersonmr <vandersonmr2@gmail.com> writes:
>>
> <snip>
>
> I'll leave Vanderson to address your other comments.
>
>>
>> Debugging commands are kind of borderline.  Debugging is commonly a
>> human activity, where HMP is just fine.  However, humans create tools to
>> assist with their activities, and then QMP is useful.  While I wouldn't
>> encourage HMP-only for the debugging use case, I wouldn't veto it.
>>
>> Your (overly terse!) commit message and help texts make me guess the
>> commands are for gathering statistics.  Statistics can have debugging
>> uses.  But they often have non-debugging uses as well.  What use cases
>> can you imagine for these commands?
>
> So this is all really aimed at making TCG go faster - but before we can
> make it go faster we need better tools for seeing where the time is
> being spent and examining the code that we generate. So I expect the
> main users of this functionality will be QEMU developers.
>
> That said I can see a good rationale for supporting QMP because it is
> more amenable to automation. However this is early days so I would
> caution about exposing this stuff too early least we bake in a woolly
> API.

Development tools should exempt themselves from QMP's interface
stability promise: prefix the command names with 'x-'.

> The other wrinkle is we do have to take control of the emulator to
> safely calculate some of the numbers we output. This essentially means
> the HMP commands are asynchronous - we kick of safe work which waits
> until all vCPU threads are stopped before we go through the records and
> add up numbers. This is fine for the HMP because we just output to the
> monitor FD when we are ready. I assume for QMP commands there is more
> housekeeping to do? Can QMP commands wait for a response to be
> calculated by another thread? Are there any existing commands that have
> to support this sort of pattern?

Let me clarify "synchronous" to avoid confusion.

Both QMP and HMP commands are synchronous protocols in the sense that
commands are executed one after the other, without overlap.  When a
client sends multiple commands, it can assume that each one starts only
after the previous one completed.

Both HMP and QMP commands execute synchronously in the sense that the
command runs to completion without ever yielding the thread.  Any
blocking operations put the thread to sleep (but see below).

HMP runs in the main thread.  Putting the main thread to sleep is
generally undesirable.

QMP used to run in the main thread, too.  Nowadays, the QMP core runs in
an I/O thread shared by all monitors, and dispatches commands to the
main thread.  Moving command execution out of the main thread as well
requires careful review of the command's code for hidden assumptions.
Major project.

Fine print: OOB commands are a special case, but I doubt you want to
know more.

Fine print: certain character devices can't support use of an I/O
thread; QMP runs in the main thread then.  The ones you want to use with
QMP all support I/O threads.

You wrote "we kick of safe work which waits until all vCPU threads are
stopped before we go through the records and add up numbers [...] we
just output to the monitor FD".  Does this mean the HMP command kicks
off the work, terminates, and some time later something else prints
results to the monitor?  How much later?

If "later" is actually "soon", for a suitable value of "soon",
Marc-André's work on "asynchronous" QMP might be pertinent.  I put
"asynchronous" in scare quotes, because of the confusion it has caused.
My current understanding (Marc-André, please correct me if wrong): it
lets QMP commands to block without putting their thread to sleep.  It
does not make QMP an asynchronous protocol.

If "later" need not be "soon", read on.

In QMP, there are two established ways to do potentially long-running
work.  Both ways use a command that kicks off the work, then terminates
without waiting for it to complete.

The first way is traditional: pair the kick off command with a query
command and optionally an event.

When the work completes, it fires off the event.  The event is broadcast
to all QMP monitors (we could implement unicast if we have a compelling
use case).

The query command reports whether the work has completed, and if yes,
the work's results, if any.

You need the event if you want to avoid polling.

Even with an event, you still need a query command.  If your management
application loses its QMP connection temporarily, you can miss the
event.  You want to poll on reconnect, with the query command.

If more than one instance of the work can be pending at any one time,
event and query need to identify the instance somehow.  This is
completely ad hoc.

The second way is a full-blown "job".  This provides more control: you
can cancel, pause, resume, ...  It also provides a job ID.  More
featureful and more structured.

Jobs have grown out of block jobs.  I'd love to see some uses outside
the block subsystem.

Hope this helps!


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [PATCH v3 5/6] monitor: adding info tb and tbs to monitor
  2019-07-06  6:09       ` Markus Armbruster
@ 2019-07-08  2:51         ` Vanderson Martins do Rosario
  2019-07-08  4:49         ` Marc-André Lureau
  1 sibling, 0 replies; 20+ messages in thread
From: Vanderson Martins do Rosario @ 2019-07-08  2:51 UTC (permalink / raw)
  To: Markus Armbruster
  Cc: Marc-André Lureau, Alex Bennée, qemu-devel,
	Dr. David Alan Gilbert

Markus,

Thank you for your comments! Based on your questions and suggestions of
writing a more complete explanation in my commits, I decided to start to
describe our whole work on the wiki:
https://wiki.qemu.org/Internships/ProjectIdeas/TCGCodeQuality
I will update and expand it weekly, so I can link and cite it in future
patches to give a better whole vision for anyone interested.

Btw, thank you for your QMP tips, I will discuss with Alex how to address
it in our approach.

[]'s
Vanderson M. Rosario


On Sat, Jul 6, 2019 at 3:09 AM Markus Armbruster <armbru@redhat.com> wrote:

> Cc: Marc-André, who has patches that might be useful here.
>
> Alex Bennée <alex.bennee@linaro.org> writes:
>
> > Markus Armbruster <armbru@redhat.com> writes:
> >
> >> vandersonmr <vandersonmr2@gmail.com> writes:
> >>
> > <snip>
> >
> > I'll leave Vanderson to address your other comments.
> >
> >>
> >> Debugging commands are kind of borderline.  Debugging is commonly a
> >> human activity, where HMP is just fine.  However, humans create tools to
> >> assist with their activities, and then QMP is useful.  While I wouldn't
> >> encourage HMP-only for the debugging use case, I wouldn't veto it.
> >>
> >> Your (overly terse!) commit message and help texts make me guess the
> >> commands are for gathering statistics.  Statistics can have debugging
> >> uses.  But they often have non-debugging uses as well.  What use cases
> >> can you imagine for these commands?
> >
> > So this is all really aimed at making TCG go faster - but before we can
> > make it go faster we need better tools for seeing where the time is
> > being spent and examining the code that we generate. So I expect the
> > main users of this functionality will be QEMU developers.
> >
> > That said I can see a good rationale for supporting QMP because it is
> > more amenable to automation. However this is early days so I would
> > caution about exposing this stuff too early least we bake in a woolly
> > API.
>
> Development tools should exempt themselves from QMP's interface
> stability promise: prefix the command names with 'x-'.
>
> > The other wrinkle is we do have to take control of the emulator to
> > safely calculate some of the numbers we output. This essentially means
> > the HMP commands are asynchronous - we kick of safe work which waits
> > until all vCPU threads are stopped before we go through the records and
> > add up numbers. This is fine for the HMP because we just output to the
> > monitor FD when we are ready. I assume for QMP commands there is more
> > housekeeping to do? Can QMP commands wait for a response to be
> > calculated by another thread? Are there any existing commands that have
> > to support this sort of pattern?
>
> Let me clarify "synchronous" to avoid confusion.
>
> Both QMP and HMP commands are synchronous protocols in the sense that
> commands are executed one after the other, without overlap.  When a
> client sends multiple commands, it can assume that each one starts only
> after the previous one completed.
>
> Both HMP and QMP commands execute synchronously in the sense that the
> command runs to completion without ever yielding the thread.  Any
> blocking operations put the thread to sleep (but see below).
>
> HMP runs in the main thread.  Putting the main thread to sleep is
> generally undesirable.
>
> QMP used to run in the main thread, too.  Nowadays, the QMP core runs in
> an I/O thread shared by all monitors, and dispatches commands to the
> main thread.  Moving command execution out of the main thread as well
> requires careful review of the command's code for hidden assumptions.
> Major project.
>
> Fine print: OOB commands are a special case, but I doubt you want to
> know more.
>
> Fine print: certain character devices can't support use of an I/O
> thread; QMP runs in the main thread then.  The ones you want to use with
> QMP all support I/O threads.
>
> You wrote "we kick of safe work which waits until all vCPU threads are
> stopped before we go through the records and add up numbers [...] we
> just output to the monitor FD".  Does this mean the HMP command kicks
> off the work, terminates, and some time later something else prints
> results to the monitor?  How much later?
>
> If "later" is actually "soon", for a suitable value of "soon",
> Marc-André's work on "asynchronous" QMP might be pertinent.  I put
> "asynchronous" in scare quotes, because of the confusion it has caused.
> My current understanding (Marc-André, please correct me if wrong): it
> lets QMP commands to block without putting their thread to sleep.  It
> does not make QMP an asynchronous protocol.
>
> If "later" need not be "soon", read on.
>
> In QMP, there are two established ways to do potentially long-running
> work.  Both ways use a command that kicks off the work, then terminates
> without waiting for it to complete.
>
> The first way is traditional: pair the kick off command with a query
> command and optionally an event.
>
> When the work completes, it fires off the event.  The event is broadcast
> to all QMP monitors (we could implement unicast if we have a compelling
> use case).
>
> The query command reports whether the work has completed, and if yes,
> the work's results, if any.
>
> You need the event if you want to avoid polling.
>
> Even with an event, you still need a query command.  If your management
> application loses its QMP connection temporarily, you can miss the
> event.  You want to poll on reconnect, with the query command.
>
> If more than one instance of the work can be pending at any one time,
> event and query need to identify the instance somehow.  This is
> completely ad hoc.
>
> The second way is a full-blown "job".  This provides more control: you
> can cancel, pause, resume, ...  It also provides a job ID.  More
> featureful and more structured.
>
> Jobs have grown out of block jobs.  I'd love to see some uses outside
> the block subsystem.
>
> Hope this helps!
>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [PATCH v3 5/6] monitor: adding info tb and tbs to monitor
  2019-07-06  6:09       ` Markus Armbruster
  2019-07-08  2:51         ` Vanderson Martins do Rosario
@ 2019-07-08  4:49         ` Marc-André Lureau
  1 sibling, 0 replies; 20+ messages in thread
From: Marc-André Lureau @ 2019-07-08  4:49 UTC (permalink / raw)
  To: Markus Armbruster
  Cc: vandersonmr, Alex Bennée, qemu-devel, Dr. David Alan Gilbert

Hi

On Sat, Jul 6, 2019 at 10:09 AM Markus Armbruster <armbru@redhat.com> wrote:
>
> Cc: Marc-André, who has patches that might be useful here.
>
> Alex Bennée <alex.bennee@linaro.org> writes:
>
> > Markus Armbruster <armbru@redhat.com> writes:
> >
> >> vandersonmr <vandersonmr2@gmail.com> writes:
> >>
> > <snip>
> >
> > I'll leave Vanderson to address your other comments.
> >
> >>
> >> Debugging commands are kind of borderline.  Debugging is commonly a
> >> human activity, where HMP is just fine.  However, humans create tools to
> >> assist with their activities, and then QMP is useful.  While I wouldn't
> >> encourage HMP-only for the debugging use case, I wouldn't veto it.
> >>
> >> Your (overly terse!) commit message and help texts make me guess the
> >> commands are for gathering statistics.  Statistics can have debugging
> >> uses.  But they often have non-debugging uses as well.  What use cases
> >> can you imagine for these commands?
> >
> > So this is all really aimed at making TCG go faster - but before we can
> > make it go faster we need better tools for seeing where the time is
> > being spent and examining the code that we generate. So I expect the
> > main users of this functionality will be QEMU developers.
> >
> > That said I can see a good rationale for supporting QMP because it is
> > more amenable to automation. However this is early days so I would
> > caution about exposing this stuff too early least we bake in a woolly
> > API.
>
> Development tools should exempt themselves from QMP's interface
> stability promise: prefix the command names with 'x-'.
>
> > The other wrinkle is we do have to take control of the emulator to
> > safely calculate some of the numbers we output. This essentially means
> > the HMP commands are asynchronous - we kick of safe work which waits
> > until all vCPU threads are stopped before we go through the records and
> > add up numbers. This is fine for the HMP because we just output to the
> > monitor FD when we are ready. I assume for QMP commands there is more
> > housekeeping to do? Can QMP commands wait for a response to be
> > calculated by another thread? Are there any existing commands that have
> > to support this sort of pattern?
>
> Let me clarify "synchronous" to avoid confusion.
>
> Both QMP and HMP commands are synchronous protocols in the sense that
> commands are executed one after the other, without overlap.  When a
> client sends multiple commands, it can assume that each one starts only
> after the previous one completed.
>
> Both HMP and QMP commands execute synchronously in the sense that the
> command runs to completion without ever yielding the thread.  Any
> blocking operations put the thread to sleep (but see below).
>
> HMP runs in the main thread.  Putting the main thread to sleep is
> generally undesirable.
>
> QMP used to run in the main thread, too.  Nowadays, the QMP core runs in
> an I/O thread shared by all monitors, and dispatches commands to the
> main thread.  Moving command execution out of the main thread as well
> requires careful review of the command's code for hidden assumptions.
> Major project.
>
> Fine print: OOB commands are a special case, but I doubt you want to
> know more.
>
> Fine print: certain character devices can't support use of an I/O
> thread; QMP runs in the main thread then.  The ones you want to use with
> QMP all support I/O threads.
>
> You wrote "we kick of safe work which waits until all vCPU threads are
> stopped before we go through the records and add up numbers [...] we
> just output to the monitor FD".  Does this mean the HMP command kicks
> off the work, terminates, and some time later something else prints
> results to the monitor?  How much later?
>
> If "later" is actually "soon", for a suitable value of "soon",
> Marc-André's work on "asynchronous" QMP might be pertinent.  I put
> "asynchronous" in scare quotes, because of the confusion it has caused.
> My current understanding (Marc-André, please correct me if wrong): it
> lets QMP commands to block without putting their thread to sleep.  It
> does not make QMP an asynchronous protocol.

In "[PATCH v4 00/20] monitor: add asynchronous command type", I
propose a way to defer monitor commands (both QMP and HMP). This
allows the main loop to reenter, and when the work is completed,
return the command. During that time, no other commands are accepted
(except OOB), effectively "blocking" the monitor.

The series probably doesn't apply cleanly anymore, I'll rebase and resubmit it.

>
> If "later" need not be "soon", read on.
>
> In QMP, there are two established ways to do potentially long-running
> work.  Both ways use a command that kicks off the work, then terminates
> without waiting for it to complete.
>
> The first way is traditional: pair the kick off command with a query
> command and optionally an event.
>
> When the work completes, it fires off the event.  The event is broadcast
> to all QMP monitors (we could implement unicast if we have a compelling
> use case).
>
> The query command reports whether the work has completed, and if yes,
> the work's results, if any.
>
> You need the event if you want to avoid polling.
>
> Even with an event, you still need a query command.  If your management
> application loses its QMP connection temporarily, you can miss the
> event.  You want to poll on reconnect, with the query command.
>
> If more than one instance of the work can be pending at any one time,
> event and query need to identify the instance somehow.  This is
> completely ad hoc.
>
> The second way is a full-blown "job".  This provides more control: you
> can cancel, pause, resume, ...  It also provides a job ID.  More
> featureful and more structured.
>
> Jobs have grown out of block jobs.  I'd love to see some uses outside
> the block subsystem.
>
> Hope this helps!


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [PATCH v3 5/6] monitor: adding info tb and tbs to monitor
  2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 5/6] monitor: adding info tb and tbs to monitor vandersonmr
  2019-07-04 16:36   ` Alex Bennée
  2019-07-05 13:54   ` Markus Armbruster
@ 2019-07-09  9:57   ` Dr. David Alan Gilbert
  2 siblings, 0 replies; 20+ messages in thread
From: Dr. David Alan Gilbert @ 2019-07-09  9:57 UTC (permalink / raw)
  To: vandersonmr; +Cc: qemu-devel, Markus Armbruster

* vandersonmr (vandersonmr2@gmail.com) wrote:
> adding options to list tbs by some metric and
> investigate their code.
> 
> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>

As Markus said you need a short justification that it's for debug etc
to justify HMP only; it doesn't need to be huge, but we should have it.

> ---
>  hmp-commands-info.hx | 22 ++++++++++++++
>  monitor/misc.c       | 69 ++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 91 insertions(+)
> 
> diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
> index c59444c461..0b8c0de95d 100644
> --- a/hmp-commands-info.hx
> +++ b/hmp-commands-info.hx
> @@ -288,6 +288,28 @@ ETEXI
>          .params     = "",
>          .help       = "show dynamic compiler info",
>          .cmd        = hmp_info_jit,
> +    {
> +        .name       = "tbs",
> +        .args_type  = "number:i?,sortedby:s?",
> +        .params     = "[number sortedby]",
> +        .help       = "show a [number] translated blocks sorted by [sortedby]"
> +                      "sortedby opts: hotness hg",

If this is showing 'number' blocks then I think it should be called
'count'

> +        .cmd        = hmp_info_tbs,
> +    },
> +    {
> +        .name       = "tb",
> +        .args_type  = "id:i,flags:s?",
> +        .params     = "id [log1[,...] flags]",
> +        .help       = "show information about one translated block by id",
> +        .cmd        = hmp_info_tb,

That doesn't say what those flags are for; qemu has lots of flags for
different things; I think you're  saying it's some log flag?

> +    },
> +    {
> +        .name       = "coverset",
> +        .args_type  = "number:i?",
> +        .params     = "[number]",
> +        .help       = "show hottest translated blocks neccesary to cover"
> +                      "[number]% of the execution count",
> +        .cmd        = hmp_info_coverset,
>      },

That 'number' should be something like 'percent' or 'coverage'
>  #endif
>  
> diff --git a/monitor/misc.c b/monitor/misc.c
> index bf9faceb86..1fb4d75871 100644
> --- a/monitor/misc.c
> +++ b/monitor/misc.c
> @@ -469,6 +469,75 @@ static void hmp_info_jit(Monitor *mon, const QDict *qdict)
>      dump_drift_info();
>  }
>  
> +static void hmp_info_tbs(Monitor *mon, const QDict *qdict)
> +{
> +    int n;
> +    const char *s = NULL;
> +    if (!tcg_enabled()) {
> +        error_report("TB information is only available with accel=tcg");
> +        return;
> +    }
> +    if (!tb_ctx.tb_stats.map) {
> +        error_report("no TB information recorded");
> +        return;
> +    }
> +
> +    n = qdict_get_try_int(qdict, "number", 10);
> +    s = qdict_get_try_str(qdict, "sortedby");

No need to use single characters; sortedby_str  is fine for
example.

> +    int sortedby = 0;
> +    if (s == NULL || strcmp(s, "hotness") == 0) {
> +        sortedby = SORT_BY_HOTNESS;
> +    } else if (strcmp(s, "hg") == 0) {
> +        sortedby = SORT_BY_HG;
> +    }

You should error if there's another word in 's'

> +    dump_tbs_info(n, sortedby, true);
> +}
> +
> +static void hmp_info_tb(Monitor *mon, const QDict *qdict)
> +{
> +    const int id = qdict_get_int(qdict, "id");
> +    const char *flags = qdict_get_try_str(qdict, "flags");
> +    int mask;
> +
> +    if (!tcg_enabled()) {
> +        error_report("TB information is only available with accel=tcg");
> +        return;
> +    }
> +
> +    mask = flags ? qemu_str_to_log_mask(flags) : CPU_LOG_TB_IN_ASM;
> +
> +    if (!mask) {
> +        help_cmd(mon, "log");

That's not obvious - I'd perfer you said something like
     Unable to parse log flags, see 'help log'

> +        return;
> +    }
> +
> +    dump_tb_info(id, mask, true);
> +}
> +
> +static void hmp_info_coverset(Monitor *mon, const QDict *qdict)
> +{
> +    int n;
> +    if (!tcg_enabled()) {
> +        error_report("TB information is only available with accel=tcg");
> +        return;
> +    }
> +    if (!qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
> +        error_report("TB information not being recorded");
> +        return;
> +    }
> +
> +    n = qdict_get_try_int(qdict, "number", 90);
> +
> +    if (n < 0 || n > 100) {
> +        error_report("Coverset percentage should be between 0 and 100");
> +        return;
> +    }
> +
> +    dump_coverset_info(n, true);
> +}
> +
>  static void hmp_info_opcount(Monitor *mon, const QDict *qdict)
>  {
>      dump_opcount_info();
> -- 
> 2.22.0
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK


^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2019-07-09 10:09 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-02 21:00 [Qemu-devel] [PATCH v3 1/6] accel/tcg: adding structure to store TB statistics vandersonmr
2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 2/6] accel/tcg: Adding an optional tb execution counter vandersonmr
2019-07-04 15:22   ` Alex Bennée
2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 3/6] accel/tcg: Collecting translation/code quality measurements vandersonmr
2019-07-04 15:39   ` Alex Bennée
2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 4/6] util/log: introduce dump of tbstats and -d hot_tbs:limit vandersonmr
2019-07-04 15:40   ` Alex Bennée
2019-07-04 16:34   ` Alex Bennée
2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 5/6] monitor: adding info tb and tbs to monitor vandersonmr
2019-07-04 16:36   ` Alex Bennée
2019-07-05 13:54   ` Markus Armbruster
2019-07-05 14:36     ` Alex Bennée
2019-07-06  6:09       ` Markus Armbruster
2019-07-08  2:51         ` Vanderson Martins do Rosario
2019-07-08  4:49         ` Marc-André Lureau
2019-07-09  9:57   ` Dr. David Alan Gilbert
2019-07-02 21:00 ` [Qemu-devel] [PATCH v3 6/6] monitor: adding start_stats " vandersonmr
2019-07-04 16:43   ` Alex Bennée
2019-07-04 14:05 ` [Qemu-devel] [PATCH v3 1/6] accel/tcg: adding structure to store TB statistics Alex Bennée
2019-07-04 14:05 ` Alex Bennée

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).