All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v14 00/10] TCG code quality tracking
@ 2023-05-30  8:35 Fei Wu
  2023-05-30  8:35 ` [PATCH v14 01/10] accel/tcg: remove CONFIG_PROFILER Fei Wu
                   ` (9 more replies)
  0 siblings, 10 replies; 47+ messages in thread
From: Fei Wu @ 2023-05-30  8:35 UTC (permalink / raw)
  To: richard.henderson, alex.bennee, qemu-devel; +Cc: Fei Wu

v14
---
* cleanup TCGProfile in patch 04


Alex Bennée (1):
  tb-stats: reset the tracked TBs on a tb_flush

Fei Wu (3):
  accel/tcg: remove CONFIG_PROFILER
  accel/tcg: add jit stats and time to TBStatistics
  docs: add tb-stats how to

Vanderson M. do Rosario (6):
  accel/tcg: introduce TBStatistics structure
  accel: collecting TB execution count
  debug: add -d tb_stats to control TBStatistics collection:
  monitor: adding tb_stats hmp command
  Adding info [tb-list|tb] commands to HMP (WIP)
  tb-stats: dump hot TBs at the end of the execution

 MAINTAINERS                   |   1 +
 accel/tcg/cpu-exec.c          |   6 +
 accel/tcg/meson.build         |   1 +
 accel/tcg/monitor.c           | 122 +++++-
 accel/tcg/tb-context.h        |   1 +
 accel/tcg/tb-hash.h           |   7 +
 accel/tcg/tb-maint.c          |  20 +
 accel/tcg/tb-stats.c          | 692 ++++++++++++++++++++++++++++++++++
 accel/tcg/tcg-accel-ops.c     |  15 +-
 accel/tcg/tcg-runtime.c       |   1 +
 accel/tcg/translate-all.c     | 143 +++++--
 accel/tcg/translator.c        |  30 ++
 disas/disas.c                 |  26 +-
 docs/devel/tcg-tbstats.rst    | 129 +++++++
 hmp-commands-info.hx          |  16 +
 hmp-commands.hx               |  16 +
 include/exec/exec-all.h       |   3 +
 include/exec/gen-icount.h     |   2 +
 include/exec/tb-stats-dump.h  |  21 ++
 include/exec/tb-stats-flags.h |  34 ++
 include/exec/tb-stats.h       | 164 ++++++++
 include/monitor/hmp.h         |   3 +
 include/qemu/log-for-trace.h  |   6 +-
 include/qemu/log.h            |   3 +
 include/qemu/timer.h          |   5 +-
 include/tcg/tcg.h             |  41 +-
 linux-user/exit.c             |   2 +
 meson.build                   |   2 -
 meson_options.txt             |   2 -
 scripts/meson-buildoptions.sh |   3 -
 softmmu/runstate.c            |  10 +-
 stubs/meson.build             |   1 +
 stubs/tb-stats.c              |  32 ++
 tcg/tcg.c                     | 230 +++--------
 tests/qtest/qmp-cmd-test.c    |   2 +-
 util/log.c                    | 103 ++++-
 36 files changed, 1601 insertions(+), 294 deletions(-)
 create mode 100644 accel/tcg/tb-stats.c
 create mode 100644 docs/devel/tcg-tbstats.rst
 create mode 100644 include/exec/tb-stats-dump.h
 create mode 100644 include/exec/tb-stats-flags.h
 create mode 100644 include/exec/tb-stats.h
 create mode 100644 stubs/tb-stats.c

-- 
2.25.1



^ permalink raw reply	[flat|nested] 47+ messages in thread

* [PATCH v14 01/10] accel/tcg: remove CONFIG_PROFILER
  2023-05-30  8:35 [PATCH v14 00/10] TCG code quality tracking Fei Wu
@ 2023-05-30  8:35 ` Fei Wu
  2023-05-30  8:35 ` [PATCH v14 02/10] accel/tcg: introduce TBStatistics structure Fei Wu
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 47+ messages in thread
From: Fei Wu @ 2023-05-30  8:35 UTC (permalink / raw)
  To: richard.henderson, alex.bennee, qemu-devel
  Cc: Fei Wu, Vanderson M . do Rosario, Paolo Bonzini,
	Marc-André Lureau, Daniel P. Berrangé,
	Thomas Huth, Philippe Mathieu-Daudé,
	Markus Armbruster, Laurent Vivier

TBStats will be introduced to replace CONFIG_PROFILER totally, here
remove all CONFIG_PROFILER related stuffs first.

Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Fei Wu <fei2.wu@intel.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/monitor.c           |  25 ----
 accel/tcg/tcg-accel-ops.c     |  10 --
 accel/tcg/translate-all.c     |  33 ------
 include/qemu/timer.h          |   9 --
 include/tcg/tcg.h             |  25 ----
 meson.build                   |   2 -
 meson_options.txt             |   2 -
 scripts/meson-buildoptions.sh |   3 -
 softmmu/runstate.c            |   9 --
 tcg/tcg.c                     | 208 ----------------------------------
 tests/qtest/qmp-cmd-test.c    |   3 -
 11 files changed, 329 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 92fce580f1..e903dd1d2e 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -80,36 +80,11 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
     return human_readable_text_from_str(buf);
 }
 
-#ifdef CONFIG_PROFILER
-
-int64_t dev_time;
-
-HumanReadableText *qmp_x_query_profile(Error **errp)
-{
-    g_autoptr(GString) buf = g_string_new("");
-    static int64_t last_cpu_exec_time;
-    int64_t cpu_exec_time;
-    int64_t delta;
-
-    cpu_exec_time = tcg_cpu_exec_time();
-    delta = cpu_exec_time - last_cpu_exec_time;
-
-    g_string_append_printf(buf, "async time  %" PRId64 " (%0.3f)\n",
-                           dev_time, dev_time / (double)NANOSECONDS_PER_SECOND);
-    g_string_append_printf(buf, "qemu time   %" PRId64 " (%0.3f)\n",
-                           delta, delta / (double)NANOSECONDS_PER_SECOND);
-    last_cpu_exec_time = cpu_exec_time;
-    dev_time = 0;
-
-    return human_readable_text_from_str(buf);
-}
-#else
 HumanReadableText *qmp_x_query_profile(Error **errp)
 {
     error_setg(errp, "Internal profiler not compiled");
     return NULL;
 }
-#endif
 
 static void hmp_tcg_register(void)
 {
diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
index 58c8e64096..3973591508 100644
--- a/accel/tcg/tcg-accel-ops.c
+++ b/accel/tcg/tcg-accel-ops.c
@@ -70,20 +70,10 @@ void tcg_cpus_destroy(CPUState *cpu)
 int tcg_cpus_exec(CPUState *cpu)
 {
     int ret;
-#ifdef CONFIG_PROFILER
-    int64_t ti;
-#endif
     assert(tcg_enabled());
-#ifdef CONFIG_PROFILER
-    ti = profile_getclock();
-#endif
     cpu_exec_start(cpu);
     ret = cpu_exec(cpu);
     cpu_exec_end(cpu);
-#ifdef CONFIG_PROFILER
-    qatomic_set(&tcg_ctx->prof.cpu_exec_time,
-                tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
-#endif
     return ret;
 }
 
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index c87648b99e..4e035e0f79 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -203,10 +203,6 @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                                uintptr_t host_pc)
 {
     uint64_t data[TARGET_INSN_START_WORDS];
-#ifdef CONFIG_PROFILER
-    TCGProfile *prof = &tcg_ctx->prof;
-    int64_t ti = profile_getclock();
-#endif
     int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
 
     if (insns_left < 0) {
@@ -223,12 +219,6 @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
     }
 
     cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
-
-#ifdef CONFIG_PROFILER
-    qatomic_set(&prof->restore_time,
-                prof->restore_time + profile_getclock() - ti);
-    qatomic_set(&prof->restore_count, prof->restore_count + 1);
-#endif
 }
 
 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
@@ -291,13 +281,6 @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
     tcg_ctx->cpu = NULL;
     *max_insns = tb->icount;
 
-#ifdef CONFIG_PROFILER
-    qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
-    qatomic_set(&tcg_ctx->prof.interm_time,
-                tcg_ctx->prof.interm_time + profile_getclock() - *ti);
-    *ti = profile_getclock();
-#endif
-
     return tcg_gen_code(tcg_ctx, tb, pc);
 }
 
@@ -311,9 +294,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     tb_page_addr_t phys_pc;
     tcg_insn_unit *gen_code_buf;
     int gen_code_size, search_size, max_insns;
-#ifdef CONFIG_PROFILER
-    TCGProfile *prof = &tcg_ctx->prof;
-#endif
     int64_t ti;
     void *host_pc;
 
@@ -365,12 +345,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 
  tb_overflow:
 
-#ifdef CONFIG_PROFILER
-    /* includes aborted translations because of exceptions */
-    qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
-    ti = profile_getclock();
-#endif
-
     trace_translate_block(tb, pc, tb->tc.ptr);
 
     gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
@@ -425,13 +399,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
      */
     perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
 
-#ifdef CONFIG_PROFILER
-    qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
-    qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
-    qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
-    qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
-#endif
-
     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
         qemu_log_in_addr_range(pc)) {
         FILE *logfile = qemu_log_trylock();
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index ee071e07d1..9a91cb1248 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -989,13 +989,4 @@ static inline int64_t cpu_get_host_ticks(void)
 }
 #endif
 
-#ifdef CONFIG_PROFILER
-static inline int64_t profile_getclock(void)
-{
-    return get_clock();
-}
-
-extern int64_t dev_time;
-#endif
-
 #endif
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 072c35f7f5..083cbd6580 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -522,27 +522,6 @@ static inline TCGRegSet output_pref(const TCGOp *op, unsigned i)
     return i < ARRAY_SIZE(op->output_pref) ? op->output_pref[i] : 0;
 }
 
-typedef struct TCGProfile {
-    int64_t cpu_exec_time;
-    int64_t tb_count1;
-    int64_t tb_count;
-    int64_t op_count; /* total insn count */
-    int op_count_max; /* max insn per TB */
-    int temp_count_max;
-    int64_t temp_count;
-    int64_t del_op_count;
-    int64_t code_in_len;
-    int64_t code_out_len;
-    int64_t search_out_len;
-    int64_t interm_time;
-    int64_t code_time;
-    int64_t la_time;
-    int64_t opt_time;
-    int64_t restore_count;
-    int64_t restore_time;
-    int64_t table_op_count[NB_OPS];
-} TCGProfile;
-
 struct TCGContext {
     uint8_t *pool_cur, *pool_end;
     TCGPool *pool_first, *pool_current, *pool_first_large;
@@ -569,10 +548,6 @@ struct TCGContext {
     tcg_insn_unit *code_buf;      /* pointer for start of tb */
     tcg_insn_unit *code_ptr;      /* pointer for running end of tb */
 
-#ifdef CONFIG_PROFILER
-    TCGProfile prof;
-#endif
-
 #ifdef CONFIG_DEBUG_TCG
     int goto_tb_issue_mask;
     const TCGOpcode *vecop_list;
diff --git a/meson.build b/meson.build
index 2d48aa1e2e..f08280bc8b 100644
--- a/meson.build
+++ b/meson.build
@@ -2141,7 +2141,6 @@ if numa.found()
                                        dependencies: numa))
 endif
 config_host_data.set('CONFIG_OPENGL', opengl.found())
-config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
 config_host_data.set('CONFIG_RBD', rbd.found())
 config_host_data.set('CONFIG_RDMA', rdma.found())
 config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack'))
@@ -4110,7 +4109,6 @@ if 'objc' in all_languages
   summary_info += {'QEMU_OBJCFLAGS':    ' '.join(qemu_common_flags)}
 endif
 summary_info += {'QEMU_LDFLAGS':      ' '.join(qemu_ldflags)}
-summary_info += {'profiler':          get_option('profiler')}
 summary_info += {'link-time optimization (LTO)': get_option('b_lto')}
 summary_info += {'PIE':               get_option('b_pie')}
 summary_info += {'static build':      config_host.has_key('CONFIG_STATIC')}
diff --git a/meson_options.txt b/meson_options.txt
index 90237389e2..bbb5c7e886 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -345,8 +345,6 @@ option('qom_cast_debug', type: 'boolean', value: true,
 option('gprof', type: 'boolean', value: false,
        description: 'QEMU profiling with gprof',
        deprecated: true)
-option('profiler', type: 'boolean', value: false,
-       description: 'profiler support')
 option('slirp_smbd', type : 'feature', value : 'auto',
        description: 'use smbd (at path --smbd=*) in slirp networking')
 
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 5714fd93d9..7dd5709ef4 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -39,7 +39,6 @@ meson_options_help() {
   printf "%s\n" '                           jemalloc/system/tcmalloc)'
   printf "%s\n" '  --enable-module-upgrades try to load modules from alternate paths for'
   printf "%s\n" '                           upgrades'
-  printf "%s\n" '  --enable-profiler        profiler support'
   printf "%s\n" '  --enable-rng-none        dummy RNG, avoid using /dev/(u)random and'
   printf "%s\n" '                           getrandom()'
   printf "%s\n" '  --enable-safe-stack      SafeStack Stack Smash Protection (requires'
@@ -401,8 +400,6 @@ _meson_option_parse() {
     --with-pkgversion=*) quote_sh "-Dpkgversion=$2" ;;
     --enable-png) printf "%s" -Dpng=enabled ;;
     --disable-png) printf "%s" -Dpng=disabled ;;
-    --enable-profiler) printf "%s" -Dprofiler=true ;;
-    --disable-profiler) printf "%s" -Dprofiler=false ;;
     --enable-pvrdma) printf "%s" -Dpvrdma=enabled ;;
     --disable-pvrdma) printf "%s" -Dpvrdma=disabled ;;
     --enable-qcow1) printf "%s" -Dqcow1=enabled ;;
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index 2f2396c819..bd50062ed0 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -728,18 +728,9 @@ static bool main_loop_should_exit(int *status)
 int qemu_main_loop(void)
 {
     int status = EXIT_SUCCESS;
-#ifdef CONFIG_PROFILER
-    int64_t ti;
-#endif
 
     while (!main_loop_should_exit(&status)) {
-#ifdef CONFIG_PROFILER
-        ti = profile_getclock();
-#endif
         main_loop_wait(false);
-#ifdef CONFIG_PROFILER
-        dev_time += profile_getclock() - ti;
-#endif
     }
 
     return status;
diff --git a/tcg/tcg.c b/tcg/tcg.c
index ac30d484f5..47befdfcae 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -2983,10 +2983,6 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
     QTAILQ_REMOVE(&s->ops, op, link);
     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
     s->nb_ops--;
-
-#ifdef CONFIG_PROFILER
-    qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
-#endif
 }
 
 void tcg_remove_ops_after(TCGOp *op)
@@ -5856,102 +5852,6 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
 }
 
-#ifdef CONFIG_PROFILER
-
-/* avoid copy/paste errors */
-#define PROF_ADD(to, from, field)                       \
-    do {                                                \
-        (to)->field += qatomic_read(&((from)->field));  \
-    } while (0)
-
-#define PROF_MAX(to, from, field)                                       \
-    do {                                                                \
-        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
-        if (val__ > (to)->field) {                                      \
-            (to)->field = val__;                                        \
-        }                                                               \
-    } while (0)
-
-/* Pass in a zero'ed @prof */
-static inline
-void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
-{
-    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
-    unsigned int i;
-
-    for (i = 0; i < n_ctxs; i++) {
-        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
-        const TCGProfile *orig = &s->prof;
-
-        if (counters) {
-            PROF_ADD(prof, orig, cpu_exec_time);
-            PROF_ADD(prof, orig, tb_count1);
-            PROF_ADD(prof, orig, tb_count);
-            PROF_ADD(prof, orig, op_count);
-            PROF_MAX(prof, orig, op_count_max);
-            PROF_ADD(prof, orig, temp_count);
-            PROF_MAX(prof, orig, temp_count_max);
-            PROF_ADD(prof, orig, del_op_count);
-            PROF_ADD(prof, orig, code_in_len);
-            PROF_ADD(prof, orig, code_out_len);
-            PROF_ADD(prof, orig, search_out_len);
-            PROF_ADD(prof, orig, interm_time);
-            PROF_ADD(prof, orig, code_time);
-            PROF_ADD(prof, orig, la_time);
-            PROF_ADD(prof, orig, opt_time);
-            PROF_ADD(prof, orig, restore_count);
-            PROF_ADD(prof, orig, restore_time);
-        }
-        if (table) {
-            int i;
-
-            for (i = 0; i < NB_OPS; i++) {
-                PROF_ADD(prof, orig, table_op_count[i]);
-            }
-        }
-    }
-}
-
-#undef PROF_ADD
-#undef PROF_MAX
-
-static void tcg_profile_snapshot_counters(TCGProfile *prof)
-{
-    tcg_profile_snapshot(prof, true, false);
-}
-
-static void tcg_profile_snapshot_table(TCGProfile *prof)
-{
-    tcg_profile_snapshot(prof, false, true);
-}
-
-void tcg_dump_op_count(GString *buf)
-{
-    TCGProfile prof = {};
-    int i;
-
-    tcg_profile_snapshot_table(&prof);
-    for (i = 0; i < NB_OPS; i++) {
-        g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
-                               prof.table_op_count[i]);
-    }
-}
-
-int64_t tcg_cpu_exec_time(void)
-{
-    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
-    unsigned int i;
-    int64_t ret = 0;
-
-    for (i = 0; i < n_ctxs; i++) {
-        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
-        const TCGProfile *prof = &s->prof;
-
-        ret += qatomic_read(&prof->cpu_exec_time);
-    }
-    return ret;
-}
-#else
 void tcg_dump_op_count(GString *buf)
 {
     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
@@ -5962,37 +5862,12 @@ int64_t tcg_cpu_exec_time(void)
     error_report("%s: TCG profiler not compiled", __func__);
     exit(EXIT_FAILURE);
 }
-#endif
-
 
 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
 {
-#ifdef CONFIG_PROFILER
-    TCGProfile *prof = &s->prof;
-#endif
     int i, num_insns;
     TCGOp *op;
 
-#ifdef CONFIG_PROFILER
-    {
-        int n = 0;
-
-        QTAILQ_FOREACH(op, &s->ops, link) {
-            n++;
-        }
-        qatomic_set(&prof->op_count, prof->op_count + n);
-        if (n > prof->op_count_max) {
-            qatomic_set(&prof->op_count_max, n);
-        }
-
-        n = s->nb_temps;
-        qatomic_set(&prof->temp_count, prof->temp_count + n);
-        if (n > prof->temp_count_max) {
-            qatomic_set(&prof->temp_count_max, n);
-        }
-    }
-#endif
-
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                  && qemu_log_in_addr_range(pc_start))) {
         FILE *logfile = qemu_log_trylock();
@@ -6021,17 +5896,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
     }
 #endif
 
-#ifdef CONFIG_PROFILER
-    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
-#endif
-
     tcg_optimize(s);
 
-#ifdef CONFIG_PROFILER
-    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
-    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
-#endif
-
     reachable_code_pass(s);
     liveness_pass_0(s);
     liveness_pass_1(s);
@@ -6055,10 +5921,6 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
         }
     }
 
-#ifdef CONFIG_PROFILER
-    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
-#endif
-
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                  && qemu_log_in_addr_range(pc_start))) {
         FILE *logfile = qemu_log_trylock();
@@ -6097,10 +5959,6 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
     QTAILQ_FOREACH(op, &s->ops, link) {
         TCGOpcode opc = op->opc;
 
-#ifdef CONFIG_PROFILER
-        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
-#endif
-
         switch (opc) {
         case INDEX_op_mov_i32:
         case INDEX_op_mov_i64:
@@ -6195,76 +6053,10 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
     return tcg_current_code_size(s);
 }
 
-#ifdef CONFIG_PROFILER
-void tcg_dump_info(GString *buf)
-{
-    TCGProfile prof = {};
-    const TCGProfile *s;
-    int64_t tb_count;
-    int64_t tb_div_count;
-    int64_t tot;
-
-    tcg_profile_snapshot_counters(&prof);
-    s = &prof;
-    tb_count = s->tb_count;
-    tb_div_count = tb_count ? tb_count : 1;
-    tot = s->interm_time + s->code_time;
-
-    g_string_append_printf(buf, "JIT cycles          %" PRId64
-                           " (%0.3f s at 2.4 GHz)\n",
-                           tot, tot / 2.4e9);
-    g_string_append_printf(buf, "translated TBs      %" PRId64
-                           " (aborted=%" PRId64 " %0.1f%%)\n",
-                           tb_count, s->tb_count1 - tb_count,
-                           (double)(s->tb_count1 - s->tb_count)
-                           / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
-    g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
-                           (double)s->op_count / tb_div_count, s->op_count_max);
-    g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
-                           (double)s->del_op_count / tb_div_count);
-    g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
-                           (double)s->temp_count / tb_div_count,
-                           s->temp_count_max);
-    g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
-                           (double)s->code_out_len / tb_div_count);
-    g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
-                           (double)s->search_out_len / tb_div_count);
-
-    g_string_append_printf(buf, "cycles/op           %0.1f\n",
-                           s->op_count ? (double)tot / s->op_count : 0);
-    g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
-                           s->code_in_len ? (double)tot / s->code_in_len : 0);
-    g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
-                           s->code_out_len ? (double)tot / s->code_out_len : 0);
-    g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
-                           s->search_out_len ?
-                           (double)tot / s->search_out_len : 0);
-    if (tot == 0) {
-        tot = 1;
-    }
-    g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
-                           (double)s->interm_time / tot * 100.0);
-    g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
-                           (double)s->code_time / tot * 100.0);
-    g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
-                           (double)s->opt_time / (s->code_time ?
-                                                  s->code_time : 1)
-                           * 100.0);
-    g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
-                           (double)s->la_time / (s->code_time ?
-                                                 s->code_time : 1) * 100.0);
-    g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
-                           s->restore_count);
-    g_string_append_printf(buf, "  avg cycles        %0.1f\n",
-                           s->restore_count ?
-                           (double)s->restore_time / s->restore_count : 0);
-}
-#else
 void tcg_dump_info(GString *buf)
 {
     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
 }
-#endif
 
 #ifdef ELF_HOST_MACHINE
 /* In order to use this feature, the backend needs to do three things:
diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c
index a58de48d2a..73a670e8fa 100644
--- a/tests/qtest/qmp-cmd-test.c
+++ b/tests/qtest/qmp-cmd-test.c
@@ -46,9 +46,6 @@ static int query_error_class(const char *cmd)
         { "query-balloon", ERROR_CLASS_DEVICE_NOT_ACTIVE },
         { "query-hotpluggable-cpus", ERROR_CLASS_GENERIC_ERROR },
         { "query-vm-generation-id", ERROR_CLASS_GENERIC_ERROR },
-#ifndef CONFIG_PROFILER
-        { "x-query-profile", ERROR_CLASS_GENERIC_ERROR },
-#endif
         /* Only valid with a USB bus added */
         { "x-query-usb", ERROR_CLASS_GENERIC_ERROR },
         /* Only valid with accel=tcg */
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH v14 02/10] accel/tcg: introduce TBStatistics structure
  2023-05-30  8:35 [PATCH v14 00/10] TCG code quality tracking Fei Wu
  2023-05-30  8:35 ` [PATCH v14 01/10] accel/tcg: remove CONFIG_PROFILER Fei Wu
@ 2023-05-30  8:35 ` Fei Wu
  2023-05-31 23:59   ` Richard Henderson
  2023-06-01  0:01   ` Richard Henderson
  2023-05-30  8:35 ` [PATCH v14 03/10] accel: collecting TB execution count Fei Wu
                   ` (7 subsequent siblings)
  9 siblings, 2 replies; 47+ messages in thread
From: Fei Wu @ 2023-05-30  8:35 UTC (permalink / raw)
  To: richard.henderson, alex.bennee, qemu-devel
  Cc: Vanderson M. do Rosario, Fei Wu, Paolo Bonzini

From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>

To store statistics for each TB, we created a TBStatistics structure
which is linked with the TBs. TBStatistics can stay alive after
tb_flush and be relinked to a regenerated TB. So the statistics can
be accumulated even through flushes.

The goal is to have all present and future qemu/tcg statistics and
meta-data stored in this new structure.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
Message-Id: <20190829173437.5926-2-vandersonmr2@gmail.com>
[AJB: fix git author, review comments]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Fei Wu <fei2.wu@intel.com>
---
 MAINTAINERS                   |  1 +
 accel/tcg/meson.build         |  1 +
 accel/tcg/tb-context.h        |  1 +
 accel/tcg/tb-hash.h           |  7 +++++
 accel/tcg/tb-maint.c          | 19 ++++++++++++
 accel/tcg/tb-stats.c          | 58 +++++++++++++++++++++++++++++++++++
 accel/tcg/translate-all.c     | 43 ++++++++++++++++++++++++++
 include/exec/exec-all.h       |  3 ++
 include/exec/tb-stats-flags.h | 21 +++++++++++++
 include/exec/tb-stats.h       | 56 +++++++++++++++++++++++++++++++++
 10 files changed, 210 insertions(+)
 create mode 100644 accel/tcg/tb-stats.c
 create mode 100644 include/exec/tb-stats-flags.h
 create mode 100644 include/exec/tb-stats.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 4b025a7b63..d72ecd12b4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -153,6 +153,7 @@ F: include/exec/cpu*.h
 F: include/exec/exec-all.h
 F: include/exec/tb-flush.h
 F: include/exec/target_long.h
+F: include/exec/tb-stats*.h
 F: include/exec/helper*.h
 F: include/sysemu/cpus.h
 F: include/sysemu/tcg.h
diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
index aeb20a6ef0..9263bdde11 100644
--- a/accel/tcg/meson.build
+++ b/accel/tcg/meson.build
@@ -4,6 +4,7 @@ tcg_ss.add(files(
   'cpu-exec-common.c',
   'cpu-exec.c',
   'tb-maint.c',
+  'tb-stats.c',
   'tcg-runtime-gvec.c',
   'tcg-runtime.c',
   'translate-all.c',
diff --git a/accel/tcg/tb-context.h b/accel/tcg/tb-context.h
index cac62d9749..d7910d586b 100644
--- a/accel/tcg/tb-context.h
+++ b/accel/tcg/tb-context.h
@@ -35,6 +35,7 @@ struct TBContext {
     /* statistics */
     unsigned tb_flush_count;
     unsigned tb_phys_invalidate_count;
+    struct qht tb_stats;
 };
 
 extern TBContext tb_ctx;
diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h
index 83dc610e4c..87d657a1c6 100644
--- a/accel/tcg/tb-hash.h
+++ b/accel/tcg/tb-hash.h
@@ -67,4 +67,11 @@ uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags,
     return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
 }
 
+static inline
+uint32_t tb_stats_hash_func(tb_page_addr_t phys_pc, target_ulong pc,
+                            uint32_t flags)
+{
+    return qemu_xxhash5(phys_pc, pc, flags);
+}
+
 #endif
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 991746f80f..0980fca358 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -24,6 +24,7 @@
 #include "exec/log.h"
 #include "exec/exec-all.h"
 #include "exec/tb-flush.h"
+#include "exec/tb-stats.h"
 #include "exec/translate-all.h"
 #include "sysemu/tcg.h"
 #include "tcg/tcg.h"
@@ -41,6 +42,23 @@
 #define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
     TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
 
+/*
+ * This is the more or less the same compare as tb_cmp(), but the
+ * data persists over tb_flush. We also aggregate the various
+ * variations of cflags under one record and ignore the details of
+ * page overlap (although we can count it).
+ */
+bool tb_stats_cmp(const void *ap, const void *bp)
+{
+    const TBStatistics *a = ap;
+    const TBStatistics *b = bp;
+
+    return a->phys_pc == b->phys_pc &&
+        a->pc == b->pc &&
+        a->cs_base == b->cs_base &&
+        a->flags == b->flags;
+}
+
 static bool tb_cmp(const void *ap, const void *bp)
 {
     const TranslationBlock *a = ap;
@@ -60,6 +78,7 @@ void tb_htable_init(void)
     unsigned int mode = QHT_MODE_AUTO_RESIZE;
 
     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
+    init_tb_stats_htable();
 }
 
 typedef struct PageDesc PageDesc;
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
new file mode 100644
index 0000000000..f988bd8a31
--- /dev/null
+++ b/accel/tcg/tb-stats.c
@@ -0,0 +1,58 @@
+/*
+ * QEMU System Emulator, Code Quality Monitor System
+ *
+ * Copyright (c) 2019 Vanderson M. do Rosario <vandersonmr2@gmail.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+
+#include "disas/disas.h"
+
+#include "exec/tb-stats.h"
+#include "tb-context.h"
+
+/* TBStatistic collection controls */
+enum TBStatsStatus {
+    TB_STATS_DISABLED = 0,
+    TB_STATS_RUNNING,
+    TB_STATS_PAUSED,
+    TB_STATS_STOPPED
+};
+
+static enum TBStatsStatus tcg_collect_tb_stats;
+
+void init_tb_stats_htable(void)
+{
+    if (!tb_ctx.tb_stats.map && tb_stats_collection_enabled()) {
+        qht_init(&tb_ctx.tb_stats, tb_stats_cmp,
+                CODE_GEN_HTABLE_SIZE, QHT_MODE_AUTO_RESIZE);
+    }
+}
+
+void enable_collect_tb_stats(void)
+{
+    tcg_collect_tb_stats = TB_STATS_RUNNING;
+    init_tb_stats_htable();
+}
+
+void disable_collect_tb_stats(void)
+{
+    tcg_collect_tb_stats = TB_STATS_STOPPED;
+}
+
+void pause_collect_tb_stats(void)
+{
+    tcg_collect_tb_stats = TB_STATS_PAUSED;
+}
+
+bool tb_stats_collection_enabled(void)
+{
+    return tcg_collect_tb_stats == TB_STATS_RUNNING;
+}
+
+bool tb_stats_collection_paused(void)
+{
+    return tcg_collect_tb_stats == TB_STATS_PAUSED;
+}
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 4e035e0f79..bb9483785e 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -54,6 +54,7 @@
 #include "qemu/cacheinfo.h"
 #include "qemu/timer.h"
 #include "exec/log.h"
+#include "exec/tb-stats.h"
 #include "sysemu/cpus.h"
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
@@ -284,6 +285,37 @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
     return tcg_gen_code(tcg_ctx, tb, pc);
 }
 
+static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
+                                  target_ulong cs_base, uint32_t flags)
+{
+    TBStatistics *new_stats = g_new0(TBStatistics, 1);
+    uint32_t hash = tb_stats_hash_func(phys_pc, pc, flags);
+    void *existing_stats = NULL;
+    new_stats->phys_pc = phys_pc;
+    new_stats->pc = pc;
+    new_stats->cs_base = cs_base;
+    new_stats->flags = flags;
+
+    /*
+     * All initialisation must be complete before we insert into qht
+     * table otherwise another thread might get a partially created
+     * structure.
+     */
+    qht_insert(&tb_ctx.tb_stats, new_stats, hash, &existing_stats);
+
+    if (unlikely(existing_stats)) {
+        /*
+         * If there is already a TBStatistic for this TB from a previous flush
+         * then just make the new TB point to the older TBStatistic
+         */
+        g_free(new_stats);
+        return existing_stats;
+    } else {
+        return new_stats;
+    }
+}
+
+
 /* Called with mmap_lock held for user mode emulation.  */
 TranslationBlock *tb_gen_code(CPUState *cpu,
                               target_ulong pc, target_ulong cs_base,
@@ -347,6 +379,17 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 
     trace_translate_block(tb, pc, tb->tc.ptr);
 
+    /*
+     * We want to fetch the stats structure before we start code
+     * generation so we can count interesting things about this
+     * generation.
+     */
+    if (tb_stats_collection_enabled()) {
+        tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
+    } else {
+        tb->tb_stats = NULL;
+    }
+
     gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
     if (unlikely(gen_code_size < 0)) {
         switch (gen_code_size) {
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 4d2b151986..f8a80f63d3 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -608,6 +608,9 @@ struct TranslationBlock {
     uintptr_t jmp_list_head;
     uintptr_t jmp_list_next[2];
     uintptr_t jmp_dest[2];
+
+    /* Pointer to a struct where statistics from the TB is stored */
+    struct TBStatistics *tb_stats;
 };
 
 /* Hide the qatomic_read to make code a little easier on the eyes */
diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
new file mode 100644
index 0000000000..87ee3d902e
--- /dev/null
+++ b/include/exec/tb-stats-flags.h
@@ -0,0 +1,21 @@
+/*
+ * QEMU System Emulator, Code Quality Monitor System
+ *
+ * We define the flags and control bits here to avoid complications of
+ * including TCG/CPU information in common code.
+ *
+ * Copyright (c) 2019 Vanderson M. do Rosario <vandersonmr2@gmail.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef TB_STATS_FLAGS
+#define TB_STATS_FLAGS
+
+/* TBStatistic collection controls */
+void enable_collect_tb_stats(void);
+void disable_collect_tb_stats(void);
+void pause_collect_tb_stats(void);
+bool tb_stats_collection_enabled(void);
+bool tb_stats_collection_paused(void);
+
+#endif
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
new file mode 100644
index 0000000000..b519465665
--- /dev/null
+++ b/include/exec/tb-stats.h
@@ -0,0 +1,56 @@
+/*
+ * QEMU System Emulator, Code Quality Monitor System
+ *
+ * Copyright (c) 2019 Vanderson M. do Rosario <vandersonmr2@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef TB_STATS_H
+
+#define TB_STATS_H
+
+#include "exec/cpu-common.h"
+#include "exec/exec-all.h"
+#include "exec/tb-stats-flags.h"
+#include "tcg/tcg.h"
+
+typedef struct TBStatistics TBStatistics;
+
+/*
+ * This struct stores statistics such as execution count of the
+ * TranslationBlocks. Each sets of TBs for a given phys_pc/pc/flags
+ * has its own TBStatistics which will persist over tb_flush.
+ *
+ * We include additional counters to track number of translations as
+ * well as variants for compile flags.
+ */
+struct TBStatistics {
+    tb_page_addr_t phys_pc;
+    target_ulong pc;
+    uint32_t     flags;
+    /* cs_base isn't included in the hash but we do check for matches */
+    target_ulong cs_base;
+};
+
+bool tb_stats_cmp(const void *ap, const void *bp);
+
+void init_tb_stats_htable(void);
+
+#endif
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH v14 03/10] accel: collecting TB execution count
  2023-05-30  8:35 [PATCH v14 00/10] TCG code quality tracking Fei Wu
  2023-05-30  8:35 ` [PATCH v14 01/10] accel/tcg: remove CONFIG_PROFILER Fei Wu
  2023-05-30  8:35 ` [PATCH v14 02/10] accel/tcg: introduce TBStatistics structure Fei Wu
@ 2023-05-30  8:35 ` Fei Wu
  2023-06-01  0:05   ` Richard Henderson
  2023-05-30  8:35 ` [PATCH v14 04/10] accel/tcg: add jit stats and time to TBStatistics Fei Wu
                   ` (6 subsequent siblings)
  9 siblings, 1 reply; 47+ messages in thread
From: Fei Wu @ 2023-05-30  8:35 UTC (permalink / raw)
  To: richard.henderson, alex.bennee, qemu-devel
  Cc: Vanderson M. do Rosario, Fei Wu, Paolo Bonzini

From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>

If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
enabled, then we instrument the start code of this TB
to atomically count the number of times it is executed.
We count both the number of "normal" executions and atomic
executions of a TB.

The execution count of the TB is stored in its respective
TBS.

All TBStatistics are created by default with the flags from
default_tbstats_flag.

[Richard Henderson created the inline gen_tb_exec_count]

Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
Message-Id: <20190829173437.5926-3-vandersonmr2@gmail.com>
[AJB: Fix author]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Fei Wu <fei2.wu@intel.com>
---
 accel/tcg/cpu-exec.c          |  6 ++++++
 accel/tcg/tb-stats.c          |  6 ++++++
 accel/tcg/tcg-runtime.c       |  1 +
 accel/tcg/translate-all.c     |  7 +++++--
 accel/tcg/translator.c        | 25 +++++++++++++++++++++++++
 include/exec/gen-icount.h     |  1 +
 include/exec/tb-stats-flags.h |  5 +++++
 include/exec/tb-stats.h       | 13 +++++++++++++
 8 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 0e741960da..c0d8f26237 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -25,6 +25,7 @@
 #include "trace.h"
 #include "disas/disas.h"
 #include "exec/exec-all.h"
+#include "exec/tb-stats.h"
 #include "tcg/tcg.h"
 #include "qemu/atomic.h"
 #include "qemu/rcu.h"
@@ -562,7 +563,12 @@ void cpu_exec_step_atomic(CPUState *cpu)
             mmap_unlock();
         }
 
+        if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
+            tb->tb_stats->executions.atomic++;
+        }
+
         cpu_exec_enter(cpu);
+
         /* execute the generated code */
         trace_exec_tb(tb, pc);
         cpu_tb_exec(cpu, tb, &tb_exit);
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index f988bd8a31..143a52ef5c 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -22,6 +22,7 @@ enum TBStatsStatus {
 };
 
 static enum TBStatsStatus tcg_collect_tb_stats;
+static uint32_t default_tbstats_flag;
 
 void init_tb_stats_htable(void)
 {
@@ -56,3 +57,8 @@ bool tb_stats_collection_paused(void)
 {
     return tcg_collect_tb_stats == TB_STATS_PAUSED;
 }
+
+uint32_t get_default_tbstats_flag(void)
+{
+    return default_tbstats_flag;
+}
diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index e4e030043f..11c6192064 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -27,6 +27,7 @@
 #include "exec/helper-proto.h"
 #include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
+#include "exec/tb-stats.h"
 #include "disas/disas.h"
 #include "exec/log.h"
 #include "tcg/tcg.h"
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index bb9483785e..dadf49954f 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -295,6 +295,7 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
     new_stats->pc = pc;
     new_stats->cs_base = cs_base;
     new_stats->flags = flags;
+    new_stats->stats_enabled = get_default_tbstats_flag();
 
     /*
      * All initialisation must be complete before we insert into qht
@@ -382,9 +383,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     /*
      * We want to fetch the stats structure before we start code
      * generation so we can count interesting things about this
-     * generation.
+     * generation. If dfilter is in effect we will only collect stats
+     * for the specified range.
      */
-    if (tb_stats_collection_enabled()) {
+    if (tb_stats_collection_enabled() &&
+        qemu_log_in_addr_range(tb->pc)) {
         tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
     } else {
         tb->tb_stats = NULL;
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 6120ef2a92..80ffbfb455 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -11,6 +11,7 @@
 #include "qemu/error-report.h"
 #include "tcg/tcg.h"
 #include "tcg/tcg-op.h"
+#include "tcg/tcg-temp-internal.h"
 #include "exec/exec-all.h"
 #include "exec/gen-icount.h"
 #include "exec/log.h"
@@ -18,6 +19,29 @@
 #include "exec/plugin-gen.h"
 #include "exec/replay-core.h"
 
+static void gen_tb_exec_count(TranslationBlock *tb)
+{
+    if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
+        TCGv_ptr ptr = tcg_temp_ebb_new_ptr();
+
+        tcg_gen_movi_ptr(ptr, (intptr_t)&tb->tb_stats->executions.normal);
+        if (sizeof(tb->tb_stats->executions.normal) == 4) {
+            TCGv_i32 t = tcg_temp_ebb_new_i32();
+            tcg_gen_ld_i32(t, ptr, 0);
+            tcg_gen_addi_i32(t, t, 1);
+            tcg_gen_st_i32(t, ptr, 0);
+            tcg_temp_free_i32(t);
+        } else {
+            TCGv_i64 t = tcg_temp_ebb_new_i64();
+            tcg_gen_ld_i64(t, ptr, 0);
+            tcg_gen_addi_i64(t, t, 1);
+            tcg_gen_st_i64(t, ptr, 0);
+            tcg_temp_free_i64(t);
+        }
+        tcg_temp_free_ptr(ptr);
+    }
+}
+
 bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest)
 {
     /* Suppress goto_tb if requested. */
@@ -56,6 +80,7 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
 
     /* Start translating.  */
     gen_tb_start(db->tb);
+    gen_tb_exec_count(tb);
     ops->tb_start(db, cpu);
     tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
 
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
index f6de79a6b4..20e7835ff0 100644
--- a/include/exec/gen-icount.h
+++ b/include/exec/gen-icount.h
@@ -2,6 +2,7 @@
 #define GEN_ICOUNT_H
 
 #include "exec/exec-all.h"
+#include "exec/tb-stats.h"
 
 /* Helpers for instruction counting code generation.  */
 
diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
index 87ee3d902e..fa71eb6f0c 100644
--- a/include/exec/tb-stats-flags.h
+++ b/include/exec/tb-stats-flags.h
@@ -11,6 +11,9 @@
 #ifndef TB_STATS_FLAGS
 #define TB_STATS_FLAGS
 
+#define TB_NOTHING    (1 << 0)
+#define TB_EXEC_STATS (1 << 1)
+
 /* TBStatistic collection controls */
 void enable_collect_tb_stats(void);
 void disable_collect_tb_stats(void);
@@ -18,4 +21,6 @@ void pause_collect_tb_stats(void);
 bool tb_stats_collection_enabled(void);
 bool tb_stats_collection_paused(void);
 
+uint32_t get_default_tbstats_flag(void);
+
 #endif
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index b519465665..eb1fa92a4e 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -31,6 +31,9 @@
 #include "exec/tb-stats-flags.h"
 #include "tcg/tcg.h"
 
+#define tb_stats_enabled(tb, JIT_STATS) \
+    (tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS))
+
 typedef struct TBStatistics TBStatistics;
 
 /*
@@ -47,6 +50,16 @@ struct TBStatistics {
     uint32_t     flags;
     /* cs_base isn't included in the hash but we do check for matches */
     target_ulong cs_base;
+
+    /* which stats are enabled for this TBStats */
+    uint32_t stats_enabled;
+
+    /* Execution stats */
+    struct {
+        unsigned long normal;
+        unsigned long atomic;
+    } executions;
+
 };
 
 bool tb_stats_cmp(const void *ap, const void *bp);
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH v14 04/10] accel/tcg: add jit stats and time to TBStatistics
  2023-05-30  8:35 [PATCH v14 00/10] TCG code quality tracking Fei Wu
                   ` (2 preceding siblings ...)
  2023-05-30  8:35 ` [PATCH v14 03/10] accel: collecting TB execution count Fei Wu
@ 2023-05-30  8:35 ` Fei Wu
  2023-05-30  9:37   ` Markus Armbruster
  2023-06-01  1:08   ` Richard Henderson
  2023-05-30  8:35 ` [PATCH v14 05/10] debug: add -d tb_stats to control TBStatistics collection: Fei Wu
                   ` (5 subsequent siblings)
  9 siblings, 2 replies; 47+ messages in thread
From: Fei Wu @ 2023-05-30  8:35 UTC (permalink / raw)
  To: richard.henderson, alex.bennee, qemu-devel
  Cc: Fei Wu, Vanderson M . do Rosario, Paolo Bonzini,
	Markus Armbruster, Thomas Huth, Laurent Vivier

This collects all the statistics for TBStatistics, not only for the
whole emulation but for each TB.

Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Fei Wu <fei2.wu@intel.com>
---
 accel/tcg/monitor.c           |  20 ++++-
 accel/tcg/tb-stats.c          | 146 ++++++++++++++++++++++++++++++++++
 accel/tcg/tcg-accel-ops.c     |   7 ++
 accel/tcg/translate-all.c     |  70 +++++++++++++++-
 accel/tcg/translator.c        |   7 +-
 include/exec/tb-stats-flags.h |   2 +
 include/exec/tb-stats.h       |  46 +++++++++++
 include/qemu/timer.h          |   6 ++
 include/tcg/tcg.h             |  28 ++++++-
 softmmu/runstate.c            |   9 +++
 tcg/tcg.c                     |  88 ++++++++++++++++++--
 tests/qtest/qmp-cmd-test.c    |   3 +
 12 files changed, 417 insertions(+), 15 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index e903dd1d2e..2bc87f2642 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -15,6 +15,7 @@
 #include "sysemu/cpus.h"
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
+#include "exec/tb-stats.h"
 #include "internal.h"
 
 
@@ -69,6 +70,11 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 {
     g_autoptr(GString) buf = g_string_new("");
 
+    if (!tb_stats_collection_enabled()) {
+        error_setg(errp, "TB information not being recorded.");
+        return NULL;
+    }
+
     if (!tcg_enabled()) {
         error_setg(errp,
                    "Opcode count information is only available with accel=tcg");
@@ -80,11 +86,23 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
     return human_readable_text_from_str(buf);
 }
 
+#ifdef CONFIG_TCG
+HumanReadableText *qmp_x_query_profile(Error **errp)
+{
+    g_autoptr(GString) buf = g_string_new("");
+
+    dump_jit_exec_time_info(dev_time, buf);
+    dev_time = 0;
+
+    return human_readable_text_from_str(buf);
+}
+#else
 HumanReadableText *qmp_x_query_profile(Error **errp)
 {
-    error_setg(errp, "Internal profiler not compiled");
+    error_setg(errp, "TCG should be enabled!");
     return NULL;
 }
+#endif
 
 static void hmp_tcg_register(void)
 {
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 143a52ef5c..78a3104c7f 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -9,6 +9,11 @@
 #include "qemu/osdep.h"
 
 #include "disas/disas.h"
+#include "exec/exec-all.h"
+#include "tcg/tcg.h"
+
+#include "qemu/qemu-print.h"
+#include "qemu/timer.h"
 
 #include "exec/tb-stats.h"
 #include "tb-context.h"
@@ -24,6 +29,147 @@ enum TBStatsStatus {
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t default_tbstats_flag;
 
+uint64_t dev_time;
+
+struct jit_profile_info {
+    uint64_t translations;
+    uint64_t aborted;
+    uint64_t ops;
+    unsigned ops_max;
+    uint64_t del_ops;
+    uint64_t temps;
+    unsigned temps_max;
+    uint64_t host;
+    uint64_t guest;
+    uint64_t search_data;
+
+    uint64_t interm_time;
+    uint64_t code_time;
+    uint64_t restore_count;
+    uint64_t restore_time;
+    uint64_t opt_time;
+    uint64_t la_time;
+};
+
+/* accumulate the statistics from all TBs */
+static void collect_jit_profile_info(void *p, uint32_t hash, void *userp)
+{
+    struct jit_profile_info *jpi = userp;
+    TBStatistics *tbs = p;
+
+    jpi->translations += tbs->translations.total;
+    jpi->ops += tbs->code.num_tcg_ops;
+    if (stat_per_translation(tbs, code.num_tcg_ops) > jpi->ops_max) {
+        jpi->ops_max = stat_per_translation(tbs, code.num_tcg_ops);
+    }
+    jpi->del_ops += tbs->code.deleted_ops;
+    jpi->temps += tbs->code.temps;
+    if (stat_per_translation(tbs, code.temps) > jpi->temps_max) {
+        jpi->temps_max = stat_per_translation(tbs, code.temps);
+    }
+    jpi->host += tbs->code.out_len;
+    jpi->guest += tbs->code.in_len;
+    jpi->search_data += tbs->code.search_out_len;
+
+    jpi->interm_time += stat_per_translation(tbs, gen_times.ir);
+    jpi->opt_time += stat_per_translation(tbs, gen_times.ir_opt);
+    jpi->la_time += stat_per_translation(tbs, gen_times.la);
+    jpi->code_time += stat_per_translation(tbs, gen_times.code);
+
+    /*
+     * The restore time covers how long we have spent restoring state
+     * from a given TB (e.g. recovering from a fault). It is therefor
+     * not related to the number of translations we have done.
+     */
+    jpi->restore_time += tbs->tb_restore_time;
+    jpi->restore_count += tbs->tb_restore_count;
+}
+
+void dump_jit_exec_time_info(uint64_t dev_time, GString *buf)
+{
+    static uint64_t last_cpu_exec_time;
+    uint64_t cpu_exec_time;
+    uint64_t delta;
+
+    cpu_exec_time = tcg_cpu_exec_time();
+    delta = cpu_exec_time - last_cpu_exec_time;
+
+    g_string_append_printf(buf, "async time  %" PRId64 " (%0.3f)\n",
+                           dev_time, dev_time / (double)NANOSECONDS_PER_SECOND);
+    g_string_append_printf(buf, "qemu time   %" PRId64 " (%0.3f)\n",
+                           delta, delta / (double)NANOSECONDS_PER_SECOND);
+    last_cpu_exec_time = cpu_exec_time;
+}
+
+/* dump JIT statisticis using TCGProfile and TBStats */
+void dump_jit_profile_info(TCGProfile *s, GString *buf)
+{
+    if (!tb_stats_collection_enabled()) {
+        return;
+    }
+
+    struct jit_profile_info *jpi = g_new0(struct jit_profile_info, 1);
+
+    qht_iter(&tb_ctx.tb_stats, collect_jit_profile_info, jpi);
+
+    if (jpi->translations) {
+        g_string_append_printf(buf, "translated TBs      %" PRId64 "\n",
+                jpi->translations);
+        g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
+                jpi->ops / (double) jpi->translations, jpi->ops_max);
+        g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
+                jpi->del_ops / (double) jpi->translations);
+        g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
+                jpi->temps / (double) jpi->translations, jpi->temps_max);
+        g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
+                jpi->host / (double) jpi->translations);
+        g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
+                jpi->search_data / (double) jpi->translations);
+
+        uint64_t tot = jpi->interm_time + jpi->code_time;
+
+        g_string_append_printf(buf, "JIT cycles          %" PRId64
+                " (%0.3fs at 2.4 GHz)\n",
+                tot, tot / 2.4e9);
+        g_string_append_printf(buf, "  cycles/op           %0.1f\n",
+                jpi->ops ? (double)tot / jpi->ops : 0);
+        g_string_append_printf(buf, "  cycles/in byte      %0.1f\n",
+                jpi->guest ? (double)tot / jpi->guest : 0);
+        g_string_append_printf(buf, "  cycles/out byte     %0.1f\n",
+                jpi->host ? (double)tot / jpi->host : 0);
+        g_string_append_printf(buf, "  cycles/search byte  %0.1f\n",
+                jpi->search_data ? (double)tot / jpi->search_data : 0);
+        if (tot == 0) {
+            tot = 1;
+        }
+
+        g_string_append_printf(buf, "  gen_interm time     %0.1f%%\n",
+                (double)jpi->interm_time / tot * 100.0);
+        g_string_append_printf(buf, "  gen_code time       %0.1f%%\n",
+                (double)jpi->code_time / tot * 100.0);
+
+        g_string_append_printf(buf, "    optim./code time    %0.1f%%\n",
+                (double)jpi->opt_time / (jpi->code_time ? jpi->code_time : 1)
+                    * 100.0);
+        g_string_append_printf(buf, "    liveness/code time  %0.1f%%\n",
+                (double)jpi->la_time / (jpi->code_time ? jpi->code_time : 1)
+                    * 100.0);
+
+        g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
+                jpi->restore_count);
+        g_string_append_printf(buf, "  avg cycles        %0.1f\n",
+                jpi->restore_count ?
+                    (double)jpi->restore_time / jpi->restore_count : 0);
+
+        if (s) {
+            g_string_append_printf(buf, "cpu exec time  %" PRId64 " (%0.3fs)\n",
+                s->cpu_exec_time,
+                s->cpu_exec_time / (double) NANOSECONDS_PER_SECOND);
+        }
+    }
+    g_free(jpi);
+}
+
 void init_tb_stats_htable(void)
 {
     if (!tb_ctx.tb_stats.map && tb_stats_collection_enabled()) {
diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
index 3973591508..749ad182f2 100644
--- a/accel/tcg/tcg-accel-ops.c
+++ b/accel/tcg/tcg-accel-ops.c
@@ -70,10 +70,17 @@ void tcg_cpus_destroy(CPUState *cpu)
 int tcg_cpus_exec(CPUState *cpu)
 {
     int ret;
+    uint64_t ti;
+
     assert(tcg_enabled());
+    ti = profile_getclock();
+
     cpu_exec_start(cpu);
     ret = cpu_exec(cpu);
     cpu_exec_end(cpu);
+
+    qatomic_add(&tcg_ctx->prof.cpu_exec_time, profile_getclock() - ti);
+
     return ret;
 }
 
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index dadf49954f..8733aec11b 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -204,6 +204,12 @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                                uintptr_t host_pc)
 {
     uint64_t data[TARGET_INSN_START_WORDS];
+    uint64_t ti = 0;
+
+    if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+        ti = get_clock();
+    }
+
     int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
 
     if (insns_left < 0) {
@@ -220,6 +226,15 @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
     }
 
     cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
+
+    if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+        TBStatistics *ts = tb->tb_stats;
+        uint64_t elapsed = get_clock() - ti;
+        qemu_mutex_lock(&ts->jit_stats_lock);
+        ts->tb_restore_time += elapsed;
+        ts->tb_restore_count++;
+        qemu_mutex_unlock(&ts->jit_stats_lock);
+    }
 }
 
 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
@@ -267,7 +282,7 @@ void page_init(void)
  */
 static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
                            target_ulong pc, void *host_pc,
-                           int *max_insns, int64_t *ti)
+                           int *max_insns, TCGTime *tcg_time)
 {
     int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
     if (unlikely(ret != 0)) {
@@ -282,7 +297,11 @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
     tcg_ctx->cpu = NULL;
     *max_insns = tb->icount;
 
-    return tcg_gen_code(tcg_ctx, tb, pc);
+    if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+        tcg_time->ir_done = get_clock();
+    }
+
+    return tcg_gen_code(tcg_ctx, tb, pc, tcg_time);
 }
 
 static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
@@ -296,6 +315,8 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
     new_stats->cs_base = cs_base;
     new_stats->flags = flags;
     new_stats->stats_enabled = get_default_tbstats_flag();
+    new_stats->tbs = g_ptr_array_sized_new(4);
+    qemu_mutex_init(&new_stats->jit_stats_lock);
 
     /*
      * All initialisation must be complete before we insert into qht
@@ -309,6 +330,7 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
          * If there is already a TBStatistic for this TB from a previous flush
          * then just make the new TB point to the older TBStatistic
          */
+        g_ptr_array_free(new_stats->tbs, true);
         g_free(new_stats);
         return existing_stats;
     } else {
@@ -327,8 +349,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     tb_page_addr_t phys_pc;
     tcg_insn_unit *gen_code_buf;
     int gen_code_size, search_size, max_insns;
-    int64_t ti;
     void *host_pc;
+    TCGTime tcg_time;
 
     assert_memory_lock();
     qemu_thread_jit_write();
@@ -389,11 +411,15 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     if (tb_stats_collection_enabled() &&
         qemu_log_in_addr_range(tb->pc)) {
         tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
+        if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+            tcg_time.start = get_clock();
+        }
     } else {
         tb->tb_stats = NULL;
     }
 
-    gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
+    gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns,
+                                    &tcg_time);
     if (unlikely(gen_code_size < 0)) {
         switch (gen_code_size) {
         case -1:
@@ -445,6 +471,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
      */
     perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
 
+    if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+        tcg_time.code_done = get_clock();
+    }
+
     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
         qemu_log_in_addr_range(pc)) {
         FILE *logfile = qemu_log_trylock();
@@ -547,6 +577,38 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
         return tb;
     }
 
+    /*
+     * Collect JIT stats when enabled. We batch them all up here to
+     * avoid spamming the cache with atomic accesses
+     */
+    if (tb_stats_enabled(tb, (TB_JIT_STATS | TB_JIT_TIME))) {
+        TBStatistics *ts = tb->tb_stats;
+        qemu_mutex_lock(&ts->jit_stats_lock);
+
+        if (tb_stats_enabled(tb, TB_JIT_STATS)) {
+            ts->code.num_tcg_ops_opt += tcg_ctx->nb_ops;
+            ts->code.in_len += tb->size;
+            ts->code.out_len += tb->tc.size;
+            ts->code.search_out_len += search_size;
+
+            ts->translations.total++;
+            if (tb_page_addr1(tb) != -1) {
+                ts->translations.spanning++;
+            }
+
+            g_ptr_array_add(ts->tbs, tb);
+        }
+
+        if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+            ts->gen_times.ir += tcg_time.ir_done - tcg_time.start;
+            ts->gen_times.ir_opt += tcg_time.opt_done - tcg_time.ir_done;
+            ts->gen_times.la += tcg_time.la_done - tcg_time.opt_done;
+            ts->gen_times.code += tcg_time.code_done - tcg_time.la_done;
+        }
+
+        qemu_mutex_unlock(&ts->jit_stats_lock);
+    }
+
     /*
      * Insert TB into the corresponding region tree before publishing it
      * through QHT. Otherwise rewinding happened in the TB might fail to
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 80ffbfb455..a60a92091b 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -19,7 +19,7 @@
 #include "exec/plugin-gen.h"
 #include "exec/replay-core.h"
 
-static void gen_tb_exec_count(TranslationBlock *tb)
+static inline void gen_tb_exec_count(TranslationBlock *tb)
 {
     if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
         TCGv_ptr ptr = tcg_temp_ebb_new_ptr();
@@ -147,6 +147,11 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
     tb->size = db->pc_next - db->pc_first;
     tb->icount = db->num_insns;
 
+    /* Save number of guest instructions for TB_JIT_STATS */
+    if (tb_stats_enabled(tb, TB_JIT_STATS)) {
+        tb->tb_stats->code.num_guest_inst += db->num_insns;
+    }
+
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
         && qemu_log_in_addr_range(db->pc_first)) {
         FILE *logfile = qemu_log_trylock();
diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
index fa71eb6f0c..f29eff7576 100644
--- a/include/exec/tb-stats-flags.h
+++ b/include/exec/tb-stats-flags.h
@@ -13,6 +13,8 @@
 
 #define TB_NOTHING    (1 << 0)
 #define TB_EXEC_STATS (1 << 1)
+#define TB_JIT_STATS  (1 << 2)
+#define TB_JIT_TIME   (1 << 3)
 
 /* TBStatistic collection controls */
 void enable_collect_tb_stats(void);
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index eb1fa92a4e..d93d42e085 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -34,6 +34,9 @@
 #define tb_stats_enabled(tb, JIT_STATS) \
     (tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS))
 
+#define stat_per_translation(stat, name) \
+    (stat->translations.total ? stat->name / stat->translations.total : 0)
+
 typedef struct TBStatistics TBStatistics;
 
 /*
@@ -60,10 +63,53 @@ struct TBStatistics {
         unsigned long atomic;
     } executions;
 
+    /* JIT Stats - protected by lock */
+    QemuMutex jit_stats_lock;
+
+    /* Sum of all operations for all translations */
+    struct {
+        unsigned num_guest_inst;
+        unsigned num_tcg_ops;
+        unsigned num_tcg_ops_opt;
+        unsigned spills;
+
+        /* CONFIG_PROFILE */
+        unsigned temps;
+        unsigned deleted_ops;
+        unsigned in_len;
+        unsigned out_len;
+        unsigned search_out_len;
+    } code;
+
+    struct {
+        unsigned long total;
+        unsigned long uncached;
+        unsigned long spanning;
+    } translations;
+
+    /*
+     * All persistent (cached) TranslationBlocks using
+     * this TBStats structure. Has to be reset on a tb_flush.
+     */
+    GPtrArray *tbs;
+
+    /* Recover state from TB */
+    uint64_t tb_restore_time;
+    uint64_t tb_restore_count;
+
+    struct {
+        uint64_t ir;
+        uint64_t ir_opt;
+        uint64_t la;
+        uint64_t code;
+    } gen_times;
 };
 
 bool tb_stats_cmp(const void *ap, const void *bp);
 
 void init_tb_stats_htable(void);
 
+void dump_jit_profile_info(TCGProfile *s, GString *buf);
+void dump_jit_exec_time_info(uint64_t dev_time, GString *buf);
+
 #endif
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index 9a91cb1248..ad0da18a5f 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -989,4 +989,10 @@ static inline int64_t cpu_get_host_ticks(void)
 }
 #endif
 
+static inline int64_t profile_getclock(void)
+{
+    return get_clock();
+}
+
+extern uint64_t dev_time;
 #endif
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 083cbd6580..85bf6fb78e 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -522,6 +522,26 @@ static inline TCGRegSet output_pref(const TCGOp *op, unsigned i)
     return i < ARRAY_SIZE(op->output_pref) ? op->output_pref[i] : 0;
 }
 
+/* Timestamps during translation  */
+typedef struct TCGTime {
+    uint64_t start;
+    uint64_t ir_done;
+    uint64_t opt_done;
+    uint64_t la_done;
+    uint64_t code_done;
+} TCGTime;
+
+/*
+ * The TCGProfile structure holds data for the lifetime of the translator.
+ */
+typedef struct TCGProfile {
+    /* exec time of this vcpu */
+    int64_t cpu_exec_time;
+
+    /* Lifetime count of TCGOps per TCGContext when tb_stats enabled */
+    size_t table_op_count[NB_OPS];
+} TCGProfile;
+
 struct TCGContext {
     uint8_t *pool_cur, *pool_end;
     TCGPool *pool_first, *pool_current, *pool_first_large;
@@ -548,6 +568,8 @@ struct TCGContext {
     tcg_insn_unit *code_buf;      /* pointer for start of tb */
     tcg_insn_unit *code_ptr;      /* pointer for running end of tb */
 
+    TCGProfile prof;
+
 #ifdef CONFIG_DEBUG_TCG
     int goto_tb_issue_mask;
     const TCGOpcode *vecop_list;
@@ -608,6 +630,7 @@ struct TCGContext {
 
     /* Exit to translator on overflow. */
     sigjmp_buf jmp_trans;
+    TranslationBlock *current_tb;
 };
 
 static inline bool temp_readonly(TCGTemp *ts)
@@ -827,7 +850,8 @@ void tcg_register_thread(void);
 void tcg_prologue_init(TCGContext *s);
 void tcg_func_start(TCGContext *s);
 
-int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start);
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start,
+                 TCGTime *tcg_time);
 
 void tb_target_set_jmp_target(const TranslationBlock *, int,
                               uintptr_t, uintptr_t);
@@ -885,7 +909,7 @@ static inline TCGv_ptr tcg_temp_new_ptr(void)
     return temp_tcgv_ptr(t);
 }
 
-int64_t tcg_cpu_exec_time(void);
+uint64_t tcg_cpu_exec_time(void);
 void tcg_dump_info(GString *buf);
 void tcg_dump_op_count(GString *buf);
 
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index bd50062ed0..37390799f1 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -728,9 +728,18 @@ static bool main_loop_should_exit(int *status)
 int qemu_main_loop(void)
 {
     int status = EXIT_SUCCESS;
+#ifdef CONFIG_TCG
+    uint64_t ti;
+#endif
 
     while (!main_loop_should_exit(&status)) {
+#ifdef CONFIG_TCG
+        ti = profile_getclock();
+#endif
         main_loop_wait(false);
+#ifdef CONFIG_TCG
+        dev_time += profile_getclock() - ti;
+#endif
     }
 
     return status;
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 47befdfcae..dc6b3226e2 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -41,6 +41,7 @@
 #define NO_CPU_IO_DEFS
 
 #include "exec/exec-all.h"
+#include "exec/tb-stats.h"
 #include "tcg/tcg-op.h"
 
 #if UINTPTR_MAX == UINT32_MAX
@@ -2983,6 +2984,10 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
     QTAILQ_REMOVE(&s->ops, op, link);
     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
     s->nb_ops--;
+    /* ? won't this end up op_opt - op = del_op_count ? */
+    if (tb_stats_enabled(s->gen_tb, TB_JIT_STATS)) {
+        s->gen_tb->tb_stats->code.deleted_ops++;
+    }
 }
 
 void tcg_remove_ops_after(TCGOp *op)
@@ -4148,6 +4153,10 @@ static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
     }
 
     /* We must spill something.  */
+    if (tb_stats_enabled(s->gen_tb, TB_JIT_STATS)) {
+        s->gen_tb->tb_stats->code.spills++;
+    }
+
     for (j = f; j < 2; j++) {
         TCGRegSet set = reg_ct[j];
 
@@ -5852,22 +5861,57 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
 }
 
-void tcg_dump_op_count(GString *buf)
+/* avoid copy/paste errors */
+#define PROF_ADD(to, from, field)                       \
+    do {                                                \
+        (to)->field += qatomic_read(&((from)->field));  \
+    } while (0)
+
+static void collect_tcg_profiler(TCGProfile *prof)
 {
-    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
+    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
+    unsigned int i;
+
+    for (i = 0; i < n_ctxs; i++) {
+        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
+        const TCGProfile *orig = &s->prof;
+
+        PROF_ADD(prof, orig, cpu_exec_time);
+
+        for (i = 0; i < NB_OPS; i++) {
+            PROF_ADD(prof, orig, table_op_count[i]);
+        }
+    }
 }
 
-int64_t tcg_cpu_exec_time(void)
+uint64_t tcg_cpu_exec_time(void)
 {
-    error_report("%s: TCG profiler not compiled", __func__);
-    exit(EXIT_FAILURE);
+    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
+    unsigned int i;
+    uint64_t ret = 0;
+
+    for (i = 0; i < n_ctxs; i++) {
+        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
+        const TCGProfile *prof = &s->prof;
+
+        ret += qatomic_read(&prof->cpu_exec_time);
+    }
+    return ret;
 }
 
-int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start,
+                 TCGTime *tcg_time)
 {
     int i, num_insns;
     TCGOp *op;
 
+    s->current_tb = tb;
+    /* save pre-optimisation op count */
+    if (tb_stats_enabled(tb, TB_JIT_STATS)) {
+        tb->tb_stats->code.num_tcg_ops += s->nb_ops;
+        tb->tb_stats->code.temps += s->nb_temps;
+    }
+
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                  && qemu_log_in_addr_range(pc_start))) {
         FILE *logfile = qemu_log_trylock();
@@ -5879,6 +5923,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
         }
     }
 
+
 #ifdef CONFIG_DEBUG_TCG
     /* Ensure all labels referenced have been emitted.  */
     {
@@ -5898,6 +5943,10 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
 
     tcg_optimize(s);
 
+    if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+        tcg_time->opt_done = get_clock();
+    }
+
     reachable_code_pass(s);
     liveness_pass_0(s);
     liveness_pass_1(s);
@@ -5921,6 +5970,10 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
         }
     }
 
+    if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+        tcg_time->la_done = get_clock();
+    }
+
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                  && qemu_log_in_addr_range(pc_start))) {
         FILE *logfile = qemu_log_trylock();
@@ -5955,6 +6008,13 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
     s->pool_labels = NULL;
 #endif
 
+    if (tb_stats_collection_enabled()) {
+        QTAILQ_FOREACH(op, &s->ops, link) {
+            TCGOpcode opc = op->opc;
+            s->prof.table_op_count[opc]++;
+        }
+    }
+
     num_insns = -1;
     QTAILQ_FOREACH(op, &s->ops, link) {
         TCGOpcode opc = op->opc;
@@ -6053,9 +6113,23 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
     return tcg_current_code_size(s);
 }
 
+void tcg_dump_op_count(GString *buf)
+{
+    TCGProfile prof = {};
+    int i;
+
+    collect_tcg_profiler(&prof);
+    for (i = 0; i < NB_OPS; i++) {
+        g_string_append_printf(buf, "%s %" PRId64 "\n",
+                tcg_op_defs[i].name, prof.table_op_count[i]);
+    }
+}
+
 void tcg_dump_info(GString *buf)
 {
-    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
+    TCGProfile prof = {};
+    collect_tcg_profiler(&prof);
+    dump_jit_profile_info(&prof, buf);
 }
 
 #ifdef ELF_HOST_MACHINE
diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c
index 73a670e8fa..749aafe4da 100644
--- a/tests/qtest/qmp-cmd-test.c
+++ b/tests/qtest/qmp-cmd-test.c
@@ -46,6 +46,9 @@ static int query_error_class(const char *cmd)
         { "query-balloon", ERROR_CLASS_DEVICE_NOT_ACTIVE },
         { "query-hotpluggable-cpus", ERROR_CLASS_GENERIC_ERROR },
         { "query-vm-generation-id", ERROR_CLASS_GENERIC_ERROR },
+#ifndef CONFIG_TCG
+        { "x-query-profile", ERROR_CLASS_GENERIC_ERROR },
+#endif
         /* Only valid with a USB bus added */
         { "x-query-usb", ERROR_CLASS_GENERIC_ERROR },
         /* Only valid with accel=tcg */
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH v14 05/10] debug: add -d tb_stats to control TBStatistics collection:
  2023-05-30  8:35 [PATCH v14 00/10] TCG code quality tracking Fei Wu
                   ` (3 preceding siblings ...)
  2023-05-30  8:35 ` [PATCH v14 04/10] accel/tcg: add jit stats and time to TBStatistics Fei Wu
@ 2023-05-30  8:35 ` Fei Wu
  2023-06-01  1:18   ` Richard Henderson
  2023-05-30  8:35 ` [PATCH v14 06/10] monitor: adding tb_stats hmp command Fei Wu
                   ` (4 subsequent siblings)
  9 siblings, 1 reply; 47+ messages in thread
From: Fei Wu @ 2023-05-30  8:35 UTC (permalink / raw)
  To: richard.henderson, alex.bennee, qemu-devel
  Cc: Vanderson M. do Rosario, Fei Wu, Paolo Bonzini

From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>

 -d tb_stats[[,level=(+all+jit+exec+time)][,dump_limit=<number>]]

"dump_limit" is used to limit the number of dumped TBStats in
linux-user mode.

[all+jit+exec+time] control the profilling level used
by the TBStats. Can be used as follow:

-d tb_stats
-d tb_stats,level=jit+time
-d tb_stats,dump_limit=15
...

Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
Message-Id: <20190829173437.5926-7-vandersonmr2@gmail.com>
[AJB: fix authorship, reword title]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Fei Wu <fei2.wu@intel.com>
---
 accel/tcg/tb-stats.c          |  5 +++++
 include/exec/gen-icount.h     |  1 +
 include/exec/tb-stats-flags.h |  3 +++
 include/exec/tb-stats.h       |  2 ++
 include/qemu/log.h            |  1 +
 stubs/meson.build             |  1 +
 stubs/tb-stats.c              | 27 +++++++++++++++++++++++++
 util/log.c                    | 37 +++++++++++++++++++++++++++++++++++
 8 files changed, 77 insertions(+)
 create mode 100644 stubs/tb-stats.c

diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 78a3104c7f..68ac7d3f73 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -208,3 +208,8 @@ uint32_t get_default_tbstats_flag(void)
 {
     return default_tbstats_flag;
 }
+
+void set_default_tbstats_flag(uint32_t flags)
+{
+    default_tbstats_flag = flags;
+}
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
index 20e7835ff0..4372817951 100644
--- a/include/exec/gen-icount.h
+++ b/include/exec/gen-icount.h
@@ -3,6 +3,7 @@
 
 #include "exec/exec-all.h"
 #include "exec/tb-stats.h"
+#include "tb-stats-flags.h"
 
 /* Helpers for instruction counting code generation.  */
 
diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
index f29eff7576..04adaee8d9 100644
--- a/include/exec/tb-stats-flags.h
+++ b/include/exec/tb-stats-flags.h
@@ -15,6 +15,7 @@
 #define TB_EXEC_STATS (1 << 1)
 #define TB_JIT_STATS  (1 << 2)
 #define TB_JIT_TIME   (1 << 3)
+#define TB_ALL_STATS  (TB_EXEC_STATS | TB_JIT_STATS | TB_JIT_TIME)
 
 /* TBStatistic collection controls */
 void enable_collect_tb_stats(void);
@@ -24,5 +25,7 @@ bool tb_stats_collection_enabled(void);
 bool tb_stats_collection_paused(void);
 
 uint32_t get_default_tbstats_flag(void);
+void set_default_tbstats_flag(uint32_t);
+void set_tbstats_flags(uint32_t flags);
 
 #endif
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index d93d42e085..72585c448a 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -31,6 +31,8 @@
 #include "exec/tb-stats-flags.h"
 #include "tcg/tcg.h"
 
+#include "exec/tb-stats-flags.h"
+
 #define tb_stats_enabled(tb, JIT_STATS) \
     (tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS))
 
diff --git a/include/qemu/log.h b/include/qemu/log.h
index c5643d8dd5..6f3b8091cd 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -35,6 +35,7 @@ bool qemu_log_separate(void);
 /* LOG_STRACE is used for user-mode strace logging. */
 #define LOG_STRACE         (1 << 19)
 #define LOG_PER_THREAD     (1 << 20)
+#define CPU_LOG_TB_STATS   (1 << 21)
 
 /* Lock/unlock output. */
 
diff --git a/stubs/meson.build b/stubs/meson.build
index a56645e2f7..e926649d40 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -65,3 +65,4 @@ else
 endif
 stub_ss.add(files('semihost-all.c'))
 stub_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_false: files('vfio-user-obj.c'))
+stub_ss.add(files('tb-stats.c'))
diff --git a/stubs/tb-stats.c b/stubs/tb-stats.c
new file mode 100644
index 0000000000..d212c2a1fa
--- /dev/null
+++ b/stubs/tb-stats.c
@@ -0,0 +1,27 @@
+/*
+ * TB Stats Stubs
+ *
+ * Copyright (c) 2019
+ * Written by Alex Bennée <alex.bennee@linaro.org>
+ *
+ * This code is licensed under the GNU GPL v2, or later.
+ */
+
+
+#include "qemu/osdep.h"
+#include "exec/tb-stats-flags.h"
+
+void enable_collect_tb_stats(void)
+{
+    return;
+}
+
+bool tb_stats_collection_enabled(void)
+{
+    return false;
+}
+
+void set_default_tbstats_flag(uint32_t flags)
+{
+    return;
+}
diff --git a/util/log.c b/util/log.c
index 53b4f6c58e..7ae471d97c 100644
--- a/util/log.c
+++ b/util/log.c
@@ -19,6 +19,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/log.h"
+#include "qemu/qemu-print.h"
 #include "qemu/range.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
@@ -27,6 +28,7 @@
 #include "qemu/thread.h"
 #include "qemu/lockable.h"
 #include "qemu/rcu.h"
+#include "exec/tb-stats-flags.h"
 #ifdef CONFIG_LINUX
 #include <sys/syscall.h>
 #endif
@@ -47,6 +49,7 @@ static __thread Notifier qemu_log_thread_cleanup_notifier;
 int qemu_loglevel;
 static bool log_per_thread;
 static GArray *debug_regions;
+int32_t max_num_hot_tbs_to_dump;
 
 /* Returns true if qemu_log() will really write somewhere. */
 bool qemu_log_enabled(void)
@@ -495,6 +498,10 @@ const QEMULogItem qemu_log_items[] = {
       "log every user-mode syscall, its input, and its result" },
     { LOG_PER_THREAD, "tid",
       "open a separate log file per thread; filename must contain '%d'" },
+    { CPU_LOG_TB_STATS,
+      "tb_stats[[,level=(+all+jit+exec+time)][,dump_limit=<number>]]",
+      "enable collection of TBs statistics"
+      "(and dump until given a limit if in user mode).\n" },
     { 0, NULL, NULL },
 };
 
@@ -516,6 +523,36 @@ int qemu_str_to_log_mask(const char *str)
             trace_enable_events((*tmp) + 6);
             mask |= LOG_TRACE;
 #endif
+        } else if (g_str_has_prefix(*tmp, "tb_stats")) {
+            mask |= CPU_LOG_TB_STATS;
+            set_default_tbstats_flag(TB_ALL_STATS);
+            enable_collect_tb_stats();
+        } else if (tb_stats_collection_enabled() &&
+                   g_str_has_prefix(*tmp, "dump_limit=")) {
+            max_num_hot_tbs_to_dump = atoi((*tmp) + 11);
+        } else if (tb_stats_collection_enabled() &&
+                   g_str_has_prefix(*tmp, "level=")) {
+            uint32_t flags = 0;
+            char **level_parts = g_strsplit(*tmp + 6, "+", 0);
+            char **level_tmp;
+            for (level_tmp = level_parts; level_tmp && *level_tmp;
+                    level_tmp++) {
+                if (g_str_equal(*level_tmp, "jit")) {
+                    flags |= TB_JIT_STATS;
+                } else if (g_str_equal(*level_tmp, "exec")) {
+                    flags |= TB_EXEC_STATS;
+                } else if (g_str_equal(*level_tmp, "time")) {
+                    flags |= TB_JIT_TIME;
+                } else if (g_str_equal(*level_tmp, "all")) {
+                    flags |= TB_ALL_STATS;
+                } else {
+                    /* FIXME: set errp */
+                    fprintf(stderr, "no option level=%s, valid options are:"
+                            "all, jit, exec or/and time\n", *level_tmp);
+                    exit(1);
+                }
+                set_default_tbstats_flag(flags);
+            }
         } else {
             for (item = qemu_log_items; item->mask != 0; item++) {
                 if (g_str_equal(*tmp, item->name)) {
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH v14 06/10] monitor: adding tb_stats hmp command
  2023-05-30  8:35 [PATCH v14 00/10] TCG code quality tracking Fei Wu
                   ` (4 preceding siblings ...)
  2023-05-30  8:35 ` [PATCH v14 05/10] debug: add -d tb_stats to control TBStatistics collection: Fei Wu
@ 2023-05-30  8:35 ` Fei Wu
  2023-06-01  1:23   ` Richard Henderson
  2023-05-30  8:35 ` [PATCH v14 07/10] tb-stats: reset the tracked TBs on a tb_flush Fei Wu
                   ` (3 subsequent siblings)
  9 siblings, 1 reply; 47+ messages in thread
From: Fei Wu @ 2023-05-30  8:35 UTC (permalink / raw)
  To: richard.henderson, alex.bennee, qemu-devel
  Cc: Vanderson M. do Rosario, Dr . David Alan Gilbert, Fei Wu,
	Paolo Bonzini, Dr. David Alan Gilbert

From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>

Adding tb_stats [start|pause|stop|filter] command to hmp.
This allows controlling the collection of statistics.
It is also possible to set the level of collection:
all, jit, or exec.

tb_stats filter allow to only collect statistics for the TB
in the last_search list.

The goal of this command is to allow the dynamic exploration
of the TCG behavior and quality. Therefore, for now, a
corresponding QMP command is not worthwhile.

Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
Message-Id: <20190829173437.5926-8-vandersonmr2@gmail.com>
Message-Id: <20190829173437.5926-9-vandersonmr2@gmail.com>
[AJB: fix authorship]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Fei Wu <fei2.wu@intel.com>
---
 accel/tcg/monitor.c           |  45 +++++++++++++
 accel/tcg/tb-stats.c          | 121 +++++++++++++++++++++++++++++++++-
 hmp-commands.hx               |  16 +++++
 include/exec/tb-stats-flags.h |   2 +
 include/exec/tb-stats.h       |  10 +++
 include/monitor/hmp.h         |   1 +
 6 files changed, 192 insertions(+), 3 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 2bc87f2642..2e00f10267 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -11,7 +11,9 @@
 #include "qapi/error.h"
 #include "qapi/type-helpers.h"
 #include "qapi/qapi-commands-machine.h"
+#include "qapi/qmp/qdict.h"
 #include "monitor/monitor.h"
+#include "monitor/hmp.h"
 #include "sysemu/cpus.h"
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
@@ -87,6 +89,49 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 }
 
 #ifdef CONFIG_TCG
+void hmp_tbstats(Monitor *mon, const QDict *qdict)
+{
+    if (!tcg_enabled()) {
+        error_report("TB information is only available with accel=tcg");
+        return;
+    }
+
+    char *cmd = (char *) qdict_get_try_str(qdict, "command");
+    enum TbstatsCmd icmd = -1;
+
+    if (strcmp(cmd, "start") == 0) {
+        icmd = START;
+    } else if (strcmp(cmd, "pause") == 0) {
+        icmd = PAUSE;
+    } else if (strcmp(cmd, "stop") == 0) {
+        icmd = STOP;
+    } else if (strcmp(cmd, "filter") == 0) {
+        icmd = FILTER;
+    } else {
+        error_report("invalid command!");
+        return;
+    }
+
+    char *slevel = (char *) qdict_get_try_str(qdict, "level");
+    uint32_t level = TB_EXEC_STATS | TB_JIT_STATS | TB_JIT_TIME;
+    if (slevel) {
+        if (strcmp(slevel, "jit") == 0) {
+            level = TB_JIT_STATS;
+        } else if (strcmp(slevel, "exec") == 0) {
+            level = TB_EXEC_STATS;
+        } else if (strcmp(slevel, "time") == 0) {
+            level = TB_JIT_TIME;
+        }
+    }
+
+    struct TbstatsCommand *tbscommand = g_new0(struct TbstatsCommand, 1);
+    tbscommand->cmd = icmd;
+    tbscommand->level = level;
+    async_safe_run_on_cpu(first_cpu, do_hmp_tbstats_safe,
+                          RUN_ON_CPU_HOST_PTR(tbscommand));
+
+}
+
 HumanReadableText *qmp_x_query_profile(Error **errp)
 {
     g_autoptr(GString) buf = g_string_new("");
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 68ac7d3f73..805e1fc74d 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -16,18 +16,20 @@
 #include "qemu/timer.h"
 
 #include "exec/tb-stats.h"
+#include "exec/tb-flush.h"
 #include "tb-context.h"
 
 /* TBStatistic collection controls */
 enum TBStatsStatus {
     TB_STATS_DISABLED = 0,
     TB_STATS_RUNNING,
-    TB_STATS_PAUSED,
-    TB_STATS_STOPPED
+    TB_STATS_PAUSED
 };
 
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t default_tbstats_flag;
+/* only accessed in safe work */
+static GList *last_search;
 
 uint64_t dev_time;
 
@@ -170,6 +172,101 @@ void dump_jit_profile_info(TCGProfile *s, GString *buf)
     g_free(jpi);
 }
 
+static void free_tbstats(void *p, uint32_t hash, void *userp)
+{
+    g_free(p);
+}
+
+static void clean_tbstats(void)
+{
+    /* remove all tb_stats */
+    qht_iter(&tb_ctx.tb_stats, free_tbstats, NULL);
+    qht_destroy(&tb_ctx.tb_stats);
+}
+
+void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data icmd)
+{
+    struct TbstatsCommand *cmdinfo = icmd.host_ptr;
+    int cmd = cmdinfo->cmd;
+    uint32_t level = cmdinfo->level;
+
+    switch (cmd) {
+    case START:
+        if (tb_stats_collection_enabled()) {
+            qemu_printf("TB information already being recorded");
+            return;
+        } else if (tb_stats_collection_paused()) {
+            set_tbstats_flags(level);
+        } else {
+            qht_init(&tb_ctx.tb_stats, tb_stats_cmp, CODE_GEN_HTABLE_SIZE,
+                     QHT_MODE_AUTO_RESIZE);
+        }
+
+        set_default_tbstats_flag(level);
+        enable_collect_tb_stats();
+        tb_flush(cpu);
+        break;
+    case PAUSE:
+        if (!tb_stats_collection_enabled()) {
+            qemu_printf("TB information not being recorded");
+            return;
+        }
+
+        /*
+         * Continue to create TBStatistic structures but stop collecting
+         * statistics
+         */
+        pause_collect_tb_stats();
+        set_default_tbstats_flag(TB_NOTHING);
+        set_tbstats_flags(TB_PAUSED);
+        tb_flush(cpu);
+        break;
+    case STOP:
+        if (tb_stats_collection_disabled()) {
+            qemu_printf("TB information not being recorded");
+            return;
+        }
+
+        /* Dissalloc all TBStatistics structures and stop creating new ones */
+        disable_collect_tb_stats();
+        clean_tbstats();
+        tb_flush(cpu);
+        break;
+    case FILTER:
+        if (!tb_stats_collection_enabled()) {
+            qemu_printf("TB information not being recorded");
+            return;
+        }
+        if (!last_search) {
+            qemu_printf(
+                    "no search on record! execute info tbs before filtering!");
+            return;
+        }
+
+        set_default_tbstats_flag(TB_NOTHING);
+
+        /*
+         * Set all tbstats as paused, then return only the ones from last_search
+         */
+        pause_collect_tb_stats();
+        set_tbstats_flags(TB_PAUSED);
+
+        for (GList *iter = last_search; iter; iter = g_list_next(iter)) {
+            TBStatistics *tbs = iter->data;
+            tbs->stats_enabled = level;
+        }
+
+        tb_flush(cpu);
+
+        break;
+    default: /* INVALID */
+        g_assert_not_reached();
+        break;
+    }
+
+    g_free(cmdinfo);
+}
+
 void init_tb_stats_htable(void)
 {
     if (!tb_ctx.tb_stats.map && tb_stats_collection_enabled()) {
@@ -186,7 +283,7 @@ void enable_collect_tb_stats(void)
 
 void disable_collect_tb_stats(void)
 {
-    tcg_collect_tb_stats = TB_STATS_STOPPED;
+    tcg_collect_tb_stats = TB_STATS_DISABLED;
 }
 
 void pause_collect_tb_stats(void)
@@ -199,11 +296,29 @@ bool tb_stats_collection_enabled(void)
     return tcg_collect_tb_stats == TB_STATS_RUNNING;
 }
 
+bool tb_stats_collection_disabled(void)
+{
+    return tcg_collect_tb_stats == TB_STATS_DISABLED;
+}
+
 bool tb_stats_collection_paused(void)
 {
     return tcg_collect_tb_stats == TB_STATS_PAUSED;
 }
 
+static void reset_tbstats_flag(void *p, uint32_t hash, void *userp)
+{
+    uint32_t flag = *((int *)userp);
+    TBStatistics *tbs = p;
+    tbs->stats_enabled = flag;
+}
+
+void set_tbstats_flags(uint32_t flag)
+{
+    /* iterate over tbstats setting their flag as TB_NOTHING */
+    qht_iter(&tb_ctx.tb_stats, reset_tbstats_flag, &flag);
+}
+
 uint32_t get_default_tbstats_flag(void)
 {
     return default_tbstats_flag;
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 2cbd0f77a0..9a40215d34 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1670,6 +1670,22 @@ SRST
   Executes a qemu-io command on the given block device.
 ERST
 
+#if defined(CONFIG_TCG)
+    {
+        .name       = "tb_stats",
+        .args_type  = "command:s,level:s?",
+        .params     = "command [stats_level]",
+        .help       = "Control tb statistics collection:"
+                        "tb_stats (start|pause|stop|filter) [all|jit_stats|exec_stats]",
+        .cmd        = hmp_tbstats,
+    },
+#endif
+
+SRST
+``tb_stats`` *command* *stats_level*
+  Control recording tb statistics
+ERST
+
     {
         .name       = "qom-list",
         .args_type  = "path:s?",
diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
index 04adaee8d9..a3897c99b1 100644
--- a/include/exec/tb-stats-flags.h
+++ b/include/exec/tb-stats-flags.h
@@ -16,12 +16,14 @@
 #define TB_JIT_STATS  (1 << 2)
 #define TB_JIT_TIME   (1 << 3)
 #define TB_ALL_STATS  (TB_EXEC_STATS | TB_JIT_STATS | TB_JIT_TIME)
+#define TB_PAUSED     (1 << 4)
 
 /* TBStatistic collection controls */
 void enable_collect_tb_stats(void);
 void disable_collect_tb_stats(void);
 void pause_collect_tb_stats(void);
 bool tb_stats_collection_enabled(void);
+bool tb_stats_collection_disabled(void);
 bool tb_stats_collection_paused(void);
 
 uint32_t get_default_tbstats_flag(void);
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index 72585c448a..4bb343870b 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -33,6 +33,9 @@
 
 #include "exec/tb-stats-flags.h"
 
+enum SortBy { SORT_BY_HOTNESS, SORT_BY_HG /* Host/Guest */, SORT_BY_SPILLS };
+enum TbstatsCmd { START, PAUSE, STOP, FILTER };
+
 #define tb_stats_enabled(tb, JIT_STATS) \
     (tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS))
 
@@ -114,4 +117,11 @@ void init_tb_stats_htable(void);
 void dump_jit_profile_info(TCGProfile *s, GString *buf);
 void dump_jit_exec_time_info(uint64_t dev_time, GString *buf);
 
+struct TbstatsCommand {
+    enum TbstatsCmd cmd;
+    uint32_t level;
+};
+
+void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data icmd);
+
 #endif
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index 13f9a2dedb..2e7f141754 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -181,5 +181,6 @@ void hmp_ioport_write(Monitor *mon, const QDict *qdict);
 void hmp_boot_set(Monitor *mon, const QDict *qdict);
 void hmp_info_mtree(Monitor *mon, const QDict *qdict);
 void hmp_info_cryptodev(Monitor *mon, const QDict *qdict);
+void hmp_tbstats(Monitor *mon, const QDict *qdict);
 
 #endif
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH v14 07/10] tb-stats: reset the tracked TBs on a tb_flush
  2023-05-30  8:35 [PATCH v14 00/10] TCG code quality tracking Fei Wu
                   ` (5 preceding siblings ...)
  2023-05-30  8:35 ` [PATCH v14 06/10] monitor: adding tb_stats hmp command Fei Wu
@ 2023-05-30  8:35 ` Fei Wu
  2023-06-01  1:30   ` Richard Henderson
  2023-05-30  8:35 ` [PATCH v14 08/10] Adding info [tb-list|tb] commands to HMP (WIP) Fei Wu
                   ` (2 subsequent siblings)
  9 siblings, 1 reply; 47+ messages in thread
From: Fei Wu @ 2023-05-30  8:35 UTC (permalink / raw)
  To: richard.henderson, alex.bennee, qemu-devel; +Cc: Fei Wu, Paolo Bonzini

From: Alex Bennée <alex.bennee@linaro.org>

We keep track of translations but can only do so up until the
translation cache is flushed. At that point we really have no idea if
we can re-create a translation because all the active tracking
information has been reset.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Fei Wu <fei2.wu@intel.com>
---
 accel/tcg/tb-maint.c    |  1 +
 accel/tcg/tb-stats.c    | 19 +++++++++++++++++++
 include/exec/tb-stats.h |  8 ++++++++
 3 files changed, 28 insertions(+)

diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 0980fca358..11ff0ddd90 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -763,6 +763,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
     qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
     tb_remove_all();
 
+    tbstats_reset_tbs();
     tcg_region_reset_all();
     /* XXX: flush processor icache at this point if cache flush is expensive */
     qatomic_inc(&tb_ctx.tb_flush_count);
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 805e1fc74d..139f049ffc 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -267,6 +267,25 @@ void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data icmd)
     g_free(cmdinfo);
 }
 
+/*
+ * We have to reset the tbs array on a tb_flush as those
+ * TranslationBlocks no longer exist and we no loner know if the
+ * current mapping is still valid.
+ */
+
+static void reset_tbs_array(void *p, uint32_t hash, void *userp)
+{
+    TBStatistics *tbs = p;
+    g_ptr_array_set_size(tbs->tbs, 0);
+}
+
+void tbstats_reset_tbs(void)
+{
+    if (tb_ctx.tb_stats.map) {
+        qht_iter(&tb_ctx.tb_stats, reset_tbs_array, NULL);
+    }
+}
+
 void init_tb_stats_htable(void)
 {
     if (!tb_ctx.tb_stats.map && tb_stats_collection_enabled()) {
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index 4bb343870b..30b788f7b2 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -124,4 +124,12 @@ struct TbstatsCommand {
 
 void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data icmd);
 
+/**
+ * tbstats_reset_tbs: reset the linked array of TBs
+ *
+ * Reset the list of tbs for a given array. Should be called from
+ * safe work during tb_flush.
+ */
+void tbstats_reset_tbs(void);
+
 #endif
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH v14 08/10] Adding info [tb-list|tb] commands to HMP (WIP)
  2023-05-30  8:35 [PATCH v14 00/10] TCG code quality tracking Fei Wu
                   ` (6 preceding siblings ...)
  2023-05-30  8:35 ` [PATCH v14 07/10] tb-stats: reset the tracked TBs on a tb_flush Fei Wu
@ 2023-05-30  8:35 ` Fei Wu
  2023-06-01  2:40   ` Richard Henderson
  2023-05-30  8:35 ` [PATCH v14 09/10] tb-stats: dump hot TBs at the end of the execution Fei Wu
  2023-05-30  8:35 ` [PATCH v14 10/10] docs: add tb-stats how to Fei Wu
  9 siblings, 1 reply; 47+ messages in thread
From: Fei Wu @ 2023-05-30  8:35 UTC (permalink / raw)
  To: richard.henderson, alex.bennee, qemu-devel
  Cc: Vanderson M. do Rosario, Dr . David Alan Gilbert, Fei Wu,
	Paolo Bonzini, Dr. David Alan Gilbert

From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>

These commands allow the exploration of TBs generated by the TCG.
Understand which one hotter, with more guest/host instructions... and
examine their guest, host and IR code.

The goal of this command is to allow the dynamic exploration of TCG
behavior and code quality. Therefore, for now, a corresponding QMP
command is not worthwhile.

[AJB: WIP - we still can't be safely sure a translation will succeed]

Example of output:

TB id:1 | phys:0x34d54 virt:0x0000000000034d54 flags:0x0000f0
	| exec:4828932/0 guest inst cov:16.38%
	| trans:1 ints: g:3 op:82 op_opt:34 spills:3
	| h/g (host bytes / guest insts): 90.666664
	| time to gen at 2.4GHz => code:3150.83(ns) IR:712.08(ns)
	| targets: 0x0000000000034d5e (id:11), 0x0000000000034d0d (id:2)

TB id:2 | phys:0x34d0d virt:0x0000000000034d0d flags:0x0000f0
	| exec:4825842/0 guest inst cov:21.82%
	| trans:1 ints: g:4 op:80 op_opt:38 spills:2
	| h/g (host bytes / guest insts): 84.000000
	| time to gen at 2.4GHz => code:3362.92(ns) IR:793.75(ns)
	| targets: 0x0000000000034d19 (id:12), 0x0000000000034d54 (id:1)

TB id:2 | phys:0x34d0d virt:0x0000000000034d0d flags:0x0000f0
	| exec:6956495/0  guest inst cov:21.82%
	| trans:2 ints: g:2 op:40 op_opt:19 spills:1
	| h/g (host bytes / guest insts): 84.000000
	| time to gen at 2.4GHz => code:3130.83(ns) IR:722.50(ns)
	| targets: 0x0000000000034d19 (id:12), 0x0000000000034d54 (id:1)

----------------
IN:
0x00034d0d:  89 de                    movl     %ebx, %esi
0x00034d0f:  26 8b 0e                 movl     %es:(%esi), %ecx
0x00034d12:  26 f6 46 08 80           testb    $0x80, %es:8(%esi)
0x00034d17:  75 3b                    jne      0x34d54

------------------------------

TB id:1 | phys:0x34d54 virt:0x0000000000034d54 flags:0x0000f0
	| exec:5202686/0 guest inst cov:11.28%
	| trans:1 ints: g:3 op:82 op_opt:34 spills:3
	| h/g (host bytes / guest insts): 90.666664
	| time to gen at 2.4GHz => code:2793.75(ns) IR:614.58(ns)
	| targets: 0x0000000000034d5e (id:3), 0x0000000000034d0d (id:2)

TB id:2 | phys:0x34d0d virt:0x0000000000034d0d flags:0x0000f0
	| exec:5199468/0 guest inst cov:15.03%
	| trans:1 ints: g:4 op:80 op_opt:38 spills:2
	| h/g (host bytes / guest insts): 84.000000
	| time to gen at 2.4GHz => code:2958.75(ns) IR:719.58(ns)
	| targets: 0x0000000000034d19 (id:4), 0x0000000000034d54 (id:1)

------------------------------
2 TBs to reach 25% of guest inst exec coverage
Total of guest insts exec: 138346727

------------------------------

Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
Message-Id: <20190829173437.5926-10-vandersonmr2@gmail.com>
[AJB: fix authorship, dropped coverset]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Fei Wu <fei2.wu@intel.com>
---
 accel/tcg/monitor.c          |  54 ++++++
 accel/tcg/tb-stats.c         | 322 +++++++++++++++++++++++++++++++++++
 disas/disas.c                |  26 ++-
 hmp-commands-info.hx         |  16 ++
 include/exec/tb-stats.h      |  33 +++-
 include/monitor/hmp.h        |   2 +
 include/qemu/log-for-trace.h |   6 +-
 include/qemu/log.h           |   2 +
 util/log.c                   |  66 +++++--
 9 files changed, 508 insertions(+), 19 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 2e00f10267..a1780c5920 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -8,6 +8,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/accel.h"
+#include "qemu/log.h"
 #include "qapi/error.h"
 #include "qapi/type-helpers.h"
 #include "qapi/qapi-commands-machine.h"
@@ -18,6 +19,7 @@
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
 #include "exec/tb-stats.h"
+#include "tb-context.h"
 #include "internal.h"
 
 
@@ -132,6 +134,58 @@ void hmp_tbstats(Monitor *mon, const QDict *qdict)
 
 }
 
+void hmp_info_tblist(Monitor *mon, const QDict *qdict)
+{
+    int number_int;
+    const char *sortedby_str = NULL;
+    if (!tcg_enabled()) {
+        error_report("TB information is only available with accel=tcg");
+        return;
+    }
+    if (!tb_ctx.tb_stats.map) {
+        error_report("no TB information recorded");
+        return;
+    }
+
+    number_int = qdict_get_try_int(qdict, "number", 10);
+    sortedby_str = qdict_get_try_str(qdict, "sortedby");
+
+    int sortedby = SORT_BY_HOTNESS;
+    if (sortedby_str == NULL || strcmp(sortedby_str, "hotness") == 0) {
+        sortedby = SORT_BY_HOTNESS;
+    } else if (strcmp(sortedby_str, "hg") == 0) {
+        sortedby = SORT_BY_HG;
+    } else if (strcmp(sortedby_str, "spills") == 0) {
+        sortedby = SORT_BY_SPILLS;
+    } else {
+        error_report("valid sort options are: hotness hg spills");
+        return;
+    }
+
+    dump_tbs_info(number_int, sortedby, true);
+}
+
+void hmp_info_tb(Monitor *mon, const QDict *qdict)
+{
+    const int id = qdict_get_int(qdict, "id");
+    const char *flags = qdict_get_try_str(qdict, "flags");
+    int mask;
+
+    if (!tcg_enabled()) {
+        error_report("TB information is only available with accel=tcg");
+        return;
+    }
+
+    mask = flags ? qemu_str_to_log_mask(flags) : CPU_LOG_TB_IN_ASM;
+
+    if (!mask) {
+        error_report("Unable to parse log flags, see 'help log'");
+        return;
+    }
+
+    dump_tb_info(id, mask, true);
+}
+
 HumanReadableText *qmp_x_query_profile(Error **errp)
 {
     g_autoptr(GString) buf = g_string_new("");
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 139f049ffc..4b358cb421 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -11,14 +11,18 @@
 #include "disas/disas.h"
 #include "exec/exec-all.h"
 #include "tcg/tcg.h"
+#include "qapi/error.h"
 
 #include "qemu/qemu-print.h"
 #include "qemu/timer.h"
+#include "qemu/log.h"
 
 #include "exec/tb-stats.h"
 #include "exec/tb-flush.h"
 #include "tb-context.h"
 
+#include "internal.h"
+
 /* TBStatistic collection controls */
 enum TBStatsStatus {
     TB_STATS_DISABLED = 0,
@@ -31,8 +35,23 @@ static uint32_t default_tbstats_flag;
 /* only accessed in safe work */
 static GList *last_search;
 
+static int id = 1; /* display_id increment counter */
 uint64_t dev_time;
 
+static TBStatistics *get_tbstats_by_id(int id)
+{
+    GList *iter;
+
+    for (iter = last_search; iter; iter = g_list_next(iter)) {
+        TBStatistics *tbs = iter->data;
+        if (tbs && tbs->display_id == id) {
+            return tbs;
+            break;
+        }
+    }
+    return NULL;
+}
+
 struct jit_profile_info {
     uint64_t translations;
     uint64_t aborted;
@@ -294,6 +313,309 @@ void init_tb_stats_htable(void)
     }
 }
 
+static void collect_tb_stats(void *p, uint32_t hash, void *userp)
+{
+    last_search = g_list_prepend(last_search, p);
+}
+
+static void count_invalid_tbs(gpointer data, gpointer user_data)
+{
+    TranslationBlock *tb = (TranslationBlock *) data;
+    unsigned *counter = (unsigned *) user_data;
+    if (tb->cflags & CF_INVALID) {
+        *counter = *counter + 1;
+    }
+}
+
+static int dump_tb_header(TBStatistics *tbs)
+{
+    unsigned g = stat_per_translation(tbs, code.num_guest_inst);
+    unsigned ops = stat_per_translation(tbs, code.num_tcg_ops);
+    unsigned ops_opt = stat_per_translation(tbs, code.num_tcg_ops_opt);
+    unsigned spills = stat_per_translation(tbs, code.spills);
+    unsigned h = stat_per_translation(tbs, code.out_len);
+    unsigned act = tbs->tbs->len;
+    unsigned invalid = 0;
+
+    float guest_host_prop = g ? ((float) h / g) : 0;
+
+    g_ptr_array_foreach(tbs->tbs, &count_invalid_tbs, &invalid);
+
+    qemu_log("TB id:%d | phys:0x"TB_PAGE_ADDR_FMT" virt:0x"TARGET_FMT_lx
+             " flags:0x%08x %d inv/%d\n",
+             tbs->display_id, tbs->phys_pc, tbs->pc, tbs->flags,
+             invalid, act);
+
+    if (tbs_stats_enabled(tbs, TB_EXEC_STATS)) {
+        qemu_log("\t| exec:%lu/%lu guest inst cov:%.2f%%\n",
+                tbs->executions.normal,
+                tbs->executions.atomic, tbs->executions.coverage / 100.0f);
+    }
+
+    if (tbs_stats_enabled(tbs, TB_JIT_STATS)) {
+        qemu_log("\t| trans:%lu ints: g:%u op:%u op_opt:%u spills:%d"
+             "\n\t| h/g (host bytes / guest insts): %f\n",
+             tbs->translations.total, g, ops, ops_opt, spills, guest_host_prop);
+    }
+
+    if (tbs_stats_enabled(tbs, TB_JIT_TIME)) {
+        qemu_log("\t| time to gen at 2.4GHz => code:%0.2lf(ns) IR:%0.2lf(ns)\n",
+             tbs->gen_times.code / 2.4, tbs->gen_times.ir / 2.4);
+    }
+
+    qemu_log("\n");
+
+    return act - invalid;
+}
+
+static gint
+inverse_sort_tbs(gconstpointer p1, gconstpointer p2, gpointer psort_by)
+{
+    const TBStatistics *tbs1 = (TBStatistics *) p1;
+    const TBStatistics *tbs2 = (TBStatistics *) p2;
+    int sort_by = *((enum SortBy *) psort_by);
+    unsigned long c1 = 0;
+    unsigned long c2 = 0;
+
+    if (sort_by == SORT_BY_SPILLS) {
+        c1 = stat_per_translation(tbs1, code.spills);
+        c2 = stat_per_translation(tbs2, code.spills);
+    } else if (sort_by == SORT_BY_HOTNESS) {
+        c1 = stat_per_translation(tbs1, executions.normal);
+        c2 = stat_per_translation(tbs2, executions.normal);
+    } else if (sort_by == SORT_BY_HG) {
+        if (tbs1->code.num_guest_inst == 0) {
+            return -1;
+        }
+        if (tbs2->code.num_guest_inst == 0) {
+            return 1;
+        }
+
+        c1 = tbs1->code.out_len / tbs1->code.num_guest_inst;
+        c2 = tbs2->code.out_len / tbs2->code.num_guest_inst;
+    }
+    return c1 < c2 ? 1 : c1 == c2 ? 0 : -1;
+}
+
+static void dump_last_search_headers(int count)
+{
+    if (!last_search) {
+        qemu_log("No data collected yet\n");
+        return;
+    }
+
+    GList *l = last_search;
+    while (l != NULL && count--) {
+        TBStatistics *tbs = (TBStatistics *) l->data;
+        GList *next = l->next;
+        dump_tb_header(tbs);
+        l = next;
+    }
+}
+
+static uint64_t calculate_last_search_coverages(void)
+{
+    uint64_t total_exec_count = 0;
+    GList *i;
+
+    /* Compute total execution count for all tbs */
+    for (i = last_search; i; i = i->next) {
+        TBStatistics *tbs = (TBStatistics *) i->data;
+        total_exec_count +=
+            (tbs->executions.atomic + tbs->executions.normal)
+                * tbs->code.num_guest_inst;
+    }
+
+    for (i = last_search; i; i = i->next) {
+        TBStatistics *tbs = (TBStatistics *) i->data;
+        uint64_t tb_total_execs =
+            (tbs->executions.atomic + tbs->executions.normal)
+                * tbs->code.num_guest_inst;
+        tbs->executions.coverage =
+            (10000 * tb_total_execs) / (total_exec_count + 1);
+    }
+
+    return total_exec_count;
+}
+
+static void do_dump_tbs_info(int total, int sort_by)
+{
+    id = 1;
+    GList *i;
+    int count = total;
+
+    g_list_free(last_search);
+    last_search = NULL;
+
+    qht_iter(&tb_ctx.tb_stats, collect_tb_stats, NULL);
+
+    last_search = g_list_sort_with_data(last_search, inverse_sort_tbs,
+                                        &sort_by);
+
+    if (!last_search) {
+        qemu_printf("No data collected yet!\n");
+        return;
+    }
+
+    calculate_last_search_coverages();
+
+    for (i = last_search; i && count--; i = i->next) {
+        TBStatistics *tbs = (TBStatistics *) i->data;
+        tbs->display_id = id++;
+    }
+
+    /* free the unused bits */
+    if (i) {
+        if (i->next) {
+            i->next->prev = NULL;
+        }
+        g_list_free(i->next);
+        i->next = NULL;
+    }
+
+    dump_last_search_headers(total);
+}
+
+struct tbs_dump_info {
+    int count;
+    int sort_by;
+};
+
+static void do_dump_tbs_info_safe(CPUState *cpu, run_on_cpu_data tbdi)
+{
+    struct tbs_dump_info *info = tbdi.host_ptr;
+    qemu_log_to_monitor(true);
+    do_dump_tbs_info(info->count, info->sort_by);
+    qemu_log_to_monitor(false);
+    g_free(info);
+}
+
+/*
+ * When we dump_tbs_info on a live system via the HMP we want to
+ * ensure the system is quiessent before we start outputting stuff.
+ * Otherwise we could pollute the output with other logging output.
+ */
+
+void dump_tbs_info(int count, int sort_by, bool use_monitor)
+{
+    if (use_monitor) {
+        struct tbs_dump_info *tbdi = g_new(struct tbs_dump_info, 1);
+        tbdi->count = count;
+        tbdi->sort_by = sort_by;
+        async_safe_run_on_cpu(first_cpu, do_dump_tbs_info_safe,
+                              RUN_ON_CPU_HOST_PTR(tbdi));
+    } else {
+        do_dump_tbs_info(count, sort_by);
+    }
+}
+
+/*
+ * We cannot always re-generate the code even if we know there are
+ * valid translations still in the cache. The reason being the guest
+ * may have un-mapped the page code. In real life this would be
+ * un-reachable as the jump cache is cleared and the following QHT
+ * lookup will do a get_page_addr_code() and fault.
+ *
+ * TODO: can we do this safely? We need to
+ *  a) somehow recover the mmu_idx for this translation
+ *  b) probe MMU_INST_FETCH to know it will succeed
+ */
+static GString *get_code_string(TBStatistics *tbs, int log_flags)
+{
+    int old_log_flags = qemu_loglevel;
+
+    CPUState *cpu = first_cpu;
+    uint32_t cflags = curr_cflags(cpu);
+    TranslationBlock *tb = NULL;
+
+    GString *code_s = g_string_new(NULL);
+    qemu_log_to_string(true, code_s);
+
+    Error *err = NULL;
+    if (!qemu_set_log(log_flags, &err)) {
+        g_string_append_printf(code_s, "%s", error_get_pretty(err));
+        error_free(err);
+    }
+
+    if (sigsetjmp(cpu->jmp_env, 0) == 0) {
+        mmap_lock();
+        tb = tb_gen_code(cpu, tbs->pc, tbs->cs_base, tbs->flags, cflags);
+        tb_phys_invalidate(tb, -1);
+        mmap_unlock();
+    } else {
+        /*
+         * The mmap_lock is dropped by tb_gen_code if it runs out of
+         * memory.
+         */
+        qemu_log("\ncould not gen code for this TB (no longer mapped?)\n");
+        assert_no_pages_locked();
+    }
+
+    qemu_set_log(old_log_flags, &err);
+    qemu_log_to_string(false, NULL);
+
+    return code_s;
+}
+
+static void do_tb_dump_with_statistics(TBStatistics *tbs, int log_flags)
+{
+    g_autoptr(GString) code_s = NULL;
+
+    qemu_log("\n------------------------------\n\n");
+
+    if (dump_tb_header(tbs) > 0) {
+        code_s = get_code_string(tbs, log_flags);
+    } else {
+        code_s = g_string_new("cannot re-translate non-active translation");
+    }
+    qemu_log("%s", code_s->str);
+    qemu_log("------------------------------\n");
+}
+
+struct tb_dump_info {
+    int id;
+    int log_flags;
+    bool use_monitor;
+};
+
+static void do_dump_tb_info_safe(CPUState *cpu, run_on_cpu_data info)
+{
+    struct tb_dump_info *tbdi = (struct tb_dump_info *) info.host_ptr;
+
+    if (!last_search) {
+        qemu_log("no search on record\n");
+        return;
+    }
+
+    qemu_log_to_monitor(tbdi->use_monitor);
+
+    TBStatistics *tbs = get_tbstats_by_id(tbdi->id);
+    if (tbs) {
+        do_tb_dump_with_statistics(tbs, tbdi->log_flags);
+    } else {
+        qemu_log("no TB statitics found with id %d\n", tbdi->id);
+    }
+
+    qemu_log_to_monitor(false);
+
+    g_free(tbdi);
+}
+
+void dump_tb_info(int id, int log_mask, bool use_monitor)
+{
+    struct tb_dump_info *tbdi = g_new(struct tb_dump_info, 1);
+
+    tbdi->id = id;
+    tbdi->log_flags = log_mask;
+    tbdi->use_monitor = use_monitor;
+
+    async_safe_run_on_cpu(first_cpu, do_dump_tb_info_safe,
+                          RUN_ON_CPU_HOST_PTR(tbdi));
+
+    /* tbdi free'd by do_dump_tb_info_safe */
+}
+
+
 void enable_collect_tb_stats(void)
 {
     tcg_collect_tb_stats = TB_STATS_RUNNING;
diff --git a/disas/disas.c b/disas/disas.c
index 0d2d06c2ec..0f31733646 100644
--- a/disas/disas.c
+++ b/disas/disas.c
@@ -8,6 +8,8 @@
 #include "hw/core/cpu.h"
 #include "exec/memory.h"
 
+#include "qemu/log-for-trace.h"
+
 /* Filled in by elfload.c.  Simplistic, but will do for now. */
 struct syminfo *syminfos = NULL;
 
@@ -199,6 +201,24 @@ static void initialize_debug_host(CPUDebug *s)
 #endif
 }
 
+static int
+__attribute__((format(printf, 2, 3)))
+fprintf_log(FILE *a, const char *b, ...)
+{
+    va_list ap;
+    va_start(ap, b);
+
+    if (!to_string) {
+        vfprintf(a, b, ap);
+    } else {
+        qemu_vlog(b, ap);
+    }
+
+    va_end(ap);
+
+    return 1;
+}
+
 /* Disassemble this for me please... (debugging).  */
 void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size)
 {
@@ -207,7 +227,7 @@ void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size)
     CPUDebug s;
 
     disas_initialize_debug_target(&s, cpu);
-    s.info.fprintf_func = fprintf;
+    s.info.fprintf_func = fprintf_log;
     s.info.stream = out;
     s.info.buffer_vma = code;
     s.info.buffer_length = size;
@@ -221,9 +241,9 @@ void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size)
     }
 
     for (pc = code; size > 0; pc += count, size -= count) {
-        fprintf(out, "0x%08" PRIx64 ":  ", pc);
+        fprintf_log(out, "0x%08" PRIx64 ":  ", pc);
         count = s.info.print_insn(pc, &s.info);
-        fprintf(out, "\n");
+        fprintf_log(out, "\n");
         if (count < 0) {
             break;
         }
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index 47d63d26db..01d9658aea 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -262,6 +262,22 @@ ERST
         .params     = "",
         .help       = "show dynamic compiler info",
     },
+    {
+        .name       = "tb-list",
+        .args_type  = "number:i?,sortedby:s?",
+        .params     = "[number sortedby]",
+        .help       = "show a [number] translated blocks sorted by [sortedby]"
+                      "sortedby opts: hotness hg spills",
+        .cmd        = hmp_info_tblist,
+    },
+    {
+        .name       = "tb",
+        .args_type  = "id:i,flags:s?",
+        .params     = "id [flag1,flag2,...]",
+        .help       = "show information about one translated block by id."
+                      "dump flags can be used to set dump code level: out_asm in_asm op",
+        .cmd        = hmp_info_tb,
+    },
 #endif
 
 SRST
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index 30b788f7b2..f2a6b09da9 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -36,8 +36,11 @@
 enum SortBy { SORT_BY_HOTNESS, SORT_BY_HG /* Host/Guest */, SORT_BY_SPILLS };
 enum TbstatsCmd { START, PAUSE, STOP, FILTER };
 
+#define tbs_stats_enabled(tbs, JIT_STATS) \
+    (tbs && (tbs->stats_enabled & JIT_STATS))
+
 #define tb_stats_enabled(tb, JIT_STATS) \
-    (tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS))
+    (tb && tb->tb_stats && tbs_stats_enabled(tb->tb_stats, JIT_STATS))
 
 #define stat_per_translation(stat, name) \
     (stat->translations.total ? stat->name / stat->translations.total : 0)
@@ -66,6 +69,8 @@ struct TBStatistics {
     struct {
         unsigned long normal;
         unsigned long atomic;
+        /* filled only when dumping x% cover set */
+        uint16_t coverage;
     } executions;
 
     /* JIT Stats - protected by lock */
@@ -88,7 +93,6 @@ struct TBStatistics {
 
     struct {
         unsigned long total;
-        unsigned long uncached;
         unsigned long spanning;
     } translations;
 
@@ -108,6 +112,9 @@ struct TBStatistics {
         uint64_t la;
         uint64_t code;
     } gen_times;
+
+    /* HMP information - used for referring to previous search */
+    int display_id;
 };
 
 bool tb_stats_cmp(const void *ap, const void *bp);
@@ -132,4 +139,26 @@ void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data icmd);
  */
 void tbstats_reset_tbs(void);
 
+/**
+ * dump_tbs_info: report the hottest blocks
+ *
+ * @count: the limit of hotblocks
+ * @sort_by: property in which the dump will be sorted
+ * @use_monitor: redirect output to monitor
+ *
+ * Report the hottest blocks to either the log or monitor
+ */
+void dump_tbs_info(int count, int sort_by, bool use_monitor);
+
+/**
+ * dump_tb_info: dump information about one TB
+ *
+ * @id: the display id of the block (from previous search)
+ * @mask: the temporary logging mask
+ * @Use_monitor: redirect output to monitor
+ *
+ * Re-run a translation of a block at addr for the purposes of debug output
+ */
+void dump_tb_info(int id, int log_mask, bool use_monitor);
+
 #endif
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index 2e7f141754..acdd6f1561 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -182,5 +182,7 @@ void hmp_boot_set(Monitor *mon, const QDict *qdict);
 void hmp_info_mtree(Monitor *mon, const QDict *qdict);
 void hmp_info_cryptodev(Monitor *mon, const QDict *qdict);
 void hmp_tbstats(Monitor *mon, const QDict *qdict);
+void hmp_info_tblist(Monitor *mon, const QDict *qdict);
+void hmp_info_tb(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/include/qemu/log-for-trace.h b/include/qemu/log-for-trace.h
index d47c9cd446..5d0afc56c7 100644
--- a/include/qemu/log-for-trace.h
+++ b/include/qemu/log-for-trace.h
@@ -20,6 +20,9 @@
 
 /* Private global variable, don't use */
 extern int qemu_loglevel;
+extern bool to_string;
+
+extern int32_t max_num_hot_tbs_to_dump;
 
 #define LOG_TRACE          (1 << 15)
 
@@ -30,6 +33,7 @@ static inline bool qemu_loglevel_mask(int mask)
 }
 
 /* main logging function */
-void G_GNUC_PRINTF(1, 2) qemu_log(const char *fmt, ...);
+int G_GNUC_PRINTF(1, 2) qemu_log(const char *fmt, ...);
+int G_GNUC_PRINTF(1, 0) qemu_vlog(const char *fmt, va_list va);
 
 #endif
diff --git a/include/qemu/log.h b/include/qemu/log.h
index 6f3b8091cd..26b33151f3 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -85,6 +85,8 @@ extern const QEMULogItem qemu_log_items[];
 bool qemu_set_log(int log_flags, Error **errp);
 bool qemu_set_log_filename(const char *filename, Error **errp);
 bool qemu_set_log_filename_flags(const char *name, int flags, Error **errp);
+void qemu_log_to_monitor(bool enable);
+void qemu_log_to_string(bool enable, GString *s);
 void qemu_set_dfilter_ranges(const char *ranges, Error **errp);
 bool qemu_log_in_addr_range(uint64_t addr);
 int qemu_str_to_log_mask(const char *str);
diff --git a/util/log.c b/util/log.c
index 7ae471d97c..6477eb5a5f 100644
--- a/util/log.c
+++ b/util/log.c
@@ -50,6 +50,8 @@ int qemu_loglevel;
 static bool log_per_thread;
 static GArray *debug_regions;
 int32_t max_num_hot_tbs_to_dump;
+static bool to_monitor;
+bool to_string;
 
 /* Returns true if qemu_log() will really write somewhere. */
 bool qemu_log_enabled(void)
@@ -146,19 +148,6 @@ void qemu_log_unlock(FILE *logfile)
     }
 }
 
-void qemu_log(const char *fmt, ...)
-{
-    FILE *f = qemu_log_trylock();
-    if (f) {
-        va_list ap;
-
-        va_start(ap, fmt);
-        vfprintf(f, fmt, ap);
-        va_end(ap);
-        qemu_log_unlock(f);
-    }
-}
-
 static void __attribute__((__constructor__)) startup(void)
 {
     qemu_mutex_init(&global_mutex);
@@ -206,6 +195,55 @@ valid_filename_template(const char *filename, bool per_thread, Error **errp)
     return filename ? vft_strdup : vft_stderr;
 }
 
+GString *string;
+
+int qemu_vlog(const char *fmt, va_list va)
+{
+    int ret = 0;
+
+    if (to_string && string) {
+        g_string_append_vprintf(string, fmt, va);
+    } else if (to_monitor) {
+        ret = qemu_vprintf(fmt, va);
+    } else {
+        FILE *f = qemu_log_trylock();
+        if (f) {
+            ret = vfprintf(f, fmt, va);
+        }
+        qemu_log_unlock(f);
+    }
+
+    /* Don't pass back error results.  */
+    if (ret < 0) {
+        ret = 0;
+    }
+    return ret;
+}
+
+/* Return the number of characters emitted.  */
+int qemu_log(const char *fmt, ...)
+{
+    int ret = 0;
+    va_list ap;
+
+    va_start(ap, fmt);
+    ret = qemu_vlog(fmt, ap);
+    va_end(ap);
+
+    return ret;
+}
+
+void qemu_log_to_monitor(bool enable)
+{
+    to_monitor = enable;
+}
+
+void qemu_log_to_string(bool enable, GString *s)
+{
+    to_string = enable;
+    string = s;
+}
+
 /* enable or disable low levels log */
 static bool qemu_set_log_internal(const char *filename, bool changed_name,
                                   int log_flags, Error **errp)
@@ -523,6 +561,7 @@ int qemu_str_to_log_mask(const char *str)
             trace_enable_events((*tmp) + 6);
             mask |= LOG_TRACE;
 #endif
+#ifdef CONFIG_TCG
         } else if (g_str_has_prefix(*tmp, "tb_stats")) {
             mask |= CPU_LOG_TB_STATS;
             set_default_tbstats_flag(TB_ALL_STATS);
@@ -553,6 +592,7 @@ int qemu_str_to_log_mask(const char *str)
                 }
                 set_default_tbstats_flag(flags);
             }
+#endif
         } else {
             for (item = qemu_log_items; item->mask != 0; item++) {
                 if (g_str_equal(*tmp, item->name)) {
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH v14 09/10] tb-stats: dump hot TBs at the end of the execution
  2023-05-30  8:35 [PATCH v14 00/10] TCG code quality tracking Fei Wu
                   ` (7 preceding siblings ...)
  2023-05-30  8:35 ` [PATCH v14 08/10] Adding info [tb-list|tb] commands to HMP (WIP) Fei Wu
@ 2023-05-30  8:35 ` Fei Wu
  2023-05-30  8:35 ` [PATCH v14 10/10] docs: add tb-stats how to Fei Wu
  9 siblings, 0 replies; 47+ messages in thread
From: Fei Wu @ 2023-05-30  8:35 UTC (permalink / raw)
  To: richard.henderson, alex.bennee, qemu-devel
  Cc: Vanderson M. do Rosario, Fei Wu, Paolo Bonzini, Laurent Vivier

From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>

Dump the hottest TBs if -d tb_stats,dump_limit=N is used.

Example of output for the 3 hottest TBs:

TB id:1 | phys:0x34d54 virt:0x0000000000034d54 flags:0x0000f0
        | exec:4828932/0 guest inst cov:16.38%
        | trans:1 ints: g:3 op:82 op_opt:34 spills:3
        | h/g (host bytes / guest insts): 90.666664
        | time to gen at 2.4GHz => code:3150.83(ns) IR:712.08(ns)
        | targets: 0x0000000000034d5e (id:11), 0x0000000000034d0d (id:2)

TB id:2 | phys:0x34d0d virt:0x0000000000034d0d flags:0x0000f0
        | exec:4825842/0 guest inst cov:21.82%
        | trans:1 ints: g:4 op:80 op_opt:38 spills:2
        | h/g (host bytes / guest insts): 84.000000
        | time to gen at 2.4GHz => code:3362.92(ns) IR:793.75(ns)
        | targets: 0x0000000000034d19 (id:12), 0x0000000000034d54 (id:1)

TB id:3 | phys:0xec1c1 virt:0x00000000000ec1c1 flags:0x0000b0
        | exec:872032/0 guest inst cov:1.97%
        | trans:1 ints: g:2 op:56 op_opt:26 spills:1
        | h/g (host bytes / guest insts): 68.000000
        | time to gen at 2.4GHz => code:1692.08(ns) IR:473.75(ns)
        | targets: 0x00000000000ec1c5 (id:4), 0x00000000000ec1cb (id:13)

Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
Message-Id: <20190829173437.5926-12-vandersonmr2@gmail.com>
[AJB: fix authorship, ad softmmu support]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Fei Wu <fei2.wu@intel.com>
---
 accel/tcg/tb-stats.c          | 21 +++++++++++++++++++++
 include/exec/tb-stats-dump.h  | 21 +++++++++++++++++++++
 include/exec/tb-stats-flags.h |  1 +
 linux-user/exit.c             |  2 ++
 softmmu/runstate.c            |  2 ++
 stubs/tb-stats.c              |  5 +++++
 util/log.c                    |  4 ++--
 7 files changed, 54 insertions(+), 2 deletions(-)
 create mode 100644 include/exec/tb-stats-dump.h

diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 4b358cb421..d77891492a 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -19,6 +19,7 @@
 
 #include "exec/tb-stats.h"
 #include "exec/tb-flush.h"
+#include "exec/tb-stats-dump.h"
 #include "tb-context.h"
 
 #include "internal.h"
@@ -32,6 +33,7 @@ enum TBStatsStatus {
 
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t default_tbstats_flag;
+static int max_dump_tbs;
 /* only accessed in safe work */
 static GList *last_search;
 
@@ -616,6 +618,20 @@ void dump_tb_info(int id, int log_mask, bool use_monitor)
 }
 
 
+/*
+ * Dump the final stats
+ */
+void tb_stats_dump(void)
+{
+    if (!tb_stats_collection_enabled()) {
+        return;
+    }
+
+    dump_tbs_info(max_dump_tbs, SORT_BY_HOTNESS, false);
+}
+
+/* TBStatistic collection controls */
+
 void enable_collect_tb_stats(void)
 {
     tcg_collect_tb_stats = TB_STATS_RUNNING;
@@ -669,3 +685,8 @@ void set_default_tbstats_flag(uint32_t flags)
 {
     default_tbstats_flag = flags;
 }
+
+void set_tbstats_max_tbs(int max)
+{
+    max_dump_tbs = max;
+}
diff --git a/include/exec/tb-stats-dump.h b/include/exec/tb-stats-dump.h
new file mode 100644
index 0000000000..197c6148e9
--- /dev/null
+++ b/include/exec/tb-stats-dump.h
@@ -0,0 +1,21 @@
+/*
+ * TB Stats common dump functions across sysemu/linux-user
+ *
+ * Copyright (c) 2019 Linaro
+ *
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#ifndef _TB_STATS_DUMP_H_
+#define _TB_STATS_DUMP_H_
+
+/**
+ * tb_stats_dump: dump final tb_stats at end of execution
+ */
+#ifdef CONFIG_TCG
+void tb_stats_dump(void);
+#else
+static inline void tb_stats_dump(void) { /* do nothing */ };
+#endif
+
+#endif /* _TB_STATS_DUMP_H_ */
diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
index a3897c99b1..484a4c9c68 100644
--- a/include/exec/tb-stats-flags.h
+++ b/include/exec/tb-stats-flags.h
@@ -26,6 +26,7 @@ bool tb_stats_collection_enabled(void);
 bool tb_stats_collection_disabled(void);
 bool tb_stats_collection_paused(void);
 
+void set_tbstats_max_tbs(int max);
 uint32_t get_default_tbstats_flag(void);
 void set_default_tbstats_flag(uint32_t);
 void set_tbstats_flags(uint32_t flags);
diff --git a/linux-user/exit.c b/linux-user/exit.c
index 3017d28a3c..4fd23bcf60 100644
--- a/linux-user/exit.c
+++ b/linux-user/exit.c
@@ -25,6 +25,7 @@
 #ifdef CONFIG_GPROF
 #include <sys/gmon.h>
 #endif
+#include "exec/tb-stats-dump.h"
 
 #ifdef CONFIG_GCOV
 extern void __gcov_dump(void);
@@ -41,4 +42,5 @@ void preexit_cleanup(CPUArchState *env, int code)
         gdb_exit(code);
         qemu_plugin_user_exit();
         perf_exit();
+        tb_stats_dump();
 }
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index 37390799f1..b5ceb55ffd 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -30,6 +30,7 @@
 #include "crypto/cipher.h"
 #include "crypto/init.h"
 #include "exec/cpu-common.h"
+#include "exec/tb-stats-dump.h"
 #include "gdbstub/syscalls.h"
 #include "hw/boards.h"
 #include "migration/misc.h"
@@ -827,6 +828,7 @@ void qemu_cleanup(void)
     vm_shutdown();
     replay_finish();
 
+    tb_stats_dump();
     job_cancel_sync_all();
     bdrv_close_all();
 
diff --git a/stubs/tb-stats.c b/stubs/tb-stats.c
index d212c2a1fa..a3e1406b88 100644
--- a/stubs/tb-stats.c
+++ b/stubs/tb-stats.c
@@ -21,6 +21,11 @@ bool tb_stats_collection_enabled(void)
     return false;
 }
 
+void set_tbstats_max_tbs(int max)
+{
+    return;
+}
+
 void set_default_tbstats_flag(uint32_t flags)
 {
     return;
diff --git a/util/log.c b/util/log.c
index 6477eb5a5f..d159ca6916 100644
--- a/util/log.c
+++ b/util/log.c
@@ -49,7 +49,6 @@ static __thread Notifier qemu_log_thread_cleanup_notifier;
 int qemu_loglevel;
 static bool log_per_thread;
 static GArray *debug_regions;
-int32_t max_num_hot_tbs_to_dump;
 static bool to_monitor;
 bool to_string;
 
@@ -568,7 +567,8 @@ int qemu_str_to_log_mask(const char *str)
             enable_collect_tb_stats();
         } else if (tb_stats_collection_enabled() &&
                    g_str_has_prefix(*tmp, "dump_limit=")) {
-            max_num_hot_tbs_to_dump = atoi((*tmp) + 11);
+            int hot_tbs = atoi((*tmp) + 11);
+            set_tbstats_max_tbs(hot_tbs);
         } else if (tb_stats_collection_enabled() &&
                    g_str_has_prefix(*tmp, "level=")) {
             uint32_t flags = 0;
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH v14 10/10] docs: add tb-stats how to
  2023-05-30  8:35 [PATCH v14 00/10] TCG code quality tracking Fei Wu
                   ` (8 preceding siblings ...)
  2023-05-30  8:35 ` [PATCH v14 09/10] tb-stats: dump hot TBs at the end of the execution Fei Wu
@ 2023-05-30  8:35 ` Fei Wu
  9 siblings, 0 replies; 47+ messages in thread
From: Fei Wu @ 2023-05-30  8:35 UTC (permalink / raw)
  To: richard.henderson, alex.bennee, qemu-devel; +Cc: Fei Wu, Paolo Bonzini

Signed-off-by: Fei Wu <fei2.wu@intel.com>
---
 docs/devel/tcg-tbstats.rst | 129 +++++++++++++++++++++++++++++++++++++
 1 file changed, 129 insertions(+)
 create mode 100644 docs/devel/tcg-tbstats.rst

diff --git a/docs/devel/tcg-tbstats.rst b/docs/devel/tcg-tbstats.rst
new file mode 100644
index 0000000000..bfba222e96
--- /dev/null
+++ b/docs/devel/tcg-tbstats.rst
@@ -0,0 +1,129 @@
+============
+TBStatistics
+============
+
+What is TBStatistics
+====================
+
+TBStatistics (tb_stats) is a tool to gather various internal information of TCG
+during binary translation, this allows us to identify such as hottest TBs,
+guest to host instruction translation ratio, number of spills during register
+allocation and more.
+
+
+How to use TBStatistics
+=======================
+
+1. HMP interface
+----------------
+
+TBStatistics provides HMP interface, you can try the following examples after
+connecting to the monitor.
+
+* First check the help info::
+
+    (qemu) help tb_stats
+    tb_stats command [stats_level] -- Control tb statistics collection:tb_stats (start|pause|stop|filter) [all|jit_stats|exec_stats]
+
+    (qemu) help info tb-list
+    info tb-list [number sortedby] -- show a [number] translated blocks sorted by [sortedby]sortedby opts: hotness hg spills
+
+    (qemu) help info tb
+    info tb id [flag1,flag2,...] -- show information about one translated block by id.dump flags can be used to set dump code level: out_asm in_asm op
+
+* Enable TBStatistics::
+
+    (qemu) tb_stats start all
+    (qemu)
+
+* Get interested TB list::
+
+    (qemu) info tb-list 2
+    TB id:1 | phys:0x79bca0 virt:0xffffffff8059bca0 flags:0x01024001 0 inv/1
+            | exec:1464084/0 guest inst cov:0.15%
+            | trans:1 ints: g:3 op:16 op_opt:15 spills:0
+            | h/g (host bytes / guest insts): 64.000000
+            | time to gen at 2.4GHz => code:607.08(ns) IR:250.83(ns)
+
+    TB id:2 | phys:0x2adf0c virt:0xffffffff800adf0c flags:0x01024001 0 inv/1
+            | exec:1033298/0 guest inst cov:0.28%
+            | trans:1 ints: g:8 op:35 op_opt:33 spills:0
+            | h/g (host bytes / guest insts): 86.000000
+            | time to gen at 2.4GHz => code:1429.58(ns) IR:545.42(ns)
+
+* Dive into the specific TB::
+
+    (qemu) info tb 1 op
+    ------------------------------
+
+    TB id:1 | phys:0x79bca0 virt:0xffffffff8059bca0 flags:0x01024001 7 inv/19
+            | exec:2038349/0 guest inst cov:0.15%
+            | trans:19 ints: g:3 op:16 op_opt:15 spills:0
+            | h/g (host bytes / guest insts): 64.000000
+            | time to gen at 2.4GHz => code:133434.17(ns) IR:176988.33(ns)
+
+    OP:
+     ld_i32 loc1,env,$0xfffffffffffffff0
+     brcond_i32 loc1,$0x0,lt,$L0
+     mov_i64 loc3,$0x7f3c70b3a4e0
+     call inc_exec_freq,$0x1,$0,loc3
+
+     ---- ffffffff8059bca0 0000000000006422
+     add_i64 loc5,x2/sp,$0x8
+     qemu_ld_i64 x8/s0,loc5,un+leq,1
+
+     ---- ffffffff8059bca2 0000000000000000
+     add_i64 x2/sp,x2/sp,$0x10
+
+     ---- ffffffff8059bca4 0000000000000000
+     mov_i64 pc,x1/ra
+     and_i64 pc,pc,$0xfffffffffffffffe
+     call lookup_tb_ptr,$0x6,$1,tmp9,env
+     goto_ptr tmp9
+     set_label $L0
+     exit_tb $0x7f3e887a8043
+
+    ------------------------------
+
+* Stop TBStatistics after investigation, this will disable TBStatistics completely.::
+
+    (qemu) tb_stats stop
+    (qemu)
+
+* Alternatively, TBStatistics can be paused, the previous collected TBStatistics
+  are not cleared but there is no TBStatistics recorded for new TBs.::
+
+    (qemu) tb_stats pause
+    (qemu)
+
+* Definitely, TBStatistics can be restarted for another round of investigation.::
+
+    (qemu) tb_stats start all
+    (qemu)
+
+
+2. Dump at exit
+---------------
+
+New command line options have been added to enable dump TB information at exit:::
+
+    -d tb_stats[[,level=(+all+jit+exec+time)][,dump_limit=<number>]]
+
+e.g. starting qemu like this:::
+
+    -d tb_stats,dump_limit=2
+
+Qemu prints the following at exit:::
+
+    QEMU: Terminated
+    TB id:1 | phys:0x61be02 virt:0xffffffff8041be02 flags:0x01024001 0 inv/1
+            | exec:72739176/0 guest inst cov:20.22%
+            | trans:1 ints: g:9 op:35 op_opt:33 spills:0
+            | h/g (host bytes / guest insts): 51.555557
+            | time to gen at 2.4GHz => code:1065.42(ns) IR:554.17(ns)
+
+    TB id:2 | phys:0x61bc66 virt:0xffffffff8041bc66 flags:0x01024001 0 inv/1
+            | exec:25069507/0 guest inst cov:0.77%
+            | trans:1 ints: g:1 op:15 op_opt:14 spills:0
+            | h/g (host bytes / guest insts): 128.000000
+            | time to gen at 2.4GHz => code:312.50(ns) IR:152.08(ns)
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 04/10] accel/tcg: add jit stats and time to TBStatistics
  2023-05-30  8:35 ` [PATCH v14 04/10] accel/tcg: add jit stats and time to TBStatistics Fei Wu
@ 2023-05-30  9:37   ` Markus Armbruster
  2023-05-31  0:54     ` Wu, Fei
  2023-06-01  1:08   ` Richard Henderson
  1 sibling, 1 reply; 47+ messages in thread
From: Markus Armbruster @ 2023-05-30  9:37 UTC (permalink / raw)
  To: Fei Wu
  Cc: richard.henderson, alex.bennee, qemu-devel,
	Vanderson M . do Rosario, Paolo Bonzini, Thomas Huth,
	Laurent Vivier

Fei Wu <fei2.wu@intel.com> writes:

> This collects all the statistics for TBStatistics, not only for the
> whole emulation but for each TB.
>
> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> Signed-off-by: Fei Wu <fei2.wu@intel.com>
> ---
>  accel/tcg/monitor.c           |  20 ++++-
>  accel/tcg/tb-stats.c          | 146 ++++++++++++++++++++++++++++++++++
>  accel/tcg/tcg-accel-ops.c     |   7 ++
>  accel/tcg/translate-all.c     |  70 +++++++++++++++-
>  accel/tcg/translator.c        |   7 +-
>  include/exec/tb-stats-flags.h |   2 +
>  include/exec/tb-stats.h       |  46 +++++++++++
>  include/qemu/timer.h          |   6 ++
>  include/tcg/tcg.h             |  28 ++++++-
>  softmmu/runstate.c            |   9 +++
>  tcg/tcg.c                     |  88 ++++++++++++++++++--
>  tests/qtest/qmp-cmd-test.c    |   3 +
>  12 files changed, 417 insertions(+), 15 deletions(-)
>
> diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
> index e903dd1d2e..2bc87f2642 100644
> --- a/accel/tcg/monitor.c
> +++ b/accel/tcg/monitor.c
> @@ -15,6 +15,7 @@
>  #include "sysemu/cpus.h"
>  #include "sysemu/cpu-timers.h"
>  #include "sysemu/tcg.h"
> +#include "exec/tb-stats.h"
>  #include "internal.h"
>  
>  
> @@ -69,6 +70,11 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
>  {
>      g_autoptr(GString) buf = g_string_new("");
>  
> +    if (!tb_stats_collection_enabled()) {
> +        error_setg(errp, "TB information not being recorded.");

From error_setg()'s contract in include/qapi/error.h:

     * The resulting message should be a single phrase, with no newline or
     * trailing punctuation.

Please drop the period.  Same elsewhere, not flagging it again there.

> +        return NULL;
> +    }
> +
>      if (!tcg_enabled()) {
>          error_setg(errp,
>                     "Opcode count information is only available with accel=tcg");
> @@ -80,11 +86,23 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
>      return human_readable_text_from_str(buf);
>  }
>  
> +#ifdef CONFIG_TCG
> +HumanReadableText *qmp_x_query_profile(Error **errp)
> +{
> +    g_autoptr(GString) buf = g_string_new("");
> +
> +    dump_jit_exec_time_info(dev_time, buf);
> +    dev_time = 0;
> +
> +    return human_readable_text_from_str(buf);
> +}
> +#else
>  HumanReadableText *qmp_x_query_profile(Error **errp)
>  {
> -    error_setg(errp, "Internal profiler not compiled");
> +    error_setg(errp, "TCG should be enabled!");
>      return NULL;
>  }
> +#endif

machine.json has

   ##
   # @x-query-profile:
   #
   # Query TCG profiling information
   #
   # Features:
   #
   # @unstable: This command is meant for debugging.
   #
   # Returns: profile information
   #
   # Since: 6.2
   ##
   { 'command': 'x-query-profile',
     'returns': 'HumanReadableText',
     'if': 'CONFIG_TCG',
     'features': [ 'unstable' ] }

Not changed in this series.

Note the command is conditional on CONFIG_TCG, i.e. code generated for
it is #if defined(CONFIG_TCG).

The only other use is in hmp-commands-info.hx, and it is also guarded by
CONFIG_TCG.

Therefore, your #else is unreachable.  You can delete it along with...

>  
>  static void hmp_tcg_register(void)
>  {

[...]

> diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c
> index 73a670e8fa..749aafe4da 100644
> --- a/tests/qtest/qmp-cmd-test.c
> +++ b/tests/qtest/qmp-cmd-test.c
> @@ -46,6 +46,9 @@ static int query_error_class(const char *cmd)
>          { "query-balloon", ERROR_CLASS_DEVICE_NOT_ACTIVE },
>          { "query-hotpluggable-cpus", ERROR_CLASS_GENERIC_ERROR },
>          { "query-vm-generation-id", ERROR_CLASS_GENERIC_ERROR },
> +#ifndef CONFIG_TCG
> +        { "x-query-profile", ERROR_CLASS_GENERIC_ERROR },
> +#endif

... this entry.

>          /* Only valid with a USB bus added */
>          { "x-query-usb", ERROR_CLASS_GENERIC_ERROR },
>          /* Only valid with accel=tcg */



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 04/10] accel/tcg: add jit stats and time to TBStatistics
  2023-05-30  9:37   ` Markus Armbruster
@ 2023-05-31  0:54     ` Wu, Fei
  0 siblings, 0 replies; 47+ messages in thread
From: Wu, Fei @ 2023-05-31  0:54 UTC (permalink / raw)
  To: Markus Armbruster
  Cc: richard.henderson, alex.bennee, qemu-devel,
	Vanderson M . do Rosario, Paolo Bonzini, Thomas Huth,
	Laurent Vivier

On 5/30/2023 5:37 PM, Markus Armbruster wrote:
> Fei Wu <fei2.wu@intel.com> writes:
> 
>> This collects all the statistics for TBStatistics, not only for the
>> whole emulation but for each TB.
>>
>> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>> ---
>>  accel/tcg/monitor.c           |  20 ++++-
>>  accel/tcg/tb-stats.c          | 146 ++++++++++++++++++++++++++++++++++
>>  accel/tcg/tcg-accel-ops.c     |   7 ++
>>  accel/tcg/translate-all.c     |  70 +++++++++++++++-
>>  accel/tcg/translator.c        |   7 +-
>>  include/exec/tb-stats-flags.h |   2 +
>>  include/exec/tb-stats.h       |  46 +++++++++++
>>  include/qemu/timer.h          |   6 ++
>>  include/tcg/tcg.h             |  28 ++++++-
>>  softmmu/runstate.c            |   9 +++
>>  tcg/tcg.c                     |  88 ++++++++++++++++++--
>>  tests/qtest/qmp-cmd-test.c    |   3 +
>>  12 files changed, 417 insertions(+), 15 deletions(-)
>>
>> diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
>> index e903dd1d2e..2bc87f2642 100644
>> --- a/accel/tcg/monitor.c
>> +++ b/accel/tcg/monitor.c
>> @@ -15,6 +15,7 @@
>>  #include "sysemu/cpus.h"
>>  #include "sysemu/cpu-timers.h"
>>  #include "sysemu/tcg.h"
>> +#include "exec/tb-stats.h"
>>  #include "internal.h"
>>  
>>  
>> @@ -69,6 +70,11 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
>>  {
>>      g_autoptr(GString) buf = g_string_new("");
>>  
>> +    if (!tb_stats_collection_enabled()) {
>> +        error_setg(errp, "TB information not being recorded.");
> 
> From error_setg()'s contract in include/qapi/error.h:
> 
>      * The resulting message should be a single phrase, with no newline or
>      * trailing punctuation.
> 
> Please drop the period.  Same elsewhere, not flagging it again there.
> 
Got it, will do.

>> +        return NULL;
>> +    }
>> +
>>      if (!tcg_enabled()) {
>>          error_setg(errp,
>>                     "Opcode count information is only available with accel=tcg");
>> @@ -80,11 +86,23 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
>>      return human_readable_text_from_str(buf);
>>  }
>>  
>> +#ifdef CONFIG_TCG
>> +HumanReadableText *qmp_x_query_profile(Error **errp)
>> +{
>> +    g_autoptr(GString) buf = g_string_new("");
>> +
>> +    dump_jit_exec_time_info(dev_time, buf);
>> +    dev_time = 0;
>> +
>> +    return human_readable_text_from_str(buf);
>> +}
>> +#else
>>  HumanReadableText *qmp_x_query_profile(Error **errp)
>>  {
>> -    error_setg(errp, "Internal profiler not compiled");
>> +    error_setg(errp, "TCG should be enabled!");
>>      return NULL;
>>  }
>> +#endif
> 
> machine.json has
> 
>    ##
>    # @x-query-profile:
>    #
>    # Query TCG profiling information
>    #
>    # Features:
>    #
>    # @unstable: This command is meant for debugging.
>    #
>    # Returns: profile information
>    #
>    # Since: 6.2
>    ##
>    { 'command': 'x-query-profile',
>      'returns': 'HumanReadableText',
>      'if': 'CONFIG_TCG',
>      'features': [ 'unstable' ] }
> 
> Not changed in this series.
> 
> Note the command is conditional on CONFIG_TCG, i.e. code generated for
> it is #if defined(CONFIG_TCG).
> 
> The only other use is in hmp-commands-info.hx, and it is also guarded by
> CONFIG_TCG.
> 
> Therefore, your #else is unreachable.  You can delete it along with...
> 
OK, so qmp_x_query_profile won't get called #ifndef CONFIG_TCG, I will
remove it.

Thanks,
Fei.

>>  
>>  static void hmp_tcg_register(void)
>>  {
> 
> [...]
> 
>> diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c
>> index 73a670e8fa..749aafe4da 100644
>> --- a/tests/qtest/qmp-cmd-test.c
>> +++ b/tests/qtest/qmp-cmd-test.c
>> @@ -46,6 +46,9 @@ static int query_error_class(const char *cmd)
>>          { "query-balloon", ERROR_CLASS_DEVICE_NOT_ACTIVE },
>>          { "query-hotpluggable-cpus", ERROR_CLASS_GENERIC_ERROR },
>>          { "query-vm-generation-id", ERROR_CLASS_GENERIC_ERROR },
>> +#ifndef CONFIG_TCG
>> +        { "x-query-profile", ERROR_CLASS_GENERIC_ERROR },
>> +#endif
> 
> ... this entry.
> 
>>          /* Only valid with a USB bus added */
>>          { "x-query-usb", ERROR_CLASS_GENERIC_ERROR },
>>          /* Only valid with accel=tcg */
> 



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 02/10] accel/tcg: introduce TBStatistics structure
  2023-05-30  8:35 ` [PATCH v14 02/10] accel/tcg: introduce TBStatistics structure Fei Wu
@ 2023-05-31 23:59   ` Richard Henderson
  2023-06-01  1:30     ` Wu, Fei
  2023-06-01  0:01   ` Richard Henderson
  1 sibling, 1 reply; 47+ messages in thread
From: Richard Henderson @ 2023-05-31 23:59 UTC (permalink / raw)
  To: Fei Wu, alex.bennee, qemu-devel; +Cc: Vanderson M. do Rosario, Paolo Bonzini

On 5/30/23 01:35, Fei Wu wrote:
> +    /*
> +     * We want to fetch the stats structure before we start code
> +     * generation so we can count interesting things about this
> +     * generation.
> +     */
> +    if (tb_stats_collection_enabled()) {
> +        tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);

If cflags & CF_PCREL, then 'pc' should not be cached or matched.
Using 'phys_pc' will suffice, so pass 0 in that case.


r~


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 02/10] accel/tcg: introduce TBStatistics structure
  2023-05-30  8:35 ` [PATCH v14 02/10] accel/tcg: introduce TBStatistics structure Fei Wu
  2023-05-31 23:59   ` Richard Henderson
@ 2023-06-01  0:01   ` Richard Henderson
  2023-06-01  3:19     ` Wu, Fei
  1 sibling, 1 reply; 47+ messages in thread
From: Richard Henderson @ 2023-06-01  0:01 UTC (permalink / raw)
  To: Fei Wu, alex.bennee, qemu-devel; +Cc: Vanderson M. do Rosario, Paolo Bonzini

On 5/30/23 01:35, Fei Wu wrote:
> +/* TBStatistic collection controls */
> +enum TBStatsStatus {
> +    TB_STATS_DISABLED = 0,
> +    TB_STATS_RUNNING,
> +    TB_STATS_PAUSED,
> +    TB_STATS_STOPPED
> +};

I don't see what PAUSED or STOPPED actually do.
As far as I can see, stats are either being collected or not: a boolean.


r~


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 03/10] accel: collecting TB execution count
  2023-05-30  8:35 ` [PATCH v14 03/10] accel: collecting TB execution count Fei Wu
@ 2023-06-01  0:05   ` Richard Henderson
  2023-06-01  5:44     ` Wu, Fei
  0 siblings, 1 reply; 47+ messages in thread
From: Richard Henderson @ 2023-06-01  0:05 UTC (permalink / raw)
  To: Fei Wu, alex.bennee, qemu-devel; +Cc: Vanderson M. do Rosario, Paolo Bonzini

On 5/30/23 01:35, Fei Wu wrote:
> From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>
> 
> If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
> enabled, then we instrument the start code of this TB
> to atomically count the number of times it is executed.
> We count both the number of "normal" executions and atomic
> executions of a TB.
> 
> The execution count of the TB is stored in its respective
> TBS.
> 
> All TBStatistics are created by default with the flags from
> default_tbstats_flag.
> 
> [Richard Henderson created the inline gen_tb_exec_count]
> 
> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
> Message-Id: <20190829173437.5926-3-vandersonmr2@gmail.com>
> [AJB: Fix author]
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> Signed-off-by: Fei Wu <fei2.wu@intel.com>
> ---
>   accel/tcg/cpu-exec.c          |  6 ++++++
>   accel/tcg/tb-stats.c          |  6 ++++++
>   accel/tcg/tcg-runtime.c       |  1 +
>   accel/tcg/translate-all.c     |  7 +++++--
>   accel/tcg/translator.c        | 25 +++++++++++++++++++++++++
>   include/exec/gen-icount.h     |  1 +
>   include/exec/tb-stats-flags.h |  5 +++++
>   include/exec/tb-stats.h       | 13 +++++++++++++
>   8 files changed, 62 insertions(+), 2 deletions(-)
> 
> diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
> index 0e741960da..c0d8f26237 100644
> --- a/accel/tcg/cpu-exec.c
> +++ b/accel/tcg/cpu-exec.c
> @@ -25,6 +25,7 @@
>   #include "trace.h"
>   #include "disas/disas.h"
>   #include "exec/exec-all.h"
> +#include "exec/tb-stats.h"
>   #include "tcg/tcg.h"
>   #include "qemu/atomic.h"
>   #include "qemu/rcu.h"
> @@ -562,7 +563,12 @@ void cpu_exec_step_atomic(CPUState *cpu)
>               mmap_unlock();
>           }
>   
> +        if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
> +            tb->tb_stats->executions.atomic++;
> +        }
> +
>           cpu_exec_enter(cpu);
> +
>           /* execute the generated code */
>           trace_exec_tb(tb, pc);
>           cpu_tb_exec(cpu, tb, &tb_exit);
> diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
> index f988bd8a31..143a52ef5c 100644
> --- a/accel/tcg/tb-stats.c
> +++ b/accel/tcg/tb-stats.c
> @@ -22,6 +22,7 @@ enum TBStatsStatus {
>   };
>   
>   static enum TBStatsStatus tcg_collect_tb_stats;
> +static uint32_t default_tbstats_flag;
>   
>   void init_tb_stats_htable(void)
>   {
> @@ -56,3 +57,8 @@ bool tb_stats_collection_paused(void)
>   {
>       return tcg_collect_tb_stats == TB_STATS_PAUSED;
>   }
> +
> +uint32_t get_default_tbstats_flag(void)
> +{
> +    return default_tbstats_flag;
> +}

What is the purpose of this function, instead of a global variable?
What is the meaning of 'default' in its name?

> @@ -295,6 +295,7 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
>       new_stats->pc = pc;
>       new_stats->cs_base = cs_base;
>       new_stats->flags = flags;
> +    new_stats->stats_enabled = get_default_tbstats_flag();

Is this merely to record how we have generated a given TB?
What is the purpose of this flag over the global variable?

> diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
> index 87ee3d902e..fa71eb6f0c 100644
> --- a/include/exec/tb-stats-flags.h
> +++ b/include/exec/tb-stats-flags.h
> @@ -11,6 +11,9 @@
>   #ifndef TB_STATS_FLAGS
>   #define TB_STATS_FLAGS
>   
> +#define TB_NOTHING    (1 << 0)
> +#define TB_EXEC_STATS (1 << 1)

Why is NOTHING a non-zero flag?

> --- a/include/exec/tb-stats.h
> +++ b/include/exec/tb-stats.h
> @@ -31,6 +31,9 @@
>   #include "exec/tb-stats-flags.h"
>   #include "tcg/tcg.h"
>   
> +#define tb_stats_enabled(tb, JIT_STATS) \
> +    (tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS))

Inline function, though again, why stats_enabled vs global variable?


r~


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 04/10] accel/tcg: add jit stats and time to TBStatistics
  2023-05-30  8:35 ` [PATCH v14 04/10] accel/tcg: add jit stats and time to TBStatistics Fei Wu
  2023-05-30  9:37   ` Markus Armbruster
@ 2023-06-01  1:08   ` Richard Henderson
  2023-06-01  6:48     ` Wu, Fei
  1 sibling, 1 reply; 47+ messages in thread
From: Richard Henderson @ 2023-06-01  1:08 UTC (permalink / raw)
  To: Fei Wu, alex.bennee, qemu-devel
  Cc: Vanderson M . do Rosario, Paolo Bonzini, Markus Armbruster,
	Thomas Huth, Laurent Vivier

On 5/30/23 01:35, Fei Wu wrote:
> +static void collect_jit_profile_info(void *p, uint32_t hash, void *userp)
> +{
> +    struct jit_profile_info *jpi = userp;
> +    TBStatistics *tbs = p;
> +
> +    jpi->translations += tbs->translations.total;
> +    jpi->ops += tbs->code.num_tcg_ops;
> +    if (stat_per_translation(tbs, code.num_tcg_ops) > jpi->ops_max) {
> +        jpi->ops_max = stat_per_translation(tbs, code.num_tcg_ops);
> +    }
> +    jpi->del_ops += tbs->code.deleted_ops;
> +    jpi->temps += tbs->code.temps;
> +    if (stat_per_translation(tbs, code.temps) > jpi->temps_max) {
> +        jpi->temps_max = stat_per_translation(tbs, code.temps);
> +    }
> +    jpi->host += tbs->code.out_len;
> +    jpi->guest += tbs->code.in_len;
> +    jpi->search_data += tbs->code.search_out_len;
> +
> +    jpi->interm_time += stat_per_translation(tbs, gen_times.ir);
> +    jpi->opt_time += stat_per_translation(tbs, gen_times.ir_opt);
> +    jpi->la_time += stat_per_translation(tbs, gen_times.la);
> +    jpi->code_time += stat_per_translation(tbs, gen_times.code);
> +
> +    /*
> +     * The restore time covers how long we have spent restoring state
> +     * from a given TB (e.g. recovering from a fault). It is therefor
> +     * not related to the number of translations we have done.
> +     */
> +    jpi->restore_time += tbs->tb_restore_time;
> +    jpi->restore_count += tbs->tb_restore_count;
> +}

Why do sums of averages (stats_per_translation) instead of accumulating the complete total 
and dividing by jpi->translations?

> +void dump_jit_exec_time_info(uint64_t dev_time, GString *buf)
> +{
> +    static uint64_t last_cpu_exec_time;
> +    uint64_t cpu_exec_time;
> +    uint64_t delta;
> +
> +    cpu_exec_time = tcg_cpu_exec_time();
> +    delta = cpu_exec_time - last_cpu_exec_time;
> +
> +    g_string_append_printf(buf, "async time  %" PRId64 " (%0.3f)\n",
> +                           dev_time, dev_time / (double)NANOSECONDS_PER_SECOND);
> +    g_string_append_printf(buf, "qemu time   %" PRId64 " (%0.3f)\n",
> +                           delta, delta / (double)NANOSECONDS_PER_SECOND);
> +    last_cpu_exec_time = cpu_exec_time;
> +}
> +
> +/* dump JIT statisticis using TCGProfile and TBStats */

"statistics"

> diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
> index 3973591508..749ad182f2 100644
> --- a/accel/tcg/tcg-accel-ops.c
> +++ b/accel/tcg/tcg-accel-ops.c
> @@ -70,10 +70,17 @@ void tcg_cpus_destroy(CPUState *cpu)
>   int tcg_cpus_exec(CPUState *cpu)
>   {
>       int ret;
> +    uint64_t ti;
> +
>       assert(tcg_enabled());
> +    ti = profile_getclock();
> +
>       cpu_exec_start(cpu);
>       ret = cpu_exec(cpu);
>       cpu_exec_end(cpu);
> +
> +    qatomic_add(&tcg_ctx->prof.cpu_exec_time, profile_getclock() - ti);

You can't qatomic_add a 64-bit value on a 32-bit host.
Nor is tcg_ctx a good place to put this.

If you want to accumulate per-cpu data, put it on the cpu where there's no chance of 
racing with anyone.

Finally, I suspect that this isn't even where you want to accumulate time for execution as 
separate from translation.  You'd to that in cpu_exec_enter/cpu_exec_exit.

> @@ -296,6 +315,8 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
>       new_stats->cs_base = cs_base;
>       new_stats->flags = flags;
>       new_stats->stats_enabled = get_default_tbstats_flag();
> +    new_stats->tbs = g_ptr_array_sized_new(4);

Why default to 4?  Is that just a guess, or are we really recomputing tbs that frequently? 
  Is there a good reason not to use g_ptr_array_new()?

> diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
> index 80ffbfb455..a60a92091b 100644
> --- a/accel/tcg/translator.c
> +++ b/accel/tcg/translator.c
> @@ -19,7 +19,7 @@
>   #include "exec/plugin-gen.h"
>   #include "exec/replay-core.h"
>   
> -static void gen_tb_exec_count(TranslationBlock *tb)
> +static inline void gen_tb_exec_count(TranslationBlock *tb)

Why?

>   {
>       if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
>           TCGv_ptr ptr = tcg_temp_ebb_new_ptr();
> @@ -147,6 +147,11 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
>       tb->size = db->pc_next - db->pc_first;
>       tb->icount = db->num_insns;
>   
> +    /* Save number of guest instructions for TB_JIT_STATS */
> +    if (tb_stats_enabled(tb, TB_JIT_STATS)) {
> +        tb->tb_stats->code.num_guest_inst += db->num_insns;
> +    }

Why do this here, as opposed to the block in tb_gen_code?

> +#define stat_per_translation(stat, name) \
> +    (stat->translations.total ? stat->name / stat->translations.total : 0)

Not a fan of this macro, and as per above, the reason for doing the division here is 
questionable.

> diff --git a/include/qemu/timer.h b/include/qemu/timer.h
> index 9a91cb1248..ad0da18a5f 100644
> --- a/include/qemu/timer.h
> +++ b/include/qemu/timer.h
> @@ -989,4 +989,10 @@ static inline int64_t cpu_get_host_ticks(void)
>   }
>   #endif
>   
> +static inline int64_t profile_getclock(void)
> +{
> +    return get_clock();
> +}

Why?  You use get_clock directly most places.

> +/* Timestamps during translation  */
> +typedef struct TCGTime {
> +    uint64_t start;
> +    uint64_t ir_done;
> +    uint64_t opt_done;
> +    uint64_t la_done;
> +    uint64_t code_done;
> +} TCGTime;

int64_t would match the result of get_clock().

> +
> +/*
> + * The TCGProfile structure holds data for the lifetime of the translator.
> + */
> +typedef struct TCGProfile {
> +    /* exec time of this vcpu */
> +    int64_t cpu_exec_time;

TCGContext is not per-cpu, and therefore TCGProfile does not correspond either.

> @@ -608,6 +630,7 @@ struct TCGContext {
>   
>       /* Exit to translator on overflow. */
>       sigjmp_buf jmp_trans;
> +    TranslationBlock *current_tb;

TCGContext.gen_tb already exists.

> -int64_t tcg_cpu_exec_time(void);
> +uint64_t tcg_cpu_exec_time(void);

Why?  (Also, probably wants removing, per above.)

> --- a/softmmu/runstate.c
> +++ b/softmmu/runstate.c
> @@ -728,9 +728,18 @@ static bool main_loop_should_exit(int *status)
>   int qemu_main_loop(void)
>   {
>       int status = EXIT_SUCCESS;
> +#ifdef CONFIG_TCG
> +    uint64_t ti;
> +#endif
>   
>       while (!main_loop_should_exit(&status)) {
> +#ifdef CONFIG_TCG
> +        ti = profile_getclock();
> +#endif
>           main_loop_wait(false);
> +#ifdef CONFIG_TCG
> +        dev_time += profile_getclock() - ti;
> +#endif
>       }

What is this intending to collect?  Because I don't think it measures anything.  Certainly 
nothing related to TCG, CPUs or even devices.

> +    /* ? won't this end up op_opt - op = del_op_count ? */
> +    if (tb_stats_enabled(s->gen_tb, TB_JIT_STATS)) {
> +        s->gen_tb->tb_stats->code.deleted_ops++;
> +    }

Not quite.  We can emit new ops as well.  But how useful this is on its own is debatable.

> +/* avoid copy/paste errors */
> +#define PROF_ADD(to, from, field)                       \
> +    do {                                                \
> +        (to)->field += qatomic_read(&((from)->field));  \
> +    } while (0)

It is only used twice.
In addition, you can't use qatomic_read of a 64-bit variable on a 32-bit host.
You really really need to build e.g. i386.

> @@ -5879,6 +5923,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
>           }
>       }
>   
> +
>   #ifdef CONFIG_DEBUG_TCG

Stray.


r~


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 05/10] debug: add -d tb_stats to control TBStatistics collection:
  2023-05-30  8:35 ` [PATCH v14 05/10] debug: add -d tb_stats to control TBStatistics collection: Fei Wu
@ 2023-06-01  1:18   ` Richard Henderson
  2023-06-01  6:59     ` Wu, Fei
  0 siblings, 1 reply; 47+ messages in thread
From: Richard Henderson @ 2023-06-01  1:18 UTC (permalink / raw)
  To: Fei Wu, alex.bennee, qemu-devel; +Cc: Vanderson M. do Rosario, Paolo Bonzini

On 5/30/23 01:35, Fei Wu wrote:
> From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>
> 
>   -d tb_stats[[,level=(+all+jit+exec+time)][,dump_limit=<number>]]
> 
> "dump_limit" is used to limit the number of dumped TBStats in
> linux-user mode.

Why is user-mode special?

> 
> [all+jit+exec+time] control the profilling level used
> by the TBStats. Can be used as follow:
> 
> -d tb_stats
> -d tb_stats,level=jit+time

Comma is already used to separate different -d options.
You should not overload that.

"level" doesn't make sense for things that aren't hierarchical.

What's wrong with tb-stats-{all,jit,time,exec}?


r~


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 06/10] monitor: adding tb_stats hmp command
  2023-05-30  8:35 ` [PATCH v14 06/10] monitor: adding tb_stats hmp command Fei Wu
@ 2023-06-01  1:23   ` Richard Henderson
  2023-06-01  7:20     ` Wu, Fei
  0 siblings, 1 reply; 47+ messages in thread
From: Richard Henderson @ 2023-06-01  1:23 UTC (permalink / raw)
  To: Fei Wu, alex.bennee, qemu-devel
  Cc: Vanderson M. do Rosario, Dr . David Alan Gilbert, Paolo Bonzini,
	Dr. David Alan Gilbert

On 5/30/23 01:35, Fei Wu wrote:
> From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>
> 
> Adding tb_stats [start|pause|stop|filter] command to hmp.
> This allows controlling the collection of statistics.
> It is also possible to set the level of collection:
> all, jit, or exec.
> 
> tb_stats filter allow to only collect statistics for the TB
> in the last_search list.
> 
> The goal of this command is to allow the dynamic exploration
> of the TCG behavior and quality. Therefore, for now, a
> corresponding QMP command is not worthwhile.
> 
> Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
> Message-Id: <20190829173437.5926-8-vandersonmr2@gmail.com>
> Message-Id: <20190829173437.5926-9-vandersonmr2@gmail.com>
> [AJB: fix authorship]
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> Signed-off-by: Fei Wu <fei2.wu@intel.com>
> ---


I still don't see what pause does.

> diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
> index 68ac7d3f73..805e1fc74d 100644
> --- a/accel/tcg/tb-stats.c
> +++ b/accel/tcg/tb-stats.c
> @@ -16,18 +16,20 @@
>   #include "qemu/timer.h"
>   
>   #include "exec/tb-stats.h"
> +#include "exec/tb-flush.h"
>   #include "tb-context.h"
>   
>   /* TBStatistic collection controls */
>   enum TBStatsStatus {
>       TB_STATS_DISABLED = 0,
>       TB_STATS_RUNNING,
> -    TB_STATS_PAUSED,
> -    TB_STATS_STOPPED
> +    TB_STATS_PAUSED
>   };

Why?

>   
>   static enum TBStatsStatus tcg_collect_tb_stats;
>   static uint32_t default_tbstats_flag;
> +/* only accessed in safe work */
> +static GList *last_search;
>   
>   uint64_t dev_time;
>   
> @@ -170,6 +172,101 @@ void dump_jit_profile_info(TCGProfile *s, GString *buf)
>       g_free(jpi);
>   }
>   
> +static void free_tbstats(void *p, uint32_t hash, void *userp)
> +{
> +    g_free(p);
> +}
> +
> +static void clean_tbstats(void)
> +{
> +    /* remove all tb_stats */
> +    qht_iter(&tb_ctx.tb_stats, free_tbstats, NULL);
> +    qht_destroy(&tb_ctx.tb_stats);
> +}
> +
> +void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data icmd)
> +{
> +    struct TbstatsCommand *cmdinfo = icmd.host_ptr;
> +    int cmd = cmdinfo->cmd;
> +    uint32_t level = cmdinfo->level;
> +
> +    switch (cmd) {
> +    case START:
> +        if (tb_stats_collection_enabled()) {
> +            qemu_printf("TB information already being recorded");
> +            return;
> +        } else if (tb_stats_collection_paused()) {
> +            set_tbstats_flags(level);
> +        } else {
> +            qht_init(&tb_ctx.tb_stats, tb_stats_cmp, CODE_GEN_HTABLE_SIZE,
> +                     QHT_MODE_AUTO_RESIZE);
> +        }
> +
> +        set_default_tbstats_flag(level);
> +        enable_collect_tb_stats();
> +        tb_flush(cpu);
> +        break;
> +    case PAUSE:
> +        if (!tb_stats_collection_enabled()) {
> +            qemu_printf("TB information not being recorded");
> +            return;
> +        }
> +
> +        /*
> +         * Continue to create TBStatistic structures but stop collecting
> +         * statistics
> +         */
> +        pause_collect_tb_stats();
> +        set_default_tbstats_flag(TB_NOTHING);
> +        set_tbstats_flags(TB_PAUSED);
> +        tb_flush(cpu);
> +        break;
> +    case STOP:
> +        if (tb_stats_collection_disabled()) {
> +            qemu_printf("TB information not being recorded");
> +            return;
> +        }
> +
> +        /* Dissalloc all TBStatistics structures and stop creating new ones */
> +        disable_collect_tb_stats();
> +        clean_tbstats();
> +        tb_flush(cpu);
> +        break;
> +    case FILTER:
> +        if (!tb_stats_collection_enabled()) {
> +            qemu_printf("TB information not being recorded");
> +            return;
> +        }
> +        if (!last_search) {
> +            qemu_printf(
> +                    "no search on record! execute info tbs before filtering!");
> +            return;
> +        }
> +
> +        set_default_tbstats_flag(TB_NOTHING);
> +
> +        /*
> +         * Set all tbstats as paused, then return only the ones from last_search
> +         */
> +        pause_collect_tb_stats();
> +        set_tbstats_flags(TB_PAUSED);
> +
> +        for (GList *iter = last_search; iter; iter = g_list_next(iter)) {
> +            TBStatistics *tbs = iter->data;
> +            tbs->stats_enabled = level;
> +        }
> +
> +        tb_flush(cpu);
> +
> +        break;
> +    default: /* INVALID */
> +        g_assert_not_reached();
> +        break;
> +    }
> +
> +    g_free(cmdinfo);
> +}

Why isn't all of this in monitor.c?
It's not used or usable from user-only mode.

> +void set_tbstats_flags(uint32_t flag)
> +{
> +    /* iterate over tbstats setting their flag as TB_NOTHING */
> +    qht_iter(&tb_ctx.tb_stats, reset_tbstats_flag, &flag);
> +}

Again, I question why a global variable is not more appropriate.


r~


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 07/10] tb-stats: reset the tracked TBs on a tb_flush
  2023-05-30  8:35 ` [PATCH v14 07/10] tb-stats: reset the tracked TBs on a tb_flush Fei Wu
@ 2023-06-01  1:30   ` Richard Henderson
  2023-06-01  7:22     ` Wu, Fei
  0 siblings, 1 reply; 47+ messages in thread
From: Richard Henderson @ 2023-06-01  1:30 UTC (permalink / raw)
  To: Fei Wu, alex.bennee, qemu-devel; +Cc: Paolo Bonzini

On 5/30/23 01:35, Fei Wu wrote:
> diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
> index 805e1fc74d..139f049ffc 100644
> --- a/accel/tcg/tb-stats.c
> +++ b/accel/tcg/tb-stats.c
> @@ -267,6 +267,25 @@ void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data icmd)
>       g_free(cmdinfo);
>   }
>   
> +/*
> + * We have to reset the tbs array on a tb_flush as those
> + * TranslationBlocks no longer exist and we no loner know if the
> + * current mapping is still valid.
> + */

The "and we no longer..." part is irrelevant: that's what phys_pc checks.
But the TranslationBlocks no longer exist, so that is sufficient.


r~


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 02/10] accel/tcg: introduce TBStatistics structure
  2023-05-31 23:59   ` Richard Henderson
@ 2023-06-01  1:30     ` Wu, Fei
  2023-06-01  2:48       ` Richard Henderson
  0 siblings, 1 reply; 47+ messages in thread
From: Wu, Fei @ 2023-06-01  1:30 UTC (permalink / raw)
  To: Richard Henderson, alex.bennee, qemu-devel
  Cc: Vanderson M. do Rosario, Paolo Bonzini

On 6/1/2023 7:59 AM, Richard Henderson wrote:
> On 5/30/23 01:35, Fei Wu wrote:
>> +    /*
>> +     * We want to fetch the stats structure before we start code
>> +     * generation so we can count interesting things about this
>> +     * generation.
>> +     */
>> +    if (tb_stats_collection_enabled()) {
>> +        tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
> 
> If cflags & CF_PCREL, then 'pc' should not be cached or matched.
> Using 'phys_pc' will suffice, so pass 0 in that case.
> 
OK, tb_get_stats(phys_pc, (cflags & CF_PCREL ? 0 : pc), cs_base, flags);

btw, is it possible to drop 'pc' even w/o CF_PCREL setting in cflags?
Two TBs with different 'pc' but same 'phys_pc', are they the same?

Thanks,
Fei.

> 
> r~



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 08/10] Adding info [tb-list|tb] commands to HMP (WIP)
  2023-05-30  8:35 ` [PATCH v14 08/10] Adding info [tb-list|tb] commands to HMP (WIP) Fei Wu
@ 2023-06-01  2:40   ` Richard Henderson
  2023-06-01 12:12     ` Wu, Fei
  2023-06-07 12:49     ` Wu, Fei
  0 siblings, 2 replies; 47+ messages in thread
From: Richard Henderson @ 2023-06-01  2:40 UTC (permalink / raw)
  To: Fei Wu, alex.bennee, qemu-devel
  Cc: Vanderson M. do Rosario, Dr . David Alan Gilbert, Paolo Bonzini,
	Dr. David Alan Gilbert

On 5/30/23 01:35, Fei Wu wrote:
> +static void do_dump_tbs_info(int total, int sort_by)
> +{
> +    id = 1;
> +    GList *i;
> +    int count = total;
> +
> +    g_list_free(last_search);
> +    last_search = NULL;
> +
> +    qht_iter(&tb_ctx.tb_stats, collect_tb_stats, NULL);
> +
> +    last_search = g_list_sort_with_data(last_search, inverse_sort_tbs,
> +                                        &sort_by);
> +

Why are you sorting on a list and not an array?
Intuitively, sorting a list of 1 million elements seems like the wrong choice.

Why did you put all the comparisons in one inverse_sort_tbs function, and require 
examining sort_by?  Better would be N sorting functions where sort_by is evaluated once 
before starting the sort.


> +++ b/disas/disas.c
> @@ -8,6 +8,8 @@
>  #include "hw/core/cpu.h"
>  #include "exec/memory.h"
>  
> +#include "qemu/log-for-trace.h"
> +
>  /* Filled in by elfload.c.  Simplistic, but will do for now. */
>  struct syminfo *syminfos = NULL;
>  
> @@ -199,6 +201,24 @@ static void initialize_debug_host(CPUDebug *s)
>  #endif
>  }
>  
> +static int
> +__attribute__((format(printf, 2, 3)))
> +fprintf_log(FILE *a, const char *b, ...)
> +{
> +    va_list ap;
> +    va_start(ap, b);
> +
> +    if (!to_string) {
> +        vfprintf(a, b, ap);
> +    } else {
> +        qemu_vlog(b, ap);
> +    }
> +
> +    va_end(ap);
> +
> +    return 1;
> +}
> +

Not need on this either.  Global variable being checked on each callback, instead of 
selecting the proper callback earlier -- preferably without the global variable.

Did you really need something different than monitor_disas?  You almost certainly want to 
read physical memory and not virtual anyway.

> +void qemu_log_to_monitor(bool enable)
> +{
> +    to_monitor = enable;
> +}
> +
> +void qemu_log_to_string(bool enable, GString *s)
> +{
> +    to_string = enable;
> +    string = s;
> +}

What are these for, and why do you need them?
Why would to_string ever be anything other than (string != NULL)?
Why are you using such very generic names for global variables?


r~


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 02/10] accel/tcg: introduce TBStatistics structure
  2023-06-01  1:30     ` Wu, Fei
@ 2023-06-01  2:48       ` Richard Henderson
  0 siblings, 0 replies; 47+ messages in thread
From: Richard Henderson @ 2023-06-01  2:48 UTC (permalink / raw)
  To: Wu, Fei, alex.bennee, qemu-devel; +Cc: Vanderson M. do Rosario, Paolo Bonzini

On 5/31/23 18:30, Wu, Fei wrote:
> On 6/1/2023 7:59 AM, Richard Henderson wrote:
>> On 5/30/23 01:35, Fei Wu wrote:
>>> +    /*
>>> +     * We want to fetch the stats structure before we start code
>>> +     * generation so we can count interesting things about this
>>> +     * generation.
>>> +     */
>>> +    if (tb_stats_collection_enabled()) {
>>> +        tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
>>
>> If cflags & CF_PCREL, then 'pc' should not be cached or matched.
>> Using 'phys_pc' will suffice, so pass 0 in that case.
>>
> OK, tb_get_stats(phys_pc, (cflags & CF_PCREL ? 0 : pc), cs_base, flags);
> 
> btw, is it possible to drop 'pc' even w/o CF_PCREL setting in cflags?
> Two TBs with different 'pc' but same 'phys_pc', are they the same?

For the purpose of statistics, yes, plausibly.

Knowing how many different translations of the same bit of libc.so, for a guest that does 
not support CF_PCREL, could be revealing.  In any case, you can get back to the virtual 
addresses via tb_stats->tbs[i]->pc, so long as tb_stats->tb[i].cflags & CF_PCREL is not set.


r~


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 02/10] accel/tcg: introduce TBStatistics structure
  2023-06-01  0:01   ` Richard Henderson
@ 2023-06-01  3:19     ` Wu, Fei
  2023-06-01  4:16       ` Richard Henderson
  0 siblings, 1 reply; 47+ messages in thread
From: Wu, Fei @ 2023-06-01  3:19 UTC (permalink / raw)
  To: Richard Henderson, alex.bennee, qemu-devel
  Cc: Vanderson M. do Rosario, Paolo Bonzini

On 6/1/2023 8:01 AM, Richard Henderson wrote:
> On 5/30/23 01:35, Fei Wu wrote:
>> +/* TBStatistic collection controls */
>> +enum TBStatsStatus {
>> +    TB_STATS_DISABLED = 0,
>> +    TB_STATS_RUNNING,
>> +    TB_STATS_PAUSED,
>> +    TB_STATS_STOPPED
>> +};
> 
> I don't see what PAUSED or STOPPED actually do.
> As far as I can see, stats are either being collected or not: a boolean.
> 
If STOPPED, clean_tbstats() gets called, all the tbstats history is
destroyed, but it's not for PAUSED.

Thanks,
Fei.

> 
> r~



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 02/10] accel/tcg: introduce TBStatistics structure
  2023-06-01  3:19     ` Wu, Fei
@ 2023-06-01  4:16       ` Richard Henderson
  2023-06-01  5:36         ` Wu, Fei
  0 siblings, 1 reply; 47+ messages in thread
From: Richard Henderson @ 2023-06-01  4:16 UTC (permalink / raw)
  To: Wu, Fei, alex.bennee, qemu-devel; +Cc: Vanderson M. do Rosario, Paolo Bonzini

On 5/31/23 20:19, Wu, Fei wrote:
> On 6/1/2023 8:01 AM, Richard Henderson wrote:
>> On 5/30/23 01:35, Fei Wu wrote:
>>> +/* TBStatistic collection controls */
>>> +enum TBStatsStatus {
>>> +    TB_STATS_DISABLED = 0,
>>> +    TB_STATS_RUNNING,
>>> +    TB_STATS_PAUSED,
>>> +    TB_STATS_STOPPED
>>> +};
>>
>> I don't see what PAUSED or STOPPED actually do.
>> As far as I can see, stats are either being collected or not: a boolean.
>>
> If STOPPED, clean_tbstats() gets called, all the tbstats history is
> destroyed, but it's not for PAUSED.

But it doesn't PAUSE everything either, since (1) code is built into each tb, and (2) each 
tb->tb_stats->stats_enabled neither changed.  Indeed, tb_stats_collection_enabled() is 
only checked in a fraction of the places stats are collected.


r~



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 02/10] accel/tcg: introduce TBStatistics structure
  2023-06-01  4:16       ` Richard Henderson
@ 2023-06-01  5:36         ` Wu, Fei
  0 siblings, 0 replies; 47+ messages in thread
From: Wu, Fei @ 2023-06-01  5:36 UTC (permalink / raw)
  To: Richard Henderson, alex.bennee, qemu-devel
  Cc: Vanderson M. do Rosario, Paolo Bonzini

On 6/1/2023 12:16 PM, Richard Henderson wrote:
> On 5/31/23 20:19, Wu, Fei wrote:
>> On 6/1/2023 8:01 AM, Richard Henderson wrote:
>>> On 5/30/23 01:35, Fei Wu wrote:
>>>> +/* TBStatistic collection controls */
>>>> +enum TBStatsStatus {
>>>> +    TB_STATS_DISABLED = 0,
>>>> +    TB_STATS_RUNNING,
>>>> +    TB_STATS_PAUSED,
>>>> +    TB_STATS_STOPPED
>>>> +};
>>>
>>> I don't see what PAUSED or STOPPED actually do.
>>> As far as I can see, stats are either being collected or not: a boolean.
>>>
>> If STOPPED, clean_tbstats() gets called, all the tbstats history is
>> destroyed, but it's not for PAUSED.
> 
> But it doesn't PAUSE everything either, since (1) code is built into
> each tb, and (2) each tb->tb_stats->stats_enabled neither changed. 
> Indeed, tb_stats_collection_enabled() is only checked in a fraction of
> the places stats are collected.
> 
tbs are always flushed at the end of hmp tb-stats cmd by tb_flush(),
stats_enabled can be changed e.g. during pause/filter cmd. It should be
a bug if stats are collected w/o tb_stats_collection_enabled().

Thanks,
Fei.
> 
> r~
> 



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 03/10] accel: collecting TB execution count
  2023-06-01  0:05   ` Richard Henderson
@ 2023-06-01  5:44     ` Wu, Fei
  2023-06-01 14:03       ` Richard Henderson
  0 siblings, 1 reply; 47+ messages in thread
From: Wu, Fei @ 2023-06-01  5:44 UTC (permalink / raw)
  To: Richard Henderson, alex.bennee, qemu-devel
  Cc: Vanderson M. do Rosario, Paolo Bonzini

On 6/1/2023 8:05 AM, Richard Henderson wrote:
> On 5/30/23 01:35, Fei Wu wrote:
>> From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>
>>
>> If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
>> enabled, then we instrument the start code of this TB
>> to atomically count the number of times it is executed.
>> We count both the number of "normal" executions and atomic
>> executions of a TB.
>>
>> The execution count of the TB is stored in its respective
>> TBS.
>>
>> All TBStatistics are created by default with the flags from
>> default_tbstats_flag.
>>
>> [Richard Henderson created the inline gen_tb_exec_count]
>>
>> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
>> Message-Id: <20190829173437.5926-3-vandersonmr2@gmail.com>
>> [AJB: Fix author]
>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>> ---
>>   accel/tcg/cpu-exec.c          |  6 ++++++
>>   accel/tcg/tb-stats.c          |  6 ++++++
>>   accel/tcg/tcg-runtime.c       |  1 +
>>   accel/tcg/translate-all.c     |  7 +++++--
>>   accel/tcg/translator.c        | 25 +++++++++++++++++++++++++
>>   include/exec/gen-icount.h     |  1 +
>>   include/exec/tb-stats-flags.h |  5 +++++
>>   include/exec/tb-stats.h       | 13 +++++++++++++
>>   8 files changed, 62 insertions(+), 2 deletions(-)
>>
>> diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
>> index 0e741960da..c0d8f26237 100644
>> --- a/accel/tcg/cpu-exec.c
>> +++ b/accel/tcg/cpu-exec.c
>> @@ -25,6 +25,7 @@
>>   #include "trace.h"
>>   #include "disas/disas.h"
>>   #include "exec/exec-all.h"
>> +#include "exec/tb-stats.h"
>>   #include "tcg/tcg.h"
>>   #include "qemu/atomic.h"
>>   #include "qemu/rcu.h"
>> @@ -562,7 +563,12 @@ void cpu_exec_step_atomic(CPUState *cpu)
>>               mmap_unlock();
>>           }
>>   +        if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
>> +            tb->tb_stats->executions.atomic++;
>> +        }
>> +
>>           cpu_exec_enter(cpu);
>> +
>>           /* execute the generated code */
>>           trace_exec_tb(tb, pc);
>>           cpu_tb_exec(cpu, tb, &tb_exit);
>> diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
>> index f988bd8a31..143a52ef5c 100644
>> --- a/accel/tcg/tb-stats.c
>> +++ b/accel/tcg/tb-stats.c
>> @@ -22,6 +22,7 @@ enum TBStatsStatus {
>>   };
>>     static enum TBStatsStatus tcg_collect_tb_stats;
>> +static uint32_t default_tbstats_flag;
>>     void init_tb_stats_htable(void)
>>   {
>> @@ -56,3 +57,8 @@ bool tb_stats_collection_paused(void)
>>   {
>>       return tcg_collect_tb_stats == TB_STATS_PAUSED;
>>   }
>> +
>> +uint32_t get_default_tbstats_flag(void)
>> +{
>> +    return default_tbstats_flag;
>> +}
> 
> What is the purpose of this function, instead of a global variable?
> What is the meaning of 'default' in its name?
> 
tbs have their specific settings, e.g. after 'filter' cmd:
* the last_search tbs has their stats_enabled kept
* tbs not in the list sets their flag to TB_PAUSED

I guess 'default' means the flag for creating new tbstats.

>> @@ -295,6 +295,7 @@ static TBStatistics *tb_get_stats(tb_page_addr_t
>> phys_pc, target_ulong pc,
>>       new_stats->pc = pc;
>>       new_stats->cs_base = cs_base;
>>       new_stats->flags = flags;
>> +    new_stats->stats_enabled = get_default_tbstats_flag();
> 
> Is this merely to record how we have generated a given TB?
> What is the purpose of this flag over the global variable?
> 
see above.

>> diff --git a/include/exec/tb-stats-flags.h
>> b/include/exec/tb-stats-flags.h
>> index 87ee3d902e..fa71eb6f0c 100644
>> --- a/include/exec/tb-stats-flags.h
>> +++ b/include/exec/tb-stats-flags.h
>> @@ -11,6 +11,9 @@
>>   #ifndef TB_STATS_FLAGS
>>   #define TB_STATS_FLAGS
>>   +#define TB_NOTHING    (1 << 0)
>> +#define TB_EXEC_STATS (1 << 1)
> 
> Why is NOTHING a non-zero flag?
> 
yes, it might looks better. But there is no correctness issue either as
it checks if the specific bit is enabled during collecting stats.

>> --- a/include/exec/tb-stats.h
>> +++ b/include/exec/tb-stats.h
>> @@ -31,6 +31,9 @@
>>   #include "exec/tb-stats-flags.h"
>>   #include "tcg/tcg.h"
>>   +#define tb_stats_enabled(tb, JIT_STATS) \
>> +    (tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS))
> 
> Inline function, though again, why stats_enabled vs global variable?
> 
will inline it.

Thanks,
Fei.

> 
> r~



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 04/10] accel/tcg: add jit stats and time to TBStatistics
  2023-06-01  1:08   ` Richard Henderson
@ 2023-06-01  6:48     ` Wu, Fei
  2023-06-01 14:10       ` Richard Henderson
  2023-06-01 15:10       ` Richard Henderson
  0 siblings, 2 replies; 47+ messages in thread
From: Wu, Fei @ 2023-06-01  6:48 UTC (permalink / raw)
  To: Richard Henderson, alex.bennee, qemu-devel
  Cc: Vanderson M . do Rosario, Paolo Bonzini, Markus Armbruster,
	Thomas Huth, Laurent Vivier

On 6/1/2023 9:08 AM, Richard Henderson wrote:
> On 5/30/23 01:35, Fei Wu wrote:
>> +static void collect_jit_profile_info(void *p, uint32_t hash, void
>> *userp)
>> +{
>> +    struct jit_profile_info *jpi = userp;
>> +    TBStatistics *tbs = p;
>> +
>> +    jpi->translations += tbs->translations.total;
>> +    jpi->ops += tbs->code.num_tcg_ops;
>> +    if (stat_per_translation(tbs, code.num_tcg_ops) > jpi->ops_max) {
>> +        jpi->ops_max = stat_per_translation(tbs, code.num_tcg_ops);
>> +    }
>> +    jpi->del_ops += tbs->code.deleted_ops;
>> +    jpi->temps += tbs->code.temps;
>> +    if (stat_per_translation(tbs, code.temps) > jpi->temps_max) {
>> +        jpi->temps_max = stat_per_translation(tbs, code.temps);
>> +    }
>> +    jpi->host += tbs->code.out_len;
>> +    jpi->guest += tbs->code.in_len;
>> +    jpi->search_data += tbs->code.search_out_len;
>> +
>> +    jpi->interm_time += stat_per_translation(tbs, gen_times.ir);
>> +    jpi->opt_time += stat_per_translation(tbs, gen_times.ir_opt);
>> +    jpi->la_time += stat_per_translation(tbs, gen_times.la);
>> +    jpi->code_time += stat_per_translation(tbs, gen_times.code);
>> +
>> +    /*
>> +     * The restore time covers how long we have spent restoring state
>> +     * from a given TB (e.g. recovering from a fault). It is therefor
>> +     * not related to the number of translations we have done.
>> +     */
>> +    jpi->restore_time += tbs->tb_restore_time;
>> +    jpi->restore_count += tbs->tb_restore_count;
>> +}
> 
> Why do sums of averages (stats_per_translation) instead of accumulating
> the complete total and dividing by jpi->translations?
> 
There has some inconsistency.

>> +void dump_jit_exec_time_info(uint64_t dev_time, GString *buf)
>> +{
>> +    static uint64_t last_cpu_exec_time;
>> +    uint64_t cpu_exec_time;
>> +    uint64_t delta;
>> +
>> +    cpu_exec_time = tcg_cpu_exec_time();
>> +    delta = cpu_exec_time - last_cpu_exec_time;
>> +
>> +    g_string_append_printf(buf, "async time  %" PRId64 " (%0.3f)\n",
>> +                           dev_time, dev_time /
>> (double)NANOSECONDS_PER_SECOND);
>> +    g_string_append_printf(buf, "qemu time   %" PRId64 " (%0.3f)\n",
>> +                           delta, delta /
>> (double)NANOSECONDS_PER_SECOND);
>> +    last_cpu_exec_time = cpu_exec_time;
>> +}
>> +
>> +/* dump JIT statisticis using TCGProfile and TBStats */
> 
> "statistics"
> 
ok.

>> diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
>> index 3973591508..749ad182f2 100644
>> --- a/accel/tcg/tcg-accel-ops.c
>> +++ b/accel/tcg/tcg-accel-ops.c
>> @@ -70,10 +70,17 @@ void tcg_cpus_destroy(CPUState *cpu)
>>   int tcg_cpus_exec(CPUState *cpu)
>>   {
>>       int ret;
>> +    uint64_t ti;
>> +
>>       assert(tcg_enabled());
>> +    ti = profile_getclock();
>> +
>>       cpu_exec_start(cpu);
>>       ret = cpu_exec(cpu);
>>       cpu_exec_end(cpu);
>> +
>> +    qatomic_add(&tcg_ctx->prof.cpu_exec_time, profile_getclock() - ti);
> 
> You can't qatomic_add a 64-bit value on a 32-bit host.

Right, I only changed the counters to size_t, didn't fix time part. Is
it possible to support it with some kind of locks on 32-bit as a generic
fallback solution? After all, 32-bit host seems not popular, it might be
sacrifice performance a little bit.

For me, the times 'dev_time' and 'cpu_exec_time' is only for the
developer to get a sense of how large part of time spent on such as "ir"
and "code". Alex mentioned in another thread that it's arguable if they
are useful. If not, we can remove it.

> Nor is tcg_ctx a good place to put this.
> 
> If you want to accumulate per-cpu data, put it on the cpu where there's
> no chance of racing with anyone.
> 
> Finally, I suspect that this isn't even where you want to accumulate
> time for execution as separate from translation.  You'd to that in
> cpu_exec_enter/cpu_exec_exit.
> 
>> @@ -296,6 +315,8 @@ static TBStatistics *tb_get_stats(tb_page_addr_t
>> phys_pc, target_ulong pc,
>>       new_stats->cs_base = cs_base;
>>       new_stats->flags = flags;
>>       new_stats->stats_enabled = get_default_tbstats_flag();
>> +    new_stats->tbs = g_ptr_array_sized_new(4);
> 
> Why default to 4?  Is that just a guess, or are we really recomputing
> tbs that frequently?  Is there a good reason not to use g_ptr_array_new()?
> 
Is this the same question in 2019 :) I cannot find the original link
right now. I will try to find it again later.

>> diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
>> index 80ffbfb455..a60a92091b 100644
>> --- a/accel/tcg/translator.c
>> +++ b/accel/tcg/translator.c
>> @@ -19,7 +19,7 @@
>>   #include "exec/plugin-gen.h"
>>   #include "exec/replay-core.h"
>>   -static void gen_tb_exec_count(TranslationBlock *tb)
>> +static inline void gen_tb_exec_count(TranslationBlock *tb)
> 
> Why?
> 
will remove.

>>   {
>>       if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
>>           TCGv_ptr ptr = tcg_temp_ebb_new_ptr();
>> @@ -147,6 +147,11 @@ void translator_loop(CPUState *cpu,
>> TranslationBlock *tb, int *max_insns,
>>       tb->size = db->pc_next - db->pc_first;
>>       tb->icount = db->num_insns;
>>   +    /* Save number of guest instructions for TB_JIT_STATS */
>> +    if (tb_stats_enabled(tb, TB_JIT_STATS)) {
>> +        tb->tb_stats->code.num_guest_inst += db->num_insns;
>> +    }
> 
> Why do this here, as opposed to the block in tb_gen_code?
> 
close to scene of action?

>> +#define stat_per_translation(stat, name) \
>> +    (stat->translations.total ? stat->name / stat->translations.total
>> : 0)
> 
> Not a fan of this macro, and as per above, the reason for doing the
> division here is questionable.
> 
>> diff --git a/include/qemu/timer.h b/include/qemu/timer.h
>> index 9a91cb1248..ad0da18a5f 100644
>> --- a/include/qemu/timer.h
>> +++ b/include/qemu/timer.h
>> @@ -989,4 +989,10 @@ static inline int64_t cpu_get_host_ticks(void)
>>   }
>>   #endif
>>   +static inline int64_t profile_getclock(void)
>> +{
>> +    return get_clock();
>> +}
> 
> Why?  You use get_clock directly most places.
> 
for the ones still in TCGProfile, but I think it's okay to remove.

>> +/* Timestamps during translation  */
>> +typedef struct TCGTime {
>> +    uint64_t start;
>> +    uint64_t ir_done;
>> +    uint64_t opt_done;
>> +    uint64_t la_done;
>> +    uint64_t code_done;
>> +} TCGTime;
> 
> int64_t would match the result of get_clock().
> 
OK. btw, there are still several occurrences of u64 for time in the
existing code.

>> +
>> +/*
>> + * The TCGProfile structure holds data for the lifetime of the
>> translator.
>> + */
>> +typedef struct TCGProfile {
>> +    /* exec time of this vcpu */
>> +    int64_t cpu_exec_time;
> 
> TCGContext is not per-cpu, and therefore TCGProfile does not correspond
> either.
> 
>> @@ -608,6 +630,7 @@ struct TCGContext {
>>         /* Exit to translator on overflow. */
>>       sigjmp_buf jmp_trans;
>> +    TranslationBlock *current_tb;
> 
> TCGContext.gen_tb already exists.
> 
no user actually, will remove it.

>> -int64_t tcg_cpu_exec_time(void);
>> +uint64_t tcg_cpu_exec_time(void);
> 
> Why?  (Also, probably wants removing, per above.)
> 
The original patch mentioned 'make it an uint64_t as we won't be dealing
in negative numbers.'

>> --- a/softmmu/runstate.c
>> +++ b/softmmu/runstate.c
>> @@ -728,9 +728,18 @@ static bool main_loop_should_exit(int *status)
>>   int qemu_main_loop(void)
>>   {
>>       int status = EXIT_SUCCESS;
>> +#ifdef CONFIG_TCG
>> +    uint64_t ti;
>> +#endif
>>         while (!main_loop_should_exit(&status)) {
>> +#ifdef CONFIG_TCG
>> +        ti = profile_getclock();
>> +#endif
>>           main_loop_wait(false);
>> +#ifdef CONFIG_TCG
>> +        dev_time += profile_getclock() - ti;
>> +#endif
>>       }
> 
> What is this intending to collect?  Because I don't think it measures
> anything.  Certainly nothing related to TCG, CPUs or even devices.
> 
It's exported to hmp cmd in dump_jit_exec_time_info() together with
cpu_exec_time.

>> +    /* ? won't this end up op_opt - op = del_op_count ? */
>> +    if (tb_stats_enabled(s->gen_tb, TB_JIT_STATS)) {
>> +        s->gen_tb->tb_stats->code.deleted_ops++;
>> +    }
> 
> Not quite.  We can emit new ops as well.  But how useful this is on its
> own is debatable.
> 
okay, so I will remove the comment.

>> +/* avoid copy/paste errors */
>> +#define PROF_ADD(to, from, field)                       \
>> +    do {                                                \
>> +        (to)->field += qatomic_read(&((from)->field));  \
>> +    } while (0)
> 
> It is only used twice.
> In addition, you can't use qatomic_read of a 64-bit variable on a 32-bit
> host.
> You really really need to build e.g. i386.
> 
>> @@ -5879,6 +5923,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock
>> *tb, uint64_t pc_start)
>>           }
>>       }
>>   +
>>   #ifdef CONFIG_DEBUG_TCG
> 
> Stray.
> 
will fix.

Thanks,
Fei.

> 
> r~



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 05/10] debug: add -d tb_stats to control TBStatistics collection:
  2023-06-01  1:18   ` Richard Henderson
@ 2023-06-01  6:59     ` Wu, Fei
  0 siblings, 0 replies; 47+ messages in thread
From: Wu, Fei @ 2023-06-01  6:59 UTC (permalink / raw)
  To: Richard Henderson, alex.bennee, qemu-devel
  Cc: Vanderson M. do Rosario, Paolo Bonzini

On 6/1/2023 9:18 AM, Richard Henderson wrote:
> On 5/30/23 01:35, Fei Wu wrote:
>> From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>
>>
>>   -d tb_stats[[,level=(+all+jit+exec+time)][,dump_limit=<number>]]
>>
>> "dump_limit" is used to limit the number of dumped TBStats in
>> linux-user mode.
> 
> Why is user-mode special?
> 
Should be an issue in comment, not in code.

>>
>> [all+jit+exec+time] control the profilling level used
>> by the TBStats. Can be used as follow:
>>
>> -d tb_stats
>> -d tb_stats,level=jit+time
> 
> Comma is already used to separate different -d options.
> You should not overload that.
> 
> "level" doesn't make sense for things that aren't hierarchical.
> 
> What's wrong with tb-stats-{all,jit,time,exec}?
> 
Let me try.

Thanks,
Fei.
> 
> r~



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 06/10] monitor: adding tb_stats hmp command
  2023-06-01  1:23   ` Richard Henderson
@ 2023-06-01  7:20     ` Wu, Fei
  2023-06-01 14:25       ` Richard Henderson
  0 siblings, 1 reply; 47+ messages in thread
From: Wu, Fei @ 2023-06-01  7:20 UTC (permalink / raw)
  To: Richard Henderson, alex.bennee, qemu-devel
  Cc: Vanderson M. do Rosario, Dr . David Alan Gilbert, Paolo Bonzini,
	Dr. David Alan Gilbert

On 6/1/2023 9:23 AM, Richard Henderson wrote:
> On 5/30/23 01:35, Fei Wu wrote:
>> From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>
>>
>> Adding tb_stats [start|pause|stop|filter] command to hmp.
>> This allows controlling the collection of statistics.
>> It is also possible to set the level of collection:
>> all, jit, or exec.
>>
>> tb_stats filter allow to only collect statistics for the TB
>> in the last_search list.
>>
>> The goal of this command is to allow the dynamic exploration
>> of the TCG behavior and quality. Therefore, for now, a
>> corresponding QMP command is not worthwhile.
>>
>> Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
>> Message-Id: <20190829173437.5926-8-vandersonmr2@gmail.com>
>> Message-Id: <20190829173437.5926-9-vandersonmr2@gmail.com>
>> [AJB: fix authorship]
>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>> ---
> 
> 
> I still don't see what pause does.
> 
maybe it's necessary to discuss the user scenario, I suppose the
original design is for this case:
* start
* pause - there are some interesting stats we want to keep
* not interested timeline
* start again - continue to investigate the interesting ones

>> diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
>> index 68ac7d3f73..805e1fc74d 100644
>> --- a/accel/tcg/tb-stats.c
>> +++ b/accel/tcg/tb-stats.c
>> @@ -16,18 +16,20 @@
>>   #include "qemu/timer.h"
>>     #include "exec/tb-stats.h"
>> +#include "exec/tb-flush.h"
>>   #include "tb-context.h"
>>     /* TBStatistic collection controls */
>>   enum TBStatsStatus {
>>       TB_STATS_DISABLED = 0,
>>       TB_STATS_RUNNING,
>> -    TB_STATS_PAUSED,
>> -    TB_STATS_STOPPED
>> +    TB_STATS_PAUSED
>>   };
> 
> Why?
> 
STOPPED is the same as DISABLED.

>>     static enum TBStatsStatus tcg_collect_tb_stats;
>>   static uint32_t default_tbstats_flag;
>> +/* only accessed in safe work */
>> +static GList *last_search;
>>     uint64_t dev_time;
>>   @@ -170,6 +172,101 @@ void dump_jit_profile_info(TCGProfile *s,
>> GString *buf)
>>       g_free(jpi);
>>   }
>>   +static void free_tbstats(void *p, uint32_t hash, void *userp)
>> +{
>> +    g_free(p);
>> +}
>> +
>> +static void clean_tbstats(void)
>> +{
>> +    /* remove all tb_stats */
>> +    qht_iter(&tb_ctx.tb_stats, free_tbstats, NULL);
>> +    qht_destroy(&tb_ctx.tb_stats);
>> +}
>> +
>> +void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data icmd)
>> +{
>> +    struct TbstatsCommand *cmdinfo = icmd.host_ptr;
>> +    int cmd = cmdinfo->cmd;
>> +    uint32_t level = cmdinfo->level;
>> +
>> +    switch (cmd) {
>> +    case START:
>> +        if (tb_stats_collection_enabled()) {
>> +            qemu_printf("TB information already being recorded");
>> +            return;
>> +        } else if (tb_stats_collection_paused()) {
>> +            set_tbstats_flags(level);
>> +        } else {
>> +            qht_init(&tb_ctx.tb_stats, tb_stats_cmp,
>> CODE_GEN_HTABLE_SIZE,
>> +                     QHT_MODE_AUTO_RESIZE);
>> +        }
>> +
>> +        set_default_tbstats_flag(level);
>> +        enable_collect_tb_stats();
>> +        tb_flush(cpu);
>> +        break;
>> +    case PAUSE:
>> +        if (!tb_stats_collection_enabled()) {
>> +            qemu_printf("TB information not being recorded");
>> +            return;
>> +        }
>> +
>> +        /*
>> +         * Continue to create TBStatistic structures but stop collecting
>> +         * statistics
>> +         */
>> +        pause_collect_tb_stats();
>> +        set_default_tbstats_flag(TB_NOTHING);
>> +        set_tbstats_flags(TB_PAUSED);
>> +        tb_flush(cpu);
>> +        break;
>> +    case STOP:
>> +        if (tb_stats_collection_disabled()) {
>> +            qemu_printf("TB information not being recorded");
>> +            return;
>> +        }
>> +
>> +        /* Dissalloc all TBStatistics structures and stop creating
>> new ones */
>> +        disable_collect_tb_stats();
>> +        clean_tbstats();
>> +        tb_flush(cpu);
>> +        break;
>> +    case FILTER:
>> +        if (!tb_stats_collection_enabled()) {
>> +            qemu_printf("TB information not being recorded");
>> +            return;
>> +        }
>> +        if (!last_search) {
>> +            qemu_printf(
>> +                    "no search on record! execute info tbs before
>> filtering!");
>> +            return;
>> +        }
>> +
>> +        set_default_tbstats_flag(TB_NOTHING);
>> +
>> +        /*
>> +         * Set all tbstats as paused, then return only the ones from
>> last_search
>> +         */
>> +        pause_collect_tb_stats();
>> +        set_tbstats_flags(TB_PAUSED);
>> +
>> +        for (GList *iter = last_search; iter; iter =
>> g_list_next(iter)) {
>> +            TBStatistics *tbs = iter->data;
>> +            tbs->stats_enabled = level;
>> +        }
>> +
>> +        tb_flush(cpu);
>> +
>> +        break;
>> +    default: /* INVALID */
>> +        g_assert_not_reached();
>> +        break;
>> +    }
>> +
>> +    g_free(cmdinfo);
>> +}
> 
> Why isn't all of this in monitor.c?
> It's not used or usable from user-only mode.
> 
closer to tb-stats, or closer to monitor? It seems to me monitor.c only
contains the shim layer of qmp/hmp, the real stuffs which are large
enough are put together with their logic part, e.g. dump_exec_info().

Thanks,
Fei.

>> +void set_tbstats_flags(uint32_t flag)
>> +{
>> +    /* iterate over tbstats setting their flag as TB_NOTHING */
>> +    qht_iter(&tb_ctx.tb_stats, reset_tbstats_flag, &flag);
>> +}
> 
> Again, I question why a global variable is not more appropriate.
> 
> 
> r~



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 07/10] tb-stats: reset the tracked TBs on a tb_flush
  2023-06-01  1:30   ` Richard Henderson
@ 2023-06-01  7:22     ` Wu, Fei
  0 siblings, 0 replies; 47+ messages in thread
From: Wu, Fei @ 2023-06-01  7:22 UTC (permalink / raw)
  To: Richard Henderson, alex.bennee, qemu-devel; +Cc: Paolo Bonzini

On 6/1/2023 9:30 AM, Richard Henderson wrote:
> On 5/30/23 01:35, Fei Wu wrote:
>> diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
>> index 805e1fc74d..139f049ffc 100644
>> --- a/accel/tcg/tb-stats.c
>> +++ b/accel/tcg/tb-stats.c
>> @@ -267,6 +267,25 @@ void do_hmp_tbstats_safe(CPUState *cpu,
>> run_on_cpu_data icmd)
>>       g_free(cmdinfo);
>>   }
>>   +/*
>> + * We have to reset the tbs array on a tb_flush as those
>> + * TranslationBlocks no longer exist and we no loner know if the
>> + * current mapping is still valid.
>> + */
> 
> The "and we no longer..." part is irrelevant: that's what phys_pc checks.
> But the TranslationBlocks no longer exist, so that is sufficient.
> 
Will remove this part from comment.

Thanks,
Fei.

> 
> r~



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 08/10] Adding info [tb-list|tb] commands to HMP (WIP)
  2023-06-01  2:40   ` Richard Henderson
@ 2023-06-01 12:12     ` Wu, Fei
  2023-06-06  7:30       ` Wu, Fei
  2023-06-07 12:49     ` Wu, Fei
  1 sibling, 1 reply; 47+ messages in thread
From: Wu, Fei @ 2023-06-01 12:12 UTC (permalink / raw)
  To: Richard Henderson, alex.bennee, qemu-devel
  Cc: Vanderson M. do Rosario, Dr . David Alan Gilbert, Paolo Bonzini,
	Dr. David Alan Gilbert

On 6/1/2023 10:40 AM, Richard Henderson wrote:
> On 5/30/23 01:35, Fei Wu wrote:
>> +static void do_dump_tbs_info(int total, int sort_by)
>> +{
>> +    id = 1;
>> +    GList *i;
>> +    int count = total;
>> +
>> +    g_list_free(last_search);
>> +    last_search = NULL;
>> +
>> +    qht_iter(&tb_ctx.tb_stats, collect_tb_stats, NULL);
>> +
>> +    last_search = g_list_sort_with_data(last_search, inverse_sort_tbs,
>> +                                        &sort_by);
>> +
> 
> Why are you sorting on a list and not an array?
> Intuitively, sorting a list of 1 million elements seems like the wrong
> choice.
> 
> Why did you put all the comparisons in one inverse_sort_tbs function,
> and require examining sort_by?  Better would be N sorting functions
> where sort_by is evaluated once before starting the sort.
> 
> 
>> +++ b/disas/disas.c
>> @@ -8,6 +8,8 @@
>>  #include "hw/core/cpu.h"
>>  #include "exec/memory.h"
>>  
>> +#include "qemu/log-for-trace.h"
>> +
>>  /* Filled in by elfload.c.  Simplistic, but will do for now. */
>>  struct syminfo *syminfos = NULL;
>>  
>> @@ -199,6 +201,24 @@ static void initialize_debug_host(CPUDebug *s)
>>  #endif
>>  }
>>  
>> +static int
>> +__attribute__((format(printf, 2, 3)))
>> +fprintf_log(FILE *a, const char *b, ...)
>> +{
>> +    va_list ap;
>> +    va_start(ap, b);
>> +
>> +    if (!to_string) {
>> +        vfprintf(a, b, ap);
>> +    } else {
>> +        qemu_vlog(b, ap);
>> +    }
>> +
>> +    va_end(ap);
>> +
>> +    return 1;
>> +}
>> +
> 
> Not need on this either.  Global variable being checked on each
> callback, instead of selecting the proper callback earlier -- preferably
> without the global variable.
> 
> Did you really need something different than monitor_disas?  You almost
> certainly want to read physical memory and not virtual anyway.
> 
This makes me think the necessity of 'info tb', the primary extra info
it adds is guest instruction, which can be gotten from 'x/i' w/o calling
tb_gen_code.

(qemu) info tb 1
------------------------------

TB id:1 | phys:0x79bc86 virt:0xffffffff8059bc86 flags:0x01024001 0 inv/1
        | exec:56962444/0 guest inst cov:0.61%
        | trans:1 ints: g:8 op:27 op_opt:22 spills:0
        | h/g (host bytes / guest insts): 26.000000
        | time to gen at 2.4GHz => code:747.08(ns) IR:477.92(ns)

----------------
IN:
Priv: 1; Virt: 0

0xffffffff8059bc86:  00000013          nop
0xffffffff8059bc8a:  00000013          nop
0xffffffff8059bc8e:  00000013          nop
0xffffffff8059bc92:  00000013          nop
0xffffffff8059bc96:  1141              addi                    sp,sp,-16
0xffffffff8059bc98:  e422              sd                      s0,8(sp)
0xffffffff8059bc9a:  0800              addi                    s0,sp,16
0xffffffff8059bc9c:  c0102573          rdtime                  a0
------------------------------

(qemu) x/8i 0xffffffff8059bc86
x/8i 0xffffffff8059bc86
0xffffffff8059bc86:  00000013          nop
0xffffffff8059bc8a:  00000013          nop
0xffffffff8059bc8e:  00000013          nop
0xffffffff8059bc92:  00000013          nop
0xffffffff8059bc96:  1141              addi                    sp,sp,-16
0xffffffff8059bc98:  e422              sd                      s0,8(sp)
0xffffffff8059bc9a:  0800              addi                    s0,sp,16
0xffffffff8059bc9c:  c0102573          rdtime                  a0


Thanks,
Fei.

>> +void qemu_log_to_monitor(bool enable)
>> +{
>> +    to_monitor = enable;
>> +}
>> +
>> +void qemu_log_to_string(bool enable, GString *s)
>> +{
>> +    to_string = enable;
>> +    string = s;
>> +}
> 
> What are these for, and why do you need them?
> Why would to_string ever be anything other than (string != NULL)?
> Why are you using such very generic names for global variables?
> 
> 
> r~



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 03/10] accel: collecting TB execution count
  2023-06-01  5:44     ` Wu, Fei
@ 2023-06-01 14:03       ` Richard Henderson
  2023-06-02  1:54         ` Wu, Fei
  0 siblings, 1 reply; 47+ messages in thread
From: Richard Henderson @ 2023-06-01 14:03 UTC (permalink / raw)
  To: Wu, Fei, alex.bennee, qemu-devel; +Cc: Vanderson M. do Rosario, Paolo Bonzini

On 5/31/23 22:44, Wu, Fei wrote:
> On 6/1/2023 8:05 AM, Richard Henderson wrote:
>> On 5/30/23 01:35, Fei Wu wrote:
>>> From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>
>>>
>>> If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
>>> enabled, then we instrument the start code of this TB
>>> to atomically count the number of times it is executed.
>>> We count both the number of "normal" executions and atomic
>>> executions of a TB.
>>>
>>> The execution count of the TB is stored in its respective
>>> TBS.
>>>
>>> All TBStatistics are created by default with the flags from
>>> default_tbstats_flag.
>>>
>>> [Richard Henderson created the inline gen_tb_exec_count]
>>>
>>> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
>>> Message-Id: <20190829173437.5926-3-vandersonmr2@gmail.com>
>>> [AJB: Fix author]
>>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>>> ---
>>>    accel/tcg/cpu-exec.c          |  6 ++++++
>>>    accel/tcg/tb-stats.c          |  6 ++++++
>>>    accel/tcg/tcg-runtime.c       |  1 +
>>>    accel/tcg/translate-all.c     |  7 +++++--
>>>    accel/tcg/translator.c        | 25 +++++++++++++++++++++++++
>>>    include/exec/gen-icount.h     |  1 +
>>>    include/exec/tb-stats-flags.h |  5 +++++
>>>    include/exec/tb-stats.h       | 13 +++++++++++++
>>>    8 files changed, 62 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
>>> index 0e741960da..c0d8f26237 100644
>>> --- a/accel/tcg/cpu-exec.c
>>> +++ b/accel/tcg/cpu-exec.c
>>> @@ -25,6 +25,7 @@
>>>    #include "trace.h"
>>>    #include "disas/disas.h"
>>>    #include "exec/exec-all.h"
>>> +#include "exec/tb-stats.h"
>>>    #include "tcg/tcg.h"
>>>    #include "qemu/atomic.h"
>>>    #include "qemu/rcu.h"
>>> @@ -562,7 +563,12 @@ void cpu_exec_step_atomic(CPUState *cpu)
>>>                mmap_unlock();
>>>            }
>>>    +        if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
>>> +            tb->tb_stats->executions.atomic++;
>>> +        }
>>> +
>>>            cpu_exec_enter(cpu);
>>> +
>>>            /* execute the generated code */
>>>            trace_exec_tb(tb, pc);
>>>            cpu_tb_exec(cpu, tb, &tb_exit);
>>> diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
>>> index f988bd8a31..143a52ef5c 100644
>>> --- a/accel/tcg/tb-stats.c
>>> +++ b/accel/tcg/tb-stats.c
>>> @@ -22,6 +22,7 @@ enum TBStatsStatus {
>>>    };
>>>      static enum TBStatsStatus tcg_collect_tb_stats;
>>> +static uint32_t default_tbstats_flag;
>>>      void init_tb_stats_htable(void)
>>>    {
>>> @@ -56,3 +57,8 @@ bool tb_stats_collection_paused(void)
>>>    {
>>>        return tcg_collect_tb_stats == TB_STATS_PAUSED;
>>>    }
>>> +
>>> +uint32_t get_default_tbstats_flag(void)
>>> +{
>>> +    return default_tbstats_flag;
>>> +}
>>
>> What is the purpose of this function, instead of a global variable?
>> What is the meaning of 'default' in its name?
>>
> tbs have their specific settings, e.g. after 'filter' cmd:
> * the last_search tbs has their stats_enabled kept
> * tbs not in the list sets their flag to TB_PAUSED

How does this affect anything at all?

We are not *checking* the tb->tb_stats->stats_enabled bit except at code generation time, 
not code execution time.  Therefore nothing ever reads the TB_PAUSED bit (or, 
correspondingly, the clearing of the other bits).  The setting of the bit is permanent.

> yes, it might looks better. But there is no correctness issue either as
> it checks if the specific bit is enabled during collecting stats.

No, it does not.  See above.


r~


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 04/10] accel/tcg: add jit stats and time to TBStatistics
  2023-06-01  6:48     ` Wu, Fei
@ 2023-06-01 14:10       ` Richard Henderson
  2023-06-01 15:10       ` Richard Henderson
  1 sibling, 0 replies; 47+ messages in thread
From: Richard Henderson @ 2023-06-01 14:10 UTC (permalink / raw)
  To: Wu, Fei, alex.bennee, qemu-devel
  Cc: Vanderson M . do Rosario, Paolo Bonzini, Markus Armbruster,
	Thomas Huth, Laurent Vivier

On 5/31/23 23:48, Wu, Fei wrote:
>>> -int64_t tcg_cpu_exec_time(void);
>>> +uint64_t tcg_cpu_exec_time(void);
>>
>> Why?  (Also, probably wants removing, per above.)
>>
> The original patch mentioned 'make it an uint64_t as we won't be dealing
> in negative numbers.'

The signed vs unsigned thing is something that should be handled throughout everything 
that handles times, not adjusted here and there by only profiling.

>>> --- a/softmmu/runstate.c
>>> +++ b/softmmu/runstate.c
>>> @@ -728,9 +728,18 @@ static bool main_loop_should_exit(int *status)
>>>    int qemu_main_loop(void)
>>>    {
>>>        int status = EXIT_SUCCESS;
>>> +#ifdef CONFIG_TCG
>>> +    uint64_t ti;
>>> +#endif
>>>          while (!main_loop_should_exit(&status)) {
>>> +#ifdef CONFIG_TCG
>>> +        ti = profile_getclock();
>>> +#endif
>>>            main_loop_wait(false);
>>> +#ifdef CONFIG_TCG
>>> +        dev_time += profile_getclock() - ti;
>>> +#endif
>>>        }
>>
>> What is this intending to collect?  Because I don't think it measures
>> anything.  Certainly nothing related to TCG, CPUs or even devices.
>>
> It's exported to hmp cmd in dump_jit_exec_time_info() together with
> cpu_exec_time.

That doesn't answer my question: What do you think it measures?

r~


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 06/10] monitor: adding tb_stats hmp command
  2023-06-01  7:20     ` Wu, Fei
@ 2023-06-01 14:25       ` Richard Henderson
  0 siblings, 0 replies; 47+ messages in thread
From: Richard Henderson @ 2023-06-01 14:25 UTC (permalink / raw)
  To: Wu, Fei, alex.bennee, qemu-devel
  Cc: Vanderson M. do Rosario, Dr . David Alan Gilbert, Paolo Bonzini,
	Dr. David Alan Gilbert

On 6/1/23 00:20, Wu, Fei wrote:
> On 6/1/2023 9:23 AM, Richard Henderson wrote:
>> On 5/30/23 01:35, Fei Wu wrote:
>>> From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>
>>>
>>> Adding tb_stats [start|pause|stop|filter] command to hmp.
>>> This allows controlling the collection of statistics.
>>> It is also possible to set the level of collection:
>>> all, jit, or exec.
>>>
>>> tb_stats filter allow to only collect statistics for the TB
>>> in the last_search list.
>>>
>>> The goal of this command is to allow the dynamic exploration
>>> of the TCG behavior and quality. Therefore, for now, a
>>> corresponding QMP command is not worthwhile.
>>>
>>> Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
>>> Message-Id: <20190829173437.5926-8-vandersonmr2@gmail.com>
>>> Message-Id: <20190829173437.5926-9-vandersonmr2@gmail.com>
>>> [AJB: fix authorship]
>>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>>> ---
>>
>>
>> I still don't see what pause does.
>>
> maybe it's necessary to discuss the user scenario, I suppose the
> original design is for this case:
> * start
> * pause - there are some interesting stats we want to keep
> * not interested timeline
> * start again - continue to investigate the interesting ones

This use case seems very complicated.  Perhaps start with something simpler.

>>>    enum TBStatsStatus {
>>>        TB_STATS_DISABLED = 0,
>>>        TB_STATS_RUNNING,
>>> -    TB_STATS_PAUSED,
>>> -    TB_STATS_STOPPED
>>> +    TB_STATS_PAUSED
>>>    };
>>
>> Why?
>>
> STOPPED is the same as DISABLED.

Then this should be squashed back to patch 2, so that it is never added.

>> Why isn't all of this in monitor.c?
>> It's not used or usable from user-only mode.
>>
> closer to tb-stats, or closer to monitor? It seems to me monitor.c only
> contains the shim layer of qmp/hmp, the real stuffs which are large
> enough are put together with their logic part, e.g. dump_exec_info().

I mean accel/tcg/monitor.c, which is built only when there exists a monitor.


r~


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 04/10] accel/tcg: add jit stats and time to TBStatistics
  2023-06-01  6:48     ` Wu, Fei
  2023-06-01 14:10       ` Richard Henderson
@ 2023-06-01 15:10       ` Richard Henderson
  1 sibling, 0 replies; 47+ messages in thread
From: Richard Henderson @ 2023-06-01 15:10 UTC (permalink / raw)
  To: Wu, Fei, alex.bennee, qemu-devel
  Cc: Vanderson M . do Rosario, Paolo Bonzini, Markus Armbruster,
	Thomas Huth, Laurent Vivier

On 5/31/23 23:48, Wu, Fei wrote:
> On 6/1/2023 9:08 AM, Richard Henderson wrote:
>> On 5/30/23 01:35, Fei Wu wrote:
>>> +    qatomic_add(&tcg_ctx->prof.cpu_exec_time, profile_getclock() - ti);
>>
>> You can't qatomic_add a 64-bit value on a 32-bit host.
> 
> Right, I only changed the counters to size_t, didn't fix time part. Is
> it possible to support it with some kind of locks on 32-bit as a generic
> fallback solution? After all, 32-bit host seems not popular, it might be
> sacrifice performance a little bit.

We have

int64_t  qatomic_read_i64(const int64_t *ptr);
uint64_t qatomic_read_u64(const uint64_t *ptr);
void qatomic_set_i64(int64_t *ptr, int64_t val);
void qatomic_set_u64(uint64_t *ptr, uint64_t val);

to support 32-bit hosts. You must also use

typedef int64_t aligned_int64_t __attribute__((aligned(8)));
typedef uint64_t aligned_uint64_t __attribute__((aligned(8)));


r~


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 03/10] accel: collecting TB execution count
  2023-06-01 14:03       ` Richard Henderson
@ 2023-06-02  1:54         ` Wu, Fei
  2023-06-02  4:02           ` Richard Henderson
  0 siblings, 1 reply; 47+ messages in thread
From: Wu, Fei @ 2023-06-02  1:54 UTC (permalink / raw)
  To: Richard Henderson, alex.bennee, qemu-devel
  Cc: Vanderson M. do Rosario, Paolo Bonzini

On 6/1/2023 10:03 PM, Richard Henderson wrote:
> On 5/31/23 22:44, Wu, Fei wrote:
>> On 6/1/2023 8:05 AM, Richard Henderson wrote:
>>> On 5/30/23 01:35, Fei Wu wrote:
>>>> From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>
>>>>
>>>> If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
>>>> enabled, then we instrument the start code of this TB
>>>> to atomically count the number of times it is executed.
>>>> We count both the number of "normal" executions and atomic
>>>> executions of a TB.
>>>>
>>>> The execution count of the TB is stored in its respective
>>>> TBS.
>>>>
>>>> All TBStatistics are created by default with the flags from
>>>> default_tbstats_flag.
>>>>
>>>> [Richard Henderson created the inline gen_tb_exec_count]
>>>>
>>>> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
>>>> Message-Id: <20190829173437.5926-3-vandersonmr2@gmail.com>
>>>> [AJB: Fix author]
>>>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>>>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>>>> ---
>>>>    accel/tcg/cpu-exec.c          |  6 ++++++
>>>>    accel/tcg/tb-stats.c          |  6 ++++++
>>>>    accel/tcg/tcg-runtime.c       |  1 +
>>>>    accel/tcg/translate-all.c     |  7 +++++--
>>>>    accel/tcg/translator.c        | 25 +++++++++++++++++++++++++
>>>>    include/exec/gen-icount.h     |  1 +
>>>>    include/exec/tb-stats-flags.h |  5 +++++
>>>>    include/exec/tb-stats.h       | 13 +++++++++++++
>>>>    8 files changed, 62 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
>>>> index 0e741960da..c0d8f26237 100644
>>>> --- a/accel/tcg/cpu-exec.c
>>>> +++ b/accel/tcg/cpu-exec.c
>>>> @@ -25,6 +25,7 @@
>>>>    #include "trace.h"
>>>>    #include "disas/disas.h"
>>>>    #include "exec/exec-all.h"
>>>> +#include "exec/tb-stats.h"
>>>>    #include "tcg/tcg.h"
>>>>    #include "qemu/atomic.h"
>>>>    #include "qemu/rcu.h"
>>>> @@ -562,7 +563,12 @@ void cpu_exec_step_atomic(CPUState *cpu)
>>>>                mmap_unlock();
>>>>            }
>>>>    +        if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
>>>> +            tb->tb_stats->executions.atomic++;
>>>> +        }
>>>> +
>>>>            cpu_exec_enter(cpu);
>>>> +
>>>>            /* execute the generated code */
>>>>            trace_exec_tb(tb, pc);
>>>>            cpu_tb_exec(cpu, tb, &tb_exit);
>>>> diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
>>>> index f988bd8a31..143a52ef5c 100644
>>>> --- a/accel/tcg/tb-stats.c
>>>> +++ b/accel/tcg/tb-stats.c
>>>> @@ -22,6 +22,7 @@ enum TBStatsStatus {
>>>>    };
>>>>      static enum TBStatsStatus tcg_collect_tb_stats;
>>>> +static uint32_t default_tbstats_flag;
>>>>      void init_tb_stats_htable(void)
>>>>    {
>>>> @@ -56,3 +57,8 @@ bool tb_stats_collection_paused(void)
>>>>    {
>>>>        return tcg_collect_tb_stats == TB_STATS_PAUSED;
>>>>    }
>>>> +
>>>> +uint32_t get_default_tbstats_flag(void)
>>>> +{
>>>> +    return default_tbstats_flag;
>>>> +}
>>>
>>> What is the purpose of this function, instead of a global variable?
>>> What is the meaning of 'default' in its name?
>>>
>> tbs have their specific settings, e.g. after 'filter' cmd:
>> * the last_search tbs has their stats_enabled kept
>> * tbs not in the list sets their flag to TB_PAUSED
> 
> How does this affect anything at all?
> 
> We are not *checking* the tb->tb_stats->stats_enabled bit except at code
> generation time, not code execution time.  Therefore nothing ever reads
> the TB_PAUSED bit (or, correspondingly, the clearing of the other
> bits).  The setting of the bit is permanent.
> 
At dump time, it does check stats_enabled e.g. in dump_tb_header(). So
the question is whether FILTER is necessary at all? If not, we can
remove FILTER together with PAUSE, and only keep START & STOP in hmp cmd.

Thanks,
Fei.

>> yes, it might looks better. But there is no correctness issue either as
>> it checks if the specific bit is enabled during collecting stats.
> 
> No, it does not.  See above.
> 
> 
> r~



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 03/10] accel: collecting TB execution count
  2023-06-02  1:54         ` Wu, Fei
@ 2023-06-02  4:02           ` Richard Henderson
  0 siblings, 0 replies; 47+ messages in thread
From: Richard Henderson @ 2023-06-02  4:02 UTC (permalink / raw)
  To: Wu, Fei, alex.bennee, qemu-devel; +Cc: Vanderson M. do Rosario, Paolo Bonzini

On 6/1/23 18:54, Wu, Fei wrote:
>> We are not *checking* the tb->tb_stats->stats_enabled bit except at code
>> generation time, not code execution time.  Therefore nothing ever reads
>> the TB_PAUSED bit (or, correspondingly, the clearing of the other
>> bits).  The setting of the bit is permanent.
>>
> At dump time, it does check stats_enabled e.g. in dump_tb_header(). So
> the question is whether FILTER is necessary at all? If not, we can
> remove FILTER together with PAUSE, and only keep START & STOP in hmp cmd.

Let's start simpler and remove FILTER and PAUSE.


r~


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 08/10] Adding info [tb-list|tb] commands to HMP (WIP)
  2023-06-01 12:12     ` Wu, Fei
@ 2023-06-06  7:30       ` Wu, Fei
  0 siblings, 0 replies; 47+ messages in thread
From: Wu, Fei @ 2023-06-06  7:30 UTC (permalink / raw)
  To: Richard Henderson, alex.bennee, qemu-devel
  Cc: Vanderson M. do Rosario, Dr . David Alan Gilbert, Paolo Bonzini,
	Dr. David Alan Gilbert

On 6/1/2023 8:12 PM, Wu, Fei wrote:
> On 6/1/2023 10:40 AM, Richard Henderson wrote:
>> On 5/30/23 01:35, Fei Wu wrote:
>>> +static void do_dump_tbs_info(int total, int sort_by)
>>> +{
>>> +    id = 1;
>>> +    GList *i;
>>> +    int count = total;
>>> +
>>> +    g_list_free(last_search);
>>> +    last_search = NULL;
>>> +
>>> +    qht_iter(&tb_ctx.tb_stats, collect_tb_stats, NULL);
>>> +
>>> +    last_search = g_list_sort_with_data(last_search, inverse_sort_tbs,
>>> +                                        &sort_by);
>>> +
>>
>> Why are you sorting on a list and not an array?
>> Intuitively, sorting a list of 1 million elements seems like the wrong
>> choice.
>>
>> Why did you put all the comparisons in one inverse_sort_tbs function,
>> and require examining sort_by?  Better would be N sorting functions
>> where sort_by is evaluated once before starting the sort.
>>
>>
>>> +++ b/disas/disas.c
>>> @@ -8,6 +8,8 @@
>>>  #include "hw/core/cpu.h"
>>>  #include "exec/memory.h"
>>>  
>>> +#include "qemu/log-for-trace.h"
>>> +
>>>  /* Filled in by elfload.c.  Simplistic, but will do for now. */
>>>  struct syminfo *syminfos = NULL;
>>>  
>>> @@ -199,6 +201,24 @@ static void initialize_debug_host(CPUDebug *s)
>>>  #endif
>>>  }
>>>  
>>> +static int
>>> +__attribute__((format(printf, 2, 3)))
>>> +fprintf_log(FILE *a, const char *b, ...)
>>> +{
>>> +    va_list ap;
>>> +    va_start(ap, b);
>>> +
>>> +    if (!to_string) {
>>> +        vfprintf(a, b, ap);
>>> +    } else {
>>> +        qemu_vlog(b, ap);
>>> +    }
>>> +
>>> +    va_end(ap);
>>> +
>>> +    return 1;
>>> +}
>>> +
>>
>> Not need on this either.  Global variable being checked on each
>> callback, instead of selecting the proper callback earlier -- preferably
>> without the global variable.
>>
>> Did you really need something different than monitor_disas?  You almost
>> certainly want to read physical memory and not virtual anyway.
>>
'info tb' has extra flag to specify dump out_asm, in_asm & op, so there
looks more user cases than monitor_disas, I think it's nice to have, but
not strongly required, usually we have a general idea if we get the IN
ops, plus the other counters such as g/op/... in the output.

Right now I am inclined to switch to monitor_disas, I found it causes
guest kernel panic with the selective virt addr, it happens to the
original v10 too.

------------------------------

TB id:1998 | phys:0x9c5f5ce2 virt:0x00ffffff8ff8ece2 flags:0x00024018 0
inv/1
        | exec:68/0 guest inst cov:0.00%
        | trans:1 ints: g:1 op:17 op_opt:16 spills:0
        | h/g (host bytes / guest insts): 129.000000
        | time to gen at 2.4GHz => code:1960.42(ns) IR:915.83(ns)


could not gen code for this TB (no longer mapped?)
------------------------------
[   98.213363] Unable to handle kernel paging request at virtual address
00ffffff8ff8ece2
[   98.219153] Oops [#1]
[   98.219299] Modules linked in: binfmt_misc nls_ascii nls_cp437 vfat
fat cfg80211 rfkill drm fuse drm_panel_orientation_quirks configfs
i2c_core ip_tables x_tables autofs4 ext4 crc32c_generic crc16 mbcache
jbd2 virtio_net net_failover virtio_blk failover virtio_mmio virtio
virtio_ring
[   98.220832] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.2.0+ #1
[   98.221122] Hardware name: riscv-virtio,qemu (DT)
[   98.221330] epc : arch_cpu_idle+0x1e/0x28
[   98.221985]  ra : default_idle_call+0x3c/0xb8
[   98.222150] epc : ffffffff80004398 ra : ffffffff808a0bc2 sp :
ff2000000068bf40
[   98.222339]  gp : ffffffff815688e8 tp : ff6000007ffcc240 t0 :
ff2000000064bad8
[   98.222562]  t1 : 0000000000000001 t2 : ffffffff80e02ed0 s0 :
ff2000000068bf50
[   98.222780]  s1 : 0000000000000001 a0 : 00000000000154ac a1 :
ff6000027d0f3000
[   98.223001]  a2 : ff6000007ffcc240 a3 : 0000000000000001 a4 :
ff6000027d0f3000
[   98.223217]  a5 : ff600001fdd5d500 a6 : 4000000000000000 a7 :
0000000000000000
[   98.223427]  s2 : ffffffff8156a220 s3 : ffffffff8156a418 s4 :
ffffffff80c6ba18
[   98.223636]  s5 : ffffffff815a9588 s6 : 0000000000000000 s7 :
0000000000000000
[   98.223853]  s8 : fffffffffffffffe s9 : 000000008003d6a8 s10:
0000000000000000
[   98.224065]  s11: 0000000000000000 t3 : ffffffff8156a438 t4 :
0000000000000000
[   98.224272]  t5 : 0000000000000000 t6 : 0000000000000000
[   98.224436] status: 0000000200000100 badaddr: 00ffffff8ff8ece2 cause:
000000000000000c
[   98.224778] [<ffffffff80004398>] arch_cpu_idle+0x1e/0x28
[   98.226393] ---[ end trace 0000000000000000 ]---
[   98.226770] Kernel panic - not syncing: Attempted to kill the idle task!
[   98.227188] SMP: stopping secondary CPUs
[   98.227980] ---[ end Kernel panic - not syncing: Attempted to kill
the idle task! ]---

Thanks,
Fei.

> This makes me think the necessity of 'info tb', the primary extra info
> it adds is guest instruction, which can be gotten from 'x/i' w/o calling
> tb_gen_code.
> 
> (qemu) info tb 1
> ------------------------------
> 
> TB id:1 | phys:0x79bc86 virt:0xffffffff8059bc86 flags:0x01024001 0 inv/1
>         | exec:56962444/0 guest inst cov:0.61%
>         | trans:1 ints: g:8 op:27 op_opt:22 spills:0
>         | h/g (host bytes / guest insts): 26.000000
>         | time to gen at 2.4GHz => code:747.08(ns) IR:477.92(ns)
> 
> ----------------
> IN:
> Priv: 1; Virt: 0
> 
> 0xffffffff8059bc86:  00000013          nop
> 0xffffffff8059bc8a:  00000013          nop
> 0xffffffff8059bc8e:  00000013          nop
> 0xffffffff8059bc92:  00000013          nop
> 0xffffffff8059bc96:  1141              addi                    sp,sp,-16
> 0xffffffff8059bc98:  e422              sd                      s0,8(sp)
> 0xffffffff8059bc9a:  0800              addi                    s0,sp,16
> 0xffffffff8059bc9c:  c0102573          rdtime                  a0
> ------------------------------
> 
> (qemu) x/8i 0xffffffff8059bc86
> x/8i 0xffffffff8059bc86
> 0xffffffff8059bc86:  00000013          nop
> 0xffffffff8059bc8a:  00000013          nop
> 0xffffffff8059bc8e:  00000013          nop
> 0xffffffff8059bc92:  00000013          nop
> 0xffffffff8059bc96:  1141              addi                    sp,sp,-16
> 0xffffffff8059bc98:  e422              sd                      s0,8(sp)
> 0xffffffff8059bc9a:  0800              addi                    s0,sp,16
> 0xffffffff8059bc9c:  c0102573          rdtime                  a0
> 
> 
> Thanks,
> Fei.
> 
>>> +void qemu_log_to_monitor(bool enable)
>>> +{
>>> +    to_monitor = enable;
>>> +}
>>> +
>>> +void qemu_log_to_string(bool enable, GString *s)
>>> +{
>>> +    to_string = enable;
>>> +    string = s;
>>> +}
>>
>> What are these for, and why do you need them?
>> Why would to_string ever be anything other than (string != NULL)?
>> Why are you using such very generic names for global variables?
>>
>>
>> r~
> 



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 08/10] Adding info [tb-list|tb] commands to HMP (WIP)
  2023-06-01  2:40   ` Richard Henderson
  2023-06-01 12:12     ` Wu, Fei
@ 2023-06-07 12:49     ` Wu, Fei
  2023-06-08  7:38       ` Wu, Fei
  1 sibling, 1 reply; 47+ messages in thread
From: Wu, Fei @ 2023-06-07 12:49 UTC (permalink / raw)
  To: Richard Henderson, alex.bennee, qemu-devel
  Cc: Vanderson M. do Rosario, Dr . David Alan Gilbert, Paolo Bonzini,
	Dr. David Alan Gilbert

On 6/1/2023 10:40 AM, Richard Henderson wrote:
>> +static int
>> +__attribute__((format(printf, 2, 3)))
>> +fprintf_log(FILE *a, const char *b, ...)
>> +{
>> +    va_list ap;
>> +    va_start(ap, b);
>> +
>> +    if (!to_string) {
>> +        vfprintf(a, b, ap);
>> +    } else {
>> +        qemu_vlog(b, ap);
>> +    }
>> +
>> +    va_end(ap);
>> +
>> +    return 1;
>> +}
>> +
> 
> Not need on this either.  Global variable being checked on each
> callback, instead of selecting the proper callback earlier -- preferably
> without the global variable.
> 
> Did you really need something different than monitor_disas?  You almost
> certainly want to read physical memory and not virtual anyway.
> 
Yes, it's usually okay for kernel address, but cannot re-gen the code
for userspace virtual address (guest kernel panic on riscv guest). I
tried monitor_disas() but it failed to disas too:
    monitor_disas(mon, mon_get_cpu(mon), tbs->phys_pc, num_inst, true);

How to use this function correctly?

Thanks,
Fei.

>> +void qemu_log_to_monitor(bool enable)
>> +{
>> +    to_monitor = enable;
>> +}
>> +
>> +void qemu_log_to_string(bool enable, GString *s)
>> +{
>> +    to_string = enable;
>> +    string = s;
>> +}
> 
> What are these for, and why do you need them?
> Why would to_string ever be anything other than (string != NULL)?
> Why are you using such very generic names for global variables?
> 
> 
> r~



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 08/10] Adding info [tb-list|tb] commands to HMP (WIP)
  2023-06-07 12:49     ` Wu, Fei
@ 2023-06-08  7:38       ` Wu, Fei
  2023-06-08  9:23         ` Peter Maydell
  0 siblings, 1 reply; 47+ messages in thread
From: Wu, Fei @ 2023-06-08  7:38 UTC (permalink / raw)
  To: Richard Henderson, alex.bennee, qemu-devel
  Cc: Vanderson M. do Rosario, Dr . David Alan Gilbert, Paolo Bonzini,
	Dr. David Alan Gilbert

On 6/7/2023 8:49 PM, Wu, Fei wrote:
> On 6/1/2023 10:40 AM, Richard Henderson wrote:
>>> +static int
>>> +__attribute__((format(printf, 2, 3)))
>>> +fprintf_log(FILE *a, const char *b, ...)
>>> +{
>>> +    va_list ap;
>>> +    va_start(ap, b);
>>> +
>>> +    if (!to_string) {
>>> +        vfprintf(a, b, ap);
>>> +    } else {
>>> +        qemu_vlog(b, ap);
>>> +    }
>>> +
>>> +    va_end(ap);
>>> +
>>> +    return 1;
>>> +}
>>> +
>>
>> Not need on this either.  Global variable being checked on each
>> callback, instead of selecting the proper callback earlier -- preferably
>> without the global variable.
>>
>> Did you really need something different than monitor_disas?  You almost
>> certainly want to read physical memory and not virtual anyway.
>>
> Yes, it's usually okay for kernel address, but cannot re-gen the code
> for userspace virtual address (guest kernel panic on riscv guest). I
> tried monitor_disas() but it failed to disas too:
>     monitor_disas(mon, mon_get_cpu(mon), tbs->phys_pc, num_inst, true);
> 
> How to use this function correctly?
> 
'phys_pc' in tbs is returned by get_page_addr_code_hostp(), which is not
guest phys address actually, but ram_addr_t instead, so it's always
wrong for monitor_disas. After some dirty changes, tbs can record the
guest pa. Now we can disas both kernel and user space code. But still,
no code is regenerated, disas in 'info tb' is just a convenient way to 'xp'.

Is there any existing function to convert ram_addr_t to guest pa?


(qemu) info tb-list 2
TB id:0 | phys:0x11b97762e virt:0x00aaaaaab76c262e flags:0x00024010 0 inv/1
        | exec:4447539979/0 guest inst cov:69.06%
        | trans:1 ints: g:8 op:28 op_opt:23 spills:0
        | h/g (host bytes / guest insts): 37.000000

TB id:1 | phys:0x8063474e virt:0xffffffff8043474e flags:0x01024001 0 inv/1
        | exec:131719290/0 guest inst cov:2.38%
        | trans:1 ints: g:9 op:37 op_opt:35 spills:0
        | h/g (host bytes / guest insts): 51.555557

(qemu) info tb 0
------------------------------

TB id:0 | phys:0x11b97762e virt:0x00aaaaaab76c262e flags:0x00024010 0 inv/1
        | exec:5841751800/0 guest inst cov:69.06%
        | trans:1 ints: g:8 op:28 op_opt:23 spills:0
        | h/g (host bytes / guest insts): 37.000000

0x11b97762e:  00002797          auipc                   a5,8192
       # 0x11b97962e
0x11b977632:  a2278793          addi                    a5,a5,-1502
0x11b977636:  639c              ld                      a5,0(a5)
0x11b977638:  00178713          addi                    a4,a5,1
0x11b97763c:  00002797          auipc                   a5,8192
       # 0x11b97963c
0x11b977640:  a1478793          addi                    a5,a5,-1516
0x11b977644:  e398              sd                      a4,0(a5)
0x11b977646:  b7e5              j                       -24
       # 0x11b97762e

------------------------------

(qemu) xp/8i 0x11b97762e
0x11b97762e:  00002797          auipc                   a5,8192
       # 0x11b97962e
0x11b977632:  a2278793          addi                    a5,a5,-1502
0x11b977636:  639c              ld                      a5,0(a5)
0x11b977638:  00178713          addi                    a4,a5,1
0x11b97763c:  00002797          auipc                   a5,8192
       # 0x11b97963c
0x11b977640:  a1478793          addi                    a5,a5,-1516
0x11b977644:  e398              sd                      a4,0(a5)
0x11b977646:  b7e5              j                       -24
       # 0x11b97762e
(qemu)

(qemu) info tb 1
------------------------------

TB id:1 | phys:0x8063474e virt:0xffffffff8043474e flags:0x01024001 0 inv/1
        | exec:131719290/0 guest inst cov:2.38%
        | trans:1 ints: g:9 op:37 op_opt:35 spills:0
        | h/g (host bytes / guest insts): 51.555557

0x8063474e:  00194a83          lbu                     s5,1(s2)
0x80634752:  00094803          lbu                     a6,0(s2)
0x80634756:  0b09              addi                    s6,s6,2
0x80634758:  008a9a9b          slliw                   s5,s5,8
0x8063475c:  01586833          or                      a6,a6,s5
0x80634760:  ff0b1f23          sh                      a6,-2(s6)
0x80634764:  1c7d              addi                    s8,s8,-1
0x80634766:  0909              addi                    s2,s2,2
0x80634768:  fe0c13e3          bnez                    s8,-26
      # 0x8063474e

------------------------------

(qemu) xp/9i 0x8063474e
0x8063474e:  00194a83          lbu                     s5,1(s2)
0x80634752:  00094803          lbu                     a6,0(s2)
0x80634756:  0b09              addi                    s6,s6,2
0x80634758:  008a9a9b          slliw                   s5,s5,8
0x8063475c:  01586833          or                      a6,a6,s5
0x80634760:  ff0b1f23          sh                      a6,-2(s6)
0x80634764:  1c7d              addi                    s8,s8,-1
0x80634766:  0909              addi                    s2,s2,2
0x80634768:  fe0c13e3          bnez                    s8,-26
      # 0x8063474e

Thanks,
Fei.

> Thanks,
> Fei.
> 
>>> +void qemu_log_to_monitor(bool enable)
>>> +{
>>> +    to_monitor = enable;
>>> +}
>>> +
>>> +void qemu_log_to_string(bool enable, GString *s)
>>> +{
>>> +    to_string = enable;
>>> +    string = s;
>>> +}
>>
>> What are these for, and why do you need them?
>> Why would to_string ever be anything other than (string != NULL)?
>> Why are you using such very generic names for global variables?
>>
>>
>> r~
> 



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 08/10] Adding info [tb-list|tb] commands to HMP (WIP)
  2023-06-08  7:38       ` Wu, Fei
@ 2023-06-08  9:23         ` Peter Maydell
  2023-06-08 12:06           ` Dr. David Alan Gilbert
  2023-06-09 14:32           ` Wu, Fei
  0 siblings, 2 replies; 47+ messages in thread
From: Peter Maydell @ 2023-06-08  9:23 UTC (permalink / raw)
  To: Wu, Fei
  Cc: Richard Henderson, alex.bennee, qemu-devel,
	Vanderson M. do Rosario, Dr . David Alan Gilbert, Paolo Bonzini,
	Dr. David Alan Gilbert

On Thu, 8 Jun 2023 at 08:44, Wu, Fei <fei2.wu@intel.com> wrote:
>
> On 6/7/2023 8:49 PM, Wu, Fei wrote:
> > On 6/1/2023 10:40 AM, Richard Henderson wrote:
> >> Did you really need something different than monitor_disas?  You almost
> >> certainly want to read physical memory and not virtual anyway.
> >>
> > Yes, it's usually okay for kernel address, but cannot re-gen the code
> > for userspace virtual address (guest kernel panic on riscv guest). I
> > tried monitor_disas() but it failed to disas too:
> >     monitor_disas(mon, mon_get_cpu(mon), tbs->phys_pc, num_inst, true);
> >
> > How to use this function correctly?
> >
> 'phys_pc' in tbs is returned by get_page_addr_code_hostp(), which is not
> guest phys address actually, but ram_addr_t instead, so it's always
> wrong for monitor_disas. After some dirty changes, tbs can record the
> guest pa. Now we can disas both kernel and user space code. But still,
> no code is regenerated, disas in 'info tb' is just a convenient way to 'xp'.
>
> Is there any existing function to convert ram_addr_t to guest pa?

Such a function would not be well-defined, because a block of RAM
as specified by a ram_addr_t could be present at (aliased to) multiple
guest physical addresses, or even currently not mapped to any guest
physical address at all. And it could be present at different physical
addresses for different vCPUs.

thanks
-- PMM


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 08/10] Adding info [tb-list|tb] commands to HMP (WIP)
  2023-06-08  9:23         ` Peter Maydell
@ 2023-06-08 12:06           ` Dr. David Alan Gilbert
  2023-06-08 12:22             ` Peter Maydell
  2023-06-09 14:32           ` Wu, Fei
  1 sibling, 1 reply; 47+ messages in thread
From: Dr. David Alan Gilbert @ 2023-06-08 12:06 UTC (permalink / raw)
  To: Peter Maydell
  Cc: Wu, Fei, Richard Henderson, alex.bennee, qemu-devel,
	Vanderson M. do Rosario, Dr . David Alan Gilbert, Paolo Bonzini

* Peter Maydell (peter.maydell@linaro.org) wrote:
> On Thu, 8 Jun 2023 at 08:44, Wu, Fei <fei2.wu@intel.com> wrote:
> >
> > On 6/7/2023 8:49 PM, Wu, Fei wrote:
> > > On 6/1/2023 10:40 AM, Richard Henderson wrote:
> > >> Did you really need something different than monitor_disas?  You almost
> > >> certainly want to read physical memory and not virtual anyway.
> > >>
> > > Yes, it's usually okay for kernel address, but cannot re-gen the code
> > > for userspace virtual address (guest kernel panic on riscv guest). I
> > > tried monitor_disas() but it failed to disas too:
> > >     monitor_disas(mon, mon_get_cpu(mon), tbs->phys_pc, num_inst, true);
> > >
> > > How to use this function correctly?
> > >
> > 'phys_pc' in tbs is returned by get_page_addr_code_hostp(), which is not
> > guest phys address actually, but ram_addr_t instead, so it's always
> > wrong for monitor_disas. After some dirty changes, tbs can record the
> > guest pa. Now we can disas both kernel and user space code. But still,
> > no code is regenerated, disas in 'info tb' is just a convenient way to 'xp'.
> >
> > Is there any existing function to convert ram_addr_t to guest pa?
> 
> Such a function would not be well-defined, because a block of RAM
> as specified by a ram_addr_t could be present at (aliased to) multiple
> guest physical addresses, or even currently not mapped to any guest
> physical address at all. And it could be present at different physical
> addresses for different vCPUs.

True, but isn't there a similar mechanism for when an MCE happens
in the host memory?

Dave

> thanks
> -- PMM
-- 
 -----Open up your eyes, open up your mind, open up your code -------   
/ Dr. David Alan Gilbert    |       Running GNU/Linux       | Happy  \ 
\        dave @ treblig.org |                               | In Hex /
 \ _________________________|_____ http://www.treblig.org   |_______/


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 08/10] Adding info [tb-list|tb] commands to HMP (WIP)
  2023-06-08 12:06           ` Dr. David Alan Gilbert
@ 2023-06-08 12:22             ` Peter Maydell
  0 siblings, 0 replies; 47+ messages in thread
From: Peter Maydell @ 2023-06-08 12:22 UTC (permalink / raw)
  To: Dr. David Alan Gilbert
  Cc: Wu, Fei, Richard Henderson, alex.bennee, qemu-devel,
	Vanderson M. do Rosario, Dr . David Alan Gilbert, Paolo Bonzini

On Thu, 8 Jun 2023 at 13:06, Dr. David Alan Gilbert <dave@treblig.org> wrote:
>
> * Peter Maydell (peter.maydell@linaro.org) wrote:
> > On Thu, 8 Jun 2023 at 08:44, Wu, Fei <fei2.wu@intel.com> wrote:
> > >
> > > On 6/7/2023 8:49 PM, Wu, Fei wrote:
> > > > On 6/1/2023 10:40 AM, Richard Henderson wrote:
> > > >> Did you really need something different than monitor_disas?  You almost
> > > >> certainly want to read physical memory and not virtual anyway.
> > > >>
> > > > Yes, it's usually okay for kernel address, but cannot re-gen the code
> > > > for userspace virtual address (guest kernel panic on riscv guest). I
> > > > tried monitor_disas() but it failed to disas too:
> > > >     monitor_disas(mon, mon_get_cpu(mon), tbs->phys_pc, num_inst, true);
> > > >
> > > > How to use this function correctly?
> > > >
> > > 'phys_pc' in tbs is returned by get_page_addr_code_hostp(), which is not
> > > guest phys address actually, but ram_addr_t instead, so it's always
> > > wrong for monitor_disas. After some dirty changes, tbs can record the
> > > guest pa. Now we can disas both kernel and user space code. But still,
> > > no code is regenerated, disas in 'info tb' is just a convenient way to 'xp'.
> > >
> > > Is there any existing function to convert ram_addr_t to guest pa?
> >
> > Such a function would not be well-defined, because a block of RAM
> > as specified by a ram_addr_t could be present at (aliased to) multiple
> > guest physical addresses, or even currently not mapped to any guest
> > physical address at all. And it could be present at different physical
> > addresses for different vCPUs.
>
> True, but isn't there a similar mechanism for when an MCE happens
> in the host memory?

That goes via kvm_physical_memory_addr_from_host(), which looks
up the host address in the KVMSlot array (and just picks the
first hit). It won't work for TCG.

thanks
-- PMM


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 08/10] Adding info [tb-list|tb] commands to HMP (WIP)
  2023-06-08  9:23         ` Peter Maydell
  2023-06-08 12:06           ` Dr. David Alan Gilbert
@ 2023-06-09 14:32           ` Wu, Fei
  2023-06-09 15:51             ` Peter Maydell
  1 sibling, 1 reply; 47+ messages in thread
From: Wu, Fei @ 2023-06-09 14:32 UTC (permalink / raw)
  To: Peter Maydell
  Cc: Richard Henderson, alex.bennee, qemu-devel,
	Vanderson M. do Rosario, Dr . David Alan Gilbert, Paolo Bonzini,
	Dr. David Alan Gilbert

On 6/8/2023 5:23 PM, Peter Maydell wrote:
> On Thu, 8 Jun 2023 at 08:44, Wu, Fei <fei2.wu@intel.com> wrote:
>>
>> On 6/7/2023 8:49 PM, Wu, Fei wrote:
>>> On 6/1/2023 10:40 AM, Richard Henderson wrote:
>>>> Did you really need something different than monitor_disas?  You almost
>>>> certainly want to read physical memory and not virtual anyway.
>>>>
>>> Yes, it's usually okay for kernel address, but cannot re-gen the code
>>> for userspace virtual address (guest kernel panic on riscv guest). I
>>> tried monitor_disas() but it failed to disas too:
>>>     monitor_disas(mon, mon_get_cpu(mon), tbs->phys_pc, num_inst, true);
>>>
>>> How to use this function correctly?
>>>
>> 'phys_pc' in tbs is returned by get_page_addr_code_hostp(), which is not
>> guest phys address actually, but ram_addr_t instead, so it's always
>> wrong for monitor_disas. After some dirty changes, tbs can record the
>> guest pa. Now we can disas both kernel and user space code. But still,
>> no code is regenerated, disas in 'info tb' is just a convenient way to 'xp'.
>>
>> Is there any existing function to convert ram_addr_t to guest pa?
> 
> Such a function would not be well-defined, because a block of RAM
> as specified by a ram_addr_t could be present at (aliased to) multiple
> guest physical addresses, or even currently not mapped to any guest
> physical address at all. And it could be present at different physical
> addresses for different vCPUs.
> 
Thank you, Peter. What's the scenario of the last different physical
addresses for different vCPUs?

For this specific case, I found I don't have to convert ram_addr_t to
gpa, the real requirement is converting ram_addr_t to hva, this one
seems well defined using qemu_map_ram_ptr(0, ram_addr) ?

Thanks,
Fei.

> thanks
> -- PMM



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 08/10] Adding info [tb-list|tb] commands to HMP (WIP)
  2023-06-09 14:32           ` Wu, Fei
@ 2023-06-09 15:51             ` Peter Maydell
  2023-06-12  1:20               ` Wu, Fei
  0 siblings, 1 reply; 47+ messages in thread
From: Peter Maydell @ 2023-06-09 15:51 UTC (permalink / raw)
  To: Wu, Fei
  Cc: Richard Henderson, alex.bennee, qemu-devel,
	Vanderson M. do Rosario, Dr . David Alan Gilbert, Paolo Bonzini,
	Dr. David Alan Gilbert

On Fri, 9 Jun 2023 at 15:32, Wu, Fei <fei2.wu@intel.com> wrote:
>
> On 6/8/2023 5:23 PM, Peter Maydell wrote:
> > On Thu, 8 Jun 2023 at 08:44, Wu, Fei <fei2.wu@intel.com> wrote:
> >> Is there any existing function to convert ram_addr_t to guest pa?
> >
> > Such a function would not be well-defined, because a block of RAM
> > as specified by a ram_addr_t could be present at (aliased to) multiple
> > guest physical addresses, or even currently not mapped to any guest
> > physical address at all. And it could be present at different physical
> > addresses for different vCPUs.
> >
> Thank you, Peter. What's the scenario of the last different physical
> addresses for different vCPUs?

You can have a board with two different CPUs, where one of them
has a RAM MemoryRegion that it puts at address A in its address
space, and the other puts it at address B. This is most likely
with 'heterogenous' configurations where you have an application
processor A and a board-support processor B. (I don't know if
we actually have any board models like that currently, but it's
logically possible.)

> For this specific case, I found I don't have to convert ram_addr_t to
> gpa, the real requirement is converting ram_addr_t to hva, this one
> seems well defined using qemu_map_ram_ptr(0, ram_addr) ?

That does work for ram_addr_t to hva, but
but note the warning on the comment above that function:
you need to be in an RCU critical section to avoid some other
thread coming along and de-allocating the RAM under your feet.

(Also it's tempting to remove that support for passing in 0
as the RAMBlock, because every other use in the codebase seems
to pass in a RAMBlock pointer.)

thanks
-- PMM


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v14 08/10] Adding info [tb-list|tb] commands to HMP (WIP)
  2023-06-09 15:51             ` Peter Maydell
@ 2023-06-12  1:20               ` Wu, Fei
  0 siblings, 0 replies; 47+ messages in thread
From: Wu, Fei @ 2023-06-12  1:20 UTC (permalink / raw)
  To: Peter Maydell
  Cc: Richard Henderson, alex.bennee, qemu-devel,
	Vanderson M. do Rosario, Dr . David Alan Gilbert, Paolo Bonzini,
	Dr. David Alan Gilbert

On 6/9/2023 11:51 PM, Peter Maydell wrote:
> On Fri, 9 Jun 2023 at 15:32, Wu, Fei <fei2.wu@intel.com> wrote:
>>
>> On 6/8/2023 5:23 PM, Peter Maydell wrote:
>>> On Thu, 8 Jun 2023 at 08:44, Wu, Fei <fei2.wu@intel.com> wrote:
>>>> Is there any existing function to convert ram_addr_t to guest pa?
>>>
>>> Such a function would not be well-defined, because a block of RAM
>>> as specified by a ram_addr_t could be present at (aliased to) multiple
>>> guest physical addresses, or even currently not mapped to any guest
>>> physical address at all. And it could be present at different physical
>>> addresses for different vCPUs.
>>>
>> Thank you, Peter. What's the scenario of the last different physical
>> addresses for different vCPUs?
> 
> You can have a board with two different CPUs, where one of them
> has a RAM MemoryRegion that it puts at address A in its address
> space, and the other puts it at address B. This is most likely
> with 'heterogenous' configurations where you have an application
> processor A and a board-support processor B. (I don't know if
> we actually have any board models like that currently, but it's
> logically possible.)
> 
>> For this specific case, I found I don't have to convert ram_addr_t to
>> gpa, the real requirement is converting ram_addr_t to hva, this one
>> seems well defined using qemu_map_ram_ptr(0, ram_addr) ?
> 
> That does work for ram_addr_t to hva, but
> but note the warning on the comment above that function:
> you need to be in an RCU critical section to avoid some other
> thread coming along and de-allocating the RAM under your feet.
> 
> (Also it's tempting to remove that support for passing in 0
> as the RAMBlock, because every other use in the codebase seems
> to pass in a RAMBlock pointer.)
> 
Got it, thank you very much.

Fei.

> thanks
> -- PMM



^ permalink raw reply	[flat|nested] 47+ messages in thread

end of thread, other threads:[~2023-06-12  1:21 UTC | newest]

Thread overview: 47+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-30  8:35 [PATCH v14 00/10] TCG code quality tracking Fei Wu
2023-05-30  8:35 ` [PATCH v14 01/10] accel/tcg: remove CONFIG_PROFILER Fei Wu
2023-05-30  8:35 ` [PATCH v14 02/10] accel/tcg: introduce TBStatistics structure Fei Wu
2023-05-31 23:59   ` Richard Henderson
2023-06-01  1:30     ` Wu, Fei
2023-06-01  2:48       ` Richard Henderson
2023-06-01  0:01   ` Richard Henderson
2023-06-01  3:19     ` Wu, Fei
2023-06-01  4:16       ` Richard Henderson
2023-06-01  5:36         ` Wu, Fei
2023-05-30  8:35 ` [PATCH v14 03/10] accel: collecting TB execution count Fei Wu
2023-06-01  0:05   ` Richard Henderson
2023-06-01  5:44     ` Wu, Fei
2023-06-01 14:03       ` Richard Henderson
2023-06-02  1:54         ` Wu, Fei
2023-06-02  4:02           ` Richard Henderson
2023-05-30  8:35 ` [PATCH v14 04/10] accel/tcg: add jit stats and time to TBStatistics Fei Wu
2023-05-30  9:37   ` Markus Armbruster
2023-05-31  0:54     ` Wu, Fei
2023-06-01  1:08   ` Richard Henderson
2023-06-01  6:48     ` Wu, Fei
2023-06-01 14:10       ` Richard Henderson
2023-06-01 15:10       ` Richard Henderson
2023-05-30  8:35 ` [PATCH v14 05/10] debug: add -d tb_stats to control TBStatistics collection: Fei Wu
2023-06-01  1:18   ` Richard Henderson
2023-06-01  6:59     ` Wu, Fei
2023-05-30  8:35 ` [PATCH v14 06/10] monitor: adding tb_stats hmp command Fei Wu
2023-06-01  1:23   ` Richard Henderson
2023-06-01  7:20     ` Wu, Fei
2023-06-01 14:25       ` Richard Henderson
2023-05-30  8:35 ` [PATCH v14 07/10] tb-stats: reset the tracked TBs on a tb_flush Fei Wu
2023-06-01  1:30   ` Richard Henderson
2023-06-01  7:22     ` Wu, Fei
2023-05-30  8:35 ` [PATCH v14 08/10] Adding info [tb-list|tb] commands to HMP (WIP) Fei Wu
2023-06-01  2:40   ` Richard Henderson
2023-06-01 12:12     ` Wu, Fei
2023-06-06  7:30       ` Wu, Fei
2023-06-07 12:49     ` Wu, Fei
2023-06-08  7:38       ` Wu, Fei
2023-06-08  9:23         ` Peter Maydell
2023-06-08 12:06           ` Dr. David Alan Gilbert
2023-06-08 12:22             ` Peter Maydell
2023-06-09 14:32           ` Wu, Fei
2023-06-09 15:51             ` Peter Maydell
2023-06-12  1:20               ` Wu, Fei
2023-05-30  8:35 ` [PATCH v14 09/10] tb-stats: dump hot TBs at the end of the execution Fei Wu
2023-05-30  8:35 ` [PATCH v14 10/10] docs: add tb-stats how to Fei Wu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.