qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Alex Bennée" <alex.bennee@linaro.org>
To: qemu-devel@nongnu.org
Cc: Paolo Bonzini <pbonzini@redhat.com>,
	vandersonmr <vandersonmr2@gmail.com>,
	Richard Henderson <rth@twiddle.net>
Subject: Re: [Qemu-devel] [PATCH v5 01/10] accel: introducing TBStatistics structure
Date: Thu, 15 Aug 2019 14:13:44 +0100	[thread overview]
Message-ID: <87sgq2mv07.fsf@linaro.org> (raw)
In-Reply-To: <20190815021857.19526-2-vandersonmr2@gmail.com>


vandersonmr <vandersonmr2@gmail.com> writes:

> To store statistics for each TB, we created a TBStatistics structure
> which is linked with the TBs. TBStatistics can stay alive after
> tb_flush and be relinked to a regenerated TB. So the statistics can
> be accumulated even through flushes.
>
> The goal is to have all present and future qemu/tcg statistics and
> meta-data stored in this new structure.
>
> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  accel/tcg/Makefile.objs      |  2 +-
>  accel/tcg/perf/Makefile.objs |  1 +
>  accel/tcg/tb-stats.c         | 39 ++++++++++++++++++++++++
>  accel/tcg/translate-all.c    | 57 ++++++++++++++++++++++++++++++++++++
>  include/exec/exec-all.h      | 15 +++-------
>  include/exec/tb-context.h    | 12 ++++++++
>  include/exec/tb-hash.h       |  7 +++++
>  include/exec/tb-stats.h      | 43 +++++++++++++++++++++++++++
>  util/log.c                   |  2 ++
>  9 files changed, 166 insertions(+), 12 deletions(-)
>  create mode 100644 accel/tcg/perf/Makefile.objs
>  create mode 100644 accel/tcg/tb-stats.c
>  create mode 100644 include/exec/tb-stats.h
>
> diff --git a/accel/tcg/Makefile.objs b/accel/tcg/Makefile.objs
> index d381a02f34..49ffe81b5d 100644
> --- a/accel/tcg/Makefile.objs
> +++ b/accel/tcg/Makefile.objs
> @@ -2,7 +2,7 @@ obj-$(CONFIG_SOFTMMU) += tcg-all.o
>  obj-$(CONFIG_SOFTMMU) += cputlb.o
>  obj-y += tcg-runtime.o tcg-runtime-gvec.o
>  obj-y += cpu-exec.o cpu-exec-common.o translate-all.o
> -obj-y += translator.o
> +obj-y += translator.o tb-stats.o
>
>  obj-$(CONFIG_USER_ONLY) += user-exec.o
>  obj-$(call lnot,$(CONFIG_SOFTMMU)) += user-exec-stub.o
> diff --git a/accel/tcg/perf/Makefile.objs b/accel/tcg/perf/Makefile.objs
> new file mode 100644
> index 0000000000..f82fba35e5
> --- /dev/null
> +++ b/accel/tcg/perf/Makefile.objs
> @@ -0,0 +1 @@
> +obj-y += jitdump.o
> diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
> new file mode 100644
> index 0000000000..02844717cb
> --- /dev/null
> +++ b/accel/tcg/tb-stats.c
> @@ -0,0 +1,39 @@
> +#include "qemu/osdep.h"
> +
> +#include "disas/disas.h"
> +
> +#include "exec/tb-stats.h"
> +
> +void init_tb_stats_htable_if_not(void)
> +{
> +    if (tb_stats_collection_enabled() && !tb_ctx.tb_stats.map) {
> +        qht_init(&tb_ctx.tb_stats, tb_stats_cmp,
> +                CODE_GEN_HTABLE_SIZE, QHT_MODE_AUTO_RESIZE);
> +    }
> +}
> +
> +void enable_collect_tb_stats(void)
> +{
> +    init_tb_stats_htable_if_not();
> +    tcg_collect_tb_stats = TB_STATS_RUNNING;
> +}
> +
> +void disable_collect_tb_stats(void)
> +{
> +    tcg_collect_tb_stats = TB_STATS_PAUSED;
> +}
> +
> +void pause_collect_tb_stats(void)
> +{
> +    tcg_collect_tb_stats = TB_STATS_STOPPED;
> +}
> +
> +bool tb_stats_collection_enabled(void)
> +{
> +    return tcg_collect_tb_stats == TB_STATS_RUNNING;
> +}
> +
> +bool tb_stats_collection_paused(void)
> +{
> +    return tcg_collect_tb_stats == TB_STATS_PAUSED;
> +}
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index 5d1e08b169..b7bccacd3b 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1118,6 +1118,23 @@ static inline void code_gen_alloc(size_t tb_size)
>      }
>  }
>
> +/*
> + * This is the more or less the same compare as tb_cmp(), but the
> + * data persists over tb_flush. We also aggregate the various
> + * variations of cflags under one record and ignore the details of
> + * page overlap (although we can count it).
> + */
> +bool tb_stats_cmp(const void *ap, const void *bp)
> +{
> +    const TBStatistics *a = ap;
> +    const TBStatistics *b = bp;
> +
> +    return a->phys_pc == b->phys_pc &&
> +        a->pc == b->pc &&
> +        a->cs_base == b->cs_base &&
> +        a->flags == b->flags;
> +}
> +
>  static bool tb_cmp(const void *ap, const void *bp)
>  {
>      const TranslationBlock *a = ap;
> @@ -1137,6 +1154,7 @@ static void tb_htable_init(void)
>      unsigned int mode = QHT_MODE_AUTO_RESIZE;
>
>      qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
> +    init_tb_stats_htable_if_not();
>  }
>
>  /* Must be called before using the QEMU cpus. 'tb_size' is the size
> @@ -1666,6 +1684,34 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
>      return tb;
>  }
>
> +static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
> +                                  target_ulong cs_base, uint32_t flags,
> +                                  TranslationBlock *current_tb)
> +{
> +    TBStatistics *new_stats = g_new0(TBStatistics, 1);
> +    uint32_t hash = tb_stats_hash_func(phys_pc, pc, flags);
> +    void *existing_stats = NULL;
> +    new_stats->phys_pc = phys_pc;
> +    new_stats->pc = pc;
> +    new_stats->cs_base = cs_base;
> +    new_stats->flags = flags;
> +    new_stats->tb = current_tb;
> +
> +    qht_insert(&tb_ctx.tb_stats, new_stats, hash, &existing_stats);
> +
> +    if (unlikely(existing_stats)) {
> +        /*
> +         * If there is already a TBStatistic for this TB from a previous flush
> +         * then just make the new TB point to the older TBStatistic
> +         */
> +        g_free(new_stats);
> +        return existing_stats;
> +    } else {
> +        return new_stats;
> +    }
> +}
> +
> +
>  /* Called with mmap_lock held for user mode emulation.  */
>  TranslationBlock *tb_gen_code(CPUState *cpu,
>                                target_ulong pc, target_ulong cs_base,
> @@ -1732,6 +1778,17 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>      ti = profile_getclock();
>  #endif
>
> +    /*
> +     * We want to fetch the stats structure before we start code
> +     * generation so we can count interesting things about this
> +     * generation.
> +     */
> +    if (tb_stats_collection_enabled()) {
> +        tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags, tb);
> +    } else {
> +        tb->tb_stats = NULL;
> +    }
> +
>      tcg_func_start(tcg_ctx);
>
>      tcg_ctx->cpu = env_cpu(env);
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 16034ee651..24bd6a0a0c 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -22,21 +22,11 @@
>
>  #include "exec/tb-context.h"
>  #include "sysemu/cpus.h"
> +#include "exec/tb-stats.h"
>
>  /* allow to see translation results - the slowdown should be negligible, so we leave it */
>  #define DEBUG_DISAS
>
> -/* Page tracking code uses ram addresses in system mode, and virtual
> -   addresses in userspace mode.  Define tb_page_addr_t to be an appropriate
> -   type.  */
> -#if defined(CONFIG_USER_ONLY)
> -typedef abi_ulong tb_page_addr_t;
> -#define TB_PAGE_ADDR_FMT TARGET_ABI_FMT_lx
> -#else
> -typedef ram_addr_t tb_page_addr_t;
> -#define TB_PAGE_ADDR_FMT RAM_ADDR_FMT
> -#endif
> -
>  #include "qemu/log.h"
>
>  void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns);
> @@ -403,6 +393,9 @@ struct TranslationBlock {
>      uintptr_t jmp_list_head;
>      uintptr_t jmp_list_next[2];
>      uintptr_t jmp_dest[2];
> +
> +    /* Pointer to a struct where statistics from the TB is stored */
> +    TBStatistics *tb_stats;
>  };
>
>  extern bool parallel_cpus;
> diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
> index feb585e0a7..3cfb62a338 100644
> --- a/include/exec/tb-context.h
> +++ b/include/exec/tb-context.h
> @@ -23,6 +23,17 @@
>  #include "qemu/thread.h"
>  #include "qemu/qht.h"
>
> +/* Page tracking code uses ram addresses in system mode, and virtual
> +   addresses in userspace mode.  Define tb_page_addr_t to be an appropriate
> +   type.  */
> +#if defined(CONFIG_USER_ONLY)
> +typedef abi_ulong tb_page_addr_t;
> +#define TB_PAGE_ADDR_FMT TARGET_ABI_FMT_lx
> +#else
> +typedef ram_addr_t tb_page_addr_t;
> +#define TB_PAGE_ADDR_FMT RAM_ADDR_FMT
> +#endif
> +
>  #define CODE_GEN_HTABLE_BITS     15
>  #define CODE_GEN_HTABLE_SIZE     (1 << CODE_GEN_HTABLE_BITS)
>
> @@ -35,6 +46,7 @@ struct TBContext {
>
>      /* statistics */
>      unsigned tb_flush_count;
> +    struct qht tb_stats;
>  };
>
>  extern TBContext tb_ctx;
> diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h
> index 4f3a37d927..54c477fe79 100644
> --- a/include/exec/tb-hash.h
> +++ b/include/exec/tb-hash.h
> @@ -64,4 +64,11 @@ uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags,
>      return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
>  }
>
> +static inline
> +uint32_t tb_stats_hash_func(tb_page_addr_t phys_pc, target_ulong pc,
> +                            uint32_t flags)
> +{
> +    return qemu_xxhash5(phys_pc, pc, flags);
> +}
> +
>  #endif
> diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
> new file mode 100644
> index 0000000000..cc8f8a6ce6
> --- /dev/null
> +++ b/include/exec/tb-stats.h
> @@ -0,0 +1,43 @@
> +#ifndef TB_STATS_H
> +
> +#define TB_STATS_H
> +
> +#include "exec/cpu-common.h"
> +#include "exec/tb-context.h"
> +#include "tcg.h"
> +
> +typedef struct TBStatistics TBStatistics;
> +
> +/*
> + * This struct stores statistics such as execution count of the
> + * TranslationBlocks. Each sets of TBs for a given phys_pc/pc/flags
> + * has its own TBStatistics which will persist over tb_flush.
> + *
> + * We include additional counters to track number of translations as
> + * well as variants for compile flags.
> + */
> +struct TBStatistics {
> +    tb_page_addr_t phys_pc;
> +    target_ulong pc;
> +    uint32_t     flags;
> +    /* cs_base isn't included in the hash but we do check for matches */
> +    target_ulong cs_base;
> +    /* current TB linked to this TBStatistics */
> +    TranslationBlock *tb;
> +};
> +
> +bool tb_stats_cmp(const void *ap, const void *bp);
> +
> +void init_tb_stats_htable_if_not(void);
> +
> +/* TBStatistic collection controls */
> +enum TBStatsStatus { TB_STATS_RUNNING, TB_STATS_PAUSED, TB_STATS_STOPPED };
> +extern int tcg_collect_tb_stats;
> +
> +void enable_collect_tb_stats(void);
> +void disable_collect_tb_stats(void);
> +void pause_collect_tb_stats(void);
> +bool tb_stats_collection_enabled(void);
> +bool tb_stats_collection_paused(void);
> +
> +#endif
> diff --git a/util/log.c b/util/log.c
> index 1d1b33f7d9..393a17115b 100644
> --- a/util/log.c
> +++ b/util/log.c
> @@ -31,6 +31,8 @@ int qemu_loglevel;
>  static int log_append = 0;
>  static GArray *debug_regions;
>
> +int tcg_collect_tb_stats;
> +
>  /* Return the number of characters emitted.  */
>  int qemu_log(const char *fmt, ...)
>  {


--
Alex Bennée


  reply	other threads:[~2019-08-15 13:14 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-15  2:18 [Qemu-devel] [PATCH v5 00/10] Measure Tiny Code Generation Quality vandersonmr
2019-08-15  2:18 ` [Qemu-devel] [PATCH v5 01/10] accel: introducing TBStatistics structure vandersonmr
2019-08-15 13:13   ` Alex Bennée [this message]
2019-08-15  2:18 ` [Qemu-devel] [PATCH v5 02/10] accel: collecting TB execution count vandersonmr
2019-08-15 13:38   ` Alex Bennée
2019-08-15  2:18 ` [Qemu-devel] [PATCH v5 03/10] accel: collecting JIT statistics vandersonmr
2019-08-15 14:29   ` Alex Bennée
2019-08-15  2:18 ` [Qemu-devel] [PATCH v5 04/10] accel: replacing part of CONFIG_PROFILER with TBStats vandersonmr
2019-08-15 14:54   ` Alex Bennée
2019-08-15  2:18 ` [Qemu-devel] [PATCH v5 05/10] accel: adding TB_JIT_TIME and full replacing CONFIG_PROFILER vandersonmr
2019-08-15  2:18 ` [Qemu-devel] [PATCH v5 06/10] log: adding -d tb_stats to control tbstats vandersonmr
2019-08-15  2:18 ` [Qemu-devel] [PATCH v5 07/10] monitor: adding tb_stats hmp command vandersonmr
2019-08-15  8:53   ` Dr. David Alan Gilbert
2019-08-15  2:18 ` [Qemu-devel] [PATCH v5 08/10] Adding info [tbs|tb|coverset] commands to HMP. These commands allow the exploration of TBs generated by the TCG. Understand which one hotter, with more guest/host instructions... and examine their guest, host and IR code vandersonmr
2019-08-15  8:59   ` Dr. David Alan Gilbert
2019-08-21 14:16     ` Vanderson Martins do Rosario
2019-08-21 14:29       ` Dr. David Alan Gilbert
2019-08-15  2:18 ` [Qemu-devel] [PATCH v5 09/10] monitor: adding new info cfg command vandersonmr
2019-08-15  9:14   ` Dr. David Alan Gilbert
2019-08-15  2:18 ` [Qemu-devel] [PATCH v5 10/10] linux-user: dumping hot TBs at the end of the execution vandersonmr
2019-08-15 14:26   ` Aleksandar Markovic

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87sgq2mv07.fsf@linaro.org \
    --to=alex.bennee@linaro.org \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    --cc=vandersonmr2@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).