All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Emilio G. Cota" <cota@braap.org>
To: qemu-devel@nongnu.org
Cc: Richard Henderson <rth@twiddle.net>
Subject: [Qemu-devel] [PATCH 17/22] tcg: distribute profiling counters across TCGContext's
Date: Sun,  9 Jul 2017 03:50:09 -0400	[thread overview]
Message-ID: <1499586614-20507-18-git-send-email-cota@braap.org> (raw)
In-Reply-To: <1499586614-20507-1-git-send-email-cota@braap.org>

TCGContext is about to be made thread-local. To avoid scalability issues
when profiling is enabled, this patch distributes the profiling counters
across the TCG contexts via the following changes:

1) Consolidate profile info into its own struct, TCGProfile, which
   TCGContext also includes. Note that tcg_table_op_count is brought
   into TCGProfile after dropping the tcg_ prefix.
2) Iterate over the TCG contexts in the system to obtain the total counts.

Note that this change also requires updating the accessors of TCGProfile
fields to use atomic_read/atomic_set whenever there may be concurrent
accesses to them.

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 tcg/tcg.h                 |  38 ++++++++--------
 accel/tcg/translate-all.c |  23 +++++-----
 tcg/tcg.c                 | 108 ++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 124 insertions(+), 45 deletions(-)

diff --git a/tcg/tcg.h b/tcg/tcg.h
index 8e1cd45..2a64ee2 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -641,6 +641,26 @@ QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE > (1 << 14));
 /* Make sure that we don't overflow 64 bits without noticing.  */
 QEMU_BUILD_BUG_ON(sizeof(TCGOp) > 8);
 
+typedef struct TCGProfile {
+    int64_t tb_count1;
+    int64_t tb_count;
+    int64_t op_count; /* total insn count */
+    int op_count_max; /* max insn per TB */
+    int64_t temp_count;
+    int temp_count_max;
+    int64_t del_op_count;
+    int64_t code_in_len;
+    int64_t code_out_len;
+    int64_t search_out_len;
+    int64_t interm_time;
+    int64_t code_time;
+    int64_t la_time;
+    int64_t opt_time;
+    int64_t restore_count;
+    int64_t restore_time;
+    int64_t table_op_count[NB_OPS];
+} TCGProfile;
+
 struct TCGContext {
     uint8_t *pool_cur, *pool_end;
     TCGPool *pool_first, *pool_current, *pool_first_large;
@@ -664,23 +684,7 @@ struct TCGContext {
     tcg_insn_unit *code_ptr;
 
 #ifdef CONFIG_PROFILER
-    /* profiling info */
-    int64_t tb_count1;
-    int64_t tb_count;
-    int64_t op_count; /* total insn count */
-    int op_count_max; /* max insn per TB */
-    int64_t temp_count;
-    int temp_count_max;
-    int64_t del_op_count;
-    int64_t code_in_len;
-    int64_t code_out_len;
-    int64_t search_out_len;
-    int64_t interm_time;
-    int64_t code_time;
-    int64_t la_time;
-    int64_t opt_time;
-    int64_t restore_count;
-    int64_t restore_time;
+    TCGProfile prof;
 #endif
 
 #ifdef CONFIG_DEBUG_TCG
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 84e19d9..31a9d42 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -287,6 +287,7 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
     uint8_t *p = tb->tc_search;
     int i, j, num_insns = tb->icount;
 #ifdef CONFIG_PROFILER
+    TCGProfile *prof = &tcg_ctx.prof;
     int64_t ti = profile_getclock();
 #endif
 
@@ -321,8 +322,9 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
     restore_state_to_opc(env, tb, data);
 
 #ifdef CONFIG_PROFILER
-    tcg_ctx.restore_time += profile_getclock() - ti;
-    tcg_ctx.restore_count++;
+    atomic_set(&prof->restore_time,
+                prof->restore_time + profile_getclock() - ti);
+    atomic_set(&prof->restore_count, prof->restore_count + 1);
 #endif
     return 0;
 }
@@ -1269,6 +1271,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     tcg_insn_unit *gen_code_buf;
     int gen_code_size, search_size;
 #ifdef CONFIG_PROFILER
+    TCGProfile *prof = &tcg_ctx.prof;
     int64_t ti;
 #endif
     assert_memory_lock();
@@ -1298,8 +1301,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     tb->invalid = false;
 
 #ifdef CONFIG_PROFILER
-    tcg_ctx.tb_count1++; /* includes aborted translations because of
-                       exceptions */
+    /* includes aborted translations because of exceptions */
+    atomic_set(&prof->tb_count1, prof->tb_count1 + 1);
     ti = profile_getclock();
 #endif
 
@@ -1324,8 +1327,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 #endif
 
 #ifdef CONFIG_PROFILER
-    tcg_ctx.tb_count++;
-    tcg_ctx.interm_time += profile_getclock() - ti;
+    atomic_set(&prof->tb_count, prof->tb_count + 1);
+    atomic_set(&prof->interm_time, prof->interm_time + profile_getclock() - ti);
     ti = profile_getclock();
 #endif
 
@@ -1345,10 +1348,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     tb->tc_size = gen_code_size;
 
 #ifdef CONFIG_PROFILER
-    tcg_ctx.code_time += profile_getclock() - ti;
-    tcg_ctx.code_in_len += tb->size;
-    tcg_ctx.code_out_len += gen_code_size;
-    tcg_ctx.search_out_len += search_size;
+    atomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
+    atomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
+    atomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
+    atomic_set(&prof->search_out_len, prof->search_out_len + search_size);
 #endif
 
 #ifdef DEBUG_DISAS
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 0da7c61..c19c473 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1362,7 +1362,7 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
     memset(op, 0, sizeof(*op));
 
 #ifdef CONFIG_PROFILER
-    s->del_op_count++;
+    atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
 #endif
 }
 
@@ -2533,15 +2533,77 @@ static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
 
 #ifdef CONFIG_PROFILER
 
-static int64_t tcg_table_op_count[NB_OPS];
+/* avoid copy/paste errors */
+#define PROF_ADD(to, from, field)                       \
+    (to)->field += atomic_read(&((from)->field))
+
+#define PROF_ADD_MAX(to, from, field)                                   \
+    do {                                                                \
+        typeof((from)->field) val__ = atomic_read(&((from)->field));    \
+        if (val__ > (to)->field) {                                      \
+            (to)->field = val__;                                        \
+        }                                                               \
+    } while (0)
+
+/* Pass in a zero'ed @prof */
+static inline
+void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
+{
+    const TCGContext *s;
+
+    QSIMPLEQ_FOREACH(s, &ctx_list, entry) {
+        const TCGProfile *orig = &s->prof;
+
+        if (counters) {
+            PROF_ADD(prof, orig, tb_count1);
+            PROF_ADD(prof, orig, tb_count);
+            PROF_ADD(prof, orig, op_count);
+            PROF_ADD_MAX(prof, orig, op_count_max);
+            PROF_ADD(prof, orig, temp_count);
+            PROF_ADD_MAX(prof, orig, temp_count_max);
+            PROF_ADD(prof, orig, del_op_count);
+            PROF_ADD(prof, orig, code_in_len);
+            PROF_ADD(prof, orig, code_out_len);
+            PROF_ADD(prof, orig, search_out_len);
+            PROF_ADD(prof, orig, interm_time);
+            PROF_ADD(prof, orig, code_time);
+            PROF_ADD(prof, orig, la_time);
+            PROF_ADD(prof, orig, opt_time);
+            PROF_ADD(prof, orig, restore_count);
+            PROF_ADD(prof, orig, restore_time);
+        }
+        if (table) {
+            int i;
+
+            for (i = 0; i < NB_OPS; i++) {
+                PROF_ADD(prof, orig, table_op_count[i]);
+            }
+        }
+    }
+}
+
+#undef PROF_ADD
+#undef PROF_ADD_MAX
+
+static void tcg_profile_snapshot_counters(TCGProfile *prof)
+{
+    tcg_profile_snapshot(prof, true, false);
+}
+
+static void tcg_profile_snapshot_table(TCGProfile *prof)
+{
+    tcg_profile_snapshot(prof, false, true);
+}
 
 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
 {
+    TCGProfile prof = {};
     int i;
 
+    tcg_profile_snapshot_table(&prof);
     for (i = 0; i < NB_OPS; i++) {
         cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
-                    tcg_table_op_count[i]);
+                    prof.table_op_count[i]);
     }
 }
 #else
@@ -2554,6 +2616,9 @@ void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
 
 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
 {
+#ifdef CONFIG_PROFILER
+    TCGProfile *prof = &s->prof;
+#endif
     int i, oi, oi_next, num_insns;
 
 #ifdef CONFIG_PROFILER
@@ -2561,15 +2626,15 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
         int n;
 
         n = s->gen_op_buf[0].prev + 1;
-        s->op_count += n;
-        if (n > s->op_count_max) {
-            s->op_count_max = n;
+        atomic_set(&prof->op_count, prof->op_count + n);
+        if (n > prof->op_count_max) {
+            atomic_set(&prof->op_count_max, n);
         }
 
         n = s->nb_temps;
-        s->temp_count += n;
-        if (n > s->temp_count_max) {
-            s->temp_count_max = n;
+        atomic_set(&prof->temp_count, prof->temp_count + n);
+        if (n > prof->temp_count_max) {
+            atomic_set(&prof->temp_count_max, n);
         }
     }
 #endif
@@ -2586,7 +2651,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
 #endif
 
 #ifdef CONFIG_PROFILER
-    s->opt_time -= profile_getclock();
+    atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
 #endif
 
 #ifdef USE_TCG_OPTIMIZATIONS
@@ -2594,8 +2659,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
 #endif
 
 #ifdef CONFIG_PROFILER
-    s->opt_time += profile_getclock();
-    s->la_time -= profile_getclock();
+    atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
+    atomic_set(&prof->la_time, prof->la_time - profile_getclock());
 #endif
 
     {
@@ -2623,7 +2688,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
     }
 
 #ifdef CONFIG_PROFILER
-    s->la_time += profile_getclock();
+    atomic_set(&prof->la_time, prof->la_time + profile_getclock());
 #endif
 
 #ifdef DEBUG_DISAS
@@ -2654,7 +2719,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
 
         oi_next = op->next;
 #ifdef CONFIG_PROFILER
-        tcg_table_op_count[opc]++;
+        atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
 #endif
 
         switch (opc) {
@@ -2730,10 +2795,17 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
 #ifdef CONFIG_PROFILER
 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
 {
-    TCGContext *s = &tcg_ctx;
-    int64_t tb_count = s->tb_count;
-    int64_t tb_div_count = tb_count ? tb_count : 1;
-    int64_t tot = s->interm_time + s->code_time;
+    TCGProfile prof = {};
+    const TCGProfile *s;
+    int64_t tb_count;
+    int64_t tb_div_count;
+    int64_t tot;
+
+    tcg_profile_snapshot_counters(&prof);
+    s = &prof;
+    tb_count = s->tb_count;
+    tb_div_count = tb_count ? tb_count : 1;
+    tot = s->interm_time + s->code_time;
 
     cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                 tot, tot / 2.4e9);
-- 
2.7.4

  parent reply	other threads:[~2017-07-09  7:50 UTC|newest]

Thread overview: 95+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-09  7:49 [Qemu-devel] [PATCH 00/22] tcg: per-thread TCG Emilio G. Cota
2017-07-09  7:49 ` [Qemu-devel] [PATCH 01/22] vl: fix breakage of -tb-size Emilio G. Cota
2017-07-09 19:56   ` Richard Henderson
2017-07-11 15:37   ` Alex Bennée
2017-07-09  7:49 ` [Qemu-devel] [PATCH 02/22] translate-all: remove redundant !tcg_enabled check in dump_exec_info Emilio G. Cota
2017-07-09 19:57   ` Richard Henderson
2017-07-10  6:15   ` Thomas Huth
2017-07-12 12:32   ` Alex Bennée
2017-07-09  7:49 ` [Qemu-devel] [PATCH 03/22] cputlb: bring back tlb_flush_count under !TLB_DEBUG Emilio G. Cota
2017-07-09 20:00   ` Richard Henderson
2017-07-09 20:56     ` Emilio G. Cota
2017-07-09 21:20       ` Emilio G. Cota
2017-07-12 13:26   ` Alex Bennée
2017-07-12 18:19     ` Emilio G. Cota
2017-07-09  7:49 ` [Qemu-devel] [PATCH 04/22] tcg: fix corruption of code_time profiling counter upon tb_flush Emilio G. Cota
2017-07-09 20:01   ` Richard Henderson
2017-07-12 14:36   ` Alex Bennée
2017-07-12 17:09   ` Philippe Mathieu-Daudé
2017-07-09  7:49 ` [Qemu-devel] [PATCH 05/22] exec-all: fix typos in TranslationBlock's documentation Emilio G. Cota
2017-07-12 14:37   ` Alex Bennée
2017-07-09  7:49 ` [Qemu-devel] [PATCH 06/22] translate-all: make have_tb_lock static Emilio G. Cota
2017-07-09 20:02   ` Richard Henderson
2017-07-12 14:38   ` Alex Bennée
2017-07-12 18:22     ` Emilio G. Cota
2017-07-09  7:49 ` [Qemu-devel] [PATCH 07/22] tcg/i386: constify tcg_target_callee_save_regs Emilio G. Cota
2017-07-09 20:02   ` Richard Henderson
2017-07-12 14:39   ` Alex Bennée
2017-07-12 17:00   ` Philippe Mathieu-Daudé
2017-07-09  7:50 ` [Qemu-devel] [PATCH 08/22] tcg/mips: " Emilio G. Cota
2017-07-09 20:02   ` Richard Henderson
2017-07-12 14:39   ` Alex Bennée
2017-07-12 17:01   ` Philippe Mathieu-Daudé
2017-07-09  7:50 ` [Qemu-devel] [PATCH 09/22] exec-all: shrink tb->invalid to uint8_t Emilio G. Cota
2017-07-09 20:11   ` Richard Henderson
2017-07-10 23:57     ` Emilio G. Cota
2017-07-12  0:53       ` Richard Henderson
2017-07-12 20:48         ` Emilio G. Cota
2017-07-12 23:06           ` Richard Henderson
2017-07-16  1:43             ` Emilio G. Cota
2017-07-16  7:22               ` Richard Henderson
2017-07-09  7:50 ` [Qemu-devel] [PATCH 10/22] exec-all: move tb->invalid to the end of the struct Emilio G. Cota
2017-07-09  7:50 ` [Qemu-devel] [PATCH 11/22] translate-all: use a binary search tree to track TBs in TBContext Emilio G. Cota
2017-07-09 20:33   ` Richard Henderson
2017-07-09 21:01     ` Emilio G. Cota
2017-07-12 15:10   ` Alex Bennée
2017-07-12 18:38     ` Emilio G. Cota
2017-07-09  7:50 ` [Qemu-devel] [PATCH 12/22] translate-all: report correct avg host TB size Emilio G. Cota
2017-07-12 15:25   ` Alex Bennée
2017-07-12 18:45     ` Emilio G. Cota
2017-07-09  7:50 ` [Qemu-devel] [PATCH 13/22] tcg: take tb_ctx out of TCGContext Emilio G. Cota
2017-07-12 15:27   ` Alex Bennée
2017-07-09  7:50 ` [Qemu-devel] [PATCH 14/22] tcg: take .helpers " Emilio G. Cota
2017-07-09 20:35   ` Richard Henderson
2017-07-12 15:28   ` Alex Bennée
2017-07-09  7:50 ` [Qemu-devel] [PATCH 15/22] gen-icount: fold exitreq_label into TCGContext Emilio G. Cota
2017-07-09 20:36   ` Richard Henderson
2017-07-12 15:29   ` Alex Bennée
2017-07-09  7:50 ` [Qemu-devel] [PATCH 16/22] tcg: keep a list of TCGContext's Emilio G. Cota
2017-07-09 20:43   ` Richard Henderson
2017-07-12 15:32   ` Alex Bennée
2017-07-09  7:50 ` Emilio G. Cota [this message]
2017-07-09 20:45   ` [Qemu-devel] [PATCH 17/22] tcg: distribute profiling counters across TCGContext's Richard Henderson
2017-07-09 21:14     ` Emilio G. Cota
2017-07-09 21:44       ` Richard Henderson
2017-07-10 16:00         ` Emilio G. Cota
2017-07-09  7:50 ` [Qemu-devel] [PATCH 18/22] tcg: define TCG_HIGHWATER Emilio G. Cota
2017-07-09 20:46   ` Richard Henderson
2017-07-12 15:33   ` Alex Bennée
2017-07-09  7:50 ` [Qemu-devel] [PATCH 19/22] tcg: introduce tcg_context_clone Emilio G. Cota
2017-07-09 20:48   ` Richard Henderson
2017-07-09 21:04     ` Emilio G. Cota
2017-07-12 16:02   ` Alex Bennée
2017-07-12 17:25     ` Richard Henderson
2017-07-12 17:47       ` Alex Bennée
2017-07-09  7:50 ` [Qemu-devel] [PATCH 20/22] tcg: dynamically allocate from code_gen_buffer using equally-sized regions Emilio G. Cota
2017-07-09 21:03   ` Richard Henderson
2017-07-09  7:50 ` [Qemu-devel] [PATCH 21/22] tcg: enable per-thread TCG for softmmu Emilio G. Cota
2017-07-09 21:07   ` Richard Henderson
2017-07-09 21:19   ` Richard Henderson
2017-07-09 21:29     ` Emilio G. Cota
2017-07-09 21:48       ` Richard Henderson
2017-07-10  3:54         ` Emilio G. Cota
2017-07-10 12:05   ` Paolo Bonzini
2017-07-10 21:14     ` Emilio G. Cota
2017-07-10 21:33       ` Paolo Bonzini
2017-07-10 22:13         ` Emilio G. Cota
2017-07-11  8:02           ` Paolo Bonzini
2017-07-09  7:50 ` [Qemu-devel] [PATCH 22/22] translate-all: do not hold tb_lock during code generation in softmmu Emilio G. Cota
2017-07-09 21:38   ` Richard Henderson
2017-07-10  3:51     ` Emilio G. Cota
2017-07-10  5:59       ` Richard Henderson
2017-07-10 15:28         ` Emilio G. Cota
2017-07-09 18:27 ` [Qemu-devel] [PATCH 00/22] tcg: per-thread TCG Emilio G. Cota
2017-07-10  9:50 ` Alex Bennée
2017-07-10 17:04   ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1499586614-20507-18-git-send-email-cota@braap.org \
    --to=cota@braap.org \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.