All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH  0/2] insn plugin tweaks for measuring frequency
@ 2021-12-03 14:44 Alex Bennée
  2021-12-03 14:44 ` [RFC PATCH 1/2] tests/plugin: allow libinsn.so per-CPU counts Alex Bennée
  2021-12-03 14:44 ` [RFC PATCH 2/2] tests/plugins: add instruction matching to libinsn.so Alex Bennée
  0 siblings, 2 replies; 3+ messages in thread
From: Alex Bennée @ 2021-12-03 14:44 UTC (permalink / raw)
  To: qemu-devel
  Cc: minyihh, robhenry, mahmoudabdalghany, aaron, cota, kuhn.chenqun,
	ma.mandourr, Alex Bennée

Hi,

This series was prompted by yesterdays email thread:

  Subject: Suggestions for TCG performance improvements
  Date: Thu, 2 Dec 2021 09:47:13 +0000
  Message-ID: <c76bde31-8f3b-2d03-b7c7-9e026d4b5873@huawei.com>

which made me think if we could leverage the TCG plugins to measure
how frequently we these flush events happen. My initial measurements
with a Debian arm64 system indicate we do some sort of tlbi
instruction every 47 thousand instructions.

Alex Bennée (2):
  tests/plugin: allow libinsn.so per-CPU counts
  tests/plugins: add instruction matching to libinsn.so

 tests/plugin/insn.c | 125 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 117 insertions(+), 8 deletions(-)

-- 
2.30.2



^ permalink raw reply	[flat|nested] 3+ messages in thread

* [RFC PATCH  1/2] tests/plugin: allow libinsn.so per-CPU counts
  2021-12-03 14:44 [RFC PATCH 0/2] insn plugin tweaks for measuring frequency Alex Bennée
@ 2021-12-03 14:44 ` Alex Bennée
  2021-12-03 14:44 ` [RFC PATCH 2/2] tests/plugins: add instruction matching to libinsn.so Alex Bennée
  1 sibling, 0 replies; 3+ messages in thread
From: Alex Bennée @ 2021-12-03 14:44 UTC (permalink / raw)
  To: qemu-devel
  Cc: minyihh, robhenry, mahmoudabdalghany, aaron, cota,
	Alexandre Iooss, kuhn.chenqun, ma.mandourr, Alex Bennée

We won't go fully flexible but for most system emulation 8 vCPUs
resolution should be enough for anybody ;-)

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
---
 tests/plugin/insn.c | 39 +++++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/tests/plugin/insn.c b/tests/plugin/insn.c
index d229fdc001..d5a0a08cb4 100644
--- a/tests/plugin/insn.c
+++ b/tests/plugin/insn.c
@@ -16,22 +16,33 @@
 
 QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
 
-static uint64_t insn_count;
+#define MAX_CPUS 8 /* lets not go nuts */
+
+typedef struct {
+    uint64_t last_pc;
+    uint64_t insn_count;
+} InstructionCount;
+
+static InstructionCount counts[MAX_CPUS];
+static uint64_t inline_insn_count;
+
 static bool do_inline;
 static bool do_size;
+static bool do_frequency;
 static GArray *sizes;
 
 static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata)
 {
-    static uint64_t last_pc;
+    unsigned int i = cpu_index % MAX_CPUS;
+    InstructionCount *c = &counts[i];
     uint64_t this_pc = GPOINTER_TO_UINT(udata);
-    if (this_pc == last_pc) {
+    if (this_pc == c->last_pc) {
         g_autofree gchar *out = g_strdup_printf("detected repeat execution @ 0x%"
                                                 PRIx64 "\n", this_pc);
         qemu_plugin_outs(out);
     }
-    last_pc = this_pc;
-    insn_count++;
+    c->last_pc = this_pc;
+    c->insn_count++;
 }
 
 static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
@@ -44,7 +55,7 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
 
         if (do_inline) {
             qemu_plugin_register_vcpu_insn_exec_inline(
-                insn, QEMU_PLUGIN_INLINE_ADD_U64, &insn_count, 1);
+                insn, QEMU_PLUGIN_INLINE_ADD_U64, &inline_insn_count, 1);
         } else {
             uint64_t vaddr = qemu_plugin_insn_vaddr(insn);
             qemu_plugin_register_vcpu_insn_exec_cb(
@@ -66,9 +77,9 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
 static void plugin_exit(qemu_plugin_id_t id, void *p)
 {
     g_autoptr(GString) out = g_string_new(NULL);
+    int i;
 
     if (do_size) {
-        int i;
         for (i = 0; i <= sizes->len; i++) {
             unsigned long *cnt = &g_array_index(sizes, unsigned long, i);
             if (*cnt) {
@@ -76,8 +87,20 @@ static void plugin_exit(qemu_plugin_id_t id, void *p)
                                        "len %d bytes: %ld insns\n", i, *cnt);
             }
         }
+    } else if (do_inline) {
+        g_string_append_printf(out, "insns: %" PRIu64 "\n", inline_insn_count);
     } else {
-        g_string_append_printf(out, "insns: %" PRIu64 "\n", insn_count);
+        uint64_t total_insns = 0;
+        for (i = 0; i < MAX_CPUS; i++) {
+            InstructionCount *c = &counts[i];
+            if (c->insn_count) {
+                g_string_append_printf(out, "cpu %d insns: %" PRIu64 "\n",
+                                       i, c->insn_count);
+                total_insns += c->insn_count;
+            }
+        }
+        g_string_append_printf(out, "total insns: %" PRIu64 "\n",
+                               total_insns);
     }
     qemu_plugin_outs(out->str);
 }
-- 
2.30.2



^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [RFC PATCH 2/2] tests/plugins: add instruction matching to libinsn.so
  2021-12-03 14:44 [RFC PATCH 0/2] insn plugin tweaks for measuring frequency Alex Bennée
  2021-12-03 14:44 ` [RFC PATCH 1/2] tests/plugin: allow libinsn.so per-CPU counts Alex Bennée
@ 2021-12-03 14:44 ` Alex Bennée
  1 sibling, 0 replies; 3+ messages in thread
From: Alex Bennée @ 2021-12-03 14:44 UTC (permalink / raw)
  To: qemu-devel
  Cc: Vasilev Oleg, Richard Henderson, minyihh, robhenry,
	mahmoudabdalghany, aaron, cota, Alexandre Iooss, kuhn.chenqun,
	ma.mandourr, Alex Bennée

This adds simple instruction matching to the libinsn.so plugin which
is useful for examining the execution distance between instructions.
For example to track how often we flush in ARM due to TLB updates:

  -plugin ./tests/plugin/libinsn.so,match=tlbi

which leads to output like this:

  0xffffffc01018fa00, tlbi aside1is, x0,  339, 32774 match hits, 23822 since last, avg 47279
  0xffffffc01018fa00, tlbi aside1is, x0,  340, 32775 match hits, 565051 since last, avg 47295
  0xffffffc0101915a4, tlbi vae1is, x0,  155, 32776 match hits, 151135 since last, avg 47298
  0xffffffc01018fc60, tlbi vae1is, x4,  224, 32777 match hits, 814 since last, avg 47297
  0xffffffc010194a44, tlbi vale1is, x1,  8835, 32778 match hits, 52027 since last, avg 47297
  0xffffffc010194a44, tlbi vale1is, x1,  8836, 32779 match hits, 8347 since last, avg 47296
  0xffffffc010194a44, tlbi vale1is, x1,  8837, 32780 match hits, 33677 since last, avg 47295

showing we do some sort of TLBI invalidation every 47 thousand
instructions.

Cc: Vasilev Oleg <vasilev.oleg@huawei.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Emilio Cota <cota@braap.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
---
 tests/plugin/insn.c | 88 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 87 insertions(+), 1 deletion(-)

diff --git a/tests/plugin/insn.c b/tests/plugin/insn.c
index d5a0a08cb4..3f48c86fd7 100644
--- a/tests/plugin/insn.c
+++ b/tests/plugin/insn.c
@@ -28,9 +28,25 @@ static uint64_t inline_insn_count;
 
 static bool do_inline;
 static bool do_size;
-static bool do_frequency;
 static GArray *sizes;
 
+typedef struct {
+    char *match_string;
+    uint64_t hits[MAX_CPUS];
+    uint64_t last_hit[MAX_CPUS];
+    uint64_t total_delta[MAX_CPUS];
+    GPtrArray *history[MAX_CPUS];
+} Match;
+
+static GArray *matches;
+
+typedef struct {
+    Match *match;
+    uint64_t vaddr;
+    uint64_t hits;
+    char *disas;
+} Instruction;
+
 static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata)
 {
     unsigned int i = cpu_index % MAX_CPUS;
@@ -45,6 +61,36 @@ static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata)
     c->insn_count++;
 }
 
+static void vcpu_insn_matched_exec_before(unsigned int cpu_index, void *udata)
+{
+    unsigned int i = cpu_index % MAX_CPUS;
+    Instruction *insn = (Instruction *) udata;
+    Match *match = insn->match;
+    g_autoptr(GString) ts = g_string_new("");
+
+    insn->hits++;
+    g_string_append_printf(ts, "0x%" PRIx64 ", %s, % "PRId64,
+                           insn->vaddr, insn->disas, insn->hits);
+
+    uint64_t icount = counts[i].insn_count;
+    uint64_t delta = icount - match->last_hit[i];
+
+    match->hits[i]++;
+    match->total_delta[i] += delta;
+
+    g_string_append_printf(ts,
+                           ", %"PRId64" match hits, %"PRId64
+                           " since last, avg %"PRId64"\n",
+                           match->hits[i], delta,
+                           match->total_delta[i] / match->hits[i]);
+
+    match->last_hit[i] = icount;
+
+    qemu_plugin_outs(ts->str);
+
+    g_ptr_array_add(match->history[i], insn);
+}
+
 static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
 {
     size_t n = qemu_plugin_tb_n_insns(tb);
@@ -71,6 +117,29 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
             unsigned long *cnt = &g_array_index(sizes, unsigned long, sz);
             (*cnt)++;
         }
+
+        /*
+         * If we are tracking certain instructions we will need more
+         * information about the instruction which we also need to
+         * save if there is a hit.
+         */
+        if (matches) {
+            char *insn_disas = qemu_plugin_insn_disas(insn);
+            int j;
+            for (j = 0; j < matches->len; j++) {
+                Match *m = &g_array_index(matches, Match, j);
+                if (g_str_has_prefix(insn_disas, m->match_string)) {
+                    Instruction *rec = g_new0(Instruction, 1);
+                    rec->disas = g_strdup(insn_disas);
+                    rec->vaddr = qemu_plugin_insn_vaddr(insn);
+                    rec->match = m;
+                    qemu_plugin_register_vcpu_insn_exec_cb(
+                        insn, vcpu_insn_matched_exec_before,
+                        QEMU_PLUGIN_CB_NO_REGS, rec);
+                }
+            }
+            g_free(insn_disas);
+        }
     }
 }
 
@@ -105,6 +174,21 @@ static void plugin_exit(qemu_plugin_id_t id, void *p)
     qemu_plugin_outs(out->str);
 }
 
+
+/* Add a match to the array of matches */
+static void parse_match(char *match)
+{
+    Match new_match = { .match_string = match };
+    int i;
+    for (i = 0; i < MAX_CPUS; i++) {
+        new_match.history[i] = g_ptr_array_new();
+    }
+    if (!matches) {
+        matches = g_array_new(false, true, sizeof(Match));
+    }
+    g_array_append_val(matches, new_match);
+}
+
 QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
                                            const qemu_info_t *info,
                                            int argc, char **argv)
@@ -122,6 +206,8 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
                 fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
                 return -1;
             }
+        } else if (g_strcmp0(tokens[0], "match") == 0) {
+            parse_match(tokens[1]);
         } else {
             fprintf(stderr, "option parsing failed: %s\n", opt);
             return -1;
-- 
2.30.2



^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-12-03 14:48 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-03 14:44 [RFC PATCH 0/2] insn plugin tweaks for measuring frequency Alex Bennée
2021-12-03 14:44 ` [RFC PATCH 1/2] tests/plugin: allow libinsn.so per-CPU counts Alex Bennée
2021-12-03 14:44 ` [RFC PATCH 2/2] tests/plugins: add instruction matching to libinsn.so Alex Bennée

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.