All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Alex Bennée" <alex.bennee@linaro.org>
To: qemu-devel@nongnu.org
Cc: Pavel.Dovgaluk@ispras.ru, vilanova@ac.upc.edu, cota@braap.org,
	"Alex Bennée" <alex.bennee@linaro.org>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Peter Crosthwaite" <crosthwaite.peter@gmail.com>,
	"Richard Henderson" <rth@twiddle.net>
Subject: [Qemu-devel] [RFC PATCH 07/21] accel/tcg/cputlb: convert tlb_flush debugging into trace events
Date: Fri,  5 Oct 2018 16:48:56 +0100	[thread overview]
Message-ID: <20181005154910.3099-8-alex.bennee@linaro.org> (raw)
In-Reply-To: <20181005154910.3099-1-alex.bennee@linaro.org>

Given the range of costs for various SoftMMU TLB operations from
deferring work for the currently running vCPU to bring the whole
emulated machine to a stop for synchronised updates simple counters
are less useful. Instead we log events via the trace infrastructure
and we can then post-process the data in a range of ways.

  tlb_flush_self - the vCPU flushed its own TLB
  tlb_flush_async_schedule - work was scheduled and the vCPU kicked
  tlb_flush_synced_schedule - exclusive work was scheduled on a vCPU
  tlb_flush_work - scheduled work was done
  tlb_flush_work_complete - scheduled work was completed

We can use the difference between the work being scheduled and
tlb_flush_work to calculate the latency introduced.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

---
v2
  - add tlb_flush_work_completed
  - removed tlb_debugs at the same time
---
 accel/tcg/cputlb.c     | 72 ++++++++++++++++++++++++------------------
 accel/tcg/trace-events | 16 ++++++++++
 2 files changed, 58 insertions(+), 30 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index f4702ce91f..f1d4f7da44 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -20,6 +20,7 @@
 #include "qemu/osdep.h"
 #include "qemu/main-loop.h"
 #include "cpu.h"
+#include "trace.h"
 #include "exec/exec-all.h"
 #include "exec/memory.h"
 #include "exec/address-spaces.h"
@@ -138,7 +139,9 @@ static void tlb_flush_nocheck(CPUState *cpu)
 
 static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data)
 {
+    trace_tlb_flush_all_work(cpu->cpu_index);
     tlb_flush_nocheck(cpu);
+    trace_tlb_flush_work_complete(__LINE__, cpu->cpu_index);
 }
 
 void tlb_flush(CPUState *cpu)
@@ -146,17 +149,22 @@ void tlb_flush(CPUState *cpu)
     if (cpu->created && !qemu_cpu_is_self(cpu)) {
         if (atomic_mb_read(&cpu->pending_tlb_flush) != ALL_MMUIDX_BITS) {
             atomic_mb_set(&cpu->pending_tlb_flush, ALL_MMUIDX_BITS);
+            trace_tlb_flush_async_schedule(__LINE__, current_cpu ?
+                                           current_cpu->cpu_index :
+                                           cpu->cpu_index, cpu->cpu_index);
             async_run_on_cpu(cpu, tlb_flush_global_async_work,
                              RUN_ON_CPU_NULL);
         }
     } else {
-        tlb_flush_nocheck(cpu);
+        trace_tlb_flush_self(__LINE__, cpu->cpu_index);
+        tlb_flush_global_async_work(cpu, RUN_ON_CPU_NULL);
     }
 }
 
 void tlb_flush_all_cpus(CPUState *src_cpu)
 {
     const run_on_cpu_func fn = tlb_flush_global_async_work;
+    trace_tlb_flush_async_schedule(__LINE__, src_cpu->cpu_index, -1);
     flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
     fn(src_cpu, RUN_ON_CPU_NULL);
 }
@@ -164,6 +172,7 @@ void tlb_flush_all_cpus(CPUState *src_cpu)
 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
 {
     const run_on_cpu_func fn = tlb_flush_global_async_work;
+    trace_tlb_flush_synced_schedule(__LINE__, src_cpu->cpu_index, -1);
     flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_NULL);
 }
@@ -174,15 +183,12 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
     unsigned long mmu_idx_bitmask = data.host_int;
     int mmu_idx;
 
-    assert_cpu_is_self(cpu);
+    trace_tlb_flush_work(__LINE__, cpu->cpu_index, mmu_idx_bitmask);
 
-    tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask);
+    assert_cpu_is_self(cpu);
 
     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
-
         if (test_bit(mmu_idx, &mmu_idx_bitmask)) {
-            tlb_debug("%d\n", mmu_idx);
-
             memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
             memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
         }
@@ -190,25 +196,26 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
 
     cpu_tb_jmp_cache_clear(cpu);
 
-    tlb_debug("done\n");
+    trace_tlb_flush_work_complete(__LINE__, cpu->cpu_index);
 }
 
 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
 {
-    tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
-
     if (!qemu_cpu_is_self(cpu)) {
         uint16_t pending_flushes = idxmap;
         pending_flushes &= ~atomic_mb_read(&cpu->pending_tlb_flush);
 
         if (pending_flushes) {
-            tlb_debug("reduced mmu_idx: 0x%" PRIx16 "\n", pending_flushes);
+            trace_tlb_flush_async_schedule(__LINE__,
+                                           current_cpu->cpu_index,
+                                           cpu->cpu_index);
 
             atomic_or(&cpu->pending_tlb_flush, pending_flushes);
             async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
                              RUN_ON_CPU_HOST_INT(pending_flushes));
         }
     } else {
+        trace_tlb_flush_self(__LINE__, cpu->cpu_index);
         tlb_flush_by_mmuidx_async_work(cpu,
                                        RUN_ON_CPU_HOST_INT(idxmap));
     }
@@ -218,7 +225,7 @@ void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
 {
     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
 
-    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
+    trace_tlb_flush_async_schedule(__LINE__, src_cpu->cpu_index, -1);
 
     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
     fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
@@ -229,7 +236,7 @@ void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
 {
     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
 
-    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
+    trace_tlb_flush_synced_schedule(__LINE__, src_cpu->cpu_index, -1);
 
     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
@@ -268,14 +275,10 @@ static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
 
     assert_cpu_is_self(cpu);
 
-    tlb_debug("page :" TARGET_FMT_lx "\n", addr);
+    trace_tlb_flush_work(__LINE__, cpu->cpu_index, addr);
 
     /* Check if we need to flush due to large pages.  */
     if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
-        tlb_debug("forcing full flush ("
-                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
-                  env->tlb_flush_addr, env->tlb_flush_mask);
-
         tlb_flush(cpu);
         return;
     }
@@ -288,16 +291,19 @@ static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
     }
 
     tb_flush_jmp_cache(cpu, addr);
+
+    trace_tlb_flush_work_complete(__LINE__, cpu->cpu_index);
 }
 
 void tlb_flush_page(CPUState *cpu, target_ulong addr)
 {
-    tlb_debug("page :" TARGET_FMT_lx "\n", addr);
-
     if (!qemu_cpu_is_self(cpu)) {
+        trace_tlb_flush_async_schedule(__LINE__, current_cpu->cpu_index,
+                                       cpu->cpu_index);
         async_run_on_cpu(cpu, tlb_flush_page_async_work,
                          RUN_ON_CPU_TARGET_PTR(addr));
     } else {
+        trace_tlb_flush_self(__LINE__, cpu->cpu_index);
         tlb_flush_page_async_work(cpu, RUN_ON_CPU_TARGET_PTR(addr));
     }
 }
@@ -319,8 +325,7 @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
 
     assert_cpu_is_self(cpu);
 
-    tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n",
-              page, addr, mmu_idx_bitmap);
+    trace_tlb_flush_work(__LINE__, cpu->cpu_index, addr_and_mmuidx);
 
     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
         if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
@@ -330,6 +335,8 @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
     }
 
     tb_flush_jmp_cache(cpu, addr);
+
+    trace_tlb_flush_work_complete(__LINE__, cpu->cpu_index);
 }
 
 static void tlb_check_page_and_flush_by_mmuidx_async_work(CPUState *cpu,
@@ -340,35 +347,36 @@ static void tlb_check_page_and_flush_by_mmuidx_async_work(CPUState *cpu,
     target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
     unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
 
-    tlb_debug("addr:"TARGET_FMT_lx" mmu_idx: %04lx\n", addr, mmu_idx_bitmap);
+    trace_tlb_flush_work(__LINE__, cpu->cpu_index, addr_and_mmuidx);
 
     /* Check if we need to flush due to large pages.  */
     if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
-        tlb_debug("forced full flush ("
-                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
-                  env->tlb_flush_addr, env->tlb_flush_mask);
-
+        trace_tlb_flush_self(__LINE__, cpu->cpu_index);
         tlb_flush_by_mmuidx_async_work(cpu,
                                        RUN_ON_CPU_HOST_INT(mmu_idx_bitmap));
     } else {
+        trace_tlb_flush_self(__LINE__, cpu->cpu_index);
         tlb_flush_page_by_mmuidx_async_work(cpu, data);
     }
+
+    trace_tlb_flush_work_complete(__LINE__, cpu->cpu_index);
 }
 
 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
 {
     target_ulong addr_and_mmu_idx;
 
-    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
-
     /* This should already be page aligned */
     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
     addr_and_mmu_idx |= idxmap;
 
     if (!qemu_cpu_is_self(cpu)) {
+        trace_tlb_flush_async_schedule(__LINE__, current_cpu->cpu_index,
+                                       cpu->cpu_index);
         async_run_on_cpu(cpu, tlb_check_page_and_flush_by_mmuidx_async_work,
                          RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
     } else {
+        trace_tlb_flush_self(__LINE__, cpu->cpu_index);
         tlb_check_page_and_flush_by_mmuidx_async_work(
             cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
     }
@@ -380,7 +388,7 @@ void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
     const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work;
     target_ulong addr_and_mmu_idx;
 
-    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
+    trace_tlb_flush_async_schedule(__LINE__, src_cpu->cpu_index, -1);
 
     /* This should already be page aligned */
     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
@@ -397,7 +405,7 @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
     const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work;
     target_ulong addr_and_mmu_idx;
 
-    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
+    trace_tlb_flush_synced_schedule(__LINE__, src_cpu->cpu_index, -1);
 
     /* This should already be page aligned */
     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
@@ -411,6 +419,8 @@ void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
 {
     const run_on_cpu_func fn = tlb_flush_page_async_work;
 
+    trace_tlb_flush_async_schedule(__LINE__, src->cpu_index, -1);
+
     flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
     fn(src, RUN_ON_CPU_TARGET_PTR(addr));
 }
@@ -420,6 +430,8 @@ void tlb_flush_page_all_cpus_synced(CPUState *src,
 {
     const run_on_cpu_func fn = tlb_flush_page_async_work;
 
+    trace_tlb_flush_synced_schedule(__LINE__, src->cpu_index, -1);
+
     flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
     async_safe_run_on_cpu(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
 }
diff --git a/accel/tcg/trace-events b/accel/tcg/trace-events
index 618cc07738..0d2b1c47ac 100644
--- a/accel/tcg/trace-events
+++ b/accel/tcg/trace-events
@@ -1,5 +1,21 @@
 # Trace events for debugging and performance instrumentation
 
+# cputlb.c - SoftMMU TLB flushes
+#
+# There a numerous different functions for variations on flushing by
+# page/mmuidx and other critera. However we use common events and tag
+# them with the appropriate line number in cputlb.c
+#
+#   - setting to=-1 indicates work scheduled on all vCPUs
+#
+tlb_flush_self(int line, int vcpu) "cputlb.c:%d cpu %d"
+tlb_flush_async_schedule(int line, int from, int to) "cputlb.c:%d from_cpu=%d to_cpu=%d"
+tlb_flush_synced_schedule(int line, int from, int to) "cputlb.c:%d from_cpu=%d to_cpu=%d"
+# The following are the workers, usually running from a async work queue
+tlb_flush_all_work(int vcpu) "cpu %d"
+tlb_flush_work(int line, int vcpu, unsigned long data) "cputlb.c:%d cpu %d, %lux"
+tlb_flush_work_complete(int line, int vcpu) "cputlb.c:%d cpu %d"
+
 # TCG related tracing (you still need -d nochain to get a full picture
 # as otherwise you'll only see the first TB executed in a chain)
 # cpu-exec.c
-- 
2.17.1

  parent reply	other threads:[~2018-10-05 15:49 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-05 15:48 [Qemu-devel] [RFC PATCH 00/21] Trace updates and plugin RFC Alex Bennée
2018-10-05 15:48 ` [Qemu-devel] [RFC PATCH 01/21] util/log: allow -dfilter to stack Alex Bennée
2018-10-06 16:57   ` Richard Henderson
2018-10-05 15:48 ` [Qemu-devel] [RFC PATCH 02/21] util/log: add qemu_dfilter_append_range() Alex Bennée
2018-10-06 17:00   ` Richard Henderson
2018-10-05 15:48 ` [Qemu-devel] [RFC PATCH 03/21] linux-user: add -dfilter progtext shortcut Alex Bennée
2018-10-06 17:12   ` Richard Henderson
2018-10-05 15:48 ` [Qemu-devel] [RFC PATCH 04/21] trace: enable the exec_tb trace events Alex Bennée
2018-10-06 17:15   ` Richard Henderson
2018-10-07  1:42   ` Emilio G. Cota
2018-10-08  9:41     ` Alex Bennée
2018-10-05 15:48 ` [Qemu-devel] [RFC PATCH 05/21] trace: keep a count of trace-point hits Alex Bennée
2018-10-06 18:26   ` Richard Henderson
2018-10-05 15:48 ` [Qemu-devel] [RFC PATCH 06/21] trace: show trace point counts in the monitor Alex Bennée
2018-10-06 18:27   ` Richard Henderson
2018-10-08 12:51   ` Markus Armbruster
2018-10-17 11:52   ` Dr. David Alan Gilbert
2018-10-05 15:48 ` Alex Bennée [this message]
2018-10-15 16:33   ` [Qemu-devel] [RFC PATCH 07/21] accel/tcg/cputlb: convert tlb_flush debugging into trace events Richard Henderson
2018-10-15 18:23     ` Alex Bennée
2018-10-15 18:37       ` Richard Henderson
2018-10-05 15:48 ` [Qemu-devel] [RFC PATCH 08/21] accel/tcg/cputlb: convert remaining tlb_debug() to " Alex Bennée
2018-10-15 16:38   ` Richard Henderson
2018-10-15 18:17     ` Alex Bennée
2018-10-15 18:35       ` Richard Henderson
2018-10-05 15:48 ` [Qemu-devel] [RFC PATCH 09/21] trace: suppress log output of trace points Alex Bennée
2018-10-15 16:47   ` Richard Henderson
2018-10-05 15:48 ` [Qemu-devel] [RFC PATCH 10/21] qom/cpu: add a cpu_exit trace event Alex Bennée
2018-10-06 18:51   ` Richard Henderson
2018-10-05 15:49 ` [Qemu-devel] [RFC PATCH 11/21] trace: expose a plugin fn pointer in TraceEvent Alex Bennée
2018-10-15 16:52   ` Richard Henderson
2018-10-05 15:49 ` [Qemu-devel] [RFC PATCH 12/21] configure: expose a plugin to the trace-backends Alex Bennée
2018-10-15 17:14   ` Richard Henderson
2018-10-05 15:49 ` [Qemu-devel] [RFC PATCH 13/21] tracetool: generate plugin snippets Alex Bennée
2018-10-15 17:02   ` Richard Henderson
2018-10-05 15:49 ` [Qemu-devel] [RFC PATCH 14/21] trace: add support for plugin infrastructure Alex Bennée
2018-10-07  1:29   ` Emilio G. Cota
2018-10-15 17:11   ` Richard Henderson
2018-10-05 15:49 ` [Qemu-devel] [RFC PATCH 15/21] trace: add linux-user plugin support Alex Bennée
2018-10-15 17:13   ` Richard Henderson
2018-10-05 15:49 ` [Qemu-devel] [RFC PATCH 16/21] trace: add infrastructure for building plugins Alex Bennée
2018-10-15 17:24   ` Richard Henderson
2018-10-15 18:16     ` Alex Bennée
2018-10-05 15:49 ` [Qemu-devel] [RFC PATCH 17/21] hmp: expose status of plugins to the monitor Alex Bennée
2018-10-15 17:27   ` Richard Henderson
2018-10-17 12:04   ` Dr. David Alan Gilbert
2018-10-17 17:36     ` Markus Armbruster
2018-10-05 15:49 ` [Qemu-devel] [RFC PATCH 18/21] linux-user: allow dumping of plugin status at end of run Alex Bennée
2018-10-15 17:28   ` Richard Henderson
2018-10-05 15:49 ` [Qemu-devel] [RFC PATCH 19/21] plugins: add an example hotblocks plugin Alex Bennée
2018-10-08 12:59   ` Pavel Dovgalyuk
2018-10-08 14:05     ` Alex Bennée
2018-10-15 17:33   ` Richard Henderson
2018-10-15 18:15     ` Alex Bennée
2018-10-05 15:49 ` [Qemu-devel] [RFC PATCH 20/21] plugins: add hotness summary to hotblocks Alex Bennée
2018-10-15 17:34   ` Richard Henderson
2018-10-05 15:49 ` [Qemu-devel] [RFC PATCH 21/21] plugin: add tlbflush stats plugin Alex Bennée
2018-10-07  1:23 ` [Qemu-devel] [RFC PATCH 00/21] Trace updates and plugin RFC Emilio G. Cota
2018-10-08 10:28   ` Alex Bennée
2018-10-08 12:51     ` Philippe Mathieu-Daudé
2018-10-08 13:59     ` Emilio G. Cota
2018-10-08 14:15       ` Alex Bennée
2018-10-09  6:28 ` Pavel Dovgalyuk
2018-10-09  8:31   ` Alex Bennée
2018-10-09  8:38     ` Pavel Dovgalyuk
2018-10-09  9:26       ` Alex Bennée
2018-10-29  7:46         ` Pavel Dovgalyuk
2018-10-29 11:30           ` Alex Bennée
2018-10-29 12:24             ` Pavel Dovgalyuk
2018-10-29 15:03               ` Alex Bennée

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181005154910.3099-8-alex.bennee@linaro.org \
    --to=alex.bennee@linaro.org \
    --cc=Pavel.Dovgaluk@ispras.ru \
    --cc=cota@braap.org \
    --cc=crosthwaite.peter@gmail.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    --cc=vilanova@ac.upc.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.