qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Andrey Gruzdev via <qemu-devel@nongnu.org>
To: qemu-devel@nongnu.org
Cc: Juan Quintela <quintela@redhat.com>,
	"Dr . David Alan Gilbert" <dgilbert@redhat.com>,
	Peter Xu <peterx@redhat.com>,
	Markus Armbruster <armbru@redhat.com>,
	Paolo Bonzini <pbonzini@redhat.com>, Den Lunev <den@openvz.org>,
	Andrey Gruzdev <andrey.gruzdev@virtuozzo.com>
Subject: [PATCH v10 5/5] migration: introduce 'userfaultfd-wrlat.py' script
Date: Thu, 17 Dec 2020 19:57:12 +0300	[thread overview]
Message-ID: <20201217165712.369061-6-andrey.gruzdev@virtuozzo.com> (raw)
In-Reply-To: <20201217165712.369061-1-andrey.gruzdev@virtuozzo.com>

Add BCC/eBPF script to analyze userfaultfd write fault latency distribution.

Signed-off-by: Andrey Gruzdev <andrey.gruzdev@virtuozzo.com>
---
 scripts/userfaultfd-wrlat.py | 148 +++++++++++++++++++++++++++++++++++
 1 file changed, 148 insertions(+)
 create mode 100755 scripts/userfaultfd-wrlat.py

diff --git a/scripts/userfaultfd-wrlat.py b/scripts/userfaultfd-wrlat.py
new file mode 100755
index 0000000000..5ffd3c6c9a
--- /dev/null
+++ b/scripts/userfaultfd-wrlat.py
@@ -0,0 +1,148 @@
+#!/usr/bin/python3
+#
+# userfaultfd-wrlat Summarize userfaultfd write fault latencies.
+#                   Events are continuously accumulated for the
+#                   run, while the latency distribution histogram
+#                   is dumped every 'interval' seconds.
+#
+#                   For Linux, uses BCC, eBPF.
+#
+# USAGE: userfaultfd-wrlat [interval [count]]
+#
+# Copyright Virtuozzo GmbH, 2020
+#
+# Authors:
+#   Andrey Gruzdev   <andrey.gruzdev@virtuozzo.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+
+from __future__ import print_function
+from bcc import BPF
+from ctypes import c_ushort, c_int, c_ulonglong
+from time import sleep
+from sys import argv
+
+def usage():
+    print("USAGE: %s [interval [count]]" % argv[0])
+    exit()
+
+# define BPF program
+bpf_text = """
+#include <uapi/linux/ptrace.h>
+#include <linux/mm.h>
+
+/*
+ * UFFD page fault event descriptor.
+ * Used as a key to BPF_HASH table.
+ */
+struct ev_desc {
+    u64 pid;
+    u64 addr;
+};
+
+BPF_HASH(ev_start, struct ev_desc, u64);
+BPF_HASH(ctx_handle_userfault, u64, u64);
+BPF_HISTOGRAM(ev_delta_hist, u64);
+
+/* Trace UFFD page fault start event. */
+static void do_event_start(u64 pid, u64 address)
+{
+    struct ev_desc desc = { .pid = pid, .addr = address };
+    u64 ts = bpf_ktime_get_ns();
+
+    ev_start.insert(&desc, &ts);
+}
+
+/* Trace UFFD page fault end event. */
+static void do_event_end(u64 pid, u64 address)
+{
+    struct ev_desc desc = { .pid = pid, .addr = address };
+    u64 ts = bpf_ktime_get_ns();
+    u64 *tsp;
+
+    tsp = ev_start.lookup(&desc);
+    if (tsp) {
+        u64 delta = ts - (*tsp);
+        /* Transform time delta to milliseconds */
+        ev_delta_hist.increment(bpf_log2l(delta / 1000000));
+        ev_start.delete(&desc);
+    }
+}
+
+/* KPROBE for handle_userfault(). */
+int probe_handle_userfault(struct pt_regs *ctx, struct vm_fault *vmf,
+        unsigned long reason)
+{
+    /* Trace only UFFD write faults. */
+    if (reason & VM_UFFD_WP) {
+        u64 pid = (u32) bpf_get_current_pid_tgid();
+        u64 addr = vmf->address;
+
+        do_event_start(pid, addr);
+        ctx_handle_userfault.update(&pid, &addr);
+    }
+    return 0;
+}
+
+/* KRETPROBE for handle_userfault(). */
+int retprobe_handle_userfault(struct pt_regs *ctx)
+{
+    u64 pid = (u32) bpf_get_current_pid_tgid();
+    u64 *addr_p;
+
+    /*
+     * Here we just ignore the return value. In case of spurious wakeup
+     * or pending signal we'll still get (at least for v5.8.0 kernel)
+     * VM_FAULT_RETRY or (VM_FAULT_RETRY | VM_FAULT_MAJOR) here.
+     * Anyhow, handle_userfault() would be re-entered if such case happens,
+     * keeping initial timestamp unchanged for the faulting thread.
+     */
+    addr_p = ctx_handle_userfault.lookup(&pid);
+    if (addr_p) {
+        do_event_end(pid, *addr_p);
+        ctx_handle_userfault.delete(&pid);
+    }
+    return 0;
+}
+"""
+
+# arguments
+interval = 10
+count = -1
+if len(argv) > 1:
+    try:
+        interval = int(argv[1])
+        if interval == 0:
+            raise
+        if len(argv) > 2:
+            count = int(argv[2])
+    except:    # also catches -h, --help
+        usage()
+
+# load BPF program
+b = BPF(text=bpf_text)
+# attach KPROBEs
+b.attach_kprobe(event="handle_userfault", fn_name="probe_handle_userfault")
+b.attach_kretprobe(event="handle_userfault", fn_name="retprobe_handle_userfault")
+
+# header
+print("Tracing UFFD-WP write fault latency... Hit Ctrl-C to end.")
+
+# output
+loop = 0
+do_exit = 0
+while (1):
+    if count > 0:
+        loop += 1
+        if loop > count:
+            exit()
+    try:
+        sleep(interval)
+    except KeyboardInterrupt:
+        pass; do_exit = 1
+
+    print()
+    b["ev_delta_hist"].print_log2_hist("msecs")
+    if do_exit:
+        exit()
-- 
2.25.1



  parent reply	other threads:[~2020-12-17 17:13 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-17 16:57 [PATCH v10 0/5] UFFD write-tracking migration/snapshots Andrey Gruzdev via
2020-12-17 16:57 ` [PATCH v10 1/5] migration: introduce 'background-snapshot' migration capability Andrey Gruzdev via
2020-12-17 16:57 ` [PATCH v10 2/5] migration: introduce UFFD-WP low-level interface helpers Andrey Gruzdev via
2020-12-17 16:57 ` [PATCH v10 3/5] migration: support UFFD write fault processing in ram_save_iterate() Andrey Gruzdev via
2020-12-17 16:57 ` [PATCH v10 4/5] migration: implementation of background snapshot thread Andrey Gruzdev via
2020-12-17 16:57 ` Andrey Gruzdev via [this message]
2020-12-21 12:44 ` [PATCH v10 0/5] UFFD write-tracking migration/snapshots Andrey Gruzdev
2020-12-21 15:17   ` Peter Xu
2020-12-21 15:36     ` Andrey Gruzdev
2020-12-21 14:52 ` Andrey Gruzdev
2021-01-05 19:36 ` Peter Xu
2021-01-06 15:19   ` Andrey Gruzdev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201217165712.369061-6-andrey.gruzdev@virtuozzo.com \
    --to=qemu-devel@nongnu.org \
    --cc=andrey.gruzdev@virtuozzo.com \
    --cc=armbru@redhat.com \
    --cc=den@openvz.org \
    --cc=dgilbert@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=quintela@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).