From: Andrey Gruzdev via <qemu-devel@nongnu.org>
To: qemu-devel@nongnu.org
Cc: Juan Quintela <quintela@redhat.com>,
"Dr . David Alan Gilbert" <dgilbert@redhat.com>,
Peter Xu <peterx@redhat.com>,
Markus Armbruster <armbru@redhat.com>,
Paolo Bonzini <pbonzini@redhat.com>, Den Lunev <den@openvz.org>,
Andrey Gruzdev <andrey.gruzdev@virtuozzo.com>
Subject: [PATCH v10 5/5] migration: introduce 'userfaultfd-wrlat.py' script
Date: Thu, 17 Dec 2020 19:57:12 +0300 [thread overview]
Message-ID: <20201217165712.369061-6-andrey.gruzdev@virtuozzo.com> (raw)
In-Reply-To: <20201217165712.369061-1-andrey.gruzdev@virtuozzo.com>
Add BCC/eBPF script to analyze userfaultfd write fault latency distribution.
Signed-off-by: Andrey Gruzdev <andrey.gruzdev@virtuozzo.com>
---
scripts/userfaultfd-wrlat.py | 148 +++++++++++++++++++++++++++++++++++
1 file changed, 148 insertions(+)
create mode 100755 scripts/userfaultfd-wrlat.py
diff --git a/scripts/userfaultfd-wrlat.py b/scripts/userfaultfd-wrlat.py
new file mode 100755
index 0000000000..5ffd3c6c9a
--- /dev/null
+++ b/scripts/userfaultfd-wrlat.py
@@ -0,0 +1,148 @@
+#!/usr/bin/python3
+#
+# userfaultfd-wrlat Summarize userfaultfd write fault latencies.
+# Events are accumulated continuously for the
+# whole run, while the latency distribution histogram
+# is dumped every 'interval' seconds.
+#
+# For Linux, uses BCC, eBPF.
+#
+# USAGE: userfaultfd-wrlat [interval [count]]
+#
+# Copyright Virtuozzo GmbH, 2020
+#
+# Authors:
+# Andrey Gruzdev <andrey.gruzdev@virtuozzo.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later. See the COPYING file in the top-level directory.
+
+from __future__ import print_function
+from bcc import BPF
+from ctypes import c_ushort, c_int, c_ulonglong
+from time import sleep
+from sys import argv
+
+def usage():
+ print("USAGE: %s [interval [count]]" % argv[0])
+ exit()
+
+# define BPF program
+bpf_text = """
+#include <uapi/linux/ptrace.h>
+#include <linux/mm.h>
+
+/*
+ * UFFD page fault event descriptor: identifies one traced write fault.
+ * Used as the key of the 'ev_start' BPF_HASH table.
+ */
+struct ev_desc {
+ u64 pid;   /* low 32 bits of bpf_get_current_pid_tgid() */
+ u64 addr;  /* faulting address, read from vmf->address */
+};
+
+BPF_HASH(ev_start, struct ev_desc, u64);   /* event -> start timestamp, ns */
+BPF_HASH(ctx_handle_userfault, u64, u64);  /* pid -> address of current fault */
+BPF_HISTOGRAM(ev_delta_hist, u64);         /* log2 buckets of latency, ms */
+
+/* Record the start time of a UFFD page fault, keyed by (pid, address). */
+static void do_event_start(u64 pid, u64 address)
+{
+    u64 now = bpf_ktime_get_ns();
+    struct ev_desc key = { .pid = pid, .addr = address };
+
+    ev_start.insert(&key, &now);
+}
+
+/* Close a UFFD page fault event and add its latency to the histogram. */
+static void do_event_end(u64 pid, u64 address)
+{
+    u64 now = bpf_ktime_get_ns();
+    struct ev_desc key = { .pid = pid, .addr = address };
+    u64 *start = ev_start.lookup(&key);
+
+    /* No start timestamp stored for this (pid, address): nothing to do. */
+    if (!start) {
+        return;
+    }
+    /* Nanoseconds to milliseconds, then into a log2 histogram slot. */
+    ev_delta_hist.increment(bpf_log2l((now - *start) / 1000000));
+    ev_start.delete(&key);
+}
+
+/* KPROBE for handle_userfault(): runs on entry to the function. */
+int probe_handle_userfault(struct pt_regs *ctx, struct vm_fault *vmf,
+                           unsigned long reason)
+{
+    /* Trace only UFFD write faults. */
+    if (!(reason & VM_UFFD_WP)) {
+        return 0;
+    }
+    u64 pid = (u32) bpf_get_current_pid_tgid();
+    u64 addr = vmf->address;
+    do_event_start(pid, addr);
+    /* Remember the address so that the return probe can find the event. */
+    ctx_handle_userfault.update(&pid, &addr);
+    return 0;
+}
+
+/* KRETPROBE for handle_userfault(): runs when the function returns. */
+int retprobe_handle_userfault(struct pt_regs *ctx)
+{
+    u64 pid = (u32) bpf_get_current_pid_tgid();
+    u64 *fault_addr = ctx_handle_userfault.lookup(&pid);
+
+    /*
+     * The return value is deliberately ignored. In case of spurious wakeup
+     * or pending signal we still get (at least for v5.8.0 kernel)
+     * VM_FAULT_RETRY or (VM_FAULT_RETRY | VM_FAULT_MAJOR) here, and
+     * handle_userfault() is then expected to be re-entered for the fault.
+     */
+    /* The entry probe stored no address for this pid: nothing to close. */
+    if (!fault_addr) {
+        return 0;
+    }
+    do_event_end(pid, *fault_addr);
+    ctx_handle_userfault.delete(&pid);
+    return 0;
+}
+"""
+
+# arguments
+interval = 10
+count = -1
+if len(argv) > 1:
+ try:
+ interval = int(argv[1])
+ if interval == 0:
+ raise
+ if len(argv) > 2:
+ count = int(argv[2])
+ except: # also catches -h, --help
+ usage()
+
+# load BPF program
+b = BPF(text=bpf_text)
+# attach KRPOBEs
+b.attach_kprobe(event="handle_userfault", fn_name="probe_handle_userfault")
+b.attach_kretprobe(event="handle_userfault", fn_name="retprobe_handle_userfault")
+
+# header
+print("Tracing UFFD-WP write fault latency... Hit Ctrl-C to end.")
+
+# output
+loop = 0
+do_exit = 0
+while (1):
+ if count > 0:
+ loop += 1
+ if loop > count:
+ exit()
+ try:
+ sleep(interval)
+ except KeyboardInterrupt:
+ pass; do_exit = 1
+
+ print()
+ b["ev_delta_hist"].print_log2_hist("msecs")
+ if do_exit:
+ exit()
--
2.25.1
next prev parent reply other threads:[~2020-12-17 17:13 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-12-17 16:57 [PATCH v10 0/5] UFFD write-tracking migration/snapshots Andrey Gruzdev via
2020-12-17 16:57 ` [PATCH v10 1/5] migration: introduce 'background-snapshot' migration capability Andrey Gruzdev via
2020-12-17 16:57 ` [PATCH v10 2/5] migration: introduce UFFD-WP low-level interface helpers Andrey Gruzdev via
2020-12-17 16:57 ` [PATCH v10 3/5] migration: support UFFD write fault processing in ram_save_iterate() Andrey Gruzdev via
2020-12-17 16:57 ` [PATCH v10 4/5] migration: implementation of background snapshot thread Andrey Gruzdev via
2020-12-17 16:57 ` Andrey Gruzdev via [this message]
2020-12-21 12:44 ` [PATCH v10 0/5] UFFD write-tracking migration/snapshots Andrey Gruzdev
2020-12-21 15:17 ` Peter Xu
2020-12-21 15:36 ` Andrey Gruzdev
2020-12-21 14:52 ` Andrey Gruzdev
2021-01-05 19:36 ` Peter Xu
2021-01-06 15:19 ` Andrey Gruzdev
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201217165712.369061-6-andrey.gruzdev@virtuozzo.com \
--to=qemu-devel@nongnu.org \
--cc=andrey.gruzdev@virtuozzo.com \
--cc=armbru@redhat.com \
--cc=den@openvz.org \
--cc=dgilbert@redhat.com \
--cc=pbonzini@redhat.com \
--cc=peterx@redhat.com \
--cc=quintela@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).