All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrey Gruzdev via <qemu-devel@nongnu.org>
To: qemu-devel@nongnu.org
Cc: Juan Quintela <quintela@redhat.com>,
	"Dr . David Alan Gilbert" <dgilbert@redhat.com>,
	Peter Xu <peterx@redhat.com>,
	Markus Armbruster <armbru@redhat.com>,
	Paolo Bonzini <pbonzini@redhat.com>, Den Lunev <den@openvz.org>,
	Andrey Gruzdev <andrey.gruzdev@virtuozzo.com>
Subject: [PATCH v9 5/5] migration: introduce 'userfaultfd-wrlat.py' script
Date: Thu, 17 Dec 2020 17:45:40 +0300	[thread overview]
Message-ID: <20201217144540.365903-6-andrey.gruzdev@virtuozzo.com> (raw)
In-Reply-To: <20201217144540.365903-1-andrey.gruzdev@virtuozzo.com>

Add BCC/eBPF script to analyze userfaultfd write fault latency distribution.

Signed-off-by: Andrey Gruzdev <andrey.gruzdev@virtuozzo.com>
---
 scripts/userfaultfd-wrlat.py | 148 +++++++++++++++++++++++++++++++++++
 1 file changed, 148 insertions(+)
 create mode 100755 scripts/userfaultfd-wrlat.py

diff --git a/scripts/userfaultfd-wrlat.py b/scripts/userfaultfd-wrlat.py
new file mode 100755
index 0000000000..5ffd3c6c9a
--- /dev/null
+++ b/scripts/userfaultfd-wrlat.py
@@ -0,0 +1,148 @@
+#!/usr/bin/python3
+#
+# userfaultfd-wrlat Summarize userfaultfd write fault latencies.
+#                   Events are continuously accumulated for the
+#                   run, while latency distribution histogram is
+#                   dumped each 'interval' seconds.
+#
+#                   For Linux, uses BCC, eBPF.
+#
+# USAGE: userfaultfd-lat [interval [count]]
+#
+# Copyright Virtuozzo GmbH, 2020
+#
+# Authors:
+#   Andrey Gruzdev   <andrey.gruzdev@virtuozzo.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+
+from __future__ import print_function
+from bcc import BPF
+from ctypes import c_ushort, c_int, c_ulonglong
+from time import sleep
+from sys import argv
+
+def usage():
+    print("USAGE: %s [interval [count]]" % argv[0])
+    exit()
+
+# define BPF program
+bpf_text = """
+#include <uapi/linux/ptrace.h>
+#include <linux/mm.h>
+
+/*
+ * UFFD page fault event descriptor.
+ * Used as a key to BPF_HASH table.
+ */
+struct ev_desc {
+    u64 pid;
+    u64 addr;
+};
+
+BPF_HASH(ev_start, struct ev_desc, u64);
+BPF_HASH(ctx_handle_userfault, u64, u64);
+BPF_HISTOGRAM(ev_delta_hist, u64);
+
+/* Trace UFFD page fault start event. */
+static void do_event_start(u64 pid, u64 address)
+{
+    struct ev_desc desc = { .pid = pid, .addr = address };
+    u64 ts = bpf_ktime_get_ns();
+
+    ev_start.insert(&desc, &ts);
+}
+
+/* Trace UFFD page fault end event. */
+static void do_event_end(u64 pid, u64 address)
+{
+    struct ev_desc desc = { .pid = pid, .addr = address };
+    u64 ts = bpf_ktime_get_ns();
+    u64 *tsp;
+
+    tsp = ev_start.lookup(&desc);
+    if (tsp) {
+        u64 delta = ts - (*tsp);
+        /* Transform time delta to milliseconds */
+        ev_delta_hist.increment(bpf_log2l(delta / 1000000));
+        ev_start.delete(&desc);
+    }
+}
+
+/* KPROBE for handle_userfault(). */
+int probe_handle_userfault(struct pt_regs *ctx, struct vm_fault *vmf,
+        unsigned long reason)
+{
+    /* Trace only UFFD write faults. */
+    if (reason & VM_UFFD_WP) {
+        u64 pid = (u32) bpf_get_current_pid_tgid();
+        u64 addr = vmf->address;
+
+        do_event_start(pid, addr);
+        ctx_handle_userfault.update(&pid, &addr);
+    }
+    return 0;
+}
+
+/* KRETPROBE for handle_userfault(). */
+int retprobe_handle_userfault(struct pt_regs *ctx)
+{
+    u64 pid = (u32) bpf_get_current_pid_tgid();
+    u64 *addr_p;
+
+    /*
+     * Here we just ignore the return value. In case of spurious wakeup
+     * or pending signal we'll still get (at least for v5.8.0 kernel)
+     * VM_FAULT_RETRY or (VM_FAULT_RETRY | VM_FAULT_MAJOR) here.
+     * Anyhow, handle_userfault() would be re-entered if such case happens,
+     * keeping initial timestamp unchanged for the faulting thread.
+     */
+    addr_p = ctx_handle_userfault.lookup(&pid);
+    if (addr_p) {
+        do_event_end(pid, *addr_p);
+        ctx_handle_userfault.delete(&pid);
+    }
+    return 0;
+}
+"""
+
+# arguments
+interval = 10
+count = -1
+if len(argv) > 1:
+    try:
+        interval = int(argv[1])
+        if interval == 0:
+            raise
+        if len(argv) > 2:
+            count = int(argv[2])
+    except:    # also catches -h, --help
+        usage()
+
+# load BPF program
+b = BPF(text=bpf_text)
+# attach KRPOBEs
+b.attach_kprobe(event="handle_userfault", fn_name="probe_handle_userfault")
+b.attach_kretprobe(event="handle_userfault", fn_name="retprobe_handle_userfault")
+
+# header
+print("Tracing UFFD-WP write fault latency... Hit Ctrl-C to end.")
+
+# output
+loop = 0
+do_exit = 0
+while (1):
+    if count > 0:
+        loop += 1
+        if loop > count:
+            exit()
+    try:
+        sleep(interval)
+    except KeyboardInterrupt:
+        pass; do_exit = 1
+
+    print()
+    b["ev_delta_hist"].print_log2_hist("msecs")
+    if do_exit:
+        exit()
-- 
2.25.1



      parent reply	other threads:[~2020-12-17 14:51 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-17 14:45 [PATCH v9 0/5] UFFD write-tracking migration/snapshots Andrey Gruzdev via
2020-12-17 14:45 ` [PATCH v9 1/5] migration: introduce 'background-snapshot' migration capability Andrey Gruzdev via
2020-12-17 16:29   ` Markus Armbruster
2020-12-17 16:40     ` Andrey Gruzdev
2020-12-17 14:45 ` [PATCH v9 2/5] migration: introduce UFFD-WP low-level interface helpers Andrey Gruzdev via
2020-12-17 14:45 ` [PATCH v9 3/5] migration: support UFFD write fault processing in ram_save_iterate() Andrey Gruzdev via
2020-12-17 14:45 ` [PATCH v9 4/5] migration: implementation of background snapshot thread Andrey Gruzdev via
2020-12-17 14:45 ` Andrey Gruzdev via [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201217144540.365903-6-andrey.gruzdev@virtuozzo.com \
    --to=qemu-devel@nongnu.org \
    --cc=andrey.gruzdev@virtuozzo.com \
    --cc=armbru@redhat.com \
    --cc=den@openvz.org \
    --cc=dgilbert@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=quintela@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.