linux-trace-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] trace-cmd: Find PID of host-guest task from tracing vsock connection
@ 2021-05-06 21:14 Steven Rostedt
  2021-05-07  0:32 ` Dario Faggioli
  0 siblings, 1 reply; 7+ messages in thread
From: Steven Rostedt @ 2021-05-06 21:14 UTC (permalink / raw)
  To: Linux Trace Devel

From 17c8c11792d35c7a8ec50e09b32842177eba625e Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 5 May 2021 08:04:57 -0400
Subject: [PATCH] trace-cmd: Find PID of host-guest task from tracing vsock
 connection

Searching for the qemu task to find what task the CID is for is not a
reliable method, as the qemu executable may be called different names on
different systems, and qemu may not even be used.

Instead, trace the sched_waking and kvm_exit events and do a vsock
connection to the CID. By doing so, you can find the task that runs the
guest.

 trace-cmd wakes up "vhost-<pid>"
 vhost-<pid> wakes up "CPUX/KVM"
 CPUX/KVM calls "kvm_exit"

The CPUX/KVM is the task that runs the vCPU of the guest, but we still
need the task group leader of this task to find the task that KVM has
for timestamp offsets and multipliers. To do that, look at the proc
file system for the PID of the CPUX/KVM and read its status file.
This holds "Tgid:    <pid>" where "<pid>" is the PID of the task that
has the KVM information.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
Changes since v1:

  - Open code the tracefs_event_enable() functions as that comes in 
    libtracefs 1.2 and we still want this only supporting 1.1.

  - Check for interrupts before searching the pid list, as that's
    the faster check.

 tracecmd/trace-vm.c | 227 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 226 insertions(+), 1 deletion(-)

diff --git a/tracecmd/trace-vm.c b/tracecmd/trace-vm.c
index c0333b67..d3a37eee 100644
--- a/tracecmd/trace-vm.c
+++ b/tracecmd/trace-vm.c
@@ -9,6 +9,7 @@
 #include <sys/types.h>
 #include <dirent.h>
 #include <limits.h>
+#include <unistd.h>
 
 #include "trace-local.h"
 #include "trace-msg.h"
@@ -87,6 +88,227 @@ static struct trace_guest *add_guest(unsigned int cid, const char *name)
 	return &guests[guests_len - 1];
 }
 
+/* Create the "vsock_find_pid" instance and start tracing sched_waking and kvm_exit */
+static struct tracefs_instance *start_trace_connect(void)
+{
+	struct tracefs_instance *inst = tracefs_instance_create("vsock_find_pid");
+
+	if (inst) {
+		tracefs_instance_file_write(inst, "events/sched/sched_waking/enable", "1");
+		tracefs_instance_file_write(inst, "events/kvm/kvm_exit/enable", "1");
+		tracefs_trace_on(inst);
+	}
+
+	return inst;
+}
+
+struct pids {
+	struct pids		*next;	/* next node of the singly linked list, NULL at the end */
+	int			pid;	/* PID stored by add_pid() */
+};
+
+struct trace_fields {
+	struct tep_event		*sched_waking;	/* the sched/sched_waking event */
+	struct tep_event		*kvm_exit;	/* the kvm/kvm_exit event */
+	struct tep_format_field		*common_pid;	/* "common_pid" field, read from every record */
+	struct tep_format_field		*sched_next;	/* "pid" field of sched_waking */
+	struct pids			*pids;		/* PIDs whose events are followed */
+	int				found_pid;	/* PID that did the kvm_exit, -1 if none */
+};
+
+/* Free every node of the list headed by @pids */
+static void free_pids(struct pids *pids)
+{
+	struct pids *node;
+
+	for (node = pids; node; node = pids) {
+		pids = node->next;
+		free(node);
+	}
+}
+
+/* Prepend @pid to the list at *@pids; the list is left as is if malloc() fails */
+static void add_pid(struct pids **pids, int pid)
+{
+	struct pids *node;
+
+	node = malloc(sizeof(*node));
+	if (node) {
+		node->pid = pid;
+		node->next = *pids;
+		*pids = node;
+	}
+}
+
+/* Return true if @pid is stored in the list @pids */
+static bool match_pid(struct pids *pids, int pid)
+{
+	for (; pids; pids = pids->next) {
+		if (pids->pid == pid)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * Called for each record of the trace. Follows the wake ups that start at
+ * the PIDs in fields->pids, and records in fields->found_pid the first of
+ * those tasks that does a kvm_exit. Returns -1 once found, otherwise 0.
+ */
+static int callback(struct tep_event *event, struct tep_record *record, int cpu,
+		    void *data)
+{
+	struct trace_fields *fields = data;
+	struct tep_handle *tep = event->tep;
+	unsigned long long num;
+	int event_id;
+	int pid;
+
+	if (tep_read_number_field(fields->common_pid, record->data, &num) < 0)
+		return 0;
+	pid = num;
+
+	/* Ignore events in interrupts */
+	if (tep_data_flags(tep, record) & (TRACE_FLAG_HARDIRQ | TRACE_FLAG_SOFTIRQ))
+		return 0;
+
+	/*
+	 * Only events from this task, or from a task woken up (directly
+	 * or indirectly) by this task, are of interest.
+	 */
+	if (!match_pid(fields->pids, pid))
+		return 0;
+
+	event_id = tep_data_type(tep, record);
+
+	/*
+	 * A kvm_exit from one of those tasks gives us our PID,
+	 * and there is nothing more to look for.
+	 */
+	if (event_id == fields->kvm_exit->id) {
+		fields->found_pid = pid;
+		return -1;
+	}
+
+	/* The only other event that matters is a wake up done by a followed task */
+	if (event_id != fields->sched_waking->id)
+		return 0;
+
+	if (tep_read_number_field(fields->sched_next, record->data, &num) < 0)
+		return 0;
+
+	/* Remember the woken task so that its own events are followed as well */
+	add_pid(&fields->pids, (int)num);
+	return 0;
+}
+
+/*
+ * Return the thread group ID of @pid, taken from the "Tgid:" line of
+ * /proc/<pid>/status, or -1 if the file cannot be read or has no such line.
+ */
+static int find_tgid(int pid)
+{
+	static const char key[] = "Tgid:";
+	const size_t key_len = sizeof(key) - 1;
+	FILE *fp;
+	char *path;
+	char *buf = NULL;
+	char *end;
+	size_t l = 0;
+	long val;
+	int tgid = -1;
+
+	if (asprintf(&path, "/proc/%d/status", pid) < 0)
+		return -1;
+
+	fp = fopen(path, "r");
+	free(path);
+	if (!fp)
+		return -1;
+
+	while (getline(&buf, &l, fp) > 0) {
+		if (strncmp(buf, key, key_len) != 0)
+			continue;
+		/* strtol() skips the white space that follows the colon */
+		val = strtol(buf + key_len, &end, 10);
+		if (end != buf + key_len && val > 0 && val <= INT_MAX)
+			tgid = val;
+		break;
+	}
+	free(buf);
+	fclose(fp);
+
+	return tgid;
+}
+
+/*
+ * Stop the trace, scan it for the task that ran kvm_exit and destroy the
+ * instance. Returns the Tgid of that task, or -1 if it was not found.
+ */
+static int stop_trace_connect(struct tracefs_instance *open_instance)
+{
+	const char *systems[] = { "kvm", "sched", NULL};
+	/* found_pid is read after "out:", so it must be valid on every path */
+	struct trace_fields trace_fields = { .found_pid = -1 };
+	struct tep_handle *tep;
+
+	if (!open_instance)
+		return -1;
+
+	/* The connection is finished, stop tracing, we have what we want */
+	tracefs_trace_off(open_instance);
+	tracefs_instance_file_write(open_instance, "events//enable", "0");
+	tep = tracefs_local_events_system(NULL, systems);
+	if (!tep)
+		goto out;
+	trace_fields.sched_waking = tep_find_event_by_name(tep, "sched", "sched_waking");
+	if (!trace_fields.sched_waking)
+		goto out;
+	trace_fields.kvm_exit = tep_find_event_by_name(tep, "kvm", "kvm_exit");
+	if (!trace_fields.kvm_exit)
+		goto out;
+	trace_fields.common_pid = tep_find_common_field(trace_fields.sched_waking, "common_pid");
+	if (!trace_fields.common_pid)
+		goto out;
+	trace_fields.sched_next = tep_find_any_field(trace_fields.sched_waking, "pid");
+	if (!trace_fields.sched_next)
+		goto out;
+
+	add_pid(&trace_fields.pids, getpid());
+	tracefs_iterate_raw_events(tep, open_instance, NULL, 0, callback, &trace_fields);
+	free_pids(trace_fields.pids);
+ out:
+	tep_free(tep);
+	tracefs_instance_destroy(open_instance);
+	tracefs_instance_free(open_instance);
+
+	if (trace_fields.found_pid > 0)
+		return find_tgid(trace_fields.found_pid);
+	return -1;
+}
+
+/*
+ * Find the host task that runs the guest associated with guest->cid.
+ * Trace the sched_waking and kvm_exit events while connecting to the cid
+ * (the port does not matter, -1 is used so that nothing is connected to).
+ * The task that is woken up, directly or through a chain of wake ups, by
+ * this code and then calls kvm_exit is a task running the guest. The task
+ * group id (Tgid) in its /proc/<pid>/status file is the PID of the task
+ * running all the threads, and is saved in guest->pid (-1 on failure).
+ */
+static void find_pid_by_cid(struct trace_guest *guest)
+{
+	/* Tracing has to be running before the connection is attempted */
+	struct tracefs_instance *trace = start_trace_connect();
+	int sock = trace_open_vsock(guest->cid, -1);
+
+	guest->pid = stop_trace_connect(trace);
+
+	/* Nothing should have been connected, but close the socket if it was */
+	if (sock >= 0)
+		close(sock);
+}
+
 struct trace_guest *trace_get_guest(unsigned int cid, const char *name)
 {
 	struct trace_guest *guest = NULL;
@@ -99,8 +321,11 @@ struct trace_guest *trace_get_guest(unsigned int cid, const char *name)
 
 	if (cid > 0) {
 		guest = get_guest_by_cid(cid);
-		if (!guest && name)
+		if (!guest && name) {
 			guest = add_guest(cid, name);
+			if (guest)
+				find_pid_by_cid(guest);
+		}
 	}
 	return guest;
 }
-- 
2.29.2


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] trace-cmd: Find PID of host-guest task from tracing vsock connection
  2021-05-06 21:14 [PATCH v2] trace-cmd: Find PID of host-guest task from tracing vsock connection Steven Rostedt
@ 2021-05-07  0:32 ` Dario Faggioli
  2021-05-07  1:40   ` Steven Rostedt
  0 siblings, 1 reply; 7+ messages in thread
From: Dario Faggioli @ 2021-05-07  0:32 UTC (permalink / raw)
  To: Steven Rostedt, Linux Trace Devel

[-- Attachment #1: Type: text/plain, Size: 2474 bytes --]

On Thu, 2021-05-06 at 17:14 -0400, Steven Rostedt wrote:
> Searching for the qemu task to find what task the CID is for is not a
> reliable method, as the qemu executable may be called different names
> on
> different systems, and qemu may not even be used.
> 
> Instead, trace the sched_waking and kvm_exit events and do a vsock
> connection to the CID. By doing so, you can find the task that runs the
> guest.
> 
>  trace-cmd wakes up "vhost-<pid>"
>  vhost-<pid> wakes up "CPUX/KVM"
>  CPUX/KVM calls "kvm_exit"
> 
This may very well be me, but I don't get why we need to follow the
wakeup chains.

Doesn't a VMExit always happen in the thread that runs the vCPU anyway?

I mean, just by tracing 'kvm:kvm_exit', I get:

 <...>-74350 [003] 903542.374465: kvm_exit:             reason MSR_WRITE rip 0xffffffffac4757d4 info 0 0
 <...>-74355 [012] 903542.374465: kvm_exit:             reason MSR_WRITE rip 0xffffffffac4757d4 info 0 0
 <...>-74350 [003] 903542.374470: kvm_exit:             reason MSR_WRITE rip 0xffffffffac4757d4 info 0 0
 <...>-74355 [012] 903542.374470: kvm_exit:             reason MSR_WRITE rip 0xffffffffac4757d4 info 0 0
 <...>-74355 [012] 903542.374485: kvm_exit:             reason MSR_WRITE rip 0xffffffffac4757d4 info 0 0
 <...>-74355 [012] 903542.374487: kvm_exit:             reason HLT rip 0xffffffffacca465d info 0 0
 <...>-74350 [003] 903542.374488: kvm_exit:             reason MSR_WRITE rip 0xffffffffac4757d4 info 0 0
 <...>-74350 [003] 903542.374490: kvm_exit:             reason HLT rip 0xffffffffacca465d info 0 0
 <...>-74341 [126] 903542.374813: kvm_exit:             reason MSR_WRITE rip 0xffffffffac4757d4 info 0 0

This tells us already that 74341, 74350 and 74355 are the pids of some
vcpu threads on the host.

At this point, /proc will tell us that these are in fact vcpus of the
same VM, and which is the PID of the thread group leader that we want
to use in /sys/kernel/debug/kvm:

grep Tgid /proc/{74341,74350,74355}/status
/proc/74341/status:Tgid:        74306
/proc/74350/status:Tgid:        74306
/proc/74355/status:Tgid:        74306

ls /sys/kernel/debug/kvm/
74306-19

What am I missing?

Regards
-- 
Dario Faggioli, Ph.D
http://about.me/dario.faggioli
Virtualization Software Engineer
SUSE Labs, SUSE https://www.suse.com/
-------------------------------------------------------------------
<<This happens because _I_ choose it to happen!>> (Raistlin Majere)

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] trace-cmd: Find PID of host-guest task from tracing vsock connection
  2021-05-07  0:32 ` Dario Faggioli
@ 2021-05-07  1:40   ` Steven Rostedt
  2021-05-07  4:20     ` Tzvetomir Stoyanov
  0 siblings, 1 reply; 7+ messages in thread
From: Steven Rostedt @ 2021-05-07  1:40 UTC (permalink / raw)
  To: Dario Faggioli; +Cc: Linux Trace Devel

On Fri, 07 May 2021 02:32:05 +0200
Dario Faggioli <dfaggioli@suse.com> wrote:

> What am I missing?

When you have 10 guests running, and you are only tracing one. How do
you know which guest that kvm exit was for?

-- Steve

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] trace-cmd: Find PID of host-guest task from tracing vsock connection
  2021-05-07  1:40   ` Steven Rostedt
@ 2021-05-07  4:20     ` Tzvetomir Stoyanov
  2021-05-07 11:58       ` Steven Rostedt
  0 siblings, 1 reply; 7+ messages in thread
From: Tzvetomir Stoyanov @ 2021-05-07  4:20 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Dario Faggioli, Linux Trace Devel

On Fri, May 7, 2021 at 5:12 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Fri, 07 May 2021 02:32:05 +0200
> Dario Faggioli <dfaggioli@suse.com> wrote:
>
> > What am I missing?
>
> When you have 10 guests running, and you are only tracing one. How do
> you know which guest that kvm exit was for?
>

As Dario said, we can get the guest from the /proc/{kvm_exit task
ID}/status - the Tgid written there is the guest, the host task which
runs that guest.

> -- Steve



-- 
Tzvetomir (Ceco) Stoyanov
VMware Open Source Technology Center

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] trace-cmd: Find PID of host-guest task from tracing vsock connection
  2021-05-07  4:20     ` Tzvetomir Stoyanov
@ 2021-05-07 11:58       ` Steven Rostedt
  2021-05-07 12:48         ` Dario Faggioli
  0 siblings, 1 reply; 7+ messages in thread
From: Steven Rostedt @ 2021-05-07 11:58 UTC (permalink / raw)
  To: Tzvetomir Stoyanov; +Cc: Dario Faggioli, Linux Trace Devel

On Fri, 7 May 2021 07:20:15 +0300
Tzvetomir Stoyanov <tz.stoyanov@gmail.com> wrote:

> As Dario said, we can get the guest from the /proc/{kvm_exit task
> ID}/status - the Tgid written there is the guest, the host task which
> runs that guest.

Maybe I'm not understanding the question.

> Doesn't a VMExit always happen in the thread that runs the vCPU anyway?

The problem is that we do not know what Tgid we are looking for. If you
have 10 guests, you have 10 different Tgid's that are running those guests.
How do you know which guest is attached to the cid you are looking for?
There's currently no way to find that mapping. In fact, that's why I posted
this patch:


  https://lore.kernel.org/kvm/20210505163855.32dad8e7@gandalf.local.home/


So you trace the kvm exit, and all 10 guests were active at that time, and
you have 100 threads that called kvm exit. Now how do you find the pid of
the task that owns the cid you are looking for, especially if each guest
has their own cid?

Oh, and the guest is run by a fork of qemu that has some other name! So you
can not rely on looking at the thread's cmdline.

If you look at the code I have, I use the wake ups to find the thread of
the task that is woken up by the current task.

 my-task:

    start tracing kvm_exit and sched_waking

    connect to cid I want.
    my_task -> wakes up vhost-worker
    vhost-worker -> wakes up guest thread with cid I want
    guest-thread -> does kvm_exit

    stop tracing.

    Follow the wake ups to find that guest-thread pid

    Now look at /proc/guest-thread-pid/status
    and get the Tgid of the task in /debug/kvm.

That's what this patch does.

Again, how else can you map the cid to the guest thread? That's the missing
link that following the wake ups give you.

-- Steve

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] trace-cmd: Find PID of host-guest task from tracing vsock connection
  2021-05-07 11:58       ` Steven Rostedt
@ 2021-05-07 12:48         ` Dario Faggioli
  2021-05-07 13:13           ` Steven Rostedt
  0 siblings, 1 reply; 7+ messages in thread
From: Dario Faggioli @ 2021-05-07 12:48 UTC (permalink / raw)
  To: Steven Rostedt, Tzvetomir Stoyanov; +Cc: Linux Trace Devel

[-- Attachment #1: Type: text/plain, Size: 2269 bytes --]

On Fri, 2021-05-07 at 07:58 -0400, Steven Rostedt wrote:
> On Fri, 7 May 2021 07:20:15 +0300
> Tzvetomir Stoyanov <tz.stoyanov@gmail.com> wrote:
> > 
> 
> > Doesn't a VMExit always happen in the thread that runs the vCPU
> > anyway?
> 
> The problem is that we do not know what Tgid we are looking for. If
> you
> have 10 guests, you have 10 different Tgid's that are running those
> guests.
> How do you know which guest is attached to the cid you are looking
> for?
> There's currently no way to find that mapping. 
>
Yes, it is true that this part is missing. I have, like, a thought in
the back of my head that this may be doable in some way, but I can't
actually come up with a working solution!

What prompted my question was the fact that I was not understanding the
focus on this side of the issue, when we had no link to the CID anyway.
And that because...

> In fact, that's why I posted
> this patch:
>       
> https://lore.kernel.org/kvm/20210505163855.32dad8e7@gandalf.local.home/
> 
... I had missed this patch. :-)

> So you trace the kvm exit, and all 10 guests were active at that
> time, and
> you have 100 threads that called kvm exit. Now how do you find the
> pid of
> the task that owns the cid you are looking for, especially if each
> guest
> has their own cid?
> 
Yeah, as said, I see it now.

> Oh, and the guest is run by a fork of qemu that has some other name!
> So you
> can not rely on looking at the thread's cmdline.
> 
Yeah, that probably depends where you look. Plus, we know we don't want to
rely on QEMU doing things in a specific way.

> Again, how else can you map the cid to the guest thread? That's the
> missing
> link that following the wake ups give you.
> 
Yep, understood. And despite that weird feeling that there may be
other/simpler ways, I can't name any, so we should go ahead with this
approach I guess. :-)

Thanks for taking the time to explain it, and sorry for the noise. :-D

Regards
-- 
Dario Faggioli, Ph.D
http://about.me/dario.faggioli
Virtualization Software Engineer
SUSE Labs, SUSE https://www.suse.com/
-------------------------------------------------------------------
<<This happens because _I_ choose it to happen!>> (Raistlin Majere)

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] trace-cmd: Find PID of host-guest task from tracing vsock connection
  2021-05-07 12:48         ` Dario Faggioli
@ 2021-05-07 13:13           ` Steven Rostedt
  0 siblings, 0 replies; 7+ messages in thread
From: Steven Rostedt @ 2021-05-07 13:13 UTC (permalink / raw)
  To: Dario Faggioli; +Cc: Tzvetomir Stoyanov, Linux Trace Devel

On Fri, 07 May 2021 14:48:25 +0200
Dario Faggioli <dfaggioli@suse.com> wrote:

> Thanks for taking the time to explain it, and sorry for the noise. :-D

This isn't noise. I welcome the discussion. This solution is a horrible
hack. Unfortunately, the only solutions that seem to work are all horrible
hacks, and this just happens to be the best solution of the horrible hacks!

:-p

-- Steve

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-05-07 13:13 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-06 21:14 [PATCH v2] trace-cmd: Find PID of host-guest task from tracing vsock connection Steven Rostedt
2021-05-07  0:32 ` Dario Faggioli
2021-05-07  1:40   ` Steven Rostedt
2021-05-07  4:20     ` Tzvetomir Stoyanov
2021-05-07 11:58       ` Steven Rostedt
2021-05-07 12:48         ` Dario Faggioli
2021-05-07 13:13           ` Steven Rostedt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).