linux-trace-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Vineeth Pillai <vineethrp@google.com>
To: linux-trace-devel@vger.kernel.org
Cc: Vineeth Pillai <vineethrp@google.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Joel Fernandes <joel@joelfernandes.org>
Subject: [PATCH] trace-cmd: rework of the pid detection of vcpus
Date: Wed,  4 May 2022 01:02:42 +0000	[thread overview]
Message-ID: <20220504010242.1388192-1-vineethrp@google.com> (raw)

The current detection of vcpu pid is based on the assumption that vcpuid
is monotonically increasing starting from 0 to max_cpus - 1. But the
crosvm uses the apic id as the vcpuid if topology is exposed to the
guest. And apicid can be non-contiguous.

This patch gets the max apicid from guest and then uses it to detect the
vcpu pids. Even this approach is not fool proof an it assumes that
apicid monotonically increases. If we encounter a scenario where it is
not the case, we might need to implement a mechanism to get a map of
cpuid to vcpu from the guest and then use that to detect the pid.

Signed-off-by: Vineeth Pillai <vineethrp@google.com>
---
 .../include/private/trace-cmd-private.h       |  5 +-
 lib/trace-cmd/trace-msg.c                     | 12 ++--
 lib/trace-cmd/trace-util.c                    | 49 ++++++++++++--
 tracecmd/include/trace-local.h                |  1 +
 tracecmd/trace-agent.c                        | 11 ++--
 tracecmd/trace-record.c                       | 65 +++++++++++--------
 6 files changed, 98 insertions(+), 45 deletions(-)

diff --git a/lib/trace-cmd/include/private/trace-cmd-private.h b/lib/trace-cmd/include/private/trace-cmd-private.h
index 766e0a7..49612b9 100644
--- a/lib/trace-cmd/include/private/trace-cmd-private.h
+++ b/lib/trace-cmd/include/private/trace-cmd-private.h
@@ -434,12 +434,12 @@ int tracecmd_msg_recv_trace_req(struct tracecmd_msg_handle *msg_handle,
 				struct tracecmd_tsync_protos **protos);
 
 int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle,
-				 int nr_cpus, int page_size,
+				 int nr_cpus, int max_apicid, int page_size,
 				 unsigned int *ports, bool use_fifos,
 				 unsigned long long trace_id,
 				 const char *tsync_proto, unsigned int tsync_port);
 int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle,
-				 int *nr_cpus, int *page_size,
+				 int *nr_cpus, int *max_apicid, int *page_size,
 				 unsigned int **ports, bool *use_fifos,
 				 unsigned long long *trace_id,
 				 char **tsync_proto,
@@ -595,6 +595,7 @@ int tracecmd_set_logfile(char *logfile);
 /* --- System --- */
 unsigned long long tracecmd_generate_traceid(void);
 int tracecmd_count_cpus(void);
+int tracecmd_count_cpus_apicid(int *max_apicid);
 
 /* --- Hack! --- */
 int tracecmd_blk_hack(struct tracecmd_input *handle);
diff --git a/lib/trace-cmd/trace-msg.c b/lib/trace-cmd/trace-msg.c
index 39465ad..1bb56a0 100644
--- a/lib/trace-cmd/trace-msg.c
+++ b/lib/trace-cmd/trace-msg.c
@@ -73,6 +73,7 @@ struct tracecmd_msg_trace_req {
 struct tracecmd_msg_trace_resp {
 	be32 flags;
 	be32 cpus;
+	be32 max_apicid;
 	be32 page_size;
 	u64 trace_id;
 	char tsync_proto_name[TRACECMD_TSYNC_PNAME_LENGTH];
@@ -1295,7 +1296,8 @@ out:
 	return ret;
 }
 
-static int make_trace_resp(struct tracecmd_msg *msg, int page_size, int nr_cpus,
+static int make_trace_resp(struct tracecmd_msg *msg, int page_size,
+			   int nr_cpus, int max_apicid,
 			   unsigned int *ports, bool use_fifos,
 			   unsigned long long trace_id,
 			   const char *tsync_proto,
@@ -1319,6 +1321,7 @@ static int make_trace_resp(struct tracecmd_msg *msg, int page_size, int nr_cpus,
 	msg->trace_resp.tsync_port = htonl(tsync_port);
 
 	msg->trace_resp.cpus = htonl(nr_cpus);
+	msg->trace_resp.max_apicid = htonl(max_apicid);
 	msg->trace_resp.page_size = htonl(page_size);
 	msg->trace_resp.trace_id = htonll(trace_id);
 
@@ -1326,7 +1329,7 @@ static int make_trace_resp(struct tracecmd_msg *msg, int page_size, int nr_cpus,
 }
 
 int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle,
-				 int nr_cpus, int page_size,
+				 int nr_cpus, int max_apicid, int page_size,
 				 unsigned int *ports, bool use_fifos,
 				 unsigned long long trace_id,
 				 const char *tsync_proto, unsigned int tsync_port)
@@ -1335,7 +1338,7 @@ int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle,
 	int ret;
 
 	tracecmd_msg_init(MSG_TRACE_RESP, &msg);
-	ret = make_trace_resp(&msg, page_size, nr_cpus, ports,
+	ret = make_trace_resp(&msg, page_size, nr_cpus, max_apicid, ports,
 			      use_fifos, trace_id, tsync_proto, tsync_port);
 	if (ret < 0)
 		return ret;
@@ -1344,7 +1347,7 @@ int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle,
 }
 
 int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle,
-				 int *nr_cpus, int *page_size,
+				 int *nr_cpus, int *max_apicid, int *page_size,
 				 unsigned int **ports, bool *use_fifos,
 				 unsigned long long *trace_id,
 				 char **tsync_proto,
@@ -1371,6 +1374,7 @@ int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle,
 	}
 
 	*use_fifos = ntohl(msg.trace_resp.flags) & MSG_TRACE_USE_FIFOS;
+	*max_apicid = ntohl(msg.trace_resp.max_apicid);
 	*nr_cpus = ntohl(msg.trace_resp.cpus);
 	*page_size = ntohl(msg.trace_resp.page_size);
 	*trace_id = ntohll(msg.trace_resp.trace_id);
diff --git a/lib/trace-cmd/trace-util.c b/lib/trace-cmd/trace-util.c
index 9564c81..08cf978 100644
--- a/lib/trace-cmd/trace-util.c
+++ b/lib/trace-cmd/trace-util.c
@@ -550,6 +550,18 @@ int tracecmd_stack_tracer_status(int *status)
  * Returns the number of CPUs in the system, or 0 in case of an error
  */
 int tracecmd_count_cpus(void)
+{
+	return tracecmd_count_cpus_apicid(NULL);
+}
+
+/**
+ * tracecmd_count_cpus_apicid - Get the number of CPUs in the system
+ * @max_apicid: if not NULL, returns the max apicid in the system.
+ *
+ * Returns the number of CPUs in the system and optionally max apicid,
+ * or 0 in case of an error
+ */
+int tracecmd_count_cpus_apicid(int *max_apicid)
 {
 	static int once;
 	char buf[1024];
@@ -560,16 +572,22 @@ int tracecmd_count_cpus(void)
 	size_t n;
 	int r;
 
-	cpus = sysconf(_SC_NPROCESSORS_CONF);
-	if (cpus > 0)
-		return cpus;
+	if (!max_apicid) {
+		cpus = sysconf(_SC_NPROCESSORS_CONF);
+		if (cpus > 0)
+			return cpus;
 
-	if (!once) {
-		once++;
-		tracecmd_warning("sysconf could not determine number of CPUS");
+		if (!once) {
+			once++;
+			tracecmd_warning("sysconf could not determine number of CPUS");
+		}
 	}
 
-	/* Do the hack to figure out # of CPUS */
+	/*
+	 * We reach here if we need to return max apicid or if
+	 * _SC_NPROCESSORS_CONF failed.
+	 */
+	*max_apicid = 0;
 	n = 1024;
 	pn = &n;
 	pbuf = buf;
@@ -583,6 +601,23 @@ int tracecmd_count_cpus(void)
 	while ((r = getline(&pbuf, pn, fp)) >= 0) {
 		char *p;
 
+		if (max_apicid && strncmp(buf, "apicid", 6) == 0) {
+			for (p = buf+6; isspace(*p); p++)
+				;
+			if (*p == ':') {
+				int apicid;
+				for (p = p+1; isspace(*p); p++)
+					;
+				apicid = strtol(p, NULL, 0);
+				if (errno != 0) {
+					tracecmd_warning("max apicid could not be read!");
+				} else if (*max_apicid < apicid) {
+					*max_apicid = apicid;
+				}
+			}
+			continue;
+		}
+
 		if (strncmp(buf, "processor", 9) != 0)
 			continue;
 		for (p = buf+9; isspace(*p); p++)
diff --git a/tracecmd/include/trace-local.h b/tracecmd/include/trace-local.h
index e3fec13..13064db 100644
--- a/tracecmd/include/trace-local.h
+++ b/tracecmd/include/trace-local.h
@@ -278,6 +278,7 @@ struct buffer_instance {
 	int			tracing_on_fd;
 	int			buffer_size;
 	int			cpu_count;
+	int			max_apicid;
 
 	int			argc;
 	char			**argv;
diff --git a/tracecmd/trace-agent.c b/tracecmd/trace-agent.c
index f0723a6..81d57ec 100644
--- a/tracecmd/trace-agent.c
+++ b/tracecmd/trace-agent.c
@@ -122,7 +122,8 @@ static void trace_print_connection(int fd, const char *network)
 		tracecmd_debug("Could not print connection fd:%d\n", fd);
 }
 
-static void agent_handle(int sd, int nr_cpus, int page_size, const char *network)
+static void agent_handle(int sd, int nr_cpus, int max_apicid,
+			 int page_size, const char *network)
 {
 	struct tracecmd_tsync_protos *tsync_protos = NULL;
 	struct tracecmd_time_sync *tsync = NULL;
@@ -197,7 +198,7 @@ static void agent_handle(int sd, int nr_cpus, int page_size, const char *network
 		}
 	}
 	trace_id = tracecmd_generate_traceid();
-	ret = tracecmd_msg_send_trace_resp(msg_handle, nr_cpus, page_size,
+	ret = tracecmd_msg_send_trace_resp(msg_handle, nr_cpus, max_apicid, page_size,
 					   ports, use_fifos, trace_id,
 					   tsync_proto, tsync_port);
 	if (ret < 0)
@@ -255,7 +256,7 @@ static void agent_serve(unsigned int port, bool do_daemon, const char *network)
 	struct sockaddr *addr = NULL;
 	socklen_t *addr_len_p = NULL;
 	socklen_t addr_len = sizeof(net_addr);
-	int sd, cd, nr_cpus;
+	int sd, cd, nr_cpus, max_apicid;
 	unsigned int cid;
 	pid_t pid;
 
@@ -266,7 +267,7 @@ static void agent_serve(unsigned int port, bool do_daemon, const char *network)
 		addr_len_p = &addr_len;
 	}
 
-	nr_cpus = tracecmd_count_cpus();
+	nr_cpus = tracecmd_count_cpus_apicid(&max_apicid);
 	page_size = getpagesize();
 
 	if (network) {
@@ -311,7 +312,7 @@ static void agent_serve(unsigned int port, bool do_daemon, const char *network)
 		if (pid == 0) {
 			close(sd);
 			signal(SIGCHLD, SIG_DFL);
-			agent_handle(cd, nr_cpus, page_size, network);
+			agent_handle(cd, nr_cpus, max_apicid, page_size, network);
 		}
 		if (pid > 0)
 			handler_pid = pid;
diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
index 27c4e7b..6a680b5 100644
--- a/tracecmd/trace-record.c
+++ b/tracecmd/trace-record.c
@@ -3865,7 +3865,9 @@ struct trace_mapping {
 	struct tep_format_field		*common_pid;
 	int				*pids;
 	int				*map;
+	int				nr_cpus;
 	int				max_cpus;
+	int				max_apicid;
 };
 
 static void start_mapping_vcpus(struct trace_guest *guest)
@@ -3937,22 +3939,18 @@ static int map_vcpus(struct tep_event *event, struct tep_record *record,
 	cpu = (int)val;
 
 	/* Sanity check, warn? */
-	if (cpu >= tmap->max_cpus)
+	if (cpu > tmap->max_apicid)
 		return 0;
 
 	/* Already have this one? Should we check if it is the same? */
-	if (tmap->map[cpu] >= 0)
+	if (tmap->map[cpu] > 0)
 		return 0;
 
 	tmap->map[cpu] = pid;
+	tmap->nr_cpus++;
 
 	/* Did we get them all */
-	for (i = 0; i < tmap->max_cpus; i++) {
-		if (tmap->map[i] < 0)
-			break;
-	}
-
-	return i == tmap->max_cpus;
+	return tmap->nr_cpus == tmap->max_cpus;
 }
 
 static void stop_mapping_vcpus(struct buffer_instance *instance,
@@ -3961,21 +3959,19 @@ static void stop_mapping_vcpus(struct buffer_instance *instance,
 	struct trace_mapping tmap = { };
 	struct tep_handle *tep;
 	const char *systems[] = { "kvm", NULL };
-	int i;
 
 	if (!guest->instance)
 		return;
 
 	tmap.pids = guest->task_pids;
+	tmap.nr_cpus = 0;
 	tmap.max_cpus = instance->cpu_count;
+	tmap.max_apicid = instance->max_apicid;
 
-	tmap.map = malloc(sizeof(*tmap.map) * tmap.max_cpus);
+	tmap.map = calloc((tmap.max_apicid + 1), sizeof(*tmap.map));
 	if (!tmap.map)
 		return;
 
-	for (i = 0; i < tmap.max_cpus; i++)
-		tmap.map[i] = -1;
-
 	tracefs_instance_file_write(guest->instance, "events/kvm/kvm_entry/enable", "0");
 
 	tep = tracefs_local_events_system(NULL, systems);
@@ -3996,17 +3992,25 @@ static void stop_mapping_vcpus(struct buffer_instance *instance,
 
 	tracefs_iterate_raw_events(tep, guest->instance, NULL, 0, map_vcpus, &tmap);
 
-	for (i = 0; i < tmap.max_cpus; i++) {
-		if (tmap.map[i] < 0)
-			break;
-	}
-	/* We found all the mapped CPUs */
-	if (i == tmap.max_cpus) {
-		guest->cpu_pid = tmap.map;
-		guest->cpu_max = tmap.max_cpus;
-		tmap.map = NULL;
+	if (tmap.max_apicid > tmap.max_cpus) {
+		int *map = calloc(tmap.max_cpus, sizeof(int));
+		int j = 0;
+		for (int i = 0; i <= tmap.max_apicid; i++) {
+			if (tmap.map[i] == 0)
+				continue;
+			map[j++] = tmap.map[i];
+			/* We found all the mapped CPUs */
+			if (j >= tmap.max_cpus)
+				break;
+		}
+		free(tmap.map);
+		tmap.map = map;
 	}
 
+	guest->cpu_pid = tmap.map;
+	guest->cpu_max = tmap.max_cpus;
+	tmap.map = NULL;
+
  out_free:
 	tep_free(tep);
  out:
@@ -4057,7 +4061,7 @@ static void connect_to_agent(struct common_record_context *ctx,
 			     struct buffer_instance *instance)
 {
 	struct tracecmd_tsync_protos *protos = NULL;
-	int sd, ret, nr_fifos, nr_cpus, page_size;
+	int sd, ret, nr_fifos, nr_cpus, max_apicid, page_size;
 	struct tracecmd_msg_handle *msg_handle;
 	enum tracecmd_time_sync_role role;
 	char *tsync_protos_reply = NULL;
@@ -4105,18 +4109,26 @@ static void connect_to_agent(struct common_record_context *ctx,
 		free(protos->names);
 		free(protos);
 	}
-	ret = tracecmd_msg_recv_trace_resp(msg_handle, &nr_cpus, &page_size,
+	ret = tracecmd_msg_recv_trace_resp(msg_handle, &nr_cpus, &max_apicid, &page_size,
 					   &ports, &use_fifos,
 					   &instance->trace_id,
 					   &tsync_protos_reply, &tsync_port);
 	if (ret < 0)
 		die("Failed to receive trace response %d", ret);
+
+	/*
+	 * This shouldn't happen, but if max apicid was wrong/zero for some reason
+	 * reset it to max cpus.
+	 */
+	if (max_apicid < nr_cpus)
+		max_apicid = nr_cpus;
+	instance->max_apicid = max_apicid;
+	instance->cpu_count = nr_cpus;
 	if (tsync_protos_reply && tsync_protos_reply[0]) {
 		if (tsync_proto_is_supported(tsync_protos_reply)) {
 			printf("Negotiated %s time sync protocol with guest %s\n",
 				tsync_protos_reply,
 				instance->name);
-			instance->cpu_count = nr_cpus;
 			host_tsync(ctx, instance, tsync_port, tsync_protos_reply);
 		} else
 			warning("Failed to negotiate timestamps synchronization with the guest");
@@ -4128,7 +4140,7 @@ static void connect_to_agent(struct common_record_context *ctx,
 			warning("number of FIFOs (%d) for guest %s differs "
 				"from number of virtual CPUs (%d)",
 				nr_fifos, instance->name, nr_cpus);
-			nr_cpus = nr_cpus < nr_fifos ? nr_cpus : nr_fifos;
+			instance->cpu_count = nr_cpus < nr_fifos ? nr_cpus : nr_fifos;
 		}
 		free(ports);
 		instance->fds = fds;
@@ -4140,7 +4152,6 @@ static void connect_to_agent(struct common_record_context *ctx,
 	}
 
 	instance->use_fifos = use_fifos;
-	instance->cpu_count = nr_cpus;
 
 	/* the msg_handle now points to the guest fd */
 	instance->msg_handle = msg_handle;
-- 
2.36.0.464.gb9c8b46e94-goog


             reply	other threads:[~2022-05-04  1:02 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-04  1:02 Vineeth Pillai [this message]
2022-05-20 20:08 ` [PATCH] trace-cmd: rework of the pid detection of vcpus Steven Rostedt
2022-05-23 13:39   ` Vineeth Pillai
2022-05-23 13:47     ` Steven Rostedt
2022-05-24 15:35       ` Vineeth Pillai
2022-05-24 15:53         ` Steven Rostedt
2022-07-11 19:16 ` Steven Rostedt
2022-07-12 14:17   ` Vineeth Pillai

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220504010242.1388192-1-vineethrp@google.com \
    --to=vineethrp@google.com \
    --cc=joel@joelfernandes.org \
    --cc=linux-trace-devel@vger.kernel.org \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).