Linux-Trace-Devel Archive on lore.kernel.org
 help / color / Atom feed
From: "Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com>
To: rostedt@goodmis.org
Cc: linux-trace-devel@vger.kernel.org
Subject: [PATCH 1/4] libtracefs: Iterate over raw events in sorted order
Date: Tue,  6 Apr 2021 07:19:58 +0300
Message-ID: <20210406042001.912544-2-tz.stoyanov@gmail.com> (raw)
In-Reply-To: <20210406042001.912544-1-tz.stoyanov@gmail.com>

Changed the logic of tracefs_iterate_raw_events(): instead of iterating
in CPU order, walk through the events from all CPU buffers and read the
oldest first.
Fixed the problem with cpu number in the record, passed to the callback
- set the real CPU number instead of 0.

Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
---
 src/tracefs-events.c | 246 +++++++++++++++++++++++++++----------------
 1 file changed, 156 insertions(+), 90 deletions(-)

diff --git a/src/tracefs-events.c b/src/tracefs-events.c
index 825f916..4095665 100644
--- a/src/tracefs-events.c
+++ b/src/tracefs-events.c
@@ -51,120 +51,130 @@ page_to_kbuf(struct tep_handle *tep, void *page, int size)
 	return kbuf;
 }
 
-static int read_kbuf_record(struct kbuffer *kbuf, struct tep_record *record)
+struct cpu_iterate {
+	struct tep_record record;
+	struct tep_event *event;
+	struct kbuffer *kbuf;
+	void *page;
+	int psize;
+	int rsize;
+	int cpu;
+	int fd;
+};
+
+static int read_kbuf_record(struct cpu_iterate *cpu)
 {
 	unsigned long long ts;
 	void *ptr;
 
-	ptr = kbuffer_read_event(kbuf, &ts);
-	if (!ptr || !record)
+	if (!cpu || !cpu->kbuf)
+		return -1;
+	ptr = kbuffer_read_event(cpu->kbuf, &ts);
+	if (!ptr)
 		return -1;
 
-	memset(record, 0, sizeof(*record));
-	record->ts = ts;
-	record->size = kbuffer_event_size(kbuf);
-	record->record_size = kbuffer_curr_size(kbuf);
-	record->cpu = 0;
-	record->data = ptr;
-	record->ref_count = 1;
+	memset(&(cpu->record), 0, sizeof(cpu->record));
+	cpu->record.ts = ts;
+	cpu->record.size = kbuffer_event_size(cpu->kbuf);
+	cpu->record.record_size = kbuffer_curr_size(cpu->kbuf);
+	cpu->record.cpu = cpu->cpu;
+	cpu->record.data = ptr;
+	cpu->record.ref_count = 1;
 
-	kbuffer_next_event(kbuf, NULL);
+	kbuffer_next_event(cpu->kbuf, NULL);
 
 	return 0;
 }
 
-static int
-get_events_in_page(struct tep_handle *tep, void *page,
-		   int size, int cpu,
-		   int (*callback)(struct tep_event *,
-				   struct tep_record *,
-				   int, void *),
-		   void *callback_context)
+int read_next_page(struct tep_handle *tep, struct cpu_iterate *cpu)
 {
-	struct tep_record record;
-	struct tep_event *event;
-	struct kbuffer *kbuf;
-	int id, cnt = 0;
+	cpu->rsize = read(cpu->fd, cpu->page, cpu->psize);
+	if (cpu->rsize <= 0)
+		return -1;
+
+	cpu->kbuf = page_to_kbuf(tep, cpu->page, cpu->rsize);
+	if (!cpu->kbuf)
+		return -1;
+
+	return 0;
+}
+
+int read_next_record(struct tep_handle *tep, struct cpu_iterate *cpu)
+{
+	int id;
+
+	do {
+		while (!read_kbuf_record(cpu)) {
+			id = tep_data_type(tep, &(cpu->record));
+			cpu->event = tep_find_event(tep, id);
+			if (cpu->event)
+				return 0;
+		}
+	} while (!read_next_page(tep, cpu));
+
+	return -1;
+}
+
+static int read_cpu_pages(struct tep_handle *tep, struct cpu_iterate *cpus, int count,
+			  int (*callback)(struct tep_event *,
+					  struct tep_record *,
+					  int, void *),
+			  void *callback_context)
+{
+	bool has_data = false;
 	int ret;
+	int i, j;
 
-	if (size <= 0)
-		return 0;
+	for (i = 0; i < count; i++) {
+		ret = read_next_record(tep, cpus + i);
+		if (!ret)
+			has_data = true;
+	}
 
-	kbuf = page_to_kbuf(tep, page, size);
-	if (!kbuf)
-		return 0;
-
-	ret = read_kbuf_record(kbuf, &record);
-	while (!ret) {
-		id = tep_data_type(tep, &record);
-		event = tep_find_event(tep, id);
-		if (event) {
-			cnt++;
+	while (has_data) {
+		j = count;
+		for (i = 0; i < count; i++) {
+			if (!cpus[i].event)
+				continue;
+			if (j == count || cpus[j].record.ts > cpus[i].record.ts)
+				j = i;
+		}
+		if (j < count) {
 			if (callback &&
-			    callback(event, &record, cpu, callback_context))
+			    callback(cpus[j].event, &(cpus[j].record), cpus[j].cpu, callback_context))
 				break;
+			cpus[j].event = NULL;
+			read_next_record(tep, cpus + j);
+		} else {
+			has_data = false;
 		}
-		ret = read_kbuf_record(kbuf, &record);
 	}
 
-	kbuffer_free(kbuf);
-
-	return cnt;
+	return 0;
 }
 
-/*
- * tracefs_iterate_raw_events - Iterate through events in trace_pipe_raw,
- *				per CPU trace buffers
- * @tep: a handle to the trace event parser context
- * @instance: ftrace instance, can be NULL for the top instance
- * @cpus: Iterate only through the buffers of CPUs, set in the mask.
- *	  If NULL, iterate through all CPUs.
- * @cpu_size: size of @cpus set
- * @callback: A user function, called for each record from the file
- * @callback_context: A custom context, passed to the user callback function
- *
- * If the @callback returns non-zero, the iteration stops - in that case all
- * records from the current page will be lost from future reads
- *
- * Returns -1 in case of an error, or 0 otherwise
- */
-int tracefs_iterate_raw_events(struct tep_handle *tep,
-				struct tracefs_instance *instance,
-				cpu_set_t *cpus, int cpu_size,
-				int (*callback)(struct tep_event *,
-						struct tep_record *,
-						int, void *),
-				void *callback_context)
+static int open_cpu_fies(struct tracefs_instance *instance, cpu_set_t *cpus,
+			 int cpu_size, struct cpu_iterate **all_cpus, int *count)
 {
+	struct cpu_iterate *tmp;
 	unsigned int p_size;
 	struct dirent *dent;
 	char file[PATH_MAX];
-	void *page = NULL;
 	struct stat st;
+	int ret = -1;
+	int fd = -1;
 	char *path;
 	DIR *dir;
-	int ret;
 	int cpu;
-	int fd;
-	int r;
+	int i = 0;
 
-	if (!tep || !callback)
-		return -1;
-
-	p_size = getpagesize();
 	path = tracefs_instance_get_file(instance, "per_cpu");
 	if (!path)
 		return -1;
 	dir = opendir(path);
-	if (!dir) {
-		ret = -1;
-		goto error;
-	}
-	page = malloc(p_size);
-	if (!page) {
-		ret = -1;
-		goto error;
-	}
+	if (!dir)
+		goto out;
+	p_size = getpagesize();
 	while ((dent = readdir(dir))) {
 		const char *name = dent->d_name;
 
@@ -174,32 +184,88 @@ int tracefs_iterate_raw_events(struct tep_handle *tep,
 		if (cpus && !CPU_ISSET_S(cpu, cpu_size, cpus))
 			continue;
 		sprintf(file, "%s/%s", path, name);
-		ret = stat(file, &st);
-		if (ret < 0 || !S_ISDIR(st.st_mode))
+		if (stat(file, &st) < 0 || !S_ISDIR(st.st_mode))
 			continue;
 
 		sprintf(file, "%s/%s/trace_pipe_raw", path, name);
 		fd = open(file, O_RDONLY | O_NONBLOCK);
 		if (fd < 0)
 			continue;
-		do {
-			r = read(fd, page, p_size);
-			if (r > 0)
-				get_events_in_page(tep, page, r, cpu,
-						   callback, callback_context);
-		} while (r > 0);
-		close(fd);
+		tmp = realloc(*all_cpus, (i + 1) * sizeof(struct cpu_iterate));
+		if (!tmp) {
+			close(fd);
+			goto out;
+		}
+		memset(tmp + i, 0, sizeof(struct cpu_iterate));
+		tmp[i].fd = fd;
+		tmp[i].cpu = cpu;
+		tmp[i].page =  malloc(p_size);
+		tmp[i].psize = p_size;
+		*all_cpus = tmp;
+		*count = i + 1;
+		if (!tmp[i++].page)
+			goto out;
 	}
+
 	ret = 0;
 
-error:
+out:
 	if (dir)
 		closedir(dir);
-	free(page);
 	tracefs_put_tracing_file(path);
 	return ret;
 }
 
+/*
+ * tracefs_iterate_raw_events - Iterate through events in trace_pipe_raw,
+ *				per CPU trace buffers
+ * @tep: a handle to the trace event parser context
+ * @instance: ftrace instance, can be NULL for the top instance
+ * @cpus: Iterate only through the buffers of CPUs, set in the mask.
+ *	  If NULL, iterate through all CPUs.
+ * @cpu_size: size of @cpus set
+ * @callback: A user function, called for each record from the file
+ * @callback_context: A custom context, passed to the user callback function
+ *
+ * If the @callback returns non-zero, the iteration stops - in that case all
+ * records from the current page will be lost from future reads
+ * The events are iterated in sorted order, oldest first.
+ *
+ * Returns -1 in case of an error, or 0 otherwise
+ */
+int tracefs_iterate_raw_events(struct tep_handle *tep,
+				struct tracefs_instance *instance,
+				cpu_set_t *cpus, int cpu_size,
+				int (*callback)(struct tep_event *,
+						struct tep_record *,
+						int, void *),
+				void *callback_context)
+{
+	struct cpu_iterate *all_cpus = NULL;
+	int count = 0;
+	int ret;
+	int i;
+
+	if (!tep || !callback)
+		return -1;
+
+	ret = open_cpu_fies(instance, cpus, cpu_size, &all_cpus, &count);
+	if (ret < 0)
+		goto out;
+	ret = read_cpu_pages(tep, all_cpus, count, callback, callback_context);
+
+out:
+	if (all_cpus) {
+		for (i = 0; i < count; i++) {
+			close(all_cpus[i].fd);
+			free(all_cpus[i].page);
+		}
+		free(all_cpus);
+	}
+
+	return ret;
+}
+
 static char **add_list_string(char **list, const char *name, int len)
 {
 	if (!list)
-- 
2.30.2


  reply index

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-06  4:19 [PATCH 0/4] " Tzvetomir Stoyanov (VMware)
2021-04-06  4:19 ` Tzvetomir Stoyanov (VMware) [this message]
2021-04-06 16:01   ` [PATCH 1/4] libtracefs: " Steven Rostedt
2021-04-06  4:19 ` [PATCH 2/4] libtracefs: Unit test for iterate " Tzvetomir Stoyanov (VMware)
2021-04-06  4:20 ` [PATCH 3/4] libtracefs: Unit test for iterate over raw events in selected CPUs Tzvetomir Stoyanov (VMware)
2021-04-06  4:20 ` [PATCH 4/4] libtracefs: tracefs_iterate_raw_events() documentation update Tzvetomir Stoyanov (VMware)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210406042001.912544-2-tz.stoyanov@gmail.com \
    --to=tz.stoyanov@gmail.com \
    --cc=linux-trace-devel@vger.kernel.org \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Trace-Devel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-trace-devel/0 linux-trace-devel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-trace-devel linux-trace-devel/ https://lore.kernel.org/linux-trace-devel \
		linux-trace-devel@vger.kernel.org
	public-inbox-index linux-trace-devel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-trace-devel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git