All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/5] Add reading from per_cpu trace_pipe_raw helper functions
@ 2022-10-25 18:32 Steven Rostedt
  2022-10-25 18:32 ` [PATCH v2 1/5] libtracefs: Add reading of per cpu files Steven Rostedt
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Steven Rostedt @ 2022-10-25 18:32 UTC (permalink / raw)
  To: linux-trace-devel; +Cc: Steven Rostedt (Google)

From: "Steven Rostedt (Google)" <rostedt@goodmis.org>

Add functions that allow applications to read the trace_pipe_raw files:

    tracefs_cpu_create_fd()
    tracefs_cpu_open()
    tracefs_cpu_close()
    tracefs_cpu_read_size()
    tracefs_cpu_read()
    tracefs_cpu_buffered_read()
    tracefs_cpu_write()
    tracefs_cpu_stop()
    tracefs_cpu_flush()
    tracefs_cpu_flush_write()
    tracefs_cpu_pipe()

Changes since v1: https://lore.kernel.org/all/20221021182345.092cdb50@gandalf.local.home/

 - Changed the names of tracefs_cpu_pipe_read() to tracefs_cpu_buffered_read()
   and tracefs_cpu_pipe_write() to just tracefs_cpu_write()

 - Added tracefs_cpu_create_fd() to attach to an already opened file descriptor
   that could be just a copy of a trace_pipe_raw file, or to a socket for
   remote tracing.

 - Added tracefs_cpu_pipe() to write directly into a pipe (saves on a splice).

 - Fixed some blocking issues.

 - Added unit tests

Steven Rostedt (Google) (5):
  libtracefs: Add reading of per cpu files
  libtracefs: Add tracefs_cpu_create_fd()
  libtracefs: Add tracefs_cpu_pipe()
  libtracefs utest: Make helper functions for affinity
  libtracefs: Add unit tests for tracefs_cpu functions

 Documentation/libtracefs-cpu.txt | 239 +++++++++++++
 Documentation/libtracefs.txt     |  12 +
 include/tracefs.h                |  15 +
 samples/Makefile                 |   1 +
 scripts/utils.mk                 |   2 +-
 src/Makefile                     |   1 +
 src/tracefs-record.c             | 586 +++++++++++++++++++++++++++++++
 utest/tracefs-utest.c            | 372 +++++++++++++++++++-
 8 files changed, 1213 insertions(+), 15 deletions(-)
 create mode 100644 Documentation/libtracefs-cpu.txt
 create mode 100644 src/tracefs-record.c

-- 
2.35.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 1/5] libtracefs: Add reading of per cpu files
  2022-10-25 18:32 [PATCH v2 0/5] Add reading from per_cpu trace_pipe_raw helper functions Steven Rostedt
@ 2022-10-25 18:32 ` Steven Rostedt
  2022-10-25 18:32 ` [PATCH v2 2/5] libtracefs: Add tracefs_cpu_create_fd() Steven Rostedt
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Steven Rostedt @ 2022-10-25 18:32 UTC (permalink / raw)
  To: linux-trace-devel; +Cc: Steven Rostedt (Google)

From: "Steven Rostedt (Google)" <rostedt@goodmis.org>

Add the APIs:

    tracefs_cpu_open()
    tracefs_cpu_close()
    tracefs_cpu_read_size()
    tracefs_cpu_read()
    tracefs_cpu_buffered_read()
    tracefs_cpu_write()
    tracefs_cpu_stop()
    tracefs_cpu_flush()
    tracefs_cpu_flush_write()

That will attach to a trace_pipe_raw file for a given instance and allow
opening, reading and writing to a file from it.

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 Documentation/libtracefs-cpu.txt | 239 +++++++++++++++
 Documentation/libtracefs.txt     |  12 +
 include/tracefs.h                |  14 +
 samples/Makefile                 |   1 +
 scripts/utils.mk                 |   2 +-
 src/Makefile                     |   1 +
 src/tracefs-record.c             | 505 +++++++++++++++++++++++++++++++
 7 files changed, 773 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/libtracefs-cpu.txt
 create mode 100644 src/tracefs-record.c

diff --git a/Documentation/libtracefs-cpu.txt b/Documentation/libtracefs-cpu.txt
new file mode 100644
index 000000000000..d664ebb3082f
--- /dev/null
+++ b/Documentation/libtracefs-cpu.txt
@@ -0,0 +1,239 @@
+libtracefs(3)
+=============
+
+NAME
+----
+tracefs_cpu_open, tracefs_cpu_close, tracefs_cpu_read_size, tracefs_cpu_read,
+tracefs_cpu_buffered_read, tracefs_cpu_write, tracefs_cpu_stop, tracefs_cpu_flush,
+tracefs_cpu_flush_write - Reading trace_pipe_raw data
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <tracefs.h>*
+
+struct tracefs_cpu pass:[*]*tracefs_cpu_open*(struct tracefs_instance pass:[*]_instance_,
+				     int _cpu_, bool _nonblock_);
+void *tracefs_cpu_close*(struct tracefs_cpu pass:[*]_tcpu_);
+int *tracefs_cpu_read_size*(struct tracefs_cpu pass:[*]_tcpu_);
+int *tracefs_cpu_read*(struct tracefs_cpu pass:[*]_tcpu_, void pass:[*]_buffer_, bool _nonblock_);
+int *tracefs_cpu_buffered_read*(struct tracefs_cpu pass:[*]_tcpu_, void pass:[*]_buffer_, bool _nonblock_);
+int *tracefs_cpu_write*(struct tracefs_cpu pass:[*]_tcpu_, int _wfd_, bool _nonblock_);
+int *tracefs_cpu_stop*(struct tracefs_cpu pass:[*]_tcpu_);
+int *tracefs_cpu_flush*(struct tracefs_cpu pass:[*]_tcpu_, void pass:[*]_buffer_);
+int *tracefs_cpu_flush_write*(struct tracefs_cpu pass:[*]_tcpu_, int _wfd_);
+--
+
+DESCRIPTION
+-----------
+This set of APIs can be used to read the raw data from the trace_pipe_raw
+files in the tracefs file system.
+
+The *tracefs_cpu_open()* creates a descriptor that can read the tracefs
+trace_pipe_raw file for a given _cpu_ in a given _instance_. If _instance_ is
+NULL then the toplevel trace_pipe_raw file is used.
+
+The *tracefs_cpu_close()* closes all the file descriptors associated to the trace_pipe_raw
+opened by *tracefs_cpu_open()*.
+
+The *tracefs_cpu_read_size()* returns the subbuffer size of the trace_pipe_raw. This
+returns the minimum size of the buffer that is passed to the below functions.
+
+The *tracefs_cpu_read()* reads the trace_pipe_raw files associated to _tcpu_ into _buffer_.
+_buffer_ must be at least the size of the sub buffer of the ring buffer,
+which is returned by *tracefs_cpu_read_size()*. If _nonblock_ is set, and
+there's no data available, it will return immediately. Otherwise depending
+on how _tcpu_ was opened, it will block. If _tcpu_ was opened with nonblock
+set, then this _nonblock_ will make no difference.
+
+The *tracefs_cpu_buffered_read()* is basically the same as *tracefs_cpu_read()*
+except that it uses a pipe through splice to buffer reads. This will batch
+reads keeping the reading from the ring buffer less intrusive to the system,
+as just reading all the time can cause quite a disturbance. Note, one
+difference between this and *tracefs_cpu_read()* is that it will read only in
+sub buffer pages. If the ring buffer has not filled a page, then it will not
+return anything, even with _nonblock_ set.  Calls to *tracefs_cpu_flush()*
+should be done to read the rest of the file at the end of the trace.
+
+The *tracefs_cpu_write()* will pipe the data from the trace_pipe_raw
+file associated with _tcpu_ into the _wfd_ file descriptor. If _nonblock_ is set,
+then it will not block if there's nothing to write. Note, it will only write
+sub buffer size data to _wfd_. Calls to tracefs_cpu_flush_write() are needed to
+write out the rest.
+
+The *tracefs_cpu_stop()* will attempt to unblock a task blocked on _tcpu_ reading it.
+On older kernels, it may not do anything for the pipe reads, as older kernels do not
+wake up tasks waiting on the ring buffer. Returns 0 if it definitely woke up any possible
+waiters, but returns 1 if it is not sure it worked and waiters may need to have a signal
+sent to them.
+
+The *tracefs_cpu_flush()* reads the trace_pipe_raw file associated by the _tcpu_ and puts it
+into _buffer_, which must be the size of the sub buffer which is retrieved
+by *tracefs_cpu_read_size()*. This should be called at the end of tracing
+to get the rest of the data. This call will convert the file descriptor of
+trace_pipe_raw into non-blocking mode.
+
+The *tracefs_cpu_flush_write()* is the same as *tracefs_cpu_flush()* except it takes a file
+descriptor _wfd_ to flush the data into.
+
+RETURN VALUE
+------------
+The *tracefs_cpu_open()* returns a struct tracefs_cpu descriptor that can be
+used by the other functions or NULL on error.
+
+The *tracefs_cpu_read_size()* returns the minimum size of the buffers to be
+used with *tracefs_cpu_read()*, *tracefs_cpu_buffered_read()* and *tracefs_cpu_flush()*.
+Returns negative on error.
+
+The *tracefs_cpu_read()* returns the number of bytes read, or negative on error.
+
+The *tracefs_cpu_buffered_read()* returns the number of bytes read or negative on error.
+
+The *tracefs_cpu_write()* returns the number of bytes written to the file
+or negative on error.
+
+The *tracefs_cpu_stop()* returns zero if any waiters were guaranteed to be
+woken up from waiting on input, or returns one if this is an older kernel
+that does not supply that guarantee, and a signal may need to be sent to
+any waiters. Returns negative on error.
+
+The *tracefs_cpu_flush()* returns the number of bytes read or negative on error.
+
+The *tracefs_cpu_flush_write()* returns the number of bytes written to the
+file  or negative on error.
+
+EXAMPLE
+-------
+[source,c]
+--
+#define _LARGEFILE64_SOURCE
+#include <stdlib.h>
+#include <ctype.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <tracefs.h>
+
+struct thread_data {
+	struct tracefs_cpu	*tcpu;
+	int			done;
+	int			fd;
+};
+
+static void *thread_run(void *arg)
+{
+	struct thread_data *data = arg;
+	struct tracefs_cpu *tcpu = data->tcpu;
+	int fd = data->fd;
+	int ret;
+
+	while (!data->done) {
+		ret = tracefs_cpu_write(tcpu, fd, false);
+		printf("wrote %d\n", ret);
+	}
+	return NULL;
+}
+
+int main (int argc, char **argv)
+{
+	struct tracefs_instance *instance;
+	struct thread_data data;
+	pthread_t thread;
+	char *file;
+	int secs = 10;
+	int cpu;
+	int ret;
+
+	if (argc < 3 || !isdigit(argv[1][0])) {
+		printf("usage: %s cpu file_destination [sleep secs]\n\n", argv[0]);
+		exit(-1);
+	}
+
+	cpu = atoi(argv[1]);
+	file = argv[2];
+
+	if (argc > 3)
+		secs = atoi(argv[3]);
+
+	instance = tracefs_instance_create("cpu_write");
+	if (!instance) {
+		perror("create instance");
+		exit(-1);
+	}
+
+	memset(&data, 0, sizeof(data));
+
+	data.tcpu = tracefs_cpu_open(instance, cpu, 0);
+	if (!data.tcpu) {
+		perror("Open instance");
+		exit(-1);
+	}
+
+	data.fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE, 0644);
+	if (data.fd < 0) {
+		perror(file);
+		exit(-1);
+	}
+
+	pthread_create(&thread, NULL, thread_run, &data);
+
+	sleep(secs);
+
+	data.done = 1;
+	printf("stopping\n");
+	ret = tracefs_cpu_stop(data.tcpu);
+
+	printf("joining %d\n", ret);
+	pthread_join(thread, NULL);
+
+	tracefs_trace_off(instance);
+	do {
+		ret = tracefs_cpu_flush_write(data.tcpu, data.fd);
+		printf("flushed %d\n", ret);
+	} while (ret > 0);
+	tracefs_trace_on(instance);
+
+	tracefs_cpu_close(data.tcpu);
+	close(data.fd);
+
+	return 0;
+}
+--
+FILES
+-----
+[verse]
+--
+*tracefs.h*
+	Header file to include in order to have access to the library APIs.
+*-ltracefs*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+*libtracefs*(3),
+*libtraceevent*(3),
+*trace-cmd*(1)
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtracefs is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git/
+
+COPYING
+-------
+Copyright \(C) 2022 Google, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
diff --git a/Documentation/libtracefs.txt b/Documentation/libtracefs.txt
index b81c0301c27a..d41c7ab382ee 100644
--- a/Documentation/libtracefs.txt
+++ b/Documentation/libtracefs.txt
@@ -267,6 +267,18 @@ Histograms:
 	int *tracefs_hist_continue*(struct tracefs_instance pass:[*]_instance_, struct tracefs_hist pass:[*]_hist_);
 	int *tracefs_hist_reset*(struct tracefs_instance pass:[*]_instance_, struct tracefs_hist pass:[*]_hist_);
 
+Recording of trace_pipe_raw files:
+	struct tracefs_cpu pass:[*]*tracefs_cpu_open*(struct tracefs_instance pass:[*]_instance_,
+					     int _cpu_, bool _nonblock_);
+	void *tracefs_cpu_close*(struct tracefs_cpu pass:[*]_tcpu_);
+	int *tracefs_cpu_read_size*(struct tracefs_cpu pass:[*]_tcpu_);
+	int *tracefs_cpu_read*(struct tracefs_cpu pass:[*]_tcpu_, void pass:[*]_buffer_, bool _nonblock_);
+	int *tracefs_cpu_buffered_read*(struct tracefs_cpu pass:[*]_tcpu_, void pass:[*]_buffer_, bool _nonblock_);
+	int *tracefs_cpu_write*(struct tracefs_cpu pass:[*]_tcpu_, int _wfd_, bool _nonblock_);
+	int *tracefs_cpu_stop*(struct tracefs_cpu pass:[*]_tcpu_);
+	int *tracefs_cpu_flush*(struct tracefs_cpu pass:[*]_tcpu_, void pass:[*]_buffer_);
+	int *tracefs_cpu_flush_write*(struct tracefs_cpu pass:[*]_tcpu_, int _wfd_);
+
 --
 
 DESCRIPTION
diff --git a/include/tracefs.h b/include/tracefs.h
index 539548f30a74..f500cb47c372 100644
--- a/include/tracefs.h
+++ b/include/tracefs.h
@@ -595,4 +595,18 @@ struct tracefs_synth *tracefs_sql(struct tep_handle *tep, const char *name,
 struct tep_event *
 tracefs_synth_get_event(struct tep_handle *tep, struct tracefs_synth *synth);
 
+struct tracefs_cpu;
+
+struct tracefs_cpu *tracefs_cpu_open(struct tracefs_instance *instance,
+				     int cpu, bool nonblock);
+void tracefs_cpu_close(struct tracefs_cpu *tcpu);
+int tracefs_cpu_read_size(struct tracefs_cpu *tcpu);
+int tracefs_cpu_read(struct tracefs_cpu *tcpu, void *buffer, bool nonblock);
+int tracefs_cpu_buffered_read(struct tracefs_cpu *tcpu, void *buffer, bool nonblock);
+int tracefs_cpu_write(struct tracefs_cpu *tcpu, int wfd, bool nonblock);
+int tracefs_cpu_stop(struct tracefs_cpu *tcpu);
+int tracefs_cpu_flush(struct tracefs_cpu *tcpu, void *buffer);
+int tracefs_cpu_flush_write(struct tracefs_cpu *tcpu, int wfd);
+
+
 #endif /* _TRACE_FS_H */
diff --git a/samples/Makefile b/samples/Makefile
index 7bc7ff4f00e1..743bddb67c2d 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -21,6 +21,7 @@ EXAMPLES += hist-cont
 EXAMPLES += tracer
 EXAMPLES += stream
 EXAMPLES += instances-affinity
+EXAMPLES += cpu
 
 TARGETS :=
 TARGETS += sqlhist
diff --git a/scripts/utils.mk b/scripts/utils.mk
index b432e67fd732..4d0f8bc14faa 100644
--- a/scripts/utils.mk
+++ b/scripts/utils.mk
@@ -101,7 +101,7 @@ extract_example =				\
 
 do_sample_build =							\
 	$(Q)($(print_sample_build)					\
-	$(CC) -o $1 $2 $(CFLAGS) $(LIBTRACEFS_STATIC) $(LIBTRACEEVENT_LIBS))
+	$(CC) -o $1 $2 $(CFLAGS) $(LIBTRACEFS_STATIC) $(LIBTRACEEVENT_LIBS) -lpthread)
 
 do_sample_obj =									\
 	$(Q)($(print_sample_obj)						\
diff --git a/src/Makefile b/src/Makefile
index d28b8f419016..e2965bc5e1e9 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -14,6 +14,7 @@ OBJS += tracefs-filter.o
 OBJS += tracefs-dynevents.o
 OBJS += tracefs-eprobes.o
 OBJS += tracefs-uprobes.o
+OBJS += tracefs-record.o
 
 # Order matters for the the three below
 OBJS += sqlhist-lex.o
diff --git a/src/tracefs-record.c b/src/tracefs-record.c
new file mode 100644
index 000000000000..a59614de05ab
--- /dev/null
+++ b/src/tracefs-record.c
@@ -0,0 +1,505 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2022 Google Inc, Steven Rostedt <rostedt@goodmis.org>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/select.h>
+
+#include <kbuffer.h>
+
+#include "tracefs.h"
+#include "tracefs-local.h"
+
+enum {
+	TC_STOP		= 1 << 0,	/* Stop reading */
+	TC_NONBLOCK	= 1 << 1,	/* read is non blocking */
+};
+
+struct tracefs_cpu {
+	int		cpu;
+	int		fd;
+	int		flags;
+	int		nfds;
+	int		ctrl_pipe[2];
+	int		splice_pipe[2];
+	int		pipe_size;
+	int		subbuf_size;
+	int		buffered;
+	int		splice_read_flags;
+};
+
+/**
+ * tracefs_cpu_open - open an instance raw trace file
+ * @instance: the instance (NULL for toplevel) of the cpu raw file to open
+ * @cpu: The CPU that the raw trace file is associated with
+ * @nonblock: If true, the file will be opened in O_NONBLOCK mode
+ *
+ * Return a descriptor that can read the tracefs trace_pipe_raw file
+ * for a give @cpu in a given @instance.
+ *
+ * Returns NULL on error.
+ */
+struct tracefs_cpu *
+tracefs_cpu_open(struct tracefs_instance *instance, int cpu, bool nonblock)
+{
+	struct tracefs_cpu *tcpu;
+	struct tep_handle *tep;
+	int mode = O_RDONLY;
+	char path[128];
+	char *buf;
+	int len;
+	int ret;
+
+	tcpu = calloc(1, sizeof(*tcpu));
+	if (!tcpu)
+		return NULL;
+
+	if (nonblock) {
+		mode |= O_NONBLOCK;
+		tcpu->flags |= TC_NONBLOCK;
+	}
+
+	tcpu->splice_pipe[0] = -1;
+	tcpu->splice_pipe[1] = -1;
+
+	sprintf(path, "per_cpu/cpu%d/trace_pipe_raw", cpu);
+
+	tcpu->cpu = cpu;
+	tcpu->fd = tracefs_instance_file_open(instance, path, mode);
+	if (tcpu->fd < 0) {
+		free(tcpu);
+		return NULL;
+	}
+
+	tep = tep_alloc();
+	if (!tep)
+		goto fail;
+
+	/* Get the size of the page */
+	buf = tracefs_instance_file_read(NULL, "events/header_page", &len);
+	if (!buf)
+		goto fail;
+
+	ret = tep_parse_header_page(tep, buf, len, sizeof(long));
+	free(buf);
+	if (ret < 0)
+		goto fail;
+
+	tcpu->subbuf_size = tep_get_sub_buffer_size(tep);
+	tep_free(tep);
+	tep = NULL;
+
+	if (tcpu->flags & TC_NONBLOCK) {
+		tcpu->ctrl_pipe[0] = -1;
+		tcpu->ctrl_pipe[1] = -1;
+	} else {
+		/* ctrl_pipe is used to break out of blocked reads */
+		ret = pipe(tcpu->ctrl_pipe);
+		if (ret < 0)
+			goto fail;
+		if (tcpu->ctrl_pipe[0] > tcpu->fd)
+			tcpu->nfds = tcpu->ctrl_pipe[0] + 1;
+		else
+			tcpu->nfds = tcpu->fd + 1;
+	}
+
+	return tcpu;
+ fail:
+	tep_free(tep);
+	close(tcpu->fd);
+	free(tcpu);
+	return NULL;
+}
+
+static void close_fd(int fd)
+{
+	if (fd < 0)
+		return;
+	close(fd);
+}
+
+/**
+ * tracefs_cpu_close - clean up and close a raw trace descriptor
+ * @tcpu: The descriptor created with tracefs_cpu_open()
+ *
+ * Closes all the file descriptors associated to the trace_pipe_raw
+ * opened by tracefs_cpu_open().
+ */
+void tracefs_cpu_close(struct tracefs_cpu *tcpu)
+{
+	if (!tcpu)
+		return;
+
+	close(tcpu->fd);
+	close_fd(tcpu->ctrl_pipe[0]);
+	close_fd(tcpu->ctrl_pipe[1]);
+	close_fd(tcpu->splice_pipe[0]);
+	close_fd(tcpu->splice_pipe[1]);
+
+	free(tcpu);
+}
+
+/**
+ * tracefs_cpu_read_size - Return the size of the sub buffer
+ * @tcpu: The descriptor that holds the size of the sub buffer
+ *
+ * A lot of the functions that read the data from the trace_pipe_raw
+ * expect the caller to have allocated enough space to store a full
+ * subbuffer. Calling this function is a requirement to do so.
+ */
+int tracefs_cpu_read_size(struct tracefs_cpu *tcpu)
+{
+	if (!tcpu)
+		return -1;
+	return tcpu->subbuf_size;
+}
+
+static void set_nonblock(struct tracefs_cpu *tcpu)
+{
+	long flags;
+
+	flags = fcntl(tcpu->fd, F_GETFL);
+	fcntl(tcpu->fd, F_SETFL, flags | O_NONBLOCK);
+	tcpu->flags |= TC_NONBLOCK;
+}
+
+/*
+ * If set to blocking mode, block until the watermark has been
+ * reached, or the control has said to stop. If the control is
+ * set, then nonblock will be set to true on the way out.
+ */
+static int wait_on_input(struct tracefs_cpu *tcpu, bool nonblock)
+{
+	struct timeval tv, *ptv = NULL;
+	fd_set rfds;
+	int ret;
+
+	if (tcpu->flags & TC_NONBLOCK)
+		return 1;
+
+	if (nonblock) {
+		tv.tv_sec = 0;
+		tv.tv_usec = 0;
+		ptv = &tv;
+	}
+
+	FD_ZERO(&rfds);
+	FD_SET(tcpu->fd, &rfds);
+	FD_SET(tcpu->ctrl_pipe[0], &rfds);
+
+	ret = select(tcpu->nfds, &rfds, NULL, NULL, ptv);
+
+	/* Let the application decide what to do with signals and such */
+	if (ret < 0)
+		return ret;
+
+	if (FD_ISSET(tcpu->ctrl_pipe[0], &rfds)) {
+		/* Flush the ctrl pipe */
+		read(tcpu->ctrl_pipe[0], &ret, 1);
+
+		/* Make nonblock as it is now stopped */
+		set_nonblock(tcpu);
+	}
+
+	return FD_ISSET(tcpu->fd, &rfds);
+}
+
+/**
+ * tracefs_cpu_read - read from the raw trace file
+ * @tcpu: The descriptor representing the raw trace file
+ * @buffer: Where to read into (must be at least the size of the subbuffer)
+ * @nonblock: Hint to not block on the read if there's no data.
+ *
+ * Reads the trace_pipe_raw files associated to @tcpu into @buffer.
+ * @buffer must be at least the size of the sub buffer of the ring buffer,
+ * which is returned by tracefs_cpu_read_size().
+ *
+ * If @nonblock is set, and there's no data available, it will return
+ * immediately. Otherwise depending on how @tcpu was opened, it will
+ * block. If @tcpu was opened with nonblock set, then this @nonblock
+ * will make no difference.
+ *
+ * Returns the amount read or -1 on error.
+ */
+int tracefs_cpu_read(struct tracefs_cpu *tcpu, void *buffer, bool nonblock)
+{
+	bool orig_nonblock = nonblock;
+	long flags = 0;
+	int ret;
+
+	/*
+	 * If nonblock is set, then the wait_on_input() will return
+	 * immediately, if there's nothing in the buffer, with
+	 * ret == 0.
+	 */
+	ret = wait_on_input(tcpu, nonblock);
+	if (ret <= 0)
+		return ret;
+
+	ret = read(tcpu->fd, buffer, tcpu->subbuf_size);
+
+	if (nonblock != orig_nonblock && !(tcpu->flags & TC_NONBLOCK))
+		fcntl(tcpu->fd, F_SETFL, flags);
+
+	return ret;
+}
+
+static int init_splice(struct tracefs_cpu *tcpu)
+{
+	int ret;
+
+	if (tcpu->splice_pipe[0] >= 0)
+		return 0;
+
+	ret = pipe(tcpu->splice_pipe);
+	if (ret < 0)
+		return ret;
+
+	ret = fcntl(tcpu->splice_pipe[0], F_GETPIPE_SZ, &tcpu->pipe_size);
+	/*
+	 * F_GETPIPE_SZ was introduced in 2.6.35, ftrace was introduced
+	 * in 2.6.31. If we are running on an older kernel, just fall
+	 * back to using subbuf_size for splice(). It could also return
+	 * the size of the pipe and not set pipe_size.
+	 */
+	if (ret > 0 && !tcpu->pipe_size)
+		tcpu->pipe_size = ret;
+	else if (ret < 0)
+		tcpu->pipe_size = tcpu->subbuf_size;
+
+	tcpu->splice_read_flags = SPLICE_F_MOVE;
+	if (tcpu->flags & TC_NONBLOCK)
+		tcpu->splice_read_flags |= SPLICE_F_NONBLOCK;
+
+	return 0;
+}
+
+/**
+ * tracefs_cpu_buffered_read - Read the raw trace data buffering through a pipe
+ * @tcpu: The descriptor representing the raw trace file
+ * @buffer: Where to read into (must be at least the size of the subbuffer)
+ * @nonblock: Hint to not block on the read if there's no data.
+ *
+ * This is basically the same as tracefs_cpu_read() except that it uses
+ * a pipe through splice to buffer reads. This will batch reads keeping
+ * the reading from the ring buffer less intrusive to the system, as
+ * just reading all the time can cause quite a disturbance.
+ *
+ * Note, one difference between this and tracefs_cpu_read() is that it
+ * will read only in sub buffer pages. If the ring buffer has not filled
+ * a page, then it will not return anything, even with @nonblock set.
+ * Calls to tracefs_cpu_flush() should be done to read the rest of
+ * the file at the end of the trace.
+ *
+ * Returns the amount read or -1 on error.
+ */
+int tracefs_cpu_buffered_read(struct tracefs_cpu *tcpu, void *buffer, bool nonblock)
+{
+	int mode = SPLICE_F_MOVE;
+	int ret;
+
+	if (tcpu->buffered < 0)
+		tcpu->buffered = 0;
+
+	if (tcpu->buffered)
+		goto do_read;
+
+	ret = wait_on_input(tcpu, nonblock);
+	if (ret <= 0)
+		return ret;
+
+	if (nonblock || tcpu->flags & TC_NONBLOCK)
+		mode |= SPLICE_F_NONBLOCK;
+
+	ret = init_splice(tcpu);
+	if (ret < 0)
+		return ret;
+
+	ret = splice(tcpu->fd, NULL, tcpu->splice_pipe[1], NULL,
+		     tcpu->pipe_size, mode);
+	if (ret <= 0)
+		return ret;
+
+	tcpu->buffered = ret;
+
+ do_read:
+	ret = read(tcpu->splice_pipe[0], buffer, tcpu->subbuf_size);
+	if (ret > 0)
+		tcpu->buffered -= ret;
+	return ret;
+}
+
+/**
+ * tracefs_cpu_stop - Stop a blocked read of the raw tracing file
+ * @tcpu: The descriptor representing the raw trace file
+ *
+ * This will attempt to unblock a task blocked on @tcpu reading it.
+ * On older kernels, it may not do anything for the pipe reads, as
+ * older kernels do not wake up tasks waiting on the ring buffer.
+ *
+ * Returns 0 if the tasks reading the raw tracing file do not
+ * need a nudge.
+ *
+ * Returns 1 if the tasks may need a nudge (send a signal).
+ *
+ * Returns negative on error.
+ */
+int tracefs_cpu_stop(struct tracefs_cpu *tcpu)
+{
+	int ret = 1;
+
+	if (tcpu->flags & TC_NONBLOCK)
+		return 0;
+
+	ret = write(tcpu->ctrl_pipe[1], &ret, 1);
+	if (ret < 0)
+		return ret;
+
+	/* Calling ioctl() on recent kernels will wake up the waiters */
+	ret = ioctl(tcpu->fd, 0);
+	if (ret < 0)
+		ret = 1;
+	else
+		ret = 0;
+
+	return ret;
+}
+
+/**
+ * tracefs_cpu_flush - Finish out and read the rest of the raw tracing file
+ * @tcpu: The descriptor representing the raw trace file
+ * @buffer: Where to read into (must be at least the size of the subbuffer)
+ *
+ * Reads the trace_pipe_raw file associated by the @tcpu and puts it
+ * into @buffer, which must be the size of the sub buffer which is retrieved
+ * by tracefs_cpu_read_size(). This should be called at the end of tracing
+ * to get the rest of the data.
+ *
+ * This will set the file descriptor for reading to non-blocking mode.
+ *
+ * Returns the number of bytes read, or negative on error.
+ */
+int tracefs_cpu_flush(struct tracefs_cpu *tcpu, void *buffer)
+{
+	int ret;
+
+	/* Make sure that reading is now non blocking */
+	if (!(tcpu->flags & TC_NONBLOCK))
+		set_nonblock(tcpu);
+
+	if (tcpu->buffered < 0)
+		tcpu->buffered = 0;
+
+	if (tcpu->buffered)
+		goto do_read;
+
+ do_read:
+	ret = read(tcpu->fd, buffer, tcpu->subbuf_size);
+	if (ret > 0 && tcpu->buffered)
+		tcpu->buffered -= ret;
+
+	/* It's OK if there's no data to read */
+	if (ret < 0 && errno == EAGAIN)
+		ret = 0;
+
+	return ret;
+}
+
+/**
+ * tracefs_cpu_flush_write - Finish out and read the rest of the raw tracing file
+ * @tcpu: The descriptor representing the raw trace file
+ * @wfd: The write file descriptor to write the data to
+ *
+ * Reads the trace_pipe_raw file associated by the @tcpu and writes it to
+ * @wfd. This should be called at the end of tracing to get the rest of the data.
+ *
+ * Returns the number of bytes written, or negative on error.
+ */
+int tracefs_cpu_flush_write(struct tracefs_cpu *tcpu, int wfd)
+{
+	char buffer[tcpu->subbuf_size];
+	int ret;
+
+	ret = tracefs_cpu_flush(tcpu, buffer);
+	if (ret > 0)
+		ret = write(wfd, buffer, ret);
+
+	return ret;
+}
+
+/**
+ * tracefs_cpu_write - Write the raw trace file into a file descriptor
+ * @tcpu: The descriptor representing the raw trace file
+ * @wfd: The write file descriptor to write the data to
+ * @nonblock: Hint to not block on the read if there's no data.
+ *
+ * This will pipe the data from the trace_pipe_raw file associated with @tcpu
+ * into the @wfd file descriptor. If @nonblock is set, then it will not
+ * block if there's nothing to write. Note, it will only write sub buffer
+ * size data to @wfd. Calls to tracefs_cpu_flush_write() are needed to
+ * write out the rest.
+ *
+ * Returns the number of bytes written or negative on error.
+ */
+int tracefs_cpu_write(struct tracefs_cpu *tcpu, int wfd, bool nonblock)
+{
+	char buffer[tcpu->subbuf_size];
+	int mode = SPLICE_F_MOVE;
+	int tot_write = 0;
+	int tot;
+	int ret;
+
+	ret = wait_on_input(tcpu, nonblock);
+	if (ret <= 0)
+		return ret;
+
+	if (nonblock || tcpu->flags & TC_NONBLOCK)
+		mode |= SPLICE_F_NONBLOCK;
+
+	ret = init_splice(tcpu);
+	if (ret < 0)
+		return ret;
+
+	tot = splice(tcpu->fd, NULL, tcpu->splice_pipe[1], NULL,
+		     tcpu->pipe_size, mode);
+	if (tot < 0)
+		return tot;
+
+	if (tot == 0)
+		return 0;
+
+	ret = splice(tcpu->splice_pipe[0], NULL, wfd, NULL,
+		     tot, SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
+
+	if (ret >= 0)
+		return ret;
+
+	/* Some file systems do not allow splicing, try writing instead */
+	do {
+		int r = tcpu->subbuf_size;
+
+		if (r > tot)
+			r = tot;
+
+		ret = read(tcpu->splice_pipe[0], buffer, r);
+		if (ret > 0) {
+			tot -= ret;
+			ret = write(wfd, buffer, ret);
+		}
+		if (ret > 0)
+			tot_write += ret;
+	} while (ret > 0);
+
+	if (ret < 0)
+		return ret;
+
+	return tot_write;
+}
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v2 2/5] libtracefs: Add tracefs_cpu_create_fd()
  2022-10-25 18:32 [PATCH v2 0/5] Add reading from per_cpu trace_pipe_raw helper functions Steven Rostedt
  2022-10-25 18:32 ` [PATCH v2 1/5] libtracefs: Add reading of per cpu files Steven Rostedt
@ 2022-10-25 18:32 ` Steven Rostedt
  2022-10-25 18:32 ` [PATCH v2 3/5] libtracefs: Add tracefs_cpu_pipe() Steven Rostedt
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Steven Rostedt @ 2022-10-25 18:32 UTC (permalink / raw)
  To: linux-trace-devel; +Cc: Steven Rostedt (Google)

From: "Steven Rostedt (Google)" <rostedt@goodmis.org>

Add tracefs_cpu_create_fd() to attach a tracefs_cpu descriptor to an
already opened file descriptor.

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/tracefs.h    |  1 +
 src/tracefs-record.c | 94 +++++++++++++++++++++++++++++---------------
 2 files changed, 64 insertions(+), 31 deletions(-)

diff --git a/include/tracefs.h b/include/tracefs.h
index f500cb47c372..449bfd04a395 100644
--- a/include/tracefs.h
+++ b/include/tracefs.h
@@ -597,6 +597,7 @@ tracefs_synth_get_event(struct tep_handle *tep, struct tracefs_synth *synth);
 
 struct tracefs_cpu;
 
+struct tracefs_cpu *tracefs_cpu_create_fd(int fd, int subbuf_size, bool nonblock);
 struct tracefs_cpu *tracefs_cpu_open(struct tracefs_instance *instance,
 				     int cpu, bool nonblock);
 void tracefs_cpu_close(struct tracefs_cpu *tcpu);
diff --git a/src/tracefs-record.c b/src/tracefs-record.c
index a59614de05ab..fdc470d71f1e 100644
--- a/src/tracefs-record.c
+++ b/src/tracefs-record.c
@@ -24,7 +24,6 @@ enum {
 };
 
 struct tracefs_cpu {
-	int		cpu;
 	int		fd;
 	int		flags;
 	int		nfds;
@@ -37,25 +36,21 @@ struct tracefs_cpu {
 };
 
 /**
- * tracefs_cpu_open - open an instance raw trace file
- * @instance: the instance (NULL for toplevel) of the cpu raw file to open
- * @cpu: The CPU that the raw trace file is associated with
+ * tracefs_cpu_create_fd - create a tracefs_cpu instance for an existing fd
+ * @fd: The file descriptor to attach the tracefs_cpu to
+ * @subbuf_size: The expected size to read the subbuffer with
  * @nonblock: If true, the file will be opened in O_NONBLOCK mode
  *
  * Return a descriptor that can read the tracefs trace_pipe_raw file
- * for a give @cpu in a given @instance.
+ * that is associated with the given @fd and must be read in @subbuf_size.
  *
  * Returns NULL on error.
  */
 struct tracefs_cpu *
-tracefs_cpu_open(struct tracefs_instance *instance, int cpu, bool nonblock)
+tracefs_cpu_create_fd(int fd, int subbuf_size, bool nonblock)
 {
 	struct tracefs_cpu *tcpu;
-	struct tep_handle *tep;
 	int mode = O_RDONLY;
-	char path[128];
-	char *buf;
-	int len;
 	int ret;
 
 	tcpu = calloc(1, sizeof(*tcpu));
@@ -70,14 +65,62 @@ tracefs_cpu_open(struct tracefs_instance *instance, int cpu, bool nonblock)
 	tcpu->splice_pipe[0] = -1;
 	tcpu->splice_pipe[1] = -1;
 
+	tcpu->fd = fd;
+
+	tcpu->subbuf_size = subbuf_size;
+
+	if (tcpu->flags & TC_NONBLOCK) {
+		tcpu->ctrl_pipe[0] = -1;
+		tcpu->ctrl_pipe[1] = -1;
+	} else {
+		/* ctrl_pipe is used to break out of blocked reads */
+		ret = pipe(tcpu->ctrl_pipe);
+		if (ret < 0)
+			goto fail;
+		if (tcpu->ctrl_pipe[0] > tcpu->fd)
+			tcpu->nfds = tcpu->ctrl_pipe[0] + 1;
+		else
+			tcpu->nfds = tcpu->fd + 1;
+	}
+
+	return tcpu;
+ fail:
+	free(tcpu);
+	return NULL;
+}
+
+/**
+ * tracefs_cpu_open - open an instance raw trace file
+ * @instance: the instance (NULL for toplevel) of the cpu raw file to open
+ * @cpu: The CPU that the raw trace file is associated with
+ * @nonblock: If true, the file will be opened in O_NONBLOCK mode
+ *
+ * Return a descriptor that can read the tracefs trace_pipe_raw file
+ * for a given @cpu in a given @instance.
+ *
+ * Returns NULL on error.
+ */
+struct tracefs_cpu *
+tracefs_cpu_open(struct tracefs_instance *instance, int cpu, bool nonblock)
+{
+	struct tracefs_cpu *tcpu;
+	struct tep_handle *tep;
+	char path[128];
+	char *buf;
+	int mode = O_RDONLY;
+	int subbuf_size;
+	int len;
+	int ret;
+	int fd;
+
+	if (nonblock)
+		mode |= O_NONBLOCK;
+
 	sprintf(path, "per_cpu/cpu%d/trace_pipe_raw", cpu);
 
-	tcpu->cpu = cpu;
-	tcpu->fd = tracefs_instance_file_open(instance, path, mode);
-	if (tcpu->fd < 0) {
-		free(tcpu);
+	fd = tracefs_instance_file_open(instance, path, mode);
+	if (fd < 0)
 		return NULL;
-	}
 
 	tep = tep_alloc();
 	if (!tep)
@@ -93,29 +136,18 @@ tracefs_cpu_open(struct tracefs_instance *instance, int cpu, bool nonblock)
 	if (ret < 0)
 		goto fail;
 
-	tcpu->subbuf_size = tep_get_sub_buffer_size(tep);
+	subbuf_size = tep_get_sub_buffer_size(tep);
 	tep_free(tep);
 	tep = NULL;
 
-	if (tcpu->flags & TC_NONBLOCK) {
-		tcpu->ctrl_pipe[0] = -1;
-		tcpu->ctrl_pipe[1] = -1;
-	} else {
-		/* ctrl_pipe is used to break out of blocked reads */
-		ret = pipe(tcpu->ctrl_pipe);
-		if (ret < 0)
-			goto fail;
-		if (tcpu->ctrl_pipe[0] > tcpu->fd)
-			tcpu->nfds = tcpu->ctrl_pipe[0] + 1;
-		else
-			tcpu->nfds = tcpu->fd + 1;
-	}
+	tcpu = tracefs_cpu_create_fd(fd, subbuf_size, nonblock);
+	if (!tcpu)
+		goto fail;
 
 	return tcpu;
  fail:
 	tep_free(tep);
-	close(tcpu->fd);
-	free(tcpu);
+	close(fd);
 	return NULL;
 }
 
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v2 3/5] libtracefs: Add tracefs_cpu_pipe()
  2022-10-25 18:32 [PATCH v2 0/5] Add reading from per_cpu trace_pipe_raw helper functions Steven Rostedt
  2022-10-25 18:32 ` [PATCH v2 1/5] libtracefs: Add reading of per cpu files Steven Rostedt
  2022-10-25 18:32 ` [PATCH v2 2/5] libtracefs: Add tracefs_cpu_create_fd() Steven Rostedt
@ 2022-10-25 18:32 ` Steven Rostedt
  2022-10-25 18:32 ` [PATCH v2 4/5] libtracefs utest: Make helper functions for affinity Steven Rostedt
  2022-10-25 18:32 ` [PATCH v2 5/5] libtracefs: Add unit tests for tracefs_cpu functions Steven Rostedt
  4 siblings, 0 replies; 6+ messages in thread
From: Steven Rostedt @ 2022-10-25 18:32 UTC (permalink / raw)
  To: linux-trace-devel; +Cc: Steven Rostedt (Google)

From: "Steven Rostedt (Google)" <rostedt@goodmis.org>

Add the interface to call splice directly from the tracefs_cpu descriptor.
The requirement is that either the passed in file descriptor is a pipe, or
that the tcpu was created with tracefs_cpu_create_fd() and the fd used
there was a pipe.

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/tracefs.h    |  2 +-
 src/tracefs-record.c | 30 ++++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/include/tracefs.h b/include/tracefs.h
index 449bfd04a395..aaa77045625e 100644
--- a/include/tracefs.h
+++ b/include/tracefs.h
@@ -608,6 +608,6 @@ int tracefs_cpu_write(struct tracefs_cpu *tcpu, int wfd, bool nonblock);
 int tracefs_cpu_stop(struct tracefs_cpu *tcpu);
 int tracefs_cpu_flush(struct tracefs_cpu *tcpu, void *buffer);
 int tracefs_cpu_flush_write(struct tracefs_cpu *tcpu, int wfd);
-
+int tracefs_cpu_pipe(struct tracefs_cpu *tcpu, int wfd, bool nonblock);
 
 #endif /* _TRACE_FS_H */
diff --git a/src/tracefs-record.c b/src/tracefs-record.c
index fdc470d71f1e..4a15d19d5073 100644
--- a/src/tracefs-record.c
+++ b/src/tracefs-record.c
@@ -535,3 +535,33 @@ int tracefs_cpu_write(struct tracefs_cpu *tcpu, int wfd, bool nonblock)
 
 	return tot_write;
 }
+
+/**
+ * tracefs_cpu_pipe - Write the raw trace file into a pipe descriptor
+ * @tcpu: The descriptor representing the raw trace file
+ * @wfd: The write file descriptor to write the data to (must be a pipe)
+ * @nonblock: Hint to not block on the read if there's no data.
+ *
+ * This will splice the file descriptor of the trace_pipe_raw file
+ * directly to the given @wfd, which must be a pipe. Alternatively, if
+ * @tcpu was created with tracefs_cpu_create_fd() and the @fd passed in
+ * there was a pipe, then @wfd does not need to be a pipe.
+ *
+ * Returns the number of bytes read or negative on error.
+ */
+int tracefs_cpu_pipe(struct tracefs_cpu *tcpu, int wfd, bool nonblock)
+{
+	int mode = SPLICE_F_MOVE;
+	int ret;
+
+	ret = wait_on_input(tcpu, nonblock);
+	if (ret <= 0)
+		return ret;
+
+	if (nonblock || tcpu->flags & TC_NONBLOCK)
+		mode |= SPLICE_F_NONBLOCK;
+
+	ret = splice(tcpu->fd, NULL, wfd, NULL,
+		     tcpu->pipe_size, mode);
+	return ret;
+}
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v2 4/5] libtracefs utest: Make helper functions for affinity
  2022-10-25 18:32 [PATCH v2 0/5] Add reading from per_cpu trace_pipe_raw helper functions Steven Rostedt
                   ` (2 preceding siblings ...)
  2022-10-25 18:32 ` [PATCH v2 3/5] libtracefs: Add tracefs_cpu_pipe() Steven Rostedt
@ 2022-10-25 18:32 ` Steven Rostedt
  2022-10-25 18:32 ` [PATCH v2 5/5] libtracefs: Add unit tests for tracefs_cpu functions Steven Rostedt
  4 siblings, 0 replies; 6+ messages in thread
From: Steven Rostedt @ 2022-10-25 18:32 UTC (permalink / raw)
  To: linux-trace-devel; +Cc: Steven Rostedt (Google)

From: "Steven Rostedt (Google)" <rostedt@goodmis.org>

Add helper functions for setting and restoring affinity so that other
tests do not need to reimplement them.

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 utest/tracefs-utest.c | 49 ++++++++++++++++++++++++++++++-------------
 1 file changed, 35 insertions(+), 14 deletions(-)

diff --git a/utest/tracefs-utest.c b/utest/tracefs-utest.c
index f3c06eb3218b..ef59e10431a4 100644
--- a/utest/tracefs-utest.c
+++ b/utest/tracefs-utest.c
@@ -87,20 +87,45 @@ static int test_callback(struct tep_event *event, struct tep_record *record,
 	return 0;
 }
 
+static cpu_set_t *cpuset_save;
+static cpu_set_t *cpuset;
+static int cpu_size;
+
+static void save_affinity(void)
+{
+	int cpus;
+
+	cpus = sysconf(_SC_NPROCESSORS_CONF);
+	cpuset_save = CPU_ALLOC(cpus);
+	cpuset = CPU_ALLOC(cpus);
+	CU_TEST(cpuset_save != NULL && cpuset != NULL);
+	CU_TEST(sched_getaffinity(0, cpu_size, cpuset_save) == 0);
+}
+
+static void reset_affinity(void)
+{
+	sched_setaffinity(0, cpu_size, cpuset_save);
+	CPU_FREE(cpuset_save);
+	CPU_FREE(cpuset);
+}
+
+static void set_affinity(int cpu)
+{
+	CPU_ZERO_S(cpu_size, cpuset);
+	CPU_SET_S(cpu, cpu_size, cpuset);
+	CU_TEST(sched_setaffinity(0, cpu_size, cpuset) == 0);
+	sched_yield(); /* Force schedule */
+}
+
 static void test_iter_write(struct tracefs_instance *instance)
 {
-	int cpus = sysconf(_SC_NPROCESSORS_CONF);
-	cpu_set_t *cpuset, *cpusave;
-	int cpu_size;
 	char *path;
 	int i, fd;
+	int cpus;
 	int ret;
-	cpuset = CPU_ALLOC(cpus);
-	cpusave = CPU_ALLOC(cpus);
-	cpu_size = CPU_ALLOC_SIZE(cpus);
-	CPU_ZERO_S(cpu_size, cpuset);
 
-	sched_getaffinity(0, cpu_size, cpusave);
+	cpus = sysconf(_SC_NPROCESSORS_CONF);
+	save_affinity();
 
 	path = tracefs_instance_get_file(instance, "trace_marker");
 	CU_TEST(path != NULL);
@@ -114,17 +139,13 @@ static void test_iter_write(struct tracefs_instance *instance)
 		if (!test_array[i].value)
 			test_array[i].value++;
 		CU_TEST(test_array[i].cpu < cpus);
-		CPU_ZERO_S(cpu_size, cpuset);
-		CPU_SET(test_array[i].cpu, cpuset);
-		sched_setaffinity(0, cpu_size, cpuset);
+		set_affinity(test_array[i].cpu);
 		ret = write(fd, test_array + i, sizeof(struct test_sample));
 		CU_TEST(ret == sizeof(struct test_sample));
 	}
 
-	sched_setaffinity(0, cpu_size, cpusave);
+	reset_affinity();
 	close(fd);
-	CPU_FREE(cpuset);
-	CPU_FREE(cpusave);
 }
 
 
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v2 5/5] libtracefs: Add unit tests for tracefs_cpu functions
  2022-10-25 18:32 [PATCH v2 0/5] Add reading from per_cpu trace_pipe_raw helper functions Steven Rostedt
                   ` (3 preceding siblings ...)
  2022-10-25 18:32 ` [PATCH v2 4/5] libtracefs utest: Make helper functions for affinity Steven Rostedt
@ 2022-10-25 18:32 ` Steven Rostedt
  4 siblings, 0 replies; 6+ messages in thread
From: Steven Rostedt @ 2022-10-25 18:32 UTC (permalink / raw)
  To: linux-trace-devel; +Cc: Steven Rostedt (Google)

From: "Steven Rostedt (Google)" <rostedt@goodmis.org>

Add unit tests to test the following functions:

  tracefs_cpu_open()
  tracefs_cpu_close()
  tracefs_cpu_read_size()
  tracefs_cpu_read()
  tracefs_cpu_write()
  tracefs_cpu_flush_write()
  tracefs_cpu_stop()

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 src/tracefs-record.c  |  57 +++++---
 utest/tracefs-utest.c | 323 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 361 insertions(+), 19 deletions(-)

diff --git a/src/tracefs-record.c b/src/tracefs-record.c
index 4a15d19d5073..327a63600dbc 100644
--- a/src/tracefs-record.c
+++ b/src/tracefs-record.c
@@ -19,8 +19,9 @@
 #include "tracefs-local.h"
 
 enum {
-	TC_STOP		= 1 << 0,	/* Stop reading */
-	TC_NONBLOCK	= 1 << 1,	/* read is non blocking */
+	TC_STOP			= 1 << 0,   /* Stop reading */
+	TC_PERM_NONBLOCK	= 1 << 1,   /* read is always non blocking */
+	TC_NONBLOCK		= 1 << 2,   /* read is non blocking */
 };
 
 struct tracefs_cpu {
@@ -59,7 +60,7 @@ tracefs_cpu_create_fd(int fd, int subbuf_size, bool nonblock)
 
 	if (nonblock) {
 		mode |= O_NONBLOCK;
-		tcpu->flags |= TC_NONBLOCK;
+		tcpu->flags |= TC_NONBLOCK | TC_PERM_NONBLOCK;
 	}
 
 	tcpu->splice_pipe[0] = -1;
@@ -69,7 +70,7 @@ tracefs_cpu_create_fd(int fd, int subbuf_size, bool nonblock)
 
 	tcpu->subbuf_size = subbuf_size;
 
-	if (tcpu->flags & TC_NONBLOCK) {
+	if (tcpu->flags & TC_PERM_NONBLOCK) {
 		tcpu->ctrl_pipe[0] = -1;
 		tcpu->ctrl_pipe[1] = -1;
 	} else {
@@ -198,11 +199,27 @@ static void set_nonblock(struct tracefs_cpu *tcpu)
 {
 	long flags;
 
+	if (tcpu->flags & TC_NONBLOCK)
+		return;
+
 	flags = fcntl(tcpu->fd, F_GETFL);
 	fcntl(tcpu->fd, F_SETFL, flags | O_NONBLOCK);
 	tcpu->flags |= TC_NONBLOCK;
 }
 
+static void unset_nonblock(struct tracefs_cpu *tcpu)
+{
+	long flags;
+
+	if (!(tcpu->flags & TC_NONBLOCK))
+		return;
+
+	flags = fcntl(tcpu->fd, F_GETFL);
+	flags &= ~O_NONBLOCK;
+	fcntl(tcpu->fd, F_SETFL, flags);
+	tcpu->flags &= ~TC_NONBLOCK;
+}
+
 /*
  * If set to blocking mode, block until the watermark has been
  * reached, or the control has said to stop. If the contol is
@@ -210,24 +227,24 @@ static void set_nonblock(struct tracefs_cpu *tcpu)
  */
 static int wait_on_input(struct tracefs_cpu *tcpu, bool nonblock)
 {
-	struct timeval tv, *ptv = NULL;
 	fd_set rfds;
 	int ret;
 
-	if (tcpu->flags & TC_NONBLOCK)
+	if (tcpu->flags & TC_PERM_NONBLOCK)
 		return 1;
 
 	if (nonblock) {
-		tv.tv_sec = 0;
-		tv.tv_usec = 0;
-		ptv = &tv;
+		set_nonblock(tcpu);
+		return 1;
+	} else {
+		unset_nonblock(tcpu);
 	}
 
 	FD_ZERO(&rfds);
 	FD_SET(tcpu->fd, &rfds);
 	FD_SET(tcpu->ctrl_pipe[0], &rfds);
 
-	ret = select(tcpu->nfds, &rfds, NULL, NULL, ptv);
+	ret = select(tcpu->nfds, &rfds, NULL, NULL, NULL);
 
 	/* Let the application decide what to do with signals and such */
 	if (ret < 0)
@@ -239,6 +256,8 @@ static int wait_on_input(struct tracefs_cpu *tcpu, bool nonblock)
 
 		/* Make nonblock as it is now stopped */
 		set_nonblock(tcpu);
+		/* Permanently set nonblock */
+		tcpu->flags |= TC_PERM_NONBLOCK;
 	}
 
 	return FD_ISSET(tcpu->fd, &rfds);
@@ -263,8 +282,6 @@ static int wait_on_input(struct tracefs_cpu *tcpu, bool nonblock)
  */
 int tracefs_cpu_read(struct tracefs_cpu *tcpu, void *buffer, bool nonblock)
 {
-	bool orig_nonblock = nonblock;
-	long flags = 0;
 	int ret;
 
 	/*
@@ -278,8 +295,9 @@ int tracefs_cpu_read(struct tracefs_cpu *tcpu, void *buffer, bool nonblock)
 
 	ret = read(tcpu->fd, buffer, tcpu->subbuf_size);
 
-	if (nonblock != orig_nonblock && !(tcpu->flags & TC_NONBLOCK))
-		fcntl(tcpu->fd, F_SETFL, flags);
+	/* It's OK if there's no data to read */
+	if (ret < 0 && errno == EAGAIN)
+		ret = 0;
 
 	return ret;
 }
@@ -348,7 +366,7 @@ int tracefs_cpu_buffered_read(struct tracefs_cpu *tcpu, void *buffer, bool nonbl
 	if (ret <= 0)
 		return ret;
 
-	if (nonblock || tcpu->flags & TC_NONBLOCK)
+	if (tcpu->flags & TC_NONBLOCK)
 		mode |= SPLICE_F_NONBLOCK;
 
 	ret = init_splice(tcpu);
@@ -402,6 +420,8 @@ int tracefs_cpu_stop(struct tracefs_cpu *tcpu)
 	else
 		ret = 0;
 
+	set_nonblock(tcpu);
+
 	return ret;
 }
 
@@ -424,8 +444,7 @@ int tracefs_cpu_flush(struct tracefs_cpu *tcpu, void *buffer)
 	int ret;
 
 	/* Make sure that reading is now non blocking */
-	if (!(tcpu->flags & TC_NONBLOCK))
-		set_nonblock(tcpu);
+	set_nonblock(tcpu);
 
 	if (tcpu->buffered < 0)
 		tcpu->buffered = 0;
@@ -493,7 +512,7 @@ int tracefs_cpu_write(struct tracefs_cpu *tcpu, int wfd, bool nonblock)
 	if (ret <= 0)
 		return ret;
 
-	if (nonblock || tcpu->flags & TC_NONBLOCK)
+	if (tcpu->flags & TC_NONBLOCK)
 		mode |= SPLICE_F_NONBLOCK;
 
 	ret = init_splice(tcpu);
@@ -558,7 +577,7 @@ int tracefs_cpu_pipe(struct tracefs_cpu *tcpu, int wfd, bool nonblock)
 	if (ret <= 0)
 		return ret;
 
-	if (nonblock || tcpu->flags & TC_NONBLOCK)
+	if (tcpu->flags & TC_NONBLOCK)
 		mode |= SPLICE_F_NONBLOCK;
 
 	ret = splice(tcpu->fd, NULL, wfd, NULL,
diff --git a/utest/tracefs-utest.c b/utest/tracefs-utest.c
index ef59e10431a4..2c9b267ea427 100644
--- a/utest/tracefs-utest.c
+++ b/utest/tracefs-utest.c
@@ -12,6 +12,8 @@
 #include <dirent.h>
 #include <ftw.h>
 #include <libgen.h>
+#include <kbuffer.h>
+#include <pthread.h>
 
 #include <CUnit/CUnit.h>
 #include <CUnit/Basic.h>
@@ -98,10 +100,16 @@ static void save_affinity(void)
 	cpus = sysconf(_SC_NPROCESSORS_CONF);
 	cpuset_save = CPU_ALLOC(cpus);
 	cpuset = CPU_ALLOC(cpus);
+	cpu_size = CPU_ALLOC_SIZE(cpus);
 	CU_TEST(cpuset_save != NULL && cpuset != NULL);
 	CU_TEST(sched_getaffinity(0, cpu_size, cpuset_save) == 0);
 }
 
+static void thread_affinity(void)
+{
+	sched_setaffinity(0, cpu_size, cpuset_save);
+}
+
 static void reset_affinity(void)
 {
 	sched_setaffinity(0, cpu_size, cpuset_save);
@@ -404,6 +412,317 @@ static void test_trace_sql(void)
 	test_instance_trace_sql(test_instance);
 }
 
+struct test_cpu_data {
+	struct tracefs_instance		*instance;
+	struct tracefs_cpu		*tcpu;
+	struct kbuffer			*kbuf;
+	struct tep_handle		*tep;
+	unsigned long long		missed_events;
+	void				*buf;
+	int				events_per_buf;
+	int				bufsize;
+	int				data_size;
+	int				this_pid;
+	int				fd;
+	bool				done;
+};
+
+static void cleanup_trace_cpu(struct test_cpu_data *data)
+{
+	close(data->fd);
+	tep_free(data->tep);
+	tracefs_cpu_close(data->tcpu);
+	free(data->buf);
+	kbuffer_free(data->kbuf);
+}
+
+#define EVENT_SYSTEM "syscalls"
+#define EVENT_NAME  "sys_enter_getppid"
+
+static int setup_trace_cpu(struct tracefs_instance *instance, struct test_cpu_data *data)
+{
+	struct tep_format_field **fields;
+	struct tep_event *event;
+	char tmpfile[] = "/tmp/utest-libtracefsXXXXXX";
+	int max = 0;
+	int ret;
+	int i;
+
+	/* Make sure tracing is on */
+	tracefs_trace_on(instance);
+
+	memset (data, 0, sizeof(*data));
+
+	data->instance = instance;
+
+	data->fd = mkstemp(tmpfile);
+	CU_TEST(data->fd >= 0);
+	unlink(tmpfile);
+	if (data->fd < 0)
+		return -1;
+
+	data->tep = tracefs_local_events(NULL);
+	CU_TEST(data->tep != NULL);
+	if (!data->tep)
+		goto fail;
+
+	data->tcpu = tracefs_cpu_open(instance, 0, true);
+	CU_TEST(data->tcpu != NULL);
+	if (!data->tcpu)
+		goto fail;
+
+	data->bufsize = tracefs_cpu_read_size(data->tcpu);
+
+	data->buf = calloc(1, data->bufsize);
+	CU_TEST(data->buf != NULL);
+	if (!data->buf)
+		goto fail;
+
+	data->kbuf = kbuffer_alloc(sizeof(long) == 8, !tep_is_bigendian());
+	CU_TEST(data->kbuf != NULL);
+	if (!data->kbuf)
+		goto fail;
+
+	data->data_size = data->bufsize - kbuffer_start_of_data(data->kbuf);
+
+	tracefs_instance_file_clear(instance, "trace");
+
+	event = tep_find_event_by_name(data->tep, EVENT_SYSTEM, EVENT_NAME);
+	CU_TEST(event != NULL);
+	if (!event)
+		goto fail;
+
+	fields = tep_event_fields(event);
+	CU_TEST(fields != NULL);
+	if (!fields)
+		goto fail;
+
+	for (i = 0; fields[i]; i++) {
+		int end = fields[i]->offset + fields[i]->size;
+		if (end > max)
+			max = end;
+	}
+	free(fields);
+
+	CU_TEST(max != 0);
+	if (!max)
+		goto fail;
+
+	data->events_per_buf = data->data_size / max;
+
+	data->this_pid = getpid();
+	ret = tracefs_event_enable(instance, EVENT_SYSTEM, EVENT_NAME);
+	CU_TEST(ret == 0);
+	if (ret)
+		goto fail;
+
+
+	save_affinity();
+	set_affinity(0);
+
+	return 0;
+ fail:
+	cleanup_trace_cpu(data);
+	return -1;
+}
+
+static void shutdown_trace_cpu(struct test_cpu_data *data)
+{
+	struct tracefs_instance *instance = data->instance;
+	int ret;
+
+	reset_affinity();
+
+	ret = tracefs_event_disable(instance, EVENT_SYSTEM, EVENT_NAME);
+	CU_TEST(ret == 0);
+
+	cleanup_trace_cpu(data);
+}
+
+static void call_getppid(int cnt)
+{
+	int i;
+
+	for (i = 0; i < cnt; i++)
+		getppid();
+}
+
+static void test_cpu_read(struct test_cpu_data *data, int expect)
+{
+	struct tracefs_cpu *tcpu = data->tcpu;
+	struct kbuffer *kbuf = data->kbuf;
+	struct tep_record record;
+	void *buf = data->buf;
+	unsigned long long ts;
+	bool first = true;
+	int pid;
+	int ret;
+	int cnt = 0;
+
+	call_getppid(expect);
+
+	for (;;) {
+		ret = tracefs_cpu_read(tcpu, buf, false);
+		CU_TEST(ret > 0 || !first);
+		if (ret <= 0)
+			break;
+		first = false;
+		ret = kbuffer_load_subbuffer(kbuf, buf);
+		CU_TEST(ret == 0);
+		for (;;) {
+			record.data = kbuffer_read_event(kbuf, &ts);
+			if (!record.data)
+				break;
+			record.ts = ts;
+			pid = tep_data_pid(data->tep, &record);
+			if (pid == data->this_pid)
+				cnt++;
+			kbuffer_next_event(kbuf, NULL);
+		}
+	}
+	CU_TEST(cnt == expect);
+}
+
+static void test_instance_trace_cpu_read(struct tracefs_instance *instance)
+{
+	struct test_cpu_data data;
+
+	if (setup_trace_cpu(instance, &data))
+		return;
+
+	test_cpu_read(&data, 1);
+	test_cpu_read(&data, data.events_per_buf / 2);
+	test_cpu_read(&data, data.events_per_buf);
+	test_cpu_read(&data, data.events_per_buf + 1);
+	test_cpu_read(&data, data.events_per_buf * 50);
+
+	shutdown_trace_cpu(&data);
+}
+
+static void test_trace_cpu_read(void)
+{
+	test_instance_trace_cpu_read(NULL);
+	test_instance_trace_cpu_read(test_instance);
+}
+
+static int read_trace_cpu_file(struct test_cpu_data *data)
+{
+	unsigned long long ts;
+	struct tep_record record;
+	struct kbuffer *kbuf = data->kbuf;
+	void *buf = data->buf;
+	bool first = true;
+	int bufsize = data->bufsize;
+	int fd = data->fd;
+	int missed;
+	int pid;
+	int ret;
+	int cnt = 0;
+
+	ret = lseek64(fd, 0, SEEK_SET);
+	CU_TEST(ret == 0);
+	if (ret)
+		return -1;
+
+	for (;;) {
+		ret = read(fd, buf, bufsize);
+		CU_TEST(ret > 0 || !first);
+		if (ret <= 0)
+			break;
+		first = false;
+
+		ret = kbuffer_load_subbuffer(kbuf, buf);
+		CU_TEST(ret == 0);
+		missed = kbuffer_missed_events(kbuf);
+		if (missed)
+			printf("missed events %d\n", missed);
+		for (;;) {
+			record.data = kbuffer_read_event(kbuf, &ts);
+			if (!record.data)
+				break;
+			record.ts = ts;
+			pid = tep_data_pid(data->tep, &record);
+			if (pid == data->this_pid)
+				cnt++;
+			kbuffer_next_event(kbuf, NULL);
+		}
+	}
+	return ret == 0 ? cnt : ret;
+}
+
+static void *trace_cpu_thread(void *arg)
+{
+	struct test_cpu_data *data = arg;
+	struct tracefs_cpu *tcpu = data->tcpu;
+	int fd = data->fd;
+	long ret = 0;
+
+	thread_affinity();
+
+	while (!data->done && ret >= 0) {
+		ret = tracefs_cpu_write(tcpu, fd, false);
+		if (ret < 0 && errno == EAGAIN)
+			ret = 0;
+	}
+	if (ret >= 0 || errno == EAGAIN) {
+		do {
+			ret = tracefs_cpu_flush_write(tcpu, fd);
+		} while (ret > 0);
+	}
+
+	return (void *)ret;
+}
+
+static void test_cpu_pipe(struct test_cpu_data *data, int expect)
+{
+	pthread_t thread;
+	void *retval;
+	long ret;
+	int cnt;
+
+	tracefs_instance_file_clear(data->instance, "trace");
+	ftruncate(data->fd, 0);
+
+	data->done = false;
+
+	pthread_create(&thread, NULL, trace_cpu_thread, data);
+	sleep(1);
+
+	call_getppid(expect);
+
+	data->done = true;
+	tracefs_cpu_stop(data->tcpu);
+	pthread_join(thread, &retval);
+	ret = (long)retval;
+	CU_TEST(ret >= 0);
+
+	cnt = read_trace_cpu_file(data);
+
+	CU_TEST(cnt == expect);
+}
+
+static void test_instance_trace_cpu_pipe(struct tracefs_instance *instance)
+{
+	struct test_cpu_data data;
+
+	if (setup_trace_cpu(instance, &data))
+		return;
+
+	test_cpu_pipe(&data, 1);
+	test_cpu_pipe(&data, data.events_per_buf / 2);
+	test_cpu_pipe(&data, data.events_per_buf);
+	test_cpu_pipe(&data, data.events_per_buf + 1);
+	test_cpu_pipe(&data, data.events_per_buf * 1000);
+
+	shutdown_trace_cpu(&data);
+}
+
+static void test_trace_cpu_pipe(void)
+{
+	test_instance_trace_cpu_pipe(NULL);
+	test_instance_trace_cpu_pipe(test_instance);
+}
+
 static struct tracefs_dynevent **get_dynevents_check(enum tracefs_dynevent_type types, int count)
 {
 	struct tracefs_dynevent **devents;
@@ -1794,6 +2113,10 @@ void test_tracefs_lib(void)
 		fprintf(stderr, "Suite \"%s\" cannot be ceated\n", TRACEFS_SUITE);
 		return;
 	}
+	CU_add_test(suite, "trace cpu read",
+		    test_trace_cpu_read);
+	CU_add_test(suite, "trace cpu pipe",
+		    test_trace_cpu_pipe);
 	CU_add_test(suite, "trace sql",
 		    test_trace_sql);
 	CU_add_test(suite, "tracing file / directory APIs",
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-10-25 18:32 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-25 18:32 [PATCH v2 0/5] Add reading from per_cpu trace_pipe_raw helper functions Steven Rostedt
2022-10-25 18:32 ` [PATCH v2 1/5] libtracefs: Add reading of per cpu files Steven Rostedt
2022-10-25 18:32 ` [PATCH v2 2/5] libtracefs: Add tracefs_cpu_create_fd() Steven Rostedt
2022-10-25 18:32 ` [PATCH v2 3/5] libtracefs: Add tracefs_cpu_pipe() Steven Rostedt
2022-10-25 18:32 ` [PATCH v2 4/5] libtracefs utest: Make helper functions for affinity Steven Rostedt
2022-10-25 18:32 ` [PATCH v2 5/5] libtracefs: Add unit tests for tracefs_cpu functions Steven Rostedt

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.