linux-trace-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 1/2] trace-cmd: Fix crash when trace-cmd is executed with args "profile -F sleep 1"
@ 2019-06-05 11:19 Tzvetomir Stoyanov
  2019-06-05 11:19 ` [PATCH v2 2/2] trace-cmd: Fix a possible race condition and deadlock in trace-cmd Tzvetomir Stoyanov
  2019-06-18 16:20 ` [PATCH v2 1/2] trace-cmd: Fix crash when trace-cmd is executed with args "profile -F sleep 1" Steven Rostedt
  0 siblings, 2 replies; 5+ messages in thread
From: Tzvetomir Stoyanov @ 2019-06-05 11:19 UTC (permalink / raw)
  To: rostedt; +Cc: linux-trace-devel

A fix for https://bugzilla.kernel.org/show_bug.cgi?id=203411
When trace-cmd is running in "profile" mode, trace files are not generated.
Instead, pipes are used to collect trace data from recorder threads. Some
internal functions, originally designed for working with files, are reused
in pipes use case:
 init_cpu()
 allocate_page()
 get_next_page()
There was an undesired behaviour in those functions, when working with pipes,
which causes the segmentation fault, described in the bug report.

Signed-off-by: Tzvetomir Stoyanov <tstoyanov@vmware.com>
---
 lib/trace-cmd/trace-input.c | 45 +++++++++++++++++++++++++++----------
 1 file changed, 33 insertions(+), 12 deletions(-)

diff --git a/lib/trace-cmd/trace-input.c b/lib/trace-cmd/trace-input.c
index ba20ef1..264e3c3 100644
--- a/lib/trace-cmd/trace-input.c
+++ b/lib/trace-cmd/trace-input.c
@@ -28,8 +28,6 @@
 /* for debugging read instead of mmap */
 static int force_read = 0;
 
-#define PAGE_STOPPER		((struct page *)-1L)
-
 struct page_map {
 	struct list_head	list;
 	off64_t			offset;
@@ -65,6 +63,7 @@ struct cpu_data {
 	struct tep_record	*next;
 	struct page		*page;
 	struct kbuffer		*kbuf;
+	int			nr_pages;
 	int			page_cnt;
 	int			cpu;
 	int			pipe_fd;
@@ -146,16 +145,17 @@ static void add_record(struct page *page, struct tep_record *record)
 	record->prev = NULL;
 	page->records = record;
 }
-static const char *show_records(struct page **pages)
+static const char *show_records(struct page **pages, int nr_pages)
 {
 	static char buf[BUFSIZ + 1];
 	struct tep_record *record;
 	struct page *page;
 	int len;
+	int i;
 
 	memset(buf, 0, sizeof(buf));
 	len = 0;
-	for (i = 0; pages[i] != PAGE_STOPPER; i--) {
+	for (i = 0; i < nr_pages; i++) {
 		page = pages[i];
 		if (!page)
 			continue;
@@ -172,7 +172,7 @@ static const char *show_records(struct page **pages)
 #else
 static inline void remove_record(struct page *page, struct tep_record *record) {}
 static inline void add_record(struct page *page, struct tep_record *record) {}
-static const char *show_records(struct page **pages)
+static const char *show_records(struct page **pages, int nr_pages)
 {
 	return "";
 }
@@ -919,10 +919,20 @@ static struct page *allocate_page(struct tracecmd_input *handle,
 				  int cpu, off64_t offset)
 {
 	struct cpu_data *cpu_data = &handle->cpu_data[cpu];
+	struct page **pages;
 	struct page *page;
 	int index;
 
 	index = (offset - cpu_data->file_offset) / handle->page_size;
+	if (index >= cpu_data->nr_pages) {
+		pages = realloc(cpu_data->pages, (index + 1) * sizeof(*cpu_data->pages));
+		if (!pages)
+			return NULL;
+		memset(pages + cpu_data->nr_pages, 0,
+		       (index + 1 - cpu_data->nr_pages) * sizeof(*cpu_data->pages));
+		cpu_data->pages = pages;
+		cpu_data->nr_pages = index + 1;
+	}
 	if (cpu_data->pages[index]) {
 		cpu_data->pages[index]->ref_count++;
 		return cpu_data->pages[index];
@@ -954,6 +964,7 @@ static struct page *allocate_page(struct tracecmd_input *handle,
 static void __free_page(struct tracecmd_input *handle, struct page *page)
 {
 	struct cpu_data *cpu_data = &handle->cpu_data[page->cpu];
+	struct page **pages;
 	int index;
 
 	if (!page->ref_count)
@@ -973,6 +984,17 @@ static void __free_page(struct tracecmd_input *handle, struct page *page)
 	cpu_data->page_cnt--;
 
 	free(page);
+
+	for (index = cpu_data->nr_pages - 1; index > 0; index--)
+		if (cpu_data->pages[index])
+			break;
+	if (index < (cpu_data->nr_pages - 1)) {
+		pages = realloc(cpu_data->pages, (index + 1) * sizeof(*cpu_data->pages));
+		if (!pages)
+			return;
+		cpu_data->pages = pages;
+		cpu_data->nr_pages = index + 1;
+	}
 }
 
 static void free_page(struct tracecmd_input *handle, int cpu)
@@ -2026,7 +2048,6 @@ tracecmd_read_prev(struct tracecmd_input *handle, struct tep_record *record)
 static int init_cpu(struct tracecmd_input *handle, int cpu)
 {
 	struct cpu_data *cpu_data = &handle->cpu_data[cpu];
-	int num_pages;
 	int i;
 
 	cpu_data->offset = cpu_data->file_offset;
@@ -2040,14 +2061,13 @@ static int init_cpu(struct tracecmd_input *handle, int cpu)
 		return 0;
 	}
 
-	num_pages = (cpu_data->size + handle->page_size - 1) / handle->page_size;
-	cpu_data->pages = calloc(num_pages + 1, sizeof(*cpu_data->pages));
+	cpu_data->nr_pages = (cpu_data->size + handle->page_size - 1) / handle->page_size;
+	if (!cpu_data->nr_pages)
+		cpu_data->nr_pages = 1;
+	cpu_data->pages = calloc(cpu_data->nr_pages, sizeof(*cpu_data->pages));
 	if (!cpu_data->pages)
 		return -1;
 
-	/* Add stopper */
-	cpu_data->pages[num_pages] = PAGE_STOPPER;
-
 	if (handle->use_pipe) {
 		/* Just make a page, it will be nuked later */
 		cpu_data->page = malloc(sizeof(*cpu_data->page));
@@ -2793,7 +2813,8 @@ void tracecmd_close(struct tracecmd_input *handle)
 			if (handle->cpu_data[cpu].page_cnt)
 				warning("%d pages still allocated on cpu %d%s",
 					handle->cpu_data[cpu].page_cnt,
-					cpu, show_records(handle->cpu_data[cpu].pages));
+					cpu, show_records(handle->cpu_data[cpu].pages,
+							  handle->cpu_data[cpu].nr_pages));
 			free(handle->cpu_data[cpu].pages);
 		}
 	}
-- 
2.21.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 2/2] trace-cmd: Fix a possible race condition and deadlock in trace-cmd
  2019-06-05 11:19 [PATCH v2 1/2] trace-cmd: Fix crash when trace-cmd is executed with args "profile -F sleep 1" Tzvetomir Stoyanov
@ 2019-06-05 11:19 ` Tzvetomir Stoyanov
  2019-06-07  6:48   ` Slavomir Kaslev
  2019-06-18 16:20 ` [PATCH v2 1/2] trace-cmd: Fix crash when trace-cmd is executed with args "profile -F sleep 1" Steven Rostedt
  1 sibling, 1 reply; 5+ messages in thread
From: Tzvetomir Stoyanov @ 2019-06-05 11:19 UTC (permalink / raw)
  To: rostedt; +Cc: linux-trace-devel

When pipes are used for communication between trace-cmd main
thread and per-cpu recorder threads, there is a possible race
condition in stop_threads(), which can cause a deadlock between
the main thread and cpu recorder thread:
   In trace_stream_read(), the select() call can return 0 if threads
   have no data to send. This will force stop_threads() to stop reading
   the thread's pipes and enter a waitpid() loop, to wait for all threads
   to be terminated. However, there is a case when some threads are still
   flushing its data - tracecmd_flush_recording() tries a blocking write()
   to the pipe. A dead lock appears - the cpu thread is blocked in write(),
   as its buffer is full and no one is reading it. The main thread is blocked
   in waitpid(), to wait the same thread to exit.
The deadlock can be (randomly) observed with the command
"trace-cmd profile -p function -F sleep 10"

The proposed fix removes the select timeout, makes the call blocking,
to ensure the threads are flushed its data before going in waitpid() loop.

Signed-off-by: Tzvetomir Stoyanov <tstoyanov@vmware.com>
---
 tracecmd/trace-record.c | 3 +--
 tracecmd/trace-stream.c | 6 ++++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
index 4523128..2f5fbd9 100644
--- a/tracecmd/trace-record.c
+++ b/tracecmd/trace-record.c
@@ -626,7 +626,6 @@ static void delete_thread_data(void)
 
 static void stop_threads(enum trace_type type)
 {
-	struct timeval tv = { 0, 0 };
 	int ret;
 	int i;
 
@@ -643,7 +642,7 @@ static void stop_threads(enum trace_type type)
 	/* Flush out the pipes */
 	if (type & TRACE_TYPE_STREAM) {
 		do {
-			ret = trace_stream_read(pids, recorder_threads, &tv);
+			ret = trace_stream_read(pids, recorder_threads, NULL);
 		} while (ret > 0);
 	}
 
diff --git a/tracecmd/trace-stream.c b/tracecmd/trace-stream.c
index dad3466..3814a35 100644
--- a/tracecmd/trace-stream.c
+++ b/tracecmd/trace-stream.c
@@ -92,6 +92,7 @@ int trace_stream_read(struct pid_record_data *pids, int nr_pids, struct timeval
 	struct pid_record_data *last_pid;
 	fd_set rfds;
 	int top_rfd = 0;
+	int nr_fd;
 	int ret;
 	int i;
 
@@ -119,18 +120,23 @@ int trace_stream_read(struct pid_record_data *pids, int nr_pids, struct timeval
 		return 1;
 	}
 
+	nr_fd = 0;
 	FD_ZERO(&rfds);
 
 	for (i = 0; i < nr_pids; i++) {
 		/* Do not process closed pipes */
 		if (pids[i].closed)
 			continue;
+		nr_fd++;
 		if (pids[i].brass[0] > top_rfd)
 			top_rfd = pids[i].brass[0];
 
 		FD_SET(pids[i].brass[0], &rfds);
 	}
 
+	if (!nr_fd)
+		return 0;
+
 	ret = select(top_rfd + 1, &rfds, NULL, NULL, tv);
 
 	if (ret > 0)
-- 
2.21.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 2/2] trace-cmd: Fix a possible race condition and deadlock in trace-cmd
  2019-06-05 11:19 ` [PATCH v2 2/2] trace-cmd: Fix a possible race condition and deadlock in trace-cmd Tzvetomir Stoyanov
@ 2019-06-07  6:48   ` Slavomir Kaslev
  0 siblings, 0 replies; 5+ messages in thread
From: Slavomir Kaslev @ 2019-06-07  6:48 UTC (permalink / raw)
  To: Tzvetomir Stoyanov; +Cc: Steven Rostedt, linux-trace-devel

On Wed, Jun 5, 2019 at 2:21 PM Tzvetomir Stoyanov <tstoyanov@vmware.com> wrote:
>
> When pipes are used for communication between trace-cmd main
> thread and per-cpu recorder threads, there is a possible race
> condition in stop_threads(), which can cause a deadlock between
> the main thread and cpu recorder thread:
>    In trace_stream_read(), the select() call can return 0 if threads
>    have no data to send. This will force stop_threads() to stop reading
>    the thread's pipes and enter a waitpid() loop, to wait for all threads
>    to be terminated. However, there is a case when some threads are still
>    flushing its data - tracecmd_flush_recording() tries a blocking write()
>    to the pipe. A dead lock appears - the cpu thread is blocked in write(),
>    as its buffer is full and no one is reading it. The main thread is blocked
>    in waitpid(), to wait the same thread to exit.
> The deadlock can be (randomly) observed with the command
> "trace-cmd profile -p function -F sleep 10"
>
> The proposed fix removes the select timeout, makes the call blocking,
> to ensure the threads are flushed its data before going in waitpid() loop.
>
> Signed-off-by: Tzvetomir Stoyanov <tstoyanov@vmware.com>
> ---
>  tracecmd/trace-record.c | 3 +--
>  tracecmd/trace-stream.c | 6 ++++++
>  2 files changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
> index 4523128..2f5fbd9 100644
> --- a/tracecmd/trace-record.c
> +++ b/tracecmd/trace-record.c
> @@ -626,7 +626,6 @@ static void delete_thread_data(void)
>
>  static void stop_threads(enum trace_type type)
>  {
> -       struct timeval tv = { 0, 0 };
>         int ret;
>         int i;
>
> @@ -643,7 +642,7 @@ static void stop_threads(enum trace_type type)
>         /* Flush out the pipes */
>         if (type & TRACE_TYPE_STREAM) {
>                 do {
> -                       ret = trace_stream_read(pids, recorder_threads, &tv);
> +                       ret = trace_stream_read(pids, recorder_threads, NULL);
>                 } while (ret > 0);
>         }
>
> diff --git a/tracecmd/trace-stream.c b/tracecmd/trace-stream.c
> index dad3466..3814a35 100644
> --- a/tracecmd/trace-stream.c
> +++ b/tracecmd/trace-stream.c
> @@ -92,6 +92,7 @@ int trace_stream_read(struct pid_record_data *pids, int nr_pids, struct timeval
>         struct pid_record_data *last_pid;
>         fd_set rfds;
>         int top_rfd = 0;
> +       int nr_fd;
>         int ret;
>         int i;
>
> @@ -119,18 +120,23 @@ int trace_stream_read(struct pid_record_data *pids, int nr_pids, struct timeval
>                 return 1;
>         }
>
> +       nr_fd = 0;
>         FD_ZERO(&rfds);
>
>         for (i = 0; i < nr_pids; i++) {
>                 /* Do not process closed pipes */
>                 if (pids[i].closed)
>                         continue;
> +               nr_fd++;
>                 if (pids[i].brass[0] > top_rfd)
>                         top_rfd = pids[i].brass[0];
>
>                 FD_SET(pids[i].brass[0], &rfds);
>         }
>
> +       if (!nr_fd)
> +               return 0;
> +
>         ret = select(top_rfd + 1, &rfds, NULL, NULL, tv);
>
>         if (ret > 0)
> --
> 2.21.0
>

Looks good to me.

Reviewed-by: Slavomir Kaslev <kaslevs@vmware.com>

Cheers,

-- Slavi

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 1/2] trace-cmd: Fix crash when trace-cmd is executed with args "profile -F sleep 1"
  2019-06-05 11:19 [PATCH v2 1/2] trace-cmd: Fix crash when trace-cmd is executed with args "profile -F sleep 1" Tzvetomir Stoyanov
  2019-06-05 11:19 ` [PATCH v2 2/2] trace-cmd: Fix a possible race condition and deadlock in trace-cmd Tzvetomir Stoyanov
@ 2019-06-18 16:20 ` Steven Rostedt
  2019-06-18 17:54   ` Johannes Berg
  1 sibling, 1 reply; 5+ messages in thread
From: Steven Rostedt @ 2019-06-18 16:20 UTC (permalink / raw)
  To: Tzvetomir Stoyanov; +Cc: linux-trace-devel, Johannes Berg

On Wed,  5 Jun 2019 14:19:36 +0300
Tzvetomir Stoyanov <tstoyanov@vmware.com> wrote:

> @@ -973,6 +984,17 @@ static void __free_page(struct tracecmd_input *handle, struct page *page)
>  	cpu_data->page_cnt--;
>  
>  	free(page);
> +
> +	for (index = cpu_data->nr_pages - 1; index > 0; index--)
> +		if (cpu_data->pages[index])
> +			break;
> +	if (index < (cpu_data->nr_pages - 1)) {
> +		pages = realloc(cpu_data->pages, (index + 1) * sizeof(*cpu_data->pages));
> +		if (!pages)
> +			return;
> +		cpu_data->pages = pages;
> +		cpu_data->nr_pages = index + 1;
> +	}
>  }
>  

Heads up. Johannes found a regression that this code above causes
reading a 1.7G file where the above code makes it go from reading it in
5 seconds, to over 2 minutes!

I bisected it down to this commit (saw a small, but not so drastic
performance issue it my tests). I asked Johannes to revert the commit,
and he said that it removed the regression.

He said he'll report this in the Bugzilla, but we should be looking at
changing the realloc() logic.

-- Steve

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 1/2] trace-cmd: Fix crash when trace-cmd is executed with args "profile -F sleep 1"
  2019-06-18 16:20 ` [PATCH v2 1/2] trace-cmd: Fix crash when trace-cmd is executed with args "profile -F sleep 1" Steven Rostedt
@ 2019-06-18 17:54   ` Johannes Berg
  0 siblings, 0 replies; 5+ messages in thread
From: Johannes Berg @ 2019-06-18 17:54 UTC (permalink / raw)
  To: Steven Rostedt, Tzvetomir Stoyanov; +Cc: linux-trace-devel

On Tue, 2019-06-18 at 12:20 -0400, Steven Rostedt wrote:

> Heads up. Johannes found a regression that this code above causes
> reading a 1.7G file where the above code makes it go from reading it in
> 5 seconds, to over 2 minutes!

Actually, I didn't even let it finish ... It was something like 4.5
seconds to not finishing in 4.5 minutes, then I gave up. My prior
version was commit 1ad32c24746.

The file is 1.7G, but that's perhaps not the interesting part? A little
under 620k events - by my standards, that's a small file :-)

> I bisected it down to this commit (saw a small, but not so drastic
> performance issue it my tests). I asked Johannes to revert the commit,
> and he said that it removed the regression.

With that reverted I actually see ~2 seconds now, which is 2x faster
than before :-)

> He said he'll report this in the Bugzilla, but we should be looking at
> changing the realloc() logic.

https://bugzilla.kernel.org/show_bug.cgi?id=203927

johannes


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2019-06-18 17:54 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-05 11:19 [PATCH v2 1/2] trace-cmd: Fix crash when trace-cmd is executed with args "profile -F sleep 1" Tzvetomir Stoyanov
2019-06-05 11:19 ` [PATCH v2 2/2] trace-cmd: Fix a possible race condition and deadlock in trace-cmd Tzvetomir Stoyanov
2019-06-07  6:48   ` Slavomir Kaslev
2019-06-18 16:20 ` [PATCH v2 1/2] trace-cmd: Fix crash when trace-cmd is executed with args "profile -F sleep 1" Steven Rostedt
2019-06-18 17:54   ` Johannes Berg

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).