All of lore.kernel.org
 help / color / mirror / Atom feed
* [ptest-runner][PATCH 1/3] tests/utils.c: fix a memory corruption in find_word
@ 2021-09-17 13:40 Alexander Kanavin
  2021-09-17 13:40 ` [ptest-runner][PATCH 2/3] utils.c: handle test timeouts directly with poll() Alexander Kanavin
  2021-09-17 13:40 ` [ptest-runner][PATCH 3/3] utils.c: add system data collection when a test gets stuck Alexander Kanavin
  0 siblings, 2 replies; 4+ messages in thread
From: Alexander Kanavin @ 2021-09-17 13:40 UTC (permalink / raw)
  To: yocto; +Cc: Alexander Kanavin

I also took the opportunity to correct a weird API that
returns a result (or not), depending on some internal condition.

Signed-off-by: Alexander Kanavin <alex@linutronix.de>
---
 tests/utils.c | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/tests/utils.c b/tests/utils.c
index 8fffc18..19657ee 100644
--- a/tests/utils.c
+++ b/tests/utils.c
@@ -26,6 +26,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <errno.h>
+#include <stdbool.h>
 
 #include <check.h>
 
@@ -61,16 +62,13 @@ static char *ptests_not_found[] = {
 
 static struct ptest_options EmptyOpts;
 
-static inline void
-find_word(int *found, const char *line, const char *word)
+static inline bool
+find_word(const char *line, const char *word)
 {
-
-	char *pivot = NULL;
-
-	pivot = strdup(line);
-	pivot[strlen(word)] = '\0';
-	if (strcmp(pivot, word) == 0) { *found = 1; }
-	free(pivot);
+	if (strncmp(line, word, strlen(word)) == 0)
+                return true;
+        else
+                return false;
 }
 
 static void test_ptest_expected_failure(struct ptest_list *, const unsigned int, char *,
@@ -206,18 +204,19 @@ search_for_timeout_and_duration(const int rp, FILE *fp_stdout)
 	const char *timeout_str = "TIMEOUT";
 	const char *duration_str = "DURATION";
 	char line_buf[PRINT_PTEST_BUF_SIZE];
-	int found_timeout = 0, found_duration = 0;
+	bool found_timeout = false, found_duration = false;
 	char *line = NULL;
 
 	ck_assert(rp != 0);
 
 	while ((line = fgets(line_buf, PRINT_PTEST_BUF_SIZE, fp_stdout)) != NULL) {
-		find_word(&found_timeout, line, timeout_str);
-		find_word(&found_duration, line, duration_str);
+		// once true, stay true
+		found_timeout = found_timeout ? found_timeout : find_word(line, timeout_str);
+		found_duration = found_duration ? found_duration : find_word(line, duration_str);
 	}
 
-	ck_assert(found_timeout == 1);
-	ck_assert(found_duration == 1);
+	ck_assert(found_timeout == true);
+	ck_assert(found_duration == true);
 }
 
 START_TEST(test_run_timeout_duration_ptest)
@@ -236,16 +235,18 @@ search_for_fail(const int rp, FILE *fp_stdout)
 {
         const char *fail_str = "ERROR: Exit status is 10";
         char line_buf[PRINT_PTEST_BUF_SIZE];
-        int found_fail = 0;
+        int found_fail = false;
         char *line = NULL;
 
         ck_assert(rp != 0);
 
         while ((line = fgets(line_buf, PRINT_PTEST_BUF_SIZE, fp_stdout)) != NULL) {
-		find_word(&found_fail, line, fail_str);
+		found_fail = find_word(line, fail_str);
+		if (found_fail == true)
+			break;
         }
 
-        ck_assert(found_fail == 1);
+        ck_assert(found_fail == true);
 }
 
 START_TEST(test_run_fail_ptest)
-- 
2.33.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [ptest-runner][PATCH 2/3] utils.c: handle test timeouts directly with poll()
  2021-09-17 13:40 [ptest-runner][PATCH 1/3] tests/utils.c: fix a memory corruption in find_word Alexander Kanavin
@ 2021-09-17 13:40 ` Alexander Kanavin
  2021-09-17 13:40 ` [ptest-runner][PATCH 3/3] utils.c: add system data collection when a test gets stuck Alexander Kanavin
  1 sibling, 0 replies; 4+ messages in thread
From: Alexander Kanavin @ 2021-09-17 13:40 UTC (permalink / raw)
  To: yocto; +Cc: Alexander Kanavin

if poll()'s timeout expires that means the test did not
produce any output, which is exactly what we need to catch.

So there's no need to set up separate timeouts with signals
and alarms, and this greatly simplifies more sophisticated
processing of hanging tests (such as collecting overall system data).

Signed-off-by: Alexander Kanavin <alex@linutronix.de>
---
 utils.c | 34 ++++++++++------------------------
 1 file changed, 10 insertions(+), 24 deletions(-)

diff --git a/utils.c b/utils.c
index 128ff61..58c3aa1 100644
--- a/utils.c
+++ b/utils.c
@@ -51,7 +51,6 @@
 #include "utils.h"
 
 #define GET_STIME_BUF_SIZE 1024
-#define WAIT_CHILD_POLL_TIMEOUT_MS 200
 #define WAIT_CHILD_BUF_MAX_SIZE 1024
 
 #define UNUSED(x) (void)(x)
@@ -296,7 +295,7 @@ read_child(void *arg)
 	pfds[1].events = POLLIN;
 
 	do {
-		r = poll(pfds, 2, WAIT_CHILD_POLL_TIMEOUT_MS);
+		r = poll(pfds, 2, _child_reader.timeout*1000);
 		if (r > 0) {
 			char buf[WAIT_CHILD_BUF_MAX_SIZE];
 			ssize_t n;
@@ -313,10 +312,10 @@ read_child(void *arg)
 					fwrite(buf, (size_t)n, 1, _child_reader.fps[1]);
 			}
 
-			/* Child output reset alarm */
-			alarm(0);
-			alarm(_child_reader.timeout);
-		}
+		} else if (r == 0) {
+			_child_reader.timeouted = 1;
+			kill(-_child_reader.pid, SIGKILL);
+                }
 
 		fflush(_child_reader.fps[0]);
 		fflush(_child_reader.fps[1]);
@@ -344,26 +343,11 @@ run_child(char *run_ptest, int fd_stdout, int fd_stderr)
 	/* exit(1); not needed? */
 }
 
-static void
-timeout_child_handler(int signo)
-{
-	UNUSED(signo);
-	_child_reader.timeouted = 1;
-	kill(-_child_reader.pid, SIGKILL);
-}
-
 static inline int
-wait_child(pid_t pid, unsigned int timeout)
+wait_child(pid_t pid)
 {
 	int status = -1;
 
-	_child_reader.timeout = timeout;
-	_child_reader.timeouted = 0;
-	_child_reader.pid = pid;
-
-	/* setup alarm to timeout based on std{out,err} in the child */
-	alarm(timeout);
-
 	waitpid(pid, &status, 0);
 	if (WIFEXITED(status))
 		status = WEXITSTATUS(status);
@@ -462,6 +446,8 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
 		_child_reader.fds[1] = pipefd_stderr[0];
 		_child_reader.fps[0] = fp;
 		_child_reader.fps[1] = fp_stderr;
+		_child_reader.timeout = opts.timeout;
+		_child_reader.timeouted = 0;
 		rc = pthread_create(&tid, NULL, read_child, NULL);
 		if (rc != 0) {
 			fprintf(fp, "ERROR: Failed to create reader thread, %s\n", strerror(errno));
@@ -469,7 +455,6 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
 			close(pipefd_stdout[1]);
 			break;
 		}
-		signal(SIGALRM, timeout_child_handler);
 
 		fprintf(fp, "START: %s\n", progname);
 		PTEST_LIST_ITERATE_START(head, p)
@@ -511,6 +496,7 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
 			} else {
 				int status;
 
+				_child_reader.pid = child;
 				if (setpgid(child, pgid) == -1) {
 					fprintf(fp, "ERROR: setpgid() failed, %s\n", strerror(errno));
 				}
@@ -520,7 +506,7 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
 				fprintf(fp, "BEGIN: %s\n", ptest_dir);
 
 
-				status = wait_child(child, opts.timeout);
+				status = wait_child(child);
 
 				entime = time(NULL);
 				duration = entime - sttime;
-- 
2.33.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [ptest-runner][PATCH 3/3] utils.c: add system data collection when a test gets stuck.
  2021-09-17 13:40 [ptest-runner][PATCH 1/3] tests/utils.c: fix a memory corruption in find_word Alexander Kanavin
  2021-09-17 13:40 ` [ptest-runner][PATCH 2/3] utils.c: handle test timeouts directly with poll() Alexander Kanavin
@ 2021-09-17 13:40 ` Alexander Kanavin
  1 sibling, 0 replies; 4+ messages in thread
From: Alexander Kanavin @ 2021-09-17 13:40 UTC (permalink / raw)
  To: yocto; +Cc: Alexander Kanavin

Currently, ptest-runner simply kills the offending test without further ado,
which is not at all helpful when trying to figure out why it happens
(especially if such hangs are intermittent and rare). There's now a script
that gets executed before killing the test, so ideas on what to have in it
are welcome.

Signed-off-by: Alexander Kanavin <alex@linutronix.de>
---
 Makefile                         |  2 +-
 ptest-runner-collect-system-data |  6 ++++++
 utils.c                          | 24 ++++++++++++++++++++++++
 3 files changed, 31 insertions(+), 1 deletion(-)
 create mode 100755 ptest-runner-collect-system-data

diff --git a/Makefile b/Makefile
index a6372de..168cf5a 100644
--- a/Makefile
+++ b/Makefile
@@ -43,7 +43,7 @@ $(TEST_EXECUTABLE): $(TEST_OBJECTS)
 	$(CC) $(LDFLAGS) $(TEST_OBJECTS) -o $@ $(TEST_LIBSTATIC) $(TEST_LDFLAGS)
 
 check: $(TEST_EXECUTABLE)
-	./$(TEST_EXECUTABLE) -d $(TEST_DATA)
+	PATH=.:$(PATH) ./$(TEST_EXECUTABLE) -d $(TEST_DATA)
 
 .c.o:
 	$(CC) $(CFLAGS) -c $< -o $@
diff --git a/ptest-runner-collect-system-data b/ptest-runner-collect-system-data
new file mode 100755
index 0000000..ba335da
--- /dev/null
+++ b/ptest-runner-collect-system-data
@@ -0,0 +1,6 @@
+#!/bin/sh
+# Other ideas on what to do when a ptest gets stuck welcome.
+dmesg
+pstree -a -l
+df
+free
diff --git a/utils.c b/utils.c
index 58c3aa1..a67ac11 100644
--- a/utils.c
+++ b/utils.c
@@ -281,6 +281,27 @@ close_fds(void)
    	}
 }
 
+static void
+collect_system_state(FILE* fout)
+{
+	char *cmd = "ptest-runner-collect-system-data";
+
+	char buf[1024];
+	FILE *fp;
+
+	if ((fp = popen(cmd, "r")) == NULL) {
+		fprintf(fout, "Error opening pipe!\n");
+	}
+
+	while (fgets(buf, 1024, fp) != NULL) {
+		fprintf(fout, "%s", buf);
+	}
+
+	if(pclose(fp))  {
+		fprintf(fout, "Command not found or exited with error status\n");
+	}
+}
+
 static void *
 read_child(void *arg)
 {
@@ -313,6 +334,9 @@ read_child(void *arg)
 			}
 
 		} else if (r == 0) {
+			// no output from the test after a timeout; the test is stuck, so collect
+			// as much data from the system as possible and kill the test
+			collect_system_state(_child_reader.fps[0]);
 			_child_reader.timeouted = 1;
 			kill(-_child_reader.pid, SIGKILL);
                 }
-- 
2.33.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [ptest-runner][PATCH 2/3] utils.c: handle test timeouts directly with poll()
  2021-09-16 12:46 [ptest-runner][PATCH 1/3] tests/utils.c: fix a memory corruption in find_word Alexander Kanavin
@ 2021-09-16 12:46 ` Alexander Kanavin
  0 siblings, 0 replies; 4+ messages in thread
From: Alexander Kanavin @ 2021-09-16 12:46 UTC (permalink / raw)
  To: yocto, anibal.limon; +Cc: Alexander Kanavin

if poll()'s timeout expires that means the test did not
produce any output, which is exactly what we need to catch.

So there's no need to set up separate timeouts with signals
and alarms, and this greatly simplifies more sophisticated
processing of hanging tests (such as collecting overall system data).

Signed-off-by: Alexander Kanavin <alex@linutronix.de>
---
 utils.c | 34 ++++++++++------------------------
 1 file changed, 10 insertions(+), 24 deletions(-)

diff --git a/utils.c b/utils.c
index 128ff61..58c3aa1 100644
--- a/utils.c
+++ b/utils.c
@@ -51,7 +51,6 @@
 #include "utils.h"
 
 #define GET_STIME_BUF_SIZE 1024
-#define WAIT_CHILD_POLL_TIMEOUT_MS 200
 #define WAIT_CHILD_BUF_MAX_SIZE 1024
 
 #define UNUSED(x) (void)(x)
@@ -296,7 +295,7 @@ read_child(void *arg)
 	pfds[1].events = POLLIN;
 
 	do {
-		r = poll(pfds, 2, WAIT_CHILD_POLL_TIMEOUT_MS);
+		r = poll(pfds, 2, _child_reader.timeout*1000);
 		if (r > 0) {
 			char buf[WAIT_CHILD_BUF_MAX_SIZE];
 			ssize_t n;
@@ -313,10 +312,10 @@ read_child(void *arg)
 					fwrite(buf, (size_t)n, 1, _child_reader.fps[1]);
 			}
 
-			/* Child output reset alarm */
-			alarm(0);
-			alarm(_child_reader.timeout);
-		}
+		} else if (r == 0) {
+			_child_reader.timeouted = 1;
+			kill(-_child_reader.pid, SIGKILL);
+                }
 
 		fflush(_child_reader.fps[0]);
 		fflush(_child_reader.fps[1]);
@@ -344,26 +343,11 @@ run_child(char *run_ptest, int fd_stdout, int fd_stderr)
 	/* exit(1); not needed? */
 }
 
-static void
-timeout_child_handler(int signo)
-{
-	UNUSED(signo);
-	_child_reader.timeouted = 1;
-	kill(-_child_reader.pid, SIGKILL);
-}
-
 static inline int
-wait_child(pid_t pid, unsigned int timeout)
+wait_child(pid_t pid)
 {
 	int status = -1;
 
-	_child_reader.timeout = timeout;
-	_child_reader.timeouted = 0;
-	_child_reader.pid = pid;
-
-	/* setup alarm to timeout based on std{out,err} in the child */
-	alarm(timeout);
-
 	waitpid(pid, &status, 0);
 	if (WIFEXITED(status))
 		status = WEXITSTATUS(status);
@@ -462,6 +446,8 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
 		_child_reader.fds[1] = pipefd_stderr[0];
 		_child_reader.fps[0] = fp;
 		_child_reader.fps[1] = fp_stderr;
+		_child_reader.timeout = opts.timeout;
+		_child_reader.timeouted = 0;
 		rc = pthread_create(&tid, NULL, read_child, NULL);
 		if (rc != 0) {
 			fprintf(fp, "ERROR: Failed to create reader thread, %s\n", strerror(errno));
@@ -469,7 +455,6 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
 			close(pipefd_stdout[1]);
 			break;
 		}
-		signal(SIGALRM, timeout_child_handler);
 
 		fprintf(fp, "START: %s\n", progname);
 		PTEST_LIST_ITERATE_START(head, p)
@@ -511,6 +496,7 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
 			} else {
 				int status;
 
+				_child_reader.pid = child;
 				if (setpgid(child, pgid) == -1) {
 					fprintf(fp, "ERROR: setpgid() failed, %s\n", strerror(errno));
 				}
@@ -520,7 +506,7 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
 				fprintf(fp, "BEGIN: %s\n", ptest_dir);
 
 
-				status = wait_child(child, opts.timeout);
+				status = wait_child(child);
 
 				entime = time(NULL);
 				duration = entime - sttime;
-- 
2.33.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-09-17 13:40 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-17 13:40 [ptest-runner][PATCH 1/3] tests/utils.c: fix a memory corruption in find_word Alexander Kanavin
2021-09-17 13:40 ` [ptest-runner][PATCH 2/3] utils.c: handle test timeouts directly with poll() Alexander Kanavin
2021-09-17 13:40 ` [ptest-runner][PATCH 3/3] utils.c: add system data collection when a test gets stuck Alexander Kanavin
  -- strict thread matches above, loose matches on Subject: below --
2021-09-16 12:46 [ptest-runner][PATCH 1/3] tests/utils.c: fix a memory corruption in find_word Alexander Kanavin
2021-09-16 12:46 ` [ptest-runner][PATCH 2/3] utils.c: handle test timeouts directly with poll() Alexander Kanavin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.