* [PATCH v4 bpf-next 1/3] selftests/bpf: Add parallelism to test_progs
From: Yucong Sun @ 2021-09-13 19:39 UTC
  To: andrii; +Cc: bpf, Yucong Sun

From: Yucong Sun <sunyucong@gmail.com>

This patch adds "-j" mode to test_progs, executing tests in multiple process.
"-j" mode is optional, and works with all existing test selection mechanism, as
well as "-v", "-l" etc.

In "-j" mode, main process use UDS/DGRAM to communicate to each forked worker,
commanding it to run tests and collect logs. After all tests are finished, a
summary is printed. main process use multiple competing threads to dispatch
work to worker, trying to keep them all busy.

Example output:

  > ./test_progs -n 15-20 -j
  [    8.584709] bpf_testmod: loading out-of-tree module taints kernel.
  Launching 2 workers.
  [0]: Running test 15.
  [1]: Running test 16.
  [1]: Running test 17.
  [1]: Running test 18.
  [1]: Running test 19.
  [1]: Running test 20.
  [1]: worker exit.
  [0]: worker exit.
  #15 btf_dump:OK
  #16 btf_endian:OK
  #17 btf_map_in_map:OK
  #18 btf_module:OK
  #19 btf_skc_cls_ingress:OK
  #20 btf_split:OK
  Summary: 6/20 PASSED, 0 SKIPPED, 0 FAILED

Known issue:

Some tests fail when run concurrently; a later patch will either fix
those tests or pin them to worker 0.

Signed-off-by: Yucong Sun <sunyucong@gmail.com>

V3 -> V4: address style warnings.
V2 -> V3: fix missing outputs in commit messages.
V1 -> V2: switch to UDS client/server model.
---
 tools/testing/selftests/bpf/test_progs.c | 458 ++++++++++++++++++++++-
 tools/testing/selftests/bpf/test_progs.h |  36 +-
 2 files changed, 480 insertions(+), 14 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 2ed01f615d20..c542e2d2f893 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -12,6 +12,11 @@
 #include <string.h>
 #include <execinfo.h> /* backtrace */
 #include <linux/membarrier.h>
+#include <sys/sysinfo.h> /* get_nprocs */
+#include <netinet/in.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+#include <sys/un.h>
 
 /* Adapted from perf/util/string.c */
 static bool glob_match(const char *str, const char *pat)
@@ -48,6 +53,7 @@ struct prog_test_def {
 	bool force_log;
 	int error_cnt;
 	int skip_cnt;
+	int sub_succ_cnt;
 	bool tested;
 	bool need_cgroup_cleanup;
 
@@ -97,6 +103,10 @@ static void dump_test_log(const struct prog_test_def *test, bool failed)
 	if (stdout == env.stdout)
 		return;
 
+	/* worker always holds log */
+	if (env.worker_index != -1)
+		return;
+
 	fflush(stdout); /* exports env.log_buf & env.log_cnt */
 
 	if (env.verbosity > VERBOSE_NONE || test->force_log || failed) {
@@ -172,14 +182,14 @@ void test__end_subtest()
 
 	dump_test_log(test, sub_error_cnt);
 
-	fprintf(env.stdout, "#%d/%d %s/%s:%s\n",
+	fprintf(stdout, "#%d/%d %s/%s:%s\n",
 	       test->test_num, test->subtest_num, test->test_name, test->subtest_name,
 	       sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
 
 	if (sub_error_cnt)
-		env.fail_cnt++;
+		test->error_cnt++;
 	else if (test->skip_cnt == 0)
-		env.sub_succ_cnt++;
+		test->sub_succ_cnt++;
 	skip_account();
 
 	free(test->subtest_name);
@@ -474,6 +484,7 @@ enum ARG_KEYS {
 	ARG_LIST_TEST_NAMES = 'l',
 	ARG_TEST_NAME_GLOB_ALLOWLIST = 'a',
 	ARG_TEST_NAME_GLOB_DENYLIST = 'd',
+	ARG_NUM_WORKERS = 'j',
 };
 
 static const struct argp_option opts[] = {
@@ -495,6 +506,8 @@ static const struct argp_option opts[] = {
 	  "Run tests with name matching the pattern (supports '*' wildcard)." },
 	{ "deny", ARG_TEST_NAME_GLOB_DENYLIST, "NAMES", 0,
 	  "Don't run tests with name matching the pattern (supports '*' wildcard)." },
+	{ "workers", ARG_NUM_WORKERS, "WORKERS", OPTION_ARG_OPTIONAL,
+	  "Number of workers to run in parallel, default to number of cpus." },
 	{},
 };
 
@@ -661,6 +674,17 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	case ARG_LIST_TEST_NAMES:
 		env->list_test_names = true;
 		break;
+	case ARG_NUM_WORKERS:
+		if (arg) {
+			env->workers = atoi(arg);
+			if (!env->workers) {
+				fprintf(stderr, "Invalid number of worker: %s.", arg);
+				return -1;
+			}
+		} else {
+			env->workers = get_nprocs();
+		}
+		break;
 	case ARGP_KEY_ARG:
 		argp_usage(state);
 		break;
@@ -678,7 +702,7 @@ static void stdio_hijack(void)
 	env.stdout = stdout;
 	env.stderr = stderr;
 
-	if (env.verbosity > VERBOSE_NONE) {
+	if (env.verbosity > VERBOSE_NONE && env.worker_index == -1) {
 		/* nothing to do, output to stdout by default */
 		return;
 	}
@@ -704,10 +728,6 @@ static void stdio_restore(void)
 		return;
 
 	fclose(stdout);
-	free(env.log_buf);
-
-	env.log_buf = NULL;
-	env.log_cnt = 0;
 
 	stdout = env.stdout;
 	stderr = env.stderr;
@@ -799,6 +819,366 @@ void crash_handler(int signum)
 	backtrace_symbols_fd(bt, sz, STDERR_FILENO);
 }
 
+int current_test_idx = 0;
+pthread_mutex_t current_test_lock;
+
+struct test_result {
+	int error_cnt;
+	int skip_cnt;
+	int sub_succ_cnt;
+
+	size_t log_cnt;
+	char *log_buf;
+};
+
+struct test_result test_results[ARRAY_SIZE(prog_test_defs)];
+
+static inline const char *str_msg(const struct message *msg)
+{
+	static char buf[255];
+
+	switch (msg->type) {
+	case MSG_DO_TEST:
+		sprintf(buf, "MSG_DO_TEST %d", msg->u.message_do_test.num);
+		break;
+	case MSG_TEST_DONE:
+		sprintf(buf, "MSG_TEST_DONE %d (log: %d)",
+			msg->u.message_test_done.num,
+			msg->u.message_test_done.have_log);
+		break;
+	case MSG_TEST_LOG:
+		sprintf(buf, "MSG_TEST_LOG (cnt: %ld, last: %d)",
+			strlen(msg->u.message_test_log.log_buf),
+			msg->u.message_test_log.is_last);
+		break;
+	case MSG_EXIT:
+		sprintf(buf, "MSG_EXIT");
+		break;
+	default:
+		sprintf(buf, "UNKNOWN");
+		break;
+	}
+
+	return buf;
+}
+
+int send_message(int sock, const struct message *msg)
+{
+	if (env.verbosity > VERBOSE_NONE)
+		fprintf(stderr, "Sending msg: %s\n", str_msg(msg));
+	return send(sock, msg, sizeof(*msg), 0);
+}
+
+int recv_message(int sock, struct message *msg)
+{
+	int ret;
+
+	memset(msg, 0, sizeof(*msg));
+	ret = recv(sock, msg, sizeof(*msg), 0);
+	if (ret >= 0) {
+		if (env.verbosity > VERBOSE_NONE)
+			fprintf(stderr, "Received msg: %s\n", str_msg(msg));
+	}
+	return ret;
+}
+
+struct dispatch_data {
+	int idx;
+	int sock;
+};
+
+void *dispatch_thread(void *_data)
+{
+	struct dispatch_data *data;
+	int sock;
+	FILE *log_fd = NULL;
+
+	data = (struct dispatch_data *)_data;
+	sock = data->sock;
+
+	while (true) {
+		int test_to_run = -1;
+		struct prog_test_def *test;
+		struct test_result *result;
+
+		/* grab a test */
+		{
+			pthread_mutex_lock(&current_test_lock);
+
+			if (current_test_idx >= prog_test_cnt) {
+				pthread_mutex_unlock(&current_test_lock);
+				goto done;
+			}
+			test_to_run = current_test_idx;
+			current_test_idx++;
+
+			pthread_mutex_unlock(&current_test_lock);
+		}
+
+		test = &prog_test_defs[test_to_run];
+		test->test_num = test_to_run + 1;
+
+		if (!should_run(&env.test_selector,
+				test->test_num, test->test_name))
+			continue;
+
+		/* run test through worker */
+		{
+			struct message msg_do_test;
+
+			msg_do_test.type = MSG_DO_TEST;
+			msg_do_test.u.message_do_test.num = test_to_run;
+			if (send_message(sock, &msg_do_test) < 0) {
+				perror("Fail to send command");
+				goto done;
+			}
+			env.worker_current_test[data->idx] = test_to_run;
+		}
+
+		/* wait for test done */
+		{
+			struct message msg_test_done;
+
+			if (recv_message(sock, &msg_test_done) < 0)
+				goto error;
+			if (msg_test_done.type != MSG_TEST_DONE)
+				goto error;
+			if (test_to_run != msg_test_done.u.message_test_done.num)
+				goto error;
+
+			result = &test_results[test_to_run];
+
+			test->tested = true;
+
+			result->error_cnt = msg_test_done.u.message_test_done.error_cnt;
+			result->skip_cnt = msg_test_done.u.message_test_done.skip_cnt;
+			result->sub_succ_cnt = msg_test_done.u.message_test_done.sub_succ_cnt;
+
+			/* collect all logs */
+			if (msg_test_done.u.message_test_done.have_log) {
+				log_fd = open_memstream(&result->log_buf, &result->log_cnt);
+				if (!log_fd)
+					goto error;
+
+				while (true) {
+					struct message msg_log;
+
+					if (recv_message(sock, &msg_log) < 0)
+						goto error;
+					if (msg_log.type != MSG_TEST_LOG)
+						goto error;
+
+					fprintf(log_fd, "%s", msg_log.u.message_test_log.log_buf);
+					if (msg_log.u.message_test_log.is_last)
+						break;
+				}
+				fclose(log_fd);
+				log_fd = NULL;
+			}
+		}
+	} /* while (true) */
+error:
+	fprintf(stderr, "[%d]: Protocol/IO error: %s", data->idx, strerror(errno));
+
+	if (log_fd)
+		fclose(log_fd);
+done:
+	{
+		struct message msg_exit;
+
+		msg_exit.type = MSG_EXIT;
+		if (send_message(sock, &msg_exit) < 0)
+			fprintf(stderr, "[%d]: send_message msg_exit: %s", data->idx, strerror(errno));
+	}
+	return NULL;
+}
+
+int server_main(void)
+{
+	pthread_t *dispatcher_threads;
+	struct dispatch_data *data;
+
+	dispatcher_threads = calloc(sizeof(pthread_t), env.workers);
+	data = calloc(sizeof(struct dispatch_data), env.workers);
+
+	env.worker_current_test = calloc(sizeof(int), env.workers);
+	for (int i = 0; i < env.workers; i++) {
+		int rc;
+
+		data[i].idx = i;
+		data[i].sock = env.worker_socks[i];
+		rc = pthread_create(&dispatcher_threads[i], NULL, dispatch_thread, &data[i]);
+		if (rc < 0) {
+			perror("Failed to launch dispatcher thread");
+			return -1;
+		}
+	}
+
+	/* wait for all dispatcher to finish */
+	for (int i = 0; i < env.workers; i++) {
+		while (true) {
+			struct timespec timeout = {
+				.tv_sec = time(NULL) + 5,
+				.tv_nsec = 0
+			};
+			if (pthread_timedjoin_np(dispatcher_threads[i], NULL, &timeout) != ETIMEDOUT)
+				break;
+			fprintf(stderr, "Still waiting for thread %d (test %d).\n", i,  env.worker_current_test[i] + 1);
+		}
+	}
+	free(dispatcher_threads);
+	free(env.worker_current_test);
+	free(data);
+
+	/* generate summary */
+	fflush(stderr);
+	fflush(stdout);
+
+	for (int i = 0; i < prog_test_cnt; i++) {
+		struct prog_test_def *current_test;
+		struct test_result *result;
+
+		current_test = &prog_test_defs[i];
+		result = &test_results[i];
+
+		if (!current_test->tested)
+			continue;
+
+		env.succ_cnt += result->error_cnt ? 0 : 1;
+		env.skip_cnt += result->skip_cnt;
+		env.fail_cnt += result->error_cnt;
+		env.sub_succ_cnt += result->sub_succ_cnt;
+
+		if (result->log_cnt) {
+			result->log_buf[result->log_cnt] = '\0';
+			fprintf(stdout, "%s", result->log_buf);
+			if (result->log_buf[result->log_cnt - 1] != '\n')
+				fprintf(stdout, "\n");
+		}
+		fprintf(stdout, "#%d %s:%s\n",
+			current_test->test_num, current_test->test_name,
+			result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK"));
+	}
+
+	fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
+		env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
+
+	/* reap all workers */
+	for (int i = 0; i < env.workers; i++) {
+		int wstatus, pid;
+
+		pid = waitpid(env.worker_pids[i], &wstatus, 0);
+		assert(pid == env.worker_pids[i]);
+	}
+
+	return 0;
+}
+
+int worker_main(int sock)
+{
+	save_netns();
+
+	while (true) {
+		/* receive command */
+		struct message msg;
+
+		assert(recv_message(sock, &msg) >= 0);
+
+		switch (msg.type) {
+		case MSG_EXIT:
+			fprintf(stderr, "[%d]: worker exit.\n",  env.worker_index);
+			goto out;
+		case MSG_DO_TEST: {
+			int test_to_run;
+			struct prog_test_def *test;
+			struct message msg_done;
+
+			test_to_run = msg.u.message_do_test.num;
+
+			fprintf(stderr, "[%d]: Running test %d.\n",
+				env.worker_index, test_to_run + 1);
+
+			test = &prog_test_defs[test_to_run];
+
+			env.test = test;
+			test->test_num = test_to_run + 1;
+
+			stdio_hijack();
+
+			test->run_test();
+
+			/* ensure last sub-test is finalized properly */
+			if (test->subtest_name)
+				test__end_subtest();
+
+			stdio_restore();
+
+			test->tested = true;
+
+			skip_account();
+
+			reset_affinity();
+			restore_netns();
+			if (test->need_cgroup_cleanup)
+				cleanup_cgroup_environment();
+
+			memset(&msg_done, 0, sizeof(msg_done));
+			msg_done.type = MSG_TEST_DONE;
+			msg_done.u.message_test_done.num = test_to_run;
+			msg_done.u.message_test_done.error_cnt = test->error_cnt;
+			msg_done.u.message_test_done.skip_cnt = test->skip_cnt;
+			msg_done.u.message_test_done.sub_succ_cnt = test->sub_succ_cnt;
+			msg_done.u.message_test_done.have_log = false;
+
+			if (env.verbosity > VERBOSE_NONE || test->force_log || test->error_cnt) {
+				if (env.log_cnt)
+					msg_done.u.message_test_done.have_log = true;
+			}
+			assert(send_message(sock, &msg_done) >= 0);
+
+			/* send logs */
+			if (msg_done.u.message_test_done.have_log) {
+				char *src;
+				size_t slen;
+
+				src = env.log_buf;
+				slen = env.log_cnt;
+				while (slen) {
+					struct message msg_log;
+					char *dest;
+					size_t len;
+
+					memset(&msg_log, 0, sizeof(msg_log));
+					msg_log.type = MSG_TEST_LOG;
+					dest = msg_log.u.message_test_log.log_buf;
+					len = slen >= MAX_LOG_TRUNK_SIZE ? MAX_LOG_TRUNK_SIZE : slen;
+					memcpy(dest, src, len);
+
+					src += len;
+					slen -= len;
+					if (!slen)
+						msg_log.u.message_test_log.is_last = true;
+
+					assert(send_message(sock, &msg_log) >= 0);
+				}
+			}
+			if (env.log_buf) {
+				free(env.log_buf);
+				env.log_buf = NULL;
+				env.log_cnt = 0;
+			}
+			break;
+		} /* case MSG_DO_TEST */
+		default:
+			fprintf(stderr, "[%d]: unknown message.\n",  env.worker_index);
+			return -1;
+		}
+	}
+out:
+	restore_netns();
+	return 0;
+}
+
 int main(int argc, char **argv)
 {
 	static const struct argp argp = {
@@ -837,13 +1217,57 @@ int main(int argc, char **argv)
 		return -1;
 	}
 
-	save_netns();
-	stdio_hijack();
 	env.has_testmod = true;
 	if (!env.list_test_names && load_bpf_testmod()) {
 		fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n");
 		env.has_testmod = false;
 	}
+
+	/* ignore workers if we are just listing */
+	if (env.get_test_cnt || env.list_test_names)
+		env.workers = 0;
+
+	/* launch workers if requested */
+	env.worker_index = -1; /* main process */
+	if (env.workers) {
+		env.worker_pids = calloc(sizeof(__pid_t), env.workers);
+		env.worker_socks = calloc(sizeof(int), env.workers);
+		fprintf(stdout, "Launching %d workers.\n", env.workers);
+		for (int i = 0; i < env.workers; i++) {
+			int sv[2];
+			__pid_t pid;
+
+			if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) < 0) {
+				perror("Fail to create worker socket");
+				return -1;
+			}
+			pid = fork();
+			if (pid < 0) {
+				perror("Failed to fork worker");
+				return -1;
+			} else if (pid != 0) { /* main process */
+				close(sv[1]);
+				env.worker_pids[i] = pid;
+				env.worker_socks[i] = sv[0];
+			} else { /* inside each worker process */
+				close(sv[0]);
+				env.worker_index = i;
+				return worker_main(sv[1]);
+			}
+		}
+
+		if (env.worker_index == -1) {
+			server_main();
+			goto out;
+		}
+	}
+
+	/* The rest of the main process */
+
+	/* on single mode */
+	save_netns();
+	stdio_hijack();
+
 	for (i = 0; i < prog_test_cnt; i++) {
 		struct prog_test_def *test = &prog_test_defs[i];
 
@@ -866,6 +1290,7 @@ int main(int argc, char **argv)
 		}
 
 		test->run_test();
+
 		/* ensure last sub-test is finalized properly */
 		if (test->subtest_name)
 			test__end_subtest();
@@ -882,16 +1307,21 @@ int main(int argc, char **argv)
 			env.fail_cnt++;
 		else
 			env.succ_cnt++;
+
 		skip_account();
+		env.sub_succ_cnt += test->sub_succ_cnt;
 
 		reset_affinity();
 		restore_netns();
 		if (test->need_cgroup_cleanup)
 			cleanup_cgroup_environment();
 	}
-	if (!env.list_test_names && env.has_testmod)
-		unload_bpf_testmod();
 	stdio_restore();
+	if (env.log_buf) {
+		free(env.log_buf);
+		env.log_buf = NULL;
+		env.log_cnt = 0;
+	}
 
 	if (env.get_test_cnt) {
 		printf("%d\n", env.succ_cnt);
@@ -904,14 +1334,16 @@ int main(int argc, char **argv)
 	fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
 		env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
 
+	close(env.saved_netns_fd);
 out:
+	if (!env.list_test_names && env.has_testmod)
+		unload_bpf_testmod();
 	free_str_set(&env.test_selector.blacklist);
 	free_str_set(&env.test_selector.whitelist);
 	free(env.test_selector.num_set);
 	free_str_set(&env.subtest_selector.blacklist);
 	free_str_set(&env.subtest_selector.whitelist);
 	free(env.subtest_selector.num_set);
-	close(env.saved_netns_fd);
 
 	if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0)
 		return EXIT_NO_TEST;
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 94bef0aa74cf..aadb44543225 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -69,7 +69,8 @@ struct test_env {
 	bool get_test_cnt;
 	bool list_test_names;
 
-	struct prog_test_def *test;
+	struct prog_test_def *test; /* current running tests */
+
 	FILE *stdout;
 	FILE *stderr;
 	char *log_buf;
@@ -82,6 +83,39 @@ struct test_env {
 	int skip_cnt; /* skipped tests */
 
 	int saved_netns_fd;
+	int workers; /* number of worker process */
+	int worker_index; /* index number of current worker, main process is -1 */
+	__pid_t *worker_pids; /* array of worker pids */
+	int *worker_socks; /* array of worker socks */
+	int *worker_current_test; /* array of current running test for each worker */
+};
+
+#define MAX_LOG_TRUNK_SIZE 8192
+enum message_type {
+	MSG_DO_TEST = 0,
+	MSG_TEST_DONE = 1,
+	MSG_TEST_LOG = 2,
+	MSG_EXIT = 255,
+};
+struct message {
+	enum message_type type;
+	union {
+		struct {
+			int num;
+		} message_do_test;
+		struct {
+			int num;
+
+			int sub_succ_cnt;
+			int error_cnt;
+			int skip_cnt;
+			bool have_log;
+		} message_test_done;
+		struct {
+			char log_buf[MAX_LOG_TRUNK_SIZE + 1];
+			bool is_last;
+		} message_test_log;
+	} u;
 };
 
 extern struct test_env env;
-- 
2.30.2



* [PATCH v4 bpf-next 2/3] selftests/bpf: add per worker cgroup suffix
From: Yucong Sun @ 2021-09-13 19:39 UTC
  To: andrii; +Cc: bpf, Yucong Sun

From: Yucong Sun <sunyucong@gmail.com>

This patch allows each worker to use a unique cgroup base directory, thus
allowing tests that use cgroups to run concurrently.

Signed-off-by: Yucong Sun <sunyucong@gmail.com>
---
 tools/testing/selftests/bpf/cgroup_helpers.c | 5 +++--
 tools/testing/selftests/bpf/cgroup_helpers.h | 1 +
 tools/testing/selftests/bpf/test_progs.c     | 5 +++++
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 033051717ba5..a0429f0d6db2 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -29,9 +29,10 @@
 #define WALK_FD_LIMIT			16
 #define CGROUP_MOUNT_PATH		"/mnt"
 #define CGROUP_WORK_DIR			"/cgroup-test-work-dir"
+const char *CGROUP_WORK_DIR_SUFFIX = "";
 #define format_cgroup_path(buf, path) \
-	snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
-		 CGROUP_WORK_DIR, path)
+	snprintf(buf, sizeof(buf), "%s%s%s%s", CGROUP_MOUNT_PATH, \
+	CGROUP_WORK_DIR, CGROUP_WORK_DIR_SUFFIX, path)
 
 /**
  * enable_all_controllers() - Enable all available cgroup v2 controllers
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index 5fe3d88e4f0d..5657aba02161 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -16,4 +16,5 @@ int setup_cgroup_environment(void);
 void cleanup_cgroup_environment(void);
 unsigned long long get_cgroup_id(const char *path);
 
+extern const char *CGROUP_WORK_DIR_SUFFIX;
 #endif
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index c542e2d2f893..f0eeb17c348d 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1076,6 +1076,11 @@ int server_main(void)
 
 int worker_main(int sock)
 {
+	static char suffix[16];
+
+	sprintf(suffix, "%d", env.worker_index);
+	CGROUP_WORK_DIR_SUFFIX = suffix;
+
 	save_netns();
 
 	while (true) {
-- 
2.30.2



* [PATCH v4 bpf-next 3/3] selftests/bpf: pin some tests to worker 0
From: Yucong Sun @ 2021-09-13 19:39 UTC
  To: andrii; +Cc: bpf, Yucong Sun

From: Yucong Sun <sunyucong@gmail.com>

This patch adds a simple name list to pin tests that fail to run in
parallel to worker 0. On encountering one of these tests, all other threads
wait on a condition variable, which worker 0 signals once the test has
finished running.

Additionally, before running such a test, thread 0 checks and waits until
all other threads have finished their work, to make sure the pinned test
really is the only test running in the system.

After this change, all tests should pass in '-j' mode.

Signed-off-by: Yucong Sun <sunyucong@gmail.com>
---
 tools/testing/selftests/bpf/test_progs.c | 109 ++++++++++++++++++++---
 1 file changed, 97 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index f0eeb17c348d..dc72b3f526a6 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -18,6 +18,16 @@
 #include <sys/socket.h>
 #include <sys/un.h>
 
+char *TESTS_MUST_SERIALIZE[] = {
+	"netcnt",
+	"select_reuseport",
+	"sockmap_listen",
+	"tc_redirect",
+	"xdp_bonding",
+	"xdp_info",
+	NULL,
+};
+
 /* Adapted from perf/util/string.c */
 static bool glob_match(const char *str, const char *pat)
 {
@@ -821,6 +831,7 @@ void crash_handler(int signum)
 
 int current_test_idx = 0;
 pthread_mutex_t current_test_lock;
+pthread_cond_t wait_for_worker0 = PTHREAD_COND_INITIALIZER;
 
 struct test_result {
 	int error_cnt;
@@ -887,6 +898,29 @@ struct dispatch_data {
 	int sock;
 };
 
+static const char *get_test_name(int idx)
+{
+	struct prog_test_def *test;
+
+	test = &prog_test_defs[idx];
+	return test->test_name;
+}
+
+bool is_test_must_serialize(int idx)
+{
+	struct prog_test_def *test;
+	char **p;
+
+	test = &prog_test_defs[idx];
+	p = &TESTS_MUST_SERIALIZE[0];
+	while (*p != NULL) {
+		if (strcmp(*p, test->test_name) == 0)
+			return true;
+		p++;
+	}
+	return false;
+}
+
 void *dispatch_thread(void *_data)
 {
 	struct dispatch_data *data;
@@ -901,6 +935,8 @@ void *dispatch_thread(void *_data)
 		struct prog_test_def *test;
 		struct test_result *result;
 
+		env.worker_current_test[data->idx] = -1;
+
 		/* grab a test */
 		{
 			pthread_mutex_lock(&current_test_lock);
@@ -909,8 +945,31 @@ void *dispatch_thread(void *_data)
 				pthread_mutex_unlock(&current_test_lock);
 				goto done;
 			}
+
 			test_to_run = current_test_idx;
-			current_test_idx++;
+
+			if (is_test_must_serialize(current_test_idx)) {
+				if (data->idx != 0) {
+					fprintf(stderr, "[%d]: Waiting for thread 0 to finish serialized test: %d.\n",
+						data->idx, current_test_idx + 1);
+					/* wait for worker 0 to pick this job up and finish */
+					pthread_cond_wait(&wait_for_worker0, &current_test_lock);
+					pthread_mutex_unlock(&current_test_lock);
+					goto next;
+				} else {
+					/* wait until all other worker has parked */
+					for (int i = 1; i < env.workers; i++) {
+						if (env.worker_current_test[i] != -1) {
+							fprintf(stderr, "[%d]: Waiting for other threads to finish current test...\n", data->idx);
+							pthread_mutex_unlock(&current_test_lock);
+							usleep(1 * 1000 * 1000);
+							goto next;
+						}
+					}
+				}
+			} else {
+				current_test_idx++;
+			}
 
 			pthread_mutex_unlock(&current_test_lock);
 		}
@@ -975,7 +1034,15 @@ void *dispatch_thread(void *_data)
 				fclose(log_fd);
 				log_fd = NULL;
 			}
+		} /* wait for test done */
+
+		/* unblock all other dispatcher threads */
+		if (is_test_must_serialize(test_to_run) && data->idx == 0) {
+			current_test_idx++;
+			pthread_cond_broadcast(&wait_for_worker0);
 		}
+next:
+	continue;
 	} /* while (true) */
 error:
 	fprintf(stderr, "[%d]: Protocol/IO error: %s", data->idx, strerror(errno));
@@ -997,16 +1064,19 @@ int server_main(void)
 {
 	pthread_t *dispatcher_threads;
 	struct dispatch_data *data;
+	int all_finished = false;
 
 	dispatcher_threads = calloc(sizeof(pthread_t), env.workers);
 	data = calloc(sizeof(struct dispatch_data), env.workers);
 
 	env.worker_current_test = calloc(sizeof(int), env.workers);
+
 	for (int i = 0; i < env.workers; i++) {
 		int rc;
 
 		data[i].idx = i;
 		data[i].sock = env.worker_socks[i];
+		env.worker_current_test[i] = -1;
 		rc = pthread_create(&dispatcher_threads[i], NULL, dispatch_thread, &data[i]);
 		if (rc < 0) {
 			perror("Failed to launch dispatcher thread");
@@ -1015,17 +1085,27 @@ int server_main(void)
 	}
 
 	/* wait for all dispatcher to finish */
-	for (int i = 0; i < env.workers; i++) {
-		while (true) {
-			struct timespec timeout = {
-				.tv_sec = time(NULL) + 5,
-				.tv_nsec = 0
-			};
-			if (pthread_timedjoin_np(dispatcher_threads[i], NULL, &timeout) != ETIMEDOUT)
-				break;
-			fprintf(stderr, "Still waiting for thread %d (test %d).\n", i,  env.worker_current_test[i] + 1);
+	while (!all_finished) {
+		all_finished = true;
+		for (int i = 0; i < env.workers; i++) {
+			if (!dispatcher_threads[i])
+				continue;
+
+			if (pthread_tryjoin_np(dispatcher_threads[i], NULL) == EBUSY) {
+				all_finished = false;
+				if (env.worker_current_test[i] == -1)
+					fprintf(stderr, "Still waiting for thread %d (blocked by thread 0).\n", i);
+				else
+					fprintf(stderr, "Still waiting for thread %d (test #%d:%s).\n",
+						i, env.worker_current_test[i] + 1,
+						get_test_name(env.worker_current_test[i]));
+			} else {
+				dispatcher_threads[i] = 0;
+			}
 		}
+		usleep(10 * 1000 * 1000);
 	}
+
 	free(dispatcher_threads);
 	free(env.worker_current_test);
 	free(data);
@@ -1100,8 +1180,11 @@ int worker_main(int sock)
 
 			test_to_run = msg.u.message_do_test.num;
 
-			fprintf(stderr, "[%d]: Running test %d.\n",
-				env.worker_index, test_to_run + 1);
+			if (env.verbosity > VERBOSE_NONE)
+				fprintf(stderr, "[%d]: #%d:%s running.\n",
+					env.worker_index,
+					test_to_run + 1,
+					get_test_name(test_to_run));
 
 			test = &prog_test_defs[test_to_run];
 
@@ -1172,6 +1255,8 @@ int worker_main(int sock)
 				env.log_buf = NULL;
 				env.log_cnt = 0;
 			}
+			fprintf(stderr, "[%d]: #%d:%s done.\n",
+				env.worker_index, test_to_run + 1, get_test_name(test_to_run));
 			break;
 		} /* case MSG_DO_TEST */
 		default:
-- 
2.30.2



* Re: [PATCH v4 bpf-next 1/3] selftests/bpf: Add parallelism to test_progs
From: Andrii Nakryiko @ 2021-09-14  7:11 UTC
  To: Yucong Sun; +Cc: Andrii Nakryiko, bpf, Yucong Sun

On Mon, Sep 13, 2021 at 12:39 PM Yucong Sun <fallentree@fb.com> wrote:
>
> From: Yucong Sun <sunyucong@gmail.com>
>
> This patch adds "-j" mode to test_progs, executing tests in multiple process.
> "-j" mode is optional, and works with all existing test selection mechanism, as
> well as "-v", "-l" etc.
>
> In "-j" mode, main process use UDS/DGRAM to communicate to each forked worker,
> commanding it to run tests and collect logs. After all tests are finished, a
> summary is printed. main process use multiple competing threads to dispatch
> work to worker, trying to keep them all busy.
>

Overall this looks great and I'm super excited that we'll soon be able
to run tests mostly in parallel. I've done the first rough pass over
the code, but haven't played with running this locally yet.

I didn't trim the message to keep all the context in one place, so
please scroll through all of the below till the end.

> Example output:
>
>   > ./test_progs -n 15-20 -j
>   [    8.584709] bpf_testmod: loading out-of-tree module taints kernel.
>   Launching 2 workers.
>   [0]: Running test 15.
>   [1]: Running test 16.
>   [1]: Running test 17.
>   [1]: Running test 18.
>   [1]: Running test 19.
>   [1]: Running test 20.
>   [1]: worker exit.
>   [0]: worker exit.

I think these messages shouldn't be emitted in normal mode, and not
even with -v. Maybe -vv. They will be useful initially to debug bugs
in the concurrent test runner, probably, so I'd hide them behind a very
verbose setting.

>   #15 btf_dump:OK
>   #16 btf_endian:OK
>   #17 btf_map_in_map:OK
>   #18 btf_module:OK
>   #19 btf_skc_cls_ingress:OK
>   #20 btf_split:OK
>   Summary: 6/20 PASSED, 0 SKIPPED, 0 FAILED
>
> Known issue:
>
> Some tests fail when run concurrently; a later patch will either fix
> those tests or pin them to worker 0.

Hm.. patch #3 does that, no?

>
> Signed-off-by: Yucong Sun <sunyucong@gmail.com>
>
> V3 -> V4: address style warnings.
> V2 -> V3: fix missing outputs in commit messages.
> V1 -> V2: switch to UDS client/server model.

patch sets with more than one patch should come with a cover letter.
Keeping a more-or-less detailed history in the cover letter is best.
For the next revision, please add a cover letter and move the history
there. Check some other patch sets for an example.

> ---
>  tools/testing/selftests/bpf/test_progs.c | 458 ++++++++++++++++++++++-
>  tools/testing/selftests/bpf/test_progs.h |  36 +-
>  2 files changed, 480 insertions(+), 14 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
> index 2ed01f615d20..c542e2d2f893 100644
> --- a/tools/testing/selftests/bpf/test_progs.c
> +++ b/tools/testing/selftests/bpf/test_progs.c
> @@ -12,6 +12,11 @@
>  #include <string.h>
>  #include <execinfo.h> /* backtrace */
>  #include <linux/membarrier.h>
> +#include <sys/sysinfo.h> /* get_nprocs */
> +#include <netinet/in.h>
> +#include <sys/select.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
>
>  /* Adapted from perf/util/string.c */
>  static bool glob_match(const char *str, const char *pat)
> @@ -48,6 +53,7 @@ struct prog_test_def {
>         bool force_log;
>         int error_cnt;
>         int skip_cnt;
> +       int sub_succ_cnt;
>         bool tested;
>         bool need_cgroup_cleanup;
>
> @@ -97,6 +103,10 @@ static void dump_test_log(const struct prog_test_def *test, bool failed)
>         if (stdout == env.stdout)
>                 return;
>
> +       /* worker always holds log */
> +       if (env.worker_index != -1)
> +               return;
> +
>         fflush(stdout); /* exports env.log_buf & env.log_cnt */
>
>         if (env.verbosity > VERBOSE_NONE || test->force_log || failed) {
> @@ -172,14 +182,14 @@ void test__end_subtest()
>
>         dump_test_log(test, sub_error_cnt);
>
> -       fprintf(env.stdout, "#%d/%d %s/%s:%s\n",
> +       fprintf(stdout, "#%d/%d %s/%s:%s\n",
>                test->test_num, test->subtest_num, test->test_name, test->subtest_name,
>                sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));

shouldn't this be emitted by the server (main) process, not the
worker? The worker itself should just return numbers through UDS.

>
>         if (sub_error_cnt)
> -               env.fail_cnt++;
> +               test->error_cnt++;
>         else if (test->skip_cnt == 0)
> -               env.sub_succ_cnt++;
> +               test->sub_succ_cnt++;

this also doesn't feel like a worker's responsibility and logic. It
feels like there is a bit of a missing separation between "executing a
test" and "accounting and reporting test results". I'll get to that
below when talking about duplication of code and logic between
parallel and non-parallel modes of operation.

>         skip_account();
>
>         free(test->subtest_name);
> @@ -474,6 +484,7 @@ enum ARG_KEYS {
>         ARG_LIST_TEST_NAMES = 'l',
>         ARG_TEST_NAME_GLOB_ALLOWLIST = 'a',
>         ARG_TEST_NAME_GLOB_DENYLIST = 'd',
> +       ARG_NUM_WORKERS = 'j',
>  };
>
>  static const struct argp_option opts[] = {
> @@ -495,6 +506,8 @@ static const struct argp_option opts[] = {
>           "Run tests with name matching the pattern (supports '*' wildcard)." },
>         { "deny", ARG_TEST_NAME_GLOB_DENYLIST, "NAMES", 0,
>           "Don't run tests with name matching the pattern (supports '*' wildcard)." },
> +       { "workers", ARG_NUM_WORKERS, "WORKERS", OPTION_ARG_OPTIONAL,
> +         "Number of workers to run in parallel, default to number of cpus." },
>         {},
>  };
>
> @@ -661,6 +674,17 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
>         case ARG_LIST_TEST_NAMES:
>                 env->list_test_names = true;
>                 break;
> +       case ARG_NUM_WORKERS:
> +               if (arg) {
> +                       env->workers = atoi(arg);
> +                       if (!env->workers) {
> +                               fprintf(stderr, "Invalid number of worker: %s.", arg);
> +                               return -1;
> +                       }
> +               } else {
> +                       env->workers = get_nprocs();
> +               }
> +               break;
>         case ARGP_KEY_ARG:
>                 argp_usage(state);
>                 break;
> @@ -678,7 +702,7 @@ static void stdio_hijack(void)
>         env.stdout = stdout;
>         env.stderr = stderr;
>
> -       if (env.verbosity > VERBOSE_NONE) {
> +       if (env.verbosity > VERBOSE_NONE && env.worker_index == -1) {
>                 /* nothing to do, output to stdout by default */
>                 return;

there is a big #ifdef __GLIBC__ guard around this code. If it's false
and we can't really hijack stdout/stderr in parallel mode, we should
just error out; otherwise the output will be a mess.
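
E.g., a sketch of what the non-glibc branch could do (assuming the
existing #ifdef __GLIBC__ sits around the open_memstream()-based logic
in stdio_hijack(); the exact message and exit code are arbitrary):

#ifdef __GLIBC__
	/* ... existing open_memstream()-based hijacking ... */
#else
	/* without open_memstream() per-test logs can't be captured, and
	 * parallel workers would interleave their output arbitrarily
	 */
	if (env.workers > 0) {
		fprintf(stderr, "-j mode requires open_memstream() (glibc)\n");
		exit(1);
	}
#endif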

>         }
> @@ -704,10 +728,6 @@ static void stdio_restore(void)
>                 return;
>
>         fclose(stdout);
> -       free(env.log_buf);
> -
> -       env.log_buf = NULL;
> -       env.log_cnt = 0;
>
>         stdout = env.stdout;
>         stderr = env.stderr;
> @@ -799,6 +819,366 @@ void crash_handler(int signum)
>         backtrace_symbols_fd(bt, sz, STDERR_FILENO);
>  }
>
> +int current_test_idx = 0;
> +pthread_mutex_t current_test_lock;

does this have to be global?

> +
> +struct test_result {
> +       int error_cnt;
> +       int skip_cnt;
> +       int sub_succ_cnt;
> +
> +       size_t log_cnt;
> +       char *log_buf;
> +};
> +
> +struct test_result test_results[ARRAY_SIZE(prog_test_defs)];

same, make static?

> +
> +static inline const char *str_msg(const struct message *msg)
> +{
> +       static char buf[255];

str_msg() is called from send_message()/recv_message(), which are called
from many dispatch threads concurrently. You can't use a shared static
buf; the threads will inevitably corrupt each other's messages.
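
E.g., a sketch with a caller-owned buffer instead (the changed
signature is hypothetical):

static const char *str_msg(const struct message *msg, char *buf, size_t len)
{
	switch (msg->type) {
	case MSG_DO_TEST:
		snprintf(buf, len, "MSG_DO_TEST %d", msg->u.message_do_test.num);
		break;
	/* ... remaining cases as before, just using snprintf(buf, len, ...) ... */
	default:
		snprintf(buf, len, "UNKNOWN");
		break;
	}
	return buf;
}

/* and each caller keeps the buffer on its own stack: */
char buf[255];

fprintf(stderr, "Sending msg: %s\n", str_msg(msg, buf, sizeof(buf)));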

> +
> +       switch (msg->type) {
> +       case MSG_DO_TEST:
> +               sprintf(buf, "MSG_DO_TEST %d", msg->u.message_do_test.num);
> +               break;
> +       case MSG_TEST_DONE:
> +               sprintf(buf, "MSG_TEST_DONE %d (log: %d)",
> +                       msg->u.message_test_done.num,
> +                       msg->u.message_test_done.have_log);
> +               break;
> +       case MSG_TEST_LOG:
> +               sprintf(buf, "MSG_TEST_LOG (cnt: %ld, last: %d)",
> +                       strlen(msg->u.message_test_log.log_buf),
> +                       msg->u.message_test_log.is_last);
> +               break;
> +       case MSG_EXIT:
> +               sprintf(buf, "MSG_EXIT");
> +               break;
> +       default:
> +               sprintf(buf, "UNKNOWN");
> +               break;
> +       }
> +
> +       return buf;
> +}
> +
> +int send_message(int sock, const struct message *msg)

here and everywhere else, unless a function or variable has to be
global, it should be static

> +{
> +       if (env.verbosity > VERBOSE_NONE)
> +               fprintf(stderr, "Sending msg: %s\n", str_msg(msg));
> +       return send(sock, msg, sizeof(*msg), 0);
> +}
> +
> +int recv_message(int sock, struct message *msg)
> +{
> +       int ret;
> +
> +       memset(msg, 0, sizeof(*msg));
> +       ret = recv(sock, msg, sizeof(*msg), 0);
> +       if (ret >= 0) {
> +               if (env.verbosity > VERBOSE_NONE)
> +                       fprintf(stderr, "Received msg: %s\n", str_msg(msg));
> +       }
> +       return ret;
> +}
> +
> +struct dispatch_data {
> +       int idx;

is idx a worker index? let's make it clear with worker_idx/worker_id/worker_num?

> +       int sock;

is this a socket file descriptor? sock_fd then?

> +};
> +
> +void *dispatch_thread(void *_data)

our code base doesn't really use _-prefixed arguments, why not just
call it "arg" or "input" or "ctx"?

> +{
> +       struct dispatch_data *data;
> +       int sock;
> +       FILE *log_fd = NULL;
> +
> +       data = (struct dispatch_data *)_data;

in C you can just:

struct dispatch_data *data = input;

right there at variable declaration

> +       sock = data->sock;
> +
> +       while (true) {
> +               int test_to_run = -1;
> +               struct prog_test_def *test;
> +               struct test_result *result;
> +
> +               /* grab a test */
> +               {
> +                       pthread_mutex_lock(&current_test_lock);
> +
> +                       if (current_test_idx >= prog_test_cnt) {
> +                               pthread_mutex_unlock(&current_test_lock);
> +                               goto done;
> +                       }
> +                       test_to_run = current_test_idx;
> +                       current_test_idx++;
> +
> +                       pthread_mutex_unlock(&current_test_lock);
> +               }
> +
> +               test = &prog_test_defs[test_to_run];
> +               test->test_num = test_to_run + 1;

let's initialize test->test_num outside of all this logic in main()
(together with whatever other test_def initializations we need to
perform at startup)

> +
> +               if (!should_run(&env.test_selector,
> +                               test->test_num, test->test_name))
> +                       continue;

this probably can also be extracted outside of the single-thread/parallel
execution paths; it's basically simple pre-initialization as well.
Each test would just be marked as needing to run or not. WDYT?
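
E.g., a sketch of such a pre-initialization pass in main(), done before
the sequential/parallel paths split (the should_run field on
prog_test_def is hypothetical):

for (i = 0; i < prog_test_cnt; i++) {
	struct prog_test_def *test = &prog_test_defs[i];

	/* test numbers are 1-based in all user-visible output */
	test->test_num = i + 1;
	test->should_run = should_run(&env.test_selector,
				      test->test_num, test->test_name);
}

Both the dispatcher threads and the sequential loop would then only need
an "if (!test->should_run) continue;" check.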

> +
> +               /* run test through worker */
> +               {
> +                       struct message msg_do_test;
> +
> +                       msg_do_test.type = MSG_DO_TEST;
> +                       msg_do_test.u.message_do_test.num = test_to_run;
> +                       if (send_message(sock, &msg_do_test) < 0) {
> +                               perror("Fail to send command");
> +                               goto done;
> +                       }
> +                       env.worker_current_test[data->idx] = test_to_run;
> +               }
> +
> +               /* wait for test done */
> +               {
> +                       struct message msg_test_done;
> +
> +                       if (recv_message(sock, &msg_test_done) < 0)
> +                               goto error;
> +                       if (msg_test_done.type != MSG_TEST_DONE)
> +                               goto error;
> +                       if (test_to_run != msg_test_done.u.message_test_done.num)
> +                               goto error;
> +
> +                       result = &test_results[test_to_run];
> +
> +                       test->tested = true;
> +
> +                       result->error_cnt = msg_test_done.u.message_test_done.error_cnt;
> +                       result->skip_cnt = msg_test_done.u.message_test_done.skip_cnt;
> +                       result->sub_succ_cnt = msg_test_done.u.message_test_done.sub_succ_cnt;
> +
> +                       /* collect all logs */
> +                       if (msg_test_done.u.message_test_done.have_log) {
> +                               log_fd = open_memstream(&result->log_buf, &result->log_cnt);
> +                               if (!log_fd)
> +                                       goto error;
> +
> +                               while (true) {
> +                                       struct message msg_log;
> +
> +                                       if (recv_message(sock, &msg_log) < 0)
> +                                               goto error;
> +                                       if (msg_log.type != MSG_TEST_LOG)
> +                                               goto error;
> +
> +                                       fprintf(log_fd, "%s", msg_log.u.message_test_log.log_buf);
> +                                       if (msg_log.u.message_test_log.is_last)
> +                                               break;
> +                               }
> +                               fclose(log_fd);
> +                               log_fd = NULL;
> +                       }
> +               }
> +       } /* while (true) */
> +error:
> +       fprintf(stderr, "[%d]: Protocol/IO error: %s", data->idx, strerror(errno));
> +
> +       if (log_fd)
> +               fclose(log_fd);
> +done:
> +       {
> +               struct message msg_exit;
> +
> +               msg_exit.type = MSG_EXIT;
> +               if (send_message(sock, &msg_exit) < 0)
> +                       fprintf(stderr, "[%d]: send_message msg_exit: %s", data->idx, strerror(errno));
> +       }
> +       return NULL;
> +}
> +
> +int server_main(void)
> +{
> +       pthread_t *dispatcher_threads;
> +       struct dispatch_data *data;
> +
> +       dispatcher_threads = calloc(sizeof(pthread_t), env.workers);
> +       data = calloc(sizeof(struct dispatch_data), env.workers);
> +
> +       env.worker_current_test = calloc(sizeof(int), env.workers);
> +       for (int i = 0; i < env.workers; i++) {
> +               int rc;
> +
> +               data[i].idx = i;
> +               data[i].sock = env.worker_socks[i];
> +               rc = pthread_create(&dispatcher_threads[i], NULL, dispatch_thread, &data[i]);
> +               if (rc < 0) {
> +                       perror("Failed to launch dispatcher thread");
> +                       return -1;
> +               }
> +       }
> +
> +       /* wait for all dispatcher to finish */
> +       for (int i = 0; i < env.workers; i++) {
> +               while (true) {
> +                       struct timespec timeout = {
> +                               .tv_sec = time(NULL) + 5,
> +                               .tv_nsec = 0
> +                       };
> +                       if (pthread_timedjoin_np(dispatcher_threads[i], NULL, &timeout) != ETIMEDOUT)
> +                               break;
> +                       fprintf(stderr, "Still waiting for thread %d (test %d).\n", i,  env.worker_current_test[i] + 1);

This is just going to spam output needlessly. Why do dispatchers have
to exit within 5 seconds, if there are tests that run way longer than
that?... Probably better to just do pthread_join()?

> +               }
> +       }
> +       free(dispatcher_threads);
> +       free(env.worker_current_test);
> +       free(data);
> +
> +       /* generate summary */
> +       fflush(stderr);
> +       fflush(stdout);
> +
> +       for (int i = 0; i < prog_test_cnt; i++) {
> +               struct prog_test_def *current_test;
> +               struct test_result *result;
> +
> +               current_test = &prog_test_defs[i];
> +               result = &test_results[i];
> +
> +               if (!current_test->tested)
> +                       continue;
> +
> +               env.succ_cnt += result->error_cnt ? 0 : 1;
> +               env.skip_cnt += result->skip_cnt;
> +               env.fail_cnt += result->error_cnt;
> +               env.sub_succ_cnt += result->sub_succ_cnt;
> +
> +               if (result->log_cnt) {
> +                       result->log_buf[result->log_cnt] = '\0';
> +                       fprintf(stdout, "%s", result->log_buf);
> +                       if (result->log_buf[result->log_cnt - 1] != '\n')
> +                               fprintf(stdout, "\n");
> +               }
> +               fprintf(stdout, "#%d %s:%s\n",
> +                       current_test->test_num, current_test->test_name,
> +                       result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK"));

it would be nice to see this as results come in (when handling
msg_test_done). It will come out of order, but it will sort of be a
progress indicator that stuff is happening, while the totals summary
will then be output at the very end, at the bottom.

> +       }
> +
> +       fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
> +               env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
> +
> +       /* reap all workers */
> +       for (int i = 0; i < env.workers; i++) {
> +               int wstatus, pid;
> +
> +               pid = waitpid(env.worker_pids[i], &wstatus, 0);

should we send SIGKILL as well to ensure that some stuck or buggy
worker gets reaped for sure?
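
E.g. (a sketch; SIGKILL is harmless if the worker has already exited):

for (int i = 0; i < env.workers; i++) {
	int wstatus, pid;

	/* a worker stuck in a test would otherwise block waitpid() forever */
	kill(env.worker_pids[i], SIGKILL);
	pid = waitpid(env.worker_pids[i], &wstatus, 0);
	if (pid != env.worker_pids[i])
		fprintf(stderr, "Failed to reap worker %d: %s\n",
			i, strerror(errno));
}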

> +               assert(pid == env.worker_pids[i]);

let's not use assert(); better to print an error message

> +       }
> +
> +       return 0;
> +}
> +
> +int worker_main(int sock)
> +{
> +       save_netns();
> +
> +       while (true) {
> +               /* receive command */
> +               struct message msg;
> +
> +               assert(recv_message(sock, &msg) >= 0);

see above about assert()s

> +
> +               switch (msg.type) {
> +               case MSG_EXIT:
> +                       fprintf(stderr, "[%d]: worker exit.\n",  env.worker_index);
> +                       goto out;
> +               case MSG_DO_TEST: {
> +                       int test_to_run;
> +                       struct prog_test_def *test;
> +                       struct message msg_done;
> +
> +                       test_to_run = msg.u.message_do_test.num;
> +
> +                       fprintf(stderr, "[%d]: Running test %d.\n",
> +                               env.worker_index, test_to_run + 1);
> +
> +                       test = &prog_test_defs[test_to_run];
> +
> +                       env.test = test;
> +                       test->test_num = test_to_run + 1;
> +
> +                       stdio_hijack();
> +
> +                       test->run_test();
> +
> +                       /* ensure last sub-test is finalized properly */
> +                       if (test->subtest_name)
> +                               test__end_subtest();
> +
> +                       stdio_restore();
> +
> +                       test->tested = true;
> +
> +                       skip_account();

my biggest concern right now is with all this setup/teardown code
being duplicated between sequential mode and parallel mode. Let's
ensure that as much as possible is shared and re-used through either
helper functions or by doing whatever initialization can be done
outside of the test running loop itself (as suggested for test->test_num
and test filtering logic).

one way to attack some of that duplication is by having the logic for
MSG_DO_TEST handling extracted into a separate function that would be
called both from the forked worker process *and* from the sequential
mode main process all the same. The send_message() logic would stay in
the parallelized worker code, most probably.
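
Roughly like this (a sketch only; run_one_test() is a hypothetical
name, and all result accounting/reporting stays with the caller):

static void run_one_test(int test_idx)
{
	struct prog_test_def *test = &prog_test_defs[test_idx];

	env.test = test;

	stdio_hijack();

	test->run_test();

	/* ensure last sub-test is finalized properly */
	if (test->subtest_name)
		test__end_subtest();

	stdio_restore();

	test->tested = true;
	skip_account();

	reset_affinity();
	restore_netns();
	if (test->need_cgroup_cleanup)
		cleanup_cgroup_environment();
}

worker_main() would call it and then do the MSG_TEST_DONE/MSG_TEST_LOG
send_message() part, while the sequential loop in main() would call it
and update the env.*_cnt counters directly.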

> +
> +                       reset_affinity();
> +                       restore_netns();
> +                       if (test->need_cgroup_cleanup)
> +                               cleanup_cgroup_environment();
> +
> +                       memset(&msg_done, 0, sizeof(msg_done));
> +                       msg_done.type = MSG_TEST_DONE;
> +                       msg_done.u.message_test_done.num = test_to_run;
> +                       msg_done.u.message_test_done.error_cnt = test->error_cnt;
> +                       msg_done.u.message_test_done.skip_cnt = test->skip_cnt;
> +                       msg_done.u.message_test_done.sub_succ_cnt = test->sub_succ_cnt;
> +                       msg_done.u.message_test_done.have_log = false;
> +
> +                       if (env.verbosity > VERBOSE_NONE || test->force_log || test->error_cnt) {
> +                               if (env.log_cnt)
> +                                       msg_done.u.message_test_done.have_log = true;
> +                       }
> +                       assert(send_message(sock, &msg_done) >= 0);
> +
> +                       /* send logs */
> +                       if (msg_done.u.message_test_done.have_log) {
> +                               char *src;
> +                               size_t slen;
> +
> +                               src = env.log_buf;
> +                               slen = env.log_cnt;
> +                               while (slen) {
> +                                       struct message msg_log;
> +                                       char *dest;
> +                                       size_t len;
> +
> +                                       memset(&msg_log, 0, sizeof(msg_log));
> +                                       msg_log.type = MSG_TEST_LOG;
> +                                       dest = msg_log.u.message_test_log.log_buf;
> +                                       len = slen >= MAX_LOG_TRUNK_SIZE ? MAX_LOG_TRUNK_SIZE : slen;
> +                                       memcpy(dest, src, len);
> +
> +                                       src += len;
> +                                       slen -= len;
> +                                       if (!slen)
> +                                               msg_log.u.message_test_log.is_last = true;
> +
> +                                       assert(send_message(sock, &msg_log) >= 0);
> +                               }
> +                       }
> +                       if (env.log_buf) {
> +                               free(env.log_buf);
> +                               env.log_buf = NULL;
> +                               env.log_cnt = 0;
> +                       }
> +                       break;
> +               } /* case MSG_DO_TEST */
> +               default:
> +                       fprintf(stderr, "[%d]: unknown message.\n",  env.worker_index);
> +                       return -1;
> +               }
> +       }
> +out:
> +       restore_netns();
> +       return 0;
> +}
> +
>  int main(int argc, char **argv)
>  {
>         static const struct argp argp = {
> @@ -837,13 +1217,57 @@ int main(int argc, char **argv)
>                 return -1;
>         }
>
> -       save_netns();
> -       stdio_hijack();
>         env.has_testmod = true;
>         if (!env.list_test_names && load_bpf_testmod()) {
>                 fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n");
>                 env.has_testmod = false;
>         }
> +
> +       /* ignore workers if we are just listing */
> +       if (env.get_test_cnt || env.list_test_names)
> +               env.workers = 0;
> +
> +       /* launch workers if requested */
> +       env.worker_index = -1; /* main process */
> +       if (env.workers) {
> +               env.worker_pids = calloc(sizeof(__pid_t), env.workers);
> +               env.worker_socks = calloc(sizeof(int), env.workers);
> +               fprintf(stdout, "Launching %d workers.\n", env.workers);
> +               for (int i = 0; i < env.workers; i++) {
> +                       int sv[2];
> +                       __pid_t pid;

nit: how's __pid_t different from pid_t?

> +
> +                       if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) < 0) {
> +                               perror("Fail to create worker socket");
> +                               return -1;
> +                       }
> +                       pid = fork();
> +                       if (pid < 0) {
> +                               perror("Failed to fork worker");
> +                               return -1;
> +                       } else if (pid != 0) { /* main process */
> +                               close(sv[1]);
> +                               env.worker_pids[i] = pid;
> +                               env.worker_socks[i] = sv[0];
> +                       } else { /* inside each worker process */
> +                               close(sv[0]);
> +                               env.worker_index = i;
> +                               return worker_main(sv[1]);
> +                       }
> +               }
> +
> +               if (env.worker_index == -1) {

it can't be anything else here, so you can just run server_main() directly

> +                       server_main();
> +                       goto out;
> +               }
> +       }
> +
> +       /* The rest of the main process */
> +
> +       /* on single mode */
> +       save_netns();
> +       stdio_hijack();
> +
>         for (i = 0; i < prog_test_cnt; i++) {
>                 struct prog_test_def *test = &prog_test_defs[i];
>
> @@ -866,6 +1290,7 @@ int main(int argc, char **argv)
>                 }
>
>                 test->run_test();
> +
>                 /* ensure last sub-test is finalized properly */
>                 if (test->subtest_name)
>                         test__end_subtest();
> @@ -882,16 +1307,21 @@ int main(int argc, char **argv)
>                         env.fail_cnt++;
>                 else
>                         env.succ_cnt++;
> +
>                 skip_account();
> +               env.sub_succ_cnt += test->sub_succ_cnt;
>
>                 reset_affinity();
>                 restore_netns();
>                 if (test->need_cgroup_cleanup)
>                         cleanup_cgroup_environment();

all the logic in this loop (and maybe more of the surrounding code as
well) is conceptually either 1) running the test or 2)
accounting/reporting test results. For sequential mode it can be
intermixed, but for parallel mode there is a strict distinction:
running happens inside the worker, while accounting happens in the
dispatcher process. For sequential mode we can model the interaction the
same way and reuse as much code as possible.

>         }
> -       if (!env.list_test_names && env.has_testmod)
> -               unload_bpf_testmod();
>         stdio_restore();
> +       if (env.log_buf) {
> +               free(env.log_buf);
> +               env.log_buf = NULL;
> +               env.log_cnt = 0;
> +       }
>
>         if (env.get_test_cnt) {
>                 printf("%d\n", env.succ_cnt);
> @@ -904,14 +1334,16 @@ int main(int argc, char **argv)
>         fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
>                 env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
>
> +       close(env.saved_netns_fd);
>  out:
> +       if (!env.list_test_names && env.has_testmod)
> +               unload_bpf_testmod();
>         free_str_set(&env.test_selector.blacklist);
>         free_str_set(&env.test_selector.whitelist);
>         free(env.test_selector.num_set);
>         free_str_set(&env.subtest_selector.blacklist);
>         free_str_set(&env.subtest_selector.whitelist);
>         free(env.subtest_selector.num_set);
> -       close(env.saved_netns_fd);
>
>         if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0)
>                 return EXIT_NO_TEST;
> diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
> index 94bef0aa74cf..aadb44543225 100644
> --- a/tools/testing/selftests/bpf/test_progs.h
> +++ b/tools/testing/selftests/bpf/test_progs.h
> @@ -69,7 +69,8 @@ struct test_env {
>         bool get_test_cnt;
>         bool list_test_names;
>
> -       struct prog_test_def *test;
> +       struct prog_test_def *test; /* current running tests */
> +
>         FILE *stdout;
>         FILE *stderr;
>         char *log_buf;
> @@ -82,6 +83,39 @@ struct test_env {
>         int skip_cnt; /* skipped tests */
>
>         int saved_netns_fd;
> +       int workers; /* number of worker process */
> +       int worker_index; /* index number of current worker, main process is -1 */
> +       __pid_t *worker_pids; /* array of worker pids */
> +       int *worker_socks; /* array of worker socks */
> +       int *worker_current_test; /* array of current running test for each worker */
> +};
> +
> +#define MAX_LOG_TRUNK_SIZE 8192
> +enum message_type {
> +       MSG_DO_TEST = 0,
> +       MSG_TEST_DONE = 1,
> +       MSG_TEST_LOG = 2,
> +       MSG_EXIT = 255,
> +};

nit: empty line between type definitions

ok, here comes a bunch of naming nitpicking, sorry about that :) but
the current struct definition is a bit of a mouthful, so it's hard to
read, and some fields are actually too succinct at the same time, so
that in some places it's hard to understand what the field represents.

> +struct message {
> +       enum message_type type;

total nitpick, but given MSG_ enums above, I'd stick to "msg"
everywhere and have struct msg and enum msg_type

> +       union {
> +               struct {
> +                       int num;

this is the test number, right? test_num would make it more clear.
There were a few times when I wondered if it's worker number or a test
number.

> +               } message_do_test;

this is a bit tautological. In the code above you have accesses like:

msg->u.message_do_test.num

that message_ prefix doesn't bring any value, just makes everything
unnecessarily verbose. With the below suggestion to drop the u name for
the union and by dropping the message_ prefix everywhere, we'll be down
to a clean and simple

msg->do_test.test_num

(and I'd s/do_test/run_test/ because it reads better for me, but I
won't complain either way)


> +               struct {
> +                       int num;

same about test_num
> +
> +                       int sub_succ_cnt;
> +                       int error_cnt;
> +                       int skip_cnt;
> +                       bool have_log;
> +               } message_test_done;
> +               struct {
> +                       char log_buf[MAX_LOG_TRUNK_SIZE + 1];
> +                       bool is_last;
> +               } message_test_log;
> +       } u;

there is no need to name this union; it can be anonymous, allowing the
.u. part to be skipped in field accesses

>  };
>
>  extern struct test_env env;
> --
> 2.30.2
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 bpf-next 3/3] selftests/bpf: pin some tests to worker 0
  2021-09-13 19:39 ` [PATCH v4 bpf-next 3/3] selftests/bpf: pin some tests to worker 0 Yucong Sun
@ 2021-09-14  7:23   ` Andrii Nakryiko
  2021-09-15 22:15     ` sunyucong
  0 siblings, 1 reply; 12+ messages in thread
From: Andrii Nakryiko @ 2021-09-14  7:23 UTC (permalink / raw)
  To: Yucong Sun; +Cc: Andrii Nakryiko, bpf, Yucong Sun

On Mon, Sep 13, 2021 at 12:39 PM Yucong Sun <fallentree@fb.com> wrote:
>
> From: Yucong Sun <sunyucong@gmail.com>
>
> This patch adds a simple name list to pin some tests that fail to run in
> parallel to worker 0. On encountering these tests, all other threads will wait
> on a condition variable, which worker 0 will signal once the tests have
> finished running.
>
> Additionally, before running the test, thread 0 also checks and waits until all
> other threads have finished their work, to make sure the pinned tests really are
> the only tests running in the system.
>
> After this change, all tests should pass in '-j' mode.
>
> Signed-off-by: Yucong Sun <sunyucong@gmail.com>
> ---
>  tools/testing/selftests/bpf/test_progs.c | 109 ++++++++++++++++++++---
>  1 file changed, 97 insertions(+), 12 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
> index f0eeb17c348d..dc72b3f526a6 100644
> --- a/tools/testing/selftests/bpf/test_progs.c
> +++ b/tools/testing/selftests/bpf/test_progs.c
> @@ -18,6 +18,16 @@
>  #include <sys/socket.h>
>  #include <sys/un.h>
>
> +char *TESTS_MUST_SERIALIZE[] = {
> +       "netcnt",
> +       "select_reuseport",
> +       "sockmap_listen",
> +       "tc_redirect",
> +       "xdp_bonding",
> +       "xdp_info",
> +       NULL,
> +};
> +

I was actually thinking to encode this as part of the test function
name itself. I.e.,

void test_vmlinux(void) for parallelizable tests

and

void serial_test_vmlinux(void)


Then we can use weak symbols to "detect" which one is actually defined
for any given test.:

struct prog_test_def {
    void (*run_test)(void);
    void (*run_test_serial)(void);
    ...
};

then test_progs.c will define

#define DEFINE_TEST(name) extern void test_##name(void) __weak; extern
void serial_test_##name(void) __weak;

and that prog_test_def (through another DEFINE_TEST macro definition)
will be initialized as

{
    .test_name = #name,
    .run_test = &test_##name,
    .run_test_serial = &serial_test_##name,
}


After all that, checking which of .run_test or .run_test_serial isn't
NULL (and erroring out if both or neither is defined) will determine
whether the test is serial or parallel. Keeping this knowledge next to
the test itself is nice in that it will never get out of sync, will
never be mismatched, etc. (and it's very obvious when looking at the
test file itself).
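
To illustrate, a rough sketch of the startup validation (a hypothetical
helper, not final code; undefined weak symbols resolve to NULL, which
is what makes the detection work):

static int classify_test(struct prog_test_def *test)
{
	if (test->run_test && test->run_test_serial) {
		fprintf(stderr, "%s: both parallel and serial variants defined\n",
			test->test_name);
		return -1;
	}
	if (!test->run_test && !test->run_test_serial) {
		fprintf(stderr, "%s: no test function defined\n",
			test->test_name);
		return -1;
	}
	return 0;
}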

Thoughts?


[...]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 bpf-next 1/3] selftests/bpf: Add parallelism to test_progs
  2021-09-14  7:11 ` [PATCH v4 bpf-next 1/3] selftests/bpf: Add parallelism to test_progs Andrii Nakryiko
@ 2021-09-15 13:29   ` sunyucong
  2021-09-15 16:00     ` Andrii Nakryiko
  0 siblings, 1 reply; 12+ messages in thread
From: sunyucong @ 2021-09-15 13:29 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: Yucong Sun, Andrii Nakryiko, bpf

On Tue, Sep 14, 2021 at 3:11 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Mon, Sep 13, 2021 at 12:39 PM Yucong Sun <fallentree@fb.com> wrote:
> >
> > From: Yucong Sun <sunyucong@gmail.com>
> >
> > This patch adds "-j" mode to test_progs, executing tests in multiple processes.
> > "-j" mode is optional, and works with all existing test selection mechanisms, as
> > well as "-v", "-l" etc.
> >
> > In "-j" mode, the main process uses UDS/DGRAM to communicate to each forked
> > worker, commanding it to run tests and collect logs. After all tests are
> > finished, a summary is printed. The main process uses multiple competing
> > threads to dispatch work to workers, trying to keep them all busy.
> >
>
> Overall this looks great and I'm super excited that we'll soon be able
> to run tests mostly in parallel. I've done the first rough pass over
> the code, but haven't played with running this locally yet.
>
> I didn't trim the message to keep all the context in one place, so
> please scroll through all of the below till the end.
>
> > Example output:
> >
> >   > ./test_progs -n 15-20 -j
> >   [    8.584709] bpf_testmod: loading out-of-tree module taints kernel.
> >   Launching 2 workers.
> >   [0]: Running test 15.
> >   [1]: Running test 16.
> >   [1]: Running test 17.
> >   [1]: Running test 18.
> >   [1]: Running test 19.
> >   [1]: Running test 20.
> >   [1]: worker exit.
> >   [0]: worker exit.
>
> I think these messages shouldn't be emitted in normal mode, and not
> even with -v. Maybe -vv. They will be useful initially to debug bugs
> in the concurrent test runner, probably, so I'd hide them behind a very
> verbose setting.
>
> >   #15 btf_dump:OK
> >   #16 btf_endian:OK
> >   #17 btf_map_in_map:OK
> >   #18 btf_module:OK
> >   #19 btf_skc_cls_ingress:OK
> >   #20 btf_split:OK
> >   Summary: 6/20 PASSED, 0 SKIPPED, 0 FAILED
> >
> > Known issue:
> >
> > Some tests fail when running concurrently; a later patch will either
> > fix the tests or pin them to worker 0.
>
> Hm.. patch #3 does that, no?
>
> >
> > Signed-off-by: Yucong Sun <sunyucong@gmail.com>
> >
> > V4 -> V3: address style warnings.
> > V3 -> V2: fix missing outputs in commit messages.
> > V2 -> V1: switch to UDS client/server model.
>
> patch sets with more than one patch should come with a cover letter.
> Keeping more-or-less detailed history in a cover letter is the best.
> For next revision please add the cover letter and move the history
> there. Check some other patch sets for an example.

Ack.

>
> > ---
> >  tools/testing/selftests/bpf/test_progs.c | 458 ++++++++++++++++++++++-
> >  tools/testing/selftests/bpf/test_progs.h |  36 +-
> >  2 files changed, 480 insertions(+), 14 deletions(-)
> >
> > diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
> > index 2ed01f615d20..c542e2d2f893 100644
> > --- a/tools/testing/selftests/bpf/test_progs.c
> > +++ b/tools/testing/selftests/bpf/test_progs.c
> > @@ -12,6 +12,11 @@
> >  #include <string.h>
> >  #include <execinfo.h> /* backtrace */
> >  #include <linux/membarrier.h>
> > +#include <sys/sysinfo.h> /* get_nprocs */
> > +#include <netinet/in.h>
> > +#include <sys/select.h>
> > +#include <sys/socket.h>
> > +#include <sys/un.h>
> >
> >  /* Adapted from perf/util/string.c */
> >  static bool glob_match(const char *str, const char *pat)
> > @@ -48,6 +53,7 @@ struct prog_test_def {
> >         bool force_log;
> >         int error_cnt;
> >         int skip_cnt;
> > +       int sub_succ_cnt;
> >         bool tested;
> >         bool need_cgroup_cleanup;
> >
> > @@ -97,6 +103,10 @@ static void dump_test_log(const struct prog_test_def *test, bool failed)
> >         if (stdout == env.stdout)
> >                 return;
> >
> > +       /* worker always holds log */
> > +       if (env.worker_index != -1)
> > +               return;
> > +
> >         fflush(stdout); /* exports env.log_buf & env.log_cnt */
> >
> >         if (env.verbosity > VERBOSE_NONE || test->force_log || failed) {
> > @@ -172,14 +182,14 @@ void test__end_subtest()
> >
> >         dump_test_log(test, sub_error_cnt);
> >
> > -       fprintf(env.stdout, "#%d/%d %s/%s:%s\n",
> > +       fprintf(stdout, "#%d/%d %s/%s:%s\n",
> >                test->test_num, test->subtest_num, test->test_name, test->subtest_name,
> >                sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
>
> shouldn't this be emitted by the server (main) process, not the
> worker? Worker itself just returns numbers through UDS.

test__end_subtest() is executed by the worker, and this output is
part of the log that is sent back if there is an error executing
subtests or in verbose mode.

In theory we could print this on the server side, but that would
require quite a bit of hassle to send back each subtest's execution
stats individually, and I don't think it is worth the trouble.

>
> >
> >         if (sub_error_cnt)
> > -               env.fail_cnt++;
> > +               test->error_cnt++;
> >         else if (test->skip_cnt == 0)
> > -               env.sub_succ_cnt++;
> > +               test->sub_succ_cnt++;
>
> this also doesn't feel like a worker's responsibility and logic. It
> feels like there is a bit of a missing separation between "executing
> test" and "accounting and reporting test result". I'll get to that
> below when talking about duplication of code and logic between
> parallel and non-parallel modes of operation.

This is also accounting the execution stats for the subtests of the
current test; right now only the main test's accumulated stats are
sent back, not individual subtests' stats.

>
> >         skip_account();
> >
> >         free(test->subtest_name);
> > @@ -474,6 +484,7 @@ enum ARG_KEYS {
> >         ARG_LIST_TEST_NAMES = 'l',
> >         ARG_TEST_NAME_GLOB_ALLOWLIST = 'a',
> >         ARG_TEST_NAME_GLOB_DENYLIST = 'd',
> > +       ARG_NUM_WORKERS = 'j',
> >  };
> >
> >  static const struct argp_option opts[] = {
> > @@ -495,6 +506,8 @@ static const struct argp_option opts[] = {
> >           "Run tests with name matching the pattern (supports '*' wildcard)." },
> >         { "deny", ARG_TEST_NAME_GLOB_DENYLIST, "NAMES", 0,
> >           "Don't run tests with name matching the pattern (supports '*' wildcard)." },
> > +       { "workers", ARG_NUM_WORKERS, "WORKERS", OPTION_ARG_OPTIONAL,
> > +         "Number of workers to run in parallel, default to number of cpus." },
> >         {},
> >  };
> >
> > @@ -661,6 +674,17 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
> >         case ARG_LIST_TEST_NAMES:
> >                 env->list_test_names = true;
> >                 break;
> > +       case ARG_NUM_WORKERS:
> > +               if (arg) {
> > +                       env->workers = atoi(arg);
> > +                       if (!env->workers) {
> > +                               fprintf(stderr, "Invalid number of worker: %s.", arg);
> > +                               return -1;
> > +                       }
> > +               } else {
> > +                       env->workers = get_nprocs();
> > +               }
> > +               break;
> >         case ARGP_KEY_ARG:
> >                 argp_usage(state);
> >                 break;
> > @@ -678,7 +702,7 @@ static void stdio_hijack(void)
> >         env.stdout = stdout;
> >         env.stderr = stderr;
> >
> > -       if (env.verbosity > VERBOSE_NONE) {
> > +       if (env.verbosity > VERBOSE_NONE && env.worker_index == -1) {
> >                 /* nothing to do, output to stdout by default */
> >                 return;
>
> there is a big #ifdef __GLIBC__ guard around this code. If it's false
> and we can't really hijack stdout/stderr in parallel mode, we should
> just error out, otherwise the output will be a mess.

Ack.

>
> >         }
> > @@ -704,10 +728,6 @@ static void stdio_restore(void)
> >                 return;
> >
> >         fclose(stdout);
> > -       free(env.log_buf);
> > -
> > -       env.log_buf = NULL;
> > -       env.log_cnt = 0;
> >
> >         stdout = env.stdout;
> >         stderr = env.stderr;
> > @@ -799,6 +819,366 @@ void crash_handler(int signum)
> >         backtrace_symbols_fd(bt, sz, STDERR_FILENO);
> >  }
> >
> > +int current_test_idx = 0;
> > +pthread_mutex_t current_test_lock;
>
> does this have to be global?

I've changed this to static.

>
> > +
> > +struct test_result {
> > +       int error_cnt;
> > +       int skip_cnt;
> > +       int sub_succ_cnt;
> > +
> > +       size_t log_cnt;
> > +       char *log_buf;
> > +};
> > +
> > +struct test_result test_results[ARRAY_SIZE(prog_test_defs)];
>
> same, make static?

Ack.

>
> > +
> > +static inline const char *str_msg(const struct message *msg)
> > +{
> > +       static char buf[255];
>
> str_msg is called from send_message/recv_message, which are called
> from one of many dispatch threads. You can't use a shared static buf;
> the threads will inevitably corrupt each other's messages.

Nice catch, fixed!
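
For reference, one common shape of that fix is a caller-provided
buffer (a sketch, trimmed to two cases; the actual fix in v5 may
differ):

static const char *str_msg(const struct message *msg, char *buf, size_t len)
{
	switch (msg->type) {
	case MSG_DO_TEST:
		snprintf(buf, len, "MSG_DO_TEST %d", msg->u.message_do_test.num);
		break;
	case MSG_EXIT:
		snprintf(buf, len, "MSG_EXIT");
		break;
	default:
		snprintf(buf, len, "UNKNOWN");
		break;
	}
	return buf;
}

/* each dispatch thread then owns its own buffer: */
char buf[255];

fprintf(stderr, "Sending msg: %s\n", str_msg(msg, buf, sizeof(buf)));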

>
> > +
> > +       switch (msg->type) {
> > +       case MSG_DO_TEST:
> > +               sprintf(buf, "MSG_DO_TEST %d", msg->u.message_do_test.num);
> > +               break;
> > +       case MSG_TEST_DONE:
> > +               sprintf(buf, "MSG_TEST_DONE %d (log: %d)",
> > +                       msg->u.message_test_done.num,
> > +                       msg->u.message_test_done.have_log);
> > +               break;
> > +       case MSG_TEST_LOG:
> > +               sprintf(buf, "MSG_TEST_LOG (cnt: %ld, last: %d)",
> > +                       strlen(msg->u.message_test_log.log_buf),
> > +                       msg->u.message_test_log.is_last);
> > +               break;
> > +       case MSG_EXIT:
> > +               sprintf(buf, "MSG_EXIT");
> > +               break;
> > +       default:
> > +               sprintf(buf, "UNKNOWN");
> > +               break;
> > +       }
> > +
> > +       return buf;
> > +}
> > +
> > +int send_message(int sock, const struct message *msg)
>
> here and everywhere else, unless a function or variable has to be
> global, it has to be static

Done

>
> > +{
> > +       if (env.verbosity > VERBOSE_NONE)
> > +               fprintf(stderr, "Sending msg: %s\n", str_msg(msg));
> > +       return send(sock, msg, sizeof(*msg), 0);
> > +}
> > +
> > +int recv_message(int sock, struct message *msg)
> > +{
> > +       int ret;
> > +
> > +       memset(msg, 0, sizeof(*msg));
> > +       ret = recv(sock, msg, sizeof(*msg), 0);
> > +       if (ret >= 0) {
> > +               if (env.verbosity > VERBOSE_NONE)
> > +                       fprintf(stderr, "Received msg: %s\n", str_msg(msg));
> > +       }
> > +       return ret;
> > +}
> > +
> > +struct dispatch_data {
> > +       int idx;
>
> is idx a worker index? let's make it clear with worker_idx/worker_id/worker_num?

Changed to worker_id.

>
> > +       int sock;
>
> is this a socket file descriptor? sock_fd then?

Done.

>
> > +};
> > +
> > +void *dispatch_thread(void *_data)
>
> our code base doesn't really use _-prefixed arguments, why not just
> call it "arg" or "input" or "ctx"?
>
> > +{
> > +       struct dispatch_data *data;
> > +       int sock;
> > +       FILE *log_fd = NULL;
> > +
> > +       data = (struct dispatch_data *)_data;
>
> in C you can just:
>
> struct dispatch_data *data = input;
>
> right there at variable declaration
>
> > +       sock = data->sock;
> > +
> > +       while (true) {
> > +               int test_to_run = -1;
> > +               struct prog_test_def *test;
> > +               struct test_result *result;
> > +
> > +               /* grab a test */
> > +               {
> > +                       pthread_mutex_lock(&current_test_lock);
> > +
> > +                       if (current_test_idx >= prog_test_cnt) {
> > +                               pthread_mutex_unlock(&current_test_lock);
> > +                               goto done;
> > +                       }
> > +                       test_to_run = current_test_idx;
> > +                       current_test_idx++;
> > +
> > +                       pthread_mutex_unlock(&current_test_lock);
> > +               }
> > +
> > +               test = &prog_test_defs[test_to_run];
> > +               test->test_num = test_to_run + 1;
>
> let's initialize test->test_num outside of all this logic in main()
> (together with whatever other test_def initializations we need to
> perform at startup)
>
> > +
> > +               if (!should_run(&env.test_selector,
> > +                               test->test_num, test->test_name))
> > +                       continue;
>
> this probably can also be extracted outside of the single-thread/parallel
> execution paths, it's basically a simple pre-initialization as well.
> Each test will just be marked as needing to run or not. WDYT?

For this, yes, I added a bool should_run to the test struct, and we
can avoid calling should_run() on every worker.

However, for subtests, we still have to call it in the worker, since
there is no way to iterate subtests without running the test.
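
Roughly what that pre-initialization pass looks like (a sketch; the
bool should_run field is the new one mentioned above):

/* done once in main(), before either execution mode */
for (i = 0; i < prog_test_cnt; i++) {
	struct prog_test_def *test = &prog_test_defs[i];

	test->test_num = i + 1;
	test->should_run = should_run(&env.test_selector,
				      test->test_num, test->test_name);
}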
>
> > +
> > +               /* run test through worker */
> > +               {
> > +                       struct message msg_do_test;
> > +
> > +                       msg_do_test.type = MSG_DO_TEST;
> > +                       msg_do_test.u.message_do_test.num = test_to_run;
> > +                       if (send_message(sock, &msg_do_test) < 0) {
> > +                               perror("Fail to send command");
> > +                               goto done;
> > +                       }
> > +                       env.worker_current_test[data->idx] = test_to_run;
> > +               }
> > +
> > +               /* wait for test done */
> > +               {
> > +                       struct message msg_test_done;
> > +
> > +                       if (recv_message(sock, &msg_test_done) < 0)
> > +                               goto error;
> > +                       if (msg_test_done.type != MSG_TEST_DONE)
> > +                               goto error;
> > +                       if (test_to_run != msg_test_done.u.message_test_done.num)
> > +                               goto error;
> > +
> > +                       result = &test_results[test_to_run];
> > +
> > +                       test->tested = true;
> > +
> > +                       result->error_cnt = msg_test_done.u.message_test_done.error_cnt;
> > +                       result->skip_cnt = msg_test_done.u.message_test_done.skip_cnt;
> > +                       result->sub_succ_cnt = msg_test_done.u.message_test_done.sub_succ_cnt;
> > +
> > +                       /* collect all logs */
> > +                       if (msg_test_done.u.message_test_done.have_log) {
> > +                               log_fd = open_memstream(&result->log_buf, &result->log_cnt);
> > +                               if (!log_fd)
> > +                                       goto error;
> > +
> > +                               while (true) {
> > +                                       struct message msg_log;
> > +
> > +                                       if (recv_message(sock, &msg_log) < 0)
> > +                                               goto error;
> > +                                       if (msg_log.type != MSG_TEST_LOG)
> > +                                               goto error;
> > +
> > +                                       fprintf(log_fd, "%s", msg_log.u.message_test_log.log_buf);
> > +                                       if (msg_log.u.message_test_log.is_last)
> > +                                               break;
> > +                               }
> > +                               fclose(log_fd);
> > +                               log_fd = NULL;
> > +                       }
> > +               }
> > +       } /* while (true) */
> > +error:
> > +       fprintf(stderr, "[%d]: Protocol/IO error: %s", data->idx, strerror(errno));
> > +
> > +       if (log_fd)
> > +               fclose(log_fd);
> > +done:
> > +       {
> > +               struct message msg_exit;
> > +
> > +               msg_exit.type = MSG_EXIT;
> > +               if (send_message(sock, &msg_exit) < 0)
> > +                       fprintf(stderr, "[%d]: send_message msg_exit: %s", data->idx, strerror(errno));
> > +       }
> > +       return NULL;
> > +}
> > +
> > +int server_main(void)
> > +{
> > +       pthread_t *dispatcher_threads;
> > +       struct dispatch_data *data;
> > +
> > +       dispatcher_threads = calloc(sizeof(pthread_t), env.workers);
> > +       data = calloc(sizeof(struct dispatch_data), env.workers);
> > +
> > +       env.worker_current_test = calloc(sizeof(int), env.workers);
> > +       for (int i = 0; i < env.workers; i++) {
> > +               int rc;
> > +
> > +               data[i].idx = i;
> > +               data[i].sock = env.worker_socks[i];
> > +               rc = pthread_create(&dispatcher_threads[i], NULL, dispatch_thread, &data[i]);
> > +               if (rc < 0) {
> > +                       perror("Failed to launch dispatcher thread");
> > +                       return -1;
> > +               }
> > +       }
> > +
> > +       /* wait for all dispatcher to finish */
> > +       for (int i = 0; i < env.workers; i++) {
> > +               while (true) {
> > +                       struct timespec timeout = {
> > +                               .tv_sec = time(NULL) + 5,
> > +                               .tv_nsec = 0
> > +                       };
> > +                       if (pthread_timedjoin_np(dispatcher_threads[i], NULL, &timeout) != ETIMEDOUT)
> > +                               break;
> > +                       fprintf(stderr, "Still waiting for thread %d (test %d).\n", i,  env.worker_current_test[i] + 1);
>
> This is just going to spam output needlessly. Why do dispatchers have
> to exit within 5 seconds if there are tests that run way longer than
> that? ... probably better to just do pthread_join()?

In later patches I actually changed it to list the status of all
threads every 10 seconds, an easy way to see which tests are blocking
progress or what the threads are actually doing. If 10s is too
aggressive, how about 30 seconds?
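
Something like this, roughly (a sketch of the periodic status report;
the 30s interval and message wording are placeholders):

/* join each dispatcher with a timeout, reporting progress on expiry */
while (true) {
	struct timespec timeout;

	clock_gettime(CLOCK_REALTIME, &timeout);
	timeout.tv_sec += 30;
	if (pthread_timedjoin_np(dispatcher_threads[i], NULL, &timeout) != ETIMEDOUT)
		break;
	for (int j = 0; j < env.workers; j++)
		fprintf(stderr, "Worker %d is running test %d.\n",
			j, env.worker_current_test[j] + 1);
}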

>
> > +               }
> > +       }
> > +       free(dispatcher_threads);
> > +       free(env.worker_current_test);
> > +       free(data);
> > +
> > +       /* generate summary */
> > +       fflush(stderr);
> > +       fflush(stdout);
> > +
> > +       for (int i = 0; i < prog_test_cnt; i++) {
> > +               struct prog_test_def *current_test;
> > +               struct test_result *result;
> > +
> > +               current_test = &prog_test_defs[i];
> > +               result = &test_results[i];
> > +
> > +               if (!current_test->tested)
> > +                       continue;
> > +
> > +               env.succ_cnt += result->error_cnt ? 0 : 1;
> > +               env.skip_cnt += result->skip_cnt;
> > +               env.fail_cnt += result->error_cnt;
> > +               env.sub_succ_cnt += result->sub_succ_cnt;
> > +
> > +               if (result->log_cnt) {
> > +                       result->log_buf[result->log_cnt] = '\0';
> > +                       fprintf(stdout, "%s", result->log_buf);
> > +                       if (result->log_buf[result->log_cnt - 1] != '\n')
> > +                               fprintf(stdout, "\n");
> > +               }
> > +               fprintf(stdout, "#%d %s:%s\n",
> > +                       current_test->test_num, current_test->test_name,
> > +                       result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK"));
>
> it would be nice to see this as results come in (when handling
> msg_test_done). It will come out of order, but it will sort of be a
> progress indicator that stuff is happening. The totals summary will
> then be output at the very end, at the bottom.

My first implementation was exactly that, but I don't quite like the
out-of-order logs; they make reading / comparing them much harder.

That's the reason I added test_results and print the threads' status
as a progress indicator.

>
> > +       }
> > +
> > +       fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
> > +               env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
> > +
> > +       /* reap all workers */
> > +       for (int i = 0; i < env.workers; i++) {
> > +               int wstatus, pid;
> > +
> > +               pid = waitpid(env.worker_pids[i], &wstatus, 0);
>
> should we send SIGKILL as well to ensure that some stuck or buggy
> worker gets reaped for sure?

Right now, if a worker is buggy it is possible for the dispatcher
thread to get stuck (waiting for a message that never arrives); then
the main thread will never finish the pthread_join() loop, so we can't
make it here to kill the workers. The main process would need some
kind of signal before starting to kill workers. We could use a fixed
timeout, but it would be hard to set the timeout value.

>
> > +               assert(pid == env.worker_pids[i]);
>
> let's not use assert(), better print error message

Ack.

>
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> > +int worker_main(int sock)
> > +{
> > +       save_netns();
> > +
> > +       while (true) {
> > +               /* receive command */
> > +               struct message msg;
> > +
> > +               assert(recv_message(sock, &msg) >= 0);
>
> see above about assert()s
>
> > +
> > +               switch (msg.type) {
> > +               case MSG_EXIT:
> > +                       fprintf(stderr, "[%d]: worker exit.\n",  env.worker_index);
> > +                       goto out;
> > +               case MSG_DO_TEST: {
> > +                       int test_to_run;
> > +                       struct prog_test_def *test;
> > +                       struct message msg_done;
> > +
> > +                       test_to_run = msg.u.message_do_test.num;
> > +
> > +                       fprintf(stderr, "[%d]: Running test %d.\n",
> > +                               env.worker_index, test_to_run + 1);
> > +
> > +                       test = &prog_test_defs[test_to_run];
> > +
> > +                       env.test = test;
> > +                       test->test_num = test_to_run + 1;
> > +
> > +                       stdio_hijack();
> > +
> > +                       test->run_test();
> > +
> > +                       /* ensure last sub-test is finalized properly */
> > +                       if (test->subtest_name)
> > +                               test__end_subtest();
> > +
> > +                       stdio_restore();
> > +
> > +                       test->tested = true;
> > +
> > +                       skip_account();
>
> my biggest concern right now is with all this setup/teardown code
> being duplicated between sequential mode and parallel mode. Let's
> ensure that as much as possible is shared and re-used through either
> helper functions or by doing whatever initialization can be done
> outside of the test running loop itself (as suggested for test->test_num
> and test filtering logic).
>
> one way to attack some of that duplication is by having the logic for
> MSG_DO_TEST handling extracted into a separate function that would be
> called both from the forked worker process *and* from the sequential
> mode main process all the same. The send_message() logic would stay in
> the parallelized worker code, most probably.
>

Ack.

> > +
> > +                       reset_affinity();
> > +                       restore_netns();
> > +                       if (test->need_cgroup_cleanup)
> > +                               cleanup_cgroup_environment();
> > +
> > +                       memset(&msg_done, 0, sizeof(msg_done));
> > +                       msg_done.type = MSG_TEST_DONE;
> > +                       msg_done.u.message_test_done.num = test_to_run;
> > +                       msg_done.u.message_test_done.error_cnt = test->error_cnt;
> > +                       msg_done.u.message_test_done.skip_cnt = test->skip_cnt;
> > +                       msg_done.u.message_test_done.sub_succ_cnt = test->sub_succ_cnt;
> > +                       msg_done.u.message_test_done.have_log = false;
> > +
> > +                       if (env.verbosity > VERBOSE_NONE || test->force_log || test->error_cnt) {
> > +                               if (env.log_cnt)
> > +                                       msg_done.u.message_test_done.have_log = true;
> > +                       }
> > +                       assert(send_message(sock, &msg_done) >= 0);
> > +
> > +                       /* send logs */
> > +                       if (msg_done.u.message_test_done.have_log) {
> > +                               char *src;
> > +                               size_t slen;
> > +
> > +                               src = env.log_buf;
> > +                               slen = env.log_cnt;
> > +                               while (slen) {
> > +                                       struct message msg_log;
> > +                                       char *dest;
> > +                                       size_t len;
> > +
> > +                                       memset(&msg_log, 0, sizeof(msg_log));
> > +                                       msg_log.type = MSG_TEST_LOG;
> > +                                       dest = msg_log.u.message_test_log.log_buf;
> > +                                       len = slen >= MAX_LOG_TRUNK_SIZE ? MAX_LOG_TRUNK_SIZE : slen;
> > +                                       memcpy(dest, src, len);
> > +
> > +                                       src += len;
> > +                                       slen -= len;
> > +                                       if (!slen)
> > +                                               msg_log.u.message_test_log.is_last = true;
> > +
> > +                                       assert(send_message(sock, &msg_log) >= 0);
> > +                               }
> > +                       }
> > +                       if (env.log_buf) {
> > +                               free(env.log_buf);
> > +                               env.log_buf = NULL;
> > +                               env.log_cnt = 0;
> > +                       }
> > +                       break;
> > +               } /* case MSG_DO_TEST */
> > +               default:
> > +                       fprintf(stderr, "[%d]: unknown message.\n",  env.worker_index);
> > +                       return -1;
> > +               }
> > +       }
> > +out:
> > +       restore_netns();
> > +       return 0;
> > +}
> > +
> >  int main(int argc, char **argv)
> >  {
> >         static const struct argp argp = {
> > @@ -837,13 +1217,57 @@ int main(int argc, char **argv)
> >                 return -1;
> >         }
> >
> > -       save_netns();
> > -       stdio_hijack();
> >         env.has_testmod = true;
> >         if (!env.list_test_names && load_bpf_testmod()) {
> >                 fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n");
> >                 env.has_testmod = false;
> >         }
> > +
> > +       /* ignore workers if we are just listing */
> > +       if (env.get_test_cnt || env.list_test_names)
> > +               env.workers = 0;
> > +
> > +       /* launch workers if requested */
> > +       env.worker_index = -1; /* main process */
> > +       if (env.workers) {
> > +               env.worker_pids = calloc(sizeof(__pid_t), env.workers);
> > +               env.worker_socks = calloc(sizeof(int), env.workers);
> > +               fprintf(stdout, "Launching %d workers.\n", env.workers);
> > +               for (int i = 0; i < env.workers; i++) {
> > +                       int sv[2];
> > +                       __pid_t pid;
>
> nit: how's __pid_t different from pid_t?
>
> > +
> > +                       if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) < 0) {
> > +                               perror("Fail to create worker socket");
> > +                               return -1;
> > +                       }
> > +                       pid = fork();
> > +                       if (pid < 0) {
> > +                               perror("Failed to fork worker");
> > +                               return -1;
> > +                       } else if (pid != 0) { /* main process */
> > +                               close(sv[1]);
> > +                               env.worker_pids[i] = pid;
> > +                               env.worker_socks[i] = sv[0];
> > +                       } else { /* inside each worker process */
> > +                               close(sv[0]);
> > +                               env.worker_index = i;
> > +                               return worker_main(sv[1]);
> > +                       }
> > +               }
> > +
> > +               if (env.worker_index == -1) {
>
> it can't be anything else here, so you can just run server_main() directly
>
> > +                       server_main();
> > +                       goto out;
> > +               }
> > +       }
> > +
> > +       /* The rest of the main process */
> > +
> > +       /* on single mode */
> > +       save_netns();
> > +       stdio_hijack();
> > +
> >         for (i = 0; i < prog_test_cnt; i++) {
> >                 struct prog_test_def *test = &prog_test_defs[i];
> >
> > @@ -866,6 +1290,7 @@ int main(int argc, char **argv)
> >                 }
> >
> >                 test->run_test();
> > +
> >                 /* ensure last sub-test is finalized properly */
> >                 if (test->subtest_name)
> >                         test__end_subtest();
> > @@ -882,16 +1307,21 @@ int main(int argc, char **argv)
> >                         env.fail_cnt++;
> >                 else
> >                         env.succ_cnt++;
> > +
> >                 skip_account();
> > +               env.sub_succ_cnt += test->sub_succ_cnt;
> >
> >                 reset_affinity();
> >                 restore_netns();
> >                 if (test->need_cgroup_cleanup)
> >                         cleanup_cgroup_environment();
>
> all the logic in this loop (and maybe more of the surrounding code as
> well) is conceptually either 1) running the test or 2)
> accounting/reporting test results. For sequential mode it can be
> intermixed, but for parallel mode there is a strict distinction:
> running happens inside the worker, while accounting happens in the
> dispatcher process. For sequential we can model the interaction the
> same way and reuse as much code as possible.
>
> >         }
> > -       if (!env.list_test_names && env.has_testmod)
> > -               unload_bpf_testmod();
> >         stdio_restore();
> > +       if (env.log_buf) {
> > +               free(env.log_buf);
> > +               env.log_buf = NULL;
> > +               env.log_cnt = 0;
> > +       }
> >
> >         if (env.get_test_cnt) {
> >                 printf("%d\n", env.succ_cnt);
> > @@ -904,14 +1334,16 @@ int main(int argc, char **argv)
> >         fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
> >                 env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
> >
> > +       close(env.saved_netns_fd);
> >  out:
> > +       if (!env.list_test_names && env.has_testmod)
> > +               unload_bpf_testmod();
> >         free_str_set(&env.test_selector.blacklist);
> >         free_str_set(&env.test_selector.whitelist);
> >         free(env.test_selector.num_set);
> >         free_str_set(&env.subtest_selector.blacklist);
> >         free_str_set(&env.subtest_selector.whitelist);
> >         free(env.subtest_selector.num_set);
> > -       close(env.saved_netns_fd);
> >
> >         if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0)
> >                 return EXIT_NO_TEST;
> > diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
> > index 94bef0aa74cf..aadb44543225 100644
> > --- a/tools/testing/selftests/bpf/test_progs.h
> > +++ b/tools/testing/selftests/bpf/test_progs.h
> > @@ -69,7 +69,8 @@ struct test_env {
> >         bool get_test_cnt;
> >         bool list_test_names;
> >
> > -       struct prog_test_def *test;
> > +       struct prog_test_def *test; /* current running tests */
> > +
> >         FILE *stdout;
> >         FILE *stderr;
> >         char *log_buf;
> > @@ -82,6 +83,39 @@ struct test_env {
> >         int skip_cnt; /* skipped tests */
> >
> >         int saved_netns_fd;
> > +       int workers; /* number of worker process */
> > +       int worker_index; /* index number of current worker, main process is -1 */
> > +       __pid_t *worker_pids; /* array of worker pids */
> > +       int *worker_socks; /* array of worker socks */
> > +       int *worker_current_test; /* array of current running test for each worker */
> > +};
> > +
> > +#define MAX_LOG_TRUNK_SIZE 8192
> > +enum message_type {
> > +       MSG_DO_TEST = 0,
> > +       MSG_TEST_DONE = 1,
> > +       MSG_TEST_LOG = 2,
> > +       MSG_EXIT = 255,
> > +};
>
> nit: empty line between type definitions
>
> ok, here comes a bunch of naming nitpicking, sorry about that :) but
> the current struct definition is a bit of a mouthful, so it's hard to
> read, and some fields are actually too succinct at the same time, so
> that in some places it's hard to understand what the field represents.
>
> > +struct message {
> > +       enum message_type type;
>
> total nitpick, but given MSG_ enums above, I'd stick to "msg"
> everywhere and have struct msg and enum msg_type
>
> > +       union {
> > +               struct {
> > +                       int num;
>
> this is the test number, right? test_num would make it more clear.
> There were a few times when I wondered if it's worker number or a test
> number.
>
> > +               } message_do_test;
>
> this is a bit tautological. In the code above you have accesses like:
>
> msg->u.message_do_test.num
>
> that message_ prefix doesn't bring any value, just makes everything
> unnecessarily verbose. With the below suggestion to drop the u name for
> the union and by dropping the message_ prefix everywhere, we'll be down
> to a clean and simple
>
> msg->do_test.test_num
>
> (and I'd s/do_test/run_test/ because it reads better for me, but I
> won't complain either way)
>

I think anonymous union/struct is in C11, or a GNU C99 extension; can we use it?

>
> > +               struct {
> > +                       int num;
>
> same about test_num
> > +
> > +                       int sub_succ_cnt;
> > +                       int error_cnt;
> > +                       int skip_cnt;
> > +                       bool have_log;
> > +               } message_test_done;
> > +               struct {
> > +                       char log_buf[MAX_LOG_TRUNK_SIZE + 1];
> > +                       bool is_last;
> > +               } message_test_log;
> > +       } u;
>
> there is no need to name this union; it can be anonymous, allowing the
> .u. part to be skipped in field accesses
>
> >  };
> >
> >  extern struct test_env env;
> > --
> > 2.30.2
> >

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 bpf-next 1/3] selftests/bpf: Add parallelism to test_progs
  2021-09-15 13:29   ` sunyucong
@ 2021-09-15 16:00     ` Andrii Nakryiko
  2021-09-15 16:56       ` sunyucong
  0 siblings, 1 reply; 12+ messages in thread
From: Andrii Nakryiko @ 2021-09-15 16:00 UTC (permalink / raw)
  To: sunyucong; +Cc: Yucong Sun, Andrii Nakryiko, bpf

On Wed, Sep 15, 2021 at 6:29 AM sunyucong@gmail.com <sunyucong@gmail.com> wrote:
>
> On Tue, Sep 14, 2021 at 3:11 AM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Mon, Sep 13, 2021 at 12:39 PM Yucong Sun <fallentree@fb.com> wrote:
> > >
> > > From: Yucong Sun <sunyucong@gmail.com>
> > >
> > > This patch adds "-j" mode to test_progs, executing tests in multiple processes.
> > > "-j" mode is optional, and works with all existing test selection mechanisms, as
> > > well as "-v", "-l" etc.
> > >
> > > In "-j" mode, the main process uses UDS/DGRAM to communicate to each forked
> > > worker, commanding it to run tests and collect logs. After all tests are
> > > finished, a summary is printed. The main process uses multiple competing
> > > threads to dispatch work to workers, trying to keep them all busy.
> > >
> >
> > Overall this looks great and I'm super excited that we'll soon be able
> > to run tests mostly in parallel. I've done the first rough pass over
> > the code, but haven't played with running this locally yet.
> >
> > I didn't trim the message to keep all the context in one place, so
> > please scroll through all of the below till the end.
> >
> > > Example output:
> > >
> > >   > ./test_progs -n 15-20 -j
> > >   [    8.584709] bpf_testmod: loading out-of-tree module taints kernel.
> > >   Launching 2 workers.
> > >   [0]: Running test 15.
> > >   [1]: Running test 16.
> > >   [1]: Running test 17.
> > >   [1]: Running test 18.
> > >   [1]: Running test 19.
> > >   [1]: Running test 20.
> > >   [1]: worker exit.
> > >   [0]: worker exit.
> >
> > I think these messages shouldn't be emitted in normal mode, and not
> > even with -v. Maybe -vv. They will be useful initially to debug bugs
> > in the concurrent test runner, probably, so I'd hide them behind a very
> > verbose setting.
> >
> > >   #15 btf_dump:OK
> > >   #16 btf_endian:OK
> > >   #17 btf_map_in_map:OK
> > >   #18 btf_module:OK
> > >   #19 btf_skc_cls_ingress:OK
> > >   #20 btf_split:OK
> > >   Summary: 6/20 PASSED, 0 SKIPPED, 0 FAILED
> > >
> > > Known issue:
> > >
> > > Some tests fail when running concurrently; a later patch will either
> > > fix the tests or pin them to worker 0.
> >
> > Hm.. patch #3 does that, no?
> >
> > >
> > > Signed-off-by: Yucong Sun <sunyucong@gmail.com>
> > >
> > > V4 -> V3: address style warnings.
> > > V3 -> V2: fix missing outputs in commit messages.
> > > V2 -> V1: switch to UDS client/server model.
> >
> > patch sets with more than one patch should come with a cover letter.
> > Keeping more-or-less detailed history in a cover letter is the best.
> > For next revision please add the cover letter and move the history
> > there. Check some other patch sets for an example.
>
> Ack.
>
> >
> > > ---
> > >  tools/testing/selftests/bpf/test_progs.c | 458 ++++++++++++++++++++++-
> > >  tools/testing/selftests/bpf/test_progs.h |  36 +-
> > >  2 files changed, 480 insertions(+), 14 deletions(-)
> > >

[...]

> > > @@ -172,14 +182,14 @@ void test__end_subtest()
> > >
> > >         dump_test_log(test, sub_error_cnt);
> > >
> > > -       fprintf(env.stdout, "#%d/%d %s/%s:%s\n",
> > > +       fprintf(stdout, "#%d/%d %s/%s:%s\n",
> > >                test->test_num, test->subtest_num, test->test_name, test->subtest_name,
> > >                sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
> >
> > shouldn't this be emitted by the server (main) process, not the
> > worker? Worker itself just returns numbers through UDS.
>
> test__end_subtest() is executed by the worker, and this output is
> part of the log that is sent back if there is an error executing
> subtests or in verbose mode.
>
> In theory we could print this on the server side, but that would
> require quite a bit of hassle to send back each subtest's execution
> stats individually, and I don't think it is worth the trouble.

Fair enough. Sub-tests are a much more dynamic feature, which is
harder to deal with. I have some ideas if we decide to parallelize
sub-tests as well, but ok, for now let's leave it as is.

>
> >
> > >
> > >         if (sub_error_cnt)
> > > -               env.fail_cnt++;
> > > +               test->error_cnt++;
> > >         else if (test->skip_cnt == 0)
> > > -               env.sub_succ_cnt++;
> > > +               test->sub_succ_cnt++;
> >
> > this also doesn't feel like a worker's responsibility and logic. It
> > feels like there is a bit of a missing separation between "executing
> > test" and "accounting and reporting test result". I'll get to that
> > below when talking about duplication of code and logic between
> > parallel and non-parallel modes of operation.
>
> This is also accounting the execution stats for the subtests of the
> current test; right now only the main test's accumulated stats are
> sent back, not individual subtests' stats.

Right, subtests again, ok.

>
> >
> > >         skip_account();
> > >
> > >         free(test->subtest_name);
> > > @@ -474,6 +484,7 @@ enum ARG_KEYS {
> > >         ARG_LIST_TEST_NAMES = 'l',
> > >         ARG_TEST_NAME_GLOB_ALLOWLIST = 'a',
> > >         ARG_TEST_NAME_GLOB_DENYLIST = 'd',
> > > +       ARG_NUM_WORKERS = 'j',
> > >  };
> > >

[...]

> > > +                       test_to_run = current_test_idx;
> > > +                       current_test_idx++;
> > > +
> > > +                       pthread_mutex_unlock(&current_test_lock);
> > > +               }
> > > +
> > > +               test = &prog_test_defs[test_to_run];
> > > +               test->test_num = test_to_run + 1;
> >
> > let's initialize test->test_num outside of all this logic in main()
> > (together with whatever other test_def initializations we need to
> > perform at startup)
> >
> > > +
> > > +               if (!should_run(&env.test_selector,
> > > +                               test->test_num, test->test_name))
> > > +                       continue;
> >
> > this probably can also be extracted outside of the single-thread/parallel
> > execution paths, it's basically a simple pre-initialization as well.
> > Each test will just be marked as needing to run or not. WDYT?
>
> For this, yes, I added a bool should_run to the test struct, and we
> can avoid calling should_run() on every worker.
>
> However, for subtests, we still have to call it in the worker, since
> there is no way to iterate subtests without running the test.

right

> >
> > > +
> > > +               /* run test through worker */
> > > +               {
> > > +                       struct message msg_do_test;
> > > +

[...]

> > > +       /* wait for all dispatcher to finish */
> > > +       for (int i = 0; i < env.workers; i++) {
> > > +               while (true) {
> > > +                       struct timespec timeout = {
> > > +                               .tv_sec = time(NULL) + 5,
> > > +                               .tv_nsec = 0
> > > +                       };
> > > +                       if (pthread_timedjoin_np(dispatcher_threads[i], NULL, &timeout) != ETIMEDOUT)
> > > +                               break;
> > > +                       fprintf(stderr, "Still waiting for thread %d (test %d).\n", i,  env.worker_current_test[i] + 1);
> >
> > This is just going to spam output needlessly. Why do dispatchers have
> > to exit within 5 seconds if there are tests that run way longer than
> > that? ... probably better to just do pthread_join()?
>
> In later patches I actually changed it to list the status of all
> threads every 10 seconds, an easy way to see which tests are blocking
> progress or what the threads are actually doing. If 10s is too
> aggressive, how about 30 seconds?

Ok, but let's hide it by default (i.e., only show in -vv mode)?

>
> >
> > > +               }
> > > +       }
> > > +       free(dispatcher_threads);
> > > +       free(env.worker_current_test);
> > > +       free(data);
> > > +
> > > +       /* generate summary */
> > > +       fflush(stderr);
> > > +       fflush(stdout);
> > > +
> > > +       for (int i = 0; i < prog_test_cnt; i++) {
> > > +               struct prog_test_def *current_test;
> > > +               struct test_result *result;
> > > +
> > > +               current_test = &prog_test_defs[i];
> > > +               result = &test_results[i];
> > > +
> > > +               if (!current_test->tested)
> > > +                       continue;
> > > +
> > > +               env.succ_cnt += result->error_cnt ? 0 : 1;
> > > +               env.skip_cnt += result->skip_cnt;
> > > +               env.fail_cnt += result->error_cnt;
> > > +               env.sub_succ_cnt += result->sub_succ_cnt;
> > > +
> > > +               if (result->log_cnt) {
> > > +                       result->log_buf[result->log_cnt] = '\0';
> > > +                       fprintf(stdout, "%s", result->log_buf);
> > > +                       if (result->log_buf[result->log_cnt - 1] != '\n')
> > > +                               fprintf(stdout, "\n");
> > > +               }
> > > +               fprintf(stdout, "#%d %s:%s\n",
> > > +                       current_test->test_num, current_test->test_name,
> > > +                       result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK"));
> >
> > it would be nice to see this as results come in (when handling
> > msg_test_done). It will come out of order, but it will sort of be a
> > progress indicator that stuff is happening. The totals summary will
> > then be output at the very end, at the bottom.
>
> My first implementation was exactly that, but I don't quite like the
> out-of-order logs; they make reading / comparing them much harder.

By out of order, do you mean that tests will finish in a different
order, or that logs from multiple tests will be intermingled? For the
former, I think it's fine given this is parallel mode. For the latter,
that shouldn't happen, because log output should happen under a common
lock, so that the log of one test is finished before the log of
another test is emitted.

As for comparing two log outputs, that's not something I ever really
do. Usually you want to see what failed and why, not how it differs
from the previous run. With the dumping of only errored tests at the
end (that we previously talked about), we can sort those and emit
their logs in order, because by that time all tests are done already.
But for now I'd just emit SUCCESS/FAIL as test results come in.
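
I.e., in the MSG_TEST_DONE handler, something like this (a sketch; the
lock name is assumed):

/* print each verdict as soon as a worker reports it; a shared lock
 * keeps whole lines from interleaving across dispatch threads
 */
pthread_mutex_lock(&stdout_lock);
fprintf(stdout, "#%d %s:%s\n", test->test_num, test->test_name,
	result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK"));
pthread_mutex_unlock(&stdout_lock);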

>
> That's the reason I added test_results and print the threads' status
> as a progress indicator.
>
> >
> > > +       }
> > > +
> > > +       fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
> > > +               env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
> > > +
> > > +       /* reap all workers */
> > > +       for (int i = 0; i < env.workers; i++) {
> > > +               int wstatus, pid;
> > > +
> > > +               pid = waitpid(env.worker_pids[i], &wstatus, 0);
> >
> > should we send SIGKILL as well to ensure that some stuck or buggy
> > worker gets reaped for sure?
>
> Right now, if a worker is buggy it is possible for the dispatcher
> thread to get stuck (waiting for a message that never arrives); then
> the main thread will never finish the pthread_join() loop, so we can't
> make it here to kill the workers. The main process would need some
> kind of signal before starting to kill workers. We could use a fixed
> timeout, but it would be hard to set the timeout value.


Hm.. If you kill a child process that didn't want to exit by itself,
its socket will be closed by the kernel, which will cause the
dispatcher's read/write operations to fail. It's actually good to test
this; we don't want to debug this in GitHub Actions when some test
actually crashes a worker process (which will happen sooner or later).
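
The forced-reap pattern being suggested here is plain POSIX, roughly
(a sketch, not code from the patch):

/* make sure no stuck or crashed worker can block shutdown */
for (int i = 0; i < env.workers; i++) {
	int wstatus;

	kill(env.worker_pids[i], SIGKILL); /* harmless if already dead */
	if (waitpid(env.worker_pids[i], &wstatus, 0) < 0)
		perror("waitpid");
}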

>
> >
> > > +               assert(pid == env.worker_pids[i]);
> >
> > let's not use assert(), better print error message
>
> Ack.
>
> >

[...]

> > > +
> > > +#define MAX_LOG_TRUNK_SIZE 8192
> > > +enum message_type {
> > > +       MSG_DO_TEST = 0,
> > > +       MSG_TEST_DONE = 1,
> > > +       MSG_TEST_LOG = 2,
> > > +       MSG_EXIT = 255,
> > > +};
> >
> > nit: empty line between type definitions
> >
> > ok, here comes a bunch of naming nitpicking, sorry about that :) but
> > the current struct definition is a bit of a mouthful, so it's hard to
> > read, and some fields are actually too succinct at the same time, so
> > that in some places it's hard to understand what the field represents.
> >
> > > +struct message {
> > > +       enum message_type type;
> >
> > total nitpick, but given MSG_ enums above, I'd stick to "msg"
> > everywhere and have struct msg and enum msg_type
> >
> > > +       union {
> > > +               struct {
> > > +                       int num;
> >
> > this is the test number, right? test_num would make it more clear.
> > There were a few times when I wondered if it's worker number or a test
> > number.
> >
> > > +               } message_do_test;
> >
> > this is a bit tautological. In the code above you have accesses like:
> >
> > msg->u.message_do_test.num
> >
> > that message_ prefix doesn't bring any value, just makes everything
> > unnecessarily verbose. With the below suggestion to drop the u name for
> > the union and by dropping the message_ prefix everywhere, we'll be down
> > to a clean and simple
> >
> > msg->do_test.test_num
> >
> > (and I'd s/do_test/run_test/ because it reads better for me, but I
> > won't complain either way)
> >
>
> I think anonymous union/struct is in C11, or a GNU C99 extension; can we use it?

we use anonymous unions even in BPF UAPI, so yeah, we definitely can use it
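
Putting the naming suggestions above together, the layout would look
roughly like this (a sketch, not the committed version):

struct msg {
	enum msg_type type;
	union {
		struct {
			int test_num;
		} do_test;
		struct {
			int test_num;
			int sub_succ_cnt;
			int error_cnt;
			int skip_cnt;
			bool have_log;
		} test_done;
		struct {
			char log_buf[MAX_LOG_TRUNK_SIZE + 1];
			bool is_last;
		} test_log;
	};
};

/* accesses then read msg->do_test.test_num instead of
 * msg->u.message_do_test.num
 */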

>
> >
> > > +               struct {
> > > +                       int num;
> >
> > same about test_num
> > > +
> > > +                       int sub_succ_cnt;
> > > +                       int error_cnt;
> > > +                       int skip_cnt;
> > > +                       bool have_log;
> > > +               } message_test_done;
> > > +               struct {
> > > +                       char log_buf[MAX_LOG_TRUNK_SIZE + 1];
> > > +                       bool is_last;
> > > +               } message_test_log;
> > > +       } u;
> >
> > there is no need to name this union; it can be anonymous, allowing the
> > .u. part to be skipped in field accesses
> >
> > >  };
> > >
> > >  extern struct test_env env;
> > > --
> > > 2.30.2
> > >

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 bpf-next 1/3] selftests/bpf: Add parallelism to test_progs
  2021-09-15 16:00     ` Andrii Nakryiko
@ 2021-09-15 16:56       ` sunyucong
  2021-09-15 17:36         ` Andrii Nakryiko
  0 siblings, 1 reply; 12+ messages in thread
From: sunyucong @ 2021-09-15 16:56 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: Yucong Sun, Andrii Nakryiko, bpf

On Wed, Sep 15, 2021 at 12:01 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Wed, Sep 15, 2021 at 6:29 AM sunyucong@gmail.com <sunyucong@gmail.com> wrote:
> >
> > On Tue, Sep 14, 2021 at 3:11 AM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Mon, Sep 13, 2021 at 12:39 PM Yucong Sun <fallentree@fb.com> wrote:
> > > >
> > > > From: Yucong Sun <sunyucong@gmail.com>
> > > >
> > > > This patch adds "-j" mode to test_progs, executing tests in multiple processes.
> > > > "-j" mode is optional, and works with all existing test selection mechanisms, as
> > > > well as "-v", "-l" etc.
> > > >
> > > > In "-j" mode, the main process uses UDS/DGRAM to communicate to each forked
> > > > worker, commanding it to run tests and collect logs. After all tests are
> > > > finished, a summary is printed. The main process uses multiple competing
> > > > threads to dispatch work to workers, trying to keep them all busy.
> > > >
> > >
> > > Overall this looks great and I'm super excited that we'll soon be able
> > > to run tests mostly in parallel. I've done the first rough pass over
> > > the code, but haven't played with running this locally yet.
> > >
> > > I didn't trim the message to keep all the context in one place, so
> > > please scroll through all of the below till the end.
> > >
> > > > Example output:
> > > >
> > > >   > ./test_progs -n 15-20 -j
> > > >   [    8.584709] bpf_testmod: loading out-of-tree module taints kernel.
> > > >   Launching 2 workers.
> > > >   [0]: Running test 15.
> > > >   [1]: Running test 16.
> > > >   [1]: Running test 17.
> > > >   [1]: Running test 18.
> > > >   [1]: Running test 19.
> > > >   [1]: Running test 20.
> > > >   [1]: worker exit.
> > > >   [0]: worker exit.
> > >
> > > I think these messages shouldn't be emitted in normal mode, and not
> > > even with -v. Maybe -vv. They will be useful initially to debug bugs
> > > in concurrent test runner, probably, so I'd hide them behind a very
> > > verbose setting.

I created a new "debug" mode, which will print these messages. Putting
them under verbose mode would also make all the other tests log more
output, which makes it harder to debug.
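
Roughly, the gating amounts to something like this (a sketch; worker_id and
test_num are illustrative names, and the exact flag may differ in the new
revision):

/* dispatcher/worker chatter is gated on a dedicated debug flag,
 * independent of per-test verbosity */
if (env.debug)
	fprintf(stderr, "[%d]: Running test %d.\n", worker_id, test_num);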

> > >
> > > >   #15 btf_dump:OK
> > > >   #16 btf_endian:OK
> > > >   #17 btf_map_in_map:OK
> > > >   #18 btf_module:OK
> > > >   #19 btf_skc_cls_ingress:OK
> > > >   #20 btf_split:OK
> > > >   Summary: 6/20 PASSED, 0 SKIPPED, 0 FAILED
> > > >
> > > > Know issue:
> > > >
> > > > Some tests fail when running concurrently, later patch will either
> > > > fix the test or pin them to worker 0.
> > >
> > > Hm.. patch #3 does that, no?
> > >
> > > >
> > > > Signed-off-by: Yucong Sun <sunyucong@gmail.com>
> > > >
> > > > V4 -> V3: address style warnings.
> > > > V3 -> V2: fix missing outputs in commit messages.
> > > > V2 -> V1: switch to UDS client/server model.
> > >
> > > patch sets with more than one patch should come with a cover letter.
> > > Keeping more-or-less detailed history in a cover letter is the best.
> > > For next revision please add the cover letter and move the history
> > > there. Check some other patch sets for an example.
> >
> > Ack.
> >
> > >
> > > > ---
> > > >  tools/testing/selftests/bpf/test_progs.c | 458 ++++++++++++++++++++++-
> > > >  tools/testing/selftests/bpf/test_progs.h |  36 +-
> > > >  2 files changed, 480 insertions(+), 14 deletions(-)
> > > >
>
> [...]
>
> > > > @@ -172,14 +182,14 @@ void test__end_subtest()
> > > >
> > > >         dump_test_log(test, sub_error_cnt);
> > > >
> > > > -       fprintf(env.stdout, "#%d/%d %s/%s:%s\n",
> > > > +       fprintf(stdout, "#%d/%d %s/%s:%s\n",
> > > >                test->test_num, test->subtest_num, test->test_name, test->subtest_name,
> > > >                sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
> > >
> > > shouldn't this be emitted by the server (main) process, not the
> > > worker? Worker itself just returns numbers through UDS.
> >
> > test__end_subtest() is executed by the worker, and this output is
> > part of the log that is sent back if there is an error executing
> > subtests or in verbose mode.
> >
> > In theory we can print this on the server side, but that would require
> > quite a bit of hassle, sending back each subtest's execution stats
> > individually, and I don't think it is worth the trouble.
>
> Fair enough. Sub-tests are much more dynamic feature which is harder
> to deal with. I have some ideas if we decide to parallelize sub-tests
> as well, but ok, for now let's leave it as is.

I think it is much easier to just turn sub-tests into top-level tests;
from what I've seen, the actual setup/teardown code is not that much.

>
> >
> > >
> > > >
> > > >         if (sub_error_cnt)
> > > > -               env.fail_cnt++;
> > > > +               test->error_cnt++;
> > > >         else if (test->skip_cnt == 0)
> > > > -               env.sub_succ_cnt++;
> > > > +               test->sub_succ_cnt++;
> > >
> > > this also doesn't feel like a worker's responsibility and logic. It
> > > feels like there is a bit of a missing separation between "executing
> > > test" and "accounting and reporting test result". I'll get to that
> > > below when talking about duplication of code and logic between
> > > parallel and non-parallel modes of operation.
> >
> > This is also accounting the execution stats for the subtests of the
> > current test; right now only the main test's cumulated stats are sent
> > back, not individual subtests' stats.
>
> Right, subtests again, ok.
>
> >
> > >
> > > >         skip_account();
> > > >
> > > >         free(test->subtest_name);
> > > > @@ -474,6 +484,7 @@ enum ARG_KEYS {
> > > >         ARG_LIST_TEST_NAMES = 'l',
> > > >         ARG_TEST_NAME_GLOB_ALLOWLIST = 'a',
> > > >         ARG_TEST_NAME_GLOB_DENYLIST = 'd',
> > > > +       ARG_NUM_WORKERS = 'j',
> > > >  };
> > > >
>
> [...]
>
> > > > +                       test_to_run = current_test_idx;
> > > > +                       current_test_idx++;
> > > > +
> > > > +                       pthread_mutex_unlock(&current_test_lock);
> > > > +               }
> > > > +
> > > > +               test = &prog_test_defs[test_to_run];
> > > > +               test->test_num = test_to_run + 1;
> > >
> > > let's initialize test->test_num outside of all this logic in main()
> > > (together with whatever other test_def initializations we need to
> > > perform at startup)
> > >
> > > > +
> > > > +               if (!should_run(&env.test_selector,
> > > > +                               test->test_num, test->test_name))
> > > > +                       continue;
> > >
> > > this probably also can be extracted outside of single-thread/parallel
> > > execution paths, it's basically a simple pre-initialization as well.
> > > Each test will be just marked as needing the run or not. WDYT?
> >
> > For this, yes, I added a bool should_run to the test struct, and we
> > can avoid calling should_run() on every worker.
> >
> > However, for subtests, we still have to call it in the worker, since
> > there is no way to iterate subtests without running the test.
>
> right
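
For illustration, the pre-marking can be one pass in main() before any worker
is forked (a sketch using the names from the patch; should_run is the new bool
field mentioned above):

/* sketch: initialize test numbers and selection once, up front */
for (int i = 0; i < prog_test_cnt; i++) {
	struct prog_test_def *test = &prog_test_defs[i];

	test->test_num = i + 1;
	test->should_run = should_run(&env.test_selector,
				      test->test_num, test->test_name);
}
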
>
> > >
> > > > +
> > > > +               /* run test through worker */
> > > > +               {
> > > > +                       struct message msg_do_test;
> > > > +
>
> [...]
>
> > > > +       /* wait for all dispatcher to finish */
> > > > +       for (int i = 0; i < env.workers; i++) {
> > > > +               while (true) {
> > > > +                       struct timespec timeout = {
> > > > +                               .tv_sec = time(NULL) + 5,
> > > > +                               .tv_nsec = 0
> > > > +                       };
> > > > +                       if (pthread_timedjoin_np(dispatcher_threads[i], NULL, &timeout) != ETIMEDOUT)
> > > > +                               break;
> > > > +                       fprintf(stderr, "Still waiting for thread %d (test %d).\n", i,  env.worker_current_test[i] + 1);
> > >
> > > This is just going to spam output needlessly. Why do dispatchers have to
> > > exit within 5 seconds, if there are tests that run way longer
> > > than that?... probably better to just do pthread_join()?
> >
> > In later patches I actually changed it to list the status of all
> > threads every 10 seconds, an easy way to see which tests are blocking
> > progress or what the threads are actually doing. If 10s is too
> > aggressive, how about 30 seconds?
>
> Ok, but let's hide it by default (i.e., only show in -vv mode)?
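
Something like this, roughly (a sketch: the 30s interval and the debug-only
gate are the changes being discussed, not final code; pthread_timedjoin_np is
a GNU extension and takes an absolute CLOCK_REALTIME deadline):

while (true) {
	struct timespec timeout = {
		.tv_sec = time(NULL) + 30,
		.tv_nsec = 0
	};

	if (pthread_timedjoin_np(dispatcher_threads[i], NULL, &timeout) != ETIMEDOUT)
		break;
	if (env.debug) /* hide the status spam by default */
		fprintf(stderr, "Still waiting for thread %d (test %d).\n",
			i, env.worker_current_test[i] + 1);
}
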
>
> >
> > >
> > > > +               }
> > > > +       }
> > > > +       free(dispatcher_threads);
> > > > +       free(env.worker_current_test);
> > > > +       free(data);
> > > > +
> > > > +       /* generate summary */
> > > > +       fflush(stderr);
> > > > +       fflush(stdout);
> > > > +
> > > > +       for (int i = 0; i < prog_test_cnt; i++) {
> > > > +               struct prog_test_def *current_test;
> > > > +               struct test_result *result;
> > > > +
> > > > +               current_test = &prog_test_defs[i];
> > > > +               result = &test_results[i];
> > > > +
> > > > +               if (!current_test->tested)
> > > > +                       continue;
> > > > +
> > > > +               env.succ_cnt += result->error_cnt ? 0 : 1;
> > > > +               env.skip_cnt += result->skip_cnt;
> > > > +               env.fail_cnt += result->error_cnt;
> > > > +               env.sub_succ_cnt += result->sub_succ_cnt;
> > > > +
> > > > +               if (result->log_cnt) {
> > > > +                       result->log_buf[result->log_cnt] = '\0';
> > > > +                       fprintf(stdout, "%s", result->log_buf);
> > > > +                       if (result->log_buf[result->log_cnt - 1] != '\n')
> > > > +                               fprintf(stdout, "\n");
> > > > +               }
> > > > +               fprintf(stdout, "#%d %s:%s\n",
> > > > +                       current_test->test_num, current_test->test_name,
> > > > +                       result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK"));
> > >
> > > it would be nice to see this as results come in (when handling
> > > msg_test_done). It will come out of order, but it will sort of be a
> > > progress indicator that stuff is happening. The totals summary
> > > will then be output at the very end, at the bottom.
> >
> > My first implementation is exactly that, but I don't quite like the
> > out-of-order logs; it makes reading / comparing them much
> > harder.
>
> by out of order you mean that tests will finish in different order? Or
> that logs from multiple tests will be intermingled? For the former I
> think it's fine given this is parallel mode. For the latter, that
> shouldn't happen, because log output should happen under common lock,
> so that log of one test is finished before log of another test is
> emitted.
>
> As for comparing two log outputs, that's not something I ever really
> do. Usually you want to see what failed and why, not how it differs
> from the previous run. With the dumping of only errored tests at the
> end (that we previously talked about), we can sort those and emit
> their logs in order, because by that time all tests are done already.
> But for now I'd just emit SUCCESS/FAIL as test results come in.
>

OK, as long as we are only outputting one line here, it should be fine.

> >
> > That's the reason I added test_results and print threads status as a
> > progress indicator.
> >
> > >
> > > > +       }
> > > > +
> > > > +       fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
> > > > +               env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
> > > > +
> > > > +       /* reap all workers */
> > > > +       for (int i = 0; i < env.workers; i++) {
> > > > +               int wstatus, pid;
> > > > +
> > > > +               pid = waitpid(env.worker_pids[i], &wstatus, 0);
> > >
> > > should we send SIGKILL as well to ensure that some stuck or buggy
> > > worker gets reaped for sure?
> >
> > Right now if a worker is buggy it is possible for the dispatcher
> > thread to get stuck (waiting for a message that never arrives), and then the
> > main thread will never finish the pthread_join() loop, so we can't make it
> > here to kill the workers. The main process would need some kind of
> > signal before starting to kill workers; we could use a fixed timeout,
> > but it would be hard to set the timeout value.
>
>
> Hm.. If you kill a child process that didn't want to exit by itself,
> its socket will be closed by the kernel, which will result in the
> dispatcher's read/write operation failing. It's actually good to test
> this; we don't want to debug this in GitHub Actions when some test
> actually crashes a worker process (which will happen sooner or later).
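
For the stuck/crashed case, unconditional reaping could look like this (a
sketch, reusing env.worker_pids from the patch; needs <signal.h> and
<sys/wait.h>):

/* sketch: make sure even a stuck worker is reaped */
for (int i = 0; i < env.workers; i++) {
	int wstatus;

	kill(env.worker_pids[i], SIGKILL); /* kernel closes its UDS end */
	if (waitpid(env.worker_pids[i], &wstatus, 0) != env.worker_pids[i])
		fprintf(stderr, "Failed to reap worker %d\n", i);
}
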
>
> >
> > >
> > > > +               assert(pid == env.worker_pids[i]);
> > >
> > > let's not use assert(), better print error message
> >
> > Ack.
> >
> > >
>
> [...]
>
> > > > +
> > > > +#define MAX_LOG_TRUNK_SIZE 8192
> > > > +enum message_type {
> > > > +       MSG_DO_TEST = 0,
> > > > +       MSG_TEST_DONE = 1,
> > > > +       MSG_TEST_LOG = 2,
> > > > +       MSG_EXIT = 255,
> > > > +};
> > >
> > > nit: empty line between type definitions
> > >
> > > ok, here comes a bunch of naming nitpicking, sorry about that :) but
> > > the current struct definition is a bit of a mouthful so it's a bit hard to
> > > read, and some fields are actually too succinct at the same time, so
> > > that it's hard to understand in some places what the field represents.
> > >
> > > > +struct message {
> > > > +       enum message_type type;
> > >
> > > total nitpick, but given MSG_ enums above, I'd stick to "msg"
> > > everywhere and have struct msg and enum msg_type
> > >
> > > > +       union {
> > > > +               struct {
> > > > +                       int num;
> > >
> > > this is the test number, right? test_num would make it more clear.
> > > There were a few times when I wondered if it's worker number or a test
> > > number.
> > >
> > > > +               } message_do_test;
> > >
> > > this is a bit tautological. In the code above you have accesses like:
> > >
> > > msg->u.message_do_test.num
> > >
> > > that message_ prefix doesn't bring any value, just makes everything
> > > unnecessarily verbose. With the below suggestion to drop u name for
> > > union and by dropping message_ prefix everywhere, we'll be down to a
> > > clean and simple
> > >
> > > msg->do_test.test_num
> > >
> > > (and I'd s/do_test/run_test/ because it reads better for me, but I
> > > won't complain either way)
> > >
> >
> > I think anonymous union/struct is in C11, or a GNU C99 extension; can we use it?
>
> we use anonymous unions even in BPF UAPI, so yeah, we definitely can use it
>
> >
> > >
> > > > +               struct {
> > > > +                       int num;
> > >
> > > same about test_num
> > > > +
> > > > +                       int sub_succ_cnt;
> > > > +                       int error_cnt;
> > > > +                       int skip_cnt;
> > > > +                       bool have_log;
> > > > +               } message_test_done;
> > > > +               struct {
> > > > +                       char log_buf[MAX_LOG_TRUNK_SIZE + 1];
> > > > +                       bool is_last;
> > > > +               } message_test_log;
> > > > +       } u;
> > >
> > > there is no need to name this union, it can be anonymous, allowing us to
> > > skip the .u. part in field accesses
> > >
> > > >  };
> > > >
> > > >  extern struct test_env env;
> > > > --
> > > > 2.30.2
> > > >

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 bpf-next 1/3] selftests/bpf: Add parallelism to test_progs
  2021-09-15 16:56       ` sunyucong
@ 2021-09-15 17:36         ` Andrii Nakryiko
  2021-09-15 17:43           ` sunyucong
  0 siblings, 1 reply; 12+ messages in thread
From: Andrii Nakryiko @ 2021-09-15 17:36 UTC (permalink / raw)
  To: sunyucong; +Cc: Yucong Sun, Andrii Nakryiko, bpf

On Wed, Sep 15, 2021 at 9:56 AM sunyucong@gmail.com <sunyucong@gmail.com> wrote:
>
> On Wed, Sep 15, 2021 at 12:01 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Wed, Sep 15, 2021 at 6:29 AM sunyucong@gmail.com <sunyucong@gmail.com> wrote:
> > >
> > > On Tue, Sep 14, 2021 at 3:11 AM Andrii Nakryiko
> > > <andrii.nakryiko@gmail.com> wrote:
> > > >
> > > > On Mon, Sep 13, 2021 at 12:39 PM Yucong Sun <fallentree@fb.com> wrote:
> > > > >
> > > > > From: Yucong Sun <sunyucong@gmail.com>
> > > > >
> > > > > This patch adds "-j" mode to test_progs, executing tests in multiple process.
> > > > > "-j" mode is optional, and works with all existing test selection mechanism, as
> > > > > well as "-v", "-l" etc.
> > > > >
> > > > > In "-j" mode, main process use UDS/DGRAM to communicate to each forked worker,
> > > > > commanding it to run tests and collect logs. After all tests are finished, a
> > > > > summary is printed. main process use multiple competing threads to dispatch
> > > > > work to worker, trying to keep them all busy.
> > > > >
> > > >
> > > > Overall this looks great and I'm super excited that we'll soon be able
> > > > to run tests mostly in parallel. I've done the first rough pass over
> > > > the code, but haven't played with running this locally yet.
> > > >
> > > > I didn't trim the message to keep all the context in one place, so
> > > > please scroll through all of the below till the end.
> > > >
> > > > > Example output:
> > > > >
> > > > >   > ./test_progs -n 15-20 -j
> > > > >   [    8.584709] bpf_testmod: loading out-of-tree module taints kernel.
> > > > >   Launching 2 workers.
> > > > >   [0]: Running test 15.
> > > > >   [1]: Running test 16.
> > > > >   [1]: Running test 17.
> > > > >   [1]: Running test 18.
> > > > >   [1]: Running test 19.
> > > > >   [1]: Running test 20.
> > > > >   [1]: worker exit.
> > > > >   [0]: worker exit.
> > > >
> > > > I think these messages shouldn't be emitted in normal mode, and not
> > > > even with -v. Maybe -vv. They will be useful initially to debug bugs
> > > > in concurrent test runner, probably, so I'd hide them behind a very
> > > > verbose setting.
>
> I created a new "debug" mode, which will print these messages. Putting
> them under verbose mode would also make all the other tests log more
> output, which makes it harder to debug.

Cool, makes sense.

>
> > > >
> > > > >   #15 btf_dump:OK
> > > > >   #16 btf_endian:OK
> > > > >   #17 btf_map_in_map:OK
> > > > >   #18 btf_module:OK
> > > > >   #19 btf_skc_cls_ingress:OK
> > > > >   #20 btf_split:OK
> > > > >   Summary: 6/20 PASSED, 0 SKIPPED, 0 FAILED
> > > > >
> > > > > Know issue:
> > > > >
> > > > > Some tests fail when running concurrently, later patch will either
> > > > > fix the test or pin them to worker 0.
> > > >
> > > > Hm.. patch #3 does that, no?
> > > >
> > > > >
> > > > > Signed-off-by: Yucong Sun <sunyucong@gmail.com>
> > > > >
> > > > > V4 -> V3: address style warnings.
> > > > > V3 -> V2: fix missing outputs in commit messages.
> > > > > V2 -> V1: switch to UDS client/server model.
> > > >
> > > > patch sets with more than one patch should come with a cover letter.
> > > > Keeping more-or-less detailed history in a cover letter is the best.
> > > > For next revision please add the cover letter and move the history
> > > > there. Check some other patch sets for an example.
> > >
> > > Ack.
> > >
> > > >
> > > > > ---
> > > > >  tools/testing/selftests/bpf/test_progs.c | 458 ++++++++++++++++++++++-
> > > > >  tools/testing/selftests/bpf/test_progs.h |  36 +-
> > > > >  2 files changed, 480 insertions(+), 14 deletions(-)
> > > > >
> >
> > [...]
> >
> > > > > @@ -172,14 +182,14 @@ void test__end_subtest()
> > > > >
> > > > >         dump_test_log(test, sub_error_cnt);
> > > > >
> > > > > -       fprintf(env.stdout, "#%d/%d %s/%s:%s\n",
> > > > > +       fprintf(stdout, "#%d/%d %s/%s:%s\n",
> > > > >                test->test_num, test->subtest_num, test->test_name, test->subtest_name,
> > > > >                sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
> > > >
> > > > shouldn't this be emitted by the server (main) process, not the
> > > > worker? Worker itself just returns numbers through UDS.
> > >
> > > test__end_subtest() is executed by the worker, and this output is
> > > part of the log that is sent back if there is an error executing
> > > subtests or in verbose mode.
> > >
> > > In theory we can print this on the server side, but that would require
> > > quite a bit of hassle, sending back each subtest's execution stats
> > > individually, and I don't think it is worth the trouble.
> >
> > Fair enough. Sub-tests are much more dynamic feature which is harder
> > to deal with. I have some ideas if we decide to parallelize sub-tests
> > as well, but ok, for now let's leave it as is.
>
> I think it is much easier to just turn sub-tests into top-level tests;
> from what I've seen, the actual setup/teardown code is not that much.

That's what we'll end up doing for bpf_verif_scale tests, probably.
It's going to be the biggest bottleneck for speeding up testing in
parallel mode. For the rest we can probably keep all subtests within
their respective tests for now without increasing the total parallel
run time much.

>
> >
> > >
> > > >
> > > > >
> > > > >         if (sub_error_cnt)
> > > > > -               env.fail_cnt++;
> > > > > +               test->error_cnt++;
> > > > >         else if (test->skip_cnt == 0)
> > > > > -               env.sub_succ_cnt++;
> > > > > +               test->sub_succ_cnt++;
> > > >
> > > > this also doesn't feel like a worker's responsibility and logic. It
> > > > feels like there is a bit of a missing separation between "executing
> > > > test" and "accounting and reporting test result". I'll get to that
> > > > below when talking about duplication of code and logic between
> > > > parallel and non-parallel modes of operation.
> > >
> > > This is also accounting the execution stats for the subtests of the
> > > current test; right now only the main test's cumulated stats are sent
> > > back, not individual subtests' stats.
> >
> > Right, subtests again, ok.
> >
> > >
> > > >
> > > > >         skip_account();
> > > > >
> > > > >         free(test->subtest_name);
> > > > > @@ -474,6 +484,7 @@ enum ARG_KEYS {
> > > > >         ARG_LIST_TEST_NAMES = 'l',
> > > > >         ARG_TEST_NAME_GLOB_ALLOWLIST = 'a',
> > > > >         ARG_TEST_NAME_GLOB_DENYLIST = 'd',
> > > > > +       ARG_NUM_WORKERS = 'j',
> > > > >  };
> > > > >
> >
> > [...]
> >
> > > > > +                       test_to_run = current_test_idx;
> > > > > +                       current_test_idx++;
> > > > > +
> > > > > +                       pthread_mutex_unlock(&current_test_lock);
> > > > > +               }
> > > > > +
> > > > > +               test = &prog_test_defs[test_to_run];
> > > > > +               test->test_num = test_to_run + 1;
> > > >
> > > > let's initialize test->test_num outside of all this logic in main()
> > > > (together with whatever other test_def initializations we need to
> > > > perform at startup)
> > > >
> > > > > +
> > > > > +               if (!should_run(&env.test_selector,
> > > > > +                               test->test_num, test->test_name))
> > > > > +                       continue;
> > > >
> > > > this probably also can be extracted outside of single-thread/parallel
> > > > execution paths, it's basically a simple pre-initialization as well.
> > > > Each test will be just marked as needing the run or not. WDYT?
> > >
> > > For this, yes, I added a bool should_run to the test struct, and we
> > > can avoid calling should_run() on every worker.
> > >
> > > However, for subtests, we still have to call it in the worker, since
> > > there is no way to iterate subtests without running the test.
> >
> > right
> >
> > > >
> > > > > +
> > > > > +               /* run test through worker */
> > > > > +               {
> > > > > +                       struct message msg_do_test;
> > > > > +
> >
> > [...]
> >
> > > > > +       /* wait for all dispatcher to finish */
> > > > > +       for (int i = 0; i < env.workers; i++) {
> > > > > +               while (true) {
> > > > > +                       struct timespec timeout = {
> > > > > +                               .tv_sec = time(NULL) + 5,
> > > > > +                               .tv_nsec = 0
> > > > > +                       };
> > > > > +                       if (pthread_timedjoin_np(dispatcher_threads[i], NULL, &timeout) != ETIMEDOUT)
> > > > > +                               break;
> > > > > +                       fprintf(stderr, "Still waiting for thread %d (test %d).\n", i,  env.worker_current_test[i] + 1);
> > > >
> > > > This is just going to spam output needlessly. Why do dispatchers have to
> > > > exit within 5 seconds, if there are tests that run way longer
> > > > than that?... probably better to just do pthread_join()?
> > >
> > > In later patches I actually changed it to list the status of all
> > > threads every 10 seconds, an easy way to see which tests are blocking
> > > progress or what the threads are actually doing. If 10s is too
> > > aggressive, how about 30 seconds?
> >
> > Ok, but let's hide it by default (i.e., only show in -vv mode)?
> >
> > >
> > > >
> > > > > +               }
> > > > > +       }
> > > > > +       free(dispatcher_threads);
> > > > > +       free(env.worker_current_test);
> > > > > +       free(data);
> > > > > +
> > > > > +       /* generate summary */
> > > > > +       fflush(stderr);
> > > > > +       fflush(stdout);
> > > > > +
> > > > > +       for (int i = 0; i < prog_test_cnt; i++) {
> > > > > +               struct prog_test_def *current_test;
> > > > > +               struct test_result *result;
> > > > > +
> > > > > +               current_test = &prog_test_defs[i];
> > > > > +               result = &test_results[i];
> > > > > +
> > > > > +               if (!current_test->tested)
> > > > > +                       continue;
> > > > > +
> > > > > +               env.succ_cnt += result->error_cnt ? 0 : 1;
> > > > > +               env.skip_cnt += result->skip_cnt;
> > > > > +               env.fail_cnt += result->error_cnt;
> > > > > +               env.sub_succ_cnt += result->sub_succ_cnt;
> > > > > +
> > > > > +               if (result->log_cnt) {
> > > > > +                       result->log_buf[result->log_cnt] = '\0';
> > > > > +                       fprintf(stdout, "%s", result->log_buf);
> > > > > +                       if (result->log_buf[result->log_cnt - 1] != '\n')
> > > > > +                               fprintf(stdout, "\n");
> > > > > +               }
> > > > > +               fprintf(stdout, "#%d %s:%s\n",
> > > > > +                       current_test->test_num, current_test->test_name,
> > > > > +                       result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK"));
> > > >
> > > > it would be nice to see this as results come in (when handling
> > > > msg_test_done). It will come out of order, but it will sort of be a
> > > > progress indicator that stuff is happening. The totals summary
> > > > will then be output at the very end, at the bottom.
> > >
> > > My first implementation is exactly that, but I don't quite like the
> > > out-of-order logs; it makes reading / comparing them much
> > > harder.
> >
> > by out of order you mean that tests will finish in different order? Or
> > that logs from multiple tests will be intermingled? For the former I
> > think it's fine given this is parallel mode. For the latter, that
> > shouldn't happen, because log output should happen under common lock,
> > so that log of one test is finished before log of another test is
> > emitted.
> >
> > As for comparing two log outputs, that's not something I ever really
> > do. Usually you want to see what failed and why, not how it differs
> > from the previous run. With the dumping of only errored tests at the
> > end (that we previously talked about), we can sort those and emit
> > their logs in order, because by that time all tests are done already.
> > But for now I'd just emit SUCCESS/FAIL as test results come in.
> >
>
> OK, as long as we are only outputting one line here, it should be fine.

It will be many lines if a test failed; is that a problem?

>
> > >
> > > That's the reason I added test_results and print threads status as a
> > > progress indicator.
> > >
> > > >
> > > > > +       }
> > > > > +
> > > > > +       fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
> > > > > +               env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
> > > > > +
> > > > > +       /* reap all workers */
> > > > > +       for (int i = 0; i < env.workers; i++) {
> > > > > +               int wstatus, pid;
> > > > > +
> > > > > +               pid = waitpid(env.worker_pids[i], &wstatus, 0);
> > > >

[...]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 bpf-next 1/3] selftests/bpf: Add parallelism to test_progs
  2021-09-15 17:36         ` Andrii Nakryiko
@ 2021-09-15 17:43           ` sunyucong
  2021-09-15 17:52             ` Andrii Nakryiko
  0 siblings, 1 reply; 12+ messages in thread
From: sunyucong @ 2021-09-15 17:43 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: Yucong Sun, Andrii Nakryiko, bpf

On Wed, Sep 15, 2021 at 1:36 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Wed, Sep 15, 2021 at 9:56 AM sunyucong@gmail.com <sunyucong@gmail.com> wrote:
> >
> > On Wed, Sep 15, 2021 at 12:01 PM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Wed, Sep 15, 2021 at 6:29 AM sunyucong@gmail.com <sunyucong@gmail.com> wrote:
> > > >
> > > > On Tue, Sep 14, 2021 at 3:11 AM Andrii Nakryiko
> > > > <andrii.nakryiko@gmail.com> wrote:
> > > > >
> > > > > On Mon, Sep 13, 2021 at 12:39 PM Yucong Sun <fallentree@fb.com> wrote:
> > > > > >
> > > > > > From: Yucong Sun <sunyucong@gmail.com>
> > > > > >
> > > > > > This patch adds "-j" mode to test_progs, executing tests in multiple process.
> > > > > > "-j" mode is optional, and works with all existing test selection mechanism, as
> > > > > > well as "-v", "-l" etc.
> > > > > >
> > > > > > In "-j" mode, main process use UDS/DGRAM to communicate to each forked worker,
> > > > > > commanding it to run tests and collect logs. After all tests are finished, a
> > > > > > summary is printed. main process use multiple competing threads to dispatch
> > > > > > work to worker, trying to keep them all busy.
> > > > > >
> > > > >
> > > > > Overall this looks great and I'm super excited that we'll soon be able
> > > > > to run tests mostly in parallel. I've done the first rough pass over
> > > > > the code, but haven't played with running this locally yet.
> > > > >
> > > > > I didn't trim the message to keep all the context in one place, so
> > > > > please scroll through all of the below till the end.
> > > > >
> > > > > > Example output:
> > > > > >
> > > > > >   > ./test_progs -n 15-20 -j
> > > > > >   [    8.584709] bpf_testmod: loading out-of-tree module taints kernel.
> > > > > >   Launching 2 workers.
> > > > > >   [0]: Running test 15.
> > > > > >   [1]: Running test 16.
> > > > > >   [1]: Running test 17.
> > > > > >   [1]: Running test 18.
> > > > > >   [1]: Running test 19.
> > > > > >   [1]: Running test 20.
> > > > > >   [1]: worker exit.
> > > > > >   [0]: worker exit.
> > > > >
> > > > > I think these messages shouldn't be emitted in normal mode, and not
> > > > > even with -v. Maybe -vv. They will be useful initially to debug bugs
> > > > > in concurrent test runner, probably, so I'd hide them behind a very
> > > > > verbose setting.
> >
> > I created a new "debug" mode, which will print these messages. Putting
> > them under verbose mode would also make all the other tests log more
> > output, which makes it harder to debug.
>
> Cool, makes sense.
>
> >
> > > > >
> > > > > >   #15 btf_dump:OK
> > > > > >   #16 btf_endian:OK
> > > > > >   #17 btf_map_in_map:OK
> > > > > >   #18 btf_module:OK
> > > > > >   #19 btf_skc_cls_ingress:OK
> > > > > >   #20 btf_split:OK
> > > > > >   Summary: 6/20 PASSED, 0 SKIPPED, 0 FAILED
> > > > > >
> > > > > > Know issue:
> > > > > >
> > > > > > Some tests fail when running concurrently, later patch will either
> > > > > > fix the test or pin them to worker 0.
> > > > >
> > > > > Hm.. patch #3 does that, no?
> > > > >
> > > > > >
> > > > > > Signed-off-by: Yucong Sun <sunyucong@gmail.com>
> > > > > >
> > > > > > V4 -> V3: address style warnings.
> > > > > > V3 -> V2: fix missing outputs in commit messages.
> > > > > > V2 -> V1: switch to UDS client/server model.
> > > > >
> > > > > patch sets with more than one patch should come with a cover letter.
> > > > > Keeping more-or-less detailed history in a cover letter is the best.
> > > > > For next revision please add the cover letter and move the history
> > > > > there. Check some other patch sets for an example.
> > > >
> > > > Ack.
> > > >
> > > > >
> > > > > > ---
> > > > > >  tools/testing/selftests/bpf/test_progs.c | 458 ++++++++++++++++++++++-
> > > > > >  tools/testing/selftests/bpf/test_progs.h |  36 +-
> > > > > >  2 files changed, 480 insertions(+), 14 deletions(-)
> > > > > >
> > >
> > > [...]
> > >
> > > > > > @@ -172,14 +182,14 @@ void test__end_subtest()
> > > > > >
> > > > > >         dump_test_log(test, sub_error_cnt);
> > > > > >
> > > > > > -       fprintf(env.stdout, "#%d/%d %s/%s:%s\n",
> > > > > > +       fprintf(stdout, "#%d/%d %s/%s:%s\n",
> > > > > >                test->test_num, test->subtest_num, test->test_name, test->subtest_name,
> > > > > >                sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
> > > > >
> > > > > shouldn't this be emitted by the server (main) process, not the
> > > > > worker? Worker itself just returns numbers through UDS.
> > > >
> > > > test__end_subtest() is executed by the worker, and this output is
> > > > part of the log that is sent back if there is an error executing
> > > > subtests or in verbose mode.
> > > >
> > > > In theory we can print this on the server side, but that would require
> > > > quite a bit of hassle, sending back each subtest's execution stats
> > > > individually, and I don't think it is worth the trouble.
> > >
> > > Fair enough. Sub-tests are much more dynamic feature which is harder
> > > to deal with. I have some ideas if we decide to parallelize sub-tests
> > > as well, but ok, for now let's leave it as is.
> >
> > I think it is much easier to just turn sub-tests into top-level tests;
> > from what I've seen, the actual setup/teardown code is not that much.
>
> That's what we'll end up doing for bpf_verif_scale tests, probably.
> It's going to be the biggest bottleneck for speeding up testing in
> parallel mode. For the rest we can probably keep all subtests within
> their respective tests for now without increasing the total parallel
> run time much.
>
> >
> > >
> > > >
> > > > >
> > > > > >
> > > > > >         if (sub_error_cnt)
> > > > > > -               env.fail_cnt++;
> > > > > > +               test->error_cnt++;
> > > > > >         else if (test->skip_cnt == 0)
> > > > > > -               env.sub_succ_cnt++;
> > > > > > +               test->sub_succ_cnt++;
> > > > >
> > > > > this also doesn't feel like a worker's responsibility and logic. It
> > > > > feels like there is a bit of a missing separation between "executing
> > > > > test" and "accounting and reporting test result". I'll get to that
> > > > > below when talking about duplication of code and logic between
> > > > > parallel and non-parallel modes of operation.
> > > >
> > > > This is also accounting the execution stats for the subtests of the
> > > > current test; right now only the main test's cumulated stats are sent
> > > > back, not individual subtests' stats.
> > >
> > > Right, subtests again, ok.
> > >
> > > >
> > > > >
> > > > > >         skip_account();
> > > > > >
> > > > > >         free(test->subtest_name);
> > > > > > @@ -474,6 +484,7 @@ enum ARG_KEYS {
> > > > > >         ARG_LIST_TEST_NAMES = 'l',
> > > > > >         ARG_TEST_NAME_GLOB_ALLOWLIST = 'a',
> > > > > >         ARG_TEST_NAME_GLOB_DENYLIST = 'd',
> > > > > > +       ARG_NUM_WORKERS = 'j',
> > > > > >  };
> > > > > >
> > >
> > > [...]
> > >
> > > > > > +                       test_to_run = current_test_idx;
> > > > > > +                       current_test_idx++;
> > > > > > +
> > > > > > +                       pthread_mutex_unlock(&current_test_lock);
> > > > > > +               }
> > > > > > +
> > > > > > +               test = &prog_test_defs[test_to_run];
> > > > > > +               test->test_num = test_to_run + 1;
> > > > >
> > > > > let's initialize test->test_num outside of all this logic in main()
> > > > > (together with whatever other test_def initializations we need to
> > > > > perform at startup)
> > > > >
> > > > > > +
> > > > > > +               if (!should_run(&env.test_selector,
> > > > > > +                               test->test_num, test->test_name))
> > > > > > +                       continue;
> > > > >
> > > > > this probably also can be extracted outside of single-thread/parallel
> > > > > execution paths, it's basically a simple pre-initialization as well.
> > > > > Each test will be just marked as needing the run or not. WDYT?
> > > >
> > > > For this, yes, I added a bool should_run to the test struct, and we
> > > > can avoid calling should_run() on every worker.
> > > >
> > > > However, for subtests, we still have to call it in the worker, since
> > > > there is no way to iterate subtests without running the test.
> > >
> > > right
> > >
> > > > >
> > > > > > +
> > > > > > +               /* run test through worker */
> > > > > > +               {
> > > > > > +                       struct message msg_do_test;
> > > > > > +
> > >
> > > [...]
> > >
> > > > > > +       /* wait for all dispatcher to finish */
> > > > > > +       for (int i = 0; i < env.workers; i++) {
> > > > > > +               while (true) {
> > > > > > +                       struct timespec timeout = {
> > > > > > +                               .tv_sec = time(NULL) + 5,
> > > > > > +                               .tv_nsec = 0
> > > > > > +                       };
> > > > > > +                       if (pthread_timedjoin_np(dispatcher_threads[i], NULL, &timeout) != ETIMEDOUT)
> > > > > > +                               break;
> > > > > > +                       fprintf(stderr, "Still waiting for thread %d (test %d).\n", i,  env.worker_current_test[i] + 1);
> > > > >
> > > > > This is just going to spam output needlessly. Why do dispatchers have to
> > > > > exit within 5 seconds, if there are tests that run way longer
> > > > > than that?... probably better to just do pthread_join()?
> > > >
> > > > In later patches I actually changed it to list the status of all
> > > > threads every 10 seconds, an easy way to see which tests are blocking
> > > > progress or what the threads are actually doing. If 10s is too
> > > > aggressive, how about 30 seconds?
> > >
> > > Ok, but let's hide it by default (i.e., only show in -vv mode)?
> > >
> > > >
> > > > >
> > > > > > +               }
> > > > > > +       }
> > > > > > +       free(dispatcher_threads);
> > > > > > +       free(env.worker_current_test);
> > > > > > +       free(data);
> > > > > > +
> > > > > > +       /* generate summary */
> > > > > > +       fflush(stderr);
> > > > > > +       fflush(stdout);
> > > > > > +
> > > > > > +       for (int i = 0; i < prog_test_cnt; i++) {
> > > > > > +               struct prog_test_def *current_test;
> > > > > > +               struct test_result *result;
> > > > > > +
> > > > > > +               current_test = &prog_test_defs[i];
> > > > > > +               result = &test_results[i];
> > > > > > +
> > > > > > +               if (!current_test->tested)
> > > > > > +                       continue;
> > > > > > +
> > > > > > +               env.succ_cnt += result->error_cnt ? 0 : 1;
> > > > > > +               env.skip_cnt += result->skip_cnt;
> > > > > > +               env.fail_cnt += result->error_cnt;
> > > > > > +               env.sub_succ_cnt += result->sub_succ_cnt;
> > > > > > +
> > > > > > +               if (result->log_cnt) {
> > > > > > +                       result->log_buf[result->log_cnt] = '\0';
> > > > > > +                       fprintf(stdout, "%s", result->log_buf);
> > > > > > +                       if (result->log_buf[result->log_cnt - 1] != '\n')
> > > > > > +                               fprintf(stdout, "\n");
> > > > > > +               }
> > > > > > +               fprintf(stdout, "#%d %s:%s\n",
> > > > > > +                       current_test->test_num, current_test->test_name,
> > > > > > +                       result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK"));
> > > > >
> > > > > it would be nice to see this as results come in (when handling
> > > > > msg_test_done). It will come out of order, but it will sort of be a
> > > > > progress indicator that stuff is happening. The totals summary
> > > > > will then be output at the very end, at the bottom.
> > > >
> > > > My first implementation is exactly that, but I don't quite like the
> > > > out-of-order logs; it makes reading / comparing them much
> > > > harder.
> > >
> > > by out of order you mean that tests will finish in different order? Or
> > > that logs from multiple tests will be intermingled? For the former I
> > > think it's fine given this is parallel mode. For the latter, that
> > > shouldn't happen, because log output should happen under common lock,
> > > so that log of one test is finished before log of another test is
> > > emitted.
> > >
> > > As for comparing two log outputs, that's not something I ever really
> > > do. Usually you want to see what failed and why, not how it differs
> > > from the previous run. With the dumping of only errored tests at the
> > > end (that we previously talked about), we can sort those and emit
> > > their logs in order, because by that time all tests are done already.
> > > But for now I'd just emit SUCCESS/FAIL as test results come in.
> > >
> >
> > OK, as long as we are only outputting one line here, it should be fine.
>
> It will be many lines if a test failed; is that a problem?

Yes. Because stdio is line buffered, if we output multiple lines from
multiple dispatcher threads, they will be interleaved. So in the new
revision of this patch I changed the way reporting is done: a single
line is printed as each test finishes to indicate progress,
and once all tests are done, a summary line is printed, followed by
the error logs from the failed tests, as in the example below:

./test_progs -b scale,fentry,fexit -n 1-10 -j
[   16.300708] bpf_testmod: loading out-of-tree module taints kernel.
Launching 8 workers.
#6 bind_perm:OK
#1 align:OK
#10 bpf_obj_id:FAIL
#5 autoload:OK
#2 atomic_bounds:OK
#4 attach_probe:OK
#7 bpf_cookie:FAIL
#8 bpf_iter:OK
#3 atomics:OK
#9 bpf_iter_setsockopt:OK
Summary: 8/51 PASSED, 0 SKIPPED, 4 FAILED
test_bpf_cookie:PASS:skel_open 0 nsec
kprobe_subtest:PASS:link1 0 nsec
kprobe_subtest:PASS:link2 0 nsec
kprobe_subtest:PASS:retlink1 0 nsec
kprobe_subtest:PASS:retlink2 0 nsec
kprobe_subtest:PASS:kprobe_res 0 nsec
kprobe_subtest:PASS:kretprobe_res 0 nsec
#7/1 bpf_cookie/kprobe:OK
uprobe_subtest:PASS:link1 0 nsec
uprobe_subtest:PASS:link2 0 nsec
uprobe_subtest:PASS:retlink1 0 nsec
uprobe_subtest:PASS:retlink2 0 nsec
uprobe_subtest:PASS:uprobe_res 0 nsec
uprobe_subtest:PASS:uretprobe_res 0 nsec
#7/2 bpf_cookie/uprobe:OK
tp_subtest:PASS:link1 0 nsec
tp_subtest:PASS:link2 0 nsec
tp_subtest:PASS:tp_res1 0 nsec
tp_subtest:PASS:link3 0 nsec
tp_subtest:PASS:tp_res2 0 nsec
#7/3 bpf_cookie/tracepoint:OK
pe_subtest:PASS:perf_fd 0 nsec
pe_subtest:PASS:link1 0 nsec
burn_cpu:PASS:set_thread_affinity 0 nsec
pe_subtest:FAIL:pe_res1 unexpected pe_res1: actual 0 != expected 1048576
pe_subtest:PASS:link2 0 nsec
burn_cpu:PASS:set_thread_affinity 0 nsec
pe_subtest:PASS:pe_res2 0 nsec
#7/4 bpf_cookie/perf_event:FAIL
#7 bpf_cookie:FAIL
test_bpf_obj_id:PASS:get-fd-by-notexist-prog-id 0 nsec
test_bpf_obj_id:PASS:get-fd-by-notexist-map-id 0 nsec
test_bpf_obj_id:PASS:get-fd-by-notexist-link-id 0 nsec
test_bpf_obj_id:PASS:prog_attach 0 nsec
test_bpf_obj_id:PASS:get-map-info(fd) 0 nsec
test_bpf_obj_id:PASS:get-prog-info(fd) 0 nsec
test_bpf_obj_id:PASS:get-link-info(fd) 0 nsec
test_bpf_obj_id:PASS:prog_attach 0 nsec
test_bpf_obj_id:PASS:get-map-info(fd) 0 nsec
test_bpf_obj_id:PASS:get-prog-info(fd) 0 nsec
test_bpf_obj_id:PASS:get-link-info(fd) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd-bad-nr-map-ids 0 nsec
test_bpf_obj_id:PASS:get-prog-info(next_id->fd) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd-bad-nr-map-ids 0 nsec
test_bpf_obj_id:PASS:get-prog-info(next_id->fd) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:check total prog id found by get_next_id 0 nsec
test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:check get-map-info(next_id->fd) 0 nsec
test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:check get-map-info(next_id->fd) 0 nsec
test_bpf_obj_id:PASS:check total map id found by get_next_id 0 nsec
test_bpf_obj_id:PASS:get-link-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-link-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-link-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-link-fd(next_id) 0 nsec
test_bpf_obj_id:FAIL:get-link-fd(next_id) link_fd -11 next_id 7 errno 11
test_bpf_obj_id:FAIL:check total link id found by get_next_id nr_id_found 0(2)
#10 bpf_obj_id:FAIL
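
The one-line progress report is emitted under a single lock when
MSG_TEST_DONE is handled, so lines from different dispatcher threads cannot
interleave; a minimal sketch (lock name illustrative):

static pthread_mutex_t stdout_lock = PTHREAD_MUTEX_INITIALIZER;

/* in the dispatcher thread, when a MSG_TEST_DONE arrives */
pthread_mutex_lock(&stdout_lock);
fprintf(stdout, "#%d %s:%s\n", test->test_num, test->test_name,
	result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK"));
fflush(stdout);
pthread_mutex_unlock(&stdout_lock);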

>
> >
> > > >
> > > > That's the reason I added test_results and print threads status as a
> > > > progress indicator.
> > > >
> > > > >
> > > > > > +       }
> > > > > > +
> > > > > > +       fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
> > > > > > +               env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
> > > > > > +
> > > > > > +       /* reap all workers */
> > > > > > +       for (int i = 0; i < env.workers; i++) {
> > > > > > +               int wstatus, pid;
> > > > > > +
> > > > > > +               pid = waitpid(env.worker_pids[i], &wstatus, 0);
> > > > >
>
> [...]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 bpf-next 1/3] selftests/bpf: Add parallelism to test_progs
  2021-09-15 17:43           ` sunyucong
@ 2021-09-15 17:52             ` Andrii Nakryiko
  0 siblings, 0 replies; 12+ messages in thread
From: Andrii Nakryiko @ 2021-09-15 17:52 UTC (permalink / raw)
  To: sunyucong; +Cc: Yucong Sun, Andrii Nakryiko, bpf

On Wed, Sep 15, 2021 at 10:44 AM sunyucong@gmail.com
<sunyucong@gmail.com> wrote:
>
> On Wed, Sep 15, 2021 at 1:36 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Wed, Sep 15, 2021 at 9:56 AM sunyucong@gmail.com <sunyucong@gmail.com> wrote:
> > >
> > > On Wed, Sep 15, 2021 at 12:01 PM Andrii Nakryiko
> > > <andrii.nakryiko@gmail.com> wrote:
> > > >
> > > > On Wed, Sep 15, 2021 at 6:29 AM sunyucong@gmail.com <sunyucong@gmail.com> wrote:
> > > > >
> > > > > On Tue, Sep 14, 2021 at 3:11 AM Andrii Nakryiko
> > > > > <andrii.nakryiko@gmail.com> wrote:
> > > > > >
> > > > > > On Mon, Sep 13, 2021 at 12:39 PM Yucong Sun <fallentree@fb.com> wrote:
> > > > > > >
> > > > > > > From: Yucong Sun <sunyucong@gmail.com>
> > > > > > >
> > > > > > > This patch adds "-j" mode to test_progs, executing tests in multiple process.
> > > > > > > "-j" mode is optional, and works with all existing test selection mechanism, as
> > > > > > > well as "-v", "-l" etc.
> > > > > > >
> > > > > > > In "-j" mode, main process use UDS/DGRAM to communicate to each forked worker,
> > > > > > > commanding it to run tests and collect logs. After all tests are finished, a
> > > > > > > summary is printed. main process use multiple competing threads to dispatch
> > > > > > > work to worker, trying to keep them all busy.
> > > > > > >
> > > > > >
> > > > > > Overall this looks great and I'm super excited that we'll soon be able
> > > > > > to run tests mostly in parallel. I've done the first rough pass over
> > > > > > the code, but haven't played with running this locally yet.
> > > > > >
> > > > > > I didn't trim the message to keep all the context in one place, so
> > > > > > please scroll through all of the below till the end.
> > > > > >
> > > > > > > Example output:
> > > > > > >
> > > > > > >   > ./test_progs -n 15-20 -j
> > > > > > >   [    8.584709] bpf_testmod: loading out-of-tree module taints kernel.
> > > > > > >   Launching 2 workers.
> > > > > > >   [0]: Running test 15.
> > > > > > >   [1]: Running test 16.
> > > > > > >   [1]: Running test 17.
> > > > > > >   [1]: Running test 18.
> > > > > > >   [1]: Running test 19.
> > > > > > >   [1]: Running test 20.
> > > > > > >   [1]: worker exit.
> > > > > > >   [0]: worker exit.
> > > > > >
> > > > > > I think these messages shouldn't be emitted in normal mode, and not
> > > > > > even with -v. Maybe -vv. They will be useful initially to debug bugs
> > > > > > in concurrent test runner, probably, so I'd hide them behind a very
> > > > > > verbose setting.
> > >
> > > I created a new "debug" mode, which will print these messages. Putting
> > > them under verbose mode would also make all the other tests log more
> > > output, which makes it harder to debug.
> >
> > Cool, makes sense.
> >
> > >
> > > > > >
> > > > > > >   #15 btf_dump:OK
> > > > > > >   #16 btf_endian:OK
> > > > > > >   #17 btf_map_in_map:OK
> > > > > > >   #18 btf_module:OK
> > > > > > >   #19 btf_skc_cls_ingress:OK
> > > > > > >   #20 btf_split:OK
> > > > > > >   Summary: 6/20 PASSED, 0 SKIPPED, 0 FAILED
> > > > > > >
> > > > > > > Know issue:
> > > > > > >
> > > > > > > Some tests fail when running concurrently, later patch will either
> > > > > > > fix the test or pin them to worker 0.
> > > > > >
> > > > > > Hm.. patch #3 does that, no?
> > > > > >
> > > > > > >
> > > > > > > Signed-off-by: Yucong Sun <sunyucong@gmail.com>
> > > > > > >
> > > > > > > V4 -> V3: address style warnings.
> > > > > > > V3 -> V2: fix missing outputs in commit messages.
> > > > > > > V2 -> V1: switch to UDS client/server model.
> > > > > >
> > > > > > patch sets with more than one patch should come with a cover letter.
> > > > > > Keeping more-or-less detailed history in a cover letter is the best.
> > > > > > For next revision please add the cover letter and move the history
> > > > > > there. Check some other patch sets for an example.
> > > > >
> > > > > Ack.
> > > > >
> > > > > >
> > > > > > > ---
> > > > > > >  tools/testing/selftests/bpf/test_progs.c | 458 ++++++++++++++++++++++-
> > > > > > >  tools/testing/selftests/bpf/test_progs.h |  36 +-
> > > > > > >  2 files changed, 480 insertions(+), 14 deletions(-)
> > > > > > >
> > > >
> > > > [...]
> > > >
> > > > > > > @@ -172,14 +182,14 @@ void test__end_subtest()
> > > > > > >
> > > > > > >         dump_test_log(test, sub_error_cnt);
> > > > > > >
> > > > > > > -       fprintf(env.stdout, "#%d/%d %s/%s:%s\n",
> > > > > > > +       fprintf(stdout, "#%d/%d %s/%s:%s\n",
> > > > > > >                test->test_num, test->subtest_num, test->test_name, test->subtest_name,
> > > > > > >                sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
> > > > > >
> > > > > > shouldn't this be emitted by the server (main) process, not the
> > > > > > worker? Worker itself just returns numbers through UDS.
> > > > >
> > > > > test__end_subtest() is executed by the worker, and this output is
> > > > > part of the log that is sent back if there is an error executing
> > > > > subtests or in verbose mode.
> > > > >
> > > > > In theory we can print this on the server side, but that would require
> > > > > quite a bit of hassle, sending back each subtest's execution stats
> > > > > individually, and I don't think it is worth the trouble.
> > > >
> > > > Fair enough. Sub-tests are much more dynamic feature which is harder
> > > > to deal with. I have some ideas if we decide to parallelize sub-tests
> > > > as well, but ok, for now let's leave it as is.
> > >
> > > I think it is much easier to just turn sub-tests into top-level tests;
> > > from what I've seen, the actual setup/teardown code is not that much.
> >
> > That's what we'll end up doing for bpf_verif_scale tests, probably.
> > It's going to be the biggest bottleneck for speeding up testing in
> > parallel mode. For the rest we can probably keep all subtests within
> > their respective tests for now without increasing the total parallel
> > run time much.
> >
> > >
> > > >
> > > > >
> > > > > >
> > > > > > >
> > > > > > >         if (sub_error_cnt)
> > > > > > > -               env.fail_cnt++;
> > > > > > > +               test->error_cnt++;
> > > > > > >         else if (test->skip_cnt == 0)
> > > > > > > -               env.sub_succ_cnt++;
> > > > > > > +               test->sub_succ_cnt++;
> > > > > >
> > > > > > this also doesn't feel like a worker's responsibility and logic. It
> > > > > > feels like there is a bit of a missing separation between "executing
> > > > > > test" and "accounting and reporting test result". I'll get to that
> > > > > > below when talking about duplication of code and logic between
> > > > > > parallel and non-parallel modes of operation.
> > > > >
> > > > > This is also accounting the execution stats for the subtests of the
> > > > > current test; right now only the main test's cumulated stats are sent
> > > > > back, not individual subtests' stats.
> > > >
> > > > Right, subtests again, ok.
> > > >
> > > > >
> > > > > >
> > > > > > >         skip_account();
> > > > > > >
> > > > > > >         free(test->subtest_name);
> > > > > > > @@ -474,6 +484,7 @@ enum ARG_KEYS {
> > > > > > >         ARG_LIST_TEST_NAMES = 'l',
> > > > > > >         ARG_TEST_NAME_GLOB_ALLOWLIST = 'a',
> > > > > > >         ARG_TEST_NAME_GLOB_DENYLIST = 'd',
> > > > > > > +       ARG_NUM_WORKERS = 'j',
> > > > > > >  };
> > > > > > >
> > > >
> > > > [...]
> > > >
> > > > > > > +                       test_to_run = current_test_idx;
> > > > > > > +                       current_test_idx++;
> > > > > > > +
> > > > > > > +                       pthread_mutex_unlock(&current_test_lock);
> > > > > > > +               }
> > > > > > > +
> > > > > > > +               test = &prog_test_defs[test_to_run];
> > > > > > > +               test->test_num = test_to_run + 1;
> > > > > >
> > > > > > let's initialize test->test_num outside of all this logic in main()
> > > > > > (together with whatever other test_def initializations we need to
> > > > > > perform at startup)
> > > > > >
> > > > > > > +
> > > > > > > +               if (!should_run(&env.test_selector,
> > > > > > > +                               test->test_num, test->test_name))
> > > > > > > +                       continue;
> > > > > >
> > > > > > this probably also can be extracted outside of single-thread/parallel
> > > > > > execution paths, it's basically a simple pre-initialization as well.
> > > > > > Each test will be just marked as needing the run or not. WDYT?
> > > > >
> > > > > For this, yes, I added a bool should_run to the test struct, and we
> > > > > can avoid calling should_run() on every worker.
> > > > >
> > > > > However, for subtests, we still have to call it in the worker, since
> > > > > there is no way to iterate subtests without running the test.
> > > >
> > > > right
> > > >
> > > > > >
> > > > > > > +
> > > > > > > +               /* run test through worker */
> > > > > > > +               {
> > > > > > > +                       struct message msg_do_test;
> > > > > > > +
> > > >
> > > > [...]
> > > >
> > > > > > > +       /* wait for all dispatcher to finish */
> > > > > > > +       for (int i = 0; i < env.workers; i++) {
> > > > > > > +               while (true) {
> > > > > > > +                       struct timespec timeout = {
> > > > > > > +                               .tv_sec = time(NULL) + 5,
> > > > > > > +                               .tv_nsec = 0
> > > > > > > +                       };
> > > > > > > +                       if (pthread_timedjoin_np(dispatcher_threads[i], NULL, &timeout) != ETIMEDOUT)
> > > > > > > +                               break;
> > > > > > > +                       fprintf(stderr, "Still waiting for thread %d (test %d).\n", i,  env.worker_current_test[i] + 1);
> > > > > >
> > > > > > This is just going to spam output needlessly. Why do dispatchers
> > > > > > have to exit within 5 seconds if there are tests that run way longer
> > > > > > than that? Probably better to just do pthread_join()?
> > > > >
> > > > > In later patches I actually changed it to list the status of all
> > > > > threads every 10 seconds, an easy way to see which tests are blocking
> > > > > progress or what the threads are actually doing. If 10s is too
> > > > > aggressive, how about 30 seconds?
> > > >
> > > > Ok, but let's hide it by default (i.e., only show in -vv mode)?
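
A sketch of what that could look like (interval bumped to 30s; I'm
assuming -vv maps to VERBOSE_SUPER here):

while (true) {
        struct timespec timeout = {
                .tv_sec = time(NULL) + 30,
                .tv_nsec = 0
        };
        if (pthread_timedjoin_np(dispatcher_threads[i], NULL, &timeout) != ETIMEDOUT)
                break;
        if (env.verbosity >= VERBOSE_SUPER) /* only nag under -vv */
                fprintf(stderr, "Still waiting for thread %d (test %d).\n",
                        i, env.worker_current_test[i] + 1);
}
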
> > > >
> > > > >
> > > > > >
> > > > > > > +               }
> > > > > > > +       }
> > > > > > > +       free(dispatcher_threads);
> > > > > > > +       free(env.worker_current_test);
> > > > > > > +       free(data);
> > > > > > > +
> > > > > > > +       /* generate summary */
> > > > > > > +       fflush(stderr);
> > > > > > > +       fflush(stdout);
> > > > > > > +
> > > > > > > +       for (int i = 0; i < prog_test_cnt; i++) {
> > > > > > > +               struct prog_test_def *current_test;
> > > > > > > +               struct test_result *result;
> > > > > > > +
> > > > > > > +               current_test = &prog_test_defs[i];
> > > > > > > +               result = &test_results[i];
> > > > > > > +
> > > > > > > +               if (!current_test->tested)
> > > > > > > +                       continue;
> > > > > > > +
> > > > > > > +               env.succ_cnt += result->error_cnt ? 0 : 1;
> > > > > > > +               env.skip_cnt += result->skip_cnt;
> > > > > > > +               env.fail_cnt += result->error_cnt;
> > > > > > > +               env.sub_succ_cnt += result->sub_succ_cnt;
> > > > > > > +
> > > > > > > +               if (result->log_cnt) {
> > > > > > > +                       result->log_buf[result->log_cnt] = '\0';
> > > > > > > +                       fprintf(stdout, "%s", result->log_buf);
> > > > > > > +                       if (result->log_buf[result->log_cnt - 1] != '\n')
> > > > > > > +                               fprintf(stdout, "\n");
> > > > > > > +               }
> > > > > > > +               fprintf(stdout, "#%d %s:%s\n",
> > > > > > > +                       current_test->test_num, current_test->test_name,
> > > > > > > +                       result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK"));
> > > > > >
> > > > > > it would be nice to see this as results come in (when handling
> > > > > > msg_test_done). It will come out of order, but it will sort of be a
> > > > > > progress indicator that stuff is happening, while the totals summary
> > > > > > will then be output at the very end, at the bottom.
> > > > >
> > > > > My first implementation was exactly that, but I don't quite like the
> > > > > out-of-order logs; they make reading and comparing them much harder.
> > > >
> > > > by out of order you mean that tests will finish in a different order?
> > > > Or that logs from multiple tests will be intermingled? For the former
> > > > I think it's fine given this is parallel mode. For the latter, that
> > > > shouldn't happen, because log output should happen under a common
> > > > lock, so that the log of one test is finished before the log of
> > > > another test is emitted.
> > > >
> > > > As for comparing two log outputs, that's not something I ever really
> > > > do. Usually you want to see what failed and why, not how it differs
> > > > from the previous run. With the dumping of only errored tests at the
> > > > end (that we previously talked about), we can sort those and emit
> > > > their logs in order, because by that time all tests are done already.
> > > > But for now I'd just emit SUCCESS/FAIL as test results come in.
> > > >
> > >
> > > OK, as long as we are only outputting one line here, it should be fine.
> >
> > It will be many lines if a test fails; is that a problem?
>
> Yes, because stdio is line buffered, if we output multiple lines from

That's why I said you'd need a lock while emitting a test's log. I like
the summary, obviously, and we should definitely do that (will
non-parallel mode have it as well?), but the problem with delaying
error logs till the very end is that you won't be able to ctrl-c and
go look at the error log before the whole run finishes, which is quite
inconvenient in local development. Given it's just one lock + fflush()
away, can we do both -- log errors as test results come in, and then
dump only the errors at the bottom after all tests finish?
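
Roughly this, as a sketch (the test_result fields follow the patch; the
mutex and the helper's name are mine):

static pthread_mutex_t stdout_lock = PTHREAD_MUTEX_INITIALIZER;

/* called by a dispatcher thread when it receives msg_test_done */
static void report_test_result(const struct prog_test_def *test,
                               const struct test_result *result)
{
        pthread_mutex_lock(&stdout_lock);
        fprintf(stdout, "#%d %s:%s\n", test->test_num, test->test_name,
                result->error_cnt ? "FAIL" :
                (result->skip_cnt ? "SKIP" : "OK"));
        /* emit the full log right away, but only for failed tests */
        if (result->error_cnt && result->log_cnt)
                fprintf(stdout, "%.*s\n",
                        (int)result->log_cnt, result->log_buf);
        fflush(stdout);
        pthread_mutex_unlock(&stdout_lock);
}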

> multiple dispatcher threads, they will be interleaved. So in the new
> revision of this patch I changed the way reporting is done: a single
> line is printed as each test finishes to indicate progress, and once
> all tests are done, a summary line is printed, followed by the error
> logs of the failed tests, as in the example below:
>
> ./test_progs -b scale,fentry,fexit -n 1-10 -j
> [   16.300708] bpf_testmod: loading out-of-tree module taints kernel.
> Launching 8 workers.
> #6 bind_perm:OK
> #1 align:OK
> #10 bpf_obj_id:FAIL
> #5 autoload:OK
> #2 atomic_bounds:OK
> #4 attach_probe:OK
> #7 bpf_cookie:FAIL
> #8 bpf_iter:OK
> #3 atomics:OK
> #9 bpf_iter_setsockopt:OK
> Summary: 8/51 PASSED, 0 SKIPPED, 4 FAILED
> test_bpf_cookie:PASS:skel_open 0 nsec
> kprobe_subtest:PASS:link1 0 nsec
> kprobe_subtest:PASS:link2 0 nsec
> kprobe_subtest:PASS:retlink1 0 nsec
> kprobe_subtest:PASS:retlink2 0 nsec
> kprobe_subtest:PASS:kprobe_res 0 nsec
> kprobe_subtest:PASS:kretprobe_res 0 nsec
> #7/1 bpf_cookie/kprobe:OK
> uprobe_subtest:PASS:link1 0 nsec
> uprobe_subtest:PASS:link2 0 nsec
> uprobe_subtest:PASS:retlink1 0 nsec
> uprobe_subtest:PASS:retlink2 0 nsec
> uprobe_subtest:PASS:uprobe_res 0 nsec
> uprobe_subtest:PASS:uretprobe_res 0 nsec
> #7/2 bpf_cookie/uprobe:OK
> tp_subtest:PASS:link1 0 nsec
> tp_subtest:PASS:link2 0 nsec
> tp_subtest:PASS:tp_res1 0 nsec
> tp_subtest:PASS:link3 0 nsec
> tp_subtest:PASS:tp_res2 0 nsec
> #7/3 bpf_cookie/tracepoint:OK
> pe_subtest:PASS:perf_fd 0 nsec
> pe_subtest:PASS:link1 0 nsec
> burn_cpu:PASS:set_thread_affinity 0 nsec
> pe_subtest:FAIL:pe_res1 unexpected pe_res1: actual 0 != expected 1048576
> pe_subtest:PASS:link2 0 nsec
> burn_cpu:PASS:set_thread_affinity 0 nsec
> pe_subtest:PASS:pe_res2 0 nsec
> #7/4 bpf_cookie/perf_event:FAIL
> #7 bpf_cookie:FAIL
> test_bpf_obj_id:PASS:get-fd-by-notexist-prog-id 0 nsec
> test_bpf_obj_id:PASS:get-fd-by-notexist-map-id 0 nsec
> test_bpf_obj_id:PASS:get-fd-by-notexist-link-id 0 nsec
> test_bpf_obj_id:PASS:prog_attach 0 nsec
> test_bpf_obj_id:PASS:get-map-info(fd) 0 nsec
> test_bpf_obj_id:PASS:get-prog-info(fd) 0 nsec
> test_bpf_obj_id:PASS:get-link-info(fd) 0 nsec
> test_bpf_obj_id:PASS:prog_attach 0 nsec
> test_bpf_obj_id:PASS:get-map-info(fd) 0 nsec
> test_bpf_obj_id:PASS:get-prog-info(fd) 0 nsec
> test_bpf_obj_id:PASS:get-link-info(fd) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd-bad-nr-map-ids 0 nsec
> test_bpf_obj_id:PASS:get-prog-info(next_id->fd) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd-bad-nr-map-ids 0 nsec
> test_bpf_obj_id:PASS:get-prog-info(next_id->fd) 0 nsec
> test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:check total prog id found by get_next_id 0 nsec
> test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:check get-map-info(next_id->fd) 0 nsec
> test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:check get-map-info(next_id->fd) 0 nsec
> test_bpf_obj_id:PASS:check total map id found by get_next_id 0 nsec
> test_bpf_obj_id:PASS:get-link-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-link-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-link-fd(next_id) 0 nsec
> test_bpf_obj_id:PASS:get-link-fd(next_id) 0 nsec
> test_bpf_obj_id:FAIL:get-link-fd(next_id) link_fd -11 next_id 7 errno 11
> test_bpf_obj_id:FAIL:check total link id found by get_next_id nr_id_found 0(2)
> #10 bpf_obj_id:FAIL
>
> >
> > >
> > > > >
> > > > > That's the reason I added test_results and print the threads' status
> > > > > as a progress indicator.
> > > > >
> > > > > >
> > > > > > > +       }
> > > > > > > +
> > > > > > > +       fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
> > > > > > > +               env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
> > > > > > > +
> > > > > > > +       /* reap all workers */
> > > > > > > +       for (int i = 0; i < env.workers; i++) {
> > > > > > > +               int wstatus, pid;
> > > > > > > +
> > > > > > > +               pid = waitpid(env.worker_pids[i], &wstatus, 0);
> > > > > >
> >
> > [...]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 bpf-next 3/3] selftests/bpf: pin some tests to worker 0
  2021-09-14  7:23   ` Andrii Nakryiko
@ 2021-09-15 22:15     ` sunyucong
  0 siblings, 0 replies; 12+ messages in thread
From: sunyucong @ 2021-09-15 22:15 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: Yucong Sun, Andrii Nakryiko, bpf

On Tue, Sep 14, 2021 at 3:23 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Mon, Sep 13, 2021 at 12:39 PM Yucong Sun <fallentree@fb.com> wrote:
> >
> > From: Yucong Sun <sunyucong@gmail.com>
> >
> > This patch adds a simple name list to pin tests that fail when run in
> > parallel to worker 0. On encountering one of these tests, all other threads
> > will wait on a condition variable, which worker 0 will signal once the test
> > has finished running.
> >
> > Additionally, before running such a test, worker 0 also checks and waits
> > until all other threads have finished their work, to make sure the pinned
> > test really is the only test running in the system.
> >
> > After this change, all tests should pass in '-j' mode.
> >
> > Signed-off-by: Yucong Sun <sunyucong@gmail.com>
> > ---
> >  tools/testing/selftests/bpf/test_progs.c | 109 ++++++++++++++++++++---
> >  1 file changed, 97 insertions(+), 12 deletions(-)
> >
> > diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
> > index f0eeb17c348d..dc72b3f526a6 100644
> > --- a/tools/testing/selftests/bpf/test_progs.c
> > +++ b/tools/testing/selftests/bpf/test_progs.c
> > @@ -18,6 +18,16 @@
> >  #include <sys/socket.h>
> >  #include <sys/un.h>
> >
> > +char *TESTS_MUST_SERIALIZE[] = {
> > +       "netcnt",
> > +       "select_reuseport",
> > +       "sockmap_listen",
> > +       "tc_redirect",
> > +       "xdp_bonding",
> > +       "xdp_info",
> > +       NULL,
> > +};
> > +
>
> I was actually thinking to encode this as part of the test function
> name itself. I.e.,
>
> void test_vmlinux(void) for parallelizable tests
>
> and
>
> void serial_test_vmlinux(void)
>
>
> Then we can use weak symbols to "detect" which one is actually defined
> for any given test.:
>
> struct prog_test_def {
>     void (*run_test)(void);
>     void (*run_test_serial)(void);
>     ...
> };
>
> then test_progs.c will define
>
> #define DEFINE_TEST(name) extern void test_##name(void) __weak; extern
> void serial_test_##name(void) __weak;
>
> and that prog_test_def (though another DEFINE_TEST macro definition)
> will be initialized as
>
> {
>     .test_name = #name,
>     .run_test = &test_##name,
>     .run_test_serial = &serial_test_##name,
> }
>
>
> After all that checking which of .run_test or .run_test_serial isn't
> NULL (and erroring out if both or neither) will determine whether the
> test is serial or parallel. Keeping this knowledge next to test itself
> is nice in that it will never get out of sync, will never be
> mismatched, etc (and it's very obvious when looking at the test file
> itself).
>
> Thoughts?

Great idea!

I ended up doing "serial_test_NAME()" and "run_serial_test()", but the
idea is the same.
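
For the archives, the shape of it, roughly (a simplified sketch; the
startup check and exit code are illustrative):

/* test_progs.h: declare both variants weak, so the address of
 * whichever one a test file does not define is NULL */
#define DEFINE_TEST(name)                               \
        extern void test_##name(void) __weak;           \
        extern void serial_test_##name(void) __weak;

/* test_progs.c: a second expansion of the same list fills the table */
#define DEFINE_TEST(name) {                             \
        .test_name = #name,                             \
        .run_test = &test_##name,                       \
        .run_serial_test = &serial_test_##name,         \
},

/* at startup, exactly one variant must be defined per test */
if (!test->run_test == !test->run_serial_test) {
        fprintf(stderr, "Test %d:%s must define one of test_%s() or serial_test_%s()\n",
                test->test_num, test->test_name,
                test->test_name, test->test_name);
        exit(1);
}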

>
>
> [...]

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2021-09-15 22:16 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-13 19:39 [PATCH v4 bpf-next 1/3] selftests/bpf: Add parallelism to test_progs Yucong Sun
2021-09-13 19:39 ` [PATCH v4 bpf-next 2/3] selftests/bpf: add per worker cgroup suffix Yucong Sun
2021-09-13 19:39 ` [PATCH v4 bpf-next 3/3] selftests/bpf: pin some tests to worker 0 Yucong Sun
2021-09-14  7:23   ` Andrii Nakryiko
2021-09-15 22:15     ` sunyucong
2021-09-14  7:11 ` [PATCH v4 bpf-next 1/3] selftests/bpf: Add parallelism to test_progs Andrii Nakryiko
2021-09-15 13:29   ` sunyucong
2021-09-15 16:00     ` Andrii Nakryiko
2021-09-15 16:56       ` sunyucong
2021-09-15 17:36         ` Andrii Nakryiko
2021-09-15 17:43           ` sunyucong
2021-09-15 17:52             ` Andrii Nakryiko
