All of lore.kernel.org
 help / color / mirror / Atom feed
* [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure
@ 2019-05-16 11:27 Petri Latvala
  2019-05-16 11:27 ` [igt-dev] [PATCH i-g-t 2/2] HAX: Check all conditions to abort Petri Latvala
                   ` (4 more replies)
  0 siblings, 5 replies; 8+ messages in thread
From: Petri Latvala @ 2019-05-16 11:27 UTC (permalink / raw)
  To: igt-dev; +Cc: Tomi Sarvela, Petri Latvala

If the network goes down while testing, CI tends to interpret that as
the device being down, cutting its power after a while. This causes an
incomplete to an innocent test, increasing noise in the results.

A new flag to --abort-on-monitored-error, "ping", uses liboping to
ping a host configured in .igtrc with one ping after each test
execution and aborts the run if there is no reply in a hardcoded
amount of time.

v2:
 - Use a higher timeout
 - Allow hostname configuration from environment

Signed-off-by: Petri Latvala <petri.latvala@intel.com>
Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
Cc: Martin Peres <martin.peres@linux.intel.com>
Cc: Tomi Sarvela <tomi.p.sarvela@intel.com>
---
 meson.build        |   1 +
 meson_options.txt  |   6 ++
 runner/executor.c  | 135 +++++++++++++++++++++++++++++++++++++++++++++
 runner/meson.build |  12 +++-
 runner/settings.c  |   4 ++
 runner/settings.h  |   5 +-
 6 files changed, 160 insertions(+), 3 deletions(-)

diff --git a/meson.build b/meson.build
index a05e912c..aac67f1a 100644
--- a/meson.build
+++ b/meson.build
@@ -100,6 +100,7 @@ build_tests = get_option('build_tests')
 with_libdrm = get_option('with_libdrm')
 with_libunwind = get_option('with_libunwind')
 build_runner = get_option('build_runner')
+with_oping = get_option('with_oping')
 
 _build_overlay = build_overlay != 'false'
 _overlay_required = build_overlay == 'true'
diff --git a/meson_options.txt b/meson_options.txt
index 888efe56..935f883e 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -58,6 +58,12 @@ option('build_runner',
        choices : ['auto', 'true', 'false'],
        description : 'Build test runner')
 
+option('with_oping',
+       type : 'combo',
+       value : 'auto',
+       choices : ['auto', 'true', 'false'],
+       description : 'Build igt_runner with liboping')
+
 option('use_rpath',
        type : 'boolean',
        value : false,
diff --git a/runner/executor.c b/runner/executor.c
index 7e5fbe8f..be78474c 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -1,6 +1,10 @@
 #include <errno.h>
 #include <fcntl.h>
+#include <glib.h>
 #include <linux/watchdog.h>
+#if HAVE_OPING
+#include <oping.h>
+#endif
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -108,6 +112,133 @@ static void ping_watchdogs(void)
 	}
 }
 
+#if HAVE_OPING
+static pingobj_t *pingobj = NULL;
+
+static bool load_ping_config_from_file(void)
+{
+	char *key_file_env = NULL;
+	char *key_file_loc = NULL;
+	GError *error = NULL;
+	GKeyFile *key_file = NULL;
+	const char *ping_hostname;
+	int ret;
+
+	/* Determine igt config path */
+	key_file_env = getenv("IGT_CONFIG_PATH");
+	if (key_file_env) {
+		key_file_loc = strdup(key_file_env);
+	} else {
+		asprintf(&key_file_loc, "%s/.igtrc", g_get_home_dir());
+	}
+
+	/* Load igt config file */
+	key_file = g_key_file_new();
+	ret = g_key_file_load_from_file(key_file, key_file_loc,
+					G_KEY_FILE_NONE, &error);
+	free(key_file_loc);
+
+	if (!ret) {
+		g_error_free(error);
+		g_key_file_free(key_file);
+		return false;
+	}
+
+	g_clear_error(&error);
+
+	ping_hostname =
+		g_key_file_get_string(key_file, "DUT",
+				      "PingHostName", &error);
+
+	g_clear_error(&error);
+	g_key_file_free(key_file);
+
+	if (!ping_hostname)
+		return false;
+
+	if (ping_host_add(pingobj, ping_hostname)) {
+		fprintf(stderr,
+			"abort on ping: Cannot use hostname from config file\n");
+		return false;
+	}
+
+	return true;
+}
+
+static bool load_ping_config_from_env(void)
+{
+	const char *ping_hostname;
+
+	ping_hostname = getenv("IGT_PING_HOSTNAME");
+	if (!ping_hostname)
+		return false;
+
+	if (ping_host_add(pingobj, ping_hostname)) {
+		fprintf(stderr,
+			"abort on ping: Cannot use hostname from environment\n");
+		return false;
+	}
+
+	return true;
+}
+
+#endif
+
+static void ping_config(void)
+{
+#if HAVE_OPING
+	double timeout = 20.0; /* Fair dice roll */
+
+	if (pingobj)
+		return;
+
+	pingobj = ping_construct();
+
+	/* Try env first, then config file */
+	if (!load_ping_config_from_env() && !load_ping_config_from_file()) {
+		fprintf(stderr,
+			"abort on ping: No host to ping configured\n");
+		ping_destroy(pingobj);
+		pingobj = NULL;
+		return;
+	}
+
+	ping_setopt(pingobj, PING_OPT_TIMEOUT, &timeout);
+#endif
+}
+
+static char *handle_ping(void)
+{
+#if HAVE_OPING
+	if (pingobj) {
+		pingobj_iter_t *iter;
+
+		ping_send(pingobj);
+
+		for (iter = ping_iterator_get(pingobj);
+		     iter != NULL;
+		     iter = ping_iterator_next(iter)) {
+			double latency;
+			size_t len = sizeof(latency);
+
+			ping_iterator_get_info(iter,
+					       PING_INFO_LATENCY,
+					       &latency,
+					       &len);
+			if (latency < 0.0) {
+				char *reason;
+
+				asprintf(&reason,
+					 "Ping host did not respond to ping, network down");
+				return reason;
+			}
+		}
+	}
+#endif
+
+	return NULL;
+}
+
 static char *handle_lockdep(void)
 {
 	const char *header = "Lockdep not active\n\n/proc/lockdep_stats contents:\n";
@@ -197,6 +328,7 @@ static const struct {
 } abort_handlers[] = {
 	{ ABORT_LOCKDEP, handle_lockdep },
 	{ ABORT_TAINT, handle_taint },
+	{ ABORT_PING, handle_ping },
 	{ 0, 0 },
 };
 
@@ -1288,6 +1420,9 @@ bool execute(struct execute_state *state,
 
 	init_watchdogs(settings);
 
+	if (settings->abort_mask & ABORT_PING)
+		ping_config();
+
 	if (!uname(&unamebuf)) {
 		dprintf(unamefd, "%s %s %s %s %s\n",
 			unamebuf.sysname,
diff --git a/runner/meson.build b/runner/meson.build
index b658f1d2..a54aaab4 100644
--- a/runner/meson.build
+++ b/runner/meson.build
@@ -1,4 +1,13 @@
 jsonc = dependency('json-c', required: _runner_required)
+runner_deps = [jsonc, glib]
+have_oping = []
+if with_oping != 'false'
+	oping = dependency('liboping', required: with_oping == 'true')
+	if oping.found()
+		runner_deps += oping
+		have_oping = '-DHAVE_OPING=1'
+	endif
+endif
 
 runnerlib_sources = [ 'settings.c',
 		      'job_list.c',
@@ -21,7 +30,8 @@ if _build_runner and _build_tests and jsonc.found()
 
 	runnerlib = static_library('igt_runner', runnerlib_sources,
 				   include_directories : inc,
-				   dependencies : [jsonc, glib])
+				   c_args : have_oping,
+				   dependencies : runner_deps)
 
 	runner = executable('igt_runner', runner_sources,
 			    link_with : runnerlib,
diff --git a/runner/settings.c b/runner/settings.c
index ad38ae8d..b57d1a6a 100644
--- a/runner/settings.c
+++ b/runner/settings.c
@@ -48,6 +48,7 @@ static struct {
 } abort_conditions[] = {
 	{ ABORT_TAINT, "taint" },
 	{ ABORT_LOCKDEP, "lockdep" },
+	{ ABORT_PING, "ping" },
 	{ ABORT_ALL, "all" },
 	{ 0, 0 },
 };
@@ -136,6 +137,9 @@ static const char *usage_str =
 	"                        Possible conditions:\n"
 	"                         lockdep - abort when kernel lockdep has been angered.\n"
 	"                         taint   - abort when kernel becomes fatally tainted.\n"
+	"                         ping    - abort when a host configured in .igtrc or\n"
+	"                                   environment variable IGT_PING_HOSTNAME does\n"
+	"                                   not respond to ping.\n"
 	"                         all     - abort for all of the above.\n"
 	"  -s, --sync            Sync results to disk after every test\n"
 	"  -l {quiet,verbose,dummy}, --log-level {quiet,verbose,dummy}\n"
diff --git a/runner/settings.h b/runner/settings.h
index 0a1ee08d..2b6e65d0 100644
--- a/runner/settings.h
+++ b/runner/settings.h
@@ -15,9 +15,10 @@ enum {
 
 #define ABORT_TAINT   (1 << 0)
 #define ABORT_LOCKDEP (1 << 1)
-#define ABORT_ALL     (ABORT_TAINT | ABORT_LOCKDEP)
+#define ABORT_PING    (1 << 2)
+#define ABORT_ALL     (ABORT_TAINT | ABORT_LOCKDEP | ABORT_PING)
 
-_Static_assert(ABORT_ALL == (ABORT_TAINT | ABORT_LOCKDEP), "ABORT_ALL must be all conditions bitwise or'd");
+_Static_assert(ABORT_ALL == (ABORT_TAINT | ABORT_LOCKDEP | ABORT_PING), "ABORT_ALL must be all conditions bitwise or'd");
 
 struct regex_list {
 	char **regex_strings;
-- 
2.19.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [igt-dev] [PATCH i-g-t 2/2] HAX: Check all conditions to abort
  2019-05-16 11:27 [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure Petri Latvala
@ 2019-05-16 11:27 ` Petri Latvala
  2019-05-16 12:23 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2,v2] runner: Add support for aborting on network failure Patchwork
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 8+ messages in thread
From: Petri Latvala @ 2019-05-16 11:27 UTC (permalink / raw)
  To: igt-dev; +Cc: Petri Latvala

---
 runner/executor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runner/executor.c b/runner/executor.c
index be78474c..72f2f020 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -339,7 +339,7 @@ static char *need_to_abort(const struct settings* settings)
 	for (it = abort_handlers; it->condition; it++) {
 		char *abort;
 
-		if (!(settings->abort_mask & it->condition))
+		if (false && !(settings->abort_mask & it->condition))
 			continue;
 
 		abort = it->handler();
-- 
2.19.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2,v2] runner: Add support for aborting on network failure
  2019-05-16 11:27 [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure Petri Latvala
  2019-05-16 11:27 ` [igt-dev] [PATCH i-g-t 2/2] HAX: Check all conditions to abort Petri Latvala
@ 2019-05-16 12:23 ` Patchwork
  2019-05-16 12:51 ` [igt-dev] [PATCH i-g-t 1/2 v2] " Daniel Vetter
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2019-05-16 12:23 UTC (permalink / raw)
  To: Petri Latvala; +Cc: igt-dev

== Series Details ==

Series: series starting with [i-g-t,1/2,v2] runner: Add support for aborting on network failure
URL   : https://patchwork.freedesktop.org/series/60711/
State : success

== Summary ==

CI Bug Log - changes from IGT_4993 -> IGTPW_2988
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/60711/revisions/1/mbox/

Known issues
------------

  Here are the changes found in IGTPW_2988 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live_contexts:
    - fi-bdw-gvtdvm:      [PASS][1] -> [DMESG-FAIL][2] ([fdo#110235])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/fi-bdw-gvtdvm/igt@i915_selftest@live_contexts.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/fi-bdw-gvtdvm/igt@i915_selftest@live_contexts.html

  * igt@kms_frontbuffer_tracking@basic:
    - fi-icl-u2:          [PASS][3] -> [FAIL][4] ([fdo#103167])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/fi-icl-u2/igt@kms_frontbuffer_tracking@basic.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/fi-icl-u2/igt@kms_frontbuffer_tracking@basic.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - fi-blb-e6850:       [PASS][5] -> [INCOMPLETE][6] ([fdo#107718])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/fi-blb-e6850/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/fi-blb-e6850/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html

  
#### Warnings ####

  * igt@i915_selftest@live_hangcheck:
    - fi-apl-guc:         [INCOMPLETE][7] ([fdo#103927] / [fdo#110624]) -> [DMESG-FAIL][8] ([fdo#110620])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/fi-apl-guc/igt@i915_selftest@live_hangcheck.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/fi-apl-guc/igt@i915_selftest@live_hangcheck.html

  * igt@runner@aborted:
    - fi-apl-guc:         [FAIL][9] ([fdo#110624]) -> [FAIL][10] ([fdo#110622])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/fi-apl-guc/igt@runner@aborted.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/fi-apl-guc/igt@runner@aborted.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#110235]: https://bugs.freedesktop.org/show_bug.cgi?id=110235
  [fdo#110620]: https://bugs.freedesktop.org/show_bug.cgi?id=110620
  [fdo#110622]: https://bugs.freedesktop.org/show_bug.cgi?id=110622
  [fdo#110624]: https://bugs.freedesktop.org/show_bug.cgi?id=110624


Participating hosts (52 -> 45)
------------------------------

  Missing    (7): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * IGT: IGT_4993 -> IGTPW_2988

  CI_DRM_6090: df27ebbc9ec0d8ae42e8cf3d7a3b29fd6baf4940 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_2988: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/
  IGT_4993: 2d41b51980257a7d0a3de30a9b3ea0e95b13df91 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure
  2019-05-16 11:27 [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure Petri Latvala
  2019-05-16 11:27 ` [igt-dev] [PATCH i-g-t 2/2] HAX: Check all conditions to abort Petri Latvala
  2019-05-16 12:23 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2,v2] runner: Add support for aborting on network failure Patchwork
@ 2019-05-16 12:51 ` Daniel Vetter
  2019-05-16 14:45 ` [igt-dev] ✓ Fi.CI.IGT: success for series starting with [i-g-t,1/2,v2] " Patchwork
  2019-05-20  6:17 ` [igt-dev] [PATCH i-g-t 1/2 v2] " Arkadiusz Hiler
  4 siblings, 0 replies; 8+ messages in thread
From: Daniel Vetter @ 2019-05-16 12:51 UTC (permalink / raw)
  To: Petri Latvala; +Cc: igt-dev, Tomi Sarvela

On Thu, May 16, 2019 at 02:27:54PM +0300, Petri Latvala wrote:
> If the network goes down while testing, CI tends to interpret that as
> the device being down, cutting its power after a while. This causes an
> incomplete to an innocent test, increasing noise in the results.
> 
> A new flag to --abort-on-monitored-error, "ping", uses liboping to
> ping a host configured in .igtrc with one ping after each test
> execution and aborts the run if there is no reply in a hardcoded
> amount of time.
> 
> v2:
>  - Use a higher timeout
>  - Allow hostname configuration from environment
> 
> Signed-off-by: Petri Latvala <petri.latvala@intel.com>
> Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
> Cc: Martin Peres <martin.peres@linux.intel.com>
> Cc: Tomi Sarvela <tomi.p.sarvela@intel.com>
> ---
>  meson.build        |   1 +
>  meson_options.txt  |   6 ++
>  runner/executor.c  | 135 +++++++++++++++++++++++++++++++++++++++++++++
>  runner/meson.build |  12 +++-
>  runner/settings.c  |   4 ++
>  runner/settings.h  |   5 +-
>  6 files changed, 160 insertions(+), 3 deletions(-)
> 
> diff --git a/meson.build b/meson.build
> index a05e912c..aac67f1a 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -100,6 +100,7 @@ build_tests = get_option('build_tests')
>  with_libdrm = get_option('with_libdrm')
>  with_libunwind = get_option('with_libunwind')
>  build_runner = get_option('build_runner')
> +with_oping = get_option('with_oping')
>  
>  _build_overlay = build_overlay != 'false'
>  _overlay_required = build_overlay == 'true'
> diff --git a/meson_options.txt b/meson_options.txt
> index 888efe56..935f883e 100644
> --- a/meson_options.txt
> +++ b/meson_options.txt
> @@ -58,6 +58,12 @@ option('build_runner',
>         choices : ['auto', 'true', 'false'],
>         description : 'Build test runner')
>  
> +option('with_oping',
> +       type : 'combo',
> +       value : 'auto',
> +       choices : ['auto', 'true', 'false'],
> +       description : 'Build igt_runner with liboping')
> +
>  option('use_rpath',
>         type : 'boolean',
>         value : false,
> diff --git a/runner/executor.c b/runner/executor.c
> index 7e5fbe8f..be78474c 100644
> --- a/runner/executor.c
> +++ b/runner/executor.c
> @@ -1,6 +1,10 @@
>  #include <errno.h>
>  #include <fcntl.h>
> +#include <glib.h>
>  #include <linux/watchdog.h>
> +#if HAVE_OPING
> +#include <oping.h>
> +#endif
>  #include <signal.h>
>  #include <stdio.h>
>  #include <stdlib.h>
> @@ -108,6 +112,133 @@ static void ping_watchdogs(void)
>  	}
>  }
>  
> +#if HAVE_OPING
> +static pingobj_t *pingobj = NULL;
> +
> +static bool load_ping_config_from_file(void)
> +{
> +	char *key_file_env = NULL;
> +	char *key_file_loc = NULL;
> +	GError *error = NULL;
> +	GKeyFile *key_file = NULL;
> +	const char *ping_hostname;
> +	int ret;
> +
> +	/* Determine igt config path */
> +	key_file_env = getenv("IGT_CONFIG_PATH");
> +	if (key_file_env) {
> +		key_file_loc = strdup(key_file_env);
> +	} else {
> +		asprintf(&key_file_loc, "%s/.igtrc", g_get_home_dir());
> +	}
> +
> +	/* Load igt config file */
> +	key_file = g_key_file_new();
> +	ret = g_key_file_load_from_file(key_file, key_file_loc,
> +					G_KEY_FILE_NONE, &error);
> +	free(key_file_loc);
> +
> +	if (!ret) {
> +		g_error_free(error);
> +		g_key_file_free(key_file);
> +		return false;
> +	}
> +
> +	g_clear_error(&error);
> +
> +	ping_hostname =
> +		g_key_file_get_string(key_file, "DUT",
> +				      "PingHostName", &error);
> +
> +	g_clear_error(&error);
> +	g_key_file_free(key_file);
> +
> +	if (!ping_hostname)
> +		return false;
> +
> +	if (ping_host_add(pingobj, ping_hostname)) {
> +		fprintf(stderr,
> +			"abort on ping: Cannot use hostname from config file\n");
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
> +static bool load_ping_config_from_env(void)
> +{
> +	const char *ping_hostname;
> +
> +	ping_hostname = getenv("IGT_PING_HOSTNAME");
> +	if (!ping_hostname)
> +		return false;
> +
> +	if (ping_host_add(pingobj, ping_hostname)) {
> +		fprintf(stderr,
> +			"abort on ping: Cannot use hostname from environment\n");
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
> +#endif
> +
> +static void ping_config(void)
> +{
> +#if HAVE_OPING
> +	double timeout = 20.0; /* Fair dice roll */
> +
> +	if (pingobj)
> +		return;
> +
> +	pingobj = ping_construct();
> +
> +	/* Try env first, then config file */
> +	if (!load_ping_config_from_env() && !load_ping_config_from_file()) {
> +		fprintf(stderr,
> +			"abort on ping: No host to ping configured\n");
> +		ping_destroy(pingobj);
> +		pingobj = NULL;
> +		return;
> +	}
> +
> +	ping_setopt(pingobj, PING_OPT_TIMEOUT, &timeout);
> +#endif
> +}
> +
> +static char *handle_ping(void)
> +{
> +#if HAVE_OPING
> +	if (pingobj) {
> +		pingobj_iter_t *iter;
> +
> +		ping_send(pingobj);

I think we should try to ping a few times, in case our ping (or the reply
got lost). Networks aren't 100% reliable, even when they work. Throwing
out 3 pings or so should be good enough, maybe try that twice.

As long as we die quicker than jenkins declares us dead we should be good
enough.

btw, could we also somehow check the ssh connection itself that we're
running under? I think that would more directly make sure that Jenkins
still knows we're doing well ...
-Daniel

> +
> +		for (iter = ping_iterator_get(pingobj);
> +		     iter != NULL;
> +		     iter = ping_iterator_next(iter)) {
> +			double latency;
> +			size_t len = sizeof(latency);
> +
> +			ping_iterator_get_info(iter,
> +					       PING_INFO_LATENCY,
> +					       &latency,
> +					       &len);
> +			if (latency < 0.0) {
> +				char *reason;
> +
> +				asprintf(&reason,
> +					 "Ping host did not respond to ping, network down");
> +				return reason;
> +			}
> +		}
> +	}
> +#endif
> +
> +	return NULL;
> +}
> +
>  static char *handle_lockdep(void)
>  {
>  	const char *header = "Lockdep not active\n\n/proc/lockdep_stats contents:\n";
> @@ -197,6 +328,7 @@ static const struct {
>  } abort_handlers[] = {
>  	{ ABORT_LOCKDEP, handle_lockdep },
>  	{ ABORT_TAINT, handle_taint },
> +	{ ABORT_PING, handle_ping },
>  	{ 0, 0 },
>  };
>  
> @@ -1288,6 +1420,9 @@ bool execute(struct execute_state *state,
>  
>  	init_watchdogs(settings);
>  
> +	if (settings->abort_mask & ABORT_PING)
> +		ping_config();
> +
>  	if (!uname(&unamebuf)) {
>  		dprintf(unamefd, "%s %s %s %s %s\n",
>  			unamebuf.sysname,
> diff --git a/runner/meson.build b/runner/meson.build
> index b658f1d2..a54aaab4 100644
> --- a/runner/meson.build
> +++ b/runner/meson.build
> @@ -1,4 +1,13 @@
>  jsonc = dependency('json-c', required: _runner_required)
> +runner_deps = [jsonc, glib]
> +have_oping = []
> +if with_oping != 'false'
> +	oping = dependency('liboping', required: with_oping == 'true')
> +	if oping.found()
> +		runner_deps += oping
> +		have_oping = '-DHAVE_OPING=1'
> +	endif
> +endif
>  
>  runnerlib_sources = [ 'settings.c',
>  		      'job_list.c',
> @@ -21,7 +30,8 @@ if _build_runner and _build_tests and jsonc.found()
>  
>  	runnerlib = static_library('igt_runner', runnerlib_sources,
>  				   include_directories : inc,
> -				   dependencies : [jsonc, glib])
> +				   c_args : have_oping,
> +				   dependencies : runner_deps)
>  
>  	runner = executable('igt_runner', runner_sources,
>  			    link_with : runnerlib,
> diff --git a/runner/settings.c b/runner/settings.c
> index ad38ae8d..b57d1a6a 100644
> --- a/runner/settings.c
> +++ b/runner/settings.c
> @@ -48,6 +48,7 @@ static struct {
>  } abort_conditions[] = {
>  	{ ABORT_TAINT, "taint" },
>  	{ ABORT_LOCKDEP, "lockdep" },
> +	{ ABORT_PING, "ping" },
>  	{ ABORT_ALL, "all" },
>  	{ 0, 0 },
>  };
> @@ -136,6 +137,9 @@ static const char *usage_str =
>  	"                        Possible conditions:\n"
>  	"                         lockdep - abort when kernel lockdep has been angered.\n"
>  	"                         taint   - abort when kernel becomes fatally tainted.\n"
> +	"                         ping    - abort when a host configured in .igtrc or\n"
> +	"                                   environment variable IGT_PING_HOSTNAME does\n"
> +	"                                   not respond to ping.\n"
>  	"                         all     - abort for all of the above.\n"
>  	"  -s, --sync            Sync results to disk after every test\n"
>  	"  -l {quiet,verbose,dummy}, --log-level {quiet,verbose,dummy}\n"
> diff --git a/runner/settings.h b/runner/settings.h
> index 0a1ee08d..2b6e65d0 100644
> --- a/runner/settings.h
> +++ b/runner/settings.h
> @@ -15,9 +15,10 @@ enum {
>  
>  #define ABORT_TAINT   (1 << 0)
>  #define ABORT_LOCKDEP (1 << 1)
> -#define ABORT_ALL     (ABORT_TAINT | ABORT_LOCKDEP)
> +#define ABORT_PING    (1 << 2)
> +#define ABORT_ALL     (ABORT_TAINT | ABORT_LOCKDEP | ABORT_PING)
>  
> -_Static_assert(ABORT_ALL == (ABORT_TAINT | ABORT_LOCKDEP), "ABORT_ALL must be all conditions bitwise or'd");
> +_Static_assert(ABORT_ALL == (ABORT_TAINT | ABORT_LOCKDEP | ABORT_PING), "ABORT_ALL must be all conditions bitwise or'd");
>  
>  struct regex_list {
>  	char **regex_strings;
> -- 
> 2.19.1
> 
> _______________________________________________
> igt-dev mailing list
> igt-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/igt-dev

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [igt-dev] ✓ Fi.CI.IGT: success for series starting with [i-g-t,1/2,v2] runner: Add support for aborting on network failure
  2019-05-16 11:27 [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure Petri Latvala
                   ` (2 preceding siblings ...)
  2019-05-16 12:51 ` [igt-dev] [PATCH i-g-t 1/2 v2] " Daniel Vetter
@ 2019-05-16 14:45 ` Patchwork
  2019-05-20  6:17 ` [igt-dev] [PATCH i-g-t 1/2 v2] " Arkadiusz Hiler
  4 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2019-05-16 14:45 UTC (permalink / raw)
  To: Petri Latvala; +Cc: igt-dev

== Series Details ==

Series: series starting with [i-g-t,1/2,v2] runner: Add support for aborting on network failure
URL   : https://patchwork.freedesktop.org/series/60711/
State : success

== Summary ==

CI Bug Log - changes from IGT_4993_full -> IGTPW_2988_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/60711/revisions/1/mbox/

Known issues
------------

  Here are the changes found in IGTPW_2988_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_eio@reset-stress:
    - shard-snb:          [PASS][1] -> [FAIL][2] ([fdo#109661])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-snb5/igt@gem_eio@reset-stress.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-snb6/igt@gem_eio@reset-stress.html

  * igt@gem_tiled_swapping@non-threaded:
    - shard-iclb:         [PASS][3] -> [FAIL][4] ([fdo#108686])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb5/igt@gem_tiled_swapping@non-threaded.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb7/igt@gem_tiled_swapping@non-threaded.html
    - shard-hsw:          [PASS][5] -> [FAIL][6] ([fdo#108686])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-hsw8/igt@gem_tiled_swapping@non-threaded.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-hsw1/igt@gem_tiled_swapping@non-threaded.html

  * igt@i915_pm_rpm@fences:
    - shard-kbl:          [PASS][7] -> [DMESG-WARN][8] ([fdo#103558] / [fdo#105602]) +20 similar issues
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-kbl1/igt@i915_pm_rpm@fences.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-kbl4/igt@i915_pm_rpm@fences.html

  * igt@i915_pm_rpm@i2c:
    - shard-iclb:         [PASS][9] -> [FAIL][10] ([fdo#104097])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb5/igt@i915_pm_rpm@i2c.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb6/igt@i915_pm_rpm@i2c.html

  * igt@i915_suspend@fence-restore-tiled2untiled:
    - shard-apl:          [PASS][11] -> [DMESG-WARN][12] ([fdo#108566]) +4 similar issues
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-apl8/igt@i915_suspend@fence-restore-tiled2untiled.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-apl8/igt@i915_suspend@fence-restore-tiled2untiled.html

  * igt@kms_cursor_legacy@2x-nonblocking-modeset-vs-cursor-atomic:
    - shard-glk:          [PASS][13] -> [FAIL][14] ([fdo#107409])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-glk5/igt@kms_cursor_legacy@2x-nonblocking-modeset-vs-cursor-atomic.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-glk9/igt@kms_cursor_legacy@2x-nonblocking-modeset-vs-cursor-atomic.html

  * igt@kms_fence_pin_leak:
    - shard-hsw:          [PASS][15] -> [DMESG-WARN][16] ([fdo#102614]) +2 similar issues
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-hsw2/igt@kms_fence_pin_leak.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-hsw5/igt@kms_fence_pin_leak.html

  * igt@kms_flip@flip-vs-suspend:
    - shard-kbl:          [PASS][17] -> [INCOMPLETE][18] ([fdo#103665])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-kbl7/igt@kms_flip@flip-vs-suspend.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-kbl6/igt@kms_flip@flip-vs-suspend.html

  * igt@kms_flip@modeset-vs-vblank-race-interruptible:
    - shard-glk:          [PASS][19] -> [FAIL][20] ([fdo#103060])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-glk4/igt@kms_flip@modeset-vs-vblank-race-interruptible.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-glk6/igt@kms_flip@modeset-vs-vblank-race-interruptible.html

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-shrfb-draw-render:
    - shard-hsw:          [PASS][21] -> [DMESG-FAIL][22] ([fdo#102614] / [fdo#103167])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-hsw6/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-shrfb-draw-render.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-hsw5/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-shrfb-draw-render.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-render:
    - shard-iclb:         [PASS][23] -> [FAIL][24] ([fdo#103167]) +1 similar issue
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb4/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-render.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb5/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-render.html

  * igt@kms_plane_lowres@pipe-a-tiling-y:
    - shard-iclb:         [PASS][25] -> [FAIL][26] ([fdo#103166])
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb2/igt@kms_plane_lowres@pipe-a-tiling-y.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb2/igt@kms_plane_lowres@pipe-a-tiling-y.html

  * igt@kms_psr@psr2_cursor_plane_onoff:
    - shard-iclb:         [PASS][27] -> [SKIP][28] ([fdo#109441]) +5 similar issues
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb2/igt@kms_psr@psr2_cursor_plane_onoff.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb6/igt@kms_psr@psr2_cursor_plane_onoff.html

  * igt@kms_vblank@pipe-c-ts-continuation-modeset-hang:
    - shard-hsw:          [PASS][29] -> [INCOMPLETE][30] ([fdo#103540])
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-hsw5/igt@kms_vblank@pipe-c-ts-continuation-modeset-hang.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-hsw4/igt@kms_vblank@pipe-c-ts-continuation-modeset-hang.html

  
#### Possible fixes ####

  * igt@gem_mocs_settings@mocs-settings-render:
    - shard-iclb:         [INCOMPLETE][31] ([fdo#107713]) -> [PASS][32]
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb7/igt@gem_mocs_settings@mocs-settings-render.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb5/igt@gem_mocs_settings@mocs-settings-render.html

  * {igt@kms_cursor_crc@pipe-a-cursor-128x128-onscreen}:
    - shard-kbl:          [FAIL][33] ([fdo#103232]) -> [PASS][34] +1 similar issue
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-kbl2/igt@kms_cursor_crc@pipe-a-cursor-128x128-onscreen.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-kbl1/igt@kms_cursor_crc@pipe-a-cursor-128x128-onscreen.html

  * {igt@kms_cursor_crc@pipe-c-cursor-256x85-onscreen}:
    - shard-apl:          [FAIL][35] ([fdo#103232]) -> [PASS][36] +1 similar issue
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-apl4/igt@kms_cursor_crc@pipe-c-cursor-256x85-onscreen.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-apl2/igt@kms_cursor_crc@pipe-c-cursor-256x85-onscreen.html

  * igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic:
    - shard-glk:          [FAIL][37] ([fdo#104873]) -> [PASS][38]
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-glk2/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-glk5/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic.html

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-shrfb-draw-render:
    - shard-iclb:         [FAIL][39] ([fdo#103167]) -> [PASS][40] +1 similar issue
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb4/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-shrfb-draw-render.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb5/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-shrfb-draw-render.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-cur-indfb-onoff:
    - shard-iclb:         [INCOMPLETE][41] ([fdo#106978] / [fdo#107713]) -> [PASS][42]
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb7/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-cur-indfb-onoff.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb3/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-cur-indfb-onoff.html

  * igt@kms_plane_lowres@pipe-a-tiling-x:
    - shard-iclb:         [FAIL][43] ([fdo#103166]) -> [PASS][44]
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb6/igt@kms_plane_lowres@pipe-a-tiling-x.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb2/igt@kms_plane_lowres@pipe-a-tiling-x.html

  * igt@kms_psr@psr2_sprite_render:
    - shard-iclb:         [SKIP][45] ([fdo#109441]) -> [PASS][46] +1 similar issue
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb7/igt@kms_psr@psr2_sprite_render.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb2/igt@kms_psr@psr2_sprite_render.html

  * igt@kms_setmode@basic:
    - shard-apl:          [FAIL][47] ([fdo#99912]) -> [PASS][48]
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-apl2/igt@kms_setmode@basic.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-apl1/igt@kms_setmode@basic.html

  * igt@kms_vblank@pipe-c-ts-continuation-suspend:
    - shard-apl:          [DMESG-WARN][49] ([fdo#108566]) -> [PASS][50] +3 similar issues
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-apl1/igt@kms_vblank@pipe-c-ts-continuation-suspend.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-apl3/igt@kms_vblank@pipe-c-ts-continuation-suspend.html

  
#### Warnings ####

  * igt@gem_mmap_gtt@forked-big-copy-xy:
    - shard-iclb:         [TIMEOUT][51] ([fdo#109673]) -> [INCOMPLETE][52] ([fdo#107713] / [fdo#109100])
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb7/igt@gem_mmap_gtt@forked-big-copy-xy.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb4/igt@gem_mmap_gtt@forked-big-copy-xy.html

  * igt@kms_busy@extended-modeset-hang-newfb-render-d:
    - shard-kbl:          [SKIP][53] ([fdo#109271] / [fdo#109278]) -> [SKIP][54] ([fdo#105602] / [fdo#109271] / [fdo#109278]) +2 similar issues
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-kbl1/igt@kms_busy@extended-modeset-hang-newfb-render-d.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-kbl4/igt@kms_busy@extended-modeset-hang-newfb-render-d.html

  * igt@kms_panel_fitting@legacy:
    - shard-kbl:          [SKIP][55] ([fdo#109271]) -> [SKIP][56] ([fdo#105602] / [fdo#109271]) +15 similar issues
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-kbl3/igt@kms_panel_fitting@legacy.html
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-kbl4/igt@kms_panel_fitting@legacy.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#102614]: https://bugs.freedesktop.org/show_bug.cgi?id=102614
  [fdo#103060]: https://bugs.freedesktop.org/show_bug.cgi?id=103060
  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103540]: https://bugs.freedesktop.org/show_bug.cgi?id=103540
  [fdo#103558]: https://bugs.freedesktop.org/show_bug.cgi?id=103558
  [fdo#103665]: https://bugs.freedesktop.org/show_bug.cgi?id=103665
  [fdo#104097]: https://bugs.freedesktop.org/show_bug.cgi?id=104097
  [fdo#104873]: https://bugs.freedesktop.org/show_bug.cgi?id=104873
  [fdo#105602]: https://bugs.freedesktop.org/show_bug.cgi?id=105602
  [fdo#106978]: https://bugs.freedesktop.org/show_bug.cgi?id=106978
  [fdo#107409]: https://bugs.freedesktop.org/show_bug.cgi?id=107409
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#108566]: https://bugs.freedesktop.org/show_bug.cgi?id=108566
  [fdo#108686]: https://bugs.freedesktop.org/show_bug.cgi?id=108686
  [fdo#109100]: https://bugs.freedesktop.org/show_bug.cgi?id=109100
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#109661]: https://bugs.freedesktop.org/show_bug.cgi?id=109661
  [fdo#109673]: https://bugs.freedesktop.org/show_bug.cgi?id=109673
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912


Participating hosts (7 -> 6)
------------------------------

  Missing    (1): shard-skl 


Build changes
-------------

  * IGT: IGT_4993 -> IGTPW_2988

  CI_DRM_6090: df27ebbc9ec0d8ae42e8cf3d7a3b29fd6baf4940 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_2988: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/
  IGT_4993: 2d41b51980257a7d0a3de30a9b3ea0e95b13df91 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure
  2019-05-16 11:27 [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure Petri Latvala
                   ` (3 preceding siblings ...)
  2019-05-16 14:45 ` [igt-dev] ✓ Fi.CI.IGT: success for series starting with [i-g-t,1/2,v2] " Patchwork
@ 2019-05-20  6:17 ` Arkadiusz Hiler
  2019-05-20  6:30   ` Ser, Simon
  2019-05-20  9:36   ` Petri Latvala
  4 siblings, 2 replies; 8+ messages in thread
From: Arkadiusz Hiler @ 2019-05-20  6:17 UTC (permalink / raw)
  To: Petri Latvala; +Cc: igt-dev, Tomi Sarvela

On Thu, May 16, 2019 at 02:27:54PM +0300, Petri Latvala wrote:
> If the network goes down while testing, CI tends to interpret that as
> the device being down, cutting its power after a while. This causes an
> incomplete to an innocent test, increasing noise in the results.
> 
> A new flag to --abort-on-monitored-error, "ping", uses liboping to
> ping a host configured in .igtrc with one ping after each test
> execution and aborts the run if there is no reply in a hardcoded
> amount of time.
> 
> v2:
>  - Use a higher timeout
>  - Allow hostname configuration from environment
> 
> Signed-off-by: Petri Latvala <petri.latvala@intel.com>
> Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
> Cc: Martin Peres <martin.peres@linux.intel.com>
> Cc: Tomi Sarvela <tomi.p.sarvela@intel.com>
> ---
>  meson.build        |   1 +
>  meson_options.txt  |   6 ++
>  runner/executor.c  | 135 +++++++++++++++++++++++++++++++++++++++++++++
>  runner/meson.build |  12 +++-
>  runner/settings.c  |   4 ++
>  runner/settings.h  |   5 +-
>  6 files changed, 160 insertions(+), 3 deletions(-)
> 
> diff --git a/meson.build b/meson.build
> index a05e912c..aac67f1a 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -100,6 +100,7 @@ build_tests = get_option('build_tests')
>  with_libdrm = get_option('with_libdrm')
>  with_libunwind = get_option('with_libunwind')
>  build_runner = get_option('build_runner')
> +with_oping = get_option('with_oping')

I find it a bit distracting that we have this bit here, whereas
everything else is in runner/meson.build. Is there any reason for that?

>  _build_overlay = build_overlay != 'false'
>  _overlay_required = build_overlay == 'true'
<SNIP>
> diff --git a/runner/meson.build b/runner/meson.build
> index b658f1d2..a54aaab4 100644
> --- a/runner/meson.build
> +++ b/runner/meson.build
> @@ -1,4 +1,13 @@
>  jsonc = dependency('json-c', required: _runner_required)
> +runner_deps = [jsonc, glib]
> +have_oping = []
> +if with_oping != 'false'
> +	oping = dependency('liboping', required: with_oping == 'true')
> +	if oping.found()
> +		runner_deps += oping
> +		have_oping = '-DHAVE_OPING=1'
> +	endif
> +endif

I think it is about time to:
https://mesonbuild.com/Release-notes-for-0-47-0.html#new-type-of-build-option-for-features

I'll check meson versions in various popular distros Today and see how
the patch would look like.

>  runnerlib_sources = [ 'settings.c',
>  		      'job_list.c',
> @@ -21,7 +30,8 @@ if _build_runner and _build_tests and jsonc.found()
>  
>  	runnerlib = static_library('igt_runner', runnerlib_sources,
>  				   include_directories : inc,
> -				   dependencies : [jsonc, glib])
> +				   c_args : have_oping,

I know that we have just a single dep here, but can we name this
runner_lib_c_args or something?

> +				   dependencies : runner_deps)
>  
>  	runner = executable('igt_runner', runner_sources,
>  			    link_with : runnerlib,

Other than that the C part looks ok. I share Daniel's sentiment on
network and NIC possibly being be a bit too "best-efforty" for aborting
on a single dropped ping.

It also seems like the feature haven't been properly tested, as the HAX
does not specify any IP address.

-- 
Cheers,
Arek
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure
  2019-05-20  6:17 ` [igt-dev] [PATCH i-g-t 1/2 v2] " Arkadiusz Hiler
@ 2019-05-20  6:30   ` Ser, Simon
  2019-05-20  9:36   ` Petri Latvala
  1 sibling, 0 replies; 8+ messages in thread
From: Ser, Simon @ 2019-05-20  6:30 UTC (permalink / raw)
  To: Hiler, Arkadiusz, Latvala, Petri; +Cc: igt-dev, Sarvela, Tomi P

On Mon, 2019-05-20 at 09:17 +0300, Arkadiusz Hiler wrote:
> On Thu, May 16, 2019 at 02:27:54PM +0300, Petri Latvala wrote:
> > If the network goes down while testing, CI tends to interpret that as
> > the device being down, cutting its power after a while. This causes an
> > incomplete to an innocent test, increasing noise in the results.
> > 
> > A new flag to --abort-on-monitored-error, "ping", uses liboping to
> > ping a host configured in .igtrc with one ping after each test
> > execution and aborts the run if there is no reply in a hardcoded
> > amount of time.
> > 
> > v2:
> >  - Use a higher timeout
> >  - Allow hostname configuration from environment
> > 
> > Signed-off-by: Petri Latvala <petri.latvala@intel.com>
> > Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
> > Cc: Martin Peres <martin.peres@linux.intel.com>
> > Cc: Tomi Sarvela <tomi.p.sarvela@intel.com>
> > ---
> >  meson.build        |   1 +
> >  meson_options.txt  |   6 ++
> >  runner/executor.c  | 135 +++++++++++++++++++++++++++++++++++++++++++++
> >  runner/meson.build |  12 +++-
> >  runner/settings.c  |   4 ++
> >  runner/settings.h  |   5 +-
> >  6 files changed, 160 insertions(+), 3 deletions(-)
> > 
> > diff --git a/meson.build b/meson.build
> > index a05e912c..aac67f1a 100644
> > --- a/meson.build
> > +++ b/meson.build
> > @@ -100,6 +100,7 @@ build_tests = get_option('build_tests')
> >  with_libdrm = get_option('with_libdrm')
> >  with_libunwind = get_option('with_libunwind')
> >  build_runner = get_option('build_runner')
> > +with_oping = get_option('with_oping')
> 
> I find it a bit distracting that we have this bit here, whereas
> everything else is in runner/meson.build. Is there any reason for that?
> 
> >  _build_overlay = build_overlay != 'false'
> >  _overlay_required = build_overlay == 'true'
> <SNIP>
> > diff --git a/runner/meson.build b/runner/meson.build
> > index b658f1d2..a54aaab4 100644
> > --- a/runner/meson.build
> > +++ b/runner/meson.build
> > @@ -1,4 +1,13 @@
> >  jsonc = dependency('json-c', required: _runner_required)
> > +runner_deps = [jsonc, glib]
> > +have_oping = []
> > +if with_oping != 'false'
> > +	oping = dependency('liboping', required: with_oping == 'true')
> > +	if oping.found()
> > +		runner_deps += oping
> > +		have_oping = '-DHAVE_OPING=1'
> > +	endif
> > +endif
> 
> I think it is about time to:
> https://mesonbuild.com/Release-notes-for-0-47-0.html#new-type-of-build-option-for-features
> 
> I'll check meson versions in various popular distros Today and see how
> the patch would look like.

Oh, +1 from me!

I'd really like to replace some complicated logic with feature options.

> >  runnerlib_sources = [ 'settings.c',
> >  		      'job_list.c',
> > @@ -21,7 +30,8 @@ if _build_runner and _build_tests and jsonc.found()
> >  
> >  	runnerlib = static_library('igt_runner', runnerlib_sources,
> >  				   include_directories : inc,
> > -				   dependencies : [jsonc, glib])
> > +				   c_args : have_oping,
> 
> I know that we have just a single dep here, but can we name this
> runner_lib_c_args or something?
> 
> > +				   dependencies : runner_deps)
> >  
> >  	runner = executable('igt_runner', runner_sources,
> >  			    link_with : runnerlib,
> 
> Other than that the C part looks ok. I share Daniel's sentiment on
> network and NIC possibly being be a bit too "best-efforty" for aborting
> on a single dropped ping.
> 
> It also seems like the feature haven't been properly tested, as the HAX
> does not specify any IP address.
> 
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure
  2019-05-20  6:17 ` [igt-dev] [PATCH i-g-t 1/2 v2] " Arkadiusz Hiler
  2019-05-20  6:30   ` Ser, Simon
@ 2019-05-20  9:36   ` Petri Latvala
  1 sibling, 0 replies; 8+ messages in thread
From: Petri Latvala @ 2019-05-20  9:36 UTC (permalink / raw)
  To: Arkadiusz Hiler; +Cc: igt-dev, Tomi Sarvela

On Mon, May 20, 2019 at 09:17:58AM +0300, Arkadiusz Hiler wrote:
> On Thu, May 16, 2019 at 02:27:54PM +0300, Petri Latvala wrote:
> > If the network goes down while testing, CI tends to interpret that as
> > the device being down, cutting its power after a while. This causes an
> > incomplete to an innocent test, increasing noise in the results.
> > 
> > A new flag to --abort-on-monitored-error, "ping", uses liboping to
> > ping a host configured in .igtrc with one ping after each test
> > execution and aborts the run if there is no reply in a hardcoded
> > amount of time.
> > 
> > v2:
> >  - Use a higher timeout
> >  - Allow hostname configuration from environment
> > 
> > Signed-off-by: Petri Latvala <petri.latvala@intel.com>
> > Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
> > Cc: Martin Peres <martin.peres@linux.intel.com>
> > Cc: Tomi Sarvela <tomi.p.sarvela@intel.com>
> > ---
> >  meson.build        |   1 +
> >  meson_options.txt  |   6 ++
> >  runner/executor.c  | 135 +++++++++++++++++++++++++++++++++++++++++++++
> >  runner/meson.build |  12 +++-
> >  runner/settings.c  |   4 ++
> >  runner/settings.h  |   5 +-
> >  6 files changed, 160 insertions(+), 3 deletions(-)
> > 
> > diff --git a/meson.build b/meson.build
> > index a05e912c..aac67f1a 100644
> > --- a/meson.build
> > +++ b/meson.build
> > @@ -100,6 +100,7 @@ build_tests = get_option('build_tests')
> >  with_libdrm = get_option('with_libdrm')
> >  with_libunwind = get_option('with_libunwind')
> >  build_runner = get_option('build_runner')
> > +with_oping = get_option('with_oping')
> 
> I find it a bit distracting that we have this bit here, whereas
> everything else is in runner/meson.build. Is there any reason for that?

Oh, I thought we had all option fetches in toplevel. Will move that to
runner/.


> 
> >  _build_overlay = build_overlay != 'false'
> >  _overlay_required = build_overlay == 'true'
> <SNIP>
> > diff --git a/runner/meson.build b/runner/meson.build
> > index b658f1d2..a54aaab4 100644
> > --- a/runner/meson.build
> > +++ b/runner/meson.build
> > @@ -1,4 +1,13 @@
> >  jsonc = dependency('json-c', required: _runner_required)
> > +runner_deps = [jsonc, glib]
> > +have_oping = []
> > +if with_oping != 'false'
> > +	oping = dependency('liboping', required: with_oping == 'true')
> > +	if oping.found()
> > +		runner_deps += oping
> > +		have_oping = '-DHAVE_OPING=1'
> > +	endif
> > +endif
> 
> I think it is about time to:
> https://mesonbuild.com/Release-notes-for-0-47-0.html#new-type-of-build-option-for-features
> 
> I'll check meson versions in various popular distros Today and see how
> the patch would look like.
>

Oh yes please!



> >  runnerlib_sources = [ 'settings.c',
> >  		      'job_list.c',
> > @@ -21,7 +30,8 @@ if _build_runner and _build_tests and jsonc.found()
> >  
> >  	runnerlib = static_library('igt_runner', runnerlib_sources,
> >  				   include_directories : inc,
> > -				   dependencies : [jsonc, glib])
> > +				   c_args : have_oping,
> 
> I know that we have just a single dep here, but can we name this
> runner_lib_c_args or something?


Sure.


> 
> > +				   dependencies : runner_deps)
> >  
> >  	runner = executable('igt_runner', runner_sources,
> >  			    link_with : runnerlib,
> 
> Other than that the C part looks ok. I share Daniel's sentiment on
> network and NIC possibly being be a bit too "best-efforty" for aborting
> on a single dropped ping.


Yes, I'll make it retry a few times in the next version.


> It also seems like the feature haven't been properly tested, as the HAX
> does not specify any IP address.


Why would it need a HAX? The .igtrc configuration from the time of v1
was still in place. See run.log, "abort on ping" doesn't appear on the
machines I tested, and build log (not publicly available) says
liboping was used.


-- 
Petri Latvala
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2019-05-20  9:36 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-16 11:27 [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure Petri Latvala
2019-05-16 11:27 ` [igt-dev] [PATCH i-g-t 2/2] HAX: Check all conditions to abort Petri Latvala
2019-05-16 12:23 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2,v2] runner: Add support for aborting on network failure Patchwork
2019-05-16 12:51 ` [igt-dev] [PATCH i-g-t 1/2 v2] " Daniel Vetter
2019-05-16 14:45 ` [igt-dev] ✓ Fi.CI.IGT: success for series starting with [i-g-t,1/2,v2] " Patchwork
2019-05-20  6:17 ` [igt-dev] [PATCH i-g-t 1/2 v2] " Arkadiusz Hiler
2019-05-20  6:30   ` Ser, Simon
2019-05-20  9:36   ` Petri Latvala

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.