* [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure
@ 2019-05-16 11:27 Petri Latvala
2019-05-16 11:27 ` [igt-dev] [PATCH i-g-t 2/2] HAX: Check all conditions to abort Petri Latvala
` (4 more replies)
0 siblings, 5 replies; 12+ messages in thread
From: Petri Latvala @ 2019-05-16 11:27 UTC (permalink / raw)
To: igt-dev; +Cc: Tomi Sarvela, Petri Latvala
If the network goes down while testing, CI tends to interpret that as
the device being down, cutting its power after a while. This causes an
incomplete to an innocent test, increasing noise in the results.
A new flag to --abort-on-monitored-error, "ping", uses liboping to
ping a host configured in .igtrc with one ping after each test
execution and aborts the run if there is no reply in a hardcoded
amount of time.
v2:
- Use a higher timeout
- Allow hostname configuration from environment
Signed-off-by: Petri Latvala <petri.latvala@intel.com>
Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
Cc: Martin Peres <martin.peres@linux.intel.com>
Cc: Tomi Sarvela <tomi.p.sarvela@intel.com>
---
meson.build | 1 +
meson_options.txt | 6 ++
runner/executor.c | 135 +++++++++++++++++++++++++++++++++++++++++++++
runner/meson.build | 12 +++-
runner/settings.c | 4 ++
runner/settings.h | 5 +-
6 files changed, 160 insertions(+), 3 deletions(-)
diff --git a/meson.build b/meson.build
index a05e912c..aac67f1a 100644
--- a/meson.build
+++ b/meson.build
@@ -100,6 +100,7 @@ build_tests = get_option('build_tests')
with_libdrm = get_option('with_libdrm')
with_libunwind = get_option('with_libunwind')
build_runner = get_option('build_runner')
+with_oping = get_option('with_oping')
_build_overlay = build_overlay != 'false'
_overlay_required = build_overlay == 'true'
diff --git a/meson_options.txt b/meson_options.txt
index 888efe56..935f883e 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -58,6 +58,12 @@ option('build_runner',
choices : ['auto', 'true', 'false'],
description : 'Build test runner')
+option('with_oping',
+ type : 'combo',
+ value : 'auto',
+ choices : ['auto', 'true', 'false'],
+ description : 'Build igt_runner with liboping')
+
option('use_rpath',
type : 'boolean',
value : false,
diff --git a/runner/executor.c b/runner/executor.c
index 7e5fbe8f..be78474c 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -1,6 +1,10 @@
#include <errno.h>
#include <fcntl.h>
+#include <glib.h>
#include <linux/watchdog.h>
+#if HAVE_OPING
+#include <oping.h>
+#endif
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
@@ -108,6 +112,133 @@ static void ping_watchdogs(void)
}
}
+#if HAVE_OPING
+static pingobj_t *pingobj = NULL;
+
+static bool load_ping_config_from_file(void)
+{
+ char *key_file_env = NULL;
+ char *key_file_loc = NULL;
+ GError *error = NULL;
+ GKeyFile *key_file = NULL;
+ const char *ping_hostname;
+ int ret;
+
+ /* Determine igt config path */
+ key_file_env = getenv("IGT_CONFIG_PATH");
+ if (key_file_env) {
+ key_file_loc = strdup(key_file_env);
+ } else {
+ asprintf(&key_file_loc, "%s/.igtrc", g_get_home_dir());
+ }
+
+ /* Load igt config file */
+ key_file = g_key_file_new();
+ ret = g_key_file_load_from_file(key_file, key_file_loc,
+ G_KEY_FILE_NONE, &error);
+ free(key_file_loc);
+
+ if (!ret) {
+ g_error_free(error);
+ g_key_file_free(key_file);
+ return false;
+ }
+
+ g_clear_error(&error);
+
+ ping_hostname =
+ g_key_file_get_string(key_file, "DUT",
+ "PingHostName", &error);
+
+ g_clear_error(&error);
+ g_key_file_free(key_file);
+
+ if (!ping_hostname)
+ return false;
+
+ if (ping_host_add(pingobj, ping_hostname)) {
+ fprintf(stderr,
+ "abort on ping: Cannot use hostname from config file\n");
+ return false;
+ }
+
+ return true;
+}
+
+static bool load_ping_config_from_env(void)
+{
+ const char *ping_hostname;
+
+ ping_hostname = getenv("IGT_PING_HOSTNAME");
+ if (!ping_hostname)
+ return false;
+
+ if (ping_host_add(pingobj, ping_hostname)) {
+ fprintf(stderr,
+ "abort on ping: Cannot use hostname from environment\n");
+ return false;
+ }
+
+ return true;
+}
+
+#endif
+
+static void ping_config(void)
+{
+#if HAVE_OPING
+ double timeout = 20.0; /* Fair dice roll */
+
+ if (pingobj)
+ return;
+
+ pingobj = ping_construct();
+
+ /* Try env first, then config file */
+ if (!load_ping_config_from_env() && !load_ping_config_from_file()) {
+ fprintf(stderr,
+ "abort on ping: No host to ping configured\n");
+ ping_destroy(pingobj);
+ pingobj = NULL;
+ return;
+ }
+
+ ping_setopt(pingobj, PING_OPT_TIMEOUT, &timeout);
+#endif
+}
+
+static char *handle_ping(void)
+{
+#if HAVE_OPING
+ if (pingobj) {
+ pingobj_iter_t *iter;
+
+ ping_send(pingobj);
+
+ for (iter = ping_iterator_get(pingobj);
+ iter != NULL;
+ iter = ping_iterator_next(iter)) {
+ double latency;
+ size_t len = sizeof(latency);
+
+ ping_iterator_get_info(iter,
+ PING_INFO_LATENCY,
+ &latency,
+ &len);
+ if (latency < 0.0) {
+ char *reason;
+
+ asprintf(&reason,
+ "Ping host did not respond to ping, network down");
+ return reason;
+ }
+ }
+ }
+#endif
+
+ return NULL;
+}
+
static char *handle_lockdep(void)
{
const char *header = "Lockdep not active\n\n/proc/lockdep_stats contents:\n";
@@ -197,6 +328,7 @@ static const struct {
} abort_handlers[] = {
{ ABORT_LOCKDEP, handle_lockdep },
{ ABORT_TAINT, handle_taint },
+ { ABORT_PING, handle_ping },
{ 0, 0 },
};
@@ -1288,6 +1420,9 @@ bool execute(struct execute_state *state,
init_watchdogs(settings);
+ if (settings->abort_mask & ABORT_PING)
+ ping_config();
+
if (!uname(&unamebuf)) {
dprintf(unamefd, "%s %s %s %s %s\n",
unamebuf.sysname,
diff --git a/runner/meson.build b/runner/meson.build
index b658f1d2..a54aaab4 100644
--- a/runner/meson.build
+++ b/runner/meson.build
@@ -1,4 +1,13 @@
jsonc = dependency('json-c', required: _runner_required)
+runner_deps = [jsonc, glib]
+have_oping = []
+if with_oping != 'false'
+ oping = dependency('liboping', required: with_oping == 'true')
+ if oping.found()
+ runner_deps += oping
+ have_oping = '-DHAVE_OPING=1'
+ endif
+endif
runnerlib_sources = [ 'settings.c',
'job_list.c',
@@ -21,7 +30,8 @@ if _build_runner and _build_tests and jsonc.found()
runnerlib = static_library('igt_runner', runnerlib_sources,
include_directories : inc,
- dependencies : [jsonc, glib])
+ c_args : have_oping,
+ dependencies : runner_deps)
runner = executable('igt_runner', runner_sources,
link_with : runnerlib,
diff --git a/runner/settings.c b/runner/settings.c
index ad38ae8d..b57d1a6a 100644
--- a/runner/settings.c
+++ b/runner/settings.c
@@ -48,6 +48,7 @@ static struct {
} abort_conditions[] = {
{ ABORT_TAINT, "taint" },
{ ABORT_LOCKDEP, "lockdep" },
+ { ABORT_PING, "ping" },
{ ABORT_ALL, "all" },
{ 0, 0 },
};
@@ -136,6 +137,9 @@ static const char *usage_str =
" Possible conditions:\n"
" lockdep - abort when kernel lockdep has been angered.\n"
" taint - abort when kernel becomes fatally tainted.\n"
+ " ping - abort when a host configured in .igtrc or\n"
+ " environment variable IGT_PING_HOSTNAME does\n"
+ " not respond to ping.\n"
" all - abort for all of the above.\n"
" -s, --sync Sync results to disk after every test\n"
" -l {quiet,verbose,dummy}, --log-level {quiet,verbose,dummy}\n"
diff --git a/runner/settings.h b/runner/settings.h
index 0a1ee08d..2b6e65d0 100644
--- a/runner/settings.h
+++ b/runner/settings.h
@@ -15,9 +15,10 @@ enum {
#define ABORT_TAINT (1 << 0)
#define ABORT_LOCKDEP (1 << 1)
-#define ABORT_ALL (ABORT_TAINT | ABORT_LOCKDEP)
+#define ABORT_PING (1 << 2)
+#define ABORT_ALL (ABORT_TAINT | ABORT_LOCKDEP | ABORT_PING)
-_Static_assert(ABORT_ALL == (ABORT_TAINT | ABORT_LOCKDEP), "ABORT_ALL must be all conditions bitwise or'd");
+_Static_assert(ABORT_ALL == (ABORT_TAINT | ABORT_LOCKDEP | ABORT_PING), "ABORT_ALL must be all conditions bitwise or'd");
struct regex_list {
char **regex_strings;
--
2.19.1
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [igt-dev] [PATCH i-g-t 2/2] HAX: Check all conditions to abort
2019-05-16 11:27 [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure Petri Latvala
@ 2019-05-16 11:27 ` Petri Latvala
2019-05-16 12:23 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2,v2] runner: Add support for aborting on network failure Patchwork
` (3 subsequent siblings)
4 siblings, 0 replies; 12+ messages in thread
From: Petri Latvala @ 2019-05-16 11:27 UTC (permalink / raw)
To: igt-dev; +Cc: Petri Latvala
---
runner/executor.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/runner/executor.c b/runner/executor.c
index be78474c..72f2f020 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -339,7 +339,7 @@ static char *need_to_abort(const struct settings* settings)
for (it = abort_handlers; it->condition; it++) {
char *abort;
- if (!(settings->abort_mask & it->condition))
+ if (false && !(settings->abort_mask & it->condition))
continue;
abort = it->handler();
--
2.19.1
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2,v2] runner: Add support for aborting on network failure
2019-05-16 11:27 [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure Petri Latvala
2019-05-16 11:27 ` [igt-dev] [PATCH i-g-t 2/2] HAX: Check all conditions to abort Petri Latvala
@ 2019-05-16 12:23 ` Patchwork
2019-05-16 12:51 ` [igt-dev] [PATCH i-g-t 1/2 v2] " Daniel Vetter
` (2 subsequent siblings)
4 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2019-05-16 12:23 UTC (permalink / raw)
To: Petri Latvala; +Cc: igt-dev
== Series Details ==
Series: series starting with [i-g-t,1/2,v2] runner: Add support for aborting on network failure
URL : https://patchwork.freedesktop.org/series/60711/
State : success
== Summary ==
CI Bug Log - changes from IGT_4993 -> IGTPW_2988
====================================================
Summary
-------
**SUCCESS**
No regressions found.
External URL: https://patchwork.freedesktop.org/api/1.0/series/60711/revisions/1/mbox/
Known issues
------------
Here are the changes found in IGTPW_2988 that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@i915_selftest@live_contexts:
- fi-bdw-gvtdvm: [PASS][1] -> [DMESG-FAIL][2] ([fdo#110235])
[1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/fi-bdw-gvtdvm/igt@i915_selftest@live_contexts.html
[2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/fi-bdw-gvtdvm/igt@i915_selftest@live_contexts.html
* igt@kms_frontbuffer_tracking@basic:
- fi-icl-u2: [PASS][3] -> [FAIL][4] ([fdo#103167])
[3]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/fi-icl-u2/igt@kms_frontbuffer_tracking@basic.html
[4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/fi-icl-u2/igt@kms_frontbuffer_tracking@basic.html
* igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
- fi-blb-e6850: [PASS][5] -> [INCOMPLETE][6] ([fdo#107718])
[5]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/fi-blb-e6850/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html
[6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/fi-blb-e6850/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html
#### Warnings ####
* igt@i915_selftest@live_hangcheck:
- fi-apl-guc: [INCOMPLETE][7] ([fdo#103927] / [fdo#110624]) -> [DMESG-FAIL][8] ([fdo#110620])
[7]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/fi-apl-guc/igt@i915_selftest@live_hangcheck.html
[8]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/fi-apl-guc/igt@i915_selftest@live_hangcheck.html
* igt@runner@aborted:
- fi-apl-guc: [FAIL][9] ([fdo#110624]) -> [FAIL][10] ([fdo#110622])
[9]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/fi-apl-guc/igt@runner@aborted.html
[10]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/fi-apl-guc/igt@runner@aborted.html
{name}: This element is suppressed. This means it is ignored when computing
the status of the difference (SUCCESS, WARNING, or FAILURE).
[fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
[fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
[fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
[fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
[fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
[fdo#110235]: https://bugs.freedesktop.org/show_bug.cgi?id=110235
[fdo#110620]: https://bugs.freedesktop.org/show_bug.cgi?id=110620
[fdo#110622]: https://bugs.freedesktop.org/show_bug.cgi?id=110622
[fdo#110624]: https://bugs.freedesktop.org/show_bug.cgi?id=110624
Participating hosts (52 -> 45)
------------------------------
Missing (7): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-byt-clapper fi-bdw-samus
Build changes
-------------
* IGT: IGT_4993 -> IGTPW_2988
CI_DRM_6090: df27ebbc9ec0d8ae42e8cf3d7a3b29fd6baf4940 @ git://anongit.freedesktop.org/gfx-ci/linux
IGTPW_2988: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/
IGT_4993: 2d41b51980257a7d0a3de30a9b3ea0e95b13df91 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure
2019-05-16 11:27 [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure Petri Latvala
2019-05-16 11:27 ` [igt-dev] [PATCH i-g-t 2/2] HAX: Check all conditions to abort Petri Latvala
2019-05-16 12:23 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2,v2] runner: Add support for aborting on network failure Patchwork
@ 2019-05-16 12:51 ` Daniel Vetter
2019-05-16 14:45 ` [igt-dev] ✓ Fi.CI.IGT: success for series starting with [i-g-t,1/2,v2] " Patchwork
2019-05-20 6:17 ` [igt-dev] [PATCH i-g-t 1/2 v2] " Arkadiusz Hiler
4 siblings, 0 replies; 12+ messages in thread
From: Daniel Vetter @ 2019-05-16 12:51 UTC (permalink / raw)
To: Petri Latvala; +Cc: igt-dev, Tomi Sarvela
On Thu, May 16, 2019 at 02:27:54PM +0300, Petri Latvala wrote:
> If the network goes down while testing, CI tends to interpret that as
> the device being down, cutting its power after a while. This causes an
> incomplete to an innocent test, increasing noise in the results.
>
> A new flag to --abort-on-monitored-error, "ping", uses liboping to
> ping a host configured in .igtrc with one ping after each test
> execution and aborts the run if there is no reply in a hardcoded
> amount of time.
>
> v2:
> - Use a higher timeout
> - Allow hostname configuration from environment
>
> Signed-off-by: Petri Latvala <petri.latvala@intel.com>
> Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
> Cc: Martin Peres <martin.peres@linux.intel.com>
> Cc: Tomi Sarvela <tomi.p.sarvela@intel.com>
> ---
> meson.build | 1 +
> meson_options.txt | 6 ++
> runner/executor.c | 135 +++++++++++++++++++++++++++++++++++++++++++++
> runner/meson.build | 12 +++-
> runner/settings.c | 4 ++
> runner/settings.h | 5 +-
> 6 files changed, 160 insertions(+), 3 deletions(-)
>
> diff --git a/meson.build b/meson.build
> index a05e912c..aac67f1a 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -100,6 +100,7 @@ build_tests = get_option('build_tests')
> with_libdrm = get_option('with_libdrm')
> with_libunwind = get_option('with_libunwind')
> build_runner = get_option('build_runner')
> +with_oping = get_option('with_oping')
>
> _build_overlay = build_overlay != 'false'
> _overlay_required = build_overlay == 'true'
> diff --git a/meson_options.txt b/meson_options.txt
> index 888efe56..935f883e 100644
> --- a/meson_options.txt
> +++ b/meson_options.txt
> @@ -58,6 +58,12 @@ option('build_runner',
> choices : ['auto', 'true', 'false'],
> description : 'Build test runner')
>
> +option('with_oping',
> + type : 'combo',
> + value : 'auto',
> + choices : ['auto', 'true', 'false'],
> + description : 'Build igt_runner with liboping')
> +
> option('use_rpath',
> type : 'boolean',
> value : false,
> diff --git a/runner/executor.c b/runner/executor.c
> index 7e5fbe8f..be78474c 100644
> --- a/runner/executor.c
> +++ b/runner/executor.c
> @@ -1,6 +1,10 @@
> #include <errno.h>
> #include <fcntl.h>
> +#include <glib.h>
> #include <linux/watchdog.h>
> +#if HAVE_OPING
> +#include <oping.h>
> +#endif
> #include <signal.h>
> #include <stdio.h>
> #include <stdlib.h>
> @@ -108,6 +112,133 @@ static void ping_watchdogs(void)
> }
> }
>
> +#if HAVE_OPING
> +static pingobj_t *pingobj = NULL;
> +
> +static bool load_ping_config_from_file(void)
> +{
> + char *key_file_env = NULL;
> + char *key_file_loc = NULL;
> + GError *error = NULL;
> + GKeyFile *key_file = NULL;
> + const char *ping_hostname;
> + int ret;
> +
> + /* Determine igt config path */
> + key_file_env = getenv("IGT_CONFIG_PATH");
> + if (key_file_env) {
> + key_file_loc = strdup(key_file_env);
> + } else {
> + asprintf(&key_file_loc, "%s/.igtrc", g_get_home_dir());
> + }
> +
> + /* Load igt config file */
> + key_file = g_key_file_new();
> + ret = g_key_file_load_from_file(key_file, key_file_loc,
> + G_KEY_FILE_NONE, &error);
> + free(key_file_loc);
> +
> + if (!ret) {
> + g_error_free(error);
> + g_key_file_free(key_file);
> + return false;
> + }
> +
> + g_clear_error(&error);
> +
> + ping_hostname =
> + g_key_file_get_string(key_file, "DUT",
> + "PingHostName", &error);
> +
> + g_clear_error(&error);
> + g_key_file_free(key_file);
> +
> + if (!ping_hostname)
> + return false;
> +
> + if (ping_host_add(pingobj, ping_hostname)) {
> + fprintf(stderr,
> + "abort on ping: Cannot use hostname from config file\n");
> + return false;
> + }
> +
> + return true;
> +}
> +
> +static bool load_ping_config_from_env(void)
> +{
> + const char *ping_hostname;
> +
> + ping_hostname = getenv("IGT_PING_HOSTNAME");
> + if (!ping_hostname)
> + return false;
> +
> + if (ping_host_add(pingobj, ping_hostname)) {
> + fprintf(stderr,
> + "abort on ping: Cannot use hostname from environment\n");
> + return false;
> + }
> +
> + return true;
> +}
> +
> +#endif
> +
> +static void ping_config(void)
> +{
> +#if HAVE_OPING
> + double timeout = 20.0; /* Fair dice roll */
> +
> + if (pingobj)
> + return;
> +
> + pingobj = ping_construct();
> +
> + /* Try env first, then config file */
> + if (!load_ping_config_from_env() && !load_ping_config_from_file()) {
> + fprintf(stderr,
> + "abort on ping: No host to ping configured\n");
> + ping_destroy(pingobj);
> + pingobj = NULL;
> + return;
> + }
> +
> + ping_setopt(pingobj, PING_OPT_TIMEOUT, &timeout);
> +#endif
> +}
> +
> +static char *handle_ping(void)
> +{
> +#if HAVE_OPING
> + if (pingobj) {
> + pingobj_iter_t *iter;
> +
> + ping_send(pingobj);
I think we should try to ping a few times, in case our ping (or the reply
got lost). Networks aren't 100% reliable, even when they work. Throwing
out 3 pings or so should be good enough, maybe try that twice.
As long as we die quicker than jenkins declares us dead we should be good
enough.
btw, could we also somehow check the ssh connection itself that we're
running under? I think that would more directly make sure that Jenkins
still knows we're doing well ...
-Daniel
> +
> + for (iter = ping_iterator_get(pingobj);
> + iter != NULL;
> + iter = ping_iterator_next(iter)) {
> + double latency;
> + size_t len = sizeof(latency);
> +
> + ping_iterator_get_info(iter,
> + PING_INFO_LATENCY,
> + &latency,
> + &len);
> + if (latency < 0.0) {
> + char *reason;
> +
> + asprintf(&reason,
> + "Ping host did not respond to ping, network down");
> + return reason;
> + }
> + }
> + }
> +#endif
> +
> + return NULL;
> +}
> +
> static char *handle_lockdep(void)
> {
> const char *header = "Lockdep not active\n\n/proc/lockdep_stats contents:\n";
> @@ -197,6 +328,7 @@ static const struct {
> } abort_handlers[] = {
> { ABORT_LOCKDEP, handle_lockdep },
> { ABORT_TAINT, handle_taint },
> + { ABORT_PING, handle_ping },
> { 0, 0 },
> };
>
> @@ -1288,6 +1420,9 @@ bool execute(struct execute_state *state,
>
> init_watchdogs(settings);
>
> + if (settings->abort_mask & ABORT_PING)
> + ping_config();
> +
> if (!uname(&unamebuf)) {
> dprintf(unamefd, "%s %s %s %s %s\n",
> unamebuf.sysname,
> diff --git a/runner/meson.build b/runner/meson.build
> index b658f1d2..a54aaab4 100644
> --- a/runner/meson.build
> +++ b/runner/meson.build
> @@ -1,4 +1,13 @@
> jsonc = dependency('json-c', required: _runner_required)
> +runner_deps = [jsonc, glib]
> +have_oping = []
> +if with_oping != 'false'
> + oping = dependency('liboping', required: with_oping == 'true')
> + if oping.found()
> + runner_deps += oping
> + have_oping = '-DHAVE_OPING=1'
> + endif
> +endif
>
> runnerlib_sources = [ 'settings.c',
> 'job_list.c',
> @@ -21,7 +30,8 @@ if _build_runner and _build_tests and jsonc.found()
>
> runnerlib = static_library('igt_runner', runnerlib_sources,
> include_directories : inc,
> - dependencies : [jsonc, glib])
> + c_args : have_oping,
> + dependencies : runner_deps)
>
> runner = executable('igt_runner', runner_sources,
> link_with : runnerlib,
> diff --git a/runner/settings.c b/runner/settings.c
> index ad38ae8d..b57d1a6a 100644
> --- a/runner/settings.c
> +++ b/runner/settings.c
> @@ -48,6 +48,7 @@ static struct {
> } abort_conditions[] = {
> { ABORT_TAINT, "taint" },
> { ABORT_LOCKDEP, "lockdep" },
> + { ABORT_PING, "ping" },
> { ABORT_ALL, "all" },
> { 0, 0 },
> };
> @@ -136,6 +137,9 @@ static const char *usage_str =
> " Possible conditions:\n"
> " lockdep - abort when kernel lockdep has been angered.\n"
> " taint - abort when kernel becomes fatally tainted.\n"
> + " ping - abort when a host configured in .igtrc or\n"
> + " environment variable IGT_PING_HOSTNAME does\n"
> + " not respond to ping.\n"
> " all - abort for all of the above.\n"
> " -s, --sync Sync results to disk after every test\n"
> " -l {quiet,verbose,dummy}, --log-level {quiet,verbose,dummy}\n"
> diff --git a/runner/settings.h b/runner/settings.h
> index 0a1ee08d..2b6e65d0 100644
> --- a/runner/settings.h
> +++ b/runner/settings.h
> @@ -15,9 +15,10 @@ enum {
>
> #define ABORT_TAINT (1 << 0)
> #define ABORT_LOCKDEP (1 << 1)
> -#define ABORT_ALL (ABORT_TAINT | ABORT_LOCKDEP)
> +#define ABORT_PING (1 << 2)
> +#define ABORT_ALL (ABORT_TAINT | ABORT_LOCKDEP | ABORT_PING)
>
> -_Static_assert(ABORT_ALL == (ABORT_TAINT | ABORT_LOCKDEP), "ABORT_ALL must be all conditions bitwise or'd");
> +_Static_assert(ABORT_ALL == (ABORT_TAINT | ABORT_LOCKDEP | ABORT_PING), "ABORT_ALL must be all conditions bitwise or'd");
>
> struct regex_list {
> char **regex_strings;
> --
> 2.19.1
>
> _______________________________________________
> igt-dev mailing list
> igt-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/igt-dev
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply [flat|nested] 12+ messages in thread
* [igt-dev] ✓ Fi.CI.IGT: success for series starting with [i-g-t,1/2,v2] runner: Add support for aborting on network failure
2019-05-16 11:27 [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure Petri Latvala
` (2 preceding siblings ...)
2019-05-16 12:51 ` [igt-dev] [PATCH i-g-t 1/2 v2] " Daniel Vetter
@ 2019-05-16 14:45 ` Patchwork
2019-05-20 6:17 ` [igt-dev] [PATCH i-g-t 1/2 v2] " Arkadiusz Hiler
4 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2019-05-16 14:45 UTC (permalink / raw)
To: Petri Latvala; +Cc: igt-dev
== Series Details ==
Series: series starting with [i-g-t,1/2,v2] runner: Add support for aborting on network failure
URL : https://patchwork.freedesktop.org/series/60711/
State : success
== Summary ==
CI Bug Log - changes from IGT_4993_full -> IGTPW_2988_full
====================================================
Summary
-------
**SUCCESS**
No regressions found.
External URL: https://patchwork.freedesktop.org/api/1.0/series/60711/revisions/1/mbox/
Known issues
------------
Here are the changes found in IGTPW_2988_full that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@gem_eio@reset-stress:
- shard-snb: [PASS][1] -> [FAIL][2] ([fdo#109661])
[1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-snb5/igt@gem_eio@reset-stress.html
[2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-snb6/igt@gem_eio@reset-stress.html
* igt@gem_tiled_swapping@non-threaded:
- shard-iclb: [PASS][3] -> [FAIL][4] ([fdo#108686])
[3]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb5/igt@gem_tiled_swapping@non-threaded.html
[4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb7/igt@gem_tiled_swapping@non-threaded.html
- shard-hsw: [PASS][5] -> [FAIL][6] ([fdo#108686])
[5]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-hsw8/igt@gem_tiled_swapping@non-threaded.html
[6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-hsw1/igt@gem_tiled_swapping@non-threaded.html
* igt@i915_pm_rpm@fences:
- shard-kbl: [PASS][7] -> [DMESG-WARN][8] ([fdo#103558] / [fdo#105602]) +20 similar issues
[7]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-kbl1/igt@i915_pm_rpm@fences.html
[8]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-kbl4/igt@i915_pm_rpm@fences.html
* igt@i915_pm_rpm@i2c:
- shard-iclb: [PASS][9] -> [FAIL][10] ([fdo#104097])
[9]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb5/igt@i915_pm_rpm@i2c.html
[10]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb6/igt@i915_pm_rpm@i2c.html
* igt@i915_suspend@fence-restore-tiled2untiled:
- shard-apl: [PASS][11] -> [DMESG-WARN][12] ([fdo#108566]) +4 similar issues
[11]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-apl8/igt@i915_suspend@fence-restore-tiled2untiled.html
[12]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-apl8/igt@i915_suspend@fence-restore-tiled2untiled.html
* igt@kms_cursor_legacy@2x-nonblocking-modeset-vs-cursor-atomic:
- shard-glk: [PASS][13] -> [FAIL][14] ([fdo#107409])
[13]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-glk5/igt@kms_cursor_legacy@2x-nonblocking-modeset-vs-cursor-atomic.html
[14]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-glk9/igt@kms_cursor_legacy@2x-nonblocking-modeset-vs-cursor-atomic.html
* igt@kms_fence_pin_leak:
- shard-hsw: [PASS][15] -> [DMESG-WARN][16] ([fdo#102614]) +2 similar issues
[15]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-hsw2/igt@kms_fence_pin_leak.html
[16]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-hsw5/igt@kms_fence_pin_leak.html
* igt@kms_flip@flip-vs-suspend:
- shard-kbl: [PASS][17] -> [INCOMPLETE][18] ([fdo#103665])
[17]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-kbl7/igt@kms_flip@flip-vs-suspend.html
[18]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-kbl6/igt@kms_flip@flip-vs-suspend.html
* igt@kms_flip@modeset-vs-vblank-race-interruptible:
- shard-glk: [PASS][19] -> [FAIL][20] ([fdo#103060])
[19]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-glk4/igt@kms_flip@modeset-vs-vblank-race-interruptible.html
[20]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-glk6/igt@kms_flip@modeset-vs-vblank-race-interruptible.html
* igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-shrfb-draw-render:
- shard-hsw: [PASS][21] -> [DMESG-FAIL][22] ([fdo#102614] / [fdo#103167])
[21]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-hsw6/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-shrfb-draw-render.html
[22]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-hsw5/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-shrfb-draw-render.html
* igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-render:
- shard-iclb: [PASS][23] -> [FAIL][24] ([fdo#103167]) +1 similar issue
[23]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb4/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-render.html
[24]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb5/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-render.html
* igt@kms_plane_lowres@pipe-a-tiling-y:
- shard-iclb: [PASS][25] -> [FAIL][26] ([fdo#103166])
[25]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb2/igt@kms_plane_lowres@pipe-a-tiling-y.html
[26]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb2/igt@kms_plane_lowres@pipe-a-tiling-y.html
* igt@kms_psr@psr2_cursor_plane_onoff:
- shard-iclb: [PASS][27] -> [SKIP][28] ([fdo#109441]) +5 similar issues
[27]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb2/igt@kms_psr@psr2_cursor_plane_onoff.html
[28]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb6/igt@kms_psr@psr2_cursor_plane_onoff.html
* igt@kms_vblank@pipe-c-ts-continuation-modeset-hang:
- shard-hsw: [PASS][29] -> [INCOMPLETE][30] ([fdo#103540])
[29]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-hsw5/igt@kms_vblank@pipe-c-ts-continuation-modeset-hang.html
[30]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-hsw4/igt@kms_vblank@pipe-c-ts-continuation-modeset-hang.html
#### Possible fixes ####
* igt@gem_mocs_settings@mocs-settings-render:
- shard-iclb: [INCOMPLETE][31] ([fdo#107713]) -> [PASS][32]
[31]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb7/igt@gem_mocs_settings@mocs-settings-render.html
[32]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb5/igt@gem_mocs_settings@mocs-settings-render.html
* {igt@kms_cursor_crc@pipe-a-cursor-128x128-onscreen}:
- shard-kbl: [FAIL][33] ([fdo#103232]) -> [PASS][34] +1 similar issue
[33]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-kbl2/igt@kms_cursor_crc@pipe-a-cursor-128x128-onscreen.html
[34]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-kbl1/igt@kms_cursor_crc@pipe-a-cursor-128x128-onscreen.html
* {igt@kms_cursor_crc@pipe-c-cursor-256x85-onscreen}:
- shard-apl: [FAIL][35] ([fdo#103232]) -> [PASS][36] +1 similar issue
[35]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-apl4/igt@kms_cursor_crc@pipe-c-cursor-256x85-onscreen.html
[36]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-apl2/igt@kms_cursor_crc@pipe-c-cursor-256x85-onscreen.html
* igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic:
- shard-glk: [FAIL][37] ([fdo#104873]) -> [PASS][38]
[37]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-glk2/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic.html
[38]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-glk5/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic.html
* igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-shrfb-draw-render:
- shard-iclb: [FAIL][39] ([fdo#103167]) -> [PASS][40] +1 similar issue
[39]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb4/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-shrfb-draw-render.html
[40]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb5/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-shrfb-draw-render.html
* igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-cur-indfb-onoff:
- shard-iclb: [INCOMPLETE][41] ([fdo#106978] / [fdo#107713]) -> [PASS][42]
[41]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb7/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-cur-indfb-onoff.html
[42]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb3/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-cur-indfb-onoff.html
* igt@kms_plane_lowres@pipe-a-tiling-x:
- shard-iclb: [FAIL][43] ([fdo#103166]) -> [PASS][44]
[43]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb6/igt@kms_plane_lowres@pipe-a-tiling-x.html
[44]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb2/igt@kms_plane_lowres@pipe-a-tiling-x.html
* igt@kms_psr@psr2_sprite_render:
- shard-iclb: [SKIP][45] ([fdo#109441]) -> [PASS][46] +1 similar issue
[45]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb7/igt@kms_psr@psr2_sprite_render.html
[46]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb2/igt@kms_psr@psr2_sprite_render.html
* igt@kms_setmode@basic:
- shard-apl: [FAIL][47] ([fdo#99912]) -> [PASS][48]
[47]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-apl2/igt@kms_setmode@basic.html
[48]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-apl1/igt@kms_setmode@basic.html
* igt@kms_vblank@pipe-c-ts-continuation-suspend:
- shard-apl: [DMESG-WARN][49] ([fdo#108566]) -> [PASS][50] +3 similar issues
[49]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-apl1/igt@kms_vblank@pipe-c-ts-continuation-suspend.html
[50]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-apl3/igt@kms_vblank@pipe-c-ts-continuation-suspend.html
#### Warnings ####
* igt@gem_mmap_gtt@forked-big-copy-xy:
- shard-iclb: [TIMEOUT][51] ([fdo#109673]) -> [INCOMPLETE][52] ([fdo#107713] / [fdo#109100])
[51]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-iclb7/igt@gem_mmap_gtt@forked-big-copy-xy.html
[52]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-iclb4/igt@gem_mmap_gtt@forked-big-copy-xy.html
* igt@kms_busy@extended-modeset-hang-newfb-render-d:
- shard-kbl: [SKIP][53] ([fdo#109271] / [fdo#109278]) -> [SKIP][54] ([fdo#105602] / [fdo#109271] / [fdo#109278]) +2 similar issues
[53]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-kbl1/igt@kms_busy@extended-modeset-hang-newfb-render-d.html
[54]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-kbl4/igt@kms_busy@extended-modeset-hang-newfb-render-d.html
* igt@kms_panel_fitting@legacy:
- shard-kbl: [SKIP][55] ([fdo#109271]) -> [SKIP][56] ([fdo#105602] / [fdo#109271]) +15 similar issues
[55]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4993/shard-kbl3/igt@kms_panel_fitting@legacy.html
[56]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/shard-kbl4/igt@kms_panel_fitting@legacy.html
{name}: This element is suppressed. This means it is ignored when computing
the status of the difference (SUCCESS, WARNING, or FAILURE).
[fdo#102614]: https://bugs.freedesktop.org/show_bug.cgi?id=102614
[fdo#103060]: https://bugs.freedesktop.org/show_bug.cgi?id=103060
[fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
[fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
[fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
[fdo#103540]: https://bugs.freedesktop.org/show_bug.cgi?id=103540
[fdo#103558]: https://bugs.freedesktop.org/show_bug.cgi?id=103558
[fdo#103665]: https://bugs.freedesktop.org/show_bug.cgi?id=103665
[fdo#104097]: https://bugs.freedesktop.org/show_bug.cgi?id=104097
[fdo#104873]: https://bugs.freedesktop.org/show_bug.cgi?id=104873
[fdo#105602]: https://bugs.freedesktop.org/show_bug.cgi?id=105602
[fdo#106978]: https://bugs.freedesktop.org/show_bug.cgi?id=106978
[fdo#107409]: https://bugs.freedesktop.org/show_bug.cgi?id=107409
[fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
[fdo#108566]: https://bugs.freedesktop.org/show_bug.cgi?id=108566
[fdo#108686]: https://bugs.freedesktop.org/show_bug.cgi?id=108686
[fdo#109100]: https://bugs.freedesktop.org/show_bug.cgi?id=109100
[fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
[fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
[fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
[fdo#109661]: https://bugs.freedesktop.org/show_bug.cgi?id=109661
[fdo#109673]: https://bugs.freedesktop.org/show_bug.cgi?id=109673
[fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912
Participating hosts (7 -> 6)
------------------------------
Missing (1): shard-skl
Build changes
-------------
* IGT: IGT_4993 -> IGTPW_2988
CI_DRM_6090: df27ebbc9ec0d8ae42e8cf3d7a3b29fd6baf4940 @ git://anongit.freedesktop.org/gfx-ci/linux
IGTPW_2988: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/
IGT_4993: 2d41b51980257a7d0a3de30a9b3ea0e95b13df91 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2988/
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure
2019-05-16 11:27 [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure Petri Latvala
` (3 preceding siblings ...)
2019-05-16 14:45 ` [igt-dev] ✓ Fi.CI.IGT: success for series starting with [i-g-t,1/2,v2] " Patchwork
@ 2019-05-20 6:17 ` Arkadiusz Hiler
2019-05-20 6:30 ` Ser, Simon
2019-05-20 9:36 ` Petri Latvala
4 siblings, 2 replies; 12+ messages in thread
From: Arkadiusz Hiler @ 2019-05-20 6:17 UTC (permalink / raw)
To: Petri Latvala; +Cc: igt-dev, Tomi Sarvela
On Thu, May 16, 2019 at 02:27:54PM +0300, Petri Latvala wrote:
> If the network goes down while testing, CI tends to interpret that as
> the device being down, cutting its power after a while. This causes an
> incomplete to an innocent test, increasing noise in the results.
>
> A new flag to --abort-on-monitored-error, "ping", uses liboping to
> ping a host configured in .igtrc with one ping after each test
> execution and aborts the run if there is no reply in a hardcoded
> amount of time.
>
> v2:
> - Use a higher timeout
> - Allow hostname configuration from environment
>
> Signed-off-by: Petri Latvala <petri.latvala@intel.com>
> Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
> Cc: Martin Peres <martin.peres@linux.intel.com>
> Cc: Tomi Sarvela <tomi.p.sarvela@intel.com>
> ---
> meson.build | 1 +
> meson_options.txt | 6 ++
> runner/executor.c | 135 +++++++++++++++++++++++++++++++++++++++++++++
> runner/meson.build | 12 +++-
> runner/settings.c | 4 ++
> runner/settings.h | 5 +-
> 6 files changed, 160 insertions(+), 3 deletions(-)
>
> diff --git a/meson.build b/meson.build
> index a05e912c..aac67f1a 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -100,6 +100,7 @@ build_tests = get_option('build_tests')
> with_libdrm = get_option('with_libdrm')
> with_libunwind = get_option('with_libunwind')
> build_runner = get_option('build_runner')
> +with_oping = get_option('with_oping')
I find it a bit distracting that we have this bit here, whereas
everything else is in runner/meson.build. Is there any reason for that?
> _build_overlay = build_overlay != 'false'
> _overlay_required = build_overlay == 'true'
<SNIP>
> diff --git a/runner/meson.build b/runner/meson.build
> index b658f1d2..a54aaab4 100644
> --- a/runner/meson.build
> +++ b/runner/meson.build
> @@ -1,4 +1,13 @@
> jsonc = dependency('json-c', required: _runner_required)
> +runner_deps = [jsonc, glib]
> +have_oping = []
> +if with_oping != 'false'
> + oping = dependency('liboping', required: with_oping == 'true')
> + if oping.found()
> + runner_deps += oping
> + have_oping = '-DHAVE_OPING=1'
> + endif
> +endif
I think it is about time to:
https://mesonbuild.com/Release-notes-for-0-47-0.html#new-type-of-build-option-for-features
I'll check meson versions in various popular distros Today and see how
the patch would look like.
> runnerlib_sources = [ 'settings.c',
> 'job_list.c',
> @@ -21,7 +30,8 @@ if _build_runner and _build_tests and jsonc.found()
>
> runnerlib = static_library('igt_runner', runnerlib_sources,
> include_directories : inc,
> - dependencies : [jsonc, glib])
> + c_args : have_oping,
I know that we have just a single dep here, but can we name this
runner_lib_c_args or something?
> + dependencies : runner_deps)
>
> runner = executable('igt_runner', runner_sources,
> link_with : runnerlib,
Other than that the C part looks ok. I share Daniel's sentiment on
network and NIC possibly being be a bit too "best-efforty" for aborting
on a single dropped ping.
It also seems like the feature haven't been properly tested, as the HAX
does not specify any IP address.
--
Cheers,
Arek
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure
2019-05-20 6:17 ` [igt-dev] [PATCH i-g-t 1/2 v2] " Arkadiusz Hiler
@ 2019-05-20 6:30 ` Ser, Simon
2019-05-20 9:36 ` Petri Latvala
1 sibling, 0 replies; 12+ messages in thread
From: Ser, Simon @ 2019-05-20 6:30 UTC (permalink / raw)
To: Hiler, Arkadiusz, Latvala, Petri; +Cc: igt-dev, Sarvela, Tomi P
On Mon, 2019-05-20 at 09:17 +0300, Arkadiusz Hiler wrote:
> On Thu, May 16, 2019 at 02:27:54PM +0300, Petri Latvala wrote:
> > If the network goes down while testing, CI tends to interpret that as
> > the device being down, cutting its power after a while. This causes an
> > incomplete to an innocent test, increasing noise in the results.
> >
> > A new flag to --abort-on-monitored-error, "ping", uses liboping to
> > ping a host configured in .igtrc with one ping after each test
> > execution and aborts the run if there is no reply in a hardcoded
> > amount of time.
> >
> > v2:
> > - Use a higher timeout
> > - Allow hostname configuration from environment
> >
> > Signed-off-by: Petri Latvala <petri.latvala@intel.com>
> > Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
> > Cc: Martin Peres <martin.peres@linux.intel.com>
> > Cc: Tomi Sarvela <tomi.p.sarvela@intel.com>
> > ---
> > meson.build | 1 +
> > meson_options.txt | 6 ++
> > runner/executor.c | 135 +++++++++++++++++++++++++++++++++++++++++++++
> > runner/meson.build | 12 +++-
> > runner/settings.c | 4 ++
> > runner/settings.h | 5 +-
> > 6 files changed, 160 insertions(+), 3 deletions(-)
> >
> > diff --git a/meson.build b/meson.build
> > index a05e912c..aac67f1a 100644
> > --- a/meson.build
> > +++ b/meson.build
> > @@ -100,6 +100,7 @@ build_tests = get_option('build_tests')
> > with_libdrm = get_option('with_libdrm')
> > with_libunwind = get_option('with_libunwind')
> > build_runner = get_option('build_runner')
> > +with_oping = get_option('with_oping')
>
> I find it a bit distracting that we have this bit here, whereas
> everything else is in runner/meson.build. Is there any reason for that?
>
> > _build_overlay = build_overlay != 'false'
> > _overlay_required = build_overlay == 'true'
> <SNIP>
> > diff --git a/runner/meson.build b/runner/meson.build
> > index b658f1d2..a54aaab4 100644
> > --- a/runner/meson.build
> > +++ b/runner/meson.build
> > @@ -1,4 +1,13 @@
> > jsonc = dependency('json-c', required: _runner_required)
> > +runner_deps = [jsonc, glib]
> > +have_oping = []
> > +if with_oping != 'false'
> > + oping = dependency('liboping', required: with_oping == 'true')
> > + if oping.found()
> > + runner_deps += oping
> > + have_oping = '-DHAVE_OPING=1'
> > + endif
> > +endif
>
> I think it is about time to:
> https://mesonbuild.com/Release-notes-for-0-47-0.html#new-type-of-build-option-for-features
>
> I'll check meson versions in various popular distros Today and see how
> the patch would look like.
Oh, +1 from me!
I'd really like to replace some complicated logic with feature options.
> > runnerlib_sources = [ 'settings.c',
> > 'job_list.c',
> > @@ -21,7 +30,8 @@ if _build_runner and _build_tests and jsonc.found()
> >
> > runnerlib = static_library('igt_runner', runnerlib_sources,
> > include_directories : inc,
> > - dependencies : [jsonc, glib])
> > + c_args : have_oping,
>
> I know that we have just a single dep here, but can we name this
> runner_lib_c_args or something?
>
> > + dependencies : runner_deps)
> >
> > runner = executable('igt_runner', runner_sources,
> > link_with : runnerlib,
>
> Other than that the C part looks ok. I share Daniel's sentiment on
> network and NIC possibly being be a bit too "best-efforty" for aborting
> on a single dropped ping.
>
> It also seems like the feature haven't been properly tested, as the HAX
> does not specify any IP address.
>
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure
2019-05-20 6:17 ` [igt-dev] [PATCH i-g-t 1/2 v2] " Arkadiusz Hiler
2019-05-20 6:30 ` Ser, Simon
@ 2019-05-20 9:36 ` Petri Latvala
1 sibling, 0 replies; 12+ messages in thread
From: Petri Latvala @ 2019-05-20 9:36 UTC (permalink / raw)
To: Arkadiusz Hiler; +Cc: igt-dev, Tomi Sarvela
On Mon, May 20, 2019 at 09:17:58AM +0300, Arkadiusz Hiler wrote:
> On Thu, May 16, 2019 at 02:27:54PM +0300, Petri Latvala wrote:
> > If the network goes down while testing, CI tends to interpret that as
> > the device being down, cutting its power after a while. This causes an
> > incomplete to an innocent test, increasing noise in the results.
> >
> > A new flag to --abort-on-monitored-error, "ping", uses liboping to
> > ping a host configured in .igtrc with one ping after each test
> > execution and aborts the run if there is no reply in a hardcoded
> > amount of time.
> >
> > v2:
> > - Use a higher timeout
> > - Allow hostname configuration from environment
> >
> > Signed-off-by: Petri Latvala <petri.latvala@intel.com>
> > Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
> > Cc: Martin Peres <martin.peres@linux.intel.com>
> > Cc: Tomi Sarvela <tomi.p.sarvela@intel.com>
> > ---
> > meson.build | 1 +
> > meson_options.txt | 6 ++
> > runner/executor.c | 135 +++++++++++++++++++++++++++++++++++++++++++++
> > runner/meson.build | 12 +++-
> > runner/settings.c | 4 ++
> > runner/settings.h | 5 +-
> > 6 files changed, 160 insertions(+), 3 deletions(-)
> >
> > diff --git a/meson.build b/meson.build
> > index a05e912c..aac67f1a 100644
> > --- a/meson.build
> > +++ b/meson.build
> > @@ -100,6 +100,7 @@ build_tests = get_option('build_tests')
> > with_libdrm = get_option('with_libdrm')
> > with_libunwind = get_option('with_libunwind')
> > build_runner = get_option('build_runner')
> > +with_oping = get_option('with_oping')
>
> I find it a bit distracting that we have this bit here, whereas
> everything else is in runner/meson.build. Is there any reason for that?
Oh, I thought we had all option fetches in toplevel. Will move that to
runner/.
>
> > _build_overlay = build_overlay != 'false'
> > _overlay_required = build_overlay == 'true'
> <SNIP>
> > diff --git a/runner/meson.build b/runner/meson.build
> > index b658f1d2..a54aaab4 100644
> > --- a/runner/meson.build
> > +++ b/runner/meson.build
> > @@ -1,4 +1,13 @@
> > jsonc = dependency('json-c', required: _runner_required)
> > +runner_deps = [jsonc, glib]
> > +have_oping = []
> > +if with_oping != 'false'
> > + oping = dependency('liboping', required: with_oping == 'true')
> > + if oping.found()
> > + runner_deps += oping
> > + have_oping = '-DHAVE_OPING=1'
> > + endif
> > +endif
>
> I think it is about time to:
> https://mesonbuild.com/Release-notes-for-0-47-0.html#new-type-of-build-option-for-features
>
> I'll check meson versions in various popular distros Today and see how
> the patch would look like.
>
Oh yes please!
> > runnerlib_sources = [ 'settings.c',
> > 'job_list.c',
> > @@ -21,7 +30,8 @@ if _build_runner and _build_tests and jsonc.found()
> >
> > runnerlib = static_library('igt_runner', runnerlib_sources,
> > include_directories : inc,
> > - dependencies : [jsonc, glib])
> > + c_args : have_oping,
>
> I know that we have just a single dep here, but can we name this
> runner_lib_c_args or something?
Sure.
>
> > + dependencies : runner_deps)
> >
> > runner = executable('igt_runner', runner_sources,
> > link_with : runnerlib,
>
> Other than that the C part looks ok. I share Daniel's sentiment on
> network and NIC possibly being be a bit too "best-efforty" for aborting
> on a single dropped ping.
Yes, I'll make it retry a few times in the next version.
> It also seems like the feature haven't been properly tested, as the HAX
> does not specify any IP address.
Why would it need a HAX? The .igtrc configuration from the time of v1
was still in place. See run.log, "abort on ping" doesn't appear on the
machines I tested, and build log (not publicly available) says
liboping was used.
--
Petri Latvala
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply [flat|nested] 12+ messages in thread
* [igt-dev] [PATCH i-g-t v3 1/2] runner: Add support for aborting on network failure
@ 2019-05-20 10:16 Petri Latvala
2019-05-20 10:16 ` [igt-dev] [PATCH i-g-t 2/2] HAX: Check all conditions to abort Petri Latvala
0 siblings, 1 reply; 12+ messages in thread
From: Petri Latvala @ 2019-05-20 10:16 UTC (permalink / raw)
To: igt-dev; +Cc: Tomi Sarvela, Petri Latvala, Daniel Vetter
If the network goes down while testing, CI tends to interpret that as
the device being down, cutting its power after a while. This causes an
incomplete to an innocent test, increasing noise in the results.
A new flag to --abort-on-monitored-error, "ping", uses liboping to
ping a host configured in .igtrc with one ping after each test
execution and aborts the run if there is no reply in a hardcoded
amount of time.
v2:
- Use a higher timeout
- Allow hostname configuration from environment
v3:
- Use runner_c_args for holding c args for runner
- Handle runner's meson options in runner/meson.build
- Instead of one ping with 20 second timeout, ping with 1 second timeout
for a duration of 20 seconds
Signed-off-by: Petri Latvala <petri.latvala@intel.com>
Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
Cc: Martin Peres <martin.peres@linux.intel.com>
Cc: Tomi Sarvela <tomi.p.sarvela@intel.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
---
meson_options.txt | 6 ++
runner/executor.c | 150 +++++++++++++++++++++++++++++++++++++++++++++
runner/meson.build | 14 ++++-
runner/settings.c | 4 ++
runner/settings.h | 5 +-
5 files changed, 176 insertions(+), 3 deletions(-)
diff --git a/meson_options.txt b/meson_options.txt
index 888efe56..935f883e 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -58,6 +58,12 @@ option('build_runner',
choices : ['auto', 'true', 'false'],
description : 'Build test runner')
+option('with_oping',
+ type : 'combo',
+ value : 'auto',
+ choices : ['auto', 'true', 'false'],
+ description : 'Build igt_runner with liboping')
+
option('use_rpath',
type : 'boolean',
value : false,
diff --git a/runner/executor.c b/runner/executor.c
index 7e5fbe8f..c07e53fa 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -1,6 +1,10 @@
#include <errno.h>
#include <fcntl.h>
+#include <glib.h>
#include <linux/watchdog.h>
+#if HAVE_OPING
+#include <oping.h>
+#endif
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
@@ -16,6 +20,7 @@
#include <time.h>
#include <unistd.h>
+#include "igt_aux.h"
#include "igt_core.h"
#include "executor.h"
#include "output_strings.h"
@@ -108,6 +113,147 @@ static void ping_watchdogs(void)
}
}
+#if HAVE_OPING
+static pingobj_t *pingobj = NULL;
+
+static bool load_ping_config_from_file(void)
+{
+ char *key_file_env = NULL;
+ char *key_file_loc = NULL;
+ GError *error = NULL;
+ GKeyFile *key_file = NULL;
+ const char *ping_hostname;
+ int ret;
+
+ /* Determine igt config path */
+ key_file_env = getenv("IGT_CONFIG_PATH");
+ if (key_file_env) {
+ key_file_loc = strdup(key_file_env);
+ } else {
+ asprintf(&key_file_loc, "%s/.igtrc", g_get_home_dir());
+ }
+
+ /* Load igt config file */
+ key_file = g_key_file_new();
+ ret = g_key_file_load_from_file(key_file, key_file_loc,
+ G_KEY_FILE_NONE, &error);
+ free(key_file_loc);
+
+ if (!ret) {
+ g_error_free(error);
+ g_key_file_free(key_file);
+ return false;
+ }
+
+ g_clear_error(&error);
+
+ ping_hostname =
+ g_key_file_get_string(key_file, "DUT",
+ "PingHostName", &error);
+
+ g_clear_error(&error);
+ g_key_file_free(key_file);
+
+ if (!ping_hostname)
+ return false;
+
+ if (ping_host_add(pingobj, ping_hostname)) {
+ fprintf(stderr,
+ "abort on ping: Cannot use hostname from config file\n");
+ return false;
+ }
+
+ return true;
+}
+
+static bool load_ping_config_from_env(void)
+{
+ const char *ping_hostname;
+
+ ping_hostname = getenv("IGT_PING_HOSTNAME");
+ if (!ping_hostname)
+ return false;
+
+ if (ping_host_add(pingobj, ping_hostname)) {
+ fprintf(stderr,
+ "abort on ping: Cannot use hostname from environment\n");
+ return false;
+ }
+
+ return true;
+}
+
+static bool can_ping(void)
+{
+ /*
+ * On some hosts, getting network back up after suspend takes
+ * upwards of 10 seconds. 20 seconds should be enough to see
+ * if network comes back at all, and hopefully not too long to
+ * make external monitoring freak out.
+ */
+ igt_until_timeout(20) {
+ pingobj_iter_t *iter;
+
+ ping_send(pingobj);
+
+ for (iter = ping_iterator_get(pingobj);
+ iter != NULL;
+ iter = ping_iterator_next(iter)) {
+ double latency;
+ size_t len = sizeof(latency);
+
+ ping_iterator_get_info(iter,
+ PING_INFO_LATENCY,
+ &latency,
+ &len);
+ if (latency >= 0.0)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+#endif
+
+static void ping_config(void)
+{
+#if HAVE_OPING
+ double timeout = 1.0;
+
+ if (pingobj)
+ return;
+
+ pingobj = ping_construct();
+
+ /* Try env first, then config file */
+ if (!load_ping_config_from_env() && !load_ping_config_from_file()) {
+ fprintf(stderr,
+ "abort on ping: No host to ping configured\n");
+ ping_destroy(pingobj);
+ pingobj = NULL;
+ return;
+ }
+
+ ping_setopt(pingobj, PING_OPT_TIMEOUT, &timeout);
+#endif
+}
+
+static char *handle_ping(void)
+{
+#if HAVE_OPING
+ if (pingobj && !can_ping()) {
+ char *reason;
+
+ asprintf(&reason,
+ "Ping host did not respond to ping, network down");
+ return reason;
+ }
+#endif
+
+ return NULL;
+}
+
static char *handle_lockdep(void)
{
const char *header = "Lockdep not active\n\n/proc/lockdep_stats contents:\n";
@@ -197,6 +343,7 @@ static const struct {
} abort_handlers[] = {
{ ABORT_LOCKDEP, handle_lockdep },
{ ABORT_TAINT, handle_taint },
+ { ABORT_PING, handle_ping },
{ 0, 0 },
};
@@ -1288,6 +1435,9 @@ bool execute(struct execute_state *state,
init_watchdogs(settings);
+ if (settings->abort_mask & ABORT_PING)
+ ping_config();
+
if (!uname(&unamebuf)) {
dprintf(unamefd, "%s %s %s %s %s\n",
unamebuf.sysname,
diff --git a/runner/meson.build b/runner/meson.build
index b658f1d2..6d60c530 100644
--- a/runner/meson.build
+++ b/runner/meson.build
@@ -1,4 +1,15 @@
jsonc = dependency('json-c', required: _runner_required)
+runner_deps = [jsonc, glib]
+runner_c_args = []
+
+with_oping = get_option('with_oping')
+if with_oping != 'false'
+ oping = dependency('liboping', required: with_oping == 'true')
+ if oping.found()
+ runner_deps += oping
+ runner_c_args += '-DHAVE_OPING=1'
+ endif
+endif
runnerlib_sources = [ 'settings.c',
'job_list.c',
@@ -21,7 +32,8 @@ if _build_runner and _build_tests and jsonc.found()
runnerlib = static_library('igt_runner', runnerlib_sources,
include_directories : inc,
- dependencies : [jsonc, glib])
+ c_args : runner_c_args,
+ dependencies : runner_deps)
runner = executable('igt_runner', runner_sources,
link_with : runnerlib,
diff --git a/runner/settings.c b/runner/settings.c
index ad38ae8d..b57d1a6a 100644
--- a/runner/settings.c
+++ b/runner/settings.c
@@ -48,6 +48,7 @@ static struct {
} abort_conditions[] = {
{ ABORT_TAINT, "taint" },
{ ABORT_LOCKDEP, "lockdep" },
+ { ABORT_PING, "ping" },
{ ABORT_ALL, "all" },
{ 0, 0 },
};
@@ -136,6 +137,9 @@ static const char *usage_str =
" Possible conditions:\n"
" lockdep - abort when kernel lockdep has been angered.\n"
" taint - abort when kernel becomes fatally tainted.\n"
+ " ping - abort when a host configured in .igtrc or\n"
+ " environment variable IGT_PING_HOSTNAME does\n"
+ " not respond to ping.\n"
" all - abort for all of the above.\n"
" -s, --sync Sync results to disk after every test\n"
" -l {quiet,verbose,dummy}, --log-level {quiet,verbose,dummy}\n"
diff --git a/runner/settings.h b/runner/settings.h
index 0a1ee08d..2b6e65d0 100644
--- a/runner/settings.h
+++ b/runner/settings.h
@@ -15,9 +15,10 @@ enum {
#define ABORT_TAINT (1 << 0)
#define ABORT_LOCKDEP (1 << 1)
-#define ABORT_ALL (ABORT_TAINT | ABORT_LOCKDEP)
+#define ABORT_PING (1 << 2)
+#define ABORT_ALL (ABORT_TAINT | ABORT_LOCKDEP | ABORT_PING)
-_Static_assert(ABORT_ALL == (ABORT_TAINT | ABORT_LOCKDEP), "ABORT_ALL must be all conditions bitwise or'd");
+_Static_assert(ABORT_ALL == (ABORT_TAINT | ABORT_LOCKDEP | ABORT_PING), "ABORT_ALL must be all conditions bitwise or'd");
struct regex_list {
char **regex_strings;
--
2.19.1
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [igt-dev] [PATCH i-g-t 2/2] HAX: Check all conditions to abort
2019-05-20 10:16 [igt-dev] [PATCH i-g-t v3 1/2] " Petri Latvala
@ 2019-05-20 10:16 ` Petri Latvala
2019-05-20 11:51 ` Petri Latvala
0 siblings, 1 reply; 12+ messages in thread
From: Petri Latvala @ 2019-05-20 10:16 UTC (permalink / raw)
To: igt-dev; +Cc: Petri Latvala
---
runner/executor.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/runner/executor.c b/runner/executor.c
index c07e53fa..4a44c633 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -354,7 +354,7 @@ static char *need_to_abort(const struct settings* settings)
for (it = abort_handlers; it->condition; it++) {
char *abort;
- if (!(settings->abort_mask & it->condition))
+ if (false && !(settings->abort_mask & it->condition))
continue;
abort = it->handler();
--
2.19.1
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [igt-dev] [PATCH i-g-t 2/2] HAX: Check all conditions to abort
2019-05-20 10:16 ` [igt-dev] [PATCH i-g-t 2/2] HAX: Check all conditions to abort Petri Latvala
@ 2019-05-20 11:51 ` Petri Latvala
2019-05-20 12:16 ` Petri Latvala
0 siblings, 1 reply; 12+ messages in thread
From: Petri Latvala @ 2019-05-20 11:51 UTC (permalink / raw)
To: igt-dev; +Cc: Petri Latvala
... and remember to HAX the ping setup as well
---
runner/executor.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/runner/executor.c b/runner/executor.c
index c07e53fa..af87e312 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -354,7 +354,7 @@ static char *need_to_abort(const struct settings* settings)
for (it = abort_handlers; it->condition; it++) {
char *abort;
- if (!(settings->abort_mask & it->condition))
+ if (false && !(settings->abort_mask & it->condition))
continue;
abort = it->handler();
@@ -1435,7 +1435,7 @@ bool execute(struct execute_state *state,
init_watchdogs(settings);
- if (settings->abort_mask & ABORT_PING)
+ if (true && settings->abort_mask & ABORT_PING)
ping_config();
if (!uname(&unamebuf)) {
--
2.19.1
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [igt-dev] [PATCH i-g-t 2/2] HAX: Check all conditions to abort
2019-05-20 11:51 ` Petri Latvala
@ 2019-05-20 12:16 ` Petri Latvala
0 siblings, 0 replies; 12+ messages in thread
From: Petri Latvala @ 2019-05-20 12:16 UTC (permalink / raw)
To: igt-dev; +Cc: Petri Latvala
... and remember to HAX the ping setup as well
... correctly
---
runner/executor.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/runner/executor.c b/runner/executor.c
index c07e53fa..8cb09dda 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -354,7 +354,7 @@ static char *need_to_abort(const struct settings* settings)
for (it = abort_handlers; it->condition; it++) {
char *abort;
- if (!(settings->abort_mask & it->condition))
+ if (false && !(settings->abort_mask & it->condition))
continue;
abort = it->handler();
@@ -1435,7 +1435,7 @@ bool execute(struct execute_state *state,
init_watchdogs(settings);
- if (settings->abort_mask & ABORT_PING)
+ if (true || settings->abort_mask & ABORT_PING)
ping_config();
if (!uname(&unamebuf)) {
--
2.19.1
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [igt-dev] [PATCH i-g-t 1/2] runner: Add support for aborting on network failure
@ 2018-12-10 13:41 Petri Latvala
2018-12-10 13:41 ` [igt-dev] [PATCH i-g-t 2/2] HAX: Check all conditions to abort Petri Latvala
0 siblings, 1 reply; 12+ messages in thread
From: Petri Latvala @ 2018-12-10 13:41 UTC (permalink / raw)
To: igt-dev; +Cc: Tomi Sarvela, Petri Latvala, Martin Peres
If the network goes down while testing, CI tends to interpret that as
the device being down, cutting its power after a while. This causes an
incomplete to an innocent test, increasing noise in the results.
A new flag to --abort-on-monitored-error, "ping", uses liboping to
ping a host configured in .igtrc with one ping after each test
execution and aborts the run if there is no reply in a hardcoded
amount of time.
Signed-off-by: Petri Latvala <petri.latvala@intel.com>
Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
Cc: Martin Peres <martin.peres@linux.intel.com>
Cc: Tomi Sarvela <tomi.p.sarvela@intel.com>
---
meson.build | 1 +
meson_options.txt | 6 +++
runner/executor.c | 104 +++++++++++++++++++++++++++++++++++++++++++++
runner/meson.build | 12 +++++-
runner/settings.c | 3 ++
runner/settings.h | 5 ++-
6 files changed, 128 insertions(+), 3 deletions(-)
diff --git a/meson.build b/meson.build
index 469723dc..a9fc0999 100644
--- a/meson.build
+++ b/meson.build
@@ -86,6 +86,7 @@ build_tests = get_option('build_tests')
with_libdrm = get_option('with_libdrm')
with_libunwind = get_option('with_libunwind')
build_runner = get_option('build_runner')
+with_oping = get_option('with_oping')
_build_overlay = build_overlay != 'false'
_overlay_required = build_overlay == 'true'
diff --git a/meson_options.txt b/meson_options.txt
index 0cd3b350..a5935704 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -64,6 +64,12 @@ option('build_runner',
choices : ['auto', 'true', 'false'],
description : 'Build test runner')
+option('with_oping',
+ type : 'combo',
+ value : 'auto',
+ choices : ['auto', 'true', 'false'],
+ description : 'Build igt_runner with liboping')
+
option('use_rpath',
type : 'boolean',
value : false,
diff --git a/runner/executor.c b/runner/executor.c
index 54c530b7..bb0fc772 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -1,6 +1,10 @@
#include <errno.h>
#include <fcntl.h>
+#include <glib.h>
#include <linux/watchdog.h>
+#if HAVE_OPING
+#include <oping.h>
+#endif
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
@@ -108,6 +112,104 @@ static void ping_watchdogs(void)
}
}
+#if HAVE_OPING
+static pingobj_t *pingobj = NULL;
+#endif
+
+static void load_ping_config(void)
+{
+#if HAVE_OPING
+ char *key_file_env = NULL;
+ char *key_file_loc = NULL;
+ GError *error = NULL;
+ GKeyFile *key_file = NULL;
+ const char *ping_hostname;
+ double timeout = 2.0; /* Fair dice roll */
+
+ if (pingobj)
+ return;
+
+ /* Determine igt config path */
+ key_file_env = getenv("IGT_CONFIG_PATH");
+ if (key_file_env) {
+ key_file_loc = key_file_env;
+ } else {
+ key_file_loc = malloc(100);
+ snprintf(key_file_loc, 100, "%s/.igtrc", g_get_home_dir());
+ }
+
+ /* Load igt config file */
+ key_file = g_key_file_new();
+ g_key_file_load_from_file(key_file, key_file_loc,
+ G_KEY_FILE_NONE, &error);
+ if (error && error->code == G_KEY_FILE_ERROR) {
+ g_error_free(error);
+ key_file = NULL;
+
+ goto out;
+ }
+
+ g_clear_error(&error);
+
+ ping_hostname =
+ g_key_file_get_string(key_file, "DUT",
+ "PingHostName", &error);
+
+ g_clear_error(&error);
+
+ if (!ping_hostname)
+ return;
+
+ pingobj = ping_construct();
+ if (ping_host_add(pingobj, ping_hostname)) {
+ fprintf(stderr,
+ "abort on ping: Cannot add configured hostname\n");
+ ping_destroy(pingobj);
+ pingobj = NULL;
+ return;
+ }
+
+ ping_setopt(pingobj, PING_OPT_TIMEOUT, &timeout);
+
+out:
+ if (!key_file_env && key_file_loc)
+ free(key_file_loc);
+ g_key_file_free(key_file);
+#endif
+}
+
+static char *handle_ping(void)
+{
+#if HAVE_OPING
+ if (pingobj) {
+ pingobj_iter_t *iter;
+
+ ping_send(pingobj);
+
+ for (iter = ping_iterator_get(pingobj);
+ iter != NULL;
+ iter = ping_iterator_next(iter)) {
+ double latency;
+ size_t len = sizeof(latency);
+
+ ping_iterator_get_info(iter,
+ PING_INFO_LATENCY,
+ &latency,
+ &len);
+ if (latency < 0.0) {
+ char *reason;
+
+ asprintf(&reason,
+ "Ping host did not respond to ping, network down");
+ return reason;
+ }
+ }
+ }
+#endif
+
+ return NULL;
+}
+
static char *handle_lockdep(void)
{
const char *header = "Lockdep not active\n\n/proc/lockdep_stats contents:\n";
@@ -175,6 +277,7 @@ static const struct {
} abort_handlers[] = {
{ ABORT_LOCKDEP, handle_lockdep },
{ ABORT_TAINT, handle_taint },
+ { ABORT_PING, handle_ping },
{ 0, 0 },
};
@@ -1238,6 +1341,7 @@ bool execute(struct execute_state *state,
}
init_watchdogs(settings);
+ load_ping_config();
if (!uname(&unamebuf)) {
dprintf(unamefd, "%s %s %s %s %s\n",
diff --git a/runner/meson.build b/runner/meson.build
index de6e6f1c..218b492e 100644
--- a/runner/meson.build
+++ b/runner/meson.build
@@ -1,4 +1,13 @@
jsonc = dependency('json-c', required: _runner_required)
+runner_deps = [jsonc, glib]
+have_oping = []
+if with_oping != 'false'
+ oping = dependency('liboping', required: with_oping == 'true')
+ if oping.found()
+ runner_deps += oping
+ have_oping = '-DHAVE_OPING=1'
+ endif
+endif
runnerlib_sources = [ 'settings.c',
'job_list.c',
@@ -17,7 +26,8 @@ if _build_runner and jsonc.found()
runnerlib = static_library('igt_runner', runnerlib_sources,
include_directories : inc,
- dependencies : jsonc)
+ c_args : have_oping,
+ dependencies : runner_deps)
runner = executable('igt_runner', runner_sources,
link_with : runnerlib,
diff --git a/runner/settings.c b/runner/settings.c
index e64244e6..c531b9a4 100644
--- a/runner/settings.c
+++ b/runner/settings.c
@@ -47,6 +47,7 @@ static struct {
} abort_conditions[] = {
{ ABORT_TAINT, "taint" },
{ ABORT_LOCKDEP, "lockdep" },
+ { ABORT_PING, "ping" },
{ ABORT_ALL, "all" },
{ 0, 0 },
};
@@ -135,6 +136,8 @@ static const char *usage_str =
" Possible conditions:\n"
" lockdep - abort when kernel lockdep has been angered.\n"
" taint - abort when kernel becomes fatally tainted.\n"
+ " ping - abort when a host configured in .igtrc does\n"
+ " not respond to ping.\n"
" all - abort for all of the above.\n"
" -s, --sync Sync results to disk after every test\n"
" -l {quiet,verbose,dummy}, --log-level {quiet,verbose,dummy}\n"
diff --git a/runner/settings.h b/runner/settings.h
index 267d72cf..997e7370 100644
--- a/runner/settings.h
+++ b/runner/settings.h
@@ -14,9 +14,10 @@ enum {
#define ABORT_TAINT (1 << 0)
#define ABORT_LOCKDEP (1 << 1)
-#define ABORT_ALL (ABORT_TAINT | ABORT_LOCKDEP)
+#define ABORT_PING (1 << 2)
+#define ABORT_ALL (ABORT_TAINT | ABORT_LOCKDEP | ABORT_PING)
-_Static_assert(ABORT_ALL == (ABORT_TAINT | ABORT_LOCKDEP), "ABORT_ALL must be all conditions bitwise or'd");
+_Static_assert(ABORT_ALL == (ABORT_TAINT | ABORT_LOCKDEP | ABORT_PING), "ABORT_ALL must be all conditions bitwise or'd");
struct regex_list {
char **regex_strings;
--
2.19.1
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply related [flat|nested] 12+ messages in thread
end of thread, other threads:[~2019-05-20 12:16 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-16 11:27 [igt-dev] [PATCH i-g-t 1/2 v2] runner: Add support for aborting on network failure Petri Latvala
2019-05-16 11:27 ` [igt-dev] [PATCH i-g-t 2/2] HAX: Check all conditions to abort Petri Latvala
2019-05-16 12:23 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2,v2] runner: Add support for aborting on network failure Patchwork
2019-05-16 12:51 ` [igt-dev] [PATCH i-g-t 1/2 v2] " Daniel Vetter
2019-05-16 14:45 ` [igt-dev] ✓ Fi.CI.IGT: success for series starting with [i-g-t,1/2,v2] " Patchwork
2019-05-20 6:17 ` [igt-dev] [PATCH i-g-t 1/2 v2] " Arkadiusz Hiler
2019-05-20 6:30 ` Ser, Simon
2019-05-20 9:36 ` Petri Latvala
-- strict thread matches above, loose matches on Subject: below --
2019-05-20 10:16 [igt-dev] [PATCH i-g-t v3 1/2] " Petri Latvala
2019-05-20 10:16 ` [igt-dev] [PATCH i-g-t 2/2] HAX: Check all conditions to abort Petri Latvala
2019-05-20 11:51 ` Petri Latvala
2019-05-20 12:16 ` Petri Latvala
2018-12-10 13:41 [igt-dev] [PATCH i-g-t 1/2] runner: Add support for aborting on network failure Petri Latvala
2018-12-10 13:41 ` [igt-dev] [PATCH i-g-t 2/2] HAX: Check all conditions to abort Petri Latvala
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.