All of lore.kernel.org
 help / color / mirror / Atom feed
* [igt-dev] [PATCH] tests: Add a new test for driver/device hot reload
@ 2019-04-03 16:01 Janusz Krzysztofik
  2019-04-04 13:17 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Janusz Krzysztofik @ 2019-04-03 16:01 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Janusz Krzysztofik, Petri Latvala, Lee, Simon B, igt-dev

From: Janusz Krzysztofik <janusz.krzysztofik@intel.com>

Run a dummy load in background to put some workload on a device, then try
to either remove (unplug) the device from its bus, or unbind the device's
driver from it, depending on which subtest has been selected.  If
succeeded, unload the driver, rescan the device's bus if needed and
perform health checks on the device with the driver reloaded.

The driver hot unbind / device hot unplug operation is expected to succeed
in a reasonable time, however long timeouts are used to let kernel level
timeouts pop up first if hit by a bug.

The dummy load works only with i915 driver.  The test is skipped on other
hardware unless they provide their implementation of igt_spin_batch_new()
and friends.

Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@intel.com>
---
Changelog:
v1->v2:
- renamed to core_hot_reload,
- extended with module unload, device rediscover (unplug case), module reload 
  and healthcheck operations following the initial unplug/unbind, as requested
  by Daniel and Peter,
- fail if anything goes wrong, as requested by Daniel
- refactored a lot to reduce code redundancy.

Resending now with the igt-dev list address added to Cc:, sorry

 tests/Makefile.sources  |   1 +
 tests/core_hot_reload.c | 248 ++++++++++++++++++++++++++++++++++++++++
 tests/meson.build       |   1 +
 3 files changed, 250 insertions(+)
 create mode 100644 tests/core_hot_reload.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 86ad301e..5f9dba30 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -16,6 +16,7 @@ TESTS_progs = \
 	core_getclient \
 	core_getstats \
 	core_getversion \
+	core_hot_reload \
 	core_setmaster_vs_auth \
 	debugfs_test \
 	drm_import_export \
diff --git a/tests/core_hot_reload.c b/tests/core_hot_reload.c
new file mode 100644
index 00000000..96f0efd0
--- /dev/null
+++ b/tests/core_hot_reload.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "igt.h"
+#include "igt_device.h"
+#include "igt_dummyload.h"
+#include "igt_kmod.h"
+#include "igt_sysfs.h"
+
+#include <getopt.h>
+#include <limits.h>
+#include <string.h>
+#include <unistd.h>
+
+
+typedef void (*workload_wait_t)(int device, void *priv);
+typedef int (*action_t)(int dir);
+typedef int (*workload_t)(int device, void *priv, action_t action);
+
+
+/*
+ * Actions
+ */
+
+/* Unbind the driver from the device */
+static int driver_unbind(int dir)
+{
+	char path[PATH_MAX], *dev_bus_addr;
+	int len;
+
+	len = readlinkat(dir, "device", path, sizeof(path) - 1);
+	path[len] = '\0';
+	dev_bus_addr = strrchr(path, '/') + 1;
+
+	igt_set_timeout(60, "Driver unbind timeout!");
+	igt_sysfs_set(dir, "device/driver/unbind", dev_bus_addr);
+
+	/* No need for bus rescan */
+	return -1;
+}
+
+/* Returns open file descriptor to a directory of a bus to rescan for re-plug */
+static int device_unplug(int dir)
+{
+	int bus;
+
+	bus = openat(dir, "device/subsystem", O_DIRECTORY);
+	igt_assert(bus >= 0);
+
+	igt_set_timeout(60, "Device unplug timeout!");
+	igt_sysfs_set(dir, "device/remove", "1");
+
+	return bus;
+}
+
+
+/*
+ * Operation template
+ */
+static int operation(int device, action_t action, workload_wait_t workload_wait,
+		     void *priv)
+{
+	int dir, bus;
+
+	dir = igt_sysfs_open(device);
+
+	bus = action(dir);
+	close(dir);
+
+	if (workload_wait)
+		workload_wait(device, priv);
+	igt_reset_timeout();
+
+	return bus;
+}
+
+
+/*
+ * Workloads
+ */
+
+/* Workload using igt_spin_batch_run() */
+
+static void spin_batch_wait(int device, void *priv)
+{
+	igt_spin_t *spin = priv;
+
+	/* wait until the workload has crashed */
+	gem_wait(device, spin->handle, NULL);
+}
+
+static int spin_batch(int device, void *priv, action_t action)
+{
+	igt_spin_t *spin;
+	int bus;
+
+	igt_assert(!priv);
+
+	/* Start the workload in background.
+	 * Make sure it is running and times out after 90 seconds */
+	spin = igt_spin_batch_new(device, .flags = IGT_SPIN_POLL_RUN);
+	igt_spin_busywait_until_running(spin);
+	igt_spin_batch_set_timeout(spin, 90000000000);
+
+	/* run the tested action */
+	bus = operation(device, action, spin_batch_wait, spin);
+
+	/*
+	 * Clean up driver resources possibly not released on workload crash
+	 * so the driver module can be successfully unloaded
+	 */
+	igt_spin_batch_free(device, spin);
+
+	return bus;
+}
+
+
+/*
+ * Skeleton
+ */
+
+static void healthcheck(int chipset, int *device)
+{
+	if (chipset == DRIVER_INTEL) {
+		/*
+		 * We have it perfectly implemented in i915_module_load,
+		 * just use it.
+		 */
+		igt_assert(igt_system_quiet("i915_module_load --run-subtest reload")
+			   == IGT_EXIT_SUCCESS);
+	} else {
+		/*
+		 * We don't know how to check an unidentified device for health,
+		 * just reopen it.
+		 */
+	}
+}
+
+static void driver_unload(int chipset, char *driver)
+{
+	if (chipset == DRIVER_INTEL)
+		igt_assert(igt_i915_driver_unload() == IGT_EXIT_SUCCESS);
+	else
+		igt_assert(igt_kmod_unload(driver, 0) == 0);
+}
+
+static void __subtest(int device, int chipset, char *driver,
+		      workload_t workload, void *priv, action_t action)
+{
+	int bus = workload(device, priv, action);
+
+	close(device);
+	driver_unload(chipset, driver);
+
+	/* Valid file descriptor indicates we should rescan the bus */
+	if (bus >= 0) {
+		igt_sysfs_set(bus, "rescan", "1");
+		close(bus);
+	}
+}
+
+static void run_subtest(int *device, int chipset, char *driver,
+			workload_t workload, void *priv, action_t action)
+{
+	__subtest(*device, chipset, driver, workload, priv, action);
+
+	sleep(2);
+
+	*device = __drm_open_driver(chipset);
+	healthcheck(chipset, device);
+
+	igt_assert(*device >= 0);
+}
+
+
+igt_main {
+	int device, chipset;
+	char *driver;
+
+	igt_fixture {
+		char path[PATH_MAX];
+		int dir, len;
+
+		/*
+		 * Since the test depends on successful unload of driver module,
+		 * don't use drm_open_driver() as it keeps a file descriptor
+		 * open for exit handler use that effectively locks the module.
+		 */
+		device = __drm_open_driver(DRIVER_ANY);
+		igt_assert(device >= 0);
+
+		if (is_i915_device(device)) {
+			chipset = DRIVER_INTEL;
+			driver = strdup("i915");
+		} else {
+			chipset = DRIVER_ANY;
+
+			/* Capture module name to be unloaded */
+			dir = igt_sysfs_open(device);
+			len = readlinkat(dir, "device/driver/module", path,
+					 sizeof(path) - 1);
+			close(dir);
+			path[len] = '\0';
+			driver = strdup(strrchr(path, '/') + 1);
+		}
+		igt_info("Running the test on driver \"%s\", chipset mask %#0x\n",
+			 driver, chipset);
+	}
+
+	igt_subtest("unplug")
+		run_subtest(&device, chipset, driver, spin_batch, NULL,
+			    device_unplug);
+
+	igt_subtest("unbind")
+		run_subtest(&device, chipset, driver, spin_batch, NULL,
+			    driver_unbind);
+
+	igt_fixture {
+		free(driver);
+		close(device);
+		/*
+		 * Now reopen the device with drm_open_driver() for a while
+		 * so exit handler is registered and can do its job on exit.
+		 */
+		device = drm_open_driver(chipset);
+		close(device);
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index 8e1ae4fd..e6b87447 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -3,6 +3,7 @@ test_progs = [
 	'core_getclient',
 	'core_getstats',
 	'core_getversion',
+	'core_hot_reload',
 	'core_setmaster_vs_auth',
 	'debugfs_test',
 	'drm_import_export',
-- 
2.20.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for tests: Add a new test for driver/device hot reload
  2019-04-03 16:01 [igt-dev] [PATCH] tests: Add a new test for driver/device hot reload Janusz Krzysztofik
@ 2019-04-04 13:17 ` Patchwork
  2019-04-04 13:56 ` [igt-dev] [PATCH] " Katarzyna Dec
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2019-04-04 13:17 UTC (permalink / raw)
  To: Janusz Krzysztofik; +Cc: igt-dev

== Series Details ==

Series: tests: Add a new test for driver/device hot reload
URL   : https://patchwork.freedesktop.org/series/58991/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5870 -> IGTPW_2784
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/58991/revisions/1/mbox/

Known issues
------------

  Here are the changes found in IGTPW_2784 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live_hangcheck:
    - fi-skl-iommu:       PASS -> INCOMPLETE [fdo#108602] / [fdo#108744]

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
    - fi-blb-e6850:       PASS -> INCOMPLETE [fdo#107718]

  * igt@runner@aborted:
    - fi-skl-iommu:       NOTRUN -> FAIL [fdo#104108] / [fdo#108602]

  
#### Possible fixes ####

  * igt@debugfs_test@read_all_entries:
    - fi-ilk-650:         DMESG-WARN [fdo#106387] -> PASS

  * igt@kms_frontbuffer_tracking@basic:
    - {fi-icl-u3}:        FAIL [fdo#103167] -> PASS

  * igt@kms_pipe_crc_basic@nonblocking-crc-pipe-a:
    - fi-byt-clapper:     FAIL [fdo#107362] -> PASS

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
    - fi-byt-clapper:     FAIL [fdo#103191] / [fdo#107362] -> PASS +1

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#104108]: https://bugs.freedesktop.org/show_bug.cgi?id=104108
  [fdo#106387]: https://bugs.freedesktop.org/show_bug.cgi?id=106387
  [fdo#107362]: https://bugs.freedesktop.org/show_bug.cgi?id=107362
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#108602]: https://bugs.freedesktop.org/show_bug.cgi?id=108602
  [fdo#108744]: https://bugs.freedesktop.org/show_bug.cgi?id=108744


Participating hosts (50 -> 43)
------------------------------

  Missing    (7): fi-kbl-7567u fi-ilk-m540 fi-bxt-dsi fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 


Build changes
-------------

    * IGT: IGT_4928 -> IGTPW_2784

  CI_DRM_5870: 122dc43df57c9a03132314ddc78a6d60c714fa32 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_2784: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2784/
  IGT_4928: 014a6fa238322b497116b359cb92df1ce7fa8847 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools



== Testlist changes ==

+igt@core_hot_reload@unbind
+igt@core_hot_reload@unplug

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2784/
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [igt-dev] [PATCH] tests: Add a new test for driver/device hot reload
  2019-04-03 16:01 [igt-dev] [PATCH] tests: Add a new test for driver/device hot reload Janusz Krzysztofik
  2019-04-04 13:17 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
@ 2019-04-04 13:56 ` Katarzyna Dec
  2019-04-05  8:39   ` Janusz Krzysztofik
  2019-04-05  3:02 ` [igt-dev] ✗ Fi.CI.IGT: failure for " Patchwork
  2019-04-05 14:11 ` [igt-dev] [PATCH] " Chris Wilson
  3 siblings, 1 reply; 9+ messages in thread
From: Katarzyna Dec @ 2019-04-04 13:56 UTC (permalink / raw)
  To: Janusz Krzysztofik; +Cc: Petri Latvala, Lee, Simon B, igt-dev, Daniel Vetter

On Wed, Apr 03, 2019 at 06:01:22PM +0200, Janusz Krzysztofik wrote:

Sorry for joining the discussion in v2 of this patch :)
> From: Janusz Krzysztofik <janusz.krzysztofik@intel.com>
> 
> Run a dummy load in background to put some workload on a device, then try
> to either remove (unplug) the device from its bus, or unbind the device's
> driver from it, depending on which subtest has been selected.  If
> succeeded, unload the driver, rescan the device's bus if needed and
> perform health checks on the device with the driver reloaded.
> 
> The driver hot unbind / device hot unplug operation is expected to succeed
> in a reasonable time, however long timeouts are used to let kernel level
> timeouts pop up first if hit by a bug.
> 
> The dummy load works only with i915 driver.  The test is skipped on other
> hardware unless they provide their implementation of igt_spin_batch_new()
> and friends.
> 
> Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@intel.com>
> ---
> Changelog:
> v1->v2:
> - renamed to core_hot_reload,
> - extended with module unload, device rediscover (unplug case), module reload 
>   and healthcheck operations following the initial unplug/unbind, as requested
>   by Daniel and Peter,
> - fail if anything goes wrong, as requested by Daniel
> - refactored a lot to reduce code redundancy.
> 
> Resending now with the igt-dev list address added to Cc:, sorry
nit: I do not know if this is a strict rule or just everyone does that, but usually
we put what has changed as a part of commit msg above any Sign-off-by or CC.
And as soon as this is a v2 you could mark it in the patch name (during
generating patch for review).
> 
>  tests/Makefile.sources  |   1 +
>  tests/core_hot_reload.c | 248 ++++++++++++++++++++++++++++++++++++++++
>  tests/meson.build       |   1 +
>  3 files changed, 250 insertions(+)
>  create mode 100644 tests/core_hot_reload.c
> 
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index 86ad301e..5f9dba30 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -16,6 +16,7 @@ TESTS_progs = \
>  	core_getclient \
>  	core_getstats \
>  	core_getversion \
> +	core_hot_reload \
>  	core_setmaster_vs_auth \
>  	debugfs_test \
>  	drm_import_export \
> diff --git a/tests/core_hot_reload.c b/tests/core_hot_reload.c
> new file mode 100644
> index 00000000..96f0efd0
> --- /dev/null
> +++ b/tests/core_hot_reload.c
> @@ -0,0 +1,248 @@
> +/*
> + * Copyright © 2019 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "igt.h"
> +#include "igt_device.h"
> +#include "igt_dummyload.h"
> +#include "igt_kmod.h"
> +#include "igt_sysfs.h"
> +
> +#include <getopt.h>
> +#include <limits.h>
> +#include <string.h>
> +#include <unistd.h>
> +
> +
> +typedef void (*workload_wait_t)(int device, void *priv);
> +typedef int (*action_t)(int dir);
> +typedef int (*workload_t)(int device, void *priv, action_t action);
> +
> +
> +/*
> + * Actions
> + */
> +
> +/* Unbind the driver from the device */
It would be nice if you add docs to all/almost all functions like:
/**
 *driver_unbind:
 *@dir: fd of directory to look in
 *
 * Unbind the driver from the device
 * Returns -1 for ?
 */
 Example is not finished. See other files for examples.
> +static int driver_unbind(int dir)
> +{
> +	char path[PATH_MAX], *dev_bus_addr;
> +	int len;
> +
> +	len = readlinkat(dir, "device", path, sizeof(path) - 1);
> +	path[len] = '\0';
> +	dev_bus_addr = strrchr(path, '/') + 1;
> +
> +	igt_set_timeout(60, "Driver unbind timeout!");
> +	igt_sysfs_set(dir, "device/driver/unbind", dev_bus_addr);
> +
> +	/* No need for bus rescan */
> +	return -1;
Why do we always want to return -1?
> +}
> +
> +/* Returns open file descriptor to a directory of a bus to rescan for re-plug */
s/open/opened/
> +static int device_unplug(int dir)
> +{
> +	int bus;
> +
> +	bus = openat(dir, "device/subsystem", O_DIRECTORY);
> +	igt_assert(bus >= 0);
> +
> +	igt_set_timeout(60, "Device unplug timeout!");
> +	igt_sysfs_set(dir, "device/remove", "1");
> +
> +	return bus;
> +}
> +
> +
> +/*
> + * Operation template
> + */
> +static int operation(int device, action_t action, workload_wait_t workload_wait,
> +		     void *priv)
> +{
> +	int dir, bus;
> +
> +	dir = igt_sysfs_open(device);
> +
> +	bus = action(dir);
> +	close(dir);
> +
> +	if (workload_wait)
> +		workload_wait(device, priv);
> +	igt_reset_timeout();
> +
> +	return bus;
> +}
> +
> +
> +/*
> + * Workloads
> + */
> +
> +/* Workload using igt_spin_batch_run() */
> +
> +static void spin_batch_wait(int device, void *priv)
> +{
> +	igt_spin_t *spin = priv;
> +
> +	/* wait until the workload has crashed */
> +	gem_wait(device, spin->handle, NULL);
> +}
> +
> +static int spin_batch(int device, void *priv, action_t action)
> +{
> +	igt_spin_t *spin;
> +	int bus;
> +
> +	igt_assert(!priv);
> +
> +	/* Start the workload in background.
> +	 * Make sure it is running and times out after 90 seconds */
Plase use drm/scripts/checkpatch.pl to check formating of this ^ commend and the
rest of the file.
> +	spin = igt_spin_batch_new(device, .flags = IGT_SPIN_POLL_RUN);
> +	igt_spin_busywait_until_running(spin);
> +	igt_spin_batch_set_timeout(spin, 90000000000);
> +
> +	/* run the tested action */
> +	bus = operation(device, action, spin_batch_wait, spin);
> +
> +	/*
> +	 * Clean up driver resources possibly not released on workload crash
> +	 * so the driver module can be successfully unloaded
> +	 */
> +	igt_spin_batch_free(device, spin);
> +
> +	return bus;
> +}
> +
> +
> +/*
> + * Skeleton
> + */
> +
> +static void healthcheck(int chipset, int *device)
> +{
> +	if (chipset == DRIVER_INTEL) {
> +		/*
> +		 * We have it perfectly implemented in i915_module_load,
> +		 * just use it.
> +		 */
> +		igt_assert(igt_system_quiet("i915_module_load --run-subtest reload")
> +			   == IGT_EXIT_SUCCESS);
> +	} else {
> +		/*
> +		 * We don't know how to check an unidentified device for health,
> +		 * just reopen it.
> +		 */
> +	}
If we do not know/do not want to know about other devices, we could add
requirement to whole binary:
fd = drm_open_driver(DRIVER_INTEL);
igt_require_gem(fd);
> +}
> +
> +static void driver_unload(int chipset, char *driver)
> +{
> +	if (chipset == DRIVER_INTEL)
> +		igt_assert(igt_i915_driver_unload() == IGT_EXIT_SUCCESS);
> +	else
> +		igt_assert(igt_kmod_unload(driver, 0) == 0);
> +}
> +
> +static void __subtest(int device, int chipset, char *driver,
> +		      workload_t workload, void *priv, action_t action)
> +{
> +	int bus = workload(device, priv, action);
> +
> +	close(device);
> +	driver_unload(chipset, driver);
> +
> +	/* Valid file descriptor indicates we should rescan the bus */
> +	if (bus >= 0) {
> +		igt_sysfs_set(bus, "rescan", "1");
> +		close(bus);
> +	}
> +}
> +
> +static void run_subtest(int *device, int chipset, char *driver,
> +			workload_t workload, void *priv, action_t action)
> +{
> +	__subtest(*device, chipset, driver, workload, priv, action);
> +
> +	sleep(2);
Why do we need 2 seconds of sleep here? Where is the data coming from?
> +
> +	*device = __drm_open_driver(chipset);
> +	healthcheck(chipset, device);
> +
> +	igt_assert(*device >= 0);
> +}
> +
> +
> +igt_main {
> +	int device, chipset;
> +	char *driver;
> +
> +	igt_fixture {
> +		char path[PATH_MAX];
> +		int dir, len;
> +
> +		/*
> +		 * Since the test depends on successful unload of driver module,
> +		 * don't use drm_open_driver() as it keeps a file descriptor
> +		 * open for exit handler use that effectively locks the module.
> +		 */
> +		device = __drm_open_driver(DRIVER_ANY);
Based on what you've wrote in commit msg when dummy load works only on
DRIVER_INTEL, why do we even care about other drivers? Why don't we open INTEL
here and skip on other types?

Quite a nice job done here :) Looks like you are going into right direction.
Kasia :)
> +		igt_assert(device >= 0);
> +
> +		if (is_i915_device(device)) {
> +			chipset = DRIVER_INTEL;
> +			driver = strdup("i915");
> +		} else {
> +			chipset = DRIVER_ANY;
> +
> +			/* Capture module name to be unloaded */
> +			dir = igt_sysfs_open(device);
> +			len = readlinkat(dir, "device/driver/module", path,
> +					 sizeof(path) - 1);
> +			close(dir);
> +			path[len] = '\0';
> +			driver = strdup(strrchr(path, '/') + 1);
> +		}
> +		igt_info("Running the test on driver \"%s\", chipset mask %#0x\n",
> +			 driver, chipset);
> +	}
> +
> +	igt_subtest("unplug")
> +		run_subtest(&device, chipset, driver, spin_batch, NULL,
> +			    device_unplug);
> +
> +	igt_subtest("unbind")
> +		run_subtest(&device, chipset, driver, spin_batch, NULL,
> +			    driver_unbind);
> +
> +	igt_fixture {
> +		free(driver);
> +		close(device);
> +		/*
> +		 * Now reopen the device with drm_open_driver() for a while
> +		 * so exit handler is registered and can do its job on exit.
> +		 */
> +		device = drm_open_driver(chipset);
> +		close(device);
> +	}
> +}
> diff --git a/tests/meson.build b/tests/meson.build
> index 8e1ae4fd..e6b87447 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -3,6 +3,7 @@ test_progs = [
>  	'core_getclient',
>  	'core_getstats',
>  	'core_getversion',
> +	'core_hot_reload',
>  	'core_setmaster_vs_auth',
>  	'debugfs_test',
>  	'drm_import_export',
> -- 
> 2.20.1
> 
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [igt-dev] ✗ Fi.CI.IGT: failure for tests: Add a new test for driver/device hot reload
  2019-04-03 16:01 [igt-dev] [PATCH] tests: Add a new test for driver/device hot reload Janusz Krzysztofik
  2019-04-04 13:17 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
  2019-04-04 13:56 ` [igt-dev] [PATCH] " Katarzyna Dec
@ 2019-04-05  3:02 ` Patchwork
  2019-04-05 14:11 ` [igt-dev] [PATCH] " Chris Wilson
  3 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2019-04-05  3:02 UTC (permalink / raw)
  To: Janusz Krzysztofik; +Cc: igt-dev

== Series Details ==

Series: tests: Add a new test for driver/device hot reload
URL   : https://patchwork.freedesktop.org/series/58991/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_5870_full -> IGTPW_2784_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with IGTPW_2784_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in IGTPW_2784_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/58991/revisions/1/mbox/

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in IGTPW_2784_full:

### IGT changes ###

#### Possible regressions ####

  * {igt@core_hot_reload@unbind} (NEW):
    - shard-apl:          NOTRUN -> DMESG-FAIL +1
    - shard-snb:          NOTRUN -> DMESG-FAIL +1
    - shard-kbl:          NOTRUN -> DMESG-FAIL +1

  * {igt@core_hot_reload@unplug} (NEW):
    - shard-hsw:          NOTRUN -> DMESG-FAIL +1
    - shard-glk:          NOTRUN -> DMESG-FAIL +1

  * igt@runner@aborted:
    - shard-hsw:          NOTRUN -> ( 2 FAIL )
    - shard-snb:          NOTRUN -> ( 2 FAIL )

  
New tests
---------

  New tests have been introduced between CI_DRM_5870_full and IGTPW_2784_full:

### New IGT tests (2) ###

  * igt@core_hot_reload@unbind:
    - Statuses : 5 dmesg-fail(s)
    - Exec time: [3.71, 90.00] s

  * igt@core_hot_reload@unplug:
    - Statuses : 5 dmesg-fail(s)
    - Exec time: [2.94, 90.00] s

  

Known issues
------------

  Here are the changes found in IGTPW_2784_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_reuse@baggage:
    - shard-glk:          PASS -> INCOMPLETE [fdo#103359] / [k.org#198133]

  * igt@i915_pm_rpm@system-suspend-execbuf:
    - shard-kbl:          PASS -> INCOMPLETE [fdo#103665] / [fdo#107807]

  * igt@i915_pm_rps@reset:
    - shard-apl:          PASS -> FAIL [fdo#102250]

  * igt@i915_suspend@fence-restore-untiled:
    - shard-kbl:          PASS -> INCOMPLETE [fdo#103665]

  * igt@kms_atomic_transition@4x-modeset-transitions:
    - shard-apl:          NOTRUN -> SKIP [fdo#109271] / [fdo#109278]

  * igt@kms_busy@extended-modeset-hang-newfb-render-a:
    - shard-hsw:          PASS -> DMESG-WARN [fdo#110222] +2
    - shard-kbl:          PASS -> DMESG-WARN [fdo#110222]

  * igt@kms_busy@extended-pageflip-modeset-hang-oldfb-render-b:
    - shard-snb:          PASS -> DMESG-WARN [fdo#110222] +1

  * igt@kms_content_protection@atomic:
    - shard-apl:          NOTRUN -> FAIL [fdo#108739] / [fdo#110321]

  * igt@kms_cursor_crc@cursor-64x64-dpms:
    - shard-snb:          PASS -> SKIP [fdo#109271]

  * igt@kms_draw_crc@draw-method-xrgb8888-pwrite-xtiled:
    - shard-glk:          PASS -> FAIL [fdo#107791]

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-pri-indfb-draw-pwrite:
    - shard-hsw:          PASS -> SKIP [fdo#109271]

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-indfb-fliptrack:
    - shard-apl:          NOTRUN -> SKIP [fdo#109271] +7

  * igt@kms_frontbuffer_tracking@psr-2p-primscrn-cur-indfb-draw-blt:
    - shard-glk:          NOTRUN -> SKIP [fdo#109271]

  * igt@kms_rotation_crc@multiplane-rotation-cropping-top:
    - shard-kbl:          PASS -> FAIL [fdo#109016]

  * igt@kms_setmode@basic:
    - shard-kbl:          PASS -> FAIL [fdo#99912]

  * igt@kms_vblank@pipe-b-ts-continuation-modeset-hang:
    - shard-apl:          PASS -> FAIL [fdo#104894] +1

  * igt@perf@oa-exponents:
    - shard-glk:          PASS -> FAIL [fdo#105483]

  
#### Possible fixes ####

  * igt@gem_tiled_swapping@non-threaded:
    - shard-hsw:          FAIL [fdo#108686] -> PASS

  * igt@kms_busy@extended-modeset-hang-newfb-render-b:
    - shard-hsw:          DMESG-WARN [fdo#110222] -> PASS
    - shard-snb:          DMESG-WARN [fdo#110222] -> PASS
    - shard-apl:          DMESG-WARN [fdo#110222] -> PASS

  * igt@kms_busy@extended-pageflip-hang-newfb-render-c:
    - shard-glk:          DMESG-WARN [fdo#110222] -> PASS

  * igt@kms_cursor_edge_walk@pipe-a-64x64-left-edge:
    - shard-snb:          SKIP [fdo#109271] / [fdo#109278] -> PASS

  * igt@kms_flip@modeset-vs-vblank-race:
    - shard-kbl:          FAIL [fdo#103060] -> PASS

  * igt@kms_frontbuffer_tracking@fbc-rgb101010-draw-blt:
    - shard-snb:          SKIP [fdo#109271] -> PASS +3

  * igt@kms_vblank@pipe-c-ts-continuation-modeset:
    - shard-kbl:          FAIL [fdo#104894] -> PASS
    - shard-apl:          FAIL [fdo#104894] -> PASS

  
#### Warnings ####

  * igt@kms_lease@atomic_implicit_crtc:
    - shard-snb:          SKIP [fdo#109271] -> FAIL [fdo#110279]

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#102250]: https://bugs.freedesktop.org/show_bug.cgi?id=102250
  [fdo#103060]: https://bugs.freedesktop.org/show_bug.cgi?id=103060
  [fdo#103359]: https://bugs.freedesktop.org/show_bug.cgi?id=103359
  [fdo#103665]: https://bugs.freedesktop.org/show_bug.cgi?id=103665
  [fdo#104894]: https://bugs.freedesktop.org/show_bug.cgi?id=104894
  [fdo#105483]: https://bugs.freedesktop.org/show_bug.cgi?id=105483
  [fdo#107791]: https://bugs.freedesktop.org/show_bug.cgi?id=107791
  [fdo#107807]: https://bugs.freedesktop.org/show_bug.cgi?id=107807
  [fdo#108686]: https://bugs.freedesktop.org/show_bug.cgi?id=108686
  [fdo#108739]: https://bugs.freedesktop.org/show_bug.cgi?id=108739
  [fdo#109016]: https://bugs.freedesktop.org/show_bug.cgi?id=109016
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#110222]: https://bugs.freedesktop.org/show_bug.cgi?id=110222
  [fdo#110279]: https://bugs.freedesktop.org/show_bug.cgi?id=110279
  [fdo#110321]: https://bugs.freedesktop.org/show_bug.cgi?id=110321
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912
  [k.org#198133]: https://bugzilla.kernel.org/show_bug.cgi?id=198133


Participating hosts (9 -> 5)
------------------------------

  Missing    (4): pig-skl-6260u shard-skl pig-glk-j5005 shard-iclb 


Build changes
-------------

    * IGT: IGT_4928 -> IGTPW_2784
    * Piglit: piglit_4509 -> None

  CI_DRM_5870: 122dc43df57c9a03132314ddc78a6d60c714fa32 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_2784: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2784/
  IGT_4928: 014a6fa238322b497116b359cb92df1ce7fa8847 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2784/
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [igt-dev] [PATCH] tests: Add a new test for driver/device hot reload
  2019-04-04 13:56 ` [igt-dev] [PATCH] " Katarzyna Dec
@ 2019-04-05  8:39   ` Janusz Krzysztofik
  0 siblings, 0 replies; 9+ messages in thread
From: Janusz Krzysztofik @ 2019-04-05  8:39 UTC (permalink / raw)
  To: Katarzyna Dec; +Cc: Petri Latvala, Lee, Simon B, igt-dev, Daniel Vetter

Hi Kasia,

Thanks for review.

On Thu, 2019-04-04 at 15:56 +0200, Katarzyna Dec wrote:
> On Wed, Apr 03, 2019 at 06:01:22PM +0200, Janusz Krzysztofik wrote:
> 
> Sorry for joining the discussion in v2 of this patch :)
> > From: Janusz Krzysztofik <janusz.krzysztofik@intel.com>
> > 
> > Run a dummy load in background to put some workload on a device,
> > then try
> > to either remove (unplug) the device from its bus, or unbind the
> > device's
> > driver from it, depending on which subtest has been selected.  If
> > succeeded, unload the driver, rescan the device's bus if needed and
> > perform health checks on the device with the driver reloaded.
> > 
> > The driver hot unbind / device hot unplug operation is expected to
> > succeed
> > in a reasonable time, however long timeouts are used to let kernel
> > level
> > timeouts pop up first if hit by a bug.
> > 
> > The dummy load works only with i915 driver.  The test is skipped on
> > other
> > hardware unless they provide their implementation of
> > igt_spin_batch_new()
> > and friends.
> > 
> > Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@intel.com>
> > ---
> > Changelog:
> > v1->v2:
> > - renamed to core_hot_reload,
> > - extended with module unload, device rediscover (unplug case),
> > module reload 
> >   and healthcheck operations following the initial unplug/unbind,
> > as requested
> >   by Daniel and Peter,
> > - fail if anything goes wrong, as requested by Daniel
> > - refactored a lot to reduce code redundancy.
> > 
> > Resending now with the igt-dev list address added to Cc:, sorry
> nit: I do not know if this is a strict rule or just everyone does
> that, but usually
> we put what has changed as a part of commit msg above any Sign-off-by 
> or CC.

I've just been following an advice I got recently on a kernel related
list to keep changelog out of commit message.  If your rules are
different, I'll be happy to follow them here.

> And as soon as this is a v2 you could mark it in the patch name
> (during
> generating patch for review).

Yeah, I missed that, sorry. 

> >  tests/Makefile.sources  |   1 +
> >  tests/core_hot_reload.c | 248
> > ++++++++++++++++++++++++++++++++++++++++
> >  tests/meson.build       |   1 +
> >  3 files changed, 250 insertions(+)
> >  create mode 100644 tests/core_hot_reload.c
> > 
> > diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> > index 86ad301e..5f9dba30 100644
> > --- a/tests/Makefile.sources
> > +++ b/tests/Makefile.sources
> > @@ -16,6 +16,7 @@ TESTS_progs = \
> >  	core_getclient \
> >  	core_getstats \
> >  	core_getversion \
> > +	core_hot_reload \
> >  	core_setmaster_vs_auth \
> >  	debugfs_test \
> >  	drm_import_export \
> > diff --git a/tests/core_hot_reload.c b/tests/core_hot_reload.c
> > new file mode 100644
> > index 00000000..96f0efd0
> > --- /dev/null
> > +++ b/tests/core_hot_reload.c
> > @@ -0,0 +1,248 @@
> > +/*
> > + * Copyright © 2019 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person
> > obtaining a
> > + * copy of this software and associated documentation files (the
> > "Software"),
> > + * to deal in the Software without restriction, including without
> > limitation
> > + * the rights to use, copy, modify, merge, publish, distribute,
> > sublicense,
> > + * and/or sell copies of the Software, and to permit persons to
> > whom the
> > + * Software is furnished to do so, subject to the following
> > conditions:
> > + *
> > + * The above copyright notice and this permission notice
> > (including the next
> > + * paragraph) shall be included in all copies or substantial
> > portions of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> > EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> > MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
> > EVENT SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
> > DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> > ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> > OTHER DEALINGS
> > + * IN THE SOFTWARE.
> > + */
> > +
> > +#include "igt.h"
> > +#include "igt_device.h"
> > +#include "igt_dummyload.h"
> > +#include "igt_kmod.h"
> > +#include "igt_sysfs.h"
> > +
> > +#include <getopt.h>
> > +#include <limits.h>
> > +#include <string.h>
> > +#include <unistd.h>
> > +
> > +
> > +typedef void (*workload_wait_t)(int device, void *priv);
> > +typedef int (*action_t)(int dir);
> > +typedef int (*workload_t)(int device, void *priv, action_t
> > action);
> > +
> > +
> > +/*
> > + * Actions
> > + */
> > +
> > +/* Unbind the driver from the device */
> It would be nice if you add docs to all/almost all functions like:
> /**
>  *driver_unbind:
>  *@dir: fd of directory to look in
>  *
>  * Unbind the driver from the device
>  * Returns -1 for ?
>  */
>  Example is not finished. See other files for examples.

OK, I'll do my best my final version have shiny comments :-)

> > +static int driver_unbind(int dir)
> > +{
> > +	char path[PATH_MAX], *dev_bus_addr;
> > +	int len;
> > +
> > +	len = readlinkat(dir, "device", path, sizeof(path) - 1);
> > +	path[len] = '\0';
> > +	dev_bus_addr = strrchr(path, '/') + 1;
> > +
> > +	igt_set_timeout(60, "Driver unbind timeout!");
> > +	igt_sysfs_set(dir, "device/driver/unbind", dev_bus_addr);
> > +
> > +	/* No need for bus rescan */
> > +	return -1;
> Why do we always want to return -1?

Return value is used by a caller as a file decriptor to a device bus'
sysfs directory for rescanning the bus.  In case of driver unbind
operation, the device does not disappear from its bus so there is no
need to rescan the bus, that's why an invalid file descriptor is passed
back here. The caller knows what that means.

> > +}
> > +
> > +/* Returns open file descriptor to a directory of a bus to rescan
> > for re-plug */
> s/open/opened/
> > +static int device_unplug(int dir)
> > +{
> > +	int bus;
> > +
> > +	bus = openat(dir, "device/subsystem", O_DIRECTORY);
> > +	igt_assert(bus >= 0);
> > +
> > +	igt_set_timeout(60, "Device unplug timeout!");
> > +	igt_sysfs_set(dir, "device/remove", "1");
> > +
> > +	return bus;
> > +}
> > +
> > +
> > +/*
> > + * Operation template
> > + */
> > +static int operation(int device, action_t action, workload_wait_t
> > workload_wait,
> > +		     void *priv)
> > +{
> > +	int dir, bus;
> > +
> > +	dir = igt_sysfs_open(device);
> > +
> > +	bus = action(dir);
> > +	close(dir);
> > +
> > +	if (workload_wait)
> > +		workload_wait(device, priv);
> > +	igt_reset_timeout();
> > +
> > +	return bus;
> > +}
> > +
> > +
> > +/*
> > + * Workloads
> > + */
> > +
> > +/* Workload using igt_spin_batch_run() */
> > +
> > +static void spin_batch_wait(int device, void *priv)
> > +{
> > +	igt_spin_t *spin = priv;
> > +
> > +	/* wait until the workload has crashed */
> > +	gem_wait(device, spin->handle, NULL);
> > +}
> > +
> > +static int spin_batch(int device, void *priv, action_t action)
> > +{
> > +	igt_spin_t *spin;
> > +	int bus;
> > +
> > +	igt_assert(!priv);
> > +
> > +	/* Start the workload in background.
> > +	 * Make sure it is running and times out after 90 seconds */
> Plase use drm/scripts/checkpatch.pl to check formating of this ^
> commend and the
> rest of the file.

OK, I was not aware we have it.

> > +	spin = igt_spin_batch_new(device, .flags = IGT_SPIN_POLL_RUN);
> > +	igt_spin_busywait_until_running(spin);
> > +	igt_spin_batch_set_timeout(spin, 90000000000);
> > +
> > +	/* run the tested action */
> > +	bus = operation(device, action, spin_batch_wait, spin);
> > +
> > +	/*
> > +	 * Clean up driver resources possibly not released on workload
> > crash
> > +	 * so the driver module can be successfully unloaded
> > +	 */
> > +	igt_spin_batch_free(device, spin);
> > +
> > +	return bus;
> > +}
> > +
> > +
> > +/*
> > + * Skeleton
> > + */
> > +
> > +static void healthcheck(int chipset, int *device)
> > +{
> > +	if (chipset == DRIVER_INTEL) {
> > +		/*
> > +		 * We have it perfectly implemented in
> > i915_module_load,
> > +		 * just use it.
> > +		 */
> > +		igt_assert(igt_system_quiet("i915_module_load --run-
> > subtest reload")
> > +			   == IGT_EXIT_SUCCESS);
> > +	} else {
> > +		/*
> > +		 * We don't know how to check an unidentified device
> > for health,
> > +		 * just reopen it.
> > +		 */
> > +	}
> If we do not know/do not want to know about other devices, we could
> add
> requirement to whole binary:
> fd = drm_open_driver(DRIVER_INTEL);
> igt_require_gem(fd);

My idea was to create a hardware agnostic tool, leaving placeholders
wherever no required funtionality exists for non-Intel hardware.

> > +}
> > +
> > +static void driver_unload(int chipset, char *driver)
> > +{
> > +	if (chipset == DRIVER_INTEL)
> > +		igt_assert(igt_i915_driver_unload() ==
> > IGT_EXIT_SUCCESS);
> > +	else
> > +		igt_assert(igt_kmod_unload(driver, 0) == 0);
> > +}
> > +
> > +static void __subtest(int device, int chipset, char *driver,
> > +		      workload_t workload, void *priv, action_t action)
> > +{
> > +	int bus = workload(device, priv, action);
> > +
> > +	close(device);
> > +	driver_unload(chipset, driver);
> > +
> > +	/* Valid file descriptor indicates we should rescan the bus */
> > +	if (bus >= 0) {
> > +		igt_sysfs_set(bus, "rescan", "1");
> > +		close(bus);
> > +	}
> > +}
> > +
> > +static void run_subtest(int *device, int chipset, char *driver,
> > +			workload_t workload, void *priv, action_t
> > action)
> > +{
> > +	__subtest(*device, chipset, driver, workload, priv, action);
> > +
> > +	sleep(2);
> Why do we need 2 seconds of sleep here? Where is the data coming
> from?

We don't, that's a left over from my testing.  Thanks for catching
this.

> > +
> > +	*device = __drm_open_driver(chipset);
> > +	healthcheck(chipset, device);
> > +
> > +	igt_assert(*device >= 0);
> > +}
> > +
> > +
> > +igt_main {
> > +	int device, chipset;
> > +	char *driver;
> > +
> > +	igt_fixture {
> > +		char path[PATH_MAX];
> > +		int dir, len;
> > +
> > +		/*
> > +		 * Since the test depends on successful unload of
> > driver module,
> > +		 * don't use drm_open_driver() as it keeps a file
> > descriptor
> > +		 * open for exit handler use that effectively locks the
> > module.
> > +		 */
> > +		device = __drm_open_driver(DRIVER_ANY);
> Based on what you've wrote in commit msg when dummy load works only
> on
> DRIVER_INTEL, why do we even care about other drivers? Why don't we
> open INTEL
> here and skip on other types?

Because I think we should be friendly to others, if that doesn't cost,
while developing an open source tool.

> 
> Quite a nice job done here :) Looks like you are going into right
> direction.
> Kasia :)

Thank you :-)

Meanwhile I've been observing some issues with this tool which exhibit
themselves as the driver under the test has its bugs, discovered with
the tool, patched.  I'll submit a new version soon.

Thanks,
Janusz

> > +		igt_assert(device >= 0);
> > +
> > +		if (is_i915_device(device)) {
> > +			chipset = DRIVER_INTEL;
> > +			driver = strdup("i915");
> > +		} else {
> > +			chipset = DRIVER_ANY;
> > +
> > +			/* Capture module name to be unloaded */
> > +			dir = igt_sysfs_open(device);
> > +			len = readlinkat(dir, "device/driver/module",
> > path,
> > +					 sizeof(path) - 1);
> > +			close(dir);
> > +			path[len] = '\0';
> > +			driver = strdup(strrchr(path, '/') + 1);
> > +		}
> > +		igt_info("Running the test on driver \"%s\", chipset
> > mask %#0x\n",
> > +			 driver, chipset);
> > +	}
> > +
> > +	igt_subtest("unplug")
> > +		run_subtest(&device, chipset, driver, spin_batch, NULL,
> > +			    device_unplug);
> > +
> > +	igt_subtest("unbind")
> > +		run_subtest(&device, chipset, driver, spin_batch, NULL,
> > +			    driver_unbind);
> > +
> > +	igt_fixture {
> > +		free(driver);
> > +		close(device);
> > +		/*
> > +		 * Now reopen the device with drm_open_driver() for a
> > while
> > +		 * so exit handler is registered and can do its job on
> > exit.
> > +		 */
> > +		device = drm_open_driver(chipset);
> > +		close(device);
> > +	}
> > +}
> > diff --git a/tests/meson.build b/tests/meson.build
> > index 8e1ae4fd..e6b87447 100644
> > --- a/tests/meson.build
> > +++ b/tests/meson.build
> > @@ -3,6 +3,7 @@ test_progs = [
> >  	'core_getclient',
> >  	'core_getstats',
> >  	'core_getversion',
> > +	'core_hot_reload',
> >  	'core_setmaster_vs_auth',
> >  	'debugfs_test',
> >  	'drm_import_export',
> > -- 
> > 2.20.1
> > 

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [igt-dev] [PATCH] tests: Add a new test for driver/device hot reload
  2019-04-03 16:01 [igt-dev] [PATCH] tests: Add a new test for driver/device hot reload Janusz Krzysztofik
                   ` (2 preceding siblings ...)
  2019-04-05  3:02 ` [igt-dev] ✗ Fi.CI.IGT: failure for " Patchwork
@ 2019-04-05 14:11 ` Chris Wilson
  2019-04-09  8:54   ` Janusz Krzysztofik
  3 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2019-04-05 14:11 UTC (permalink / raw)
  To: Daniel Vetter, Janusz Krzysztofik
  Cc: igt-dev, Janusz Krzysztofik, Petri Latvala, Lee, Simon B

Quoting Janusz Krzysztofik (2019-04-03 17:01:22)
> From: Janusz Krzysztofik <janusz.krzysztofik@intel.com>
> 
> Run a dummy load in background to put some workload on a device, then try
> to either remove (unplug) the device from its bus, or unbind the device's
> driver from it, depending on which subtest has been selected.  If
> succeeded, unload the driver, rescan the device's bus if needed and
> perform health checks on the device with the driver reloaded.
> 
> The driver hot unbind / device hot unplug operation is expected to succeed
> in a reasonable time, however long timeouts are used to let kernel level
> timeouts pop up first if hit by a bug.
> 
> The dummy load works only with i915 driver.  The test is skipped on other
> hardware unless they provide their implementation of igt_spin_batch_new()
> and friends.
> 
> Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@intel.com>
> ---
> Changelog:
> v1->v2:
> - renamed to core_hot_reload,
> - extended with module unload, device rediscover (unplug case), module reload 
>   and healthcheck operations following the initial unplug/unbind, as requested
>   by Daniel and Peter,
> - fail if anything goes wrong, as requested by Daniel
> - refactored a lot to reduce code redundancy.
> 
> Resending now with the igt-dev list address added to Cc:, sorry
> 
>  tests/Makefile.sources  |   1 +
>  tests/core_hot_reload.c | 248 ++++++++++++++++++++++++++++++++++++++++
>  tests/meson.build       |   1 +
>  3 files changed, 250 insertions(+)
>  create mode 100644 tests/core_hot_reload.c
> 
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index 86ad301e..5f9dba30 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -16,6 +16,7 @@ TESTS_progs = \
>         core_getclient \
>         core_getstats \
>         core_getversion \
> +       core_hot_reload \
>         core_setmaster_vs_auth \
>         debugfs_test \
>         drm_import_export \
> diff --git a/tests/core_hot_reload.c b/tests/core_hot_reload.c
> new file mode 100644
> index 00000000..96f0efd0
> --- /dev/null
> +++ b/tests/core_hot_reload.c
> @@ -0,0 +1,248 @@
> +/*
> + * Copyright © 2019 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "igt.h"
> +#include "igt_device.h"
> +#include "igt_dummyload.h"
> +#include "igt_kmod.h"
> +#include "igt_sysfs.h"
> +
> +#include <getopt.h>
> +#include <limits.h>
> +#include <string.h>
> +#include <unistd.h>
> +
> +
> +typedef void (*workload_wait_t)(int device, void *priv);
> +typedef int (*action_t)(int dir);
> +typedef int (*workload_t)(int device, void *priv, action_t action);
> +
> +
> +/*
> + * Actions
> + */
> +
> +/* Unbind the driver from the device */
> +static int driver_unbind(int dir)
> +{
> +       char path[PATH_MAX], *dev_bus_addr;
> +       int len;
> +
> +       len = readlinkat(dir, "device", path, sizeof(path) - 1);
> +       path[len] = '\0';
> +       dev_bus_addr = strrchr(path, '/') + 1;
> +
> +       igt_set_timeout(60, "Driver unbind timeout!");
> +       igt_sysfs_set(dir, "device/driver/unbind", dev_bus_addr);
> +
> +       /* No need for bus rescan */
> +       return -1;
> +}
> +
> +/* Returns open file descriptor to a directory of a bus to rescan for re-plug */
> +static int device_unplug(int dir)
> +{
> +       int bus;
> +
> +       bus = openat(dir, "device/subsystem", O_DIRECTORY);
> +       igt_assert(bus >= 0);
> +
> +       igt_set_timeout(60, "Device unplug timeout!");
> +       igt_sysfs_set(dir, "device/remove", "1");
> +
> +       return bus;
> +}
> +
> +
> +/*
> + * Operation template
> + */
> +static int operation(int device, action_t action, workload_wait_t workload_wait,
> +                    void *priv)
> +{
> +       int dir, bus;
> +
> +       dir = igt_sysfs_open(device);
> +
> +       bus = action(dir);
> +       close(dir);
> +
> +       if (workload_wait)
> +               workload_wait(device, priv);
> +       igt_reset_timeout();
> +
> +       return bus;
> +}
> +
> +
> +/*
> + * Workloads
> + */
> +
> +/* Workload using igt_spin_batch_run() */
> +
> +static void spin_batch_wait(int device, void *priv)
> +{
> +       igt_spin_t *spin = priv;
> +
> +       /* wait until the workload has crashed */
> +       gem_wait(device, spin->handle, NULL);

gem_sync().

> +}
> +
> +static int spin_batch(int device, void *priv, action_t action)
> +{
> +       igt_spin_t *spin;
> +       int bus;
> +
> +       igt_assert(!priv);
> +
> +       /* Start the workload in background.
> +        * Make sure it is running and times out after 90 seconds */
> +       spin = igt_spin_batch_new(device, .flags = IGT_SPIN_POLL_RUN);
> +       igt_spin_busywait_until_running(spin);

We don't need to wait until it's running for it to be an issue. In fact,
testing with requests that can't be run due to unresolved external
dependencies is also required. And also trying to race the actual
submission tasklets running in the background against the unplug.

> +       igt_spin_batch_set_timeout(spin, 90000000000);

Nah, just let it run indefinitely. You have the timeouts in place to
enforce the driver does something. And a hanging batch is about a hard a
task to resolve as you can create (make it a bit harder by disallowing
reset!).

One other thing to do here would be something like
	igt_fork(child, N) {
		uint32_t name = gem_flink(device, spin->handle);
		int fd = gem_reopen_device(device);
		uint32_t handle = gem_open(fd, name);
		gem_sync(fd, handle);
	}


Another once is to run a nop loop (with and without obj/ctx create) as
it unbinds, an mmap loop (over both CPU and GTT / mmap-offset paths),
pread/pwrite. set-tiling and set-caching both have windows of
opportunity for badness, but would be much harder to exercise.

And we need to cover flip queues.

> +       /* run the tested action */
> +       bus = operation(device, action, spin_batch_wait, spin);
> +
> +       /*
> +        * Clean up driver resources possibly not released on workload crash
> +        * so the driver module can be successfully unloaded
> +        */
> +       igt_spin_batch_free(device, spin);
> +
> +       return bus;
> +}
> +
> +
> +/*
> + * Skeleton
> + */
> +
> +static void healthcheck(int chipset, int *device)
> +{
> +       if (chipset == DRIVER_INTEL) {
> +               /*
> +                * We have it perfectly implemented in i915_module_load,
> +                * just use it.
> +                */
> +               igt_assert(igt_system_quiet("i915_module_load --run-subtest reload")
> +                          == IGT_EXIT_SUCCESS);

Ahem.

> +       } else {
> +               /*
> +                * We don't know how to check an unidentified device for health,
> +                * just reopen it.
> +                */
> +       }
> +}
> +
> +static void driver_unload(int chipset, char *driver)
> +{
> +       if (chipset == DRIVER_INTEL)
> +               igt_assert(igt_i915_driver_unload() == IGT_EXIT_SUCCESS);
> +       else
> +               igt_assert(igt_kmod_unload(driver, 0) == 0);
> +}
> +
> +static void __subtest(int device, int chipset, char *driver,
> +                     workload_t workload, void *priv, action_t action)
> +{
> +       int bus = workload(device, priv, action);
> +
> +       close(device);
> +       driver_unload(chipset, driver);
> +
> +       /* Valid file descriptor indicates we should rescan the bus */
> +       if (bus >= 0) {
> +               igt_sysfs_set(bus, "rescan", "1");
> +               close(bus);
> +       }
> +}
> +
> +static void run_subtest(int *device, int chipset, char *driver,
> +                       workload_t workload, void *priv, action_t action)
> +{
> +       __subtest(*device, chipset, driver, workload, priv, action);
> +
> +       sleep(2);

Why 2s?

The subtest is synchronous, the module load in drm_open_driver is also
synchronous, so this should just work without any delay.

> +
> +       *device = __drm_open_driver(chipset);
> +       healthcheck(chipset, device);
> +
> +       igt_assert(*device >= 0);
> +}
> +
> +
> +igt_main {
> +       int device, chipset;
> +       char *driver;
> +
> +       igt_fixture {
> +               char path[PATH_MAX];
> +               int dir, len;
> +
> +               /*
> +                * Since the test depends on successful unload of driver module,
> +                * don't use drm_open_driver() as it keeps a file descriptor
> +                * open for exit handler use that effectively locks the module.
> +                */
> +               device = __drm_open_driver(DRIVER_ANY);
> +               igt_assert(device >= 0);
> +
> +               if (is_i915_device(device)) {
> +                       chipset = DRIVER_INTEL;
> +                       driver = strdup("i915");
> +               } else {
> +                       chipset = DRIVER_ANY;
> +
> +                       /* Capture module name to be unloaded */
> +                       dir = igt_sysfs_open(device);
> +                       len = readlinkat(dir, "device/driver/module", path,
> +                                        sizeof(path) - 1);
> +                       close(dir);
> +                       path[len] = '\0';
> +                       driver = strdup(strrchr(path, '/') + 1);
> +               }
> +               igt_info("Running the test on driver \"%s\", chipset mask %#0x\n",
> +                        driver, chipset);
> +       }
> +
> +       igt_subtest("unplug")
> +               run_subtest(&device, chipset, driver, spin_batch, NULL,
> +                           device_unplug);

Why not just open and close in the test?

> +       igt_subtest("unbind")
> +               run_subtest(&device, chipset, driver, spin_batch, NULL,
> +                           driver_unbind);
> +
> +       igt_fixture {
> +               free(driver);
> +               close(device);
> +               /*
> +                * Now reopen the device with drm_open_driver() for a while
> +                * so exit handler is registered and can do its job on exit.
> +                */

Just opening the device for the exithandler? The exithandler doesn't
need to do anything.

> +               device = drm_open_driver(chipset);
> +               close(device);

As you note, drm_open_driver() will autoreload the module. So will the
next test.
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [igt-dev] [PATCH] tests: Add a new test for driver/device hot reload
  2019-04-05 14:11 ` [igt-dev] [PATCH] " Chris Wilson
@ 2019-04-09  8:54   ` Janusz Krzysztofik
  2019-04-09  9:12     ` Chris Wilson
  0 siblings, 1 reply; 9+ messages in thread
From: Janusz Krzysztofik @ 2019-04-09  8:54 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev, Lee, Simon B, Petri Latvala, Daniel Vetter

Hi Chris,

On Friday, April 5, 2019 4:11:48 PM CEST Chris Wilson wrote:
> Quoting Janusz Krzysztofik (2019-04-03 17:01:22)
> 
> > From: Janusz Krzysztofik <janusz.krzysztofik@intel.com>
> > 
> > Run a dummy load in background to put some workload on a device, then try
> > to either remove (unplug) the device from its bus, or unbind the device's
> > driver from it, depending on which subtest has been selected.  If
> > succeeded, unload the driver, rescan the device's bus if needed and
> > perform health checks on the device with the driver reloaded.
> > 
> > The driver hot unbind / device hot unplug operation is expected to succeed
> > in a reasonable time, however long timeouts are used to let kernel level
> > timeouts pop up first if hit by a bug.
> > 
> > The dummy load works only with i915 driver.  The test is skipped on other
> > hardware unless they provide their implementation of igt_spin_batch_new()
> > and friends.
> > 
> > Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@intel.com>
> > ---
> > Changelog:
> > v1->v2:
> > - renamed to core_hot_reload,
> > - extended with module unload, device rediscover (unplug case), module
> > reload> 
> >   and healthcheck operations following the initial unplug/unbind, as
> >   requested by Daniel and Peter,
> > 
> > - fail if anything goes wrong, as requested by Daniel
> > - refactored a lot to reduce code redundancy.
> > 
> > Resending now with the igt-dev list address added to Cc:, sorry
> > 
> >  tests/Makefile.sources  |   1 +
> >  tests/core_hot_reload.c | 248 ++++++++++++++++++++++++++++++++++++++++
> >  tests/meson.build       |   1 +
> >  3 files changed, 250 insertions(+)
> >  create mode 100644 tests/core_hot_reload.c
> > 
> > diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> > index 86ad301e..5f9dba30 100644
> > --- a/tests/Makefile.sources
> > +++ b/tests/Makefile.sources
> > @@ -16,6 +16,7 @@ TESTS_progs = \
> > 
> >         core_getclient \
> >         core_getstats \
> >         core_getversion \
> > 
> > +       core_hot_reload \
> > 
> >         core_setmaster_vs_auth \
> >         debugfs_test \
> >         drm_import_export \
> > 
> > diff --git a/tests/core_hot_reload.c b/tests/core_hot_reload.c
> > new file mode 100644
> > index 00000000..96f0efd0
> > --- /dev/null
> > +++ b/tests/core_hot_reload.c
> > @@ -0,0 +1,248 @@
> > +/*
> > + * Copyright © 2019 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining
> > a
> > + * copy of this software and associated documentation files (the
> > "Software"), + * to deal in the Software without restriction, including
> > without limitation + * the rights to use, copy, modify, merge, publish,
> > distribute, sublicense, + * and/or sell copies of the Software, and to
> > permit persons to whom the + * Software is furnished to do so, subject to
> > the following conditions: + *
> > + * The above copyright notice and this permission notice (including the
> > next + * paragraph) shall be included in all copies or substantial
> > portions of the + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> > EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> > MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND
> > NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS
> > BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN
> > ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN
> > CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE
> > SOFTWARE.
> > + */
> > +
> > +#include "igt.h"
> > +#include "igt_device.h"
> > +#include "igt_dummyload.h"
> > +#include "igt_kmod.h"
> > +#include "igt_sysfs.h"
> > +
> > +#include <getopt.h>
> > +#include <limits.h>
> > +#include <string.h>
> > +#include <unistd.h>
> > +
> > +
> > +typedef void (*workload_wait_t)(int device, void *priv);
> > +typedef int (*action_t)(int dir);
> > +typedef int (*workload_t)(int device, void *priv, action_t action);
> > +
> > +
> > +/*
> > + * Actions
> > + */
> > +
> > +/* Unbind the driver from the device */
> > +static int driver_unbind(int dir)
> > +{
> > +       char path[PATH_MAX], *dev_bus_addr;
> > +       int len;
> > +
> > +       len = readlinkat(dir, "device", path, sizeof(path) - 1);
> > +       path[len] = '\0';
> > +       dev_bus_addr = strrchr(path, '/') + 1;
> > +
> > +       igt_set_timeout(60, "Driver unbind timeout!");
> > +       igt_sysfs_set(dir, "device/driver/unbind", dev_bus_addr);
> > +
> > +       /* No need for bus rescan */
> > +       return -1;
> > +}
> > +
> > +/* Returns open file descriptor to a directory of a bus to rescan for
> > re-plug */ +static int device_unplug(int dir)
> > +{
> > +       int bus;
> > +
> > +       bus = openat(dir, "device/subsystem", O_DIRECTORY);
> > +       igt_assert(bus >= 0);
> > +
> > +       igt_set_timeout(60, "Device unplug timeout!");
> > +       igt_sysfs_set(dir, "device/remove", "1");
> > +
> > +       return bus;
> > +}
> > +
> > +
> > +/*
> > + * Operation template
> > + */
> > +static int operation(int device, action_t action, workload_wait_t
> > workload_wait, +                    void *priv)
> > +{
> > +       int dir, bus;
> > +
> > +       dir = igt_sysfs_open(device);
> > +
> > +       bus = action(dir);
> > +       close(dir);
> > +
> > +       if (workload_wait)
> > +               workload_wait(device, priv);
> > +       igt_reset_timeout();
> > +
> > +       return bus;
> > +}
> > +
> > +
> > +/*
> > + * Workloads
> > + */
> > +
> > +/* Workload using igt_spin_batch_run() */
> > +
> > +static void spin_batch_wait(int device, void *priv)
> > +{
> > +       igt_spin_t *spin = priv;
> > +
> > +       /* wait until the workload has crashed */
> > +       gem_wait(device, spin->handle, NULL);
> 
> gem_sync().

OK, thanks.

> > +}
> > +
> > +static int spin_batch(int device, void *priv, action_t action)
> > +{
> > +       igt_spin_t *spin;
> > +       int bus;
> > +
> > +       igt_assert(!priv);
> > +
> > +       /* Start the workload in background.
> > +        * Make sure it is running and times out after 90 seconds */
> > +       spin = igt_spin_batch_new(device, .flags = IGT_SPIN_POLL_RUN);
> > +       igt_spin_busywait_until_running(spin);
> 
> We don't need to wait until it's running for it to be an issue. In fact,
> testing with requests that can't be run due to unresolved external
> dependencies is also required. And also trying to race the actual
> submission tasklets running in the background against the unplug.

OK.

> > +       igt_spin_batch_set_timeout(spin, 90000000000);
> 
> Nah, just let it run indefinitely. You have the timeouts in place to
> enforce the driver does something. 

OK

> And a hanging batch is about a hard a
> task to resolve as you can create (make it a bit harder by disallowing
> reset!).
> 
> One other thing to do here would be something like
> 	igt_fork(child, N) {
> 		uint32_t name = gem_flink(device, spin->handle);
> 		int fd = gem_reopen_device(device);
> 		uint32_t handle = gem_open(fd, name);
> 		gem_sync(fd, handle);
> 	}
> 
> 
> Another once is to run a nop loop (with and without obj/ctx create) as
> it unbinds, an mmap loop (over both CPU and GTT / mmap-offset paths),
> pread/pwrite. set-tiling and set-caching both have windows of
> opportunity for badness, but would be much harder to exercise.
> 
> And we need to cover flip queues.

As a quick win, I'm going to restore my idea, proposed before but finally not 
included in publicly submitted v2, of adding an option to run an arbitrary 
command in background as a workload.  That way existing tests best suited for 
the purpose can be used, even a few of them in parallel if applicable - that 
will be just a shell command line to run.

> > +       /* run the tested action */
> > +       bus = operation(device, action, spin_batch_wait, spin);
> > +
> > +       /*
> > +        * Clean up driver resources possibly not released on workload
> > crash +        * so the driver module can be successfully unloaded
> > +        */
> > +       igt_spin_batch_free(device, spin);
> > +
> > +       return bus;
> > +}
> > +
> > +
> > +/*
> > + * Skeleton
> > + */
> > +
> > +static void healthcheck(int chipset, int *device)
> > +{
> > +       if (chipset == DRIVER_INTEL) {
> > +               /*
> > +                * We have it perfectly implemented in i915_module_load,
> > +                * just use it.
> > +                */
> > +               igt_assert(igt_system_quiet("i915_module_load
> > --run-subtest reload") +                          == IGT_EXIT_SUCCESS);
> 
> Ahem.
> 
> > +       } else {
> > +               /*
> > +                * We don't know how to check an unidentified device for
> > health, +                * just reopen it.
> > +                */
> > +       }
> > +}
> > +
> > +static void driver_unload(int chipset, char *driver)
> > +{
> > +       if (chipset == DRIVER_INTEL)
> > +               igt_assert(igt_i915_driver_unload() == IGT_EXIT_SUCCESS);
> > +       else
> > +               igt_assert(igt_kmod_unload(driver, 0) == 0);
> > +}
> > +
> > +static void __subtest(int device, int chipset, char *driver,
> > +                     workload_t workload, void *priv, action_t action)
> > +{
> > +       int bus = workload(device, priv, action);
> > +
> > +       close(device);
> > +       driver_unload(chipset, driver);
> > +
> > +       /* Valid file descriptor indicates we should rescan the bus */
> > +       if (bus >= 0) {
> > +               igt_sysfs_set(bus, "rescan", "1");
> > +               close(bus);
> > +       }
> > +}
> > +
> > +static void run_subtest(int *device, int chipset, char *driver,
> > +                       workload_t workload, void *priv, action_t action)
> > +{
> > +       __subtest(*device, chipset, driver, workload, priv, action);
> > +
> > +       sleep(2);
> 
> Why 2s?
> 
> The subtest is synchronous, the module load in drm_open_driver is also
> synchronous, so this should just work without any delay.

A left over from testing phase, I'll drop it.

> > +
> > +       *device = __drm_open_driver(chipset);
> > +       healthcheck(chipset, device);
> > +
> > +       igt_assert(*device >= 0);
> > +}
> > +
> > +
> > +igt_main {
> > +       int device, chipset;
> > +       char *driver;
> > +
> > +       igt_fixture {
> > +               char path[PATH_MAX];
> > +               int dir, len;
> > +
> > +               /*
> > +                * Since the test depends on successful unload of driver
> > module, +                * don't use drm_open_driver() as it keeps a file
> > descriptor +                * open for exit handler use that effectively
> > locks the module. +                */
> > +               device = __drm_open_driver(DRIVER_ANY);
> > +               igt_assert(device >= 0);
> > +
> > +               if (is_i915_device(device)) {
> > +                       chipset = DRIVER_INTEL;
> > +                       driver = strdup("i915");
> > +               } else {
> > +                       chipset = DRIVER_ANY;
> > +
> > +                       /* Capture module name to be unloaded */
> > +                       dir = igt_sysfs_open(device);
> > +                       len = readlinkat(dir, "device/driver/module",
> > path,
> > +                                        sizeof(path) - 1);
> > +                       close(dir);
> > +                       path[len] = '\0';
> > +                       driver = strdup(strrchr(path, '/') + 1);
> > +               }
> > +               igt_info("Running the test on driver \"%s\", chipset mask
> > %#0x\n", +                        driver, chipset);
> > +       }
> > +
> > +       igt_subtest("unplug")
> > +               run_subtest(&device, chipset, driver, spin_batch, NULL,
> > +                           device_unplug);
> 
> Why not just open and close in the test?

I'm going to move the run of a workload out of subtests and run it in a 
subprocess, otherwise cleaning up workload resources with igt library 
functions for clean module unload may make a subtest to fail easy.  The 
purpose of opening a device in the initial fixture and reuse it in subtest was 
to make sure the same device is tested as the workload has been put on.

> > +       igt_subtest("unbind")
> > +               run_subtest(&device, chipset, driver, spin_batch, NULL,
> > +                           driver_unbind);
> > +
> > +       igt_fixture {
> > +               free(driver);
> > +               close(device);
> > +               /*
> > +                * Now reopen the device with drm_open_driver() for a
> > while
> > +                * so exit handler is registered and can do its job on
> > exit. +                */
> 
> Just opening the device for the exithandler? The exithandler doesn't
> need to do anything.

OK, I was not sure so I added that just in case.

> > +               device = drm_open_driver(chipset);
> > +               close(device);
> 
> As you note, drm_open_driver() will autoreload the module. So will the
> next test.

Yes, but speaking of that, I'm still not sure if running a next test after 
this one should be recommended.  I've been observing issues in other tests 
which exhibited themselves after successful module reload.

Thanks,
Janusz

> -Chris




_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [igt-dev] [PATCH] tests: Add a new test for driver/device hot reload
  2019-04-09  8:54   ` Janusz Krzysztofik
@ 2019-04-09  9:12     ` Chris Wilson
  2019-04-09  9:23       ` Janusz Krzysztofik
  0 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2019-04-09  9:12 UTC (permalink / raw)
  To: Janusz Krzysztofik; +Cc: igt-dev, Lee, Simon B, Petri Latvala, Daniel Vetter

Quoting Janusz Krzysztofik (2019-04-09 09:54:16)
> Hi Chris,
> 
> On Friday, April 5, 2019 4:11:48 PM CEST Chris Wilson wrote:
> > As you note, drm_open_driver() will autoreload the module. So will the
> > next test.
> 
> Yes, but speaking of that, I'm still not sure if running a next test after 
> this one should be recommended.  I've been observing issues in other tests 
> which exhibited themselves after successful module reload.

In other words, this test is not complete yet! :) Realistically though,
that is likely to be the memory corruption caused by the unclean module
unload. You can say the same about any igt, they all have the danger
that they may leave the system in an unstable state.
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [igt-dev] [PATCH] tests: Add a new test for driver/device hot reload
  2019-04-09  9:12     ` Chris Wilson
@ 2019-04-09  9:23       ` Janusz Krzysztofik
  0 siblings, 0 replies; 9+ messages in thread
From: Janusz Krzysztofik @ 2019-04-09  9:23 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev, Lee, Simon B, Petri Latvala, Daniel Vetter

On Tuesday, April 9, 2019 11:12:13 AM CEST Chris Wilson wrote:
> Quoting Janusz Krzysztofik (2019-04-09 09:54:16)
> 
> > Hi Chris,
> > 
> > On Friday, April 5, 2019 4:11:48 PM CEST Chris Wilson wrote:
> > > As you note, drm_open_driver() will autoreload the module. So will the
> > > next test.
> > 
> > Yes, but speaking of that, I'm still not sure if running a next test after
> > this one should be recommended.  I've been observing issues in other tests
> > which exhibited themselves after successful module reload.
> 
> In other words, this test is not complete yet! :) Realistically though,
> that is likely to be the memory corruption caused by the unclean module
> unload. You can say the same about any igt, they all have the danger
> that they may leave the system in an unstable state.

That's what I had on mind.  You don't have to use a test for module reloading, 
doing that manually was sufficient for me to observe issues under other tests.

Janusz

> -Chris




_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2019-04-09  9:23 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-03 16:01 [igt-dev] [PATCH] tests: Add a new test for driver/device hot reload Janusz Krzysztofik
2019-04-04 13:17 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
2019-04-04 13:56 ` [igt-dev] [PATCH] " Katarzyna Dec
2019-04-05  8:39   ` Janusz Krzysztofik
2019-04-05  3:02 ` [igt-dev] ✗ Fi.CI.IGT: failure for " Patchwork
2019-04-05 14:11 ` [igt-dev] [PATCH] " Chris Wilson
2019-04-09  8:54   ` Janusz Krzysztofik
2019-04-09  9:12     ` Chris Wilson
2019-04-09  9:23       ` Janusz Krzysztofik

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.