Re: [PATCH i-g-t] gem_flink_race/prime_self_import: Improve test reliability

From: Daniel Vetter <daniel@ffwll.ch>
To: Derek Morton <derek.j.morton@intel.com>
Cc: intel-gfx@lists.freedesktop.org, thomas.wood@intel.com
Subject: Re: [PATCH i-g-t] gem_flink_race/prime_self_import: Improve test reliability
Date: Thu, 10 Dec 2015 11:13:18 +0100	[thread overview]
Message-ID: <20151210101318.GQ20822@phenom.ffwll.local> (raw)
In-Reply-To: <1449578684-16320-1-git-send-email-derek.j.morton@intel.com>

On Tue, Dec 08, 2015 at 12:44:44PM +0000, Derek Morton wrote:
> gem_flink_race and prime_self_import have subtests which read the
> number of open gem objects from debugfs to determine if objects have
> leaked during the test. However the test can fail sporadically if
> the number of gem objects changes due to other process activity.
> This patch introduces a change to check the number of gem objects
> several times to filter out any fluctuations.

Why exactly does this happen? IGT tests should be run on bare metal, with
everything else killed/subdued/shutup. If there's still things going on
that create objects, we need to stop them from doing that.

If this only applies to Android, or some special Android deamon them imo
check for that at runtime and igt_skip("your setup is invalid, deamon %s
running\n"); is the correct fix. After all just because you sampled for a
bit doesn't mean that it wont still change right when you start running
the test for real, so this is still fragile.

Also would be good to extract get_stable_obj_count to a proper igt library
function, if it indeed needs to be this tricky. And then add the
explanation for why we need this in the gtkdoc.

Thanks, Daniel
> 
> Signed-off-by: Derek Morton <derek.j.morton@intel.com>
> ---
>  tests/gem_flink_race.c    | 34 +++++++++++++++++++++++++++++-----
>  tests/prime_self_import.c | 43 +++++++++++++++++++++++++++++++++----------
>  2 files changed, 62 insertions(+), 15 deletions(-)
> 
> diff --git a/tests/gem_flink_race.c b/tests/gem_flink_race.c
> index b17ef85..0552e12 100644
> --- a/tests/gem_flink_race.c
> +++ b/tests/gem_flink_race.c
> @@ -44,7 +44,7 @@ IGT_TEST_DESCRIPTION("Check for flink/open vs. gem close races.");
>   * in the flink name and corresponding reference getting leaked.
>   */
>  
> -/* We want lockless and I'm to lazy to dig out an atomic libarary. On x86 this
> +/* We want lockless and I'm to lazy to dig out an atomic library. On x86 this
>   * works, too. */
>  volatile int pls_die = 0;
>  int fd;
> @@ -65,6 +65,32 @@ static int get_object_count(void)
>  	return ret;
>  }
>  
> +static int get_stable_obj_count(int driver)
> +{
> +	/* The test relies on the system being in the same state before and
> +	   after the test so any difference in the object count is a result of
> +	   leaks during the test. gem_quiescent_gpu() mostly achieves this but
> +	   occasionally obj_count can still change. The loop ensures obj_count
> +	   has remained stable over several checks */
> +	int obj_count, prev_obj_count;
> +	int loop_count = 0;
> +	gem_quiescent_gpu(driver);
> +	prev_obj_count = get_object_count();
> +	while (loop_count < 4) {
> +		usleep(200000);
> +		gem_quiescent_gpu(driver);
> +		obj_count = get_object_count();
> +		if (obj_count == prev_obj_count) {
> +			loop_count++;
> +		} else {
> +			igt_debug("loop_count=%d, obj_count=%d, prev_obj_count=%d\n", loop_count, obj_count, prev_obj_count);
> +			loop_count = 0;
> +			prev_obj_count = obj_count;
> +		}
> +
> +	}
> +	return obj_count;
> +}
>  
>  static void *thread_fn_flink_name(void *p)
>  {
> @@ -164,8 +190,7 @@ static void test_flink_close(void)
>  	 * up the counts */
>  	fake = drm_open_driver(DRIVER_INTEL);
>  
> -	gem_quiescent_gpu(fake);
> -	obj_count = get_object_count();
> +	obj_count = get_stable_obj_count(fake);
>  
>  	num_threads = sysconf(_SC_NPROCESSORS_ONLN);
>  
> @@ -190,8 +215,7 @@ static void test_flink_close(void)
>  
>  	close(fd);
>  
> -	gem_quiescent_gpu(fake);
> -	obj_count = get_object_count() - obj_count;
> +	obj_count = get_stable_obj_count(fake) - obj_count;
>  
>  	igt_info("leaked %i objects\n", obj_count);
>  
> diff --git a/tests/prime_self_import.c b/tests/prime_self_import.c
> index 91fe231..977c7b2 100644
> --- a/tests/prime_self_import.c
> +++ b/tests/prime_self_import.c
> @@ -47,7 +47,7 @@
>  #include "drm.h"
>  
>  IGT_TEST_DESCRIPTION("Check whether prime import/export works on the same"
> -		     " device.");
> +		     " device... but with different fds.");
>  
>  #define BO_SIZE (16*1024)
>  
> @@ -157,7 +157,7 @@ static void test_with_one_bo_two_files(void)
>  	dma_buf_fd2 = prime_handle_to_fd(fd2, handle_open);
>  	handle_import = prime_fd_to_handle(fd2, dma_buf_fd2);
>  
> -	/* dma-buf selfimporting an flink bo should give the same handle */
> +	/* dma-buf self importing an flink bo should give the same handle */
>  	igt_assert_eq_u32(handle_import, handle_open);
>  
>  	close(fd1);
> @@ -226,6 +226,33 @@ static int get_object_count(void)
>  	return ret;
>  }
>  
> +static int get_stable_obj_count(int driver)
> +{
> +	/* The test relies on the system being in the same state before and
> +	   after the test so any difference in the object count is a result of
> +	   leaks during the test. gem_quiescent_gpu() mostly achieves this but
> +	   occasionally obj_count can still change. The loop ensures obj_count
> +	   has remained stable over several checks */
> +	int obj_count, prev_obj_count;
> +	int loop_count = 0;
> +	gem_quiescent_gpu(driver);
> +	prev_obj_count = get_object_count();
> +	while (loop_count < 4) {
> +		usleep(200000);
> +		gem_quiescent_gpu(driver);
> +		obj_count = get_object_count();
> +		if (obj_count == prev_obj_count) {
> +			loop_count++;
> +		} else {
> +			igt_debug("loop_count=%d, obj_count=%d, prev_obj_count=%d\n", loop_count, obj_count, prev_obj_count);
> +			loop_count = 0;
> +			prev_obj_count = obj_count;
> +		}
> +
> +	}
> +	return obj_count;
> +}
> +
>  static void *thread_fn_reimport_vs_close(void *p)
>  {
>  	struct drm_gem_close close_bo;
> @@ -258,8 +285,7 @@ static void test_reimport_close_race(void)
>  	 * up the counts */
>  	fake = drm_open_driver(DRIVER_INTEL);
>  
> -	gem_quiescent_gpu(fake);
> -	obj_count = get_object_count();
> +	obj_count = get_stable_obj_count(fake);
>  
>  	num_threads = sysconf(_SC_NPROCESSORS_ONLN);
>  
> @@ -290,8 +316,7 @@ static void test_reimport_close_race(void)
>  	close(fds[0]);
>  	close(fds[1]);
>  
> -	gem_quiescent_gpu(fake);
> -	obj_count = get_object_count() - obj_count;
> +	obj_count = get_stable_obj_count(fake) - obj_count;
>  
>  	igt_info("leaked %i objects\n", obj_count);
>  
> @@ -350,8 +375,7 @@ static void test_export_close_race(void)
>  	 * up the counts */
>  	fake = drm_open_driver(DRIVER_INTEL);
>  
> -	gem_quiescent_gpu(fake);
> -	obj_count = get_object_count();
> +	obj_count = get_stable_obj_count(fake);
>  
>  	fd = drm_open_driver(DRIVER_INTEL);
>  
> @@ -373,8 +397,7 @@ static void test_export_close_race(void)
>  
>  	close(fd);
>  
> -	gem_quiescent_gpu(fake);
> -	obj_count = get_object_count() - obj_count;
> +	obj_count = get_stable_obj_count(fake) - obj_count;
>  
>  	igt_info("leaked %i objects\n", obj_count);
>  
> -- 
> 1.9.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx