All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH igt] tests/kms_cursor_legacy: Boost timing sensitive subtests to RT prio
@ 2016-09-12 14:09 Imre Deak
  2016-09-12 14:47 ` [PATCH igt v2] " Imre Deak
  0 siblings, 1 reply; 6+ messages in thread
From: Imre Deak @ 2016-09-12 14:09 UTC (permalink / raw)
  To: intel-gfx

Even in an otherwise quiescent system there may be user/kernel threads
independent of the test that add enough latency to make timing sensitive
subtests fail. Boost the priority of such subtests to avoid these
failures.

This got rid of sporadic failures in basic-cursor-vs-flip-legacy and
basic-cursor-vs-flip-varying-size with 'missed 1 frame' error message
APL and BSW.

CC: Chris Wilson <chris@chris-wilson.co.uk>
CC: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 tests/kms_cursor_legacy.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/tests/kms_cursor_legacy.c b/tests/kms_cursor_legacy.c
index 373873a..ac4cffa 100644
--- a/tests/kms_cursor_legacy.c
+++ b/tests/kms_cursor_legacy.c
@@ -450,6 +450,39 @@ static unsigned get_vblank(int fd, int pipe, unsigned flags)
 	return vbl.reply.sequence;
 }
 
+struct sched_info {
+	int policy;
+	int prio;
+};
+
+static void boost_to_rt_sched_prio(struct sched_info *old_info)
+{
+	struct sched_param new_param = {
+		.sched_priority = 99,
+	};
+	struct sched_param param;
+	int pid = getpid();
+
+	igt_info("Boosting to RT scheduler priority\n");
+
+	old_info->policy = sched_getscheduler(pid);
+	igt_assert(old_info->policy >= 0);
+	igt_assert(sched_getparam(pid, &param) == 0);
+	old_info->prio = param.sched_priority;
+
+	igt_assert(sched_setscheduler(pid, SCHED_FIFO | SCHED_RESET_ON_FORK,
+				      &new_param) == 0);
+}
+
+static void restore_sched_prio(struct sched_info *info)
+{
+	struct sched_param param = {
+		.sched_priority = info->prio,
+	};
+
+	igt_assert(sched_setscheduler(getpid(), info->policy, &param) == 0);
+}
+
 static void basic_flip_vs_cursor(igt_display_t *display, enum flip_test mode, int nloops)
 {
 	struct drm_mode_cursor arg[2];
@@ -458,6 +491,7 @@ static void basic_flip_vs_cursor(igt_display_t *display, enum flip_test mode, in
 	unsigned vblank_start;
 	int target;
 	enum pipe pipe = find_connected_pipe(display, false);
+	struct sched_info old_sched_info;
 
 	if (mode >= flip_test_atomic)
 		igt_require(display->is_atomic);
@@ -524,6 +558,8 @@ static void basic_flip_vs_cursor(igt_display_t *display, enum flip_test mode, in
 		igt_reset_timeout();
 	} while (nloops--);
 
+	restore_sched_prio(&old_sched_info);
+
 	do_cleanup_display(display);
 	igt_remove_fb(display->drm_fd, &fb_info);
 	igt_remove_fb(display->drm_fd, &cursor_fb);
@@ -565,6 +601,7 @@ static void two_screens_flip_vs_cursor(igt_display_t *display, int nloops, bool
 	enum pipe pipe2 = find_connected_pipe(display, true);
 	igt_output_t *output2;
 	bool skip_test = false;
+	struct sched_info old_sched_info;
 
 	if (modeset)
 		igt_require(display->is_atomic);
@@ -592,6 +629,8 @@ static void two_screens_flip_vs_cursor(igt_display_t *display, int nloops, bool
 
 	igt_display_commit2(display, display->is_atomic ? COMMIT_ATOMIC : COMMIT_LEGACY);
 
+	boost_to_rt_sched_prio(&old_sched_info);
+
 	vblank_start = get_vblank(display->drm_fd, pipe, DRM_VBLANK_NEXTONMISS);
 	igt_assert_eq(get_vblank(display->drm_fd, pipe, 0), vblank_start);
 	do_ioctl(display->drm_fd, DRM_IOCTL_MODE_CURSOR, &arg[0]);
@@ -636,6 +675,8 @@ static void two_screens_flip_vs_cursor(igt_display_t *display, int nloops, bool
 		}
 	}
 
+	restore_sched_prio(&old_sched_info);
+
 cleanup:
 	do_cleanup_display(display);
 	igt_remove_fb(display->drm_fd, &fb_info);
@@ -657,6 +698,7 @@ static void basic_cursor_vs_flip(igt_display_t *display, enum flip_test mode, in
 	enum pipe pipe = find_connected_pipe(display, false);
 	igt_output_t *output;
 	uint32_t vrefresh;
+	struct sched_info old_sched_info;
 
 	if (mode >= flip_test_atomic)
 		igt_require(display->is_atomic);
@@ -690,6 +732,8 @@ static void basic_cursor_vs_flip(igt_display_t *display, enum flip_test mode, in
 	igt_debug("Using a target of %ld cursor updates per half-vblank (%u)\n",
 		  target, vrefresh);
 
+	boost_to_rt_sched_prio(&old_sched_info);
+
 	for (int i = 0; i < nloops; i++) {
 		shared[0] = 0;
 		igt_fork(child, 1) {
@@ -738,6 +782,8 @@ static void basic_cursor_vs_flip(igt_display_t *display, enum flip_test mode, in
 			     shared[0], 2ul*vrefresh*target, vrefresh*target);
 	}
 
+	restore_sched_prio(&old_sched_info);
+
 	do_cleanup_display(display);
 	igt_remove_fb(display->drm_fd, &fb_info);
 	igt_remove_fb(display->drm_fd, &cursor_fb);
@@ -760,6 +806,7 @@ static void two_screens_cursor_vs_flip(igt_display_t *display, int nloops, bool
 	enum pipe pipe2 = find_connected_pipe(display, true);
 	igt_output_t *output2;
 	bool skip_test = false;
+	struct sched_info old_sched_info;
 
 	shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
 	igt_assert(shared != MAP_FAILED);
@@ -790,6 +837,8 @@ static void two_screens_cursor_vs_flip(igt_display_t *display, int nloops, bool
 
 	igt_display_commit2(display, display->is_atomic ? COMMIT_ATOMIC : COMMIT_LEGACY);
 
+	boost_to_rt_sched_prio(&old_sched_info);
+
 	target = 4096;
 	do {
 		vblank_start = get_vblank(display->drm_fd, pipe, DRM_VBLANK_NEXTONMISS);
@@ -856,6 +905,8 @@ static void two_screens_cursor_vs_flip(igt_display_t *display, int nloops, bool
 			     shared[0], 2*60ul*target, 60ul*target);
 	}
 
+	restore_sched_prio(&old_sched_info);
+
 cleanup:
 	do_cleanup_display(display);
 	igt_remove_fb(display->drm_fd, &fb_info);
-- 
2.5.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH igt v2] tests/kms_cursor_legacy: Boost timing sensitive subtests to RT prio
  2016-09-12 14:09 [PATCH igt] tests/kms_cursor_legacy: Boost timing sensitive subtests to RT prio Imre Deak
@ 2016-09-12 14:47 ` Imre Deak
  2016-09-12 20:04   ` Chris Wilson
  0 siblings, 1 reply; 6+ messages in thread
From: Imre Deak @ 2016-09-12 14:47 UTC (permalink / raw)
  To: intel-gfx

Even in an otherwise quiescent system there may be user/kernel threads
independent of the test that add enough latency to make timing sensitive
subtests fail. Boost the priority of such subtests to avoid these
failures.

This got rid of sporadic failures in basic-cursor-vs-flip-legacy and
basic-cursor-vs-flip-varying-size with 'missed 1 frame' error message
APL and BSW.

v2:
- Boost the priority in flip_vs_cursor_crc() too.

CC: Chris Wilson <chris@chris-wilson.co.uk>
CC: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 tests/kms_cursor_legacy.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/tests/kms_cursor_legacy.c b/tests/kms_cursor_legacy.c
index 373873a..47dbac9 100644
--- a/tests/kms_cursor_legacy.c
+++ b/tests/kms_cursor_legacy.c
@@ -450,6 +450,39 @@ static unsigned get_vblank(int fd, int pipe, unsigned flags)
 	return vbl.reply.sequence;
 }
 
+struct sched_info {
+	int policy;
+	int prio;
+};
+
+static void boost_to_rt_sched_prio(struct sched_info *old_info)
+{
+	struct sched_param new_param = {
+		.sched_priority = 99,
+	};
+	struct sched_param param;
+	int pid = getpid();
+
+	igt_info("Boosting to RT scheduler priority\n");
+
+	old_info->policy = sched_getscheduler(pid);
+	igt_assert(old_info->policy >= 0);
+	igt_assert(sched_getparam(pid, &param) == 0);
+	old_info->prio = param.sched_priority;
+
+	igt_assert(sched_setscheduler(pid, SCHED_FIFO | SCHED_RESET_ON_FORK,
+				      &new_param) == 0);
+}
+
+static void restore_sched_prio(struct sched_info *info)
+{
+	struct sched_param param = {
+		.sched_priority = info->prio,
+	};
+
+	igt_assert(sched_setscheduler(getpid(), info->policy, &param) == 0);
+}
+
 static void basic_flip_vs_cursor(igt_display_t *display, enum flip_test mode, int nloops)
 {
 	struct drm_mode_cursor arg[2];
@@ -458,6 +491,7 @@ static void basic_flip_vs_cursor(igt_display_t *display, enum flip_test mode, in
 	unsigned vblank_start;
 	int target;
 	enum pipe pipe = find_connected_pipe(display, false);
+	struct sched_info old_sched_info;
 
 	if (mode >= flip_test_atomic)
 		igt_require(display->is_atomic);
@@ -489,6 +523,8 @@ static void basic_flip_vs_cursor(igt_display_t *display, enum flip_test mode, in
 	} else
 		target = 1;
 
+	boost_to_rt_sched_prio(&old_sched_info);
+
 	vblank_start = get_vblank(display->drm_fd, pipe, DRM_VBLANK_NEXTONMISS);
 	igt_assert_eq(get_vblank(display->drm_fd, pipe, 0), vblank_start);
 	for (int n = 0; n < target; n++)
@@ -524,6 +560,8 @@ static void basic_flip_vs_cursor(igt_display_t *display, enum flip_test mode, in
 		igt_reset_timeout();
 	} while (nloops--);
 
+	restore_sched_prio(&old_sched_info);
+
 	do_cleanup_display(display);
 	igt_remove_fb(display->drm_fd, &fb_info);
 	igt_remove_fb(display->drm_fd, &cursor_fb);
@@ -565,6 +603,7 @@ static void two_screens_flip_vs_cursor(igt_display_t *display, int nloops, bool
 	enum pipe pipe2 = find_connected_pipe(display, true);
 	igt_output_t *output2;
 	bool skip_test = false;
+	struct sched_info old_sched_info;
 
 	if (modeset)
 		igt_require(display->is_atomic);
@@ -592,6 +631,8 @@ static void two_screens_flip_vs_cursor(igt_display_t *display, int nloops, bool
 
 	igt_display_commit2(display, display->is_atomic ? COMMIT_ATOMIC : COMMIT_LEGACY);
 
+	boost_to_rt_sched_prio(&old_sched_info);
+
 	vblank_start = get_vblank(display->drm_fd, pipe, DRM_VBLANK_NEXTONMISS);
 	igt_assert_eq(get_vblank(display->drm_fd, pipe, 0), vblank_start);
 	do_ioctl(display->drm_fd, DRM_IOCTL_MODE_CURSOR, &arg[0]);
@@ -636,6 +677,8 @@ static void two_screens_flip_vs_cursor(igt_display_t *display, int nloops, bool
 		}
 	}
 
+	restore_sched_prio(&old_sched_info);
+
 cleanup:
 	do_cleanup_display(display);
 	igt_remove_fb(display->drm_fd, &fb_info);
@@ -657,6 +700,7 @@ static void basic_cursor_vs_flip(igt_display_t *display, enum flip_test mode, in
 	enum pipe pipe = find_connected_pipe(display, false);
 	igt_output_t *output;
 	uint32_t vrefresh;
+	struct sched_info old_sched_info;
 
 	if (mode >= flip_test_atomic)
 		igt_require(display->is_atomic);
@@ -690,6 +734,8 @@ static void basic_cursor_vs_flip(igt_display_t *display, enum flip_test mode, in
 	igt_debug("Using a target of %ld cursor updates per half-vblank (%u)\n",
 		  target, vrefresh);
 
+	boost_to_rt_sched_prio(&old_sched_info);
+
 	for (int i = 0; i < nloops; i++) {
 		shared[0] = 0;
 		igt_fork(child, 1) {
@@ -738,6 +784,8 @@ static void basic_cursor_vs_flip(igt_display_t *display, enum flip_test mode, in
 			     shared[0], 2ul*vrefresh*target, vrefresh*target);
 	}
 
+	restore_sched_prio(&old_sched_info);
+
 	do_cleanup_display(display);
 	igt_remove_fb(display->drm_fd, &fb_info);
 	igt_remove_fb(display->drm_fd, &cursor_fb);
@@ -760,6 +808,7 @@ static void two_screens_cursor_vs_flip(igt_display_t *display, int nloops, bool
 	enum pipe pipe2 = find_connected_pipe(display, true);
 	igt_output_t *output2;
 	bool skip_test = false;
+	struct sched_info old_sched_info;
 
 	shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
 	igt_assert(shared != MAP_FAILED);
@@ -790,6 +839,8 @@ static void two_screens_cursor_vs_flip(igt_display_t *display, int nloops, bool
 
 	igt_display_commit2(display, display->is_atomic ? COMMIT_ATOMIC : COMMIT_LEGACY);
 
+	boost_to_rt_sched_prio(&old_sched_info);
+
 	target = 4096;
 	do {
 		vblank_start = get_vblank(display->drm_fd, pipe, DRM_VBLANK_NEXTONMISS);
@@ -856,6 +907,8 @@ static void two_screens_cursor_vs_flip(igt_display_t *display, int nloops, bool
 			     shared[0], 2*60ul*target, 60ul*target);
 	}
 
+	restore_sched_prio(&old_sched_info);
+
 cleanup:
 	do_cleanup_display(display);
 	igt_remove_fb(display->drm_fd, &fb_info);
@@ -876,6 +929,7 @@ static void flip_vs_cursor_crc(igt_display_t *display, bool atomic)
 	enum pipe pipe = find_connected_pipe(display, false);
 	igt_pipe_crc_t *pipe_crc;
 	igt_crc_t crcs[3];
+	struct sched_info old_sched_info;
 
 	if (atomic)
 		igt_require(display->is_atomic);
@@ -906,6 +960,8 @@ static void flip_vs_cursor_crc(igt_display_t *display, bool atomic)
 	/* Collect reference crc with cursor enabled. */
 	igt_pipe_crc_collect_crc(pipe_crc, &crcs[0]);
 
+	boost_to_rt_sched_prio(&old_sched_info);
+
 	/* Disable cursor, and immediately queue a flip. Check if resulting crc is correct. */
 	for (int i = 1; i >= 0; i--) {
 		vblank_start = get_vblank(display->drm_fd, pipe, DRM_VBLANK_NEXTONMISS);
@@ -926,6 +982,8 @@ static void flip_vs_cursor_crc(igt_display_t *display, bool atomic)
 		igt_assert_crc_equal(&crcs[i], &crcs[2]);
 	}
 
+	restore_sched_prio(&old_sched_info);
+
 	do_cleanup_display(display);
 	igt_remove_fb(display->drm_fd, &fb_info);
 	igt_remove_fb(display->drm_fd, &cursor_fb);
-- 
2.5.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH igt v2] tests/kms_cursor_legacy: Boost timing sensitive subtests to RT prio
  2016-09-12 14:47 ` [PATCH igt v2] " Imre Deak
@ 2016-09-12 20:04   ` Chris Wilson
  2016-09-12 20:57     ` Imre Deak
  0 siblings, 1 reply; 6+ messages in thread
From: Chris Wilson @ 2016-09-12 20:04 UTC (permalink / raw)
  To: Imre Deak; +Cc: intel-gfx

On Mon, Sep 12, 2016 at 05:47:57PM +0300, Imre Deak wrote:
> Even in an otherwise quiescent system there may be user/kernel threads
> independent of the test that add enough latency to make timing sensitive
> subtests fail. Boost the priority of such subtests to avoid these
> failures.
> 
> This got rid of sporadic failures in basic-cursor-vs-flip-legacy and
> basic-cursor-vs-flip-varying-size with 'missed 1 frame' error message
> APL and BSW.
> 
> v2:
> - Boost the priority in flip_vs_cursor_crc() too.
> 
> CC: Chris Wilson <chris@chris-wilson.co.uk>
> CC: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Signed-off-by: Imre Deak <imre.deak@intel.com>

But we shouldn't need to. The basic test is:

	align to vblank
	request non-blocking flip
	update cursor
	check vblank hasn't advanced

We are not doing any busy loops here and there should be nothing else
running on the system. So what caused the context switch? Who are we
fighting against? If the only thing that is causing the issue is the
kernel thread used for the mmioflip (which won't be scheduled for
another 16ms until the next vblank), we have another bug to track down.

Imo, this patch is just papering over an issue that as it stands will be
present in real userspace (i.e. causing jerkiness in X, weston, cros
etc).
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH igt v2] tests/kms_cursor_legacy: Boost timing sensitive subtests to RT prio
  2016-09-12 20:04   ` Chris Wilson
@ 2016-09-12 20:57     ` Imre Deak
  2016-09-13  7:38       ` Chris Wilson
  0 siblings, 1 reply; 6+ messages in thread
From: Imre Deak @ 2016-09-12 20:57 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Mon, 2016-09-12 at 21:04 +0100, Chris Wilson wrote:
> On Mon, Sep 12, 2016 at 05:47:57PM +0300, Imre Deak wrote:
> > Even in an otherwise quiescent system there may be user/kernel
> > threads
> > independent of the test that add enough latency to make timing
> > sensitive
> > subtests fail. Boost the priority of such subtests to avoid these
> > failures.
> > 
> > This got rid of sporadic failures in basic-cursor-vs-flip-legacy
> > and
> > basic-cursor-vs-flip-varying-size with 'missed 1 frame' error
> > message
> > APL and BSW.
> > 
> > v2:
> > - Boost the priority in flip_vs_cursor_crc() too.
> > 
> > CC: Chris Wilson <chris@chris-wilson.co.uk>
> > CC: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> > Signed-off-by: Imre Deak <imre.deak@intel.com>
> 
> But we shouldn't need to. The basic test is:
> 
> 	align to vblank
> 	request non-blocking flip
> 	update cursor

In these subtests we run these cursor updates in a loop.

> 	check vblank hasn't advanced
> 
> We are not doing any busy loops here and there should be nothing else
> running on the system. So what caused the context switch? Who are we
> fighting against?

The cursor thread is one source for the delay, other than that it could
be anything running in the background. In my traces it looked like
something related to CI remote logging that caused >16ms delay for both
the user flip thread and the subsequent MMIO work. Imo there is no
guarantee that such delays won't happen between threads running at the
same priority, hence the need for higher priority for timing sensitive
stuff. Note that we see this problem on BSW with with 2 CPUs.

> If the only thing that is causing the issue is the
> kernel thread used for the mmioflip (which won't be scheduled for
> another 16ms until the next vblank), we have another bug to track
> down.

The MMIO flip work is scheduled right after we request the flip (since
we do the request after the previous flip completed) and I saw it being
delayed >16ms for the above reasons. Besides this I also saw the user
space flip thread being delayed the same way.

> Imo, this patch is just papering over an issue that as it stands will
> be
> present in real userspace (i.e. causing jerkiness in X, weston, cros
> etc).

I can't see any other way than adjusting priorities to guarantee the
timely completion of some work. Otherwise you'll only get best effort
scheduling and that doesn't seem to be enough in these subtests.

--Imre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH igt v2] tests/kms_cursor_legacy: Boost timing sensitive subtests to RT prio
  2016-09-12 20:57     ` Imre Deak
@ 2016-09-13  7:38       ` Chris Wilson
  2016-09-13 11:14         ` Imre Deak
  0 siblings, 1 reply; 6+ messages in thread
From: Chris Wilson @ 2016-09-13  7:38 UTC (permalink / raw)
  To: Imre Deak; +Cc: intel-gfx

On Mon, Sep 12, 2016 at 11:57:54PM +0300, Imre Deak wrote:
> On Mon, 2016-09-12 at 21:04 +0100, Chris Wilson wrote:
> > On Mon, Sep 12, 2016 at 05:47:57PM +0300, Imre Deak wrote:
> > > Even in an otherwise quiescent system there may be user/kernel
> > > threads
> > > independent of the test that add enough latency to make timing
> > > sensitive
> > > subtests fail. Boost the priority of such subtests to avoid these
> > > failures.
> > > 
> > > This got rid of sporadic failures in basic-cursor-vs-flip-legacy
> > > and
> > > basic-cursor-vs-flip-varying-size with 'missed 1 frame' error
> > > message
> > > APL and BSW.
> > > 
> > > v2:
> > > - Boost the priority in flip_vs_cursor_crc() too.
> > > 
> > > CC: Chris Wilson <chris@chris-wilson.co.uk>
> > > CC: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> > > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > 
> > But we shouldn't need to. The basic test is:
> > 
> > 	align to vblank
> > 	request non-blocking flip
> > 	update cursor
> 
> In these subtests we run these cursor updates in a loop.

Oh, those. Ok, for the purpose of bat we want:

	align to vblank
	update cursor
	request non-blocking flip
	check vblank == vblank
	check flip-completion == vblank + 1
> 
> > 	check vblank hasn't advanced
> > 
> > We are not doing any busy loops here and there should be nothing else
> > running on the system. So what caused the context switch? Who are we
> > fighting against?
> 
> The cursor thread is one source for the delay, other than that it could
> be anything running in the background. In my traces it looked like
> something related to CI remote logging that caused >16ms delay for both
> the user flip thread and the subsequent MMIO work. Imo there is no
> guarantee that such delays won't happen between threads running at the
> same priority, hence the need for higher priority for timing sensitive
> stuff. Note that we see this problem on BSW with with 2 CPUs.
> 
> > If the only thing that is causing the issue is the
> > kernel thread used for the mmioflip (which won't be scheduled for
> > another 16ms until the next vblank), we have another bug to track
> > down.
> 
> The MMIO flip work is scheduled right after we request the flip (since
> we do the request after the previous flip completed) and I saw it being
> delayed >16ms for the above reasons. Besides this I also saw the user
> space flip thread being delayed the same way.
> 
> > Imo, this patch is just papering over an issue that as it stands will
> > be
> > present in real userspace (i.e. causing jerkiness in X, weston, cros
> > etc).
> 
> I can't see any other way than adjusting priorities to guarantee the
> timely completion of some work. Otherwise you'll only get best effort
> scheduling and that doesn't seem to be enough in these subtests.

Our worker has multiple phases and waits of which only a small portion is
timing crucial. We don't want to boost the priority of everything it
does, only the reprogramming of the registers within the next vblank.
The inputs to that crucial phase are irq driven (be they render
completion on any dmabuf device, or delay until after vblank) and we could
move the mmio into that irq context and that would avoid scheduling issues
on all but the RT systems that want threaded irqs.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH igt v2] tests/kms_cursor_legacy: Boost timing sensitive subtests to RT prio
  2016-09-13  7:38       ` Chris Wilson
@ 2016-09-13 11:14         ` Imre Deak
  0 siblings, 0 replies; 6+ messages in thread
From: Imre Deak @ 2016-09-13 11:14 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On ti, 2016-09-13 at 08:38 +0100, Chris Wilson wrote:
> On Mon, Sep 12, 2016 at 11:57:54PM +0300, Imre Deak wrote:
> > On Mon, 2016-09-12 at 21:04 +0100, Chris Wilson wrote:
> > > On Mon, Sep 12, 2016 at 05:47:57PM +0300, Imre Deak wrote:
> > > > Even in an otherwise quiescent system there may be user/kernel
> > > > threads
> > > > independent of the test that add enough latency to make timing
> > > > sensitive
> > > > subtests fail. Boost the priority of such subtests to avoid
> > > > these
> > > > failures.
> > > > 
> > > > This got rid of sporadic failures in basic-cursor-vs-flip-
> > > > legacy
> > > > and
> > > > basic-cursor-vs-flip-varying-size with 'missed 1 frame' error
> > > > message
> > > > APL and BSW.
> > > > 
> > > > v2:
> > > > - Boost the priority in flip_vs_cursor_crc() too.
> > > > 
> > > > CC: Chris Wilson <chris@chris-wilson.co.uk>
> > > > CC: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> > > > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > > 
> > > But we shouldn't need to. The basic test is:
> > > 
> > > 	align to vblank
> > > 	request non-blocking flip
> > > 	update cursor
> > 
> > In these subtests we run these cursor updates in a loop.
> 
> Oh, those. Ok, for the purpose of bat we want:
> 
> 	align to vblank
> 	update cursor
> 	request non-blocking flip
> 	check vblank == vblank
> 	check flip-completion == vblank + 1

That's basic_flip_vs_cursor, the ones failing are the cursor_vs_flip_*
running the cursor update in a separate thread. So are you suggesting
just removing these from bat or doing only a single cursor update
(target=1)? The latter would reduce the chance for failure, but
wouldn't eliminate it.

> > > 	check vblank hasn't advanced
> > > 
> > > We are not doing any busy loops here and there should be nothing
> > > else
> > > running on the system. So what caused the context switch? Who are
> > > we
> > > fighting against?
> > 
> > The cursor thread is one source for the delay, other than that it
> > could
> > be anything running in the background. In my traces it looked like
> > something related to CI remote logging that caused >16ms delay for
> > both
> > the user flip thread and the subsequent MMIO work. Imo there is no
> > guarantee that such delays won't happen between threads running at
> > the
> > same priority, hence the need for higher priority for timing
> > sensitive
> > stuff. Note that we see this problem on BSW with with 2 CPUs.
> > 
> > > If the only thing that is causing the issue is the
> > > kernel thread used for the mmioflip (which won't be scheduled for
> > > another 16ms until the next vblank), we have another bug to track
> > > down.
> > 
> > The MMIO flip work is scheduled right after we request the flip
> > (since
> > we do the request after the previous flip completed) and I saw it
> > being
> > delayed >16ms for the above reasons. Besides this I also saw the
> > user
> > space flip thread being delayed the same way.
> > 
> > > Imo, this patch is just papering over an issue that as it stands
> > > will
> > > be
> > > present in real userspace (i.e. causing jerkiness in X, weston,
> > > cros
> > > etc).
> > 
> > I can't see any other way than adjusting priorities to guarantee
> > the
> > timely completion of some work. Otherwise you'll only get best
> > effort
> > scheduling and that doesn't seem to be enough in these subtests.
> 
> Our worker has multiple phases and waits of which only a small
> portion is
> timing crucial. We don't want to boost the priority of everything it
> does, only the reprogramming of the registers within the next vblank.
> The inputs to that crucial phase are irq driven (be they render
> completion on any dmabuf device, or delay until after vblank) and we
> could
> move the mmio into that irq context and that would avoid scheduling
> issues
> on all but the RT systems that want threaded irqs.

There are differences in how time critical the different phases are
(for example preparing vs. register updating for vblank evasion), but
the whole work of queuing the flip is time critical. There is one frame
time to complete that work when a single flip can be queued.

--Imre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2016-09-13 11:14 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-12 14:09 [PATCH igt] tests/kms_cursor_legacy: Boost timing sensitive subtests to RT prio Imre Deak
2016-09-12 14:47 ` [PATCH igt v2] " Imre Deak
2016-09-12 20:04   ` Chris Wilson
2016-09-12 20:57     ` Imre Deak
2016-09-13  7:38       ` Chris Wilson
2016-09-13 11:14         ` Imre Deak

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.