intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Close race between processing unpin task and queueing the flip
@ 2012-12-01 17:48 Chris Wilson
  2012-12-01 20:35 ` Daniel Vetter
  0 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2012-12-01 17:48 UTC (permalink / raw)
  To: intel-gfx

Before queuing the flip but crucially after attaching the unpin-work to
the crtc, we continue to setup the unpin-work. However, should the
hardware fire early, we see the connected unpin-work and queue the task.
The task then promptly runs and unpins the fb before we finish taking
the required references or even pinning it... Havoc.

To close the race, we use the flip-pending atomic to indicate when the
flip is finally setup and enqueued. So during the flip-done processing,
we can check more accurately whether the flip was expected.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c  |    4 ++--
 drivers/gpu/drm/i915/i915_irq.c      |    4 +++-
 drivers/gpu/drm/i915/intel_display.c |   23 ++++++++++++++++-------
 drivers/gpu/drm/i915/intel_drv.h     |    5 ++++-
 4 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 8afc0dd..e6a11ca 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -317,7 +317,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
 			seq_printf(m, "No flip due on pipe %c (plane %c)\n",
 				   pipe, plane);
 		} else {
-			if (!work->pending) {
+			if (atomic_read(&work->pending) < INTEL_FLIP_COMPLETE) {
 				seq_printf(m, "Flip queued on pipe %c (plane %c)\n",
 					   pipe, plane);
 			} else {
@@ -328,7 +328,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
 				seq_printf(m, "Stall check enabled, ");
 			else
 				seq_printf(m, "Stall check waiting for page flip ioctl, ");
-			seq_printf(m, "%d prepares\n", work->pending);
+			seq_printf(m, "%d prepares\n", atomic_read(&work->pending));
 
 			if (work->old_fb_obj) {
 				struct drm_i915_gem_object *obj = work->old_fb_obj;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 6cd3dc9..a4dc97f 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1466,7 +1466,9 @@ static void i915_pageflip_stall_check(struct drm_device *dev, int pipe)
 	spin_lock_irqsave(&dev->event_lock, flags);
 	work = intel_crtc->unpin_work;
 
-	if (work == NULL || work->pending || !work->enable_stall_check) {
+	if (work == NULL ||
+	    atomic_read(&work->pending) >= INTEL_FLIP_COMPLETE ||
+	    !work->enable_stall_check) {
 		/* Either the pending flip IRQ arrived, or we're too early. Don't check */
 		spin_unlock_irqrestore(&dev->event_lock, flags);
 		return;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 78d12c4..2746b39 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -6929,7 +6929,7 @@ static void do_intel_finish_page_flip(struct drm_device *dev,
 
 	spin_lock_irqsave(&dev->event_lock, flags);
 	work = intel_crtc->unpin_work;
-	if (work == NULL || !work->pending) {
+	if (work == NULL || atomic_read(&work->pending) < INTEL_FLIP_COMPLETE) {
 		spin_unlock_irqrestore(&dev->event_lock, flags);
 		return;
 	}
@@ -6977,13 +6977,13 @@ void intel_prepare_page_flip(struct drm_device *dev, int plane)
 		to_intel_crtc(dev_priv->plane_to_crtc_mapping[plane]);
 	unsigned long flags;
 
+	/* NB: An MMIO update of the plane base pointer will also
+	 * generate a page-flip completion irq, i.e. every modeset
+	 * is also accompanied by a spurious intel_prepare_page_flip().
+	 */
 	spin_lock_irqsave(&dev->event_lock, flags);
-	if (intel_crtc->unpin_work) {
-		if ((++intel_crtc->unpin_work->pending) > 1)
-			DRM_ERROR("Prepared flip multiple times\n");
-	} else {
-		DRM_DEBUG_DRIVER("preparing flip with no unpin work?\n");
-	}
+	if (intel_crtc->unpin_work)
+		atomic_inc_not_zero(&intel_crtc->unpin_work->pending);
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 }
 
@@ -7020,6 +7020,8 @@ static int intel_gen2_queue_flip(struct drm_device *dev,
 	intel_ring_emit(ring, fb->pitches[0]);
 	intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
 	intel_ring_emit(ring, 0); /* aux display base address, unused */
+
+	atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING);
 	intel_ring_advance(ring);
 	return 0;
 
@@ -7060,6 +7062,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev,
 	intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
 	intel_ring_emit(ring, MI_NOOP);
 
+	atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING);
 	intel_ring_advance(ring);
 	return 0;
 
@@ -7106,6 +7109,8 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
 	pf = 0;
 	pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
 	intel_ring_emit(ring, pf | pipesrc);
+
+	atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING);
 	intel_ring_advance(ring);
 	return 0;
 
@@ -7148,6 +7153,8 @@ static int intel_gen6_queue_flip(struct drm_device *dev,
 	pf = 0;
 	pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
 	intel_ring_emit(ring, pf | pipesrc);
+
+	atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING);
 	intel_ring_advance(ring);
 	return 0;
 
@@ -7202,6 +7209,8 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 	intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
 	intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
 	intel_ring_emit(ring, (MI_NOOP));
+
+	atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING);
 	intel_ring_advance(ring);
 	return 0;
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 522061c..3915ca9 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -401,7 +401,10 @@ struct intel_unpin_work {
 	struct drm_i915_gem_object *old_fb_obj;
 	struct drm_i915_gem_object *pending_flip_obj;
 	struct drm_pending_vblank_event *event;
-	int pending;
+	atomic_t pending;
+#define INTEL_FLIP_INACTIVE	0
+#define INTEL_FLIP_PENDING	1
+#define INTEL_FLIP_COMPLETE	2
 	bool enable_stall_check;
 };
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Close race between processing unpin task and queueing the flip
  2012-12-01 17:48 [PATCH] drm/i915: Close race between processing unpin task and queueing the flip Chris Wilson
@ 2012-12-01 20:35 ` Daniel Vetter
  2012-12-01 22:32   ` Chris Wilson
  0 siblings, 1 reply; 9+ messages in thread
From: Daniel Vetter @ 2012-12-01 20:35 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Sat, Dec 01, 2012 at 05:48:50PM +0000, Chris Wilson wrote:
> Before queuing the flip but crucially after attaching the unpin-work to
> the crtc, we continue to setup the unpin-work. However, should the
> hardware fire early, we see the connected unpin-work and queue the task.
> The task then promptly runs and unpins the fb before we finish taking
> the required references or even pinning it... Havoc.
> 
> To close the race, we use the flip-pending atomic to indicate when the
> flip is finally setup and enqueued. So during the flip-done processing,
> we can check more accurately whether the flip was expected.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Hm, can't this logic race?

- emit the MI_FLIP

- flip irq happens because the gpu is idle and completes it right away
(or our thread is preempted), work->pending increments from 0 -> 1

- queue_flip sets work->pending to 1

So work->pending will be stuck at 1 forever, the unpin never happens and
stalls all subsequent flips.

Then there's also the usual annoying maintainer questions:
- do we have a way to readily reproduce this race?
- do we have a chance to just shut out all these spurious pageflip
  interrupts? If they all come from MMIO access, we should be able to lock
  them out somehow ...

Cheers, Daniel
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c  |    4 ++--
>  drivers/gpu/drm/i915/i915_irq.c      |    4 +++-
>  drivers/gpu/drm/i915/intel_display.c |   23 ++++++++++++++++-------
>  drivers/gpu/drm/i915/intel_drv.h     |    5 ++++-
>  4 files changed, 25 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 8afc0dd..e6a11ca 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -317,7 +317,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
>  			seq_printf(m, "No flip due on pipe %c (plane %c)\n",
>  				   pipe, plane);
>  		} else {
> -			if (!work->pending) {
> +			if (atomic_read(&work->pending) < INTEL_FLIP_COMPLETE) {
>  				seq_printf(m, "Flip queued on pipe %c (plane %c)\n",
>  					   pipe, plane);
>  			} else {
> @@ -328,7 +328,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
>  				seq_printf(m, "Stall check enabled, ");
>  			else
>  				seq_printf(m, "Stall check waiting for page flip ioctl, ");
> -			seq_printf(m, "%d prepares\n", work->pending);
> +			seq_printf(m, "%d prepares\n", atomic_read(&work->pending));
>  
>  			if (work->old_fb_obj) {
>  				struct drm_i915_gem_object *obj = work->old_fb_obj;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 6cd3dc9..a4dc97f 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1466,7 +1466,9 @@ static void i915_pageflip_stall_check(struct drm_device *dev, int pipe)
>  	spin_lock_irqsave(&dev->event_lock, flags);
>  	work = intel_crtc->unpin_work;
>  
> -	if (work == NULL || work->pending || !work->enable_stall_check) {
> +	if (work == NULL ||
> +	    atomic_read(&work->pending) >= INTEL_FLIP_COMPLETE ||
> +	    !work->enable_stall_check) {
>  		/* Either the pending flip IRQ arrived, or we're too early. Don't check */
>  		spin_unlock_irqrestore(&dev->event_lock, flags);
>  		return;
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 78d12c4..2746b39 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -6929,7 +6929,7 @@ static void do_intel_finish_page_flip(struct drm_device *dev,
>  
>  	spin_lock_irqsave(&dev->event_lock, flags);
>  	work = intel_crtc->unpin_work;
> -	if (work == NULL || !work->pending) {
> +	if (work == NULL || atomic_read(&work->pending) < INTEL_FLIP_COMPLETE) {
>  		spin_unlock_irqrestore(&dev->event_lock, flags);
>  		return;
>  	}
> @@ -6977,13 +6977,13 @@ void intel_prepare_page_flip(struct drm_device *dev, int plane)
>  		to_intel_crtc(dev_priv->plane_to_crtc_mapping[plane]);
>  	unsigned long flags;
>  
> +	/* NB: An MMIO update of the plane base pointer will also
> +	 * generate a page-flip completion irq, i.e. every modeset
> +	 * is also accompanied by a spurious intel_prepare_page_flip().
> +	 */
>  	spin_lock_irqsave(&dev->event_lock, flags);
> -	if (intel_crtc->unpin_work) {
> -		if ((++intel_crtc->unpin_work->pending) > 1)
> -			DRM_ERROR("Prepared flip multiple times\n");
> -	} else {
> -		DRM_DEBUG_DRIVER("preparing flip with no unpin work?\n");
> -	}
> +	if (intel_crtc->unpin_work)
> +		atomic_inc_not_zero(&intel_crtc->unpin_work->pending);
>  	spin_unlock_irqrestore(&dev->event_lock, flags);
>  }
>  
> @@ -7020,6 +7020,8 @@ static int intel_gen2_queue_flip(struct drm_device *dev,
>  	intel_ring_emit(ring, fb->pitches[0]);
>  	intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
>  	intel_ring_emit(ring, 0); /* aux display base address, unused */
> +
> +	atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING);
>  	intel_ring_advance(ring);
>  	return 0;
>  
> @@ -7060,6 +7062,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev,
>  	intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
>  	intel_ring_emit(ring, MI_NOOP);
>  
> +	atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING);
>  	intel_ring_advance(ring);
>  	return 0;
>  
> @@ -7106,6 +7109,8 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
>  	pf = 0;
>  	pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
>  	intel_ring_emit(ring, pf | pipesrc);
> +
> +	atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING);
>  	intel_ring_advance(ring);
>  	return 0;
>  
> @@ -7148,6 +7153,8 @@ static int intel_gen6_queue_flip(struct drm_device *dev,
>  	pf = 0;
>  	pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
>  	intel_ring_emit(ring, pf | pipesrc);
> +
> +	atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING);
>  	intel_ring_advance(ring);
>  	return 0;
>  
> @@ -7202,6 +7209,8 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>  	intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
>  	intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
>  	intel_ring_emit(ring, (MI_NOOP));
> +
> +	atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING);
>  	intel_ring_advance(ring);
>  	return 0;
>  
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 522061c..3915ca9 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -401,7 +401,10 @@ struct intel_unpin_work {
>  	struct drm_i915_gem_object *old_fb_obj;
>  	struct drm_i915_gem_object *pending_flip_obj;
>  	struct drm_pending_vblank_event *event;
> -	int pending;
> +	atomic_t pending;
> +#define INTEL_FLIP_INACTIVE	0
> +#define INTEL_FLIP_PENDING	1
> +#define INTEL_FLIP_COMPLETE	2
>  	bool enable_stall_check;
>  };
>  
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Close race between processing unpin task and queueing the flip
  2012-12-01 20:35 ` Daniel Vetter
@ 2012-12-01 22:32   ` Chris Wilson
  2012-12-02  1:15     ` Daniel Vetter
  0 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2012-12-01 22:32 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Sat, 1 Dec 2012 21:35:21 +0100, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Sat, Dec 01, 2012 at 05:48:50PM +0000, Chris Wilson wrote:
> > Before queuing the flip but crucially after attaching the unpin-work to
> > the crtc, we continue to setup the unpin-work. However, should the
> > hardware fire early, we see the connected unpin-work and queue the task.
> > The task then promptly runs and unpins the fb before we finish taking
> > the required references or even pinning it... Havoc.
> > 
> > To close the race, we use the flip-pending atomic to indicate when the
> > flip is finally setup and enqueued. So during the flip-done processing,
> > we can check more accurately whether the flip was expected.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Hm, can't this logic race?
> 
> - emit the MI_FLIP
> 
> - flip irq happens because the gpu is idle and completes it right away
> (or our thread is preempted), work->pending increments from 0 -> 1
> 
> - queue_flip sets work->pending to 1

-> write RING_TAIL, flush the commands to CS, begin execution of MI_FLIP

I'm not happy with the explanation, but I could reliably (100%) hit the
race whilst loading a 2+GiB image using eog under compiz on a 965gm
with only 2GiB of RAM. As soon as it hit kswapd, the system would OOPS
with an unpin leak. Which means there was a flip pending/done prior to
the pinning + MI_FLIP. This patch adds a strong defence against that
spurious flip done, but doesn't explain where it came from.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Close race between processing unpin task and queueing the flip
  2012-12-01 22:32   ` Chris Wilson
@ 2012-12-02  1:15     ` Daniel Vetter
  2012-12-02  9:26       ` Chris Wilson
  0 siblings, 1 reply; 9+ messages in thread
From: Daniel Vetter @ 2012-12-02  1:15 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Sat, Dec 1, 2012 at 11:32 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Sat, 1 Dec 2012 21:35:21 +0100, Daniel Vetter <daniel@ffwll.ch> wrote:
>> On Sat, Dec 01, 2012 at 05:48:50PM +0000, Chris Wilson wrote:
>> > Before queuing the flip but crucially after attaching the unpin-work to
>> > the crtc, we continue to setup the unpin-work. However, should the
>> > hardware fire early, we see the connected unpin-work and queue the task.
>> > The task then promptly runs and unpins the fb before we finish taking
>> > the required references or even pinning it... Havoc.
>> >
>> > To close the race, we use the flip-pending atomic to indicate when the
>> > flip is finally setup and enqueued. So during the flip-done processing,
>> > we can check more accurately whether the flip was expected.
>> >
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>
>> Hm, can't this logic race?
>>
>> - emit the MI_FLIP
>>
>> - flip irq happens because the gpu is idle and completes it right away
>> (or our thread is preempted), work->pending increments from 0 -> 1
>>
>> - queue_flip sets work->pending to 1
>
> -> write RING_TAIL, flush the commands to CS, begin execution of MI_FLIP

Yeah, that should be the normal course of events where the MI_FLIP
gets executed after we set work->pending to 1 (and after all the stuff
has been done). The race I see is that the real MI_FLIP (not a
spurious one this patch defends against) happens before we set
work->pending to 1, so that we essentially lose the increment to 2 and
so block any further flips on this crtc (or modesets for that matter,
once the finish_fb stuff is fixed) indefinitely.

Iow I think it's a bit too good at preventing unpins ;-)

> I'm not happy with the explanation, but I could reliably (100%) hit the
> race whilst loading a 2+GiB image using eog under compiz on an 965gm
> with only 2GIB of ram. As soon as it hit kswapd, the system would OOPS
> with an unpin leak. Which means that was a flip pending/done prior to
> the pinning + MI_FLIP. This patch adds a strong defence against that
> spurious flip done, but doesn't explain where it came from.

Hm, I have no idea how that could cause the spurious flip - the most
likely cause is that something introduces a nice delay somewhere
(through kswapd), but I don't really see how that can happen. I guess
I need to write a flip vs. swapping test. Was the swap due to
unrelated memory pressure, or due to our own gem objects?
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Close race between processing unpin task and queueing the flip
  2012-12-02  1:15     ` Daniel Vetter
@ 2012-12-02  9:26       ` Chris Wilson
  2012-12-02 11:38         ` Daniel Vetter
  0 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2012-12-02  9:26 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Sun, 2 Dec 2012 02:15:23 +0100, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Sat, Dec 1, 2012 at 11:32 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > On Sat, 1 Dec 2012 21:35:21 +0100, Daniel Vetter <daniel@ffwll.ch> wrote:
> >> On Sat, Dec 01, 2012 at 05:48:50PM +0000, Chris Wilson wrote:
> >> > Before queuing the flip but crucially after attaching the unpin-work to
> >> > the crtc, we continue to setup the unpin-work. However, should the
> >> > hardware fire early, we see the connected unpin-work and queue the task.
> >> > The task then promptly runs and unpins the fb before we finish taking
> >> > the required references or even pinning it... Havoc.
> >> >
> >> > To close the race, we use the flip-pending atomic to indicate when the
> >> > flip is finally setup and enqueued. So during the flip-done processing,
> >> > we can check more accurately whether the flip was expected.
> >> >
> >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>
> >> Hm, can't this logic race?
> >>
> >> - emit the MI_FLIP
> >>
> >> - flip irq happens because the gpu is idle and completes it right away
> >> (or our thread is preempted), work->pending increments from 0 -> 1
> >>
> >> - queue_flip sets work->pending to 1
> >
> > -> write RING_TAIL, flush the commands to CS, begin execution of MI_FLIP
> 
> Yeah, that should be the normal course of events where the MI_FLIP
> gets executed after we set work->pending to 1 (and after all the stuff
> has been done). The race I see is that the real MI_FLIP (not a
> spurious one this patch defends against) happens before we set
> work->pending to 1, so that we essentially lose the increment to 2 and
> so block any further flips on this crtc (or modesets for the matter,
> once the finish_fb stuff is fixed) indefinitely.
> 
> Iow I think it's a bit too good at preventing unpins ;-)

There isn't a race with hardware. So are you concerned about the write
ordering, and so want some smp_mb()?
 
> > I'm not happy with the explanation, but I could reliably (100%) hit the
> > race whilst loading a 2+GiB image using eog under compiz on an 965gm
> > with only 2GIB of ram. As soon as it hit kswapd, the system would OOPS
> > with an unpin leak. Which means that was a flip pending/done prior to
> > the pinning + MI_FLIP. This patch adds a strong defence against that
> > spurious flip done, but doesn't explain where it came from.
> 
> Hm, I have no idea how that could cause the spurious flip - the most
> likely cause is that something introduces a nice delay somewhere
> (through kswapd), but I don't really see how that can happen. I guess
> I need to write a flip vs. swapping test. Was the swap due to
> unrelated memory pressue, or due to our own gem objects?

eog starts swapping long before it sends the image to X, but at the same
time it continues to render its progress bar.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Close race between processing unpin task and queueing the flip
  2012-12-02  9:26       ` Chris Wilson
@ 2012-12-02 11:38         ` Daniel Vetter
  0 siblings, 0 replies; 9+ messages in thread
From: Daniel Vetter @ 2012-12-02 11:38 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Sun, Dec 2, 2012 at 10:26 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> There isn't a race with hardware. So are you concerned about the write
> ordering, and so want some smb_mb()?

Oops, I've misread the patch, there's no race where I've seen it.
Barriers are a good topic, though: I think we need an smp_wmb() before
the atomic set in queue_flip to ensure that all stores to the work struct
have landed beforehand. Together with the implicit barriers of the
spinlock we should be fine.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Close race between processing unpin task and queueing the flip
  2012-12-03 11:36 Chris Wilson
@ 2012-12-06 13:10 ` Daniel Vetter
  0 siblings, 0 replies; 9+ messages in thread
From: Daniel Vetter @ 2012-12-06 13:10 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, stable

On Mon, Dec 03, 2012 at 11:36:30AM +0000, Chris Wilson wrote:
> Before queuing the flip but crucially after attaching the unpin-work to
> the crtc, we continue to setup the unpin-work. However, should the
> hardware fire early, we see the connected unpin-work and queue the task.
> The task then promptly runs and unpins the fb before we finish taking
> the required references or even pinning it... Havoc.
> 
> To close the race, we use the flip-pending atomic to indicate when the
> flip is finally setup and enqueued. So during the flip-done processing,
> we can check more accurately whether the flip was expected.
> 
> v2: Add the appropriate mb() to ensure that the writes to the page-flip
> worker are complete prior to marking it active and emitting the MI_FLIP.
> On the read side, the mb should be enforced by the spinlocks.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: stable@vger.kernel.org

Merged to -fixes, with the barrier changes we've discussed on irc applied
(and the reasoning for why we need 2 on each side added to my commit
message note).

Thanks, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH] drm/i915: Close race between processing unpin task and queueing the flip
@ 2012-12-03 11:36 Chris Wilson
  2012-12-06 13:10 ` Daniel Vetter
  0 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2012-12-03 11:36 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson, stable

Before queuing the flip but crucially after attaching the unpin-work to
the crtc, we continue to setup the unpin-work. However, should the
hardware fire early, we see the connected unpin-work and queue the task.
The task then promptly runs and unpins the fb before we finish taking
the required references or even pinning it... Havoc.

To close the race, we use the flip-pending atomic to indicate when the
flip is finally setup and enqueued. So during the flip-done processing,
we can check more accurately whether the flip was expected.

v2: Add the appropriate mb() to ensure that the writes to the page-flip
worker are complete prior to marking it active and emitting the MI_FLIP.
On the read side, the mb should be enforced by the spinlocks.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/i915/i915_debugfs.c  |    4 ++--
 drivers/gpu/drm/i915/i915_irq.c      |    4 +++-
 drivers/gpu/drm/i915/intel_display.c |   29 ++++++++++++++++++++++-------
 drivers/gpu/drm/i915/intel_drv.h     |    5 ++++-
 4 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 8afc0dd..e6a11ca 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -317,7 +317,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
 			seq_printf(m, "No flip due on pipe %c (plane %c)\n",
 				   pipe, plane);
 		} else {
-			if (!work->pending) {
+			if (atomic_read(&work->pending) < INTEL_FLIP_COMPLETE) {
 				seq_printf(m, "Flip queued on pipe %c (plane %c)\n",
 					   pipe, plane);
 			} else {
@@ -328,7 +328,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
 				seq_printf(m, "Stall check enabled, ");
 			else
 				seq_printf(m, "Stall check waiting for page flip ioctl, ");
-			seq_printf(m, "%d prepares\n", work->pending);
+			seq_printf(m, "%d prepares\n", atomic_read(&work->pending));
 
 			if (work->old_fb_obj) {
 				struct drm_i915_gem_object *obj = work->old_fb_obj;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 6cd3dc9..a4dc97f 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1466,7 +1466,9 @@ static void i915_pageflip_stall_check(struct drm_device *dev, int pipe)
 	spin_lock_irqsave(&dev->event_lock, flags);
 	work = intel_crtc->unpin_work;
 
-	if (work == NULL || work->pending || !work->enable_stall_check) {
+	if (work == NULL ||
+	    atomic_read(&work->pending) >= INTEL_FLIP_COMPLETE ||
+	    !work->enable_stall_check) {
 		/* Either the pending flip IRQ arrived, or we're too early. Don't check */
 		spin_unlock_irqrestore(&dev->event_lock, flags);
 		return;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 78d12c4..e400075 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -6929,7 +6929,7 @@ static void do_intel_finish_page_flip(struct drm_device *dev,
 
 	spin_lock_irqsave(&dev->event_lock, flags);
 	work = intel_crtc->unpin_work;
-	if (work == NULL || !work->pending) {
+	if (work == NULL || atomic_read(&work->pending) < INTEL_FLIP_COMPLETE) {
 		spin_unlock_irqrestore(&dev->event_lock, flags);
 		return;
 	}
@@ -6977,16 +6977,22 @@ void intel_prepare_page_flip(struct drm_device *dev, int plane)
 		to_intel_crtc(dev_priv->plane_to_crtc_mapping[plane]);
 	unsigned long flags;
 
+	/* NB: An MMIO update of the plane base pointer will also
+	 * generate a page-flip completion irq, i.e. every modeset
+	 * is also accompanied by a spurious intel_prepare_page_flip().
+	 */
 	spin_lock_irqsave(&dev->event_lock, flags);
-	if (intel_crtc->unpin_work) {
-		if ((++intel_crtc->unpin_work->pending) > 1)
-			DRM_ERROR("Prepared flip multiple times\n");
-	} else {
-		DRM_DEBUG_DRIVER("preparing flip with no unpin work?\n");
-	}
+	if (intel_crtc->unpin_work)
+		atomic_inc_not_zero(&intel_crtc->unpin_work->pending);
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 }
 
+inline static void intel_mark_page_flip_active(struct intel_crtc *intel_crtc)
+{
+	smp_mb__before_atomic_inc();
+	atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING);
+}
+
 static int intel_gen2_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
@@ -7020,6 +7026,8 @@ static int intel_gen2_queue_flip(struct drm_device *dev,
 	intel_ring_emit(ring, fb->pitches[0]);
 	intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
 	intel_ring_emit(ring, 0); /* aux display base address, unused */
+
+	intel_mark_page_flip_active(intel_crtc);
 	intel_ring_advance(ring);
 	return 0;
 
@@ -7060,6 +7068,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev,
 	intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
 	intel_ring_emit(ring, MI_NOOP);
 
+	intel_mark_page_flip_active(intel_crtc);
 	intel_ring_advance(ring);
 	return 0;
 
@@ -7106,6 +7115,8 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
 	pf = 0;
 	pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
 	intel_ring_emit(ring, pf | pipesrc);
+
+	intel_mark_page_flip_active(intel_crtc);
 	intel_ring_advance(ring);
 	return 0;
 
@@ -7148,6 +7159,8 @@ static int intel_gen6_queue_flip(struct drm_device *dev,
 	pf = 0;
 	pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
 	intel_ring_emit(ring, pf | pipesrc);
+
+	intel_mark_page_flip_active(intel_crtc);
 	intel_ring_advance(ring);
 	return 0;
 
@@ -7202,6 +7215,8 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 	intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
 	intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
 	intel_ring_emit(ring, (MI_NOOP));
+
+	intel_mark_page_flip_active(intel_crtc);
 	intel_ring_advance(ring);
 	return 0;
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 522061c..3915ca9 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -401,7 +401,10 @@ struct intel_unpin_work {
 	struct drm_i915_gem_object *old_fb_obj;
 	struct drm_i915_gem_object *pending_flip_obj;
 	struct drm_pending_vblank_event *event;
-	int pending;
+	atomic_t pending;
+#define INTEL_FLIP_INACTIVE	0
+#define INTEL_FLIP_PENDING	1
+#define INTEL_FLIP_COMPLETE	2
 	bool enable_stall_check;
 };
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH] drm/i915: Close race between processing unpin task and queueing the flip
@ 2012-12-03 11:25 Chris Wilson
  0 siblings, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2012-12-03 11:25 UTC (permalink / raw)
  To: intel-gfx; +Cc: stable

Before queuing the flip but crucially after attaching the unpin-work to
the crtc, we continue to set up the unpin-work. However, should the
hardware fire early, we see the connected unpin-work and queue the task.
The task then promptly runs and unpins the fb before we finish taking
the required references or even pinning it... Havoc.

To close the race, we use the flip-pending atomic to indicate when the
flip is finally set up and enqueued. So during the flip-done processing,
we can check more accurately whether the flip was expected.

v2: Add the appropriate mb() to ensure that the writes to the page-flip
worker are complete prior to marking it active and emitting the MI_FLIP.
On the read side, the mb should be enforced by the spinlocks.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/i915/i915_debugfs.c  |    4 ++--
 drivers/gpu/drm/i915/i915_irq.c      |    4 +++-
 drivers/gpu/drm/i915/intel_display.c |   29 ++++++++++++++++++++++-------
 drivers/gpu/drm/i915/intel_drv.h     |    5 ++++-
 4 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 8afc0dd..e6a11ca 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -317,7 +317,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
 			seq_printf(m, "No flip due on pipe %c (plane %c)\n",
 				   pipe, plane);
 		} else {
-			if (!work->pending) {
+			if (atomic_read(&work->pending) < INTEL_FLIP_COMPLETE) {
 				seq_printf(m, "Flip queued on pipe %c (plane %c)\n",
 					   pipe, plane);
 			} else {
@@ -328,7 +328,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
 				seq_printf(m, "Stall check enabled, ");
 			else
 				seq_printf(m, "Stall check waiting for page flip ioctl, ");
-			seq_printf(m, "%d prepares\n", work->pending);
+			seq_printf(m, "%d prepares\n", atomic_read(&work->pending));
 
 			if (work->old_fb_obj) {
 				struct drm_i915_gem_object *obj = work->old_fb_obj;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 6cd3dc9..a4dc97f 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1466,7 +1466,9 @@ static void i915_pageflip_stall_check(struct drm_device *dev, int pipe)
 	spin_lock_irqsave(&dev->event_lock, flags);
 	work = intel_crtc->unpin_work;
 
-	if (work == NULL || work->pending || !work->enable_stall_check) {
+	if (work == NULL ||
+	    atomic_read(&work->pending) >= INTEL_FLIP_COMPLETE ||
+	    !work->enable_stall_check) {
 		/* Either the pending flip IRQ arrived, or we're too early. Don't check */
 		spin_unlock_irqrestore(&dev->event_lock, flags);
 		return;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 78d12c4..e400075 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -6929,7 +6929,7 @@ static void do_intel_finish_page_flip(struct drm_device *dev,
 
 	spin_lock_irqsave(&dev->event_lock, flags);
 	work = intel_crtc->unpin_work;
-	if (work == NULL || !work->pending) {
+	if (work == NULL || atomic_read(&work->pending) < INTEL_FLIP_COMPLETE) {
 		spin_unlock_irqrestore(&dev->event_lock, flags);
 		return;
 	}
@@ -6977,16 +6977,22 @@ void intel_prepare_page_flip(struct drm_device *dev, int plane)
 		to_intel_crtc(dev_priv->plane_to_crtc_mapping[plane]);
 	unsigned long flags;
 
+	/* NB: An MMIO update of the plane base pointer will also
+	 * generate a page-flip completion irq, i.e. every modeset
+	 * is also accompanied by a spurious intel_prepare_page_flip().
+	 */
 	spin_lock_irqsave(&dev->event_lock, flags);
-	if (intel_crtc->unpin_work) {
-		if ((++intel_crtc->unpin_work->pending) > 1)
-			DRM_ERROR("Prepared flip multiple times\n");
-	} else {
-		DRM_DEBUG_DRIVER("preparing flip with no unpin work?\n");
-	}
+	if (intel_crtc->unpin_work)
+		atomic_inc_not_zero(&intel_crtc->unpin_work->pending);
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 }
 
+static inline void intel_mark_page_flip_active(struct intel_crtc *intel_crtc)
+{
+	smp_wmb(); /* atomic_set() is a plain store: order earlier unpin_work writes before it */
+	atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING); /* non-zero, so the prepare irq's atomic_inc_not_zero() now counts */
+}
+
 static int intel_gen2_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
@@ -7020,6 +7026,8 @@ static int intel_gen2_queue_flip(struct drm_device *dev,
 	intel_ring_emit(ring, fb->pitches[0]);
 	intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
 	intel_ring_emit(ring, 0); /* aux display base address, unused */
+
+	intel_mark_page_flip_active(intel_crtc);
 	intel_ring_advance(ring);
 	return 0;
 
@@ -7060,6 +7068,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev,
 	intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
 	intel_ring_emit(ring, MI_NOOP);
 
+	intel_mark_page_flip_active(intel_crtc);
 	intel_ring_advance(ring);
 	return 0;
 
@@ -7106,6 +7115,8 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
 	pf = 0;
 	pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
 	intel_ring_emit(ring, pf | pipesrc);
+
+	intel_mark_page_flip_active(intel_crtc);
 	intel_ring_advance(ring);
 	return 0;
 
@@ -7148,6 +7159,8 @@ static int intel_gen6_queue_flip(struct drm_device *dev,
 	pf = 0;
 	pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
 	intel_ring_emit(ring, pf | pipesrc);
+
+	intel_mark_page_flip_active(intel_crtc);
 	intel_ring_advance(ring);
 	return 0;
 
@@ -7202,6 +7215,8 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 	intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
 	intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
 	intel_ring_emit(ring, (MI_NOOP));
+
+	intel_mark_page_flip_active(intel_crtc);
 	intel_ring_advance(ring);
 	return 0;
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 522061c..3915ca9 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -401,7 +401,10 @@ struct intel_unpin_work {
 	struct drm_i915_gem_object *old_fb_obj;
 	struct drm_i915_gem_object *pending_flip_obj;
 	struct drm_pending_vblank_event *event;
-	int pending;
+	atomic_t pending;
+#define INTEL_FLIP_INACTIVE	0
+#define INTEL_FLIP_PENDING	1
+#define INTEL_FLIP_COMPLETE	2
 	bool enable_stall_check;
 };
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2012-12-06 13:09 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-12-01 17:48 [PATCH] drm/i915: Close race between processing unpin task and queueing the flip Chris Wilson
2012-12-01 20:35 ` Daniel Vetter
2012-12-01 22:32   ` Chris Wilson
2012-12-02  1:15     ` Daniel Vetter
2012-12-02  9:26       ` Chris Wilson
2012-12-02 11:38         ` Daniel Vetter
2012-12-03 11:25 Chris Wilson
2012-12-03 11:36 Chris Wilson
2012-12-06 13:10 ` Daniel Vetter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).