All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Use RCS flips on Ivybridge+
@ 2013-08-20  8:34 Chris Wilson
  2013-08-20 12:31 ` Daniel Vetter
  2013-08-24  0:03 ` Stéphane Marchesin
  0 siblings, 2 replies; 6+ messages in thread
From: Chris Wilson @ 2013-08-20  8:34 UTC (permalink / raw)
  To: intel-gfx

RCS flips do work on Ivybridge+ so long as we can unmask the messages
through DERRMR. However, there are quite a few workarounds mentioned
regarding unmasking more than one event or triggering more than one
message through DERRMR. Those workarounds in principle prevent us from
performing pipelined flips (and asynchronous flips across multiple
planes) and equally apply to the "known good" BCS ring. Given that it
already appears to work, and also appears to work with unmasking all 3
planes at once (and queuing flips across multiple planes), be brave.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=67600
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_reg.h      |   17 +++++++++++++
 drivers/gpu/drm/i915/intel_display.c |   45 +++++++++++++++++++++++++++-------
 2 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e2690ec..730510d 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -679,6 +679,23 @@
 #define   FPGA_DBG_RM_NOCLAIM	(1<<31)
 
 #define DERRMR		0x44050
+#define   DERRMR_PIPEA_SCANLINE		(1<<0)
+#define   DERRMR_PIPEA_PRI_FLIP_DONE	(1<<1)
+#define   DERRMR_PIPEA_SPR_FLIP_DONE	(1<<2)
+#define   DERRMR_PIPEA_VBLANK		(1<<3)
+#define   DERRMR_PIPEA_HBLANK		(1<<5)
+#define   DERRMR_PIPEB_SCANLINE 	(1<<8)
+#define   DERRMR_PIPEB_PRI_FLIP_DONE	(1<<9)
+#define   DERRMR_PIPEB_SPR_FLIP_DONE	(1<<10)
+#define   DERRMR_PIPEB_VBLANK		(1<<11)
+#define   DERRMR_PIPEB_HBLANK		(1<<13)
+/* Note that PIPEC is not a simple translation of PIPEA/PIPEB */
+#define   DERRMR_PIPEC_SCANLINE		(1<<14)
+#define   DERRMR_PIPEC_PRI_FLIP_DONE	(1<<15)
+#define   DERRMR_PIPEC_SPR_FLIP_DONE	(1<<20)
+#define   DERRMR_PIPEC_VBLANK		(1<<21)
+#define   DERRMR_PIPEC_HBLANK		(1<<22)
+
 
 /* GM45+ chicken bits -- debug workaround bits that may be required
  * for various sorts of correct behavior.  The top 16 bits of each are
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 727a123..55c9b39 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -7653,12 +7653,6 @@ err:
 	return ret;
 }
 
-/*
- * On gen7 we currently use the blit ring because (in early silicon at least)
- * the render ring doesn't give us interrpts for page flip completion, which
- * means clients will hang after the first flip is queued.  Fortunately the
- * blit ring generates interrupts properly, so use it instead.
- */
 static int intel_gen7_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
@@ -7666,9 +7660,13 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
+	struct intel_ring_buffer *ring;
 	uint32_t plane_bit = 0;
-	int ret;
+	int len, ret;
+
+	ring = obj->ring;
+	if (ring == NULL || ring->id != RCS)
+		ring = &dev_priv->ring[BCS];
 
 	ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
 	if (ret)
@@ -7690,10 +7688,39 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 		goto err_unpin;
 	}
 
-	ret = intel_ring_begin(ring, 4);
+	len = 4;
+	if (ring->id == RCS)
+		len += 6;
+
+	ret = intel_ring_begin(ring, len);
 	if (ret)
 		goto err_unpin;
 
+	/* Unmask the flip-done completion message. Note that the bspec says that
+	 * we should do this for both the BCS and RCS, and that we must not unmask
+	 * more than one flip event at any time (or ensure that one flip message
+	 * can be sent by waiting for flip-done prior to queueing new flips).
+	 * Experimentation says that BCS works despite DERRMR masking all
+	 * flip-done completion events and that unmasking all planes at once
+	 * for the RCS also doesn't appear to drop events. Setting the DERRMR
+	 * to zero does lead to lockups within MI_DISPLAY_FLIP.
+	 */
+	if (ring->id == RCS) {
+		struct { /* XXX This is quite rude! */
+			struct drm_i915_gem_object *scratch;
+		} *priv = ring->private;
+		u32 addr = i915_gem_obj_ggtt_offset(priv->scratch) + 128;
+
+		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+		intel_ring_emit(ring, DERRMR);
+		intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
+					DERRMR_PIPEB_PRI_FLIP_DONE |
+					DERRMR_PIPEC_PRI_FLIP_DONE));
+		intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1));
+		intel_ring_emit(ring, DERRMR);
+		intel_ring_emit(ring, addr);
+	}
+
 	intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
 	intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
 	intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] drm/i915: Use RCS flips on Ivybridge+
  2013-08-20  8:34 [PATCH] drm/i915: Use RCS flips on Ivybridge+ Chris Wilson
@ 2013-08-20 12:31 ` Daniel Vetter
  2013-08-20 12:39   ` Chris Wilson
  2013-08-24  0:03 ` Stéphane Marchesin
  1 sibling, 1 reply; 6+ messages in thread
From: Daniel Vetter @ 2013-08-20 12:31 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, Aug 20, 2013 at 10:34 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> RCS flips do work on Iybridge+ so long as we can unmask the messages
> through DERRMR. However, there are quite a few workarounds mentioned
> regarding unmasking more than one event or triggering more than one
> message through DERRMR. Those workarounds in principle prevent us from
> performing pipelined flips (and asynchronous flips across multiple
> planes) and equally apply to the "known good" BCS ring. Given that it
> already appears to work, and also appears to work with unmasking all 3
> planes at once (and queuing flips across multiple planes), be brave.
>
> Bugzlla: https://bugs.freedesktop.org/show_bug.cgi?id=67600
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Can you pls add some words with how this DERRMR write will interact
with scanline waits from SNA? I think just a few words to explain how
switching from scanline waits and blits to pageflips and back would be
good (even though I don't really expect that to happen often,
especially now that we have the buffer age extension and friends).
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] drm/i915: Use RCS flips on Ivybridge+
  2013-08-20 12:31 ` Daniel Vetter
@ 2013-08-20 12:39   ` Chris Wilson
  2013-08-20 12:41     ` Chris Wilson
  0 siblings, 1 reply; 6+ messages in thread
From: Chris Wilson @ 2013-08-20 12:39 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Tue, Aug 20, 2013 at 02:31:03PM +0200, Daniel Vetter wrote:
> On Tue, Aug 20, 2013 at 10:34 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > RCS flips do work on Iybridge+ so long as we can unmask the messages
> > through DERRMR. However, there are quite a few workarounds mentioned
> > regarding unmasking more than one event or triggering more than one
> > message through DERRMR. Those workarounds in principle prevent us from
> > performing pipelined flips (and asynchronous flips across multiple
> > planes) and equally apply to the "known good" BCS ring. Given that it
> > already appears to work, and also appears to work with unmasking all 3
> > planes at once (and queuing flips across multiple planes), be brave.
> >
> > Bugzlla: https://bugs.freedesktop.org/show_bug.cgi?id=67600
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Can you pls add some words with how this DERRMR write will interact
> with scanline waits from SNA? I think just a few words to explain how
> switching from scanline waits and blits to pageflips and back would be
> good (even though I don't really expect that to happen often,
> especially now that we have the buffer age extension and friends).

The DERRMR is overwritten by each before use. I thought that was
self-explanatory.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] drm/i915: Use RCS flips on Ivybridge+
  2013-08-20 12:39   ` Chris Wilson
@ 2013-08-20 12:41     ` Chris Wilson
  2013-08-20 12:53       ` Daniel Vetter
  0 siblings, 1 reply; 6+ messages in thread
From: Chris Wilson @ 2013-08-20 12:41 UTC (permalink / raw)
  To: Daniel Vetter, intel-gfx

On Tue, Aug 20, 2013 at 01:39:02PM +0100, Chris Wilson wrote:
> On Tue, Aug 20, 2013 at 02:31:03PM +0200, Daniel Vetter wrote:
> > On Tue, Aug 20, 2013 at 10:34 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > RCS flips do work on Iybridge+ so long as we can unmask the messages
> > > through DERRMR. However, there are quite a few workarounds mentioned
> > > regarding unmasking more than one event or triggering more than one
> > > message through DERRMR. Those workarounds in principle prevent us from
> > > performing pipelined flips (and asynchronous flips across multiple
> > > planes) and equally apply to the "known good" BCS ring. Given that it
> > > already appears to work, and also appears to work with unmasking all 3
> > > planes at once (and queuing flips across multiple planes), be brave.
> > >
> > > Bugzlla: https://bugs.freedesktop.org/show_bug.cgi?id=67600
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > 
> > Can you pls add some words with how this DERRMR write will interact
> > with scanline waits from SNA? I think just a few words to explain how
> > switching from scanline waits and blits to pageflips and back would be
> > good (even though I don't really expect that to happen often,
> > especially now that we have the buffer age extension and friends).
> 
> The DERRMR is overwritten by each before use. I thought that was
> self-explanatory.

The not-so-self-explanatory part is that there is an implicit
synchronisation point in userspace between switching between flips and
scanlines.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] drm/i915: Use RCS flips on Ivybridge+
  2013-08-20 12:41     ` Chris Wilson
@ 2013-08-20 12:53       ` Daniel Vetter
  0 siblings, 0 replies; 6+ messages in thread
From: Daniel Vetter @ 2013-08-20 12:53 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, intel-gfx

On Tue, Aug 20, 2013 at 2:41 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>> The DERRMR is overwritten by each before use. I thought that was
>> self-explanatory.
>
> The not-so-self-explanatory part is that there is a implicit
> synchronisation point in userspace between switching between flips and
> scanlines.

I was hoping for something that I could just lazily paste into the
commit message ;-)

While I rattle down my wishlist: Can you please pimp the dummy_load
stuff (or something similar) in kms_flip to ensure that the
to-be-flipped buffer is busy on the render ring and in a separate test
on the blt ring?

Thanks, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] drm/i915: Use RCS flips on Ivybridge+
  2013-08-20  8:34 [PATCH] drm/i915: Use RCS flips on Ivybridge+ Chris Wilson
  2013-08-20 12:31 ` Daniel Vetter
@ 2013-08-24  0:03 ` Stéphane Marchesin
  1 sibling, 0 replies; 6+ messages in thread
From: Stéphane Marchesin @ 2013-08-24  0:03 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, Aug 20, 2013 at 1:34 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> RCS flips do work on Iybridge+ so long as we can unmask the messages
> through DERRMR. However, there are quite a few workarounds mentioned
> regarding unmasking more than one event or triggering more than one
> message through DERRMR. Those workarounds in principle prevent us from
> performing pipelined flips (and asynchronous flips across multiple
> planes) and equally apply to the "known good" BCS ring. Given that it
> already appears to work, and also appears to work with unmasking all 3
> planes at once (and queuing flips across multiple planes), be brave.
>
> Bugzlla: https://bugs.freedesktop.org/show_bug.cgi?id=67600
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Seems to work great here. Tested-by: Stéphane Marchesin <marcheu@chromium.org>

> ---
>  drivers/gpu/drm/i915/i915_reg.h      |   17 +++++++++++++
>  drivers/gpu/drm/i915/intel_display.c |   45 +++++++++++++++++++++++++++-------
>  2 files changed, 53 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index e2690ec..730510d 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -679,6 +679,23 @@
>  #define   FPGA_DBG_RM_NOCLAIM  (1<<31)
>
>  #define DERRMR         0x44050
> +#define   DERRMR_PIPEA_SCANLINE                (1<<0)
> +#define   DERRMR_PIPEA_PRI_FLIP_DONE   (1<<1)
> +#define   DERRMR_PIPEA_SPR_FLIP_DONE   (1<<2)
> +#define   DERRMR_PIPEA_VBLANK          (1<<3)
> +#define   DERRMR_PIPEA_HBLANK          (1<<5)
> +#define   DERRMR_PIPEB_SCANLINE        (1<<8)
> +#define   DERRMR_PIPEB_PRI_FLIP_DONE   (1<<9)
> +#define   DERRMR_PIPEB_SPR_FLIP_DONE   (1<<10)
> +#define   DERRMR_PIPEB_VBLANK          (1<<11)
> +#define   DERRMR_PIPEB_HBLANK          (1<<13)
> +/* Note that PIPEC is not a simple translation of PIPEA/PIPEB */
> +#define   DERRMR_PIPEC_SCANLINE                (1<<14)
> +#define   DERRMR_PIPEC_PRI_FLIP_DONE   (1<<15)
> +#define   DERRMR_PIPEC_SPR_FLIP_DONE   (1<<20)
> +#define   DERRMR_PIPEC_VBLANK          (1<<21)
> +#define   DERRMR_PIPEC_HBLANK          (1<<22)
> +
>
>  /* GM45+ chicken bits -- debug workaround bits that may be required
>   * for various sorts of correct behavior.  The top 16 bits of each are
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 727a123..55c9b39 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -7653,12 +7653,6 @@ err:
>         return ret;
>  }
>
> -/*
> - * On gen7 we currently use the blit ring because (in early silicon at least)
> - * the render ring doesn't give us interrpts for page flip completion, which
> - * means clients will hang after the first flip is queued.  Fortunately the
> - * blit ring generates interrupts properly, so use it instead.
> - */
>  static int intel_gen7_queue_flip(struct drm_device *dev,
>                                  struct drm_crtc *crtc,
>                                  struct drm_framebuffer *fb,
> @@ -7666,9 +7660,13 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>  {
>         struct drm_i915_private *dev_priv = dev->dev_private;
>         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> -       struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
> +       struct intel_ring_buffer *ring;
>         uint32_t plane_bit = 0;
> -       int ret;
> +       int len, ret;
> +
> +       ring = obj->ring;
> +       if (ring == NULL || ring->id != RCS)
> +               ring = &dev_priv->ring[BCS];
>
>         ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
>         if (ret)
> @@ -7690,10 +7688,39 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>                 goto err_unpin;
>         }
>
> -       ret = intel_ring_begin(ring, 4);
> +       len = 4;
> +       if (ring->id == RCS)
> +               len += 6;
> +
> +       ret = intel_ring_begin(ring, len);
>         if (ret)
>                 goto err_unpin;
>
> +       /* Unmask the flip-done completion message. Note that the bspec says that
> +        * we should do this for both the BCS and RCS, and that we must not unmask
> +        * more than one flip event at any time (or ensure that one flip message
> +        * can be sent by waiting for flip-done prior to queueing new flips).
> +        * Experimentation says that BCS works despite DERRMR masking all
> +        * flip-done completion events and that unmasking all planes at once
> +        * for the RCS also doesn't appear to drop events. Setting the DERRMR
> +        * to zero does lead to lockups within MI_DISPLAY_FLIP.
> +        */
> +       if (ring->id == RCS) {
> +               struct { /* XXX This is quite rude! */
> +                       struct drm_i915_gem_object *scratch;
> +               } *priv = ring->private;
> +               u32 addr = i915_gem_obj_ggtt_offset(priv->scratch) + 128;
> +
> +               intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> +               intel_ring_emit(ring, DERRMR);
> +               intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
> +                                       DERRMR_PIPEB_PRI_FLIP_DONE |
> +                                       DERRMR_PIPEC_PRI_FLIP_DONE));
> +               intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1));
> +               intel_ring_emit(ring, DERRMR);
> +               intel_ring_emit(ring, addr);
> +       }
> +
>         intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
>         intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
>         intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
> --
> 1.7.9.5
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2013-08-24  0:03 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-08-20  8:34 [PATCH] drm/i915: Use RCS flips on Ivybridge+ Chris Wilson
2013-08-20 12:31 ` Daniel Vetter
2013-08-20 12:39   ` Chris Wilson
2013-08-20 12:41     ` Chris Wilson
2013-08-20 12:53       ` Daniel Vetter
2013-08-24  0:03 ` Stéphane Marchesin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.