All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] Async flips
@ 2012-10-30 18:33 Jesse Barnes
  2012-10-30 18:33 ` [PATCH 1/2] drm: add flags argument to crtc page_flip callback Jesse Barnes
                   ` (2 more replies)
  0 siblings, 3 replies; 21+ messages in thread
From: Jesse Barnes @ 2012-10-30 18:33 UTC (permalink / raw)
  To: intel-gfx

The hw supports async flips through the render ring, so why not expose it?
It gives us one more "tear me harder" option we can use in the DDX and
for other cases where simply flipping to the latest buffer is more
important than visual quality.

Jesse

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 1/2] drm: add flags argument to crtc page_flip callback
  2012-10-30 18:33 [RFC] Async flips Jesse Barnes
@ 2012-10-30 18:33 ` Jesse Barnes
  2012-11-02  4:29   ` Mario Kleiner
  2012-10-30 18:33 ` [PATCH 2/2] drm/i915: add async flip support on gen7 Jesse Barnes
  2012-10-31 12:53 ` [RFC] Async flips Ville Syrjälä
  2 siblings, 1 reply; 21+ messages in thread
From: Jesse Barnes @ 2012-10-30 18:33 UTC (permalink / raw)
  To: intel-gfx

This lets us pass down flags the drivers might be interested in, e.g. async.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/drm_crtc.c                |    2 +-
 drivers/gpu/drm/exynos/exynos_drm_crtc.c  |    5 +++--
 drivers/gpu/drm/i915/intel_display.c      |    3 ++-
 drivers/gpu/drm/nouveau/nouveau_display.c |    2 +-
 drivers/gpu/drm/nouveau/nouveau_display.h |    2 +-
 drivers/gpu/drm/radeon/radeon_display.c   |    3 ++-
 drivers/gpu/drm/shmobile/shmob_drm_crtc.c |    3 ++-
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c       |    3 ++-
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.h       |    3 ++-
 drivers/staging/imx-drm/ipuv3-crtc.c      |    3 ++-
 drivers/staging/omapdrm/omap_crtc.c       |    3 ++-
 include/drm/drm_crtc.h                    |    3 ++-
 include/uapi/drm/drm_mode.h               |    4 +++-
 13 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index ef1b221..b4964ac 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -3627,7 +3627,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
 			(void (*) (struct drm_pending_event *)) kfree;
 	}
 
-	ret = crtc->funcs->page_flip(crtc, fb, e);
+	ret = crtc->funcs->page_flip(crtc, fb, e, flags);
 	if (ret) {
 		if (page_flip->flags & DRM_MODE_PAGE_FLIP_EVENT) {
 			spin_lock_irqsave(&dev->event_lock, flags);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
index fce245f..90dc116 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
@@ -202,8 +202,9 @@ static struct drm_crtc_helper_funcs exynos_crtc_helper_funcs = {
 };
 
 static int exynos_drm_crtc_page_flip(struct drm_crtc *crtc,
-				      struct drm_framebuffer *fb,
-				      struct drm_pending_vblank_event *event)
+				     struct drm_framebuffer *fb,
+				     struct drm_pending_vblank_event *event,
+				     u32 flags)
 {
 	struct drm_device *dev = crtc->dev;
 	struct exynos_drm_private *dev_priv = dev->dev_private;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 31d9fb8..2c0c174 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -7109,7 +7109,8 @@ static int intel_default_queue_flip(struct drm_device *dev,
 
 static int intel_crtc_page_flip(struct drm_crtc *crtc,
 				struct drm_framebuffer *fb,
-				struct drm_pending_vblank_event *event)
+				struct drm_pending_vblank_event *event,
+				u32 flip_flags)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 8f98e5a..dc89496 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -610,7 +610,7 @@ fail:
 
 int
 nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
-		       struct drm_pending_vblank_event *event)
+		       struct drm_pending_vblank_event *event, u32 flip_flags)
 {
 	struct drm_device *dev = crtc->dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h b/drivers/gpu/drm/nouveau/nouveau_display.h
index 722548b..bb2f63d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.h
+++ b/drivers/gpu/drm/nouveau/nouveau_display.h
@@ -63,7 +63,7 @@ int  nouveau_vblank_enable(struct drm_device *dev, int crtc);
 void nouveau_vblank_disable(struct drm_device *dev, int crtc);
 
 int  nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
-			    struct drm_pending_vblank_event *event);
+			    struct drm_pending_vblank_event *event, u32 flags);
 int  nouveau_finish_page_flip(struct nouveau_channel *,
 			      struct nouveau_page_flip_state *);
 
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index bfa2a60..4c32552 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -346,7 +346,8 @@ void radeon_crtc_handle_flip(struct radeon_device *rdev, int crtc_id)
 
 static int radeon_crtc_page_flip(struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
-				 struct drm_pending_vblank_event *event)
+				 struct drm_pending_vblank_event *event,
+				 u32 flip_flags)
 {
 	struct drm_device *dev = crtc->dev;
 	struct radeon_device *rdev = dev->dev_private;
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
index 0e7a930..4878fa6 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
@@ -476,7 +476,8 @@ void shmob_drm_crtc_finish_page_flip(struct shmob_drm_crtc *scrtc)
 
 static int shmob_drm_crtc_page_flip(struct drm_crtc *crtc,
 				    struct drm_framebuffer *fb,
-				    struct drm_pending_vblank_event *event)
+				    struct drm_pending_vblank_event *event,
+				    u32 flags)
 {
 	struct shmob_drm_crtc *scrtc = to_shmob_crtc(crtc);
 	struct drm_device *dev = scrtc->crtc.dev;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 5474394..3cb86fe 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -1682,7 +1682,8 @@ int vmw_du_update_layout(struct vmw_private *dev_priv, unsigned num,
 
 int vmw_du_page_flip(struct drm_crtc *crtc,
 		     struct drm_framebuffer *fb,
-		     struct drm_pending_vblank_event *event)
+		     struct drm_pending_vblank_event *event,
+		     u32 flags)
 {
 	struct vmw_private *dev_priv = vmw_priv(crtc->dev);
 	struct drm_framebuffer *old_fb = crtc->fb;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index 6fa89c9..088a8c9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -123,7 +123,8 @@ struct vmw_display_unit {
 void vmw_display_unit_cleanup(struct vmw_display_unit *du);
 int vmw_du_page_flip(struct drm_crtc *crtc,
 		     struct drm_framebuffer *fb,
-		     struct drm_pending_vblank_event *event);
+		     struct drm_pending_vblank_event *event,
+		     u32 flags);
 void vmw_du_crtc_save(struct drm_crtc *crtc);
 void vmw_du_crtc_restore(struct drm_crtc *crtc);
 void vmw_du_crtc_gamma_set(struct drm_crtc *crtc,
diff --git a/drivers/staging/imx-drm/ipuv3-crtc.c b/drivers/staging/imx-drm/ipuv3-crtc.c
index 78d3eda..8c93b57 100644
--- a/drivers/staging/imx-drm/ipuv3-crtc.c
+++ b/drivers/staging/imx-drm/ipuv3-crtc.c
@@ -132,7 +132,8 @@ static void ipu_crtc_dpms(struct drm_crtc *crtc, int mode)
 
 static int ipu_page_flip(struct drm_crtc *crtc,
 		struct drm_framebuffer *fb,
-		struct drm_pending_vblank_event *event)
+		struct drm_pending_vblank_event *event,
+		u32 flags)
 {
 	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
 	int ret;
diff --git a/drivers/staging/omapdrm/omap_crtc.c b/drivers/staging/omapdrm/omap_crtc.c
index 732f2ad..51daf5d 100644
--- a/drivers/staging/omapdrm/omap_crtc.c
+++ b/drivers/staging/omapdrm/omap_crtc.c
@@ -173,7 +173,8 @@ static void page_flip_cb(void *arg)
 
 static int omap_crtc_page_flip_locked(struct drm_crtc *crtc,
 		 struct drm_framebuffer *fb,
-		 struct drm_pending_vblank_event *event)
+		 struct drm_pending_vblank_event *event,
+		 u32 flags)
 {
 	struct drm_device *dev = crtc->dev;
 	struct omap_crtc *omap_crtc = to_omap_crtc(crtc);
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 3fa18b7..79f1d9b 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -355,7 +355,8 @@ struct drm_crtc_funcs {
 	 */
 	int (*page_flip)(struct drm_crtc *crtc,
 			 struct drm_framebuffer *fb,
-			 struct drm_pending_vblank_event *event);
+			 struct drm_pending_vblank_event *event,
+			 u32 flags);
 
 	int (*set_property)(struct drm_crtc *crtc,
 			    struct drm_property *property, uint64_t val);
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 3d6301b..6e927bb 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -399,7 +399,9 @@ struct drm_mode_crtc_lut {
 };
 
 #define DRM_MODE_PAGE_FLIP_EVENT 0x01
-#define DRM_MODE_PAGE_FLIP_FLAGS DRM_MODE_PAGE_FLIP_EVENT
+#define DRM_MODE_PAGE_FLIP_ASYNC 0x02
+#define DRM_MODE_PAGE_FLIP_FLAGS (DRM_MODE_PAGE_FLIP_EVENT | \
+				  DRM_MODE_PAGE_FLIP_ASYNC)
 
 /*
  * Request a page flip on the specified crtc.
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 2/2] drm/i915: add async flip support on gen7
  2012-10-30 18:33 [RFC] Async flips Jesse Barnes
  2012-10-30 18:33 ` [PATCH 1/2] drm: add flags argument to crtc page_flip callback Jesse Barnes
@ 2012-10-30 18:33 ` Jesse Barnes
  2012-10-31 17:47   ` Eric Anholt
  2012-10-31 12:53 ` [RFC] Async flips Ville Syrjälä
  2 siblings, 1 reply; 21+ messages in thread
From: Jesse Barnes @ 2012-10-30 18:33 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/i915_drv.h      |    3 ++-
 drivers/gpu/drm/i915/i915_reg.h      |    2 ++
 drivers/gpu/drm/i915/intel_display.c |   30 +++++++++++++++++++++---------
 3 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f5120d0..af9db2b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -273,7 +273,8 @@ struct drm_i915_display_funcs {
 	void (*init_pch_clock_gating)(struct drm_device *dev);
 	int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc,
 			  struct drm_framebuffer *fb,
-			  struct drm_i915_gem_object *obj);
+			  struct drm_i915_gem_object *obj,
+			  u32 flags);
 	int (*update_plane)(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 			    int x, int y);
 	/* clock updates for mode set */
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 2dd880f..c9b49b7 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -211,6 +211,7 @@
 #define MI_LOAD_SCAN_LINES_INCL MI_INSTR(0x12, 0)
 #define MI_DISPLAY_FLIP		MI_INSTR(0x14, 2)
 #define MI_DISPLAY_FLIP_I915	MI_INSTR(0x14, 1)
+#define   MI_DISPLAY_FLIP_ASYNC_IND    (1 << 22)
 #define   MI_DISPLAY_FLIP_PLANE(n) ((n) << 20)
 /* IVB has funny definitions for which plane to flip. */
 #define   MI_DISPLAY_FLIP_IVB_PLANE_A  (0 << 19)
@@ -219,6 +220,7 @@
 #define   MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19)
 #define   MI_DISPLAY_FLIP_IVB_PLANE_C  (4 << 19)
 #define   MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19)
+#define   MI_DISPLAY_FLIP_ASYNC	       (1 << 0) /* in DW2 of the command */
 #define MI_ARB_ON_OFF		MI_INSTR(0x08, 0)
 #define   MI_ARB_ENABLE			(1<<0)
 #define   MI_ARB_DISABLE		(0<<0)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 2c0c174..ed79892 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -6878,7 +6878,8 @@ void intel_prepare_page_flip(struct drm_device *dev, int plane)
 static int intel_gen2_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
-				 struct drm_i915_gem_object *obj)
+				 struct drm_i915_gem_object *obj,
+				 u32 flags)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -6920,7 +6921,8 @@ err:
 static int intel_gen3_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
-				 struct drm_i915_gem_object *obj)
+				 struct drm_i915_gem_object *obj,
+				 u32 flags)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -6960,7 +6962,8 @@ err:
 static int intel_gen4_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
-				 struct drm_i915_gem_object *obj)
+				 struct drm_i915_gem_object *obj,
+				 u32 flags)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -7006,7 +7009,8 @@ err:
 static int intel_gen6_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
-				 struct drm_i915_gem_object *obj)
+				 struct drm_i915_gem_object *obj,
+				 u32 flags)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -7054,12 +7058,13 @@ err:
 static int intel_gen7_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
-				 struct drm_i915_gem_object *obj)
+				 struct drm_i915_gem_object *obj,
+				 u32 flags)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
-	uint32_t plane_bit = 0;
+	uint32_t plane_bit = 0, addr_bits = 0;
 	int ret;
 
 	ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
@@ -7082,13 +7087,19 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 		goto err_unpin;
 	}
 
+	if (flags & DRM_MODE_PAGE_FLIP_ASYNC) {
+		plane_bit |= MI_DISPLAY_FLIP_ASYNC_IND;
+		addr_bits |= MI_DISPLAY_FLIP_ASYNC;
+	}
+
 	ret = intel_ring_begin(ring, 4);
 	if (ret)
 		goto err_unpin;
 
 	intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
 	intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
-	intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
+	intel_ring_emit(ring, (obj->gtt_offset + intel_crtc->dspaddr_offset) |
+			addr_bits);
 	intel_ring_emit(ring, (MI_NOOP));
 	intel_ring_advance(ring);
 	return 0;
@@ -7102,7 +7113,8 @@ err:
 static int intel_default_queue_flip(struct drm_device *dev,
 				    struct drm_crtc *crtc,
 				    struct drm_framebuffer *fb,
-				    struct drm_i915_gem_object *obj)
+				    struct drm_i915_gem_object *obj,
+				    u32 flags)
 {
 	return -ENODEV;
 }
@@ -7183,7 +7195,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	 */
 	atomic_add(1 << intel_crtc->plane, &work->old_fb_obj->pending_flip);
 
-	ret = dev_priv->display.queue_flip(dev, crtc, fb, obj);
+	ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, flags);
 	if (ret)
 		goto cleanup_pending;
 
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [RFC] Async flips
  2012-10-30 18:33 [RFC] Async flips Jesse Barnes
  2012-10-30 18:33 ` [PATCH 1/2] drm: add flags argument to crtc page_flip callback Jesse Barnes
  2012-10-30 18:33 ` [PATCH 2/2] drm/i915: add async flip support on gen7 Jesse Barnes
@ 2012-10-31 12:53 ` Ville Syrjälä
  2012-10-31 15:23   ` Jesse Barnes
  2012-10-31 17:44   ` Eric Anholt
  2 siblings, 2 replies; 21+ messages in thread
From: Ville Syrjälä @ 2012-10-31 12:53 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: intel-gfx

On Tue, Oct 30, 2012 at 01:33:47PM -0500, Jesse Barnes wrote:
> The hw supports async flips through the render ring, so why not expose it?
> It gives us one more "tear me harder" option we can use in the DDX and
> for other cases where simply flipping to the latest buffer is more
> important than visual quality.

The only reason I can see why anyone would really want async flips is
when you're restricted to double buffering. With triple buffering you
should be able to override the previous flip w/o tearing.

Well, actually if you use the ring based flips, then you can't do the
override. My atomic page flip code can do it because it's using mmio
flips. There were also other reasons favoring mmio over ring.

Once the atomic code is deemed ready, I would suggest we just nuke the
ring based flip code (pun intended).

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] Async flips
  2012-10-31 12:53 ` [RFC] Async flips Ville Syrjälä
@ 2012-10-31 15:23   ` Jesse Barnes
  2012-10-31 15:26     ` Daniel Vetter
  2012-10-31 17:44   ` Eric Anholt
  1 sibling, 1 reply; 21+ messages in thread
From: Jesse Barnes @ 2012-10-31 15:23 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx

On Wed, 31 Oct 2012 14:53:24 +0200
Ville Syrjälä <ville.syrjala@linux.intel.com> wrote:

> On Tue, Oct 30, 2012 at 01:33:47PM -0500, Jesse Barnes wrote:
> > The hw supports async flips through the render ring, so why not expose it?
> > It gives us one more "tear me harder" option we can use in the DDX and
> > for other cases where simply flipping to the latest buffer is more
> > important than visual quality.
> 
> The only reason I can see why anyone would really want async flips is
> when you're restricted to double buffering. With triple buffering you
> should be able to override the previous flip w/o tearing.
> 
> Well, actually if you use the ring based flips, then you can't do the
> override. My atomic page flip code can do it because it's using mmio
> flips. There were also other reasons favoring mmio over ring.
> 
> Once the atomic code is deemed ready, I would suggest we just nuke the
> ring based flip code (pun intended).

Yeah, I agree.  In fact one of the first versions of the flip code used
mmio, and I think it's a better way to go.

I think you're correct about triple buffering, but I don't know if our
DDX does the right thing here these days or not (by default).

-- 
Jesse Barnes, Intel Open Source Technology Center
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] Async flips
  2012-10-31 15:23   ` Jesse Barnes
@ 2012-10-31 15:26     ` Daniel Vetter
  2012-10-31 15:39       ` Jesse Barnes
  2012-10-31 16:05       ` Ville Syrjälä
  0 siblings, 2 replies; 21+ messages in thread
From: Daniel Vetter @ 2012-10-31 15:26 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: intel-gfx

On Wed, Oct 31, 2012 at 4:23 PM, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
>
>> On Tue, Oct 30, 2012 at 01:33:47PM -0500, Jesse Barnes wrote:
>> > The hw supports async flips through the render ring, so why not expose it?
>> > It gives us one more "tear me harder" option we can use in the DDX and
>> > for other cases where simply flipping to the latest buffer is more
>> > important than visual quality.
>>
>> The only reason I can see why anyone would really want async flips is
>> when you're restricted to double buffering. With triple buffering you
>> should be able to override the previous flip w/o tearing.
>>
>> Well, actually if you use the ring based flips, then you can't do the
>> override. My atomic page flip code can do it because it's using mmio
>> flips. There were also other reasons favoring mmio over ring.
>>
>> Once the atomic code is deemed ready, I would suggest we just nuke the
>> ring based flip code (pun intended).
>
> Yeah, I agree.  In fact one of the first versions of the flip code used
> mmio, and I think it's a better way to go.

How are we gonna sync up with outstanding rendering before issuing the
flip? If the answer is involves enabling the render irq, I'm not gonna
like it ;-)
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] Async flips
  2012-10-31 15:26     ` Daniel Vetter
@ 2012-10-31 15:39       ` Jesse Barnes
  2012-10-31 15:49         ` Chris Wilson
  2012-10-31 16:05       ` Ville Syrjälä
  1 sibling, 1 reply; 21+ messages in thread
From: Jesse Barnes @ 2012-10-31 15:39 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Wed, 31 Oct 2012 16:26:54 +0100
Daniel Vetter <daniel@ffwll.ch> wrote:

> On Wed, Oct 31, 2012 at 4:23 PM, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> >
> >> On Tue, Oct 30, 2012 at 01:33:47PM -0500, Jesse Barnes wrote:
> >> > The hw supports async flips through the render ring, so why not expose it?
> >> > It gives us one more "tear me harder" option we can use in the DDX and
> >> > for other cases where simply flipping to the latest buffer is more
> >> > important than visual quality.
> >>
> >> The only reason I can see why anyone would really want async flips is
> >> when you're restricted to double buffering. With triple buffering you
> >> should be able to override the previous flip w/o tearing.
> >>
> >> Well, actually if you use the ring based flips, then you can't do the
> >> override. My atomic page flip code can do it because it's using mmio
> >> flips. There were also other reasons favoring mmio over ring.
> >>
> >> Once the atomic code is deemed ready, I would suggest we just nuke the
> >> ring based flip code (pun intended).
> >
> > Yeah, I agree.  In fact one of the first versions of the flip code used
> > mmio, and I think it's a better way to go.
> 
> How are we gonna sync up with outstanding rendering before issuing the
> flip? If the answer is involves enabling the render irq, I'm not gonna
> like it ;-)

Why are you afraid of irqs when rendering is active?  We'll already be
awake at those times anyway...

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] Async flips
  2012-10-31 15:39       ` Jesse Barnes
@ 2012-10-31 15:49         ` Chris Wilson
  2012-10-31 16:03           ` Daniel Vetter
  2012-10-31 16:27           ` Jesse Barnes
  0 siblings, 2 replies; 21+ messages in thread
From: Chris Wilson @ 2012-10-31 15:49 UTC (permalink / raw)
  To: Jesse Barnes, Daniel Vetter; +Cc: intel-gfx

On Wed, 31 Oct 2012 08:39:09 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> On Wed, 31 Oct 2012 16:26:54 +0100
> Daniel Vetter <daniel@ffwll.ch> wrote:
> 
> > On Wed, Oct 31, 2012 at 4:23 PM, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> > >
> > >> On Tue, Oct 30, 2012 at 01:33:47PM -0500, Jesse Barnes wrote:
> > >> > The hw supports async flips through the render ring, so why not expose it?
> > >> > It gives us one more "tear me harder" option we can use in the DDX and
> > >> > for other cases where simply flipping to the latest buffer is more
> > >> > important than visual quality.
> > >>
> > >> The only reason I can see why anyone would really want async flips is
> > >> when you're restricted to double buffering. With triple buffering you
> > >> should be able to override the previous flip w/o tearing.
> > >>
> > >> Well, actually if you use the ring based flips, then you can't do the
> > >> override. My atomic page flip code can do it because it's using mmio
> > >> flips. There were also other reasons favoring mmio over ring.
> > >>
> > >> Once the atomic code is deemed ready, I would suggest we just nuke the
> > >> ring based flip code (pun intended).
> > >
> > > Yeah, I agree.  In fact one of the first versions of the flip code used
> > > mmio, and I think it's a better way to go.
> > 
> > How are we gonna sync up with outstanding rendering before issuing the
> > flip? If the answer is involves enabling the render irq, I'm not gonna
> > like it ;-)
> 
> Why are you afraid of irqs when rendering is active?  We'll already be
> awake at those times anyway...

Because it involves the driver stalling.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] Async flips
  2012-10-31 15:49         ` Chris Wilson
@ 2012-10-31 16:03           ` Daniel Vetter
  2012-10-31 16:27           ` Jesse Barnes
  1 sibling, 0 replies; 21+ messages in thread
From: Daniel Vetter @ 2012-10-31 16:03 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, Oct 31, 2012 at 4:49 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Because it involves the driver stalling.

And the coherent case of get_seqno is expensive. And depending upon
workload we'll get quite some interrupts, since we can't filter for
just the single batchbuffer completion we want at the hw level.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] Async flips
  2012-10-31 15:26     ` Daniel Vetter
  2012-10-31 15:39       ` Jesse Barnes
@ 2012-10-31 16:05       ` Ville Syrjälä
  1 sibling, 0 replies; 21+ messages in thread
From: Ville Syrjälä @ 2012-10-31 16:05 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Wed, Oct 31, 2012 at 04:26:54PM +0100, Daniel Vetter wrote:
> On Wed, Oct 31, 2012 at 4:23 PM, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> >
> >> On Tue, Oct 30, 2012 at 01:33:47PM -0500, Jesse Barnes wrote:
> >> > The hw supports async flips through the render ring, so why not expose it?
> >> > It gives us one more "tear me harder" option we can use in the DDX and
> >> > for other cases where simply flipping to the latest buffer is more
> >> > important than visual quality.
> >>
> >> The only reason I can see why anyone would really want async flips is
> >> when you're restricted to double buffering. With triple buffering you
> >> should be able to override the previous flip w/o tearing.
> >>
> >> Well, actually if you use the ring based flips, then you can't do the
> >> override. My atomic page flip code can do it because it's using mmio
> >> flips. There were also other reasons favoring mmio over ring.
> >>
> >> Once the atomic code is deemed ready, I would suggest we just nuke the
> >> ring based flip code (pun intended).
> >
> > Yeah, I agree.  In fact one of the first versions of the flip code used
> > mmio, and I think it's a better way to go.
> 
> How are we gonna sync up with outstanding rendering before issuing the
> flip? If the answer is involves enabling the render irq, I'm not gonna
> like it ;-)

That's currently the only major TODO item on my list. Currently the
ioctl just ends up blocking when I pin the buffers, but I need some
async method to avoid that. So yes, irqs seem like the right approach
here. What's the problem w/ irqs?

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] Async flips
  2012-10-31 15:49         ` Chris Wilson
  2012-10-31 16:03           ` Daniel Vetter
@ 2012-10-31 16:27           ` Jesse Barnes
  1 sibling, 0 replies; 21+ messages in thread
From: Jesse Barnes @ 2012-10-31 16:27 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, 31 Oct 2012 15:49:36 +0000
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> On Wed, 31 Oct 2012 08:39:09 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> > On Wed, 31 Oct 2012 16:26:54 +0100
> > Daniel Vetter <daniel@ffwll.ch> wrote:
> > 
> > > On Wed, Oct 31, 2012 at 4:23 PM, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> > > >
> > > >> On Tue, Oct 30, 2012 at 01:33:47PM -0500, Jesse Barnes wrote:
> > > >> > The hw supports async flips through the render ring, so why not expose it?
> > > >> > It gives us one more "tear me harder" option we can use in the DDX and
> > > >> > for other cases where simply flipping to the latest buffer is more
> > > >> > important than visual quality.
> > > >>
> > > >> The only reason I can see why anyone would really want async flips is
> > > >> when you're restricted to double buffering. With triple buffering you
> > > >> should be able to override the previous flip w/o tearing.
> > > >>
> > > >> Well, actually if you use the ring based flips, then you can't do the
> > > >> override. My atomic page flip code can do it because it's using mmio
> > > >> flips. There were also other reasons favoring mmio over ring.
> > > >>
> > > >> Once the atomic code is deemed ready, I would suggest we just nuke the
> > > >> ring based flip code (pun intended).
> > > >
> > > > Yeah, I agree.  In fact one of the first versions of the flip code used
> > > > mmio, and I think it's a better way to go.
> > > 
> > > How are we gonna sync up with outstanding rendering before issuing the
> > > flip? If the answer is involves enabling the render irq, I'm not gonna
> > > like it ;-)
> > 
> > Why are you afraid of irqs when rendering is active?  We'll already be
> > awake at those times anyway...
> 
> Because it involves the driver stalling.

Can you elaborate?  I know there are pros & cons to mmio vs ring.
Synchronization is different in each case, and getting the flip to
happen when you want can be tough too in both cases.

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] Async flips
  2012-10-31 12:53 ` [RFC] Async flips Ville Syrjälä
  2012-10-31 15:23   ` Jesse Barnes
@ 2012-10-31 17:44   ` Eric Anholt
  2012-10-31 18:51     ` Ville Syrjälä
  1 sibling, 1 reply; 21+ messages in thread
From: Eric Anholt @ 2012-10-31 17:44 UTC (permalink / raw)
  To: Ville Syrjälä, Jesse Barnes; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 1076 bytes --]

Ville Syrjälä <ville.syrjala@linux.intel.com> writes:

> On Tue, Oct 30, 2012 at 01:33:47PM -0500, Jesse Barnes wrote:
>> The hw supports async flips through the render ring, so why not expose it?
>> It gives us one more "tear me harder" option we can use in the DDX and
>> for other cases where simply flipping to the latest buffer is more
>> important than visual quality.
>
> The only reason I can see why anyone would really want async flips is
> when you're restricted to double buffering. With triple buffering you
> should be able to override the previous flip w/o tearing.
>
> Well, actually if you use the ring based flips, then you can't do the
> override. My atomic page flip code can do it because it's using mmio
> flips. There were also other reasons favoring mmio over ring.
>
> Once the atomic code is deemed ready, I would suggest we just nuke the
> ring based flip code (pun intended).

Can you outline what exactly your plan is for doing faster-than-vblank
page flipping without tearing, and how it gets synchronized with
rendering?

[-- Attachment #1.2: Type: application/pgp-signature, Size: 197 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/2] drm/i915: add async flip support on gen7
  2012-10-30 18:33 ` [PATCH 2/2] drm/i915: add async flip support on gen7 Jesse Barnes
@ 2012-10-31 17:47   ` Eric Anholt
  2012-10-31 19:24     ` Jesse Barnes
  0 siblings, 1 reply; 21+ messages in thread
From: Eric Anholt @ 2012-10-31 17:47 UTC (permalink / raw)
  To: Jesse Barnes, intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 390 bytes --]

Jesse Barnes <jbarnes@virtuousgeek.org> writes:

> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>

So, these days if you go to render to a buffer that's been flipped from,
your rendering gets stalled (since the buffer may still be scanned out
when the rendering starts happening), which is the thing we need to
avoid.  I don't see this patch series avoiding that in the async case.

[-- Attachment #1.2: Type: application/pgp-signature, Size: 197 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] Async flips
  2012-10-31 17:44   ` Eric Anholt
@ 2012-10-31 18:51     ` Ville Syrjälä
  2012-11-02  4:45       ` Mario Kleiner
  0 siblings, 1 reply; 21+ messages in thread
From: Ville Syrjälä @ 2012-10-31 18:51 UTC (permalink / raw)
  To: Eric Anholt; +Cc: intel-gfx

On Wed, Oct 31, 2012 at 10:44:47AM -0700, Eric Anholt wrote:
> Ville Syrjälä <ville.syrjala@linux.intel.com> writes:
> 
> > On Tue, Oct 30, 2012 at 01:33:47PM -0500, Jesse Barnes wrote:
> >> The hw supports async flips through the render ring, so why not expose it?
> >> It gives us one more "tear me harder" option we can use in the DDX and
> >> for other cases where simply flipping to the latest buffer is more
> >> important than visual quality.
> >
> > The only reason I can see why anyone would really want async flips is
> > when you're restricted to double buffering. With triple buffering you
> > should be able to override the previous flip w/o tearing.
> >
> > Well, actually if you use the ring based flips, then you can't do the
> > override. My atomic page flip code can do it because it's using mmio
> > flips. There were also other reasons favoring mmio over ring.
> >
> > Once the atomic code is deemed ready, I would suggest we just nuke the
> > ring based flip code (pun intended).
> 
> Can you outline what exactly your plan is for doing faster-than-vblank
> page flipping without tearing, and how it gets synchronized with
> rendering?

The faster than vrefresh flipping simply involves overwriting the
display plane registers before they've been latched by the hardware.
This appears to work fine already.

As far as the synchronization goes, I basically just want a callback
from the GPU when it's done with the buffer. I'm expecting to find
some kind of GPU progress interrupt that I can enable while I'm waiting
for the GPU to catch up. So I also need a FIFO to store the flip
requests in the meantime. Once the GPU tells me it's ready, I pull the
flip request from the queue and proceed with the display plane
programming.

So the synchronization part it's still quite handwavy, and I need
to study the hardware/driver in more detail to figure out the
specifics.

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/2] drm/i915: add async flip support on gen7
  2012-10-31 17:47   ` Eric Anholt
@ 2012-10-31 19:24     ` Jesse Barnes
  0 siblings, 0 replies; 21+ messages in thread
From: Jesse Barnes @ 2012-10-31 19:24 UTC (permalink / raw)
  To: Eric Anholt; +Cc: intel-gfx

On Wed, 31 Oct 2012 10:47:03 -0700
Eric Anholt <eric@anholt.net> wrote:

> Jesse Barnes <jbarnes@virtuousgeek.org> writes:
> 
> > Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> 
> So, these days if you go to render to a buffer that's been flipped from,
> your rendering gets stalled (since the buffer may still be scanned out
> when the rendering starts happening), which is the thing we need to
> avoid.  I don't see this patch series avoiding that in the async case.

The currently displayed buffer should be marked idle shortly (i.e.
when the parser gets to your new flip command) after you queue your
async flip.  So the delay should be much shorter than with today's
sync'd flips, but may still be an issue if you have rendering queued up
before the flip.  We can certainly get rid of that...

Or we could just fix our triple buffering to do what you want and get
the best of both worlds.

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/2] drm: add flags argument to crtc page_flip callback
  2012-10-30 18:33 ` [PATCH 1/2] drm: add flags argument to crtc page_flip callback Jesse Barnes
@ 2012-11-02  4:29   ` Mario Kleiner
  2012-11-02 16:00     ` Jesse Barnes
  0 siblings, 1 reply; 21+ messages in thread
From: Mario Kleiner @ 2012-11-02  4:29 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: intel-gfx

On 30.10.12 19:33, Jesse Barnes wrote:
> This lets us pass down flags the drivers might be interested in, e.g. async.
>
> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>

Hi Jesse

I like it :) -- Anything that helps to get rid of the troublesome 
'SwapBuffersWait' madness in the ddx at some point makes me happy.

> diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
> index ef1b221..b4964ac 100644
> --- a/drivers/gpu/drm/drm_crtc.c
> +++ b/drivers/gpu/drm/drm_crtc.c
> @@ -3627,7 +3627,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
>   			(void (*) (struct drm_pending_event *)) kfree;
>   	}
>
> -	ret = crtc->funcs->page_flip(crtc, fb, e);
> +	ret = crtc->funcs->page_flip(crtc, fb, e, flags);

I think this should be page_flip->flags, ie.

+	ret = crtc->funcs->page_flip(crtc, fb, e, page_flip->flags);

because flags is used here:

>   	if (ret) {
>   		if (page_flip->flags & DRM_MODE_PAGE_FLIP_EVENT) {
>   			spin_lock_irqsave(&dev->event_lock, flags);

to spin_lock_irqsave.

As a tiny nit-pick, you sometimes name the variable 'flags', sometimes 
'flip_flags' in the different kms implementations below, which could be 
made consistent.

Reviewed-by: mario.kleiner@tuebingen.mpg.de


> diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
> index fce245f..90dc116 100644
> --- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c
> +++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
> @@ -202,8 +202,9 @@ static struct drm_crtc_helper_funcs exynos_crtc_helper_funcs = {
>   };
>
>   static int exynos_drm_crtc_page_flip(struct drm_crtc *crtc,
> -				      struct drm_framebuffer *fb,
> -				      struct drm_pending_vblank_event *event)
> +				     struct drm_framebuffer *fb,
> +				     struct drm_pending_vblank_event *event,
> +				     u32 flags)
>   {
>   	struct drm_device *dev = crtc->dev;
>   	struct exynos_drm_private *dev_priv = dev->dev_private;
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 31d9fb8..2c0c174 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -7109,7 +7109,8 @@ static int intel_default_queue_flip(struct drm_device *dev,
>
>   static int intel_crtc_page_flip(struct drm_crtc *crtc,
>   				struct drm_framebuffer *fb,
> -				struct drm_pending_vblank_event *event)
> +				struct drm_pending_vblank_event *event,
> +				u32 flip_flags)
>   {
>   	struct drm_device *dev = crtc->dev;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
> diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
> index 8f98e5a..dc89496 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_display.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_display.c
> @@ -610,7 +610,7 @@ fail:
>
>   int
>   nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
> -		       struct drm_pending_vblank_event *event)
> +		       struct drm_pending_vblank_event *event, u32 flip_flags)
>   {
>   	struct drm_device *dev = crtc->dev;
>   	struct nouveau_drm *drm = nouveau_drm(dev);
> diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h b/drivers/gpu/drm/nouveau/nouveau_display.h
> index 722548b..bb2f63d 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_display.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_display.h
> @@ -63,7 +63,7 @@ int  nouveau_vblank_enable(struct drm_device *dev, int crtc);
>   void nouveau_vblank_disable(struct drm_device *dev, int crtc);
>
>   int  nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
> -			    struct drm_pending_vblank_event *event);
> +			    struct drm_pending_vblank_event *event, u32 flags);
>   int  nouveau_finish_page_flip(struct nouveau_channel *,
>   			      struct nouveau_page_flip_state *);
>
> diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
> index bfa2a60..4c32552 100644
> --- a/drivers/gpu/drm/radeon/radeon_display.c
> +++ b/drivers/gpu/drm/radeon/radeon_display.c
> @@ -346,7 +346,8 @@ void radeon_crtc_handle_flip(struct radeon_device *rdev, int crtc_id)
>
>   static int radeon_crtc_page_flip(struct drm_crtc *crtc,
>   				 struct drm_framebuffer *fb,
> -				 struct drm_pending_vblank_event *event)
> +				 struct drm_pending_vblank_event *event,
> +				 u32 flip_flags)
>   {
>   	struct drm_device *dev = crtc->dev;
>   	struct radeon_device *rdev = dev->dev_private;
> diff --git a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
> index 0e7a930..4878fa6 100644
> --- a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
> +++ b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
> @@ -476,7 +476,8 @@ void shmob_drm_crtc_finish_page_flip(struct shmob_drm_crtc *scrtc)
>
>   static int shmob_drm_crtc_page_flip(struct drm_crtc *crtc,
>   				    struct drm_framebuffer *fb,
> -				    struct drm_pending_vblank_event *event)
> +				    struct drm_pending_vblank_event *event,
> +				    u32 flags)
>   {
>   	struct shmob_drm_crtc *scrtc = to_shmob_crtc(crtc);
>   	struct drm_device *dev = scrtc->crtc.dev;
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
> index 5474394..3cb86fe 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
> @@ -1682,7 +1682,8 @@ int vmw_du_update_layout(struct vmw_private *dev_priv, unsigned num,
>
>   int vmw_du_page_flip(struct drm_crtc *crtc,
>   		     struct drm_framebuffer *fb,
> -		     struct drm_pending_vblank_event *event)
> +		     struct drm_pending_vblank_event *event,
> +		     u32 flags)
>   {
>   	struct vmw_private *dev_priv = vmw_priv(crtc->dev);
>   	struct drm_framebuffer *old_fb = crtc->fb;
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
> index 6fa89c9..088a8c9 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
> @@ -123,7 +123,8 @@ struct vmw_display_unit {
>   void vmw_display_unit_cleanup(struct vmw_display_unit *du);
>   int vmw_du_page_flip(struct drm_crtc *crtc,
>   		     struct drm_framebuffer *fb,
> -		     struct drm_pending_vblank_event *event);
> +		     struct drm_pending_vblank_event *event,
> +		     u32 flags);
>   void vmw_du_crtc_save(struct drm_crtc *crtc);
>   void vmw_du_crtc_restore(struct drm_crtc *crtc);
>   void vmw_du_crtc_gamma_set(struct drm_crtc *crtc,
> diff --git a/drivers/staging/imx-drm/ipuv3-crtc.c b/drivers/staging/imx-drm/ipuv3-crtc.c
> index 78d3eda..8c93b57 100644
> --- a/drivers/staging/imx-drm/ipuv3-crtc.c
> +++ b/drivers/staging/imx-drm/ipuv3-crtc.c
> @@ -132,7 +132,8 @@ static void ipu_crtc_dpms(struct drm_crtc *crtc, int mode)
>
>   static int ipu_page_flip(struct drm_crtc *crtc,
>   		struct drm_framebuffer *fb,
> -		struct drm_pending_vblank_event *event)
> +		struct drm_pending_vblank_event *event,
> +		u32 flags)
>   {
>   	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
>   	int ret;
> diff --git a/drivers/staging/omapdrm/omap_crtc.c b/drivers/staging/omapdrm/omap_crtc.c
> index 732f2ad..51daf5d 100644
> --- a/drivers/staging/omapdrm/omap_crtc.c
> +++ b/drivers/staging/omapdrm/omap_crtc.c
> @@ -173,7 +173,8 @@ static void page_flip_cb(void *arg)
>
>   static int omap_crtc_page_flip_locked(struct drm_crtc *crtc,
>   		 struct drm_framebuffer *fb,
> -		 struct drm_pending_vblank_event *event)
> +		 struct drm_pending_vblank_event *event,
> +		 u32 flags)
>   {
>   	struct drm_device *dev = crtc->dev;
>   	struct omap_crtc *omap_crtc = to_omap_crtc(crtc);
> diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
> index 3fa18b7..79f1d9b 100644
> --- a/include/drm/drm_crtc.h
> +++ b/include/drm/drm_crtc.h
> @@ -355,7 +355,8 @@ struct drm_crtc_funcs {
>   	 */
>   	int (*page_flip)(struct drm_crtc *crtc,
>   			 struct drm_framebuffer *fb,
> -			 struct drm_pending_vblank_event *event);
> +			 struct drm_pending_vblank_event *event,
> +			 u32 flags);
>
>   	int (*set_property)(struct drm_crtc *crtc,
>   			    struct drm_property *property, uint64_t val);
> diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
> index 3d6301b..6e927bb 100644
> --- a/include/uapi/drm/drm_mode.h
> +++ b/include/uapi/drm/drm_mode.h
> @@ -399,7 +399,9 @@ struct drm_mode_crtc_lut {
>   };
>
>   #define DRM_MODE_PAGE_FLIP_EVENT 0x01
> -#define DRM_MODE_PAGE_FLIP_FLAGS DRM_MODE_PAGE_FLIP_EVENT
> +#define DRM_MODE_PAGE_FLIP_ASYNC 0x02
> +#define DRM_MODE_PAGE_FLIP_FLAGS (DRM_MODE_PAGE_FLIP_EVENT | \
> +				  DRM_MODE_PAGE_FLIP_ASYNC)
>
>   /*
>    * Request a page flip on the specified crtc.
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] Async flips
  2012-10-31 18:51     ` Ville Syrjälä
@ 2012-11-02  4:45       ` Mario Kleiner
  2012-11-02  9:29         ` Ville Syrjälä
  0 siblings, 1 reply; 21+ messages in thread
From: Mario Kleiner @ 2012-11-02  4:45 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx



On 31.10.12 19:51, Ville Syrjälä wrote:
> On Wed, Oct 31, 2012 at 10:44:47AM -0700, Eric Anholt wrote:
>> Ville Syrjälä <ville.syrjala@linux.intel.com> writes:
>>
>>> On Tue, Oct 30, 2012 at 01:33:47PM -0500, Jesse Barnes wrote:
>>>> The hw supports async flips through the render ring, so why not expose it?
>>>> It gives us one more "tear me harder" option we can use in the DDX and
>>>> for other cases where simply flipping to the latest buffer is more
>>>> important than visual quality.
>>>
>>> The only reason I can see why anyone would really want async flips is
>>> when you're restricted to double buffering. With triple buffering you
>>> should be able to override the previous flip w/o tearing.
>>>
>>> Well, actually if you use the ring based flips, then you can't do the
>>> override. My atomic page flip code can do it because it's using mmio
>>> flips. There were also other reasons favoring mmio over ring.
>>>
>>> Once the atomic code is deemed ready, I would suggest we just nuke the
>>> ring based flip code (pun intended).
>>
>> Can you outline what exactly your plan is for doing faster-than-vblank
>> page flipping without tearing, and how it gets synchronized with
>> rendering?
>
> The faster than vrefresh flipping simply involves overwriting the
> display plane registers before they've been latched by the hardware.
> This appears to work fine already.
>
> As far as the synchronization goes, I basically just want a callback
> from the GPU when it's done with the buffer. I'm expecting to find
> some kind of GPU progress interrupt that I can enable while I'm waiting
> for the GPU to catch up. So I also need a FIFO to store the flip
> requests in the meantime. Once the GPU tells me it's ready, I pull the
> flip request from the queue and proceed with the display plane
> programming.
>
> So the synchronization part it's still quite handwavy, and I need
> to study the hardware/driver in more detail to figure out the
> specifics.
>

That's cool. But please make sure that the behaviour will be somehow 
controllable by OpenGL applications, via some OpenGL extension. I can 
see use for different modes:

a) Normal double-buffering: For deterministic, well controlled timing - 
That's what my type of applications need. Maximum control over what to 
show next, based on precise and reliable flip completion timestamps.

b) Triple buffering with FIFO queueing of frames ahead, what the intel 
ddx currently does, unfortunately for me with totally broken 
timestamping, so all my users have to disable it in the xorg.conf - 
quite a challenge for many Apple converts, which have trouble with the 
concept of editing configuration files. It's useful if an app manages to 
render at full refresh rate on average to smooth out occassional stalls, 
because the gpu has one frame of completed rendering queued up in 
advance. Maybe this also allows for some power saving if an app can 
render and queue frames ahead of time as fast as possible (race to 
completion) and then the cpu/gpu can go to some deeper sleep state earlier?

c) Your LIFO triple-buffering, as far as i understand, with dropping 
late frames, to reduce latency /lag for things like video games.

d) Flipping without vsync = tearing. I think this is at least useful for 
benchmarks, although not for anything else.

thanks,
-mario

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] Async flips
  2012-11-02  4:45       ` Mario Kleiner
@ 2012-11-02  9:29         ` Ville Syrjälä
  2012-11-12  3:53           ` Mario Kleiner
  0 siblings, 1 reply; 21+ messages in thread
From: Ville Syrjälä @ 2012-11-02  9:29 UTC (permalink / raw)
  To: Mario Kleiner; +Cc: intel-gfx

On Fri, Nov 02, 2012 at 05:45:29AM +0100, Mario Kleiner wrote:
> 
> 
> On 31.10.12 19:51, Ville Syrjälä wrote:
> > On Wed, Oct 31, 2012 at 10:44:47AM -0700, Eric Anholt wrote:
> >> Ville Syrjälä <ville.syrjala@linux.intel.com> writes:
> >>
> >>> On Tue, Oct 30, 2012 at 01:33:47PM -0500, Jesse Barnes wrote:
> >>>> The hw supports async flips through the render ring, so why not expose it?
> >>>> It gives us one more "tear me harder" option we can use in the DDX and
> >>>> for other cases where simply flipping to the latest buffer is more
> >>>> important than visual quality.
> >>>
> >>> The only reason I can see why anyone would really want async flips is
> >>> when you're restricted to double buffering. With triple buffering you
> >>> should be able to override the previous flip w/o tearing.
> >>>
> >>> Well, actually if you use the ring based flips, then you can't do the
> >>> override. My atomic page flip code can do it because it's using mmio
> >>> flips. There were also other reasons favoring mmio over ring.
> >>>
> >>> Once the atomic code is deemed ready, I would suggest we just nuke the
> >>> ring based flip code (pun intended).
> >>
> >> Can you outline what exactly your plan is for doing faster-than-vblank
> >> page flipping without tearing, and how it gets synchronized with
> >> rendering?
> >
> > The faster than vrefresh flipping simply involves overwriting the
> > display plane registers before they've been latched by the hardware.
> > This appears to work fine already.
> >
> > As far as the synchronization goes, I basically just want a callback
> > from the GPU when it's done with the buffer. I'm expecting to find
> > some kind of GPU progress interrupt that I can enable while I'm waiting
> > for the GPU to catch up. So I also need a FIFO to store the flip
> > requests in the meantime. Once the GPU tells me it's ready, I pull the
> > flip request from the queue and proceed with the display plane
> > programming.
> >
> > So the synchronization part it's still quite handwavy, and I need
> > to study the hardware/driver in more detail to figure out the
> > specifics.
> >
> 
> That's cool. But please make sure that the behaviour will be somehow 
> controllable by OpenGL applications, via some OpenGL extension. I can 
> see use for different modes:
> 
> a) Normal double-buffering: For deterministic, well controlled timing - 
> That's what my type of applications need. Maximum control over what to 
> show next, based on precise and reliable flip completion timestamps.
> 
> b) Triple buffering with FIFO queueing of frames ahead, what the intel 
> ddx currently does, unfortunately for me with totally broken 
> timestamping, so all my users have to disable it in the xorg.conf - 
> quite a challenge for many Apple converts, which have trouble with the 
> concept of editing configuration files. It's useful if an app manages to 
> render at full refresh rate on average to smooth out occassional stalls, 
> because the gpu has one frame of completed rendering queued up in 
> advance. Maybe this also allows for some power saving if an app can 
> render and queue frames ahead of time as fast as possible (race to 
> completion) and then the cpu/gpu can go to some deeper sleep state earlier?
> 
> c) Your LIFO triple-buffering, as far as i understand, with dropping 
> late frames, to reduce latency /lag for things like video games.
> 

Right. I've been occasionally thinking about pushing the swap interval
handling to the kernel.

Currently user space needs to do the wait for vblank trick before
scheduling the swap, and then hoping that the GPU will catch up fast
enough so that the swap will happen on the next vblank. If the kernel
handled it, it could actually guarantee the OML_sync_control remainder
behaviour (well assuming kernel threads get scheduled in a timely
fashon), whereas the user space solution can't give such guarantees.

But even w/o that extra kernel feature, my code should be no worse in
that regard than the current code. You can still do the wait for vblank
trick in user space to get similar swap interval behaviour, and you can
still use as many buffers as you want. The only real difference to the
current situation is that if you schedule the flip too soon, you won't
get the EBUSY from the kernel, but instead you drop the previous flip.
But assuming the user space code is well behaved it won't try to flip
too soon, so essentially nothing will change.

> d) Flipping without vsync = tearing. I think this is at least useful for 
> benchmarks, although not for anything else.

This one I don't support curently. It would be possible to support it
(assuming the HW allows it). The simplest way would be to just add a
new flag to the ioctl to control this behaviour.

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/2] drm: add flags argument to crtc page_flip callback
  2012-11-02  4:29   ` Mario Kleiner
@ 2012-11-02 16:00     ` Jesse Barnes
  0 siblings, 0 replies; 21+ messages in thread
From: Jesse Barnes @ 2012-11-02 16:00 UTC (permalink / raw)
  To: Mario Kleiner; +Cc: intel-gfx

On Fri, 02 Nov 2012 05:29:10 +0100
Mario Kleiner <mario.kleiner@tuebingen.mpg.de> wrote:

> On 30.10.12 19:33, Jesse Barnes wrote:
> > This lets us pass down flags the drivers might be interested in, e.g. async.
> >
> > Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> 
> Hi Jesse
> 
> I like it :) -- Anything that helps to get rid of the troublesome 
> 'SwapBuffersWait' madness in the ddx at some point makes me happy.
> 
> > diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
> > index ef1b221..b4964ac 100644
> > --- a/drivers/gpu/drm/drm_crtc.c
> > +++ b/drivers/gpu/drm/drm_crtc.c
> > @@ -3627,7 +3627,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
> >   			(void (*) (struct drm_pending_event *)) kfree;
> >   	}
> >
> > -	ret = crtc->funcs->page_flip(crtc, fb, e);
> > +	ret = crtc->funcs->page_flip(crtc, fb, e, flags);
> 
> I think this should be page_flip->flags, ie.
> 
> +	ret = crtc->funcs->page_flip(crtc, fb, e, page_flip->flags);
> 
> because flags is used here:
> 
> >   	if (ret) {
> >   		if (page_flip->flags & DRM_MODE_PAGE_FLIP_EVENT) {
> >   			spin_lock_irqsave(&dev->event_lock, flags);
> 
> to spin_lock_irqsave.
> 
> As a tiny nit-pick, you sometimes name the variable 'flags', sometimes 
> 'flip_flags' in the different kms implementations below, which could be 
> made consistent.
> 
> Reviewed-by: mario.kleiner@tuebingen.mpg.de
> 

Yeah thanks.  After I used 'flags' everywhere I saw the locking ones
and changed some, but obviously didn't catch them all. :)

I'll fix things up to be more consistent.

Thanks,
-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] Async flips
  2012-11-02  9:29         ` Ville Syrjälä
@ 2012-11-12  3:53           ` Mario Kleiner
  2012-11-12 12:04             ` Ville Syrjälä
  0 siblings, 1 reply; 21+ messages in thread
From: Mario Kleiner @ 2012-11-12  3:53 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx

On 02.11.12 10:29, Ville Syrjälä wrote:
> On Fri, Nov 02, 2012 at 05:45:29AM +0100, Mario Kleiner wrote:
>>
>>
>> On 31.10.12 19:51, Ville Syrjälä wrote:
>>> On Wed, Oct 31, 2012 at 10:44:47AM -0700, Eric Anholt wrote:
>>>> Ville Syrjälä <ville.syrjala@linux.intel.com> writes:
>>>>
>>>>> On Tue, Oct 30, 2012 at 01:33:47PM -0500, Jesse Barnes wrote:
>>>>>> The hw supports async flips through the render ring, so why not expose it?
>>>>>> It gives us one more "tear me harder" option we can use in the DDX and
>>>>>> for other cases where simply flipping to the latest buffer is more
>>>>>> important than visual quality.
>>>>>
>>>>> The only reason I can see why anyone would really want async flips is
>>>>> when you're restricted to double buffering. With triple buffering you
>>>>> should be able to override the previous flip w/o tearing.
>>>>>
>>>>> Well, actually if you use the ring based flips, then you can't do the
>>>>> override. My atomic page flip code can do it because it's using mmio
>>>>> flips. There were also other reasons favoring mmio over ring.
>>>>>
>>>>> Once the atomic code is deemed ready, I would suggest we just nuke the
>>>>> ring based flip code (pun intended).
>>>>
>>>> Can you outline what exactly your plan is for doing faster-than-vblank
>>>> page flipping without tearing, and how it gets synchronized with
>>>> rendering?
>>>
>>> The faster than vrefresh flipping simply involves overwriting the
>>> display plane registers before they've been latched by the hardware.
>>> This appears to work fine already.
>>>
>>> As far as the synchronization goes, I basically just want a callback
>>> from the GPU when it's done with the buffer. I'm expecting to find
>>> some kind of GPU progress interrupt that I can enable while I'm waiting
>>> for the GPU to catch up. So I also need a FIFO to store the flip
>>> requests in the meantime. Once the GPU tells me it's ready, I pull the
>>> flip request from the queue and proceed with the display plane
>>> programming.
>>>
>>> So the synchronization part it's still quite handwavy, and I need
>>> to study the hardware/driver in more detail to figure out the
>>> specifics.
>>>
>>
>> That's cool. But please make sure that the behaviour will be somehow
>> controllable by OpenGL applications, via some OpenGL extension. I can
>> see use for different modes:
>>
>> a) Normal double-buffering: For deterministic, well controlled timing -
>> That's what my type of applications need. Maximum control over what to
>> show next, based on precise and reliable flip completion timestamps.
>>
>> b) Triple buffering with FIFO queueing of frames ahead, what the intel
>> ddx currently does, unfortunately for me with totally broken
>> timestamping, so all my users have to disable it in the xorg.conf -
>> quite a challenge for many Apple converts, which have trouble with the
>> concept of editing configuration files. It's useful if an app manages to
>> render at full refresh rate on average to smooth out occassional stalls,
>> because the gpu has one frame of completed rendering queued up in
>> advance. Maybe this also allows for some power saving if an app can
>> render and queue frames ahead of time as fast as possible (race to
>> completion) and then the cpu/gpu can go to some deeper sleep state earlier?
>>
>> c) Your LIFO triple-buffering, as far as i understand, with dropping
>> late frames, to reduce latency /lag for things like video games.
>>
>
> Right. I've been occasionally thinking about pushing the swap interval
> handling to the kernel.
>
> Currently user space needs to do the wait for vblank trick before
> scheduling the swap, and then hoping that the GPU will catch up fast
> enough so that the swap will happen on the next vblank. If the kernel
> handled it, it could actually guarantee the OML_sync_control remainder
> behaviour (well assuming kernel threads get scheduled in a timely
> fashon), whereas the user space solution can't give such guarantees.

Yes. You could even do much of it from the vblank irq for robustness of 
timing. The downside would be probably the complexity of 
error/special-case handling. E.g., if an app schedules a swap 10 seconds 
into the future, but then the app dies/quits or a fullscreen window gets 
switched back to windowed mode, so something that was meant to be 
page-flipped suddenly can't be page-flipped anymore, or the window went 
away during that 10 secs.

> But even w/o that extra kernel feature, my code should be no worse in
> that regard than the current code. You can still do the wait for vblank
> trick in user space to get similar swap interval behaviour, and you can
> still use as many buffers as you want. The only real difference to the
> current situation is that if you schedule the flip too soon, you won't
> get the EBUSY from the kernel, but instead you drop the previous flip.
> But assuming the user space code is well behaved it won't try to flip
> too soon, so essentially nothing will change.
>

Yes. My remark wrt application control was just because i assumed you 
would be responsible for the whole stack when implementing this feature, 
also how this gets exposed to apps via the ddx / glx / mesa etc.

>> d) Flipping without vsync = tearing. I think this is at least useful for
>> benchmarks, although not for anything else.
>
> This one I don't support curently. It would be possible to support it
> (assuming the HW allows it). The simplest way would be to just add a
> new flag to the ioctl to control this behaviour.
>

I think that's what Jesse's patches are supposed to add.

thanks,
-mario

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] Async flips
  2012-11-12  3:53           ` Mario Kleiner
@ 2012-11-12 12:04             ` Ville Syrjälä
  0 siblings, 0 replies; 21+ messages in thread
From: Ville Syrjälä @ 2012-11-12 12:04 UTC (permalink / raw)
  To: Mario Kleiner; +Cc: intel-gfx

On Mon, Nov 12, 2012 at 04:53:10AM +0100, Mario Kleiner wrote:
> On 02.11.12 10:29, Ville Syrjälä wrote:
> > On Fri, Nov 02, 2012 at 05:45:29AM +0100, Mario Kleiner wrote:
> >> d) Flipping without vsync = tearing. I think this is at least useful for
> >> benchmarks, although not for anything else.
> >
> > This one I don't support curently. It would be possible to support it
> > (assuming the HW allows it). The simplest way would be to just add a
> > new flag to the ioctl to control this behaviour.
> >
> 
> I think that's what Jesse's patches are supposed to add.

Yes, but it adds it to the current page flip path, which has nothing to
do with the atomic code.

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2012-11-12 12:04 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-10-30 18:33 [RFC] Async flips Jesse Barnes
2012-10-30 18:33 ` [PATCH 1/2] drm: add flags argument to crtc page_flip callback Jesse Barnes
2012-11-02  4:29   ` Mario Kleiner
2012-11-02 16:00     ` Jesse Barnes
2012-10-30 18:33 ` [PATCH 2/2] drm/i915: add async flip support on gen7 Jesse Barnes
2012-10-31 17:47   ` Eric Anholt
2012-10-31 19:24     ` Jesse Barnes
2012-10-31 12:53 ` [RFC] Async flips Ville Syrjälä
2012-10-31 15:23   ` Jesse Barnes
2012-10-31 15:26     ` Daniel Vetter
2012-10-31 15:39       ` Jesse Barnes
2012-10-31 15:49         ` Chris Wilson
2012-10-31 16:03           ` Daniel Vetter
2012-10-31 16:27           ` Jesse Barnes
2012-10-31 16:05       ` Ville Syrjälä
2012-10-31 17:44   ` Eric Anholt
2012-10-31 18:51     ` Ville Syrjälä
2012-11-02  4:45       ` Mario Kleiner
2012-11-02  9:29         ` Ville Syrjälä
2012-11-12  3:53           ` Mario Kleiner
2012-11-12 12:04             ` Ville Syrjälä

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.