All of lore.kernel.org
 help / color / mirror / Atom feed
* RFC frontbuffer write tracking
@ 2013-06-20 17:18 Chris Wilson
  2013-06-20 17:18 ` [PATCH 1/5] drm/i915: Amalgamate the parameters to ring flush Chris Wilson
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Chris Wilson @ 2013-06-20 17:18 UTC (permalink / raw)
  To: intel-gfx

Having thrown around a few ideas for how to do PSR and FBC write
tracking on the frontbuffer, including the creation of a new SCANOUT
domain, the implementation that I've settled on is to detect writes to
either the GTT domain (hmm, a CPU write flush should also set the fb as
dirty) and then a deferred SCANOUT flush. The deferred flush then resets
the write domain and kicks the GTT mapping so that any further dumb
writes (i.e. dumb kms buffers or fbcon) cause a new frontbuffer
invalidation. This patch series also introduces framebuffer parameters
(which maybe should be properties) to allow userspace to opt out of the
dumb mechanism and elect to call dirtyfb itself when it requires the
scanout to be flushed.

I've been playing around with GFDT on SNB and IVB as a means for
prototyping the delayed updates and framebuffer parameters.
-Chris

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/5] drm/i915: Amalgamate the parameters to ring flush
  2013-06-20 17:18 RFC frontbuffer write tracking Chris Wilson
@ 2013-06-20 17:18 ` Chris Wilson
  2013-06-20 17:18 ` [PATCH 2/5] drm/i915: Expose framebuffer parameters Chris Wilson
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2013-06-20 17:18 UTC (permalink / raw)
  To: intel-gfx

Now that the invalidate and flush bitfields are only used as booleans, and
we may want to extend the range of actions in future, consolidate those
parameters into a new bitmask.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c |   2 +-
 drivers/gpu/drm/i915/i915_trace.h       |  11 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 284 +++++++++++++++++---------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |   7 +-
 4 files changed, 162 insertions(+), 142 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index ff47145..540a9c4 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -360,7 +360,7 @@ mi_set_context(struct intel_ring_buffer *ring,
 	 * itlb_before_ctx_switch.
 	 */
 	if (IS_GEN6(ring->dev) && ring->itlb_before_ctx_switch) {
-		ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, 0);
+		ret = intel_ring_invalidate_all_caches(ring);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 3db4a68..ce392eb 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -252,26 +252,23 @@ TRACE_EVENT(i915_gem_ring_dispatch,
 );
 
 TRACE_EVENT(i915_gem_ring_flush,
-	    TP_PROTO(struct intel_ring_buffer *ring, u32 invalidate, u32 flush),
-	    TP_ARGS(ring, invalidate, flush),
+	    TP_PROTO(struct intel_ring_buffer *ring, u32 flush),
+	    TP_ARGS(ring, flush),
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
 			     __field(u32, ring)
-			     __field(u32, invalidate)
 			     __field(u32, flush)
 			     ),
 
 	    TP_fast_assign(
 			   __entry->dev = ring->dev->primary->index;
 			   __entry->ring = ring->id;
-			   __entry->invalidate = invalidate;
 			   __entry->flush = flush;
 			   ),
 
-	    TP_printk("dev=%u, ring=%x, invalidate=%04x, flush=%04x",
-		      __entry->dev, __entry->ring,
-		      __entry->invalidate, __entry->flush)
+	    TP_printk("dev=%u, ring=%x, flush=%04x",
+		      __entry->dev, __entry->ring, __entry->flush)
 );
 
 DECLARE_EVENT_CLASS(i915_gem_request,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e51ab55..601e1eb 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -52,38 +52,32 @@ static inline int ring_space(struct intel_ring_buffer *ring)
 }
 
 static int
-gen2_render_ring_flush(struct intel_ring_buffer *ring,
-		       u32	invalidate_domains,
-		       u32	flush_domains)
+gen2_render_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
-	u32 cmd;
 	int ret;
 
-	cmd = MI_FLUSH;
-	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
-		cmd |= MI_NO_WRITE_FLUSH;
+	if (action & (RING_INVALIDATE | RING_FLUSH)) {
+		u32 cmd = MI_FLUSH;
 
-	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
-		cmd |= MI_READ_FLUSH;
+		if (action & RING_INVALIDATE)
+			cmd |= MI_READ_FLUSH;
 
-	ret = intel_ring_begin(ring, 2);
-	if (ret)
-		return ret;
+		ret = intel_ring_begin(ring, 2);
+		if (ret)
+			return ret;
 
-	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+		intel_ring_emit(ring, cmd);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	}
 
 	return 0;
 }
 
 static int
-gen4_render_ring_flush(struct intel_ring_buffer *ring,
-		       u32	invalidate_domains,
-		       u32	flush_domains)
+gen4_render_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
 	struct drm_device *dev = ring->dev;
-	u32 cmd;
 	int ret;
 
 	/*
@@ -114,23 +108,23 @@ gen4_render_ring_flush(struct intel_ring_buffer *ring,
 	 * are flushed at any MI_FLUSH.
 	 */
 
-	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
-	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
-		cmd &= ~MI_NO_WRITE_FLUSH;
-	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
-		cmd |= MI_EXE_FLUSH;
+	if (action & (RING_FLUSH | RING_INVALIDATE)) {
+		u32 cmd = MI_FLUSH;
 
-	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
-	    (IS_G4X(dev) || IS_GEN5(dev)))
-		cmd |= MI_INVALIDATE_ISP;
+		if (action & RING_INVALIDATE) {
+			cmd |= MI_EXE_FLUSH;
+			if (IS_G4X(dev) || IS_GEN5(dev))
+				cmd |= MI_INVALIDATE_ISP;
+		}
 
-	ret = intel_ring_begin(ring, 2);
-	if (ret)
-		return ret;
+		ret = intel_ring_begin(ring, 2);
+		if (ret)
+			return ret;
 
-	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+		intel_ring_emit(ring, cmd);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	}
 
 	return 0;
 }
@@ -179,7 +173,6 @@ intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
 	u32 scratch_addr = pc->gtt_offset + 128;
 	int ret;
 
-
 	ret = intel_ring_begin(ring, 6);
 	if (ret)
 		return ret;
@@ -209,24 +202,18 @@ intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
 }
 
 static int
-gen6_render_ring_flush(struct intel_ring_buffer *ring,
-                         u32 invalidate_domains, u32 flush_domains)
+gen6_render_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
 	u32 flags = 0;
 	struct pipe_control *pc = ring->private;
 	u32 scratch_addr = pc->gtt_offset + 128;
 	int ret;
 
-	/* Force SNB workarounds for PIPE_CONTROL flushes */
-	ret = intel_emit_post_sync_nonzero_flush(ring);
-	if (ret)
-		return ret;
-
 	/* Just flush everything.  Experiments have shown that reducing the
 	 * number of bits based on the write domains has little performance
 	 * impact.
 	 */
-	if (flush_domains) {
+	if (action & RING_FLUSH) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
 		/*
@@ -235,7 +222,7 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
 		 */
 		flags |= PIPE_CONTROL_CS_STALL;
 	}
-	if (invalidate_domains) {
+	if (action & RING_INVALIDATE) {
 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
@@ -248,15 +235,22 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
 		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
 	}
 
-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	if (flags) {
+		/* Force SNB workarounds for PIPE_CONTROL flushes */
+		ret = intel_emit_post_sync_nonzero_flush(ring);
+		if (ret)
+			return ret;
 
-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
-	intel_ring_emit(ring, flags);
-	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, 0);
-	intel_ring_advance(ring);
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			return ret;
+
+		intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
+		intel_ring_emit(ring, flags);
+		intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
+		intel_ring_emit(ring, 0);
+		intel_ring_advance(ring);
+	}
 
 	return 0;
 }
@@ -302,33 +296,22 @@ static int gen7_ring_fbc_flush(struct intel_ring_buffer *ring, u32 value)
 }
 
 static int
-gen7_render_ring_flush(struct intel_ring_buffer *ring,
-		       u32 invalidate_domains, u32 flush_domains)
+gen7_render_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
 	u32 flags = 0;
 	struct pipe_control *pc = ring->private;
 	u32 scratch_addr = pc->gtt_offset + 128;
 	int ret;
 
-	/*
-	 * Ensure that any following seqno writes only happen when the render
-	 * cache is indeed flushed.
-	 *
-	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
-	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
-	 * don't try to be clever and just set it unconditionally.
-	 */
-	flags |= PIPE_CONTROL_CS_STALL;
-
 	/* Just flush everything.  Experiments have shown that reducing the
 	 * number of bits based on the write domains has little performance
 	 * impact.
 	 */
-	if (flush_domains) {
+	if (action & RING_FLUSH) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
 	}
-	if (invalidate_domains) {
+	if (action & RING_INVALIDATE) {
 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
@@ -347,17 +330,30 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring,
 		gen7_render_ring_cs_stall_wa(ring);
 	}
 
-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	if (flags) {
+		/*
+		 * Ensure that any following seqno writes only happen when the render
+		 * cache is indeed flushed.
+		 *
+		 * Workaround: 4th PIPE_CONTROL command (except the ones with only
+		 * read-cache invalidate bits set) must have the CS_STALL bit set. We
+		 * don't try to be clever and just set it unconditionally.
+		 */
+		if (action & RING_FLUSH)
+			flags |= PIPE_CONTROL_CS_STALL;
 
-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
-	intel_ring_emit(ring, flags);
-	intel_ring_emit(ring, scratch_addr);
-	intel_ring_emit(ring, 0);
-	intel_ring_advance(ring);
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			return ret;
 
-	if (flush_domains)
+		intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
+		intel_ring_emit(ring, flags);
+		intel_ring_emit(ring, scratch_addr);
+		intel_ring_emit(ring, 0);
+		intel_ring_advance(ring);
+	}
+
+	if (action & RING_FLUSH)
 		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
 
 	return 0;
@@ -956,19 +952,19 @@ void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
 }
 
 static int
-bsd_ring_flush(struct intel_ring_buffer *ring,
-	       u32     invalidate_domains,
-	       u32     flush_domains)
+bsd_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
 	int ret;
 
-	ret = intel_ring_begin(ring, 2);
-	if (ret)
-		return ret;
+	if (action & (RING_FLUSH | RING_INVALIDATE)) {
+		ret = intel_ring_begin(ring, 2);
+		if (ret)
+			return ret;
 
-	intel_ring_emit(ring, MI_FLUSH);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+		intel_ring_emit(ring, MI_FLUSH);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	}
 	return 0;
 }
 
@@ -1636,31 +1632,34 @@ static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
 		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
 }
 
-static int gen6_bsd_ring_flush(struct intel_ring_buffer *ring,
-			       u32 invalidate, u32 flush)
+static int gen6_bsd_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
-	uint32_t cmd;
 	int ret;
 
-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	if (action & (RING_FLUSH | RING_INVALIDATE)) {
+		u32 cmd = MI_FLUSH_DW;
+
+		/*
+		 * Bspec vol 1c.5 - video engine command streamer:
+		 * "If ENABLED, all TLBs will be invalidated once the flush
+		 * operation is complete. This bit is only valid when the
+		 * Post-Sync Operation field is a value of 1h or 3h."
+		 */
+		if (action & RING_INVALIDATE)
+			cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
+				MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			return ret;
+
+		intel_ring_emit(ring, cmd);
+		intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
+		intel_ring_emit(ring, 0);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	}
 
-	cmd = MI_FLUSH_DW;
-	/*
-	 * Bspec vol 1c.5 - video engine command streamer:
-	 * "If ENABLED, all TLBs will be invalidated once the flush
-	 * operation is complete. This bit is only valid when the
-	 * Post-Sync Operation field is a value of 1h or 3h."
-	 */
-	if (invalidate & I915_GEM_GPU_DOMAINS)
-		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
-			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
-	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
 	return 0;
 }
 
@@ -1708,34 +1707,37 @@ gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
 
 /* Blitter support (SandyBridge+) */
 
-static int gen6_ring_flush(struct intel_ring_buffer *ring,
-			   u32 invalidate, u32 flush)
+static int gen6_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
 	struct drm_device *dev = ring->dev;
 	uint32_t cmd;
 	int ret;
 
-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	if (action & (RING_FLUSH | RING_INVALIDATE)) {
+		cmd = MI_FLUSH_DW;
 
-	cmd = MI_FLUSH_DW;
-	/*
-	 * Bspec vol 1c.3 - blitter engine command streamer:
-	 * "If ENABLED, all TLBs will be invalidated once the flush
-	 * operation is complete. This bit is only valid when the
-	 * Post-Sync Operation field is a value of 1h or 3h."
-	 */
-	if (invalidate & I915_GEM_DOMAIN_RENDER)
-		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
-			MI_FLUSH_DW_OP_STOREDW;
-	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+		/*
+		 * Bspec vol 1c.3 - blitter engine command streamer:
+		 * "If ENABLED, all TLBs will be invalidated once the flush
+		 * operation is complete. This bit is only valid when the
+		 * Post-Sync Operation field is a value of 1h or 3h."
+		 */
+		if (action & RING_INVALIDATE)
+			cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
+				MI_FLUSH_DW_OP_STOREDW;
 
-	if (IS_GEN7(dev) && flush)
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			return ret;
+
+		intel_ring_emit(ring, cmd);
+		intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
+		intel_ring_emit(ring, 0);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	}
+
+	if (IS_GEN7(dev) && action & RING_FLUSH)
 		return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);
 
 	return 0;
@@ -2027,11 +2029,11 @@ intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
 	if (!ring->gpu_caches_dirty)
 		return 0;
 
-	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
+	ret = ring->flush(ring, RING_FLUSH);
 	if (ret)
 		return ret;
 
-	trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);
+	trace_i915_gem_ring_flush(ring, RING_FLUSH);
 
 	ring->gpu_caches_dirty = false;
 	return 0;
@@ -2040,18 +2042,36 @@ intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
 int
 intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
 {
-	uint32_t flush_domains;
+	u32 action;
+	int ret;
+
+	action = RING_INVALIDATE;
+	if (ring->gpu_caches_dirty)
+		action |= RING_FLUSH;
+
+	ret = ring->flush(ring, action);
+	if (ret)
+		return ret;
+
+	trace_i915_gem_ring_flush(ring, action);
+
+	ring->gpu_caches_dirty = false;
+	return 0;
+}
+
+int
+intel_ring_flush_internal(struct intel_ring_buffer *ring, u32 action)
+{
 	int ret;
 
-	flush_domains = 0;
 	if (ring->gpu_caches_dirty)
-		flush_domains = I915_GEM_GPU_DOMAINS;
+		action |= RING_FLUSH;
 
-	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+	ret = ring->flush(ring, action);
 	if (ret)
 		return ret;
 
-	trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+	trace_i915_gem_ring_flush(ring, action);
 
 	ring->gpu_caches_dirty = false;
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 799f04c..5066b3b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -93,8 +93,10 @@ struct  intel_ring_buffer {
 	void		(*write_tail)(struct intel_ring_buffer *ring,
 				      u32 value);
 	int __must_check (*flush)(struct intel_ring_buffer *ring,
-				  u32	invalidate_domains,
-				  u32	flush_domains);
+				  u32 action);
+#define RING_FLUSH	0x1
+#define RING_INVALIDATE 0x2
+
 	int		(*add_request)(struct intel_ring_buffer *ring);
 	/* Some chipsets are not quite as coherent as advertised and need
 	 * an expensive kick to force a true read of the up-to-date seqno.
@@ -240,6 +242,7 @@ int __must_check intel_ring_idle(struct intel_ring_buffer *ring);
 void intel_ring_init_seqno(struct intel_ring_buffer *ring, u32 seqno);
 int intel_ring_flush_all_caches(struct intel_ring_buffer *ring);
 int intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring);
+int intel_ring_flush_internal(struct intel_ring_buffer *ring, u32 action);
 
 int intel_init_render_ring_buffer(struct drm_device *dev);
 int intel_init_bsd_ring_buffer(struct drm_device *dev);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/5] drm/i915: Expose framebuffer parameters
  2013-06-20 17:18 RFC frontbuffer write tracking Chris Wilson
  2013-06-20 17:18 ` [PATCH 1/5] drm/i915: Amalgamate the parameters to ring flush Chris Wilson
@ 2013-06-20 17:18 ` Chris Wilson
  2013-06-20 17:18 ` [PATCH 3/5] drm/i915: Add a powersave framebuffer parameter Chris Wilson
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2013-06-20 17:18 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_dma.c      |  3 ++
 drivers/gpu/drm/i915/intel_display.c | 66 ++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_drv.h     |  4 +++
 include/uapi/drm/i915_drm.h          | 11 ++++++
 4 files changed, 84 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 78939b6..5dcfbbe 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1924,6 +1924,9 @@ struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED),
+
+	DRM_IOCTL_DEF_DRV(INTEL_FRAMEBUFFER_GETPARAM, intel_user_framebuffer_getparam, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(INTEL_FRAMEBUFFER_SETPARAM, intel_user_framebuffer_setparam, DRM_UNLOCKED),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 586a648..7bb8851 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9198,6 +9198,72 @@ intel_user_framebuffer_create(struct drm_device *dev,
 	return intel_framebuffer_create(dev, mode_cmd, obj);
 }
 
+static int
+__intel_user_framebuffer_getparam(struct intel_framebuffer *fb,
+				  u32 param, u64 *value)
+{
+	switch (param) {
+	default:
+		return -EINVAL;
+	}
+}
+
+int
+intel_user_framebuffer_getparam(struct drm_device *dev,
+				void *data,
+				struct drm_file *file)
+{
+	struct intel_framebuffer_param *args = data;
+	struct drm_framebuffer *fb, *iter;
+	int ret = -ENOENT;
+
+	mutex_lock(&file->fbs_lock);
+	fb = drm_framebuffer_lookup(dev, args->fb_id);
+	list_for_each_entry(iter, &file->fbs, filp_head) {
+		if (fb == iter) {
+			ret = __intel_user_framebuffer_getparam(to_intel_framebuffer(fb),
+								args->param,
+								&args->value);
+			break;
+		}
+	}
+	mutex_unlock(&file->fbs_lock);
+	return ret;
+}
+
+static int
+__intel_user_framebuffer_setparam(struct intel_framebuffer *fb,
+				  u32 param, u64 value)
+{
+	switch (param) {
+	default:
+		return -EINVAL;
+	}
+}
+
+int
+intel_user_framebuffer_setparam(struct drm_device *dev,
+				void *data,
+				struct drm_file *file)
+{
+	struct intel_framebuffer_param *args = data;
+	struct drm_framebuffer *fb, *iter;
+	int ret = -ENOENT;
+
+	mutex_lock(&file->fbs_lock);
+	fb = drm_framebuffer_lookup(dev, args->fb_id);
+	list_for_each_entry(iter, &file->fbs, filp_head) {
+		if (fb == iter) {
+			ret = __intel_user_framebuffer_setparam(to_intel_framebuffer(fb),
+								args->param,
+								args->value);
+			break;
+		}
+	}
+	mutex_unlock(&file->fbs_lock);
+	return ret;
+}
+
 static const struct drm_mode_config_funcs intel_mode_funcs = {
 	.fb_create = intel_user_framebuffer_create,
 	.output_poll_changed = intel_fb_output_poll_changed,
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 05d798f..b679adfb 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -732,6 +732,10 @@ extern int intel_framebuffer_init(struct drm_device *dev,
 				  struct intel_framebuffer *ifb,
 				  struct drm_mode_fb_cmd2 *mode_cmd,
 				  struct drm_i915_gem_object *obj);
+extern int intel_user_framebuffer_getparam(struct drm_device *dev,
+					   void *data, struct drm_file *file);
+extern int intel_user_framebuffer_setparam(struct drm_device *dev,
+					   void *data, struct drm_file *file);
 extern int intel_fbdev_init(struct drm_device *dev);
 extern void intel_fbdev_initial_config(struct drm_device *dev);
 extern void intel_fbdev_fini(struct drm_device *dev);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index b27e6b9..38a9b52 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -199,6 +199,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_GET_CACHING	0x30
 #define DRM_I915_REG_READ		0x31
 #define DRM_I915_GEM_USERPTR		0x32
+#define DRM_INTEL_FRAMEBUFFER_GETPARAM	0x33
+#define DRM_INTEL_FRAMEBUFFER_SETPARAM	0x34
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -249,6 +251,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
 #define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
+#define DRM_IOCTL_INTEL_FRAMEBUFFER_GETPARAM         DRM_IOWR(DRM_COMMAND_BASE + DRM_INTEL_FRAMEBUFFER_GETPARAM, struct intel_framebuffer_param)
+#define DRM_IOCTL_INTEL_FRAMEBUFFER_SETPARAM         DRM_IOW( DRM_COMMAND_BASE + DRM_INTEL_FRAMEBUFFER_SETPARAM, struct intel_framebuffer_param)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -997,4 +1001,11 @@ struct drm_i915_gem_userptr {
 	 */
 	__u32 handle;
 };
+
+struct intel_framebuffer_param {
+	__u32 fb_id;
+	__u32 param;
+	__u64 value;
+};
+
 #endif /* _UAPI_I915_DRM_H_ */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/5] drm/i915: Add a powersave framebuffer parameter
  2013-06-20 17:18 RFC frontbuffer write tracking Chris Wilson
  2013-06-20 17:18 ` [PATCH 1/5] drm/i915: Amalgamate the parameters to ring flush Chris Wilson
  2013-06-20 17:18 ` [PATCH 2/5] drm/i915: Expose framebuffer parameters Chris Wilson
@ 2013-06-20 17:18 ` Chris Wilson
  2013-06-20 17:18 ` [PATCH 4/5] drm/i915: Track modifications to the frontbuffer Chris Wilson
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2013-06-20 17:18 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c  | 3 +++
 drivers/gpu/drm/i915/i915_drv.h      | 1 +
 drivers/gpu/drm/i915/intel_display.c | 7 +++++++
 drivers/gpu/drm/i915/intel_drv.h     | 1 +
 drivers/gpu/drm/i915/intel_pm.c      | 6 ++++++
 include/uapi/drm/i915_drm.h          | 1 +
 6 files changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d4e78b6..d8deb3b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1469,6 +1469,9 @@ static int i915_fbc_status(struct seq_file *m, void *unused)
 		case FBC_MODULE_PARAM:
 			seq_printf(m, "disabled per module param (default off)");
 			break;
+		case FBC_USER:
+			seq_printf(m, "disabled by userspace request");
+			break;
 		default:
 			seq_printf(m, "unknown reason");
 		}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 837e166..01901b5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -544,6 +544,7 @@ enum no_fbc_reason {
 	FBC_NOT_TILED, /* buffer not tiled */
 	FBC_MULTIPLE_PIPES, /* more than one pipe active */
 	FBC_MODULE_PARAM,
+	FBC_USER,
 };
 
 enum intel_pch {
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 7bb8851..821a52d 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9173,6 +9173,7 @@ int intel_framebuffer_init(struct drm_device *dev,
 
 	drm_helper_mode_fill_fb_struct(&intel_fb->base, mode_cmd);
 	intel_fb->obj = obj;
+	intel_fb->powersave = true;
 
 	ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs);
 	if (ret) {
@@ -9203,6 +9204,9 @@ __intel_user_framebuffer_getparam(struct intel_framebuffer *fb,
 				  u32 param, u64 *value)
 {
 	switch (param) {
+	case INTEL_FRAMEBUFFER_PARAM_POWERSAVE:
+		*value = fb->powersave;
+		return 0;
 	default:
 		return -EINVAL;
 	}
@@ -9236,6 +9240,9 @@ __intel_user_framebuffer_setparam(struct intel_framebuffer *fb,
 				  u32 param, u64 value)
 {
 	switch (param) {
+	case INTEL_FRAMEBUFFER_PARAM_POWERSAVE:
+		fb->powersave = value;
+		return 0;
 	default:
 		return -EINVAL;
 	}
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index b679adfb..4cddefb 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -102,6 +102,7 @@
 struct intel_framebuffer {
 	struct drm_framebuffer base;
 	struct drm_i915_gem_object *obj;
+	unsigned powersave:1;
 };
 
 struct intel_fbdev {
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 9f7531f..2f6150b 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -488,6 +488,12 @@ void intel_update_fbc(struct drm_device *dev)
 	intel_fb = to_intel_framebuffer(fb);
 	obj = intel_fb->obj;
 
+	if (!intel_fb->powersave) {
+		DRM_DEBUG_KMS("powersaving on this framebuffer disabled\n");
+		dev_priv->no_fbc_reason = FBC_USER;
+		goto out_disable;
+	}
+
 	enable_fbc = i915_enable_fbc;
 	if (enable_fbc < 0) {
 		DRM_DEBUG_KMS("fbc set to per-chip default\n");
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 38a9b52..74c34c2 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1005,6 +1005,7 @@ struct drm_i915_gem_userptr {
 struct intel_framebuffer_param {
 	__u32 fb_id;
 	__u32 param;
+#define INTEL_FRAMEBUFFER_PARAM_POWERSAVE 1
 	__u64 value;
 };
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/5] drm/i915: Track modifications to the frontbuffer
  2013-06-20 17:18 RFC frontbuffer write tracking Chris Wilson
                   ` (2 preceding siblings ...)
  2013-06-20 17:18 ` [PATCH 3/5] drm/i915: Add a powersave framebuffer parameter Chris Wilson
@ 2013-06-20 17:18 ` Chris Wilson
  2013-06-20 17:18 ` [PATCH 5/5] drm/i915: Defer the FBC w/a invalidation Chris Wilson
  2013-07-18  7:40 ` RFC frontbuffer write tracking Daniel Vetter
  5 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2013-06-20 17:18 UTC (permalink / raw)
  To: intel-gfx

The frontbuffer (active framebuffer being used as the scanout) often
requires additional work to flush any modifications to the display engine
and to the displays. So in order to do so, we need to detect whenever we
begin to write to the frontbuffer and mark it for a flush. The flush
then is performed after a slight delay, in order to coalesce several
small updates into a single flush.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |   2 +
 drivers/gpu/drm/i915/i915_drv.h            |   4 +
 drivers/gpu/drm/i915/i915_gem.c            |   4 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   2 +-
 drivers/gpu/drm/i915/intel_display.c       | 122 ++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/intel_drv.h           |   5 ++
 include/uapi/drm/i915_drm.h                |   1 +
 7 files changed, 130 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d8deb3b..db9af99 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -138,6 +138,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	}
 	if (obj->ring != NULL)
 		seq_printf(m, " (%s)", obj->ring->name);
+	if (!list_empty(&obj->fb_list))
+		seq_printf(m, " (framebuffer)");
 }
 
 static int i915_gem_object_list_info(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 01901b5..1257e0e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1064,6 +1064,8 @@ typedef struct drm_i915_private {
 	u32 hpd_event_bits;
 	struct timer_list hotplug_reenable_timer;
 
+	struct delayed_work display_refresh_work;
+
 	int num_plane;
 
 	unsigned long cfb_size;
@@ -1308,6 +1310,8 @@ struct drm_i915_gem_object {
 	unsigned int has_global_gtt_mapping:1;
 	unsigned int has_dma_mapping:1;
 
+	struct list_head fb_list;
+
 	struct sg_table *pages;
 	int pages_pin_count;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 030b742..6661adb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3321,6 +3321,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
 		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
 		obj->dirty = 1;
+
+		if (obj->pin_count && !list_empty(&obj->fb_list))
+			intel_mark_fb_busy(obj, NULL);
 	}
 
 	trace_i915_gem_object_change_domain(obj,
@@ -3890,6 +3893,7 @@ unlock:
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			  const struct drm_i915_gem_object_ops *ops)
 {
+	INIT_LIST_HEAD(&obj->fb_list);
 	INIT_LIST_HEAD(&obj->mm_list);
 	INIT_LIST_HEAD(&obj->global_list);
 	INIT_LIST_HEAD(&obj->ring_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 73974fa..dabd9af 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -785,7 +785,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
 		if (obj->base.write_domain) {
 			obj->dirty = 1;
 			obj->last_write_seqno = intel_ring_get_seqno(ring);
-			if (obj->pin_count) /* check for potential scanout */
+			if (obj->pin_count && !list_empty(&obj->fb_list))
 				intel_mark_fb_busy(obj, ring);
 		}
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 821a52d..be60f12 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -6663,7 +6663,9 @@ intel_framebuffer_create(struct drm_device *dev,
 		return ERR_PTR(-ENOMEM);
 	}
 
+	mutex_lock(&dev->struct_mutex);
 	ret = intel_framebuffer_init(dev, intel_fb, mode_cmd, obj);
+	mutex_unlock(&dev->struct_mutex);
 	if (ret) {
 		drm_gem_object_unreference_unlocked(&obj->base);
 		kfree(intel_fb);
@@ -7084,25 +7086,94 @@ void intel_mark_idle(struct drm_device *dev)
 	}
 }
 
-void intel_mark_fb_busy(struct drm_i915_gem_object *obj,
-			struct intel_ring_buffer *ring)
+static void intel_display_refresh(struct work_struct *work)
 {
-	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv =
+		container_of(work, struct drm_i915_private, display_refresh_work.work);
+	struct drm_device *dev = dev_priv->dev;
 	struct drm_crtc *crtc;
+	u32 rings = 0;
+	u32 flush = 0;
 
-	if (!i915_powersave)
-		return;
+	mutex_lock(&dev->struct_mutex);
 
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct intel_framebuffer *fb = to_intel_framebuffer(crtc->fb);
+
 		if (!crtc->fb)
 			continue;
 
-		if (to_intel_framebuffer(crtc->fb)->obj != obj)
+		/* kick PSR/FBC/GFDT etc */
+
+		if (!fb->refresh_pending)
+			continue;
+
+		if (fb->refresh_mode == REFRESH_NONE)
+			i915_gem_release_mmap(fb->obj);
+		fb->obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
+		fb->refresh_pending = false;
+	}
+
+	if (flush) {
+		struct intel_ring_buffer *ring;
+		int i;
+
+		if (rings == 0)
+			rings = 1 << BCS;
+
+		for_each_ring(ring, dev_priv, i)
+			if (rings & (1 << i))
+				(void)intel_ring_flush_internal(ring, flush);
+	}
+
+	mutex_unlock(&dev->struct_mutex);
+}
+
+static void __intel_mark_fb_busy(struct intel_framebuffer *fb,
+				 struct intel_ring_buffer *ring)
+{
+	struct drm_device *dev = fb->base.dev;
+	struct drm_crtc *crtc;
+	bool refresh = false;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		if (crtc->fb != &fb->base)
 			continue;
 
 		intel_increase_pllclock(crtc);
-		if (ring && intel_fbc_enabled(dev))
-			ring->fbc_dirty = true;
+		refresh = true;
+	}
+
+	if (!refresh || fb->refresh_pending)
+		return;
+
+	refresh = false;
+
+	if (ring && intel_fbc_enabled(dev)) {
+		ring->fbc_dirty = true;
+		refresh = true;
+	}
+
+	if (refresh) {
+		struct drm_i915_private *dev_priv = dev->dev_private;
+
+		fb->refresh_pending = true;
+		queue_delayed_work(dev_priv->wq,
+				   &dev_priv->display_refresh_work,
+				   msecs_to_jiffies(20));
+	}
+}
+
+void intel_mark_fb_busy(struct drm_i915_gem_object *obj,
+			struct intel_ring_buffer *ring)
+{
+	struct intel_framebuffer *fb;
+
+	list_for_each_entry(fb, &obj->fb_list, obj_list) {
+		if (fb->refresh_mode == REFRESH_USER)
+			continue;
+
+		__intel_mark_fb_busy(fb, NULL);
 	}
 }
 
@@ -9071,12 +9142,31 @@ static void intel_setup_outputs(struct drm_device *dev)
 	drm_helper_move_panel_connectors_to_head(dev);
 }
 
+static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb,
+					struct drm_file *file,
+					unsigned flags, unsigned color,
+					struct drm_clip_rect *clips,
+					unsigned num_clips)
+{
+	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
+
+	if (intel_fb->refresh_mode == REFRESH_USER)
+		__intel_mark_fb_busy(intel_fb, NULL);
+
+	return 0;
+}
+
 static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
+	struct drm_device *dev = fb->dev;
 
 	drm_framebuffer_cleanup(fb);
-	drm_gem_object_unreference_unlocked(&intel_fb->obj->base);
+
+	mutex_lock(&dev->struct_mutex);
+	list_del(&intel_fb->obj_list);
+	drm_gem_object_unreference(&intel_fb->obj->base);
+	mutex_unlock(&dev->struct_mutex);
 
 	kfree(intel_fb);
 }
@@ -9094,6 +9184,7 @@ static int intel_user_framebuffer_create_handle(struct drm_framebuffer *fb,
 static const struct drm_framebuffer_funcs intel_fb_funcs = {
 	.destroy = intel_user_framebuffer_destroy,
 	.create_handle = intel_user_framebuffer_create_handle,
+	.dirty = intel_user_framebuffer_dirty,
 };
 
 int intel_framebuffer_init(struct drm_device *dev,
@@ -9174,6 +9265,8 @@ int intel_framebuffer_init(struct drm_device *dev,
 	drm_helper_mode_fill_fb_struct(&intel_fb->base, mode_cmd);
 	intel_fb->obj = obj;
 	intel_fb->powersave = true;
+	intel_fb->refresh_mode = REFRESH_NONE;
+	intel_fb->refresh_pending = false;
 
 	ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs);
 	if (ret) {
@@ -9181,6 +9274,8 @@ int intel_framebuffer_init(struct drm_device *dev,
 		return ret;
 	}
 
+	list_add(&intel_fb->obj_list, &obj->fb_list);
+
 	return 0;
 }
 
@@ -9207,6 +9302,9 @@ __intel_user_framebuffer_getparam(struct intel_framebuffer *fb,
 	case INTEL_FRAMEBUFFER_PARAM_POWERSAVE:
 		*value = fb->powersave;
 		return 0;
+	case INTEL_FRAMEBUFFER_PARAM_REFRESH:
+		*value = fb->refresh_mode;
+		return 0;
 	default:
 		return -EINVAL;
 	}
@@ -9243,6 +9341,9 @@ __intel_user_framebuffer_setparam(struct intel_framebuffer *fb,
 	case INTEL_FRAMEBUFFER_PARAM_POWERSAVE:
 		fb->powersave = value;
 		return 0;
+	case INTEL_FRAMEBUFFER_PARAM_REFRESH:
+		fb->refresh_mode = value;
+		return 0;
 	default:
 		return -EINVAL;
 	}
@@ -9616,6 +9717,9 @@ void intel_modeset_init(struct drm_device *dev)
 
 	/* Just in case the BIOS is doing something questionable. */
 	intel_disable_fbc(dev);
+
+	INIT_DELAYED_WORK(&dev_priv->display_refresh_work,
+			  intel_display_refresh);
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 4cddefb..9f32949 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -103,6 +103,11 @@ struct intel_framebuffer {
 	struct drm_framebuffer base;
 	struct drm_i915_gem_object *obj;
 	unsigned powersave:1;
+	unsigned refresh_mode:1;
+#define REFRESH_NONE 0
+#define REFRESH_USER 1
+	unsigned refresh_pending:1;
+	struct list_head obj_list;
 };
 
 struct intel_fbdev {
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 74c34c2..609fc0c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1006,6 +1006,7 @@ struct intel_framebuffer_param {
 	__u32 fb_id;
 	__u32 param;
 #define INTEL_FRAMEBUFFER_PARAM_POWERSAVE 1
+#define INTEL_FRAMEBUFFER_PARAM_REFRESH 2
 	__u64 value;
 };
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 5/5] drm/i915: Defer the FBC w/a invalidation
  2013-06-20 17:18 RFC frontbuffer write tracking Chris Wilson
                   ` (3 preceding siblings ...)
  2013-06-20 17:18 ` [PATCH 4/5] drm/i915: Track modifications to the frontbuffer Chris Wilson
@ 2013-06-20 17:18 ` Chris Wilson
  2013-07-18  7:40 ` RFC frontbuffer write tracking Daniel Vetter
  5 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2013-06-20 17:18 UTC (permalink / raw)
  To: intel-gfx

Hook into the frontbuffer write and delayed flush mechanism to avoid
having to send the FBC w/a on every batch, and instead just send the
w/a whenever we update the scanout.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            |  1 +
 drivers/gpu/drm/i915/i915_gem.c            |  2 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  2 +-
 drivers/gpu/drm/i915/intel_display.c       | 30 ++++++++++++++++++------------
 drivers/gpu/drm/i915/intel_drv.h           |  4 ++--
 drivers/gpu/drm/i915/intel_pm.c            |  2 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  9 +++------
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  2 +-
 8 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1257e0e..7dc76bf 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1070,6 +1070,7 @@ typedef struct drm_i915_private {
 
 	unsigned long cfb_size;
 	unsigned int cfb_fb;
+	bool cfb_enabled;
 	enum plane cfb_plane;
 	int cfb_y;
 	struct intel_fbc_work *fbc_work;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6661adb..707a55e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3323,7 +3323,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 		obj->dirty = 1;
 
 		if (obj->pin_count && !list_empty(&obj->fb_list))
-			intel_mark_fb_busy(obj, NULL);
+			intel_mark_fb_busy(obj);
 	}
 
 	trace_i915_gem_object_change_domain(obj,
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index dabd9af..c6a02d7 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -786,7 +786,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
 			obj->dirty = 1;
 			obj->last_write_seqno = intel_ring_get_seqno(ring);
 			if (obj->pin_count && !list_empty(&obj->fb_list))
-				intel_mark_fb_busy(obj, ring);
+				intel_mark_fb_busy(obj);
 		}
 
 		trace_i915_gem_object_change_domain(obj, old_read, old_write);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index be60f12..9a90aa5 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -7108,6 +7108,14 @@ static void intel_display_refresh(struct work_struct *work)
 		if (!fb->refresh_pending)
 			continue;
 
+		if (fb->fbc_dirty) {
+			flush |= RING_FBC;
+			if (fb->obj->ring)
+				rings |= 1 << fb->obj->ring->id;
+			intel_update_fbc(dev);
+			fb->fbc_dirty = false;
+		}
+
 		if (fb->refresh_mode == REFRESH_NONE)
 			i915_gem_release_mmap(fb->obj);
 		fb->obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
@@ -7129,10 +7137,10 @@ static void intel_display_refresh(struct work_struct *work)
 	mutex_unlock(&dev->struct_mutex);
 }
 
-static void __intel_mark_fb_busy(struct intel_framebuffer *fb,
-				 struct intel_ring_buffer *ring)
+static void __intel_mark_fb_busy(struct intel_framebuffer *fb)
 {
 	struct drm_device *dev = fb->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc *crtc;
 	bool refresh = false;
 
@@ -7149,14 +7157,13 @@ static void __intel_mark_fb_busy(struct intel_framebuffer *fb,
 
 	refresh = false;
 
-	if (ring && intel_fbc_enabled(dev)) {
-		ring->fbc_dirty = true;
+	if (dev_priv->cfb_enabled) {
+		intel_disable_fbc(dev);
+		fb->fbc_dirty = true;
 		refresh = true;
 	}
 
 	if (refresh) {
-		struct drm_i915_private *dev_priv = dev->dev_private;
-
 		fb->refresh_pending = true;
 		queue_delayed_work(dev_priv->wq,
 				   &dev_priv->display_refresh_work,
@@ -7164,8 +7171,7 @@ static void __intel_mark_fb_busy(struct intel_framebuffer *fb,
 	}
 }
 
-void intel_mark_fb_busy(struct drm_i915_gem_object *obj,
-			struct intel_ring_buffer *ring)
+void intel_mark_fb_busy(struct drm_i915_gem_object *obj)
 {
 	struct intel_framebuffer *fb;
 
@@ -7173,7 +7179,7 @@ void intel_mark_fb_busy(struct drm_i915_gem_object *obj,
 		if (fb->refresh_mode == REFRESH_USER)
 			continue;
 
-		__intel_mark_fb_busy(fb, NULL);
+		__intel_mark_fb_busy(fb);
 	}
 }
 
@@ -7622,8 +7628,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	if (ret)
 		goto cleanup_pending;
 
-	intel_disable_fbc(dev);
-	intel_mark_fb_busy(obj, NULL);
+	intel_mark_fb_busy(obj);
 	mutex_unlock(&dev->struct_mutex);
 
 	trace_i915_flip_request(intel_crtc->plane, obj);
@@ -9151,7 +9156,7 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb,
 	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
 
 	if (intel_fb->refresh_mode == REFRESH_USER)
-		__intel_mark_fb_busy(intel_fb, NULL);
+		__intel_mark_fb_busy(intel_fb);
 
 	return 0;
 }
@@ -9267,6 +9272,7 @@ int intel_framebuffer_init(struct drm_device *dev,
 	intel_fb->powersave = true;
 	intel_fb->refresh_mode = REFRESH_NONE;
 	intel_fb->refresh_pending = false;
+	intel_fb->fbc_dirty = false;
 
 	ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs);
 	if (ret) {
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 9f32949..01957bf 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -107,6 +107,7 @@ struct intel_framebuffer {
 #define REFRESH_NONE 0
 #define REFRESH_USER 1
 	unsigned refresh_pending:1;
+	unsigned fbc_dirty:1;
 	struct list_head obj_list;
 };
 
@@ -585,8 +586,7 @@ extern bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg,
 extern void intel_dvo_init(struct drm_device *dev);
 extern void intel_tv_init(struct drm_device *dev);
 extern void intel_mark_busy(struct drm_device *dev);
-extern void intel_mark_fb_busy(struct drm_i915_gem_object *obj,
-			       struct intel_ring_buffer *ring);
+extern void intel_mark_fb_busy(struct drm_i915_gem_object *obj);
 extern void intel_mark_idle(struct drm_device *dev);
 extern void intel_lvds_init(struct drm_device *dev);
 extern bool intel_is_dual_link_lvds(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 2f6150b..711cdd0 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -347,6 +347,7 @@ static void intel_fbc_work_fn(struct work_struct *__work)
 
 static void intel_cancel_fbc_work(struct drm_i915_private *dev_priv)
 {
+	dev_priv->cfb_enabled = false;
 	if (dev_priv->fbc_work == NULL)
 		return;
 
@@ -379,6 +380,7 @@ void intel_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
 
 	intel_cancel_fbc_work(dev_priv);
 
+	dev_priv->cfb_enabled = true;
 	work = kzalloc(sizeof *work, GFP_KERNEL);
 	if (work == NULL) {
 		DRM_ERROR("Failed to allocate FBC work structure\n");
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 601e1eb..1383267 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -278,12 +278,10 @@ static int gen7_ring_fbc_flush(struct intel_ring_buffer *ring, u32 value)
 {
 	int ret;
 
-	if (!ring->fbc_dirty)
-		return 0;
-
 	ret = intel_ring_begin(ring, 4);
 	if (ret)
 		return ret;
+
 	intel_ring_emit(ring, MI_NOOP);
 	/* WaFbcNukeOn3DBlt:ivb/hsw */
 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
@@ -291,7 +289,6 @@ static int gen7_ring_fbc_flush(struct intel_ring_buffer *ring, u32 value)
 	intel_ring_emit(ring, value);
 	intel_ring_advance(ring);
 
-	ring->fbc_dirty = false;
 	return 0;
 }
 
@@ -353,7 +350,7 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring, u32 action)
 		intel_ring_advance(ring);
 	}
 
-	if (action & RING_FLUSH)
+	if (action & RING_FBC)
 		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
 
 	return 0;
@@ -1737,7 +1734,7 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring, u32 action)
 		intel_ring_advance(ring);
 	}
 
-	if (IS_GEN7(dev) && action & RING_FLUSH)
+	if (IS_GEN7(dev) && action & RING_FBC)
 		return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 5066b3b..c91a4b1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -96,6 +96,7 @@ struct  intel_ring_buffer {
 				  u32 action);
 #define RING_FLUSH	0x1
 #define RING_INVALIDATE 0x2
+#define RING_FBC	0x4
 
 	int		(*add_request)(struct intel_ring_buffer *ring);
 	/* Some chipsets are not quite as coherent as advertised and need
@@ -146,7 +147,6 @@ struct  intel_ring_buffer {
 	 */
 	u32 outstanding_lazy_request;
 	bool gpu_caches_dirty;
-	bool fbc_dirty;
 
 	wait_queue_head_t irq_queue;
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: RFC frontbuffer write tracking
  2013-06-20 17:18 RFC frontbuffer write tracking Chris Wilson
                   ` (4 preceding siblings ...)
  2013-06-20 17:18 ` [PATCH 5/5] drm/i915: Defer the FBC w/a invalidation Chris Wilson
@ 2013-07-18  7:40 ` Daniel Vetter
  5 siblings, 0 replies; 7+ messages in thread
From: Daniel Vetter @ 2013-07-18  7:40 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Paulo Zanoni

On Thu, Jun 20, 2013 at 06:18:27PM +0100, Chris Wilson wrote:
> Having thrown around a few ideas for how to do PSR and FBC write
> tracking on the frontbuffer, including the creation of a new SCANOUT
> domain, the implementation that I've settled on is to detect writes to
> either the GTT domain (hmm, a CPU write flush should also set the fb as
> dirty) and then a deferred SCANOUT flush. The deferred flush then resets
> the write domain and kicks the GTT mapping so that any further dumb
> writes (i.e. dumb kms buffers or fbcon) cause a new frontbuffer
> invalidation. This patch series also introduces framebuffer parameters
> (which maybe should be properties) to allow userspace to opt out of the
> dumb mechanism and elect to call dirtyfb itself when it requires the
> scanout to be flushed.
>
> I've been playing around with GFDT on SNB and IVB as a means for
> prototyping the delayed updates and framebuffer parameters.

Sorry for the massive delay in writing down my review here, it's been
sitting on my table for way too long.

Mostly unsorted thoughts on the entire topic:

- dirtyfb locking is really heavyweight, it takes all modeset locks
  currently. But it's easy to just push that down into driver callbacks so
  that we can do something saner.

- It's not part of this patch series, but imo if we want to expose a hint
  to userspace that frontbuffer rendering might not be a good idea we
  should do that as a crtc or maybe plane property (Ville's primary plane
  conversion is missing for that to work out perfectly, but meh).

- If we really need properties on framebuffer we imo should add support
  for that in drm core. But I don't think that's required, since we can
  just mark framebuffers as non-legacy as soon as we've seen the first
  ->dirtyfb ioctl call. Doesn't even need any locking since we can only
  ever upgrade from false to true. New userspace which explicitly tells
  the kernel when to invalidate an fb can just do a dummy dirtyfb call at
  alloc time (or not bother if it never does frontbuffer rendering at
  all).

- I don't think we want to have a delayed reenable timer for old userspace
  which doesn't do ->dirtyfb calls. Afaics we've established that we need
  userspace's cooperation, and adding the complexity to re-enable power
  saving features for legacy userspace is imo too much trouble. It's also
  something which we'll have a hard time to validate and regression test.

  And I really don't like new code which doesn't have good automated test
  coverage ;-)

- The issue with just killing psr/fbc (i.e. not just disabling it for a
  bit) on the first frontbuffer rendering is that we seem to have too many
  false positives with the current checks. I haven't traced stuff really
  but at least across a pageflip the ->pin_count check is too coarse and
  will fire for the logical new backbuffer if we're unlucky. So I think we
  need to add new, precise (from the userspaces pov) tracking of when a
  gem buffer object is used for psr or fbc:

  bool fbc_target;
  int psr_target;

  I think we need to add those to the gem object and not the framebuffer
  object since that's the place where we can check it in the
  busy/set_domain ioctl. To avoid locking madness this should be protected
  with a new frontbuffer tracking lock. Of course we'll still first check
  pin_count to optimize away the lock taking for most objects.

  Note that we only need a bool for fbc since only ever one framebuffer
  object can be used for fbc, so no refcounting is needed. But potentially
  for psr we can display more than one framebuffer object on a crtc with
  psr enabled, hence the refcount.

- Then we can fully disable fbc if fbc_target is set and fully disable psr
  if psr_target is set. And if we update them in setcrtc/setplane and
  for pageflips we'll always know which objects are logically the
  frontbuffer and won't have a mess with false-positive invalidation
  events due to us pipelining everything.

  For legacy userspace we'd only ever re-enable psr/fbc again at modeset
  and pageflip time.

  For non-legacy framebuffers (i.e. those where userspace called ->dirtyfb
  at least once) we'll just never set those tracking bits. There's a bit
  of ugliness around when userspace changes the fb from legacy to
  non-legacy mode while it's in use, but I guess we can fix that by simply
  clearing all the tracking bits when that happens (to avoid forgetting
  about an fbc/psr reference).

- To make this really useful (and also make sure it's properly tested) I
  think we should abolish all the hw frontbuffer rendering tracking in the
  gtt and system agent. We only need the hw to invalidate fbc/psr on an
  mmio write to the buffer address (or an MI_FLIP), which seems to be
  enabled unconditionally (excluding frobbing debug registers). For fbc (I
  haven't looked too closely at the psr patches) we certainly need to fix
  up our code to no longer disable fbc temporarily across a pageflip -
  that was only necessary with fbc1, not with fbc2 (which we have since
  about gm45).

  This way we should be able to work around the performance downsides and
  for psr also be able to enable psr when we have sprites displayed on the
  screen. And since modern compositors use sprites a lot this is imo
  important.

- Testing: I'm somewhat hopeful that we could check whether all this
  invalidation tracking works correctly with the pipe CRC computations.
  And if we disable all hw tracking failure to disable fbc/psr or flush
  stuff should _really_ show up badly even for manual testing.

Cheers, Daniel

> -Chris
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2013-07-18  7:40 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-06-20 17:18 RFC frontbuffer write tracking Chris Wilson
2013-06-20 17:18 ` [PATCH 1/5] drm/i915: Amalgamate the parameters to ring flush Chris Wilson
2013-06-20 17:18 ` [PATCH 2/5] drm/i915: Expose framebuffer parameters Chris Wilson
2013-06-20 17:18 ` [PATCH 3/5] drm/i915: Add a powersave framebuffer parameter Chris Wilson
2013-06-20 17:18 ` [PATCH 4/5] drm/i915: Track modifications to the frontbuffer Chris Wilson
2013-06-20 17:18 ` [PATCH 5/5] drm/i915: Defer the FBC w/a invalidation Chris Wilson
2013-07-18  7:40 ` RFC frontbuffer write tracking Daniel Vetter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.