intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Set DERRMR around batches required vblank events
@ 2012-07-26 20:30 Chris Wilson
  2012-08-29 15:33 ` Chris Wilson
  0 siblings, 1 reply; 5+ messages in thread
From: Chris Wilson @ 2012-07-26 20:30 UTC (permalink / raw)
  To: intel-gfx

In order for the Display Engine to send messages to the Render Engine,
that event must be unmasked in the Display Engine Render Response Mask
Register (DERRMR). By default all events are masked to prevent unwanted
messages to conserve power, and it is strongly recommended that only
single events be unmasked at any time. So in order to pipeline the
register writes around individual batchbuffers, userspace must notify
the kernel when it requires a vblank event, this patch implements an
interface to do so with an single (pipe, event) request through the
execbuffer flags.

Note that another workaround is required for IVB, in that we must also
prevent RC6 sleeps whilst waiting upon an event. To do that we also
pipeline a forcewake into the second MT slot.

Open issues:

* How to handle failure to pipeline the register changes? Abort? Sync &
undo?

* Serialising DERRMR across rings.

* Enabling vblank interrupts?

References: https://bugs.freedesktop.org/show_bug.cgi?id=50244
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_dma.c            |    3 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   65 ++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_reg.h            |    1 +
 include/drm/i915_drm.h                     |   17 ++++++++
 4 files changed, 86 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 8921c4f..5debf31 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1009,6 +1009,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_WAIT_TIMEOUT:
 		value = 1;
 		break;
+	case I915_PARAM_HAS_EXEC_VSYNC:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG_DRIVER("Unknown parameter %d\n",
 				 param->param);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d84ca98..90d8847 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1050,6 +1050,48 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (ret)
 		goto err;
 
+	if (args->flags & I915_EXEC_VSYNC_ENABLE &&
+	    INTEL_INFO(dev)->gen >= 6) {
+		int pipe, event;
+
+		printk(KERN_ERR "enabling sync\n");
+
+		/* XXX Inter-ring serialisation? */
+
+		event = I915_EXEC_VSYNC_GET_EVENT(args->flags);
+		if (event > I915_EXEC_VSYNC_VBLANK) {
+			ret = -EINVAL;
+			goto err;
+		}
+		pipe = I915_EXEC_VSYNC_GET_PIPE(args->flags);
+		if (pipe >= dev_priv->num_pipe) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		printk(KERN_ERR "event=%x, pipe=%d\n", event, pipe);
+		event = 1 << (event + 8 * pipe);
+
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			goto err;
+
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+		intel_ring_emit(ring, DERRMR);
+		intel_ring_emit(ring, ~event);
+		intel_ring_advance(ring);
+
+		if (INTEL_INFO(dev)->gen >= 7 &&
+		    intel_ring_begin(ring, 4) == 0) {
+			intel_ring_emit(ring, MI_NOOP);
+			intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+			intel_ring_emit(ring, FORCEWAKE_MT);
+			intel_ring_emit(ring, _MASKED_BIT_ENABLE(2));
+			intel_ring_advance(ring);
+		}
+	}
+
 	trace_i915_gem_ring_dispatch(ring, seqno);
 
 	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
@@ -1072,6 +1114,29 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 			goto err;
 	}
 
+	if (args->flags & I915_EXEC_VSYNC_ENABLE &&
+	    INTEL_INFO(dev)->gen >= 6) {
+		bool was_interruptible = dev_priv->mm.interruptible;
+
+		dev_priv->mm.interruptible = false;
+		if (INTEL_INFO(dev)->gen >= 7 &&
+		    intel_ring_begin(ring, 4) == 0) {
+			intel_ring_emit(ring, MI_NOOP);
+			intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+			intel_ring_emit(ring, FORCEWAKE_MT);
+			intel_ring_emit(ring, _MASKED_BIT_DISABLE(2));
+			intel_ring_advance(ring);
+		}
+		if (intel_ring_begin(ring, 4) == 0) {
+			intel_ring_emit(ring, MI_NOOP);
+			intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+			intel_ring_emit(ring, DERRMR);
+			intel_ring_emit(ring, 0xffffffff);
+			intel_ring_advance(ring);
+		}
+		dev_priv->mm.interruptible = was_interruptible;
+	}
+
 	i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
 	i915_gem_execbuffer_retire_commands(dev, file, ring);
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 1a22278..2f3a8f9 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3369,6 +3369,7 @@
 #define DEIMR   0x44004
 #define DEIIR   0x44008
 #define DEIER   0x4400c
+#define DERRMR  0x44050
 
 /* GT interrupt.
  * Note that for gen6+ the ring-specific interrupt bits do alias with the
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 8cafced4..24f5895 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -313,6 +313,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_LLC		 17
 #define I915_PARAM_HAS_ALIASING_PPGTT	 18
 #define I915_PARAM_HAS_WAIT_TIMEOUT	 19
+#define I915_PARAM_HAS_EXEC_VSYNC	 20
 
 typedef struct drm_i915_getparam {
 	int param;
@@ -691,6 +692,22 @@ struct drm_i915_gem_execbuffer2 {
 /** Resets the SO write offset registers for transform feedback on gen7. */
 #define I915_EXEC_GEN7_SOL_RESET	(1<<8)
 
+#define I915_EXEC_VSYNC_ENABLE		(1<<9)
+#define I915_EXEC_VSYNC_EVENT_SHIFT	(10)
+#define I915_EXEC_VSYNC_EVENT_MASK	(7)
+#define I915_EXEC_VSYNC_SCANLINE	(0)
+#define I915_EXEC_VSYNC_PRIMARY_FLIP	(1)
+#define I915_EXEC_VSYNC_SPRITE_FLIP	(2)
+#define I915_EXEC_VSYNC_VBLANK		(3)
+#define I915_EXEC_VSYNC_PIPE_SHIFT	(13)
+#define I915_EXEC_VSYNC_PIPE_MASK	(7)
+
+#define I915_EXEC_VSYNC(pipe, event) \
+	(I915_EXEC_VSYNC_ENABLE | (pipe) << I915_EXEC_VSYNC_PIPE_SHIFT | (event) << I915_EXEC_VSYNC_EVENT_SHIFT)
+
+#define I915_EXEC_VSYNC_GET_PIPE(F) (((F) >> I915_EXEC_VSYNC_PIPE_SHIFT) & I915_EXEC_VSYNC_PIPE_MASK)
+#define I915_EXEC_VSYNC_GET_EVENT(F) (((F) >> I915_EXEC_VSYNC_EVENT_SHIFT) & I915_EXEC_VSYNC_EVENT_MASK)
+
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
 	(eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 5+ messages in thread
* [PATCH] drm/i915: Set DERRMR around batches required vblank events
@ 2012-10-16  9:47 Chris Wilson
  2012-10-16 14:15 ` Daniel Vetter
  0 siblings, 1 reply; 5+ messages in thread
From: Chris Wilson @ 2012-10-16  9:47 UTC (permalink / raw)
  To: intel-gfx

In order for the Display Engine to send messages to the Render Engine,
that event must be unmasked in the Display Engine Render Response Mask
Register (DERRMR). By default all events are masked to prevent unwanted
messages to conserve power, and it is strongly recommended that only
single events be unmasked at any time. So in order to pipeline the
register writes around individual batchbuffers, userspace must notify
the kernel when it requires a vblank event, this patch implements an
interface to do so with an single (pipe, event) request through the
execbuffer flags.

Note that another workaround is required for IVB, in that we must also
prevent RC6 sleeps whilst waiting upon an event. To do that we also
pipeline a forcewake into the second MT slot.

There are a few unpleasant issues addressed by the patch. The first is
to only allow a single ring to process vsync requests at any time. This
is really a non-issue as the current hardware can only process those
requests on a single ring, but by serialising between rings we should
prevent our future selves from shooting their feet. The second ugly
issue is to handle unwind failures by disabling signals whilst queueing
a vsync request, perhaps a pleasant side-effect. The last issue is the
trick to enable vblank irqs which relies on the deferred vblank put.

References: https://bugs.freedesktop.org/show_bug.cgi?id=50244
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---

I am still not convinced this fixes anything as at the moment I am
simply unable to detect any tearing on my ivb setup if I apply any form of
vblank throttling.

The other issue is that I think if I had a SECURE
(DRM_MASTER|DRM_ROOT_ONLY) batch buffer I could probably do all of this
in userspace.
-Chris

---
 drivers/gpu/drm/i915/i915_dma.c            |    3 ++
 drivers/gpu/drm/i915/i915_drv.h            |    3 ++
 drivers/gpu/drm/i915/i915_gem.c            |    2 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   76 ++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_reg.h            |    1 +
 include/drm/i915_drm.h                     |   17 +++++++
 6 files changed, 102 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 5779e8f..759a5e6 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1015,6 +1015,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_PRIME_VMAP_FLUSH:
 		value = 1;
 		break;
+	case I915_PARAM_HAS_EXEC_VSYNC:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG_DRIVER("Unknown parameter %d\n",
 				 param->param);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index dda10c1..db2ca9f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -434,6 +434,9 @@ typedef struct drm_i915_private {
 	struct intel_ring_buffer ring[I915_NUM_RINGS];
 	uint32_t next_seqno;
 
+	struct intel_ring_buffer *vsync_ring;
+	uint32_t vsync_seqno;
+
 	drm_dma_handle_t *status_page_dmah;
 	uint32_t counter;
 	struct drm_i915_gem_object *pwrctx;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f740535..7a5ab2c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2643,6 +2643,8 @@ int i915_gpu_idle(struct drm_device *dev)
 			return ret;
 	}
 
+	dev_priv->vsync_ring = NULL;
+	dev_priv->vsync_seqno = 0;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index bcd5aa2..021b967 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -807,6 +807,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		       struct drm_i915_gem_exec_object2 *exec)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	bool was_interruptible = dev_priv->mm.interruptible;
 	struct list_head objects;
 	struct eb_objects *eb;
 	struct drm_i915_gem_object *batch_obj;
@@ -1044,6 +1045,62 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 			goto err;
 	}
 
+	if (args->flags & I915_EXEC_VSYNC_ENABLE && INTEL_INFO(dev)->gen >= 6) {
+		u32 pipe, event;
+
+		if (dev_priv->vsync_seqno && dev_priv->vsync_ring != ring) {
+			ret = i915_wait_seqno(dev_priv->vsync_ring,
+					      dev_priv->vsync_seqno);
+			if (ret)
+				goto err;
+
+			dev_priv->vsync_ring = NULL;
+			dev_priv->vsync_seqno = 0;
+		}
+
+		pipe = I915_EXEC_VSYNC_GET_PIPE(args->flags);
+		event = I915_EXEC_VSYNC_GET_EVENT(args->flags);
+		if (pipe >= dev_priv->num_pipe ||
+		    event > I915_EXEC_VSYNC_VBLANK) {
+			ret = -EINVAL;
+			goto err;
+		}
+		if (event == I915_EXEC_VSYNC_VBLANK) {
+			ret = drm_vblank_get(dev, pipe);
+			if (ret)
+				goto err;
+
+			/* As the release is deferred by a few frames
+			 * we can safely do the release before the queueing.
+			 */
+			drm_vblank_put(dev, pipe);
+		}
+		event = ~(1 << (event + 8 * pipe));
+
+		dev_priv->mm.interruptible = false;
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			goto err;
+
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+		intel_ring_emit(ring, DERRMR);
+		intel_ring_emit(ring, event);
+		intel_ring_advance(ring);
+
+		if (INTEL_INFO(dev)->gen >= 7 &&
+		    intel_ring_begin(ring, 4) == 0) {
+			intel_ring_emit(ring, MI_NOOP);
+			intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+			intel_ring_emit(ring, FORCEWAKE_MT);
+			intel_ring_emit(ring, _MASKED_BIT_ENABLE(2));
+			intel_ring_advance(ring);
+		}
+
+		dev_priv->vsync_ring = ring;
+		dev_priv->vsync_seqno = seqno;
+	}
+
 	trace_i915_gem_ring_dispatch(ring, seqno);
 
 	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
@@ -1066,10 +1123,29 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 			goto err;
 	}
 
+	if (args->flags & I915_EXEC_VSYNC_ENABLE && INTEL_INFO(dev)->gen >= 6) {
+		if (INTEL_INFO(dev)->gen >= 7 &&
+		    intel_ring_begin(ring, 4) == 0) {
+			intel_ring_emit(ring, MI_NOOP);
+			intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+			intel_ring_emit(ring, FORCEWAKE_MT);
+			intel_ring_emit(ring, _MASKED_BIT_DISABLE(2));
+			intel_ring_advance(ring);
+		}
+		if (intel_ring_begin(ring, 4) == 0) {
+			intel_ring_emit(ring, MI_NOOP);
+			intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+			intel_ring_emit(ring, DERRMR);
+			intel_ring_emit(ring, 0xffffffff);
+			intel_ring_advance(ring);
+		}
+	}
+
 	i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
 	i915_gem_execbuffer_retire_commands(dev, file, ring);
 
 err:
+	dev_priv->mm.interruptible = was_interruptible;
 	eb_destroy(eb);
 	while (!list_empty(&objects)) {
 		struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8120bf2..9897b09 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3391,6 +3391,7 @@
 #define DEIMR   0x44004
 #define DEIIR   0x44008
 #define DEIER   0x4400c
+#define DERRMR  0x44050
 
 /* GT interrupt.
  * Note that for gen6+ the ring-specific interrupt bits do alias with the
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index acfb377..f6e39bb 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -316,6 +316,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_SEMAPHORES	 20
 #define I915_PARAM_HAS_PRIME_VMAP_FLUSH	 21
 #define I915_PARAM_RSVD_FOR_FUTURE_USE	 22
+#define I915_PARAM_HAS_EXEC_VSYNC	 23
 
 typedef struct drm_i915_getparam {
 	int param;
@@ -694,6 +695,22 @@ struct drm_i915_gem_execbuffer2 {
 /** Resets the SO write offset registers for transform feedback on gen7. */
 #define I915_EXEC_GEN7_SOL_RESET	(1<<8)
 
+#define I915_EXEC_VSYNC_ENABLE		(1<<9)
+#define I915_EXEC_VSYNC_EVENT_SHIFT	(10)
+#define I915_EXEC_VSYNC_EVENT_MASK	(7)
+#define I915_EXEC_VSYNC_SCANLINE	(0)
+#define I915_EXEC_VSYNC_PRIMARY_FLIP	(1)
+#define I915_EXEC_VSYNC_SPRITE_FLIP	(2)
+#define I915_EXEC_VSYNC_VBLANK		(3)
+#define I915_EXEC_VSYNC_PIPE_SHIFT	(13)
+#define I915_EXEC_VSYNC_PIPE_MASK	(7)
+
+#define I915_EXEC_VSYNC(pipe, event) \
+	(I915_EXEC_VSYNC_ENABLE | (pipe) << I915_EXEC_VSYNC_PIPE_SHIFT | (event) << I915_EXEC_VSYNC_EVENT_SHIFT)
+
+#define I915_EXEC_VSYNC_GET_PIPE(F) (((F) >> I915_EXEC_VSYNC_PIPE_SHIFT) & I915_EXEC_VSYNC_PIPE_MASK)
+#define I915_EXEC_VSYNC_GET_EVENT(F) (((F) >> I915_EXEC_VSYNC_EVENT_SHIFT) & I915_EXEC_VSYNC_EVENT_MASK)
+
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
 	(eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2012-10-17 13:58 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-07-26 20:30 [PATCH] drm/i915: Set DERRMR around batches required vblank events Chris Wilson
2012-08-29 15:33 ` Chris Wilson
2012-10-16  9:47 Chris Wilson
2012-10-16 14:15 ` Daniel Vetter
2012-10-17 13:58   ` Chris Wilson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).