intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* force wake reference counting (another try)
@ 2011-04-12  1:01 Ben Widawsky
  2011-04-12  1:01 ` [PATCH 1/4] drm/i915: proper use of forcewake Ben Widawsky
                   ` (4 more replies)
  0 siblings, 5 replies; 17+ messages in thread
From: Ben Widawsky @ 2011-04-12  1:01 UTC (permalink / raw)
  To: intel-gfx


Contrary to the discussion on IRC, I have decided not to add
config.mutex as part of the warning criteria. Upon further thought, it
just seemed incorrect to me. This patch series implies that all register
reads which may require a forcewake must now hold struct_mutex, ie. make
struct_mutex more global. The alternative is to invent a new lock for
forcewake which must be acquired prior to said register reads.

I know, that sucks. It has implications on non-SNB platforms, and that
is exactly what I was hoping to avoid. The current code is dangerous. If
two threads need register reads requiring forcewake but aren't
protected by the same mutex, it will lead to unpredictable register
reads (I can't think of a case where it will actually work correctly).

So once again, I expect this patch to potentially generate a lot of
warnings, but I consider all of those warnings to be serious bugs for
SNB.

If anyone has clever ideas on how to handle this outside of what I've
already mentioned, please let me know.

I expect ongoing patches to fix these issues as they come up.

Ben

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 1/4] drm/i915: proper use of forcewake
  2011-04-12  1:01 force wake reference counting (another try) Ben Widawsky
@ 2011-04-12  1:01 ` Ben Widawsky
  2011-04-12  1:01 ` [PATCH 2/4] drm/i915: refcounts for forcewake Ben Widawsky
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 17+ messages in thread
From: Ben Widawsky @ 2011-04-12  1:01 UTC (permalink / raw)
  To: intel-gfx

Moved the macros around to properly do reads and writes for the given
gpu. This is to address special requirements for gen6 (SNB) reads and
writes.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h         |   56 ++++++++++++++----------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |    4 +-
 2 files changed, 28 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5004724..8981173 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1313,9 +1313,31 @@ extern void intel_display_print_error_state(struct seq_file *m,
 } while (0)
 
 
+
+/* On SNB platform, before reading ring registers forcewake bit
+ * must be set to prevent GT core from power down and stale values being
+ * returned.
+ */
+void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
+void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
+void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
+
+/* We give fast paths for the really cool registers */
+#define NEEDS_FORCE_WAKE(dev_priv, reg) \
+	(((dev_priv)->info->gen >= 6) && \
+	((reg) < 0x40000) && \
+	((reg) != FORCEWAKE) )
+
 #define __i915_read(x, y) \
 static inline u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
-	u##x val = read##y(dev_priv->regs + reg); \
+	u##x val = 0; \
+	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+		__gen6_gt_force_wake_get(dev_priv); \
+		val = read##y(dev_priv->regs + reg); \
+		__gen6_gt_force_wake_put(dev_priv); \
+	} else { \
+		val = read##y(dev_priv->regs + reg); \
+	} \
 	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
 	return val; \
 }
@@ -1328,6 +1350,9 @@ __i915_read(64, q)
 #define __i915_write(x, y) \
 static inline void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
 	trace_i915_reg_rw(true, reg, val, sizeof(val)); \
+	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+		__gen6_gt_wait_for_fifo(dev_priv); \
+	} \
 	write##y(val, dev_priv->regs + reg); \
 }
 __i915_write(8, b)
@@ -1356,33 +1381,4 @@ __i915_write(64, q)
 #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)
 
 
-/* On SNB platform, before reading ring registers forcewake bit
- * must be set to prevent GT core from power down and stale values being
- * returned.
- */
-void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
-void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
-void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
-
-static inline u32 i915_gt_read(struct drm_i915_private *dev_priv, u32 reg)
-{
-	u32 val;
-
-	if (dev_priv->info->gen >= 6) {
-		__gen6_gt_force_wake_get(dev_priv);
-		val = I915_READ(reg);
-		__gen6_gt_force_wake_put(dev_priv);
-	} else
-		val = I915_READ(reg);
-
-	return val;
-}
-
-static inline void i915_gt_write(struct drm_i915_private *dev_priv,
-				u32 reg, u32 val)
-{
-	if (dev_priv->info->gen >= 6)
-		__gen6_gt_wait_for_fifo(dev_priv);
-	I915_WRITE(reg, val);
-}
 #endif
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f23cc5f..8e8821f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -14,8 +14,8 @@ struct  intel_hw_status_page {
 	struct		drm_i915_gem_object *obj;
 };
 
-#define I915_RING_READ(reg) i915_gt_read(dev_priv, reg)
-#define I915_RING_WRITE(reg, val) i915_gt_write(dev_priv, reg, val)
+#define I915_RING_READ(reg) I915_READ(reg)
+#define I915_RING_WRITE(reg, val) I915_WRITE(reg, val)
 
 #define I915_READ_TAIL(ring) I915_RING_READ(RING_TAIL((ring)->mmio_base))
 #define I915_WRITE_TAIL(ring, val) I915_RING_WRITE(RING_TAIL((ring)->mmio_base), val)
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 2/4] drm/i915: refcounts for forcewake
  2011-04-12  1:01 force wake reference counting (another try) Ben Widawsky
  2011-04-12  1:01 ` [PATCH 1/4] drm/i915: proper use of forcewake Ben Widawsky
@ 2011-04-12  1:01 ` Ben Widawsky
  2011-04-12  1:01 ` [PATCH 3/4] drm/i915: userspace interface to the forcewake refcount Ben Widawsky
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 17+ messages in thread
From: Ben Widawsky @ 2011-04-12  1:01 UTC (permalink / raw)
  To: intel-gfx

The reference count provides a clean way for userspace to gain access to
correct register reads and writes by initiating forcewake.

The mechanism also makes it possible to limit the time struct_mutex is
held on register reads and writes to the time it takes to obtain a
reference. However there is no safe use-case for this yet as register
reads and writes are currently expected to be protected by a mutex.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_debugfs.c  |   10 ++++++++--
 drivers/gpu/drm/i915/i915_drv.c      |   20 ++++++++++++++++++--
 drivers/gpu/drm/i915/i915_drv.h      |   10 ++++++----
 drivers/gpu/drm/i915/intel_display.c |    8 ++++----
 4 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 87c8e29..2edb8b2 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -852,6 +852,7 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
 	struct drm_device *dev = node->minor->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	int ret;
 
 	if (IS_GEN5(dev)) {
 		u16 rgvswctl = I915_READ16(MEMSWCTL);
@@ -873,7 +874,11 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
 		int max_freq;
 
 		/* RPSTAT1 is in the GT power well */
-		__gen6_gt_force_wake_get(dev_priv);
+		ret = mutex_lock_interruptible(&dev->struct_mutex);
+		if (ret)
+			return ret;
+
+		gen6_gt_force_wake_get(dev_priv);
 
 		rpstat = I915_READ(GEN6_RPSTAT1);
 		rpupei = I915_READ(GEN6_RP_CUR_UP_EI);
@@ -918,7 +923,8 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
 		seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
 			   max_freq * 50);
 
-		__gen6_gt_force_wake_put(dev_priv);
+		gen6_gt_force_wake_put(dev_priv);
+		mutex_unlock(&dev->struct_mutex);
 	} else {
 		seq_printf(m, "no P-state info available\n");
 	}
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index c34a8dd..ce59cd4 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -263,7 +263,7 @@ void intel_detect_pch (struct drm_device *dev)
 	}
 }
 
-void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
+static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
 {
 	int count;
 
@@ -279,12 +279,28 @@ void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
 		udelay(10);
 }
 
-void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
+void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
+{
+	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
+
+	if (dev_priv->forcewake_count++ == 0)
+		__gen6_gt_force_wake_get(dev_priv);
+}
+
+static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE_NOTRACE(FORCEWAKE, 0);
 	POSTING_READ(FORCEWAKE);
 }
 
+void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
+{
+	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
+
+	if (--dev_priv->forcewake_count == 0)
+		__gen6_gt_force_wake_put(dev_priv);
+}
+
 void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
 {
 	int loop = 500;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8981173..f024927 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -703,6 +703,8 @@ typedef struct drm_i915_private {
 	struct intel_fbdev *fbdev;
 
 	struct drm_property *broadcast_rgb_property;
+
+	int forcewake_count;
 } drm_i915_private_t;
 
 struct drm_i915_gem_object {
@@ -1318,8 +1320,8 @@ extern void intel_display_print_error_state(struct seq_file *m,
  * must be set to prevent GT core from power down and stale values being
  * returned.
  */
-void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
-void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
+void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
+void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
 void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
 
 /* We give fast paths for the really cool registers */
@@ -1332,9 +1334,9 @@ void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
 static inline u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
 	u##x val = 0; \
 	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
-		__gen6_gt_force_wake_get(dev_priv); \
+		gen6_gt_force_wake_get(dev_priv); \
 		val = read##y(dev_priv->regs + reg); \
-		__gen6_gt_force_wake_put(dev_priv); \
+		gen6_gt_force_wake_put(dev_priv); \
 	} else { \
 		val = read##y(dev_priv->regs + reg); \
 	} \
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 432fc04..4585005 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1828,7 +1828,7 @@ static void sandybridge_blit_fbc_update(struct drm_device *dev)
 	u32 blt_ecoskpd;
 
 	/* Make sure blitter notifies FBC of writes */
-	__gen6_gt_force_wake_get(dev_priv);
+	gen6_gt_force_wake_get(dev_priv);
 	blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
 	blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
 		GEN6_BLITTER_LOCK_SHIFT;
@@ -1839,7 +1839,7 @@ static void sandybridge_blit_fbc_update(struct drm_device *dev)
 			 GEN6_BLITTER_LOCK_SHIFT);
 	I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
 	POSTING_READ(GEN6_BLITTER_ECOSKPD);
-	__gen6_gt_force_wake_put(dev_priv);
+	gen6_gt_force_wake_put(dev_priv);
 }
 
 static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
@@ -6853,7 +6853,7 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
 	 * userspace...
 	 */
 	I915_WRITE(GEN6_RC_STATE, 0);
-	__gen6_gt_force_wake_get(dev_priv);
+	gen6_gt_force_wake_get(dev_priv);
 
 	/* disable the counters and set deterministic thresholds */
 	I915_WRITE(GEN6_RC_CONTROL, 0);
@@ -6951,7 +6951,7 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
 	/* enable all PM interrupts */
 	I915_WRITE(GEN6_PMINTRMSK, 0);
 
-	__gen6_gt_force_wake_put(dev_priv);
+	gen6_gt_force_wake_put(dev_priv);
 }
 
 void intel_enable_clock_gating(struct drm_device *dev)
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 3/4] drm/i915: userspace interface to the forcewake refcount
  2011-04-12  1:01 force wake reference counting (another try) Ben Widawsky
  2011-04-12  1:01 ` [PATCH 1/4] drm/i915: proper use of forcewake Ben Widawsky
  2011-04-12  1:01 ` [PATCH 2/4] drm/i915: refcounts for forcewake Ben Widawsky
@ 2011-04-12  1:01 ` Ben Widawsky
  2011-04-12  1:01 ` [PATCH 4/4] drm/i915: fewer warning patch (temporary) Ben Widawsky
  2011-04-12  8:02 ` force wake reference counting (another try) Chris Wilson
  4 siblings, 0 replies; 17+ messages in thread
From: Ben Widawsky @ 2011-04-12  1:01 UTC (permalink / raw)
  To: intel-gfx

Provide debugfs access to the refcounted forcewake. This allows a
userspace utility to access some of those hard to reach registers on
newer GPUs. The interface is root-only.

The interface is referred to as a forcewake lock. The term lock refers
to the synchronization between software and hardware (powering down the
GPU).

A note for those not reading the comments in the patches:
* "acquiring" the forcewake lock from userspace can fail. The utilities
  in userspace to read and write registers are inferior to the utilities
  in the kernel, and so failure (and inability to force acquire the
  lock) is acceptable.
* "releasing" the forcewake lock can hang. The conditions under which
  this can happen are so severe it would almost certainly require a
  reboot anyway (i915 unload would also hang).

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_debugfs.c |   84 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.c     |   12 +++++
 2 files changed, 96 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 2edb8b2..dda687a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1192,6 +1192,17 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int i915_gen6_forcewake_count_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	seq_printf(m, "forcewake count = %d\n", dev_priv->forcewake_count);
+
+	return 0;
+}
+
 static int
 i915_wedged_open(struct inode *inode,
 		 struct file *filp)
@@ -1294,6 +1305,72 @@ static int i915_wedged_create(struct dentry *root, struct drm_minor *minor)
 	return drm_add_fake_info_node(minor, ent, &i915_wedged_fops);
 }
 
+static int i915_forcewake_open(struct inode *inode, struct file *file)
+{
+	struct drm_device *dev = inode->i_private;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	if (!IS_GEN6(dev))
+		return 0;
+
+	/*
+	 * If the driver hangs while holding the lock, the user should turn on
+	 * register read/write tracing, or use the other available debug options
+	 * to to find the cause.
+	 */
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	gen6_gt_force_wake_get(dev_priv);
+
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+}
+
+int i915_forcewake_release(struct inode *inode, struct file *file)
+{
+	struct drm_device *dev = inode->i_private;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (!IS_GEN6(dev))
+		return 0;
+
+	/*
+	 * If we hang, it implies that we have a locking issue, or some other
+	 * oops in the driver. If we lock up userspace, that is an unfortunate
+	 * but acceptable consequence.
+	 */
+	mutex_lock(&dev->struct_mutex);
+	gen6_gt_force_wake_put(dev_priv);
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+}
+
+static const struct file_operations i915_forcewake_fops = {
+	.owner = THIS_MODULE,
+	.open = i915_forcewake_open,
+	.release = i915_forcewake_release,
+};
+
+static int i915_forcewake_create(struct dentry *root, struct drm_minor *minor)
+{
+	struct drm_device *dev = minor->dev;
+	struct dentry *ent;
+
+	ent = debugfs_create_file("i915_forcewake_lock",
+				  S_IRWXU,
+				  root, dev,
+				  &i915_forcewake_fops);
+	if (IS_ERR(ent))
+		return PTR_ERR(ent);
+
+	return 0;
+}
+
 static struct drm_info_list i915_debugfs_list[] = {
 	{"i915_capabilities", i915_capabilities, 0},
 	{"i915_gem_objects", i915_gem_object_info, 0},
@@ -1330,6 +1407,7 @@ static struct drm_info_list i915_debugfs_list[] = {
 	{"i915_sr_status", i915_sr_status, 0},
 	{"i915_opregion", i915_opregion, 0},
 	{"i915_gem_framebuffer", i915_gem_framebuffer_info, 0},
+	{"i915_gen6_forcewake_count", i915_gen6_forcewake_count_info, 0},
 };
 #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
 
@@ -1341,6 +1419,10 @@ int i915_debugfs_init(struct drm_minor *minor)
 	if (ret)
 		return ret;
 
+	ret = i915_forcewake_create(minor->debugfs_root, minor);
+	if (ret)
+		return ret;
+
 	return drm_debugfs_create_files(i915_debugfs_list,
 					I915_DEBUGFS_ENTRIES,
 					minor->debugfs_root, minor);
@@ -1350,6 +1432,8 @@ void i915_debugfs_cleanup(struct drm_minor *minor)
 {
 	drm_debugfs_remove_files(i915_debugfs_list,
 				 I915_DEBUGFS_ENTRIES, minor);
+	drm_debugfs_remove_files((struct drm_info_list *) &i915_forcewake_fops,
+				 1, minor);
 	drm_debugfs_remove_files((struct drm_info_list *) &i915_wedged_fops,
 				 1, minor);
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ce59cd4..0894e9f 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -281,6 +281,15 @@ static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
 
 void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
 {
+	/*
+	 * struct_mutex protects the reference count. It must be held here. This
+	 * means that any register read which requires forcewake must hold
+	 * struct_mutex. Register reads which were previously protected by
+	 * config.mutex that require forcewake must now also hold struct_mutex.
+	 *
+	 * The other option is to introduce a new forcewake lock which must be
+	 * acquired prior to any register read.
+	 */
 	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
 
 	if (dev_priv->forcewake_count++ == 0)
@@ -295,6 +304,9 @@ static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
 
 void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
 {
+	/*
+	 * See gen6_gt_force_wake_get()
+	 */
 	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
 
 	if (--dev_priv->forcewake_count == 0)
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 4/4] drm/i915: fewer warning patch (temporary)
  2011-04-12  1:01 force wake reference counting (another try) Ben Widawsky
                   ` (2 preceding siblings ...)
  2011-04-12  1:01 ` [PATCH 3/4] drm/i915: userspace interface to the forcewake refcount Ben Widawsky
@ 2011-04-12  1:01 ` Ben Widawsky
  2011-04-12  8:02 ` force wake reference counting (another try) Chris Wilson
  4 siblings, 0 replies; 17+ messages in thread
From: Ben Widawsky @ 2011-04-12  1:01 UTC (permalink / raw)
  To: intel-gfx

This patch may help mask some of the warnings. Because it's only a
requirement to hold struct_mutex to obtain a reference, it is acceptable
from a hardware perspective to not hold struct_mutex if there is already
a reference. However, it's still extremely dangerous since there is no
real synchronization with the reference count (it could change right
after our check). This should only be used by developers who want to
debug some of the warnings, but want to deal with a possibly smaller set
of warnings to start with. It should not go upstream.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.c |    6 ++++--
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 0894e9f..d5748ba 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -290,7 +290,8 @@ void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
 	 * The other option is to introduce a new forcewake lock which must be
 	 * acquired prior to any register read.
 	 */
-	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
+	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex) &&
+		!dev_priv->forcewake_count);
 
 	if (dev_priv->forcewake_count++ == 0)
 		__gen6_gt_force_wake_get(dev_priv);
@@ -307,7 +308,8 @@ void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
 	/*
 	 * See gen6_gt_force_wake_get()
 	 */
-	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
+	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex) &&
+		!dev_priv->forcewake_count);
 
 	if (--dev_priv->forcewake_count == 0)
 		__gen6_gt_force_wake_put(dev_priv);
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: force wake reference counting (another try)
  2011-04-12  1:01 force wake reference counting (another try) Ben Widawsky
                   ` (3 preceding siblings ...)
  2011-04-12  1:01 ` [PATCH 4/4] drm/i915: fewer warning patch (temporary) Ben Widawsky
@ 2011-04-12  8:02 ` Chris Wilson
  2011-04-12 16:30   ` Ben Widawsky
  4 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2011-04-12  8:02 UTC (permalink / raw)
  To: Ben Widawsky, intel-gfx

On Mon, 11 Apr 2011 18:01:15 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> So once again, I expect this patch to potentially generate a lot of
> warnings, but I consider all of those warnings to be serious bugs for
> SNB.
> 
> If anyone has clever ideas on how to handle this outside of what I've
> already mentioned, please let me know.
> 
> I expect ongoing patches to fix these issues as they come up.

Continuing the general discussion from IRC, we really do need to clarify
which locks we expect to be holding when reading different sets of
registers.  Along with similar documentation over which parts of our
structures are covered by struct_mutex, mode_config.lock and the ensemble
of spinlocks.  This task is not limited to just our driver, but we need to
review the core as well as looking at how to improve the locking overall.
(The clear example that something sucks is that probing outputs causes a
rendering stall (fortunately less noticeable on recent hardware where
hotplug is prevalent and the polling itself is much quicker, but still
present.)

The only concern I have with the extra warnings are if we leave false
positives in the code we submit upstream. Those warnings will quickly
become regression reports and angry users. Alas, no clever ideas.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: force wake reference counting (another try)
  2011-04-12  8:02 ` force wake reference counting (another try) Chris Wilson
@ 2011-04-12 16:30   ` Ben Widawsky
  2011-04-12 16:56     ` Keith Packard
  0 siblings, 1 reply; 17+ messages in thread
From: Ben Widawsky @ 2011-04-12 16:30 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, Apr 12, 2011 at 09:02:21AM +0100, Chris Wilson wrote:
> On Mon, 11 Apr 2011 18:01:15 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> > So once again, I expect this patch to potentially generate a lot of
> > warnings, but I consider all of those warnings to be serious bugs for
> > SNB.
> > 
> > If anyone has clever ideas on how to handle this outside of what I've
> > already mentioned, please let me know.
> > 
> > I expect ongoing patches to fix these issues as they come up.
> 
> Continuing the general discussion from IRC, we really do need to clarify
> which locks we expect to be holding when reading different sets of
> registers.  Along with similar documentation over which parts of our
> structures are covered by struct_mutex, mode_config.lock and the ensemble
> of spinlocks.  This task is not limited to just our driver, but we need to
> review the core as well as looking at how to improve the locking overall.
> (The clear example that something sucks is that probing outputs causes a
> rendering stall (fortunately less noticeable on recent hardware where
> hotplug is prevalent and the polling itself is much quicker, but still
> present.)

No doubt about it. It seems quite difficult to do with the amount of
change from generation to generation. It looks like we even have
register changes between steppings.

> 
> The only concern I have with the extra warnings are if we leave false
> positives in the code we submit upstream. Those warnings will quickly
> become regression reports and angry users. Alas, no clever ideas.
> -Chris

I am going to spend at least a day tracking down, and hopefully fixing
warnings if you agree with my next statement that it is in fact a
problem. My hope is there aren't too many cases.

The assertion I'm making is these are not false positives. I don't have
data yet to suggest this is happening, and I don't know the code well
enough to have a hunch one way or the other. However, here are two
possible examples of why it's broken with both the current, and
refcounted code.

Current:
Thread A acquires struct_mutex
Thread A goes to wake up the GT (takes a while)
Thread B acquires congif.mutex
Thread A finishes wake up, does the read
Thread B goes to wake up the GT (gets out fast because it's awake)
Thread A finishes, puts the GT to sleep
Thread B reads are potentially corrupted from here on

Refcounted:
Thread A acquires struct_mutex
Thread A increments refcounts, starts waking GT
Thread B acquires config.mutex
Thread B goes to wake GT, refcount is 1, does the reads before GT is
	actually awake

Ben

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: force wake reference counting (another try)
  2011-04-12 16:30   ` Ben Widawsky
@ 2011-04-12 16:56     ` Keith Packard
  2011-04-12 17:21       ` Chris Wilson
  0 siblings, 1 reply; 17+ messages in thread
From: Keith Packard @ 2011-04-12 16:56 UTC (permalink / raw)
  To: Ben Widawsky, Chris Wilson; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 513 bytes --]

On Tue, 12 Apr 2011 09:30:22 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:

> I am going to spend at least a day tracking down, and hopefully fixing
> warnings if you agree with my next statement that it is in fact a
> problem. My hope is there aren't too many cases.

I can't see how we can survive without using exactly one lock covering
the wake locks on SNB. Has someone tried sticking a simple spinlock
around the whole sequence (force_wake_get, op, force_wake_put)?

-- 
keith.packard@intel.com

[-- Attachment #1.2: Type: application/pgp-signature, Size: 189 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: force wake reference counting (another try)
  2011-04-12 16:56     ` Keith Packard
@ 2011-04-12 17:21       ` Chris Wilson
  2011-04-12 17:41         ` Keith Packard
  0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2011-04-12 17:21 UTC (permalink / raw)
  To: Keith Packard, Ben Widawsky; +Cc: intel-gfx

On Tue, 12 Apr 2011 09:56:03 -0700, Keith Packard <keithp@keithp.com> wrote:
> On Tue, 12 Apr 2011 09:30:22 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> 
> > I am going to spend at least a day tracking down, and hopefully fixing
> > warnings if you agree with my next statement that it is in fact a
> > problem. My hope is there aren't too many cases.
> 
> I can't see how we can survive without using exactly one lock covering
> the wake locks on SNB. Has someone tried sticking a simple spinlock
> around the whole sequence (force_wake_get, op, force_wake_put)?

Agreed. I had been working under the assumption that dev->struct_mutex was
the sufficient lock. This may be entirely due to the false premise that we
only needed i915_gt_read() for the ring registers. I still haven't looked
through just what registers are impacted.

Ben, as you work through this can you amend the register names to
include whether it is inside the GT power well? And a precise next to
gen6_gt_forcewake_get() on which groups of registers are affected.

GEN6_GT_x vs GEN6_y?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: force wake reference counting (another try)
  2011-04-12 17:21       ` Chris Wilson
@ 2011-04-12 17:41         ` Keith Packard
  2011-04-13  1:31           ` Ben Widawsky
  0 siblings, 1 reply; 17+ messages in thread
From: Keith Packard @ 2011-04-12 17:41 UTC (permalink / raw)
  To: Chris Wilson, Ben Widawsky; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 807 bytes --]

On Tue, 12 Apr 2011 18:21:23 +0100, Chris Wilson <chris@chris-wilson.co.uk> wrote:

> Agreed. I had been working under the assumption that dev->struct_mutex was
> the sufficient lock. This may be entirely due to the false premise that we
> only needed i915_gt_read() for the ring registers. I still haven't looked
> through just what registers are impacted.

Seems like we should start using a spinlock and wake lock around all
register accesses, then figure out which registers are not within the GT
power well and split those off to a separate macro which avoids both. If
we finally discover that all wake-lock requiring registers are now
obviously covered by the struct mutex, we could then consider removing
the spinlock.

Make it work, then make it fast.

-- 
keith.packard@intel.com

[-- Attachment #1.2: Type: application/pgp-signature, Size: 189 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: force wake reference counting (another try)
  2011-04-12 17:41         ` Keith Packard
@ 2011-04-13  1:31           ` Ben Widawsky
  2011-04-13  5:31             ` Keith Packard
  2011-04-13  5:52             ` Chris Wilson
  0 siblings, 2 replies; 17+ messages in thread
From: Ben Widawsky @ 2011-04-13  1:31 UTC (permalink / raw)
  To: Keith Packard; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 1898 bytes --]

On Tue, Apr 12, 2011 at 10:41:47AM -0700, Keith Packard wrote:
> On Tue, 12 Apr 2011 18:21:23 +0100, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > Agreed. I had been working under the assumption that dev->struct_mutex was
> > the sufficient lock. This may be entirely due to the false premise that we
> > only needed i915_gt_read() for the ring registers. I still haven't looked
> > through just what registers are impacted.
> 
> Seems like we should start using a spinlock and wake lock around all
> register accesses, then figure out which registers are not within the GT
> power well and split those off to a separate macro which avoids both. If
> we finally discover that all wake-lock requiring registers are now
> obviously covered by the struct mutex, we could then consider removing
> the spinlock.
> 
> Make it work, then make it fast.
> 
> -- 
> keith.packard@intel.com

I think we have no other option since the first thing that
i915_driver_irq_handler() does is read IIR, which according to the limited
knowledge I have requires forcewake.

I was hoping if I could just fix the current issues, which is mostly
focused around fbc, we'd be fine, but then I saw this backtrace:

 <IRQ>  [<ffffffff8105659a>] warn_slowpath_common+0x7a/0xb0
 [<ffffffff810565e5>] warn_slowpath_null+0x15/0x20
 [<ffffffffa05185e3>] gen6_gt_force_wake_get+0xf3/0x110 [i915]
 [<ffffffffa0524875>] i915_driver_irq_handler+0x2175/0x2190 [i915]
 [<ffffffff8139320d>] ? _raw_spin_unlock_irq+0x3d/0x60
 [<ffffffff810c1cb4>] handle_irq_event_percpu+0x74/0x270
 [<ffffffff810c1ef3>] handle_irq_event+0x43/0x70
 [<ffffffff810c443f>] handle_edge_irq+0x6f/0x120
 [<ffffffff8100db5d>] handle_irq+0x1d/0x30
 [<ffffffff8100d7d8>] do_IRQ+0x58/0xe0
 [<ffffffff81393613>] common_interrupt+0x13/0x13

and all hope was lost.

So next up is exactly what Keith recommended.

Ben

[-- Attachment #1.2: Type: application/pgp-signature, Size: 490 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: force wake reference counting (another try)
  2011-04-13  1:31           ` Ben Widawsky
@ 2011-04-13  5:31             ` Keith Packard
  2011-04-13  5:52             ` Chris Wilson
  1 sibling, 0 replies; 17+ messages in thread
From: Keith Packard @ 2011-04-13  5:31 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 418 bytes --]

On Tue, 12 Apr 2011 18:31:51 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:

> I think we have no other option since the first thing that
> i915_driver_irq_handler() does is read IIR, which according to the limited
> knowledge I have requires forcewake.

I think the eventual plan will be to figure out if there are any
frequently used registers which *don't* require forcewake.

-- 
keith.packard@intel.com

[-- Attachment #1.2: Type: application/pgp-signature, Size: 189 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: force wake reference counting (another try)
  2011-04-13  1:31           ` Ben Widawsky
  2011-04-13  5:31             ` Keith Packard
@ 2011-04-13  5:52             ` Chris Wilson
  2011-04-13  6:35               ` Ben Widawsky
  1 sibling, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2011-04-13  5:52 UTC (permalink / raw)
  To: Ben Widawsky, Keith Packard; +Cc: intel-gfx

On Tue, 12 Apr 2011 18:31:51 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> I think we have no other option since the first thing that
> i915_driver_irq_handler() does is read IIR, which according to the limited
> knowledge I have requires forcewake.

That makes no sense at all. :(

But then I'm only a lowly sw engineer,
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: force wake reference counting (another try)
  2011-04-13  5:52             ` Chris Wilson
@ 2011-04-13  6:35               ` Ben Widawsky
  0 siblings, 0 replies; 17+ messages in thread
From: Ben Widawsky @ 2011-04-13  6:35 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, Apr 13, 2011 at 06:52:15AM +0100, Chris Wilson wrote:
> On Tue, 12 Apr 2011 18:31:51 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> > I think we have no other option since the first thing that
> > i915_driver_irq_handler() does is read IIR, which according to the limited
> > knowledge I have requires forcewake.
> 
> That makes no sense at all. :(
>
> But then I'm only a lowly sw engineer,
> -Chris

This was my initial thought as well, but as I was saying on IRC... my
only guess is they assumed that the time to service the interrupt is
less than the time it takes to powerdown, and that the GT must be awake
to send an interrupt. My other guess is I've got this all wrong.

However if we go with the former:
x = interrupt latency
y = interrupt servicing
z = time before the GT powers down
they assume (x + y) < z

And we need to remember that y is only enough time to get in and set the
forceawake bit before the GT has powered down. So this actually seems
like a reasonable thing to assume since I think powerdown time is in
microsecond granularity.

To what Keith said just above this, it does seem there are some
registers in the <0x40000 range which are special, but I don't see IIR
there. I am surprised by this as well, and I'd suggest we try to find
people to verify it.

Ben

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 1/4] drm/i915: proper use of forcewake
  2011-04-09 20:31 ` forcewake patches (ignore previous, please) Ben Widawsky
@ 2011-04-09 20:31   ` Ben Widawsky
  0 siblings, 0 replies; 17+ messages in thread
From: Ben Widawsky @ 2011-04-09 20:31 UTC (permalink / raw)
  To: intel-gfx

Moved the macros around to properly do reads and writes for the given
gpu. This is to address special requirements for gen6 (SNB) reads and
writes.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h         |   56 ++++++++++++++----------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |    4 +-
 2 files changed, 28 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5004724..8981173 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1313,9 +1313,31 @@ extern void intel_display_print_error_state(struct seq_file *m,
 } while (0)
 
 
+
+/* On SNB platform, before reading ring registers forcewake bit
+ * must be set to prevent GT core from power down and stale values being
+ * returned.
+ */
+void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
+void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
+void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
+
+/* We give fast paths for the really cool registers */
+#define NEEDS_FORCE_WAKE(dev_priv, reg) \
+	(((dev_priv)->info->gen >= 6) && \
+	((reg) < 0x40000) && \
+	((reg) != FORCEWAKE) )
+
 #define __i915_read(x, y) \
 static inline u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
-	u##x val = read##y(dev_priv->regs + reg); \
+	u##x val = 0; \
+	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+		__gen6_gt_force_wake_get(dev_priv); \
+		val = read##y(dev_priv->regs + reg); \
+		__gen6_gt_force_wake_put(dev_priv); \
+	} else { \
+		val = read##y(dev_priv->regs + reg); \
+	} \
 	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
 	return val; \
 }
@@ -1328,6 +1350,9 @@ __i915_read(64, q)
 #define __i915_write(x, y) \
 static inline void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
 	trace_i915_reg_rw(true, reg, val, sizeof(val)); \
+	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+		__gen6_gt_wait_for_fifo(dev_priv); \
+	} \
 	write##y(val, dev_priv->regs + reg); \
 }
 __i915_write(8, b)
@@ -1356,33 +1381,4 @@ __i915_write(64, q)
 #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)
 
 
-/* On SNB platform, before reading ring registers forcewake bit
- * must be set to prevent GT core from power down and stale values being
- * returned.
- */
-void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
-void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
-void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
-
-static inline u32 i915_gt_read(struct drm_i915_private *dev_priv, u32 reg)
-{
-	u32 val;
-
-	if (dev_priv->info->gen >= 6) {
-		__gen6_gt_force_wake_get(dev_priv);
-		val = I915_READ(reg);
-		__gen6_gt_force_wake_put(dev_priv);
-	} else
-		val = I915_READ(reg);
-
-	return val;
-}
-
-static inline void i915_gt_write(struct drm_i915_private *dev_priv,
-				u32 reg, u32 val)
-{
-	if (dev_priv->info->gen >= 6)
-		__gen6_gt_wait_for_fifo(dev_priv);
-	I915_WRITE(reg, val);
-}
 #endif
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f23cc5f..8e8821f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -14,8 +14,8 @@ struct  intel_hw_status_page {
 	struct		drm_i915_gem_object *obj;
 };
 
-#define I915_RING_READ(reg) i915_gt_read(dev_priv, reg)
-#define I915_RING_WRITE(reg, val) i915_gt_write(dev_priv, reg, val)
+#define I915_RING_READ(reg) I915_READ(reg)
+#define I915_RING_WRITE(reg, val) I915_WRITE(reg, val)
 
 #define I915_READ_TAIL(ring) I915_RING_READ(RING_TAIL((ring)->mmio_base))
 #define I915_WRITE_TAIL(ring, val) I915_RING_WRITE(RING_TAIL((ring)->mmio_base), val)
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 1/4] drm/i915: proper use of forcewake
       [not found]     ` <1302380787-2957-3-git-send-email-ben@bwidawsk.net>
@ 2011-04-09 20:26       ` Ben Widawsky
  0 siblings, 0 replies; 17+ messages in thread
From: Ben Widawsky @ 2011-04-09 20:26 UTC (permalink / raw)
  To: intel-gfx

Moved the macros around to properly do reads and writes for the given
gpu. This is to address special requirements for gen6 (SNB) reads and
writes.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h         |   56 ++++++++++++++----------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |    4 +-
 2 files changed, 28 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5004724..8981173 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1313,9 +1313,31 @@ extern void intel_display_print_error_state(struct seq_file *m,
 } while (0)
 
 
+
+/* On SNB platform, before reading ring registers forcewake bit
+ * must be set to prevent GT core from power down and stale values being
+ * returned.
+ */
+void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
+void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
+void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
+
+/* We give fast paths for the really cool registers */
+#define NEEDS_FORCE_WAKE(dev_priv, reg) \
+	(((dev_priv)->info->gen >= 6) && \
+	((reg) < 0x40000) && \
+	((reg) != FORCEWAKE) )
+
 #define __i915_read(x, y) \
 static inline u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
-	u##x val = read##y(dev_priv->regs + reg); \
+	u##x val = 0; \
+	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+		__gen6_gt_force_wake_get(dev_priv); \
+		val = read##y(dev_priv->regs + reg); \
+		__gen6_gt_force_wake_put(dev_priv); \
+	} else { \
+		val = read##y(dev_priv->regs + reg); \
+	} \
 	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
 	return val; \
 }
@@ -1328,6 +1350,9 @@ __i915_read(64, q)
 #define __i915_write(x, y) \
 static inline void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
 	trace_i915_reg_rw(true, reg, val, sizeof(val)); \
+	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+		__gen6_gt_wait_for_fifo(dev_priv); \
+	} \
 	write##y(val, dev_priv->regs + reg); \
 }
 __i915_write(8, b)
@@ -1356,33 +1381,4 @@ __i915_write(64, q)
 #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)
 
 
-/* On SNB platform, before reading ring registers forcewake bit
- * must be set to prevent GT core from power down and stale values being
- * returned.
- */
-void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
-void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
-void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
-
-static inline u32 i915_gt_read(struct drm_i915_private *dev_priv, u32 reg)
-{
-	u32 val;
-
-	if (dev_priv->info->gen >= 6) {
-		__gen6_gt_force_wake_get(dev_priv);
-		val = I915_READ(reg);
-		__gen6_gt_force_wake_put(dev_priv);
-	} else
-		val = I915_READ(reg);
-
-	return val;
-}
-
-static inline void i915_gt_write(struct drm_i915_private *dev_priv,
-				u32 reg, u32 val)
-{
-	if (dev_priv->info->gen >= 6)
-		__gen6_gt_wait_for_fifo(dev_priv);
-	I915_WRITE(reg, val);
-}
 #endif
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f23cc5f..8e8821f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -14,8 +14,8 @@ struct  intel_hw_status_page {
 	struct		drm_i915_gem_object *obj;
 };
 
-#define I915_RING_READ(reg) i915_gt_read(dev_priv, reg)
-#define I915_RING_WRITE(reg, val) i915_gt_write(dev_priv, reg, val)
+#define I915_RING_READ(reg) I915_READ(reg)
+#define I915_RING_WRITE(reg, val) I915_WRITE(reg, val)
 
 #define I915_READ_TAIL(ring) I915_RING_READ(RING_TAIL((ring)->mmio_base))
 #define I915_WRITE_TAIL(ring, val) I915_RING_WRITE(RING_TAIL((ring)->mmio_base), val)
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 1/4] drm/i915: proper use of forcewake
  2011-04-08 17:47 forcewake junk, RFC, RFT(test) Ben Widawsky
@ 2011-04-08 17:47 ` Ben Widawsky
  2011-04-09 20:26 ` forcewake junk, part2 Ben Widawsky
  2011-04-09 20:31 ` forcewake patches (ignore previous, please) Ben Widawsky
  2 siblings, 0 replies; 17+ messages in thread
From: Ben Widawsky @ 2011-04-08 17:47 UTC (permalink / raw)
  To: intel-gfx

Moved the macros around to properly do reads and writes for the given
gpu. This is to address special requirements for gen6 (SNB) reads and
writes.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h         |   56 ++++++++++++++----------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |    4 +-
 2 files changed, 28 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5004724..8981173 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1313,9 +1313,31 @@ extern void intel_display_print_error_state(struct seq_file *m,
 } while (0)
 
 
+
+/* On SNB platform, before reading ring registers forcewake bit
+ * must be set to prevent GT core from power down and stale values being
+ * returned.
+ */
+void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
+void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
+void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
+
+/* We give fast paths for the really cool registers */
+#define NEEDS_FORCE_WAKE(dev_priv, reg) \
+	(((dev_priv)->info->gen >= 6) && \
+	((reg) < 0x40000) && \
+	((reg) != FORCEWAKE) )
+
 #define __i915_read(x, y) \
 static inline u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
-	u##x val = read##y(dev_priv->regs + reg); \
+	u##x val = 0; \
+	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+		__gen6_gt_force_wake_get(dev_priv); \
+		val = read##y(dev_priv->regs + reg); \
+		__gen6_gt_force_wake_put(dev_priv); \
+	} else { \
+		val = read##y(dev_priv->regs + reg); \
+	} \
 	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
 	return val; \
 }
@@ -1328,6 +1350,9 @@ __i915_read(64, q)
 #define __i915_write(x, y) \
 static inline void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
 	trace_i915_reg_rw(true, reg, val, sizeof(val)); \
+	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+		__gen6_gt_wait_for_fifo(dev_priv); \
+	} \
 	write##y(val, dev_priv->regs + reg); \
 }
 __i915_write(8, b)
@@ -1356,33 +1381,4 @@ __i915_write(64, q)
 #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)
 
 
-/* On SNB platform, before reading ring registers forcewake bit
- * must be set to prevent GT core from power down and stale values being
- * returned.
- */
-void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
-void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
-void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
-
-static inline u32 i915_gt_read(struct drm_i915_private *dev_priv, u32 reg)
-{
-	u32 val;
-
-	if (dev_priv->info->gen >= 6) {
-		__gen6_gt_force_wake_get(dev_priv);
-		val = I915_READ(reg);
-		__gen6_gt_force_wake_put(dev_priv);
-	} else
-		val = I915_READ(reg);
-
-	return val;
-}
-
-static inline void i915_gt_write(struct drm_i915_private *dev_priv,
-				u32 reg, u32 val)
-{
-	if (dev_priv->info->gen >= 6)
-		__gen6_gt_wait_for_fifo(dev_priv);
-	I915_WRITE(reg, val);
-}
 #endif
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f23cc5f..8e8821f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -14,8 +14,8 @@ struct  intel_hw_status_page {
 	struct		drm_i915_gem_object *obj;
 };
 
-#define I915_RING_READ(reg) i915_gt_read(dev_priv, reg)
-#define I915_RING_WRITE(reg, val) i915_gt_write(dev_priv, reg, val)
+#define I915_RING_READ(reg) I915_READ(reg)
+#define I915_RING_WRITE(reg, val) I915_WRITE(reg, val)
 
 #define I915_READ_TAIL(ring) I915_RING_READ(RING_TAIL((ring)->mmio_base))
 #define I915_WRITE_TAIL(ring, val) I915_RING_WRITE(RING_TAIL((ring)->mmio_base), val)
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2011-04-13  6:35 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-04-12  1:01 force wake reference counting (another try) Ben Widawsky
2011-04-12  1:01 ` [PATCH 1/4] drm/i915: proper use of forcewake Ben Widawsky
2011-04-12  1:01 ` [PATCH 2/4] drm/i915: refcounts for forcewake Ben Widawsky
2011-04-12  1:01 ` [PATCH 3/4] drm/i915: userspace interface to the forcewake refcount Ben Widawsky
2011-04-12  1:01 ` [PATCH 4/4] drm/i915: fewer warning patch (temporary) Ben Widawsky
2011-04-12  8:02 ` force wake reference counting (another try) Chris Wilson
2011-04-12 16:30   ` Ben Widawsky
2011-04-12 16:56     ` Keith Packard
2011-04-12 17:21       ` Chris Wilson
2011-04-12 17:41         ` Keith Packard
2011-04-13  1:31           ` Ben Widawsky
2011-04-13  5:31             ` Keith Packard
2011-04-13  5:52             ` Chris Wilson
2011-04-13  6:35               ` Ben Widawsky
  -- strict thread matches above, loose matches on Subject: below --
2011-04-08 17:47 forcewake junk, RFC, RFT(test) Ben Widawsky
2011-04-08 17:47 ` [PATCH 1/4] drm/i915: proper use of forcewake Ben Widawsky
2011-04-09 20:26 ` forcewake junk, part2 Ben Widawsky
2011-04-09 20:26   ` (no subject) Ben Widawsky
     [not found]     ` <1302380787-2957-3-git-send-email-ben@bwidawsk.net>
2011-04-09 20:26       ` [PATCH 1/4] drm/i915: proper use of forcewake Ben Widawsky
2011-04-09 20:31 ` forcewake patches (ignore previous, please) Ben Widawsky
2011-04-09 20:31   ` [PATCH 1/4] drm/i915: proper use of forcewake Ben Widawsky

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).