intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* forcewake v4 (now with spinlock)
@ 2011-04-14 18:13 Ben Widawsky
  2011-04-14 18:13 ` (no subject) Ben Widawsky
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: Ben Widawsky @ 2011-04-14 18:13 UTC (permalink / raw)
  To: intel-gfx


As the patches say, I don't think adding this spinlock will have too
much of a performance impact (I couldn't notice anything in my limited
testing), because the serializing locks are already held when acquiring
this lock. I suppose it now serializes access between stuct_mutex and
config.lock, but in most cases I don't think that's big.

Perhaps the ugliest change is spin_lock() in debugfs. But as I argue in
the comments, if you can't get the lock there, register reads are also
failing, and hung somewhere else.

Ben

^ permalink raw reply	[flat|nested] 14+ messages in thread

* (no subject)
  2011-04-14 18:13 forcewake v4 (now with spinlock) Ben Widawsky
@ 2011-04-14 18:13 ` Ben Widawsky
  2011-04-14 18:13 ` [PATCH 1/3] drm/i915: proper use of forcewake Ben Widawsky
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 14+ messages in thread
From: Ben Widawsky @ 2011-04-14 18:13 UTC (permalink / raw)
  To: intel-gfx

GIT: [Intel-gfx] [PATCH 1/3] drm/i915: proper use of forcewake
GIT: [Intel-gfx] [PATCH 2/3] drm/i915: refcounts for forcewake
GIT: [Intel-gfx] [PATCH 3/3] drm/i915: userspace interface to the forcewake refcount

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/3] drm/i915: proper use of forcewake
  2011-04-14 18:13 forcewake v4 (now with spinlock) Ben Widawsky
  2011-04-14 18:13 ` (no subject) Ben Widawsky
@ 2011-04-14 18:13 ` Ben Widawsky
  2011-04-14 18:13 ` [PATCH 2/3] drm/i915: reference counted forcewake Ben Widawsky
  2011-04-14 18:13 ` [PATCH 3/3] drm/i915: userspace interface to the forcewake Ben Widawsky
  3 siblings, 0 replies; 14+ messages in thread
From: Ben Widawsky @ 2011-04-14 18:13 UTC (permalink / raw)
  To: intel-gfx

Moved the macros around to properly do reads and writes for the given
gpu. This is to address special requirements for gen6 (SNB) reads and
writes.

v2:
no change

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h         |   56 ++++++++++++++----------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |    4 +-
 2 files changed, 28 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5004724..8981173 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1313,9 +1313,31 @@ extern void intel_display_print_error_state(struct seq_file *m,
 } while (0)
 
 
+
+/* On SNB platform, before reading ring registers forcewake bit
+ * must be set to prevent GT core from power down and stale values being
+ * returned.
+ */
+void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
+void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
+void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
+
+/* We give fast paths for the really cool registers */
+#define NEEDS_FORCE_WAKE(dev_priv, reg) \
+	(((dev_priv)->info->gen >= 6) && \
+	((reg) < 0x40000) && \
+	((reg) != FORCEWAKE) )
+
 #define __i915_read(x, y) \
 static inline u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
-	u##x val = read##y(dev_priv->regs + reg); \
+	u##x val = 0; \
+	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+		__gen6_gt_force_wake_get(dev_priv); \
+		val = read##y(dev_priv->regs + reg); \
+		__gen6_gt_force_wake_put(dev_priv); \
+	} else { \
+		val = read##y(dev_priv->regs + reg); \
+	} \
 	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
 	return val; \
 }
@@ -1328,6 +1350,9 @@ __i915_read(64, q)
 #define __i915_write(x, y) \
 static inline void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
 	trace_i915_reg_rw(true, reg, val, sizeof(val)); \
+	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+		__gen6_gt_wait_for_fifo(dev_priv); \
+	} \
 	write##y(val, dev_priv->regs + reg); \
 }
 __i915_write(8, b)
@@ -1356,33 +1381,4 @@ __i915_write(64, q)
 #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)
 
 
-/* On SNB platform, before reading ring registers forcewake bit
- * must be set to prevent GT core from power down and stale values being
- * returned.
- */
-void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
-void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
-void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
-
-static inline u32 i915_gt_read(struct drm_i915_private *dev_priv, u32 reg)
-{
-	u32 val;
-
-	if (dev_priv->info->gen >= 6) {
-		__gen6_gt_force_wake_get(dev_priv);
-		val = I915_READ(reg);
-		__gen6_gt_force_wake_put(dev_priv);
-	} else
-		val = I915_READ(reg);
-
-	return val;
-}
-
-static inline void i915_gt_write(struct drm_i915_private *dev_priv,
-				u32 reg, u32 val)
-{
-	if (dev_priv->info->gen >= 6)
-		__gen6_gt_wait_for_fifo(dev_priv);
-	I915_WRITE(reg, val);
-}
 #endif
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f23cc5f..8e8821f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -14,8 +14,8 @@ struct  intel_hw_status_page {
 	struct		drm_i915_gem_object *obj;
 };
 
-#define I915_RING_READ(reg) i915_gt_read(dev_priv, reg)
-#define I915_RING_WRITE(reg, val) i915_gt_write(dev_priv, reg, val)
+#define I915_RING_READ(reg) I915_READ(reg)
+#define I915_RING_WRITE(reg, val) I915_WRITE(reg, val)
 
 #define I915_READ_TAIL(ring) I915_RING_READ(RING_TAIL((ring)->mmio_base))
 #define I915_WRITE_TAIL(ring, val) I915_RING_WRITE(RING_TAIL((ring)->mmio_base), val)
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/3] drm/i915: reference counted forcewake
  2011-04-14 18:13 forcewake v4 (now with spinlock) Ben Widawsky
  2011-04-14 18:13 ` (no subject) Ben Widawsky
  2011-04-14 18:13 ` [PATCH 1/3] drm/i915: proper use of forcewake Ben Widawsky
@ 2011-04-14 18:13 ` Ben Widawsky
  2011-04-14 19:12   ` Ben Widawsky
  2011-04-16  6:52   ` Chris Wilson
  2011-04-14 18:13 ` [PATCH 3/3] drm/i915: userspace interface to the forcewake Ben Widawsky
  3 siblings, 2 replies; 14+ messages in thread
From: Ben Widawsky @ 2011-04-14 18:13 UTC (permalink / raw)
  To: intel-gfx

Provide a reference count to track the forcewake state of the GPU. The
savings is up to 1 UC read if the unit is already awake, but the main
goal is to give userspace some mechanism to wake the GPU.

v2:
Added a spin_lock for synchronizing access to forcewake. The lock
should not be heavily contested because any caller will either have
struct_mutex or config.mutex, and those locks should be much more
serializing than this. In terms of locking order:

1.[config.mutex]
2.	[struct_mutex]
3.		forcewake_lock

By default i915_read[bwlq] functions now acquire forcewake_lock. Added a new
_locked version of the read function for callers wishing to prevent the
GT from possibly going to sleep in between reads. (We could also use
these functions to try to get system state if the lock somehow becomes
stuck).

Removed all previous callers of gen6_gt_force_wake_get/put because the
register access should now implicitly be safe.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_debugfs.c  |    5 -----
 drivers/gpu/drm/i915/i915_dma.c      |    2 ++
 drivers/gpu/drm/i915/i915_drv.c      |   20 ++++++++++++++++++--
 drivers/gpu/drm/i915/i915_drv.h      |   28 ++++++++++++++++++++++++----
 drivers/gpu/drm/i915/intel_display.c |    5 -----
 5 files changed, 44 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 87c8e29..cc3818f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -872,9 +872,6 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
 		u32 rpdownei, rpcurdown, rpprevdown;
 		int max_freq;
 
-		/* RPSTAT1 is in the GT power well */
-		__gen6_gt_force_wake_get(dev_priv);
-
 		rpstat = I915_READ(GEN6_RPSTAT1);
 		rpupei = I915_READ(GEN6_RP_CUR_UP_EI);
 		rpcurup = I915_READ(GEN6_RP_CUR_UP);
@@ -917,8 +914,6 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
 		max_freq = rp_state_cap & 0xff;
 		seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
 			   max_freq * 50);
-
-		__gen6_gt_force_wake_put(dev_priv);
 	} else {
 		seq_printf(m, "no P-state info available\n");
 	}
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 7273037..838bbae 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -2025,6 +2025,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 
 	spin_lock_init(&dev_priv->irq_lock);
 	spin_lock_init(&dev_priv->error_lock);
+	if (IS_GEN6(dev))
+		spin_lock_init(&dev_priv->forcewake_lock);
 
 	if (IS_MOBILE(dev) || !IS_GEN2(dev))
 		dev_priv->num_pipe = 2;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index c34a8dd..1a3a096 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -263,7 +263,7 @@ void intel_detect_pch (struct drm_device *dev)
 	}
 }
 
-void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
+static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
 {
 	int count;
 
@@ -279,12 +279,28 @@ void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
 		udelay(10);
 }
 
-void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
+void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
+{
+	WARN_ON(!spin_is_locked(&dev_priv->forcewake_lock));
+
+	if (dev_priv->forcewake_count++ == 0)
+		__gen6_gt_force_wake_get(dev_priv);
+}
+
+static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE_NOTRACE(FORCEWAKE, 0);
 	POSTING_READ(FORCEWAKE);
 }
 
+void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
+{
+	WARN_ON(!spin_is_locked(&dev_priv->forcewake_lock));
+
+	if (--dev_priv->forcewake_count == 0)
+		__gen6_gt_force_wake_put(dev_priv);
+}
+
 void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
 {
 	int loop = 500;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8981173..320daf1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -703,6 +703,9 @@ typedef struct drm_i915_private {
 	struct intel_fbdev *fbdev;
 
 	struct drm_property *broadcast_rgb_property;
+
+	spinlock_t forcewake_lock;
+	int forcewake_count;
 } drm_i915_private_t;
 
 struct drm_i915_gem_object {
@@ -1318,8 +1321,8 @@ extern void intel_display_print_error_state(struct seq_file *m,
  * must be set to prevent GT core from power down and stale values being
  * returned.
  */
-void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
-void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
+void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
+void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
 void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
 
 /* We give fast paths for the really cool registers */
@@ -1332,15 +1335,32 @@ void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
 static inline u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
 	u##x val = 0; \
 	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
-		__gen6_gt_force_wake_get(dev_priv); \
+		unsigned long flags; \
+		spin_lock_irqsave(&(dev_priv)->forcewake_lock, flags); \
+		gen6_gt_force_wake_get(dev_priv); \
+		val = read##y(dev_priv->regs + reg); \
+		gen6_gt_force_wake_put(dev_priv); \
+		spin_unlock_irqrestore(&(dev_priv)->forcewake_lock, flags); \
+	} else { \
+		val = read##y(dev_priv->regs + reg); \
+	} \
+	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
+	return val; \
+} \
+\
+static inline u##x i915_read##x##_locked(struct drm_i915_private *dev_priv, u32 reg) { \
+	u##x val = 0; \
+	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+		gen6_gt_force_wake_get(dev_priv); \
 		val = read##y(dev_priv->regs + reg); \
-		__gen6_gt_force_wake_put(dev_priv); \
+		gen6_gt_force_wake_put(dev_priv); \
 	} else { \
 		val = read##y(dev_priv->regs + reg); \
 	} \
 	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
 	return val; \
 }
+
 __i915_read(8, b)
 __i915_read(16, w)
 __i915_read(32, l)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 432fc04..bd85272 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1828,7 +1828,6 @@ static void sandybridge_blit_fbc_update(struct drm_device *dev)
 	u32 blt_ecoskpd;
 
 	/* Make sure blitter notifies FBC of writes */
-	__gen6_gt_force_wake_get(dev_priv);
 	blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
 	blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
 		GEN6_BLITTER_LOCK_SHIFT;
@@ -1839,7 +1838,6 @@ static void sandybridge_blit_fbc_update(struct drm_device *dev)
 			 GEN6_BLITTER_LOCK_SHIFT);
 	I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
 	POSTING_READ(GEN6_BLITTER_ECOSKPD);
-	__gen6_gt_force_wake_put(dev_priv);
 }
 
 static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
@@ -6853,7 +6851,6 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
 	 * userspace...
 	 */
 	I915_WRITE(GEN6_RC_STATE, 0);
-	__gen6_gt_force_wake_get(dev_priv);
 
 	/* disable the counters and set deterministic thresholds */
 	I915_WRITE(GEN6_RC_CONTROL, 0);
@@ -6950,8 +6947,6 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN6_PMIMR, 0);
 	/* enable all PM interrupts */
 	I915_WRITE(GEN6_PMINTRMSK, 0);
-
-	__gen6_gt_force_wake_put(dev_priv);
 }
 
 void intel_enable_clock_gating(struct drm_device *dev)
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/3] drm/i915: userspace interface to the forcewake
  2011-04-14 18:13 forcewake v4 (now with spinlock) Ben Widawsky
                   ` (2 preceding siblings ...)
  2011-04-14 18:13 ` [PATCH 2/3] drm/i915: reference counted forcewake Ben Widawsky
@ 2011-04-14 18:13 ` Ben Widawsky
  2011-04-14 19:13   ` Ben Widawsky
  2011-04-14 19:56   ` Paul Menzel
  3 siblings, 2 replies; 14+ messages in thread
From: Ben Widawsky @ 2011-04-14 18:13 UTC (permalink / raw)
  To: intel-gfx

userspace to the forcewake reference count via debugfs.

v2:
use new spin_locks instead of struct_mutex

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_debugfs.c |   77 +++++++++++++++++++++++++++++++++++
 1 files changed, 77 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index cc3818f..fed41b9 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1181,6 +1181,17 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int i915_gen6_forcewake_count_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	seq_printf(m, "forcewake count = %d\n", dev_priv->forcewake_count);
+
+	return 0;
+}
+
 static int
 i915_wedged_open(struct inode *inode,
 		 struct file *filp)
@@ -1283,6 +1294,65 @@ static int i915_wedged_create(struct dentry *root, struct drm_minor *minor)
 	return drm_add_fake_info_node(minor, ent, &i915_wedged_fops);
 }
 
+static int i915_forcewake_open(struct inode *inode, struct file *file)
+{
+	struct drm_device *dev = inode->i_private;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (!IS_GEN6(dev))
+		return 0;
+
+	/*
+	 * Paranoid people might say that it's bad that we can potentially hang
+	 * userspace spinning on this lock. However, if we cannot acquire this
+	 * lock it means that almost certainly the driver has hung, is not
+	 * unload-able, and the GPU is not resettable. Therefore hanging here is
+	 * probably a minor inconvenience not to be seen my almost every user.
+	 */
+	spin_lock_irq(&dev_priv->forcewake_lock);
+	gen6_gt_force_wake_get(dev_priv);
+	spin_unlock_irq(&dev_priv->forcewake_lock);
+
+	return 0;
+}
+
+int i915_forcewake_release(struct inode *inode, struct file *file)
+{
+	struct drm_device *dev = inode->i_private;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (!IS_GEN6(dev))
+		return 0;
+
+	/* see i915_forcewake_open() */
+	spin_lock_irq(&dev_priv->forcewake_lock);
+	gen6_gt_force_wake_put(dev_priv);
+	spin_unlock_irq(&dev_priv->forcewake_lock);
+
+	return 0;
+}
+
+static const struct file_operations i915_forcewake_fops = {
+	.owner = THIS_MODULE,
+	.open = i915_forcewake_open,
+	.release = i915_forcewake_release,
+};
+
+static int i915_forcewake_create(struct dentry *root, struct drm_minor *minor)
+{
+	struct drm_device *dev = minor->dev;
+	struct dentry *ent;
+
+	ent = debugfs_create_file("i915_forcewake_user",
+				  S_IRWXU,
+				  root, dev,
+				  &i915_forcewake_fops);
+	if (IS_ERR(ent))
+		return PTR_ERR(ent);
+
+	return 0;
+}
+
 static struct drm_info_list i915_debugfs_list[] = {
 	{"i915_capabilities", i915_capabilities, 0},
 	{"i915_gem_objects", i915_gem_object_info, 0},
@@ -1319,6 +1389,7 @@ static struct drm_info_list i915_debugfs_list[] = {
 	{"i915_sr_status", i915_sr_status, 0},
 	{"i915_opregion", i915_opregion, 0},
 	{"i915_gem_framebuffer", i915_gem_framebuffer_info, 0},
+	{"i915_gen6_forcewake_count", i915_gen6_forcewake_count_info, 0},
 };
 #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
 
@@ -1330,6 +1401,10 @@ int i915_debugfs_init(struct drm_minor *minor)
 	if (ret)
 		return ret;
 
+	ret = i915_forcewake_create(minor->debugfs_root, minor);
+	if (ret)
+		return ret;
+
 	return drm_debugfs_create_files(i915_debugfs_list,
 					I915_DEBUGFS_ENTRIES,
 					minor->debugfs_root, minor);
@@ -1339,6 +1414,8 @@ void i915_debugfs_cleanup(struct drm_minor *minor)
 {
 	drm_debugfs_remove_files(i915_debugfs_list,
 				 I915_DEBUGFS_ENTRIES, minor);
+	drm_debugfs_remove_files((struct drm_info_list *) &i915_forcewake_fops,
+				 1, minor);
 	drm_debugfs_remove_files((struct drm_info_list *) &i915_wedged_fops,
 				 1, minor);
 }
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/3] drm/i915: reference counted forcewake
  2011-04-14 18:13 ` [PATCH 2/3] drm/i915: reference counted forcewake Ben Widawsky
@ 2011-04-14 19:12   ` Ben Widawsky
  2011-04-16  6:52   ` Chris Wilson
  1 sibling, 0 replies; 14+ messages in thread
From: Ben Widawsky @ 2011-04-14 19:12 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

Provide a reference count to track the forcewake state of the GPU. The
savings is up to 1 UC read if the unit is already awake, but the main
goal is to give userspace some mechanism to wake the GPU.

v2:
Added a spin_lock for synchronizing access to forcewake. The lock
should not be heavily contested because any caller will either have
struct_mutex or config.mutex, and those locks should be much more
serializing than this. In terms of locking order:

1.[config.mutex]
2.	[struct_mutex]
3.		forcewake_lock

By default i915_read[bwlq] functions now acquire forcewake_lock. Added a new
_locked version of the read function for callers wishing to prevent the
GT from possibly going to sleep in between reads. (We could also use
these functions to try to get system state if the lock somehow becomes
stuck).

Removed all previous callers of gen6_gt_force_wake_get/put because the
register access should now implicitly be safe.

v3:
Moved locking into gen6_gt_force_wake_get/put. Makes the code easier,
and also has saner semantics.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_debugfs.c  |    5 -----
 drivers/gpu/drm/i915/i915_dma.c      |    2 ++
 drivers/gpu/drm/i915/i915_drv.c      |   26 ++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_drv.h      |   19 +++++++++++++++----
 drivers/gpu/drm/i915/intel_display.c |    5 -----
 5 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 87c8e29..cc3818f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -872,9 +872,6 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
 		u32 rpdownei, rpcurdown, rpprevdown;
 		int max_freq;
 
-		/* RPSTAT1 is in the GT power well */
-		__gen6_gt_force_wake_get(dev_priv);
-
 		rpstat = I915_READ(GEN6_RPSTAT1);
 		rpupei = I915_READ(GEN6_RP_CUR_UP_EI);
 		rpcurup = I915_READ(GEN6_RP_CUR_UP);
@@ -917,8 +914,6 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
 		max_freq = rp_state_cap & 0xff;
 		seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
 			   max_freq * 50);
-
-		__gen6_gt_force_wake_put(dev_priv);
 	} else {
 		seq_printf(m, "no P-state info available\n");
 	}
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 7273037..838bbae 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -2025,6 +2025,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 
 	spin_lock_init(&dev_priv->irq_lock);
 	spin_lock_init(&dev_priv->error_lock);
+	if (IS_GEN6(dev))
+		spin_lock_init(&dev_priv->forcewake_lock);
 
 	if (IS_MOBILE(dev) || !IS_GEN2(dev))
 		dev_priv->num_pipe = 2;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index c34a8dd..d31102e 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -263,7 +263,7 @@ void intel_detect_pch (struct drm_device *dev)
 	}
 }
 
-void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
+static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
 {
 	int count;
 
@@ -279,12 +279,34 @@ void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
 		udelay(10);
 }
 
-void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
+void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&dev_priv->forcewake_lock, flags);
+
+	if (dev_priv->forcewake_count++ == 0)
+		__gen6_gt_force_wake_get(dev_priv);
+
+	spin_unlock_irqrestore(&dev_priv->forcewake_lock, flags);
+}
+
+static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE_NOTRACE(FORCEWAKE, 0);
 	POSTING_READ(FORCEWAKE);
 }
 
+void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&dev_priv->forcewake_lock, flags);
+
+	if (--dev_priv->forcewake_count == 0)
+		__gen6_gt_force_wake_put(dev_priv);
+
+	spin_unlock_irqrestore(&dev_priv->forcewake_lock, flags);
+}
+
 void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
 {
 	int loop = 500;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8981173..88433ec 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -703,6 +703,9 @@ typedef struct drm_i915_private {
 	struct intel_fbdev *fbdev;
 
 	struct drm_property *broadcast_rgb_property;
+
+	spinlock_t forcewake_lock;
+	int forcewake_count;
 } drm_i915_private_t;
 
 struct drm_i915_gem_object {
@@ -1318,8 +1321,8 @@ extern void intel_display_print_error_state(struct seq_file *m,
  * must be set to prevent GT core from power down and stale values being
  * returned.
  */
-void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
-void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
+void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
+void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
 void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
 
 /* We give fast paths for the really cool registers */
@@ -1332,15 +1335,23 @@ void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
 static inline u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
 	u##x val = 0; \
 	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
-		__gen6_gt_force_wake_get(dev_priv); \
+		gen6_gt_force_wake_get(dev_priv); \
 		val = read##y(dev_priv->regs + reg); \
-		__gen6_gt_force_wake_put(dev_priv); \
+		gen6_gt_force_wake_put(dev_priv); \
 	} else { \
 		val = read##y(dev_priv->regs + reg); \
 	} \
 	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
 	return val; \
+} \
+\
+static inline u##x i915_read##x##_awake(struct drm_i915_private *dev_priv, u32 reg) { \
+	u##x val = 0; \
+	val = read##y(dev_priv->regs + reg); \
+	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
+	return val; \
 }
+
 __i915_read(8, b)
 __i915_read(16, w)
 __i915_read(32, l)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 432fc04..bd85272 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1828,7 +1828,6 @@ static void sandybridge_blit_fbc_update(struct drm_device *dev)
 	u32 blt_ecoskpd;
 
 	/* Make sure blitter notifies FBC of writes */
-	__gen6_gt_force_wake_get(dev_priv);
 	blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
 	blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
 		GEN6_BLITTER_LOCK_SHIFT;
@@ -1839,7 +1838,6 @@ static void sandybridge_blit_fbc_update(struct drm_device *dev)
 			 GEN6_BLITTER_LOCK_SHIFT);
 	I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
 	POSTING_READ(GEN6_BLITTER_ECOSKPD);
-	__gen6_gt_force_wake_put(dev_priv);
 }
 
 static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
@@ -6853,7 +6851,6 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
 	 * userspace...
 	 */
 	I915_WRITE(GEN6_RC_STATE, 0);
-	__gen6_gt_force_wake_get(dev_priv);
 
 	/* disable the counters and set deterministic thresholds */
 	I915_WRITE(GEN6_RC_CONTROL, 0);
@@ -6950,8 +6947,6 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN6_PMIMR, 0);
 	/* enable all PM interrupts */
 	I915_WRITE(GEN6_PMINTRMSK, 0);
-
-	__gen6_gt_force_wake_put(dev_priv);
 }
 
 void intel_enable_clock_gating(struct drm_device *dev)
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/3] drm/i915: userspace interface to the forcewake
  2011-04-14 18:13 ` [PATCH 3/3] drm/i915: userspace interface to the forcewake Ben Widawsky
@ 2011-04-14 19:13   ` Ben Widawsky
  2011-04-14 19:56   ` Paul Menzel
  1 sibling, 0 replies; 14+ messages in thread
From: Ben Widawsky @ 2011-04-14 19:13 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

userspace to the forcewake reference count via debugfs.

v2:
use new spin_locks instead of struct_mutex

v3:
spin locks are acquired by the get/put functions now

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_debugfs.c |   73 +++++++++++++++++++++++++++++++++++
 1 files changed, 73 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index cc3818f..be7dba0 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1181,6 +1181,17 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int i915_gen6_forcewake_count_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	seq_printf(m, "forcewake count = %d\n", dev_priv->forcewake_count);
+
+	return 0;
+}
+
 static int
 i915_wedged_open(struct inode *inode,
 		 struct file *filp)
@@ -1283,6 +1294,61 @@ static int i915_wedged_create(struct dentry *root, struct drm_minor *minor)
 	return drm_add_fake_info_node(minor, ent, &i915_wedged_fops);
 }
 
+static int i915_forcewake_open(struct inode *inode, struct file *file)
+{
+	struct drm_device *dev = inode->i_private;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (!IS_GEN6(dev))
+		return 0;
+
+	/*
+	 * Paranoid people might say that it's bad that we can potentially hang
+	 * userspace spinning on this lock. However, if we cannot acquire this
+	 * lock it means that almost certainly the driver has hung, is not
+	 * unload-able, and the GPU is not resettable. Therefore hanging here is
+	 * probably a minor inconvenience not to be seen my almost every user.
+	 */
+	gen6_gt_force_wake_get(dev_priv);
+
+	return 0;
+}
+
+int i915_forcewake_release(struct inode *inode, struct file *file)
+{
+	struct drm_device *dev = inode->i_private;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (!IS_GEN6(dev))
+		return 0;
+
+	/* see i915_forcewake_open() */
+	gen6_gt_force_wake_put(dev_priv);
+
+	return 0;
+}
+
+static const struct file_operations i915_forcewake_fops = {
+	.owner = THIS_MODULE,
+	.open = i915_forcewake_open,
+	.release = i915_forcewake_release,
+};
+
+static int i915_forcewake_create(struct dentry *root, struct drm_minor *minor)
+{
+	struct drm_device *dev = minor->dev;
+	struct dentry *ent;
+
+	ent = debugfs_create_file("i915_forcewake_user",
+				  S_IRWXU,
+				  root, dev,
+				  &i915_forcewake_fops);
+	if (IS_ERR(ent))
+		return PTR_ERR(ent);
+
+	return 0;
+}
+
 static struct drm_info_list i915_debugfs_list[] = {
 	{"i915_capabilities", i915_capabilities, 0},
 	{"i915_gem_objects", i915_gem_object_info, 0},
@@ -1319,6 +1385,7 @@ static struct drm_info_list i915_debugfs_list[] = {
 	{"i915_sr_status", i915_sr_status, 0},
 	{"i915_opregion", i915_opregion, 0},
 	{"i915_gem_framebuffer", i915_gem_framebuffer_info, 0},
+	{"i915_gen6_forcewake_count", i915_gen6_forcewake_count_info, 0},
 };
 #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
 
@@ -1330,6 +1397,10 @@ int i915_debugfs_init(struct drm_minor *minor)
 	if (ret)
 		return ret;
 
+	ret = i915_forcewake_create(minor->debugfs_root, minor);
+	if (ret)
+		return ret;
+
 	return drm_debugfs_create_files(i915_debugfs_list,
 					I915_DEBUGFS_ENTRIES,
 					minor->debugfs_root, minor);
@@ -1339,6 +1410,8 @@ void i915_debugfs_cleanup(struct drm_minor *minor)
 {
 	drm_debugfs_remove_files(i915_debugfs_list,
 				 I915_DEBUGFS_ENTRIES, minor);
+	drm_debugfs_remove_files((struct drm_info_list *) &i915_forcewake_fops,
+				 1, minor);
 	drm_debugfs_remove_files((struct drm_info_list *) &i915_wedged_fops,
 				 1, minor);
 }
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/3] drm/i915: userspace interface to the forcewake
  2011-04-14 18:13 ` [PATCH 3/3] drm/i915: userspace interface to the forcewake Ben Widawsky
  2011-04-14 19:13   ` Ben Widawsky
@ 2011-04-14 19:56   ` Paul Menzel
  2011-04-16  7:05     ` Chris Wilson
  1 sibling, 1 reply; 14+ messages in thread
From: Paul Menzel @ 2011-04-14 19:56 UTC (permalink / raw)
  To: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 2527 bytes --]

Dear Ben,


Am Donnerstag, den 14.04.2011, 11:13 -0700 schrieb Ben Widawsky:
> userspace to the forcewake reference count via debugfs.
> 
> v2:
> use new spin_locks instead of struct_mutex

in my opinion these remarks should not go into the commit message.
Reading the commit log the reader is not interested in what patch
iteration some change was introduced.

> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> ---

Reviewing patches the comment, what changed in what iteration, is useful
though. So it should go after the `---` and be followed by a `---`, so
that `git am` ignores it.

        commit message
        
        S-o-b: Joe User <j@example.org>
        ---
        v2: changed foo
        ---
        diff

>  drivers/gpu/drm/i915/i915_debugfs.c |   77 +++++++++++++++++++++++++++++++++++
>  1 files changed, 77 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index cc3818f..fed41b9 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1181,6 +1181,17 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
>  	return 0;
>  }
>  
> +static int i915_gen6_forcewake_count_info(struct seq_file *m, void *data)
> +{
> +	struct drm_info_node *node = (struct drm_info_node *) m->private;
> +	struct drm_device *dev = node->minor->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	seq_printf(m, "forcewake count = %d\n", dev_priv->forcewake_count);
> +
> +	return 0;
> +}
> +
>  static int
>  i915_wedged_open(struct inode *inode,
>  		 struct file *filp)
> @@ -1283,6 +1294,65 @@ static int i915_wedged_create(struct dentry *root, struct drm_minor *minor)
>  	return drm_add_fake_info_node(minor, ent, &i915_wedged_fops);
>  }
>  
> +static int i915_forcewake_open(struct inode *inode, struct file *file)
> +{
> +	struct drm_device *dev = inode->i_private;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	if (!IS_GEN6(dev))
> +		return 0;
> +
> +	/*
> +	 * Paranoid people might say that it's bad that we can potentially hang
> +	 * userspace spinning on this lock. However, if we cannot acquire this
> +	 * lock it means that almost certainly the driver has hung, is not
> +	 * unload-able, and the GPU is not resettable. Therefore hanging here is
> +	 * probably a minor inconvenience not to be seen my almost every user.

s/my/by/

[…]


Thanks,

Paul

[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] drm/i915: reference counted forcewake
  2011-04-14 18:13 ` [PATCH 2/3] drm/i915: reference counted forcewake Ben Widawsky
  2011-04-14 19:12   ` Ben Widawsky
@ 2011-04-16  6:52   ` Chris Wilson
  2011-04-18 17:31     ` Ben Widawsky
  1 sibling, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2011-04-16  6:52 UTC (permalink / raw)
  To: Ben Widawsky, intel-gfx

On Thu, 14 Apr 2011 11:13:46 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> Provide a reference count to track the forcewake state of the GPU. The
> savings is up to 1 UC read if the unit is already awake, but the main
> goal is to give userspace some mechanism to wake the GPU.
> 
> v2:
> Added a spin_lock for synchronizing access to forcewake. The lock
> should not be heavily contested because any caller will either have
> struct_mutex or config.mutex, and those locks should be much more
> serializing than this. In terms of locking order:
> 
> 1.[config.mutex]
> 2.	[struct_mutex]
> 3.		forcewake_lock
> 
> By default i915_read[bwlq] functions now acquire forcewake_lock. Added a new
> _locked version of the read function for callers wishing to prevent the
> GT from possibly going to sleep in between reads. (We could also use
> these functions to try to get system state if the lock somehow becomes
> stuck).

This patch needs to be split again. I want the update to the existing
interface and users to be separate from the introduction of a new
interface. (This means that should we ever regret that interface we can
rip it out without undoing the fixes.)

Replacing the get()...long sequence of read/writes...put() did make me cry
a little, but it is indeed safer to move the check into the individual
reads (though there is nothing to prevent the rc6-window from closing
during the middle of that sequence... we need to check that is ok) and too
ugly to special case those rare times when we do modify 20 regs at once
(or maybe we have to?).

Again, let's change the macro without removing the existing get()...put()
users and then remove those as a patch+review on a per-case basis.

> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -2025,6 +2025,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
>  
>  	spin_lock_init(&dev_priv->irq_lock);
>  	spin_lock_init(&dev_priv->error_lock);
> +	if (IS_GEN6(dev))
> +		spin_lock_init(&dev_priv->forcewake_lock);

Just do it.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/3] drm/i915: userspace interface to the forcewake
  2011-04-14 19:56   ` Paul Menzel
@ 2011-04-16  7:05     ` Chris Wilson
  2011-04-17 16:55       ` Ben Widawsky
  0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2011-04-16  7:05 UTC (permalink / raw)
  To: Paul Menzel, intel-gfx

On Thu, 14 Apr 2011 21:56:02 +0200, Paul Menzel <paulepanter@users.sourceforge.net> wrote:
> Dear Ben,
> 
> 
> Am Donnerstag, den 14.04.2011, 11:13 -0700 schrieb Ben Widawsky:
> > userspace to the forcewake reference count via debugfs.
> > 
> > v2:
> > use new spin_locks instead of struct_mutex
> 
> in my opinion these remarks should not go into the commit message.
> Reading the commit log the reader is not interested in what patch
> iteration some change was introduced.

In principle, I differ. I appreciate knowing the evolution of a patch as
it winds its way upstream. From those notes, I can infer what questions
were asked, how much attention the patch received, what the major
criticisms were and how they were addressed. Important insights should we
ever need revisit the patch again later.

In an ideal world, each of these would be expounded upon in the changelog
itself so that we had a concise discussion of the what/why/how (and even
who) addressing all the salient background points and debating the wisdom
of the various approaches to fixing the problem, before describing the
ins-and-out of the actual fix implemented.

In this particular case, I agree (and had planned to drop them after
seeing "v2: no change" ;-). After the discussion of why we need a
spin lock in the opening patch, further mentioning of the mutex is then
irrelevant.

But Ben... I seemed to have missed the real reason why we need the
spinlock. You have to remind me or else I will keep whining on like a
broken record. ;-)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/3] drm/i915: userspace interface to the forcewake
  2011-04-16  7:05     ` Chris Wilson
@ 2011-04-17 16:55       ` Ben Widawsky
  0 siblings, 0 replies; 14+ messages in thread
From: Ben Widawsky @ 2011-04-17 16:55 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Paul Menzel

On Sat, Apr 16, 2011 at 08:05:42AM +0100, Chris Wilson wrote:
> On Thu, 14 Apr 2011 21:56:02 +0200, Paul Menzel <paulepanter@users.sourceforge.net> wrote:

> But Ben... I seemed to have missed the real reason why we need the
> spinlock. You have to remind me or else I will keep whining on like a
> broken record. ;-)
> -Chris
> 

The reason rests on 1 major conclusion, we must make sure FORCEAWAKE
(FORCEWAKE_ACK in the code) is set prior to reading any register in the
range 0-0x3ffff. If I've misunderstood the way this works, and that is
not correct, then you can stop reading now.

The simplest case which shows why we need a spinlock is in reading IIR
(as discussed previously, including my theory why we probably don't
actually have an issue today) in the interrupt handler. We can't get
struct_mutex there, so we're forced to either bump up struct_mutex to a
spinlock, or introduce a new one.

There were one or two cases which got uncovered with the warning, which
I can't find from code inspection right now, where we write to these
registers with just config.mutex. In those cases, we could just acquire
struct_mutex after config.mutex, and that should fix those problems.

Now assuming access is synchronized, here is how I believe it should
work:

/*
 * Sorry for using the register names from the doc, which differ from
 * our code, but I'm writing this while reading the docs, not our code.
 */
u32 reg = IIR; // As an example
if (reg < 0x40000) {
	while(!I915_READ(GTFORCEAWAKE)) {
		I915_WRITE(FORCEWAKE, FWAKE2)
		I915_POSTING_READ(FORCEWAKE);
		gen6_gt_drain_write_fifo(); // ;)
	}
}

I would honestly prefer being wrong about this :-)
Ben

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] drm/i915: reference counted forcewake
  2011-04-16  6:52   ` Chris Wilson
@ 2011-04-18 17:31     ` Ben Widawsky
  2011-04-19  5:48       ` Chris Wilson
  2011-04-19 15:12       ` Keith Packard
  0 siblings, 2 replies; 14+ messages in thread
From: Ben Widawsky @ 2011-04-18 17:31 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Sat, Apr 16, 2011 at 07:52:44AM +0100, Chris Wilson wrote:
> On Thu, 14 Apr 2011 11:13:46 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> > Provide a reference count to track the forcewake state of the GPU. The
> > savings is up to 1 UC read if the unit is already awake, but the main
> > goal is to give userspace some mechanism to wake the GPU.
> > 
> > v2:
> > Added a spin_lock for synchronizing access to forcewake. The lock
> > should not be heavily contested because any caller will either have
> > struct_mutex or config.mutex, and those locks should be much more
> > serializing than this. In terms of locking order:
> > 
> > 1.[config.mutex]
> > 2.	[struct_mutex]
> > 3.		forcewake_lock
> > 
> > By default i915_read[bwlq] functions now acquire forcewake_lock. Added a new
> > _locked version of the read function for callers wishing to prevent the
> > GT from possibly going to sleep in between reads. (We could also use
> > these functions to try to get system state if the lock somehow becomes
> > stuck).
> 
> This patch needs to be split again. I want the update to the existing
> interface and users to be separate from the introduction of a new
> interface. (This means that should we ever regret that interface we can
> rip it out without undoing the fixes.)
> 
> Replacing the get()...long sequence of read/writes...put() did make me cry
> a little, but it is indeed safer to move the check into the individual
> reads (though there is nothing to prevent the rc6-window from closing
> during the middle of that sequence... we need to check that is ok) and too
> ugly to special case those rare times when we do modify 20 regs at once
> (or maybe we have to?).

In case you didn't notice, there is a new interface
i915_read##x##_awake. This should serve that need. In other words:
gen6_gt_force_wake_get()
while(foo)
	i915_read32_awake()
gen6_gt_force_wake_put()

I'll split the patches. I can also use the awake() variant for the
existing users, if you're okay with the awake() function (I was actually
expecting a comment from you on that). For the relevant functions, it
should be as simple as:
s/I915_READ32/i915_read32_awake
s/__gen6_gt_force_wake_/gen6_gt_force_wake_/

Jesse has started the email to verify IIR is in fact a problem for us.
So I'll postpone resubmitting the patches until that's confirmed.

> 
> Again, let's change the macro without removing the existing get()...put()
> users and then remove those as a patch+review on a per-case basis.
> 
> > --- a/drivers/gpu/drm/i915/i915_dma.c
> > +++ b/drivers/gpu/drm/i915/i915_dma.c
> > @@ -2025,6 +2025,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
> >  
> >  	spin_lock_init(&dev_priv->irq_lock);
> >  	spin_lock_init(&dev_priv->error_lock);
> > +	if (IS_GEN6(dev))
> > +		spin_lock_init(&dev_priv->forcewake_lock);
> 
> Just do it.

Once again, I went back and forth and picked the one I thought you would
like. I'm learning...

> -Chris

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] drm/i915: reference counted forcewake
  2011-04-18 17:31     ` Ben Widawsky
@ 2011-04-19  5:48       ` Chris Wilson
  2011-04-19 15:12       ` Keith Packard
  1 sibling, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2011-04-19  5:48 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx

On Mon, 18 Apr 2011 10:31:43 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> I'll split the patches. I can also use the awake() variant for the
> existing users, if you're okay with the awake() function (I was actually
> expecting a comment from you on that). For the relevant functions, it
> should be as simple as:
> s/I915_READ32/i915_read32_awake
> s/__gen6_gt_force_wake_/gen6_gt_force_wake_/

I did see the interface and also noted the absence of users... I'd like to
see the patches to convert the current long sequences structured in such
a way as to not break the current assumption that rc6 doesn't trigger
in the middle.
 
> Jesse has started the email to verify IIR is in fact a problem for us.
> So I'll postpone resubmitting the patches until that's confirmed.

That will be a relief and offer some simplifications no doubt...
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] drm/i915: reference counted forcewake
  2011-04-18 17:31     ` Ben Widawsky
  2011-04-19  5:48       ` Chris Wilson
@ 2011-04-19 15:12       ` Keith Packard
  1 sibling, 0 replies; 14+ messages in thread
From: Keith Packard @ 2011-04-19 15:12 UTC (permalink / raw)
  To: Ben Widawsky, Chris Wilson; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 276 bytes --]

On Mon, 18 Apr 2011 10:31:43 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:

> gen6_gt_force_wake_get()
> while(foo)
> 	i915_read32_awake()
> gen6_gt_force_wake_put()

Uh. Anything looping on registers might not want to hold the lock...

-- 
keith.packard@intel.com

[-- Attachment #1.2: Type: application/pgp-signature, Size: 189 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2011-04-19 15:12 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-04-14 18:13 forcewake v4 (now with spinlock) Ben Widawsky
2011-04-14 18:13 ` (no subject) Ben Widawsky
2011-04-14 18:13 ` [PATCH 1/3] drm/i915: proper use of forcewake Ben Widawsky
2011-04-14 18:13 ` [PATCH 2/3] drm/i915: reference counted forcewake Ben Widawsky
2011-04-14 19:12   ` Ben Widawsky
2011-04-16  6:52   ` Chris Wilson
2011-04-18 17:31     ` Ben Widawsky
2011-04-19  5:48       ` Chris Wilson
2011-04-19 15:12       ` Keith Packard
2011-04-14 18:13 ` [PATCH 3/3] drm/i915: userspace interface to the forcewake Ben Widawsky
2011-04-14 19:13   ` Ben Widawsky
2011-04-14 19:56   ` Paul Menzel
2011-04-16  7:05     ` Chris Wilson
2011-04-17 16:55       ` Ben Widawsky

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).