All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: bdw expands ACTHD to 64bit
@ 2014-03-19 21:54 Chris Wilson
  2014-03-19 23:06 ` Ben Widawsky
  2014-03-20 16:56 ` Tvrtko Ursulin
  0 siblings, 2 replies; 22+ messages in thread
From: Chris Wilson @ 2014-03-19 21:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ben Widawsky

As Broadwell has an increased virtual address size, it requires more
than 32 bits to store offsets into its address space. This includes the
debug registers to track the current HEAD of the individual rings, which
may be anywhere within the per-process address spaces. In order to find
the full location, we need to read the high bits from a second register.
We then also need to expand our storage to keep track of the larger
address.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Timo Aaltonen <tjaalton@ubuntu.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c   |  2 +-
 drivers/gpu/drm/i915/i915_irq.c         |  8 +++++---
 drivers/gpu/drm/i915/i915_reg.h         |  1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c | 21 +++++++++++++++------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  6 +++---
 6 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ed67b4abf9e3..ee913b63a945 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -354,12 +354,12 @@ struct drm_i915_error_state {
 		u32 ipeir;
 		u32 ipehr;
 		u32 instdone;
-		u32 acthd;
 		u32 bbstate;
 		u32 instpm;
 		u32 instps;
 		u32 seqno;
 		u64 bbaddr;
+		u64 acthd;
 		u32 fault_reg;
 		u32 faddr;
 		u32 rc_psmi; /* sleep state */
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index b153a16ead0a..9519aa240614 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -248,7 +248,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
 	err_printf(m, "  TAIL: 0x%08x\n", ring->tail);
 	err_printf(m, "  CTL: 0x%08x\n", ring->ctl);
 	err_printf(m, "  HWS: 0x%08x\n", ring->hws);
-	err_printf(m, "  ACTHD: 0x%08x\n", ring->acthd);
+	err_printf(m, "  ACTHD: 0x%08llx\n", ring->acthd);
 	err_printf(m, "  IPEIR: 0x%08x\n", ring->ipeir);
 	err_printf(m, "  IPEHR: 0x%08x\n", ring->ipehr);
 	err_printf(m, "  INSTDONE: 0x%08x\n", ring->instdone);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 1dd9d3628919..b79792317f39 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2507,7 +2507,8 @@ static struct intel_ring_buffer *
 semaphore_waits_for(struct intel_ring_buffer *ring, u32 *seqno)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	u32 cmd, ipehr, acthd, acthd_min;
+	u64 acthd, acthd_min;
+	u32 cmd, ipehr;
 
 	ipehr = I915_READ(RING_IPEHR(ring->mmio_base));
 	if ((ipehr & ~(0x3 << 16)) !=
@@ -2563,7 +2564,7 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
 }
 
 static enum intel_ring_hangcheck_action
-ring_stuck(struct intel_ring_buffer *ring, u32 acthd)
+ring_stuck(struct intel_ring_buffer *ring, u64 acthd)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2631,7 +2632,8 @@ static void i915_hangcheck_elapsed(unsigned long data)
 		return;
 
 	for_each_ring(ring, dev_priv, i) {
-		u32 seqno, acthd;
+		u64 acthd;
+		u32 seqno;
 		bool busy = true;
 
 		semaphore_clear_deadlocks(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f010ff7e7e2a..3c464d307a2b 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -708,6 +708,7 @@ enum punit_power_well {
 #define BLT_HWS_PGA_GEN7	(0x04280)
 #define VEBOX_HWS_PGA_GEN7	(0x04380)
 #define RING_ACTHD(base)	((base)+0x74)
+#define RING_ACTHD_UDW(base)	((base)+0x5c)
 #define RING_NOPID(base)	((base)+0x94)
 #define RING_IMR(base)		((base)+0xa8)
 #define RING_TIMESTAMP(base)	((base)+0x358)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7a01911c16f8..a6ceb2c6f36d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -417,13 +417,19 @@ static void ring_write_tail(struct intel_ring_buffer *ring,
 	I915_WRITE_TAIL(ring, value);
 }
 
-u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
+u64 intel_ring_get_active_head(struct intel_ring_buffer *ring)
 {
 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
-	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
-			RING_ACTHD(ring->mmio_base) : ACTHD;
 
-	return I915_READ(acthd_reg);
+	u32 reg = (INTEL_INFO(ring->dev)->gen >= 4 ?
+		   RING_ACTHD(ring->mmio_base) : ACTHD);
+	u64 acthd;
+
+	acthd = I915_READ(reg);
+	if (INTEL_INFO(ring->dev)->gen >= 8)
+		acthd |= (u64)I915_READ(RING_ACTHD_UDW(ring->mmio_base)) << 32;
+
+	return acthd;
 }
 
 static void ring_setup_phys_status_page(struct intel_ring_buffer *ring)
@@ -820,8 +826,11 @@ gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
 	/* Workaround to force correct ordering between irq and seqno writes on
 	 * ivb (and maybe also on snb) by reading from a CS register (like
 	 * ACTHD) before reading the status page. */
-	if (!lazy_coherency)
-		intel_ring_get_active_head(ring);
+	if (!lazy_coherency) {
+		struct drm_i915_private *dev_priv = ring->dev->dev_private;
+		POSTING_READ(RING_ACTHD(ring->mmio_base));
+	}
+
 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 69b908622e14..e2872c6b522b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -46,11 +46,11 @@ enum intel_ring_hangcheck_action {
 #define HANGCHECK_SCORE_RING_HUNG 31
 
 struct intel_ring_hangcheck {
-	bool deadlock;
+	u64 acthd;
 	u32 seqno;
-	u32 acthd;
 	int score;
 	enum intel_ring_hangcheck_action action;
+	bool deadlock;
 };
 
 struct  intel_ring_buffer {
@@ -294,7 +294,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev);
 int intel_init_blt_ring_buffer(struct drm_device *dev);
 int intel_init_vebox_ring_buffer(struct drm_device *dev);
 
-u32 intel_ring_get_active_head(struct intel_ring_buffer *ring);
+u64 intel_ring_get_active_head(struct intel_ring_buffer *ring);
 void intel_ring_setup_status_page(struct intel_ring_buffer *ring);
 
 static inline u32 intel_ring_get_tail(struct intel_ring_buffer *ring)
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: bdw expands ACTHD to 64bit
  2014-03-19 21:54 [PATCH] drm/i915: bdw expands ACTHD to 64bit Chris Wilson
@ 2014-03-19 23:06 ` Ben Widawsky
  2014-03-20  7:54   ` Chris Wilson
  2014-03-20 16:56 ` Tvrtko Ursulin
  1 sibling, 1 reply; 22+ messages in thread
From: Ben Widawsky @ 2014-03-19 23:06 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Ben Widawsky

On Wed, Mar 19, 2014 at 09:54:48PM +0000, Chris Wilson wrote:
> As Broadwell has an increased virtual address size, it requires more
> than 32 bits to store offsets into its address space. This includes the
> debug registers to track the current HEAD of the individual rings, which
> may be anywhere within the per-process address spaces. In order to find
> the full location, we need to read the high bits from a second register.
> We then also need to expand our storage to keep track of the larger
> address.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> Cc: Timo Aaltonen <tjaalton@ubuntu.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         |  2 +-
>  drivers/gpu/drm/i915/i915_gpu_error.c   |  2 +-
>  drivers/gpu/drm/i915/i915_irq.c         |  8 +++++---
>  drivers/gpu/drm/i915/i915_reg.h         |  1 +
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 21 +++++++++++++++------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  6 +++---
>  6 files changed, 26 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index ed67b4abf9e3..ee913b63a945 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -354,12 +354,12 @@ struct drm_i915_error_state {
>  		u32 ipeir;
>  		u32 ipehr;
>  		u32 instdone;
> -		u32 acthd;
>  		u32 bbstate;
>  		u32 instpm;
>  		u32 instps;
>  		u32 seqno;
>  		u64 bbaddr;
> +		u64 acthd;
>  		u32 fault_reg;
>  		u32 faddr;
>  		u32 rc_psmi; /* sleep state */
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index b153a16ead0a..9519aa240614 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -248,7 +248,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
>  	err_printf(m, "  TAIL: 0x%08x\n", ring->tail);
>  	err_printf(m, "  CTL: 0x%08x\n", ring->ctl);
>  	err_printf(m, "  HWS: 0x%08x\n", ring->hws);
> -	err_printf(m, "  ACTHD: 0x%08x\n", ring->acthd);
> +	err_printf(m, "  ACTHD: 0x%08llx\n", ring->acthd);

%016x?

if (gen8)
	%016x
?

>  	err_printf(m, "  IPEIR: 0x%08x\n", ring->ipeir);
>  	err_printf(m, "  IPEHR: 0x%08x\n", ring->ipehr);
>  	err_printf(m, "  INSTDONE: 0x%08x\n", ring->instdone);
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 1dd9d3628919..b79792317f39 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2507,7 +2507,8 @@ static struct intel_ring_buffer *
>  semaphore_waits_for(struct intel_ring_buffer *ring, u32 *seqno)
>  {
>  	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> -	u32 cmd, ipehr, acthd, acthd_min;
> +	u64 acthd, acthd_min;
> +	u32 cmd, ipehr;
>  
>  	ipehr = I915_READ(RING_IPEHR(ring->mmio_base));
>  	if ((ipehr & ~(0x3 << 16)) !=
> @@ -2563,7 +2564,7 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
>  }
>  
>  static enum intel_ring_hangcheck_action
> -ring_stuck(struct intel_ring_buffer *ring, u32 acthd)
> +ring_stuck(struct intel_ring_buffer *ring, u64 acthd)
>  {
>  	struct drm_device *dev = ring->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -2631,7 +2632,8 @@ static void i915_hangcheck_elapsed(unsigned long data)
>  		return;
>  
>  	for_each_ring(ring, dev_priv, i) {
> -		u32 seqno, acthd;
> +		u64 acthd;
> +		u32 seqno;
>  		bool busy = true;
>  
>  		semaphore_clear_deadlocks(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index f010ff7e7e2a..3c464d307a2b 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -708,6 +708,7 @@ enum punit_power_well {
>  #define BLT_HWS_PGA_GEN7	(0x04280)
>  #define VEBOX_HWS_PGA_GEN7	(0x04380)
>  #define RING_ACTHD(base)	((base)+0x74)
> +#define RING_ACTHD_UDW(base)	((base)+0x5c)
>  #define RING_NOPID(base)	((base)+0x94)
>  #define RING_IMR(base)		((base)+0xa8)
>  #define RING_TIMESTAMP(base)	((base)+0x358)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 7a01911c16f8..a6ceb2c6f36d 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -417,13 +417,19 @@ static void ring_write_tail(struct intel_ring_buffer *ring,
>  	I915_WRITE_TAIL(ring, value);
>  }
>  
> -u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
> +u64 intel_ring_get_active_head(struct intel_ring_buffer *ring)
>  {
>  	drm_i915_private_t *dev_priv = ring->dev->dev_private;
> -	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
> -			RING_ACTHD(ring->mmio_base) : ACTHD;
>  
> -	return I915_READ(acthd_reg);
> +	u32 reg = (INTEL_INFO(ring->dev)->gen >= 4 ?
> +		   RING_ACTHD(ring->mmio_base) : ACTHD);
> +	u64 acthd;
> +
> +	acthd = I915_READ(reg);
> +	if (INTEL_INFO(ring->dev)->gen >= 8)
> +		acthd |= (u64)I915_READ(RING_ACTHD_UDW(ring->mmio_base)) << 32;

I had to double check we didn't need to explicitly 0 the upper bits :-)

> +
> +	return acthd;
>  }
>  
>  static void ring_setup_phys_status_page(struct intel_ring_buffer *ring)
> @@ -820,8 +826,11 @@ gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
>  	/* Workaround to force correct ordering between irq and seqno writes on
>  	 * ivb (and maybe also on snb) by reading from a CS register (like
>  	 * ACTHD) before reading the status page. */
> -	if (!lazy_coherency)
> -		intel_ring_get_active_head(ring);
> +	if (!lazy_coherency) {
> +		struct drm_i915_private *dev_priv = ring->dev->dev_private;
> +		POSTING_READ(RING_ACTHD(ring->mmio_base));
> +	}
> +
>  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 69b908622e14..e2872c6b522b 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -46,11 +46,11 @@ enum intel_ring_hangcheck_action {
>  #define HANGCHECK_SCORE_RING_HUNG 31
>  
>  struct intel_ring_hangcheck {
> -	bool deadlock;
> +	u64 acthd;
>  	u32 seqno;
> -	u32 acthd;
>  	int score;
>  	enum intel_ring_hangcheck_action action;
> +	bool deadlock;
>  };
>  
>  struct  intel_ring_buffer {
> @@ -294,7 +294,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev);
>  int intel_init_blt_ring_buffer(struct drm_device *dev);
>  int intel_init_vebox_ring_buffer(struct drm_device *dev);
>  
> -u32 intel_ring_get_active_head(struct intel_ring_buffer *ring);
> +u64 intel_ring_get_active_head(struct intel_ring_buffer *ring);
>  void intel_ring_setup_status_page(struct intel_ring_buffer *ring);
>  
>  static inline u32 intel_ring_get_tail(struct intel_ring_buffer *ring)

With the 016 pad fix:
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>

-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: bdw expands ACTHD to 64bit
  2014-03-19 23:06 ` Ben Widawsky
@ 2014-03-20  7:54   ` Chris Wilson
  2014-03-20 15:17     ` Daniel Vetter
  0 siblings, 1 reply; 22+ messages in thread
From: Chris Wilson @ 2014-03-20  7:54 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx, Ben Widawsky

On Wed, Mar 19, 2014 at 04:06:38PM -0700, Ben Widawsky wrote:
> On Wed, Mar 19, 2014 at 09:54:48PM +0000, Chris Wilson wrote:
> > As Broadwell has an increased virtual address size, it requires more
> > than 32 bits to store offsets into its address space. This includes the
> > debug registers to track the current HEAD of the individual rings, which
> > may be anywhere within the per-process address spaces. In order to find
> > the full location, we need to read the high bits from a second register.
> > We then also need to expand our storage to keep track of the larger
> > address.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> > Cc: Timo Aaltonen <tjaalton@ubuntu.com>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h         |  2 +-
> >  drivers/gpu/drm/i915/i915_gpu_error.c   |  2 +-
> >  drivers/gpu/drm/i915/i915_irq.c         |  8 +++++---
> >  drivers/gpu/drm/i915/i915_reg.h         |  1 +
> >  drivers/gpu/drm/i915/intel_ringbuffer.c | 21 +++++++++++++++------
> >  drivers/gpu/drm/i915/intel_ringbuffer.h |  6 +++---
> >  6 files changed, 26 insertions(+), 14 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index ed67b4abf9e3..ee913b63a945 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -354,12 +354,12 @@ struct drm_i915_error_state {
> >  		u32 ipeir;
> >  		u32 ipehr;
> >  		u32 instdone;
> > -		u32 acthd;
> >  		u32 bbstate;
> >  		u32 instpm;
> >  		u32 instps;
> >  		u32 seqno;
> >  		u64 bbaddr;
> > +		u64 acthd;
> >  		u32 fault_reg;
> >  		u32 faddr;
> >  		u32 rc_psmi; /* sleep state */
> > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> > index b153a16ead0a..9519aa240614 100644
> > --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> > @@ -248,7 +248,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
> >  	err_printf(m, "  TAIL: 0x%08x\n", ring->tail);
> >  	err_printf(m, "  CTL: 0x%08x\n", ring->ctl);
> >  	err_printf(m, "  HWS: 0x%08x\n", ring->hws);
> > -	err_printf(m, "  ACTHD: 0x%08x\n", ring->acthd);
> > +	err_printf(m, "  ACTHD: 0x%08llx\n", ring->acthd);
> 
> %016x?
> 
> if (gen8)
> 	%016x
> ?

I wasn't sure either, but I thought since we didn't do anything special
for BBADDR, to leave ACTHD alone.

I wonder if it would help splitting it up, having to count 8 extra
leading zeros is going to be a nightmare (especially as the decoder
doesn't pad it right either...)

For reading, I think I would prefer
err_printf(m, "  ACTHD: 0x%016llx [0x%08x %08x]\n",
           ring->acthd, (u32)(ring->acthd>>32), (u32)(ring->acthd));

or maybe just
err_printf(m, "  ACTHD: 0x%08x %08x\n",
           (u32)(ring->acthd>>32), (u32)(ring->acthd));
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: bdw expands ACTHD to 64bit
  2014-03-20  7:54   ` Chris Wilson
@ 2014-03-20 15:17     ` Daniel Vetter
  2014-03-20 16:28       ` Chris Wilson
  0 siblings, 1 reply; 22+ messages in thread
From: Daniel Vetter @ 2014-03-20 15:17 UTC (permalink / raw)
  To: Chris Wilson, Ben Widawsky, intel-gfx, Ben Widawsky

On Thu, Mar 20, 2014 at 07:54:19AM +0000, Chris Wilson wrote:
> On Wed, Mar 19, 2014 at 04:06:38PM -0700, Ben Widawsky wrote:
> > On Wed, Mar 19, 2014 at 09:54:48PM +0000, Chris Wilson wrote:
> > > As Broadwell has an increased virtual address size, it requires more
> > > than 32 bits to store offsets into its address space. This includes the
> > > debug registers to track the current HEAD of the individual rings, which
> > > may be anywhere within the per-process address spaces. In order to find
> > > the full location, we need to read the high bits from a second register.
> > > We then also need to expand our storage to keep track of the larger
> > > address.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> > > Cc: Timo Aaltonen <tjaalton@ubuntu.com>
> > > ---
> > >  drivers/gpu/drm/i915/i915_drv.h         |  2 +-
> > >  drivers/gpu/drm/i915/i915_gpu_error.c   |  2 +-
> > >  drivers/gpu/drm/i915/i915_irq.c         |  8 +++++---
> > >  drivers/gpu/drm/i915/i915_reg.h         |  1 +
> > >  drivers/gpu/drm/i915/intel_ringbuffer.c | 21 +++++++++++++++------
> > >  drivers/gpu/drm/i915/intel_ringbuffer.h |  6 +++---
> > >  6 files changed, 26 insertions(+), 14 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > > index ed67b4abf9e3..ee913b63a945 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -354,12 +354,12 @@ struct drm_i915_error_state {
> > >  		u32 ipeir;
> > >  		u32 ipehr;
> > >  		u32 instdone;
> > > -		u32 acthd;
> > >  		u32 bbstate;
> > >  		u32 instpm;
> > >  		u32 instps;
> > >  		u32 seqno;
> > >  		u64 bbaddr;
> > > +		u64 acthd;
> > >  		u32 fault_reg;
> > >  		u32 faddr;
> > >  		u32 rc_psmi; /* sleep state */
> > > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> > > index b153a16ead0a..9519aa240614 100644
> > > --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> > > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> > > @@ -248,7 +248,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
> > >  	err_printf(m, "  TAIL: 0x%08x\n", ring->tail);
> > >  	err_printf(m, "  CTL: 0x%08x\n", ring->ctl);
> > >  	err_printf(m, "  HWS: 0x%08x\n", ring->hws);
> > > -	err_printf(m, "  ACTHD: 0x%08x\n", ring->acthd);
> > > +	err_printf(m, "  ACTHD: 0x%08llx\n", ring->acthd);
> > 
> > %016x?
> > 
> > if (gen8)
> > 	%016x
> > ?
> 
> I wasn't sure either, but I thought since we didn't do anything special
> for BBADDR, to leave ACTHD alone.
> 
> I wonder if it would help splitting it up, having to count 8 extra
> leading zeros is going to be a nightmare (especially as the decoder
> doesn't pad it right either...)
> 
> For reading, I think I would prefer
> err_printf(m, "  ACTHD: 0x%016llx [0x%08x %08x]\n",
>            ring->acthd, (u32)(ring->acthd>>32), (u32)(ring->acthd));
> 
> or maybe just
> err_printf(m, "  ACTHD: 0x%08x %08x\n",
>            (u32)(ring->acthd>>32), (u32)(ring->acthd));

I expect this to lead to a day-long wtf session once we have 64b ppgtt
enabled and a leaking X hung ;-) I use g* a lot in vim when reading error
states, so dropping information is dangerous.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: bdw expands ACTHD to 64bit
  2014-03-20 15:17     ` Daniel Vetter
@ 2014-03-20 16:28       ` Chris Wilson
  2014-03-20 16:34         ` Daniel Vetter
  0 siblings, 1 reply; 22+ messages in thread
From: Chris Wilson @ 2014-03-20 16:28 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Ben Widawsky, intel-gfx, Ben Widawsky

On Thu, Mar 20, 2014 at 04:17:26PM +0100, Daniel Vetter wrote:
> On Thu, Mar 20, 2014 at 07:54:19AM +0000, Chris Wilson wrote:
> > On Wed, Mar 19, 2014 at 04:06:38PM -0700, Ben Widawsky wrote:
> > > On Wed, Mar 19, 2014 at 09:54:48PM +0000, Chris Wilson wrote:
> > > > As Broadwell has an increased virtual address size, it requires more
> > > > than 32 bits to store offsets into its address space. This includes the
> > > > debug registers to track the current HEAD of the individual rings, which
> > > > may be anywhere within the per-process address spaces. In order to find
> > > > the full location, we need to read the high bits from a second register.
> > > > We then also need to expand our storage to keep track of the larger
> > > > address.
> > > > 
> > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> > > > Cc: Timo Aaltonen <tjaalton@ubuntu.com>
> > > > ---
> > > >  drivers/gpu/drm/i915/i915_drv.h         |  2 +-
> > > >  drivers/gpu/drm/i915/i915_gpu_error.c   |  2 +-
> > > >  drivers/gpu/drm/i915/i915_irq.c         |  8 +++++---
> > > >  drivers/gpu/drm/i915/i915_reg.h         |  1 +
> > > >  drivers/gpu/drm/i915/intel_ringbuffer.c | 21 +++++++++++++++------
> > > >  drivers/gpu/drm/i915/intel_ringbuffer.h |  6 +++---
> > > >  6 files changed, 26 insertions(+), 14 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > > > index ed67b4abf9e3..ee913b63a945 100644
> > > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > > @@ -354,12 +354,12 @@ struct drm_i915_error_state {
> > > >  		u32 ipeir;
> > > >  		u32 ipehr;
> > > >  		u32 instdone;
> > > > -		u32 acthd;
> > > >  		u32 bbstate;
> > > >  		u32 instpm;
> > > >  		u32 instps;
> > > >  		u32 seqno;
> > > >  		u64 bbaddr;
> > > > +		u64 acthd;
> > > >  		u32 fault_reg;
> > > >  		u32 faddr;
> > > >  		u32 rc_psmi; /* sleep state */
> > > > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> > > > index b153a16ead0a..9519aa240614 100644
> > > > --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> > > > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> > > > @@ -248,7 +248,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
> > > >  	err_printf(m, "  TAIL: 0x%08x\n", ring->tail);
> > > >  	err_printf(m, "  CTL: 0x%08x\n", ring->ctl);
> > > >  	err_printf(m, "  HWS: 0x%08x\n", ring->hws);
> > > > -	err_printf(m, "  ACTHD: 0x%08x\n", ring->acthd);
> > > > +	err_printf(m, "  ACTHD: 0x%08llx\n", ring->acthd);
> > > 
> > > %016x?
> > > 
> > > if (gen8)
> > > 	%016x
> > > ?
> > 
> > I wasn't sure either, but I thought since we didn't do anything special
> > for BBADDR, to leave ACTHD alone.
> > 
> > I wonder if it would help splitting it up, having to count 8 extra
> > leading zeros is going to be a nightmare (especially as the decoder
> > doesn't pad it right either...)
> > 
> > For reading, I think I would prefer
> > err_printf(m, "  ACTHD: 0x%016llx [0x%08x %08x]\n",
> >            ring->acthd, (u32)(ring->acthd>>32), (u32)(ring->acthd));
> > 
> > or maybe just
> > err_printf(m, "  ACTHD: 0x%08x %08x\n",
> >            (u32)(ring->acthd>>32), (u32)(ring->acthd));
> 
> I expect this to lead to a day-long wtf session once we have 64b ppgtt
> enabled and a leaking X hung ;-) I use g* a lot in vim when reading error
> states, so dropping information is dangerous.

I'm not following, where is the information loss? If you haven't already
guessed, we are seeing ACTHD above 4GiB already.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: bdw expands ACTHD to 64bit
  2014-03-20 16:28       ` Chris Wilson
@ 2014-03-20 16:34         ` Daniel Vetter
  0 siblings, 0 replies; 22+ messages in thread
From: Daniel Vetter @ 2014-03-20 16:34 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, Ben Widawsky, intel-gfx, Ben Widawsky

On Thu, Mar 20, 2014 at 5:28 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>> > I wasn't sure either, but I thought since we didn't do anything special
>> > for BBADDR, to leave ACTHD alone.
>> >
>> > I wonder if it would help splitting it up, having to count 8 extra
>> > leading zeros is going to be a nightmare (especially as the decoder
>> > doesn't pad it right either...)
>> >
>> > For reading, I think I would prefer
>> > err_printf(m, "  ACTHD: 0x%016llx [0x%08x %08x]\n",
>> >            ring->acthd, (u32)(ring->acthd>>32), (u32)(ring->acthd));
>> >
>> > or maybe just
>> > err_printf(m, "  ACTHD: 0x%08x %08x\n",
>> >            (u32)(ring->acthd>>32), (u32)(ring->acthd));
>>
>> I expect this to lead to a day-long wtf session once we have 64b ppgtt
>> enabled and a leaking X hung ;-) I use g* a lot in vim when reading error
>> states, so dropping information is dangerous.
>
> I'm not following, where is the information loss? If you haven't already
> guessed, we are seeing ACTHD above 4GiB already.

Utter failure to read your diff. I agree that some form of split would
look nice.

/me hides in shame
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: bdw expands ACTHD to 64bit
  2014-03-19 21:54 [PATCH] drm/i915: bdw expands ACTHD to 64bit Chris Wilson
  2014-03-19 23:06 ` Ben Widawsky
@ 2014-03-20 16:56 ` Tvrtko Ursulin
  2014-03-20 21:41   ` Chris Wilson
  2014-03-20 21:48   ` [PATCH] drm/i915: Broadwell " Chris Wilson
  1 sibling, 2 replies; 22+ messages in thread
From: Tvrtko Ursulin @ 2014-03-20 16:56 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Ben Widawsky


On 03/19/2014 09:54 PM, Chris Wilson wrote:
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 7a01911c16f8..a6ceb2c6f36d 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -417,13 +417,19 @@ static void ring_write_tail(struct intel_ring_buffer *ring,
>   	I915_WRITE_TAIL(ring, value);
>   }
>
> -u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
> +u64 intel_ring_get_active_head(struct intel_ring_buffer *ring)
>   {
>   	drm_i915_private_t *dev_priv = ring->dev->dev_private;
> -	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
> -			RING_ACTHD(ring->mmio_base) : ACTHD;
>
> -	return I915_READ(acthd_reg);
> +	u32 reg = (INTEL_INFO(ring->dev)->gen >= 4 ?
> +		   RING_ACTHD(ring->mmio_base) : ACTHD);
> +	u64 acthd;
> +
> +	acthd = I915_READ(reg);
> +	if (INTEL_INFO(ring->dev)->gen >= 8)
> +		acthd |= (u64)I915_READ(RING_ACTHD_UDW(ring->mmio_base)) << 32;
> +
> +	return acthd;
>   }

Can it happen, and does anyone care, for a low dword to wrap so instead 
of say, 0x00010000, this function falsely returns 0x0001ffff ?

Tvrtko

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: bdw expands ACTHD to 64bit
  2014-03-20 16:56 ` Tvrtko Ursulin
@ 2014-03-20 21:41   ` Chris Wilson
  2014-03-20 21:48   ` [PATCH] drm/i915: Broadwell " Chris Wilson
  1 sibling, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2014-03-20 21:41 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx, Ben Widawsky

On Thu, Mar 20, 2014 at 04:56:57PM +0000, Tvrtko Ursulin wrote:
> 
> On 03/19/2014 09:54 PM, Chris Wilson wrote:
> >diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> >index 7a01911c16f8..a6ceb2c6f36d 100644
> >--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> >+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> >@@ -417,13 +417,19 @@ static void ring_write_tail(struct intel_ring_buffer *ring,
> >  	I915_WRITE_TAIL(ring, value);
> >  }
> >
> >-u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
> >+u64 intel_ring_get_active_head(struct intel_ring_buffer *ring)
> >  {
> >  	drm_i915_private_t *dev_priv = ring->dev->dev_private;
> >-	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
> >-			RING_ACTHD(ring->mmio_base) : ACTHD;
> >
> >-	return I915_READ(acthd_reg);
> >+	u32 reg = (INTEL_INFO(ring->dev)->gen >= 4 ?
> >+		   RING_ACTHD(ring->mmio_base) : ACTHD);
> >+	u64 acthd;
> >+
> >+	acthd = I915_READ(reg);
> >+	if (INTEL_INFO(ring->dev)->gen >= 8)
> >+		acthd |= (u64)I915_READ(RING_ACTHD_UDW(ring->mmio_base)) << 32;
> >+
> >+	return acthd;
> >  }
> 
> Can it happen, and does anyone care, for a low dword to wrap so
> instead of say, 0x00010000, this function falsely returns 0x0001ffff
> ?

Yeah, it could happen. This is used in error capture where the other rings
may still be running, and for hangcheck where it would not matter as it
would still differ on the next pass. And only when multiple passes
stalled would the hangcheck fire.

Anyway, it is probably better to handle this correctly in case we do use
in a sensitive check later.

Thanks,
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH] drm/i915: Broadwell expands ACTHD to 64bit
  2014-03-20 16:56 ` Tvrtko Ursulin
  2014-03-20 21:41   ` Chris Wilson
@ 2014-03-20 21:48   ` Chris Wilson
  2014-03-21 10:03     ` Tvrtko Ursulin
  1 sibling, 1 reply; 22+ messages in thread
From: Chris Wilson @ 2014-03-20 21:48 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ben Widawsky

As Broadwell has an increased virtual address size, it requires more
than 32 bits to store offsets into its address space. This includes the
debug registers to track the current HEAD of the individual rings, which
may be anywhere within the per-process address spaces. In order to find
the full location, we need to read the high bits from a second register.
We then also need to expand our storage to keep track of the larger
address.

v2: Carefully read the two registers to catch wraparound between
    the reads.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Timo Aaltonen <tjaalton@ubuntu.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c   |  2 +-
 drivers/gpu/drm/i915/i915_irq.c         |  8 +++++---
 drivers/gpu/drm/i915/i915_reg.h         |  1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c | 30 ++++++++++++++++++++++++------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  6 +++---
 6 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 54182536dc46..d5a4a14d6723 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -354,12 +354,12 @@ struct drm_i915_error_state {
 		u32 ipeir;
 		u32 ipehr;
 		u32 instdone;
-		u32 acthd;
 		u32 bbstate;
 		u32 instpm;
 		u32 instps;
 		u32 seqno;
 		u64 bbaddr;
+		u64 acthd;
 		u32 fault_reg;
 		u32 faddr;
 		u32 rc_psmi; /* sleep state */
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index b153a16ead0a..9519aa240614 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -248,7 +248,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
 	err_printf(m, "  TAIL: 0x%08x\n", ring->tail);
 	err_printf(m, "  CTL: 0x%08x\n", ring->ctl);
 	err_printf(m, "  HWS: 0x%08x\n", ring->hws);
-	err_printf(m, "  ACTHD: 0x%08x\n", ring->acthd);
+	err_printf(m, "  ACTHD: 0x%08llx\n", ring->acthd);
 	err_printf(m, "  IPEIR: 0x%08x\n", ring->ipeir);
 	err_printf(m, "  IPEHR: 0x%08x\n", ring->ipehr);
 	err_printf(m, "  INSTDONE: 0x%08x\n", ring->instdone);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 77dbef6af185..9caae9840c78 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2507,7 +2507,8 @@ static struct intel_ring_buffer *
 semaphore_waits_for(struct intel_ring_buffer *ring, u32 *seqno)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	u32 cmd, ipehr, acthd, acthd_min;
+	u64 acthd, acthd_min;
+	u32 cmd, ipehr;
 
 	ipehr = I915_READ(RING_IPEHR(ring->mmio_base));
 	if ((ipehr & ~(0x3 << 16)) !=
@@ -2563,7 +2564,7 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
 }
 
 static enum intel_ring_hangcheck_action
-ring_stuck(struct intel_ring_buffer *ring, u32 acthd)
+ring_stuck(struct intel_ring_buffer *ring, u64 acthd)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2631,7 +2632,8 @@ static void i915_hangcheck_elapsed(unsigned long data)
 		return;
 
 	for_each_ring(ring, dev_priv, i) {
-		u32 seqno, acthd;
+		u64 acthd;
+		u32 seqno;
 		bool busy = true;
 
 		semaphore_clear_deadlocks(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f010ff7e7e2a..3c464d307a2b 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -708,6 +708,7 @@ enum punit_power_well {
 #define BLT_HWS_PGA_GEN7	(0x04280)
 #define VEBOX_HWS_PGA_GEN7	(0x04380)
 #define RING_ACTHD(base)	((base)+0x74)
+#define RING_ACTHD_UDW(base)	((base)+0x5c)
 #define RING_NOPID(base)	((base)+0x94)
 #define RING_IMR(base)		((base)+0xa8)
 #define RING_TIMESTAMP(base)	((base)+0x358)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7a01911c16f8..45d8011e5a6c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -417,13 +417,28 @@ static void ring_write_tail(struct intel_ring_buffer *ring,
 	I915_WRITE_TAIL(ring, value);
 }
 
-u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
+u64 intel_ring_get_active_head(struct intel_ring_buffer *ring)
 {
 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
-	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
-			RING_ACTHD(ring->mmio_base) : ACTHD;
+	u64 acthd;
 
-	return I915_READ(acthd_reg);
+	if (INTEL_INFO(ring->dev)->gen >= 8) {
+		u32 upper, lower, tmp;
+
+		tmp = I915_READ(RING_ACTHD_UDW(ring->mmio_base));
+		do {
+			upper = tmp;
+			lower = I915_READ(RING_ACTHD(ring->mmio_base));
+			tmp = I915_READ(RING_ACTHD_UDW(ring->mmio_base));
+		} while (upper != tmp);
+
+		acthd = (u64)upper << 32 | lower;
+	} else if (INTEL_INFO(ring->dev)->gen >= 4)
+		acthd = I915_READ(RING_ACTHD(ring->mmio_base));
+	else
+		acthd = I915_READ(ACTHD);
+
+	return acthd;
 }
 
 static void ring_setup_phys_status_page(struct intel_ring_buffer *ring)
@@ -820,8 +835,11 @@ gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
 	/* Workaround to force correct ordering between irq and seqno writes on
 	 * ivb (and maybe also on snb) by reading from a CS register (like
 	 * ACTHD) before reading the status page. */
-	if (!lazy_coherency)
-		intel_ring_get_active_head(ring);
+	if (!lazy_coherency) {
+		struct drm_i915_private *dev_priv = ring->dev->dev_private;
+		POSTING_READ(RING_ACTHD(ring->mmio_base));
+	}
+
 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 69b908622e14..e2872c6b522b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -46,11 +46,11 @@ enum intel_ring_hangcheck_action {
 #define HANGCHECK_SCORE_RING_HUNG 31
 
 struct intel_ring_hangcheck {
-	bool deadlock;
+	u64 acthd;
 	u32 seqno;
-	u32 acthd;
 	int score;
 	enum intel_ring_hangcheck_action action;
+	bool deadlock;
 };
 
 struct  intel_ring_buffer {
@@ -294,7 +294,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev);
 int intel_init_blt_ring_buffer(struct drm_device *dev);
 int intel_init_vebox_ring_buffer(struct drm_device *dev);
 
-u32 intel_ring_get_active_head(struct intel_ring_buffer *ring);
+u64 intel_ring_get_active_head(struct intel_ring_buffer *ring);
 void intel_ring_setup_status_page(struct intel_ring_buffer *ring);
 
 static inline u32 intel_ring_get_tail(struct intel_ring_buffer *ring)
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: Broadwell expands ACTHD to 64bit
  2014-03-20 21:48   ` [PATCH] drm/i915: Broadwell " Chris Wilson
@ 2014-03-21 10:03     ` Tvrtko Ursulin
  2014-03-21 10:14       ` Chris Wilson
  0 siblings, 1 reply; 22+ messages in thread
From: Tvrtko Ursulin @ 2014-03-21 10:03 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Ben Widawsky


On 03/20/2014 09:48 PM, Chris Wilson wrote:
> As Broadwell has an increased virtual address size, it requires more
> than 32 bits to store offsets into its address space. This includes the
> debug registers to track the current HEAD of the individual rings, which
> may be anywhere within the per-process address spaces. In order to find
> the full location, we need to read the high bits from a second register.
> We then also need to expand our storage to keep track of the larger
> address.
>
> v2: Carefully read the two registers to catch wraparound between
>      the reads.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> Cc: Timo Aaltonen <tjaalton@ubuntu.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h         |  2 +-
>   drivers/gpu/drm/i915/i915_gpu_error.c   |  2 +-
>   drivers/gpu/drm/i915/i915_irq.c         |  8 +++++---
>   drivers/gpu/drm/i915/i915_reg.h         |  1 +
>   drivers/gpu/drm/i915/intel_ringbuffer.c | 30 ++++++++++++++++++++++++------
>   drivers/gpu/drm/i915/intel_ringbuffer.h |  6 +++---
>   6 files changed, 35 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 54182536dc46..d5a4a14d6723 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -354,12 +354,12 @@ struct drm_i915_error_state {
>   		u32 ipeir;
>   		u32 ipehr;
>   		u32 instdone;
> -		u32 acthd;
>   		u32 bbstate;
>   		u32 instpm;
>   		u32 instps;
>   		u32 seqno;
>   		u64 bbaddr;
> +		u64 acthd;
>   		u32 fault_reg;
>   		u32 faddr;
>   		u32 rc_psmi; /* sleep state */
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index b153a16ead0a..9519aa240614 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -248,7 +248,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
>   	err_printf(m, "  TAIL: 0x%08x\n", ring->tail);
>   	err_printf(m, "  CTL: 0x%08x\n", ring->ctl);
>   	err_printf(m, "  HWS: 0x%08x\n", ring->hws);
> -	err_printf(m, "  ACTHD: 0x%08x\n", ring->acthd);
> +	err_printf(m, "  ACTHD: 0x%08llx\n", ring->acthd);

I thought conclusion elsewhere in the thread for this was to include all 
64-bits in the output?

>   	err_printf(m, "  IPEIR: 0x%08x\n", ring->ipeir);
>   	err_printf(m, "  IPEHR: 0x%08x\n", ring->ipehr);
>   	err_printf(m, "  INSTDONE: 0x%08x\n", ring->instdone);
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 77dbef6af185..9caae9840c78 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2507,7 +2507,8 @@ static struct intel_ring_buffer *
>   semaphore_waits_for(struct intel_ring_buffer *ring, u32 *seqno)
>   {
>   	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> -	u32 cmd, ipehr, acthd, acthd_min;
> +	u64 acthd, acthd_min;
> +	u32 cmd, ipehr;
>
>   	ipehr = I915_READ(RING_IPEHR(ring->mmio_base));
>   	if ((ipehr & ~(0x3 << 16)) !=
> @@ -2563,7 +2564,7 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
>   }
>
>   static enum intel_ring_hangcheck_action
> -ring_stuck(struct intel_ring_buffer *ring, u32 acthd)
> +ring_stuck(struct intel_ring_buffer *ring, u64 acthd)
>   {
>   	struct drm_device *dev = ring->dev;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -2631,7 +2632,8 @@ static void i915_hangcheck_elapsed(unsigned long data)
>   		return;
>
>   	for_each_ring(ring, dev_priv, i) {
> -		u32 seqno, acthd;
> +		u64 acthd;
> +		u32 seqno;
>   		bool busy = true;
>
>   		semaphore_clear_deadlocks(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index f010ff7e7e2a..3c464d307a2b 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -708,6 +708,7 @@ enum punit_power_well {
>   #define BLT_HWS_PGA_GEN7	(0x04280)
>   #define VEBOX_HWS_PGA_GEN7	(0x04380)
>   #define RING_ACTHD(base)	((base)+0x74)
> +#define RING_ACTHD_UDW(base)	((base)+0x5c)
>   #define RING_NOPID(base)	((base)+0x94)
>   #define RING_IMR(base)		((base)+0xa8)
>   #define RING_TIMESTAMP(base)	((base)+0x358)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 7a01911c16f8..45d8011e5a6c 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -417,13 +417,28 @@ static void ring_write_tail(struct intel_ring_buffer *ring,
>   	I915_WRITE_TAIL(ring, value);
>   }
>
> -u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
> +u64 intel_ring_get_active_head(struct intel_ring_buffer *ring)
>   {
>   	drm_i915_private_t *dev_priv = ring->dev->dev_private;
> -	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
> -			RING_ACTHD(ring->mmio_base) : ACTHD;
> +	u64 acthd;
>
> -	return I915_READ(acthd_reg);
> +	if (INTEL_INFO(ring->dev)->gen >= 8) {
> +		u32 upper, lower, tmp;
> +
> +		tmp = I915_READ(RING_ACTHD_UDW(ring->mmio_base));
> +		do {
> +			upper = tmp;
> +			lower = I915_READ(RING_ACTHD(ring->mmio_base));
> +			tmp = I915_READ(RING_ACTHD_UDW(ring->mmio_base));
> +		} while (upper != tmp);

Looks good. Slightly more defensive approach would be to only retry once 
and log something horrible if upper word wraps twice. Ben's suggestion 
to also validate that the lower dword has really wrapped makes sense as 
well, if stuffing more and more conditionals and this call path is not a 
problem.

Regards,

Tvrtko

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: Broadwell expands ACTHD to 64bit
  2014-03-21 10:03     ` Tvrtko Ursulin
@ 2014-03-21 10:14       ` Chris Wilson
  2014-03-21 10:50         ` Tvrtko Ursulin
  0 siblings, 1 reply; 22+ messages in thread
From: Chris Wilson @ 2014-03-21 10:14 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx, Ben Widawsky

On Fri, Mar 21, 2014 at 10:03:38AM +0000, Tvrtko Ursulin wrote:
> 
> On 03/20/2014 09:48 PM, Chris Wilson wrote:
> >As Broadwell has an increased virtual address size, it requires more
> >than 32 bits to store offsets into its address space. This includes the
> >debug registers to track the current HEAD of the individual rings, which
> >may be anywhere within the per-process address spaces. In order to find
> >the full location, we need to read the high bits from a second register.
> >We then also need to expand our storage to keep track of the larger
> >address.
> >
> >v2: Carefully read the two registers to catch wraparound between
> >     the reads.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> >Cc: Timo Aaltonen <tjaalton@ubuntu.com>
> >Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> >---
> >  drivers/gpu/drm/i915/i915_drv.h         |  2 +-
> >  drivers/gpu/drm/i915/i915_gpu_error.c   |  2 +-
> >  drivers/gpu/drm/i915/i915_irq.c         |  8 +++++---
> >  drivers/gpu/drm/i915/i915_reg.h         |  1 +
> >  drivers/gpu/drm/i915/intel_ringbuffer.c | 30 ++++++++++++++++++++++++------
> >  drivers/gpu/drm/i915/intel_ringbuffer.h |  6 +++---
> >  6 files changed, 35 insertions(+), 14 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >index 54182536dc46..d5a4a14d6723 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >@@ -354,12 +354,12 @@ struct drm_i915_error_state {
> >  		u32 ipeir;
> >  		u32 ipehr;
> >  		u32 instdone;
> >-		u32 acthd;
> >  		u32 bbstate;
> >  		u32 instpm;
> >  		u32 instps;
> >  		u32 seqno;
> >  		u64 bbaddr;
> >+		u64 acthd;
> >  		u32 fault_reg;
> >  		u32 faddr;
> >  		u32 rc_psmi; /* sleep state */
> >diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> >index b153a16ead0a..9519aa240614 100644
> >--- a/drivers/gpu/drm/i915/i915_gpu_error.c
> >+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> >@@ -248,7 +248,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
> >  	err_printf(m, "  TAIL: 0x%08x\n", ring->tail);
> >  	err_printf(m, "  CTL: 0x%08x\n", ring->ctl);
> >  	err_printf(m, "  HWS: 0x%08x\n", ring->hws);
> >-	err_printf(m, "  ACTHD: 0x%08x\n", ring->acthd);
> >+	err_printf(m, "  ACTHD: 0x%08llx\n", ring->acthd);
> 
> I thought conclusion elsewhere in the thread for this was to include
> all 64-bits in the output?

Eh? They are... There's a second patch to change the way we print 64bit
values.
 
> >  	err_printf(m, "  IPEIR: 0x%08x\n", ring->ipeir);
> >  	err_printf(m, "  IPEHR: 0x%08x\n", ring->ipehr);
> >  	err_printf(m, "  INSTDONE: 0x%08x\n", ring->instdone);
> >diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> >index 77dbef6af185..9caae9840c78 100644
> >--- a/drivers/gpu/drm/i915/i915_irq.c
> >+++ b/drivers/gpu/drm/i915/i915_irq.c
> >@@ -2507,7 +2507,8 @@ static struct intel_ring_buffer *
> >  semaphore_waits_for(struct intel_ring_buffer *ring, u32 *seqno)
> >  {
> >  	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> >-	u32 cmd, ipehr, acthd, acthd_min;
> >+	u64 acthd, acthd_min;
> >+	u32 cmd, ipehr;
> >
> >  	ipehr = I915_READ(RING_IPEHR(ring->mmio_base));
> >  	if ((ipehr & ~(0x3 << 16)) !=
> >@@ -2563,7 +2564,7 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
> >  }
> >
> >  static enum intel_ring_hangcheck_action
> >-ring_stuck(struct intel_ring_buffer *ring, u32 acthd)
> >+ring_stuck(struct intel_ring_buffer *ring, u64 acthd)
> >  {
> >  	struct drm_device *dev = ring->dev;
> >  	struct drm_i915_private *dev_priv = dev->dev_private;
> >@@ -2631,7 +2632,8 @@ static void i915_hangcheck_elapsed(unsigned long data)
> >  		return;
> >
> >  	for_each_ring(ring, dev_priv, i) {
> >-		u32 seqno, acthd;
> >+		u64 acthd;
> >+		u32 seqno;
> >  		bool busy = true;
> >
> >  		semaphore_clear_deadlocks(dev_priv);
> >diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> >index f010ff7e7e2a..3c464d307a2b 100644
> >--- a/drivers/gpu/drm/i915/i915_reg.h
> >+++ b/drivers/gpu/drm/i915/i915_reg.h
> >@@ -708,6 +708,7 @@ enum punit_power_well {
> >  #define BLT_HWS_PGA_GEN7	(0x04280)
> >  #define VEBOX_HWS_PGA_GEN7	(0x04380)
> >  #define RING_ACTHD(base)	((base)+0x74)
> >+#define RING_ACTHD_UDW(base)	((base)+0x5c)
> >  #define RING_NOPID(base)	((base)+0x94)
> >  #define RING_IMR(base)		((base)+0xa8)
> >  #define RING_TIMESTAMP(base)	((base)+0x358)
> >diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> >index 7a01911c16f8..45d8011e5a6c 100644
> >--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> >+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> >@@ -417,13 +417,28 @@ static void ring_write_tail(struct intel_ring_buffer *ring,
> >  	I915_WRITE_TAIL(ring, value);
> >  }
> >
> >-u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
> >+u64 intel_ring_get_active_head(struct intel_ring_buffer *ring)
> >  {
> >  	drm_i915_private_t *dev_priv = ring->dev->dev_private;
> >-	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
> >-			RING_ACTHD(ring->mmio_base) : ACTHD;
> >+	u64 acthd;
> >
> >-	return I915_READ(acthd_reg);
> >+	if (INTEL_INFO(ring->dev)->gen >= 8) {
> >+		u32 upper, lower, tmp;
> >+
> >+		tmp = I915_READ(RING_ACTHD_UDW(ring->mmio_base));
> >+		do {
> >+			upper = tmp;
> >+			lower = I915_READ(RING_ACTHD(ring->mmio_base));
> >+			tmp = I915_READ(RING_ACTHD_UDW(ring->mmio_base));
> >+		} while (upper != tmp);
> 
> Looks good. Slightly more defensive approach would be to only retry
> once and log something horrible if upper word wraps twice. Ben's
> suggestion to also validate that the lower dword has really wrapped
> makes sense as well, if stuffing more and more conditionals and this
> call path is not a problem.

We don't elsewhere. If you go crazy, there is an indeterminant time
lapse between the two reads, so the wrap around may be undetectable. As
it is an expected condition that will happen eventually, we don't even
need to log it, just handle it. Particularly as I was thinking of
refactoring this as I915_READ64_2x32(lower_reg, upper_reg);
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: Broadwell expands ACTHD to 64bit
  2014-03-21 10:14       ` Chris Wilson
@ 2014-03-21 10:50         ` Tvrtko Ursulin
  2014-03-21 12:00           ` Chris Wilson
  0 siblings, 1 reply; 22+ messages in thread
From: Tvrtko Ursulin @ 2014-03-21 10:50 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx, Ben Widawsky, Timo Aaltonen


On 03/21/2014 10:14 AM, Chris Wilson wrote:
> On Fri, Mar 21, 2014 at 10:03:38AM +0000, Tvrtko Ursulin wrote:
>>
>> On 03/20/2014 09:48 PM, Chris Wilson wrote:
>>> As Broadwell has an increased virtual address size, it requires more
>>> than 32 bits to store offsets into its address space. This includes the
>>> debug registers to track the current HEAD of the individual rings, which
>>> may be anywhere within the per-process address spaces. In order to find
>>> the full location, we need to read the high bits from a second register.
>>> We then also need to expand our storage to keep track of the larger
>>> address.
>>>
>>> v2: Carefully read the two registers to catch wraparound between
>>>      the reads.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Ben Widawsky <benjamin.widawsky@intel.com>
>>> Cc: Timo Aaltonen <tjaalton@ubuntu.com>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_drv.h         |  2 +-
>>>   drivers/gpu/drm/i915/i915_gpu_error.c   |  2 +-
>>>   drivers/gpu/drm/i915/i915_irq.c         |  8 +++++---
>>>   drivers/gpu/drm/i915/i915_reg.h         |  1 +
>>>   drivers/gpu/drm/i915/intel_ringbuffer.c | 30 ++++++++++++++++++++++++------
>>>   drivers/gpu/drm/i915/intel_ringbuffer.h |  6 +++---
>>>   6 files changed, 35 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>> index 54182536dc46..d5a4a14d6723 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -354,12 +354,12 @@ struct drm_i915_error_state {
>>>   		u32 ipeir;
>>>   		u32 ipehr;
>>>   		u32 instdone;
>>> -		u32 acthd;
>>>   		u32 bbstate;
>>>   		u32 instpm;
>>>   		u32 instps;
>>>   		u32 seqno;
>>>   		u64 bbaddr;
>>> +		u64 acthd;
>>>   		u32 fault_reg;
>>>   		u32 faddr;
>>>   		u32 rc_psmi; /* sleep state */
>>> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
>>> index b153a16ead0a..9519aa240614 100644
>>> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
>>> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
>>> @@ -248,7 +248,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
>>>   	err_printf(m, "  TAIL: 0x%08x\n", ring->tail);
>>>   	err_printf(m, "  CTL: 0x%08x\n", ring->ctl);
>>>   	err_printf(m, "  HWS: 0x%08x\n", ring->hws);
>>> -	err_printf(m, "  ACTHD: 0x%08x\n", ring->acthd);
>>> +	err_printf(m, "  ACTHD: 0x%08llx\n", ring->acthd);
>>
>> I thought conclusion elsewhere in the thread for this was to include
>> all 64-bits in the output?
>
> Eh? They are... There's a second patch to change the way we print 64bit
> values.

Ok, missed that.

>>>   	err_printf(m, "  IPEIR: 0x%08x\n", ring->ipeir);
>>>   	err_printf(m, "  IPEHR: 0x%08x\n", ring->ipehr);
>>>   	err_printf(m, "  INSTDONE: 0x%08x\n", ring->instdone);
>>> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
>>> index 77dbef6af185..9caae9840c78 100644
>>> --- a/drivers/gpu/drm/i915/i915_irq.c
>>> +++ b/drivers/gpu/drm/i915/i915_irq.c
>>> @@ -2507,7 +2507,8 @@ static struct intel_ring_buffer *
>>>   semaphore_waits_for(struct intel_ring_buffer *ring, u32 *seqno)
>>>   {
>>>   	struct drm_i915_private *dev_priv = ring->dev->dev_private;
>>> -	u32 cmd, ipehr, acthd, acthd_min;
>>> +	u64 acthd, acthd_min;
>>> +	u32 cmd, ipehr;
>>>
>>>   	ipehr = I915_READ(RING_IPEHR(ring->mmio_base));
>>>   	if ((ipehr & ~(0x3 << 16)) !=
>>> @@ -2563,7 +2564,7 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
>>>   }
>>>
>>>   static enum intel_ring_hangcheck_action
>>> -ring_stuck(struct intel_ring_buffer *ring, u32 acthd)
>>> +ring_stuck(struct intel_ring_buffer *ring, u64 acthd)
>>>   {
>>>   	struct drm_device *dev = ring->dev;
>>>   	struct drm_i915_private *dev_priv = dev->dev_private;
>>> @@ -2631,7 +2632,8 @@ static void i915_hangcheck_elapsed(unsigned long data)
>>>   		return;
>>>
>>>   	for_each_ring(ring, dev_priv, i) {
>>> -		u32 seqno, acthd;
>>> +		u64 acthd;
>>> +		u32 seqno;
>>>   		bool busy = true;
>>>
>>>   		semaphore_clear_deadlocks(dev_priv);
>>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>>> index f010ff7e7e2a..3c464d307a2b 100644
>>> --- a/drivers/gpu/drm/i915/i915_reg.h
>>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>>> @@ -708,6 +708,7 @@ enum punit_power_well {
>>>   #define BLT_HWS_PGA_GEN7	(0x04280)
>>>   #define VEBOX_HWS_PGA_GEN7	(0x04380)
>>>   #define RING_ACTHD(base)	((base)+0x74)
>>> +#define RING_ACTHD_UDW(base)	((base)+0x5c)
>>>   #define RING_NOPID(base)	((base)+0x94)
>>>   #define RING_IMR(base)		((base)+0xa8)
>>>   #define RING_TIMESTAMP(base)	((base)+0x358)
>>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
>>> index 7a01911c16f8..45d8011e5a6c 100644
>>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>>> @@ -417,13 +417,28 @@ static void ring_write_tail(struct intel_ring_buffer *ring,
>>>   	I915_WRITE_TAIL(ring, value);
>>>   }
>>>
>>> -u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
>>> +u64 intel_ring_get_active_head(struct intel_ring_buffer *ring)
>>>   {
>>>   	drm_i915_private_t *dev_priv = ring->dev->dev_private;
>>> -	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
>>> -			RING_ACTHD(ring->mmio_base) : ACTHD;
>>> +	u64 acthd;
>>>
>>> -	return I915_READ(acthd_reg);
>>> +	if (INTEL_INFO(ring->dev)->gen >= 8) {
>>> +		u32 upper, lower, tmp;
>>> +
>>> +		tmp = I915_READ(RING_ACTHD_UDW(ring->mmio_base));
>>> +		do {
>>> +			upper = tmp;
>>> +			lower = I915_READ(RING_ACTHD(ring->mmio_base));
>>> +			tmp = I915_READ(RING_ACTHD_UDW(ring->mmio_base));
>>> +		} while (upper != tmp);
>>
>> Looks good. Slightly more defensive approach would be to only retry
>> once and log something horrible if upper word wraps twice. Ben's
>> suggestion to also validate that the lower dword has really wrapped
>> makes sense as well, if stuffing more and more conditionals and this
>> call path is not a problem.
>
> We don't elsewhere. If you go crazy, there is an indeterminant time
> lapse between the two reads, so the wrap around may be undetectable. As
> it is an expected condition that will happen eventually, we don't even
> need to log it, just handle it. Particularly as I was thinking of

No, think you misunderstood me. I said "slightly more defensive" just in 
the sense that in case of weird hardware failures you have a potentially 
infinite loop now, where you don't really need a loop - probabilities 
strongly suggest you cannot get two upper dword wraps between the reads. 
So it is enough to read the upper dword twice, without the loop. Same 
effect, slightly more defensive in reality.

Lower dword WARN (What Ben suggests I think) would be another level of 
defensiveness, to double-check wrap looks sensible if it was detected.

> refactoring this as I915_READ64_2x32(lower_reg, upper_reg);

Thats good.

Tvrtko

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: Broadwell expands ACTHD to 64bit
  2014-03-21 10:50         ` Tvrtko Ursulin
@ 2014-03-21 12:00           ` Chris Wilson
  2014-03-21 12:05             ` [PATCH] drm/i915: Split 64bit hexadecimal addresses to make them easier to read Chris Wilson
                               ` (2 more replies)
  0 siblings, 3 replies; 22+ messages in thread
From: Chris Wilson @ 2014-03-21 12:00 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx, Ben Widawsky

On Fri, Mar 21, 2014 at 10:50:05AM +0000, Tvrtko Ursulin wrote:
> No, think you misunderstood me. I said "slightly more defensive"
> just in the sense that in case of weird hardware failures you have a
> potentially infinite loop now, where you don't really need a loop -
> probabilities strongly suggest you cannot get two upper dword wraps
> between the reads. So it is enough to read the upper dword twice,
> without the loop. Same effect, slightly more defensive in reality.

Yup, misunderstood what you wanted. If in doubt, C is much more
concise ;-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 45d8011..8c82316 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -425,12 +425,14 @@ u64 intel_ring_get_active_head(struct intel_ring_buffer *ring)
        if (INTEL_INFO(ring->dev)->gen >= 8) {
                u32 upper, lower, tmp;
 
+               upper = I915_READ(RING_ACTHD_UDW(ring->mmio_base));
+               lower = I915_READ(RING_ACTHD(ring->mmio_base));
                tmp = I915_READ(RING_ACTHD_UDW(ring->mmio_base));
-               do {
+               if (upper != tmp) {
                        upper = tmp;
                        lower = I915_READ(RING_ACTHD(ring->mmio_base));
-                       tmp = I915_READ(RING_ACTHD_UDW(ring->mmio_base));
-               } while (upper != tmp);
+                       WARN_ON(I915_READ(RING_ACTHD_UDW(ring->mmio_base) != upper);
+               }
 
                acthd = (u64)upper << 32 | lower;
        } else if (INTEL_INFO(ring->dev)->gen >= 4)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH] drm/i915: Split 64bit hexadecimal addresses to make them easier to read
  2014-03-21 12:00           ` Chris Wilson
@ 2014-03-21 12:05             ` Chris Wilson
  2014-03-21 12:19             ` [PATCH] drm/i915: Broadwell expands ACTHD to 64bit Tvrtko Ursulin
  2014-03-21 12:41             ` Chris Wilson
  2 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2014-03-21 12:05 UTC (permalink / raw)
  To: intel-gfx

Broadwell introduces large address spaces, greater than 32bits in width.
These require that we then store and print 64bit values. If we were to
zero pad them out to 16 hexadecimal places, we have to carefully count
the leading zeroes - which is easy to make a mistake. Conversely, if we
do not zero pad out to 16, but keep it padding to 8 hexadecimal places,
it is very easy to miss an address that is actually larger than 4GiB. A
suggested compromise is to insert a space between the upper and lower
dwords of the address so that we can continue with our accustom 32bit
parser. (Alternatively, we could do the equivalent in our userspace
decoder.)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gpu_error.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 9519aa2..b9d1017 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -248,12 +248,12 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
 	err_printf(m, "  TAIL: 0x%08x\n", ring->tail);
 	err_printf(m, "  CTL: 0x%08x\n", ring->ctl);
 	err_printf(m, "  HWS: 0x%08x\n", ring->hws);
-	err_printf(m, "  ACTHD: 0x%08llx\n", ring->acthd);
+	err_printf(m, "  ACTHD: 0x%08x %08x\n", (u32)(ring->acthd>>32), (u32)ring->acthd);
 	err_printf(m, "  IPEIR: 0x%08x\n", ring->ipeir);
 	err_printf(m, "  IPEHR: 0x%08x\n", ring->ipehr);
 	err_printf(m, "  INSTDONE: 0x%08x\n", ring->instdone);
 	if (INTEL_INFO(dev)->gen >= 4) {
-		err_printf(m, "  BBADDR: 0x%08llx\n", ring->bbaddr);
+		err_printf(m, "  BBADDR: 0x%08x %08x\n", (u32)(ring->bbaddr>>32), (u32)ring->bbaddr);
 		err_printf(m, "  BB_STATE: 0x%08x\n", ring->bbstate);
 		err_printf(m, "  INSTPS: 0x%08x\n", ring->instps);
 	}
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: Broadwell expands ACTHD to 64bit
  2014-03-21 12:00           ` Chris Wilson
  2014-03-21 12:05             ` [PATCH] drm/i915: Split 64bit hexadecimal addresses to make them easier to read Chris Wilson
@ 2014-03-21 12:19             ` Tvrtko Ursulin
  2014-03-21 12:41             ` Chris Wilson
  2 siblings, 0 replies; 22+ messages in thread
From: Tvrtko Ursulin @ 2014-03-21 12:19 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx, Ben Widawsky, Timo Aaltonen


On 03/21/2014 12:00 PM, Chris Wilson wrote:
> On Fri, Mar 21, 2014 at 10:50:05AM +0000, Tvrtko Ursulin wrote:
>> No, think you misunderstood me. I said "slightly more defensive"
>> just in the sense that in case of weird hardware failures you have a
>> potentially infinite loop now, where you don't really need a loop -
>> probabilities strongly suggest you cannot get two upper dword wraps
>> between the reads. So it is enough to read the upper dword twice,
>> without the loop. Same effect, slightly more defensive in reality.
>
> Yup, misunderstood what you wanted. If in doubt, C is much more
> concise ;-)
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 45d8011..8c82316 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -425,12 +425,14 @@ u64 intel_ring_get_active_head(struct intel_ring_buffer *ring)
>          if (INTEL_INFO(ring->dev)->gen >= 8) {
>                  u32 upper, lower, tmp;
>
> +               upper = I915_READ(RING_ACTHD_UDW(ring->mmio_base));
> +               lower = I915_READ(RING_ACTHD(ring->mmio_base));
>                  tmp = I915_READ(RING_ACTHD_UDW(ring->mmio_base));
> -               do {
> +               if (upper != tmp) {
>                          upper = tmp;
>                          lower = I915_READ(RING_ACTHD(ring->mmio_base));
> -                       tmp = I915_READ(RING_ACTHD_UDW(ring->mmio_base));
> -               } while (upper != tmp);
> +                       WARN_ON(I915_READ(RING_ACTHD_UDW(ring->mmio_base) != upper);
> +               }
>
>                  acthd = (u64)upper << 32 | lower;
>          } else if (INTEL_INFO(ring->dev)->gen >= 4)

Yes, I was just uneasy with the loop. Also Ben's suggestion in case of 
wrap was I think:

WARN_ON(I915_READ(RING_ACTHD(ring->mmio_base) >= lower);

Or in other words, if we have observed the upper wrap, check that the 
lower matches with that observation. But I feel bad now that we are 
over-engineering this. Perhaps these WARNs are just silly.

Tvrtko

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH] drm/i915: Broadwell expands ACTHD to 64bit
  2014-03-21 12:00           ` Chris Wilson
  2014-03-21 12:05             ` [PATCH] drm/i915: Split 64bit hexadecimal addresses to make them easier to read Chris Wilson
  2014-03-21 12:19             ` [PATCH] drm/i915: Broadwell expands ACTHD to 64bit Tvrtko Ursulin
@ 2014-03-21 12:41             ` Chris Wilson
  2014-03-25  2:41               ` Ben Widawsky
  2014-03-27  0:09               ` Ben Widawsky
  2 siblings, 2 replies; 22+ messages in thread
From: Chris Wilson @ 2014-03-21 12:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ben Widawsky

As Broadwell has an increased virtual address size, it requires more
than 32 bits to store offsets into its address space. This includes the
debug registers to track the current HEAD of the individual rings, which
may be anywhere within the per-process address spaces. In order to find
the full location, we need to read the high bits from a second register.
We then also need to expand our storage to keep track of the larger
address.

v2: Carefully read the two registers to catch wraparound between
    the reads.
v3: Use a WARN_ON rather than loop indefinitely on an unstable
    register read.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Timo Aaltonen <tjaalton@ubuntu.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |   13 ++++++++++++-
 drivers/gpu/drm/i915/i915_gpu_error.c   |    2 +-
 drivers/gpu/drm/i915/i915_irq.c         |    8 +++++---
 drivers/gpu/drm/i915/i915_reg.h         |    1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |   22 ++++++++++++++++------
 drivers/gpu/drm/i915/intel_ringbuffer.h |    6 +++---
 6 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5418253..4c09fb2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -354,12 +354,12 @@ struct drm_i915_error_state {
 		u32 ipeir;
 		u32 ipehr;
 		u32 instdone;
-		u32 acthd;
 		u32 bbstate;
 		u32 instpm;
 		u32 instps;
 		u32 seqno;
 		u64 bbaddr;
+		u64 acthd;
 		u32 fault_reg;
 		u32 faddr;
 		u32 rc_psmi; /* sleep state */
@@ -2786,6 +2786,17 @@ void vlv_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine);
 #define I915_WRITE64(reg, val)	dev_priv->uncore.funcs.mmio_writeq(dev_priv, (reg), (val), true)
 #define I915_READ64(reg)	dev_priv->uncore.funcs.mmio_readq(dev_priv, (reg), true)
 
+#define I915_READ64_2x32(lower_reg, upper_reg) ({			\
+		u32 upper = I915_READ(upper_reg);			\
+		u32 lower = I915_READ(lower_reg);			\
+		u32 tmp = I915_READ(upper_reg);				\
+		if (upper != tmp) {					\
+			upper = tmp;					\
+			lower = I915_READ(lower_reg);			\
+			WARN_ON(I915_READ(upper_reg) != upper);		\
+		}							\
+		(u64)upper << 32 | lower; })
+
 #define POSTING_READ(reg)	(void)I915_READ_NOTRACE(reg)
 #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index b153a16..9519aa2 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -248,7 +248,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
 	err_printf(m, "  TAIL: 0x%08x\n", ring->tail);
 	err_printf(m, "  CTL: 0x%08x\n", ring->ctl);
 	err_printf(m, "  HWS: 0x%08x\n", ring->hws);
-	err_printf(m, "  ACTHD: 0x%08x\n", ring->acthd);
+	err_printf(m, "  ACTHD: 0x%08llx\n", ring->acthd);
 	err_printf(m, "  IPEIR: 0x%08x\n", ring->ipeir);
 	err_printf(m, "  IPEHR: 0x%08x\n", ring->ipehr);
 	err_printf(m, "  INSTDONE: 0x%08x\n", ring->instdone);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 77dbef6..9caae98 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2507,7 +2507,8 @@ static struct intel_ring_buffer *
 semaphore_waits_for(struct intel_ring_buffer *ring, u32 *seqno)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	u32 cmd, ipehr, acthd, acthd_min;
+	u64 acthd, acthd_min;
+	u32 cmd, ipehr;
 
 	ipehr = I915_READ(RING_IPEHR(ring->mmio_base));
 	if ((ipehr & ~(0x3 << 16)) !=
@@ -2563,7 +2564,7 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
 }
 
 static enum intel_ring_hangcheck_action
-ring_stuck(struct intel_ring_buffer *ring, u32 acthd)
+ring_stuck(struct intel_ring_buffer *ring, u64 acthd)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2631,7 +2632,8 @@ static void i915_hangcheck_elapsed(unsigned long data)
 		return;
 
 	for_each_ring(ring, dev_priv, i) {
-		u32 seqno, acthd;
+		u64 acthd;
+		u32 seqno;
 		bool busy = true;
 
 		semaphore_clear_deadlocks(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f010ff7..3c464d3 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -708,6 +708,7 @@ enum punit_power_well {
 #define BLT_HWS_PGA_GEN7	(0x04280)
 #define VEBOX_HWS_PGA_GEN7	(0x04380)
 #define RING_ACTHD(base)	((base)+0x74)
+#define RING_ACTHD_UDW(base)	((base)+0x5c)
 #define RING_NOPID(base)	((base)+0x94)
 #define RING_IMR(base)		((base)+0xa8)
 #define RING_TIMESTAMP(base)	((base)+0x358)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7a01911..ce8ddee 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -417,13 +417,20 @@ static void ring_write_tail(struct intel_ring_buffer *ring,
 	I915_WRITE_TAIL(ring, value);
 }
 
-u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
+u64 intel_ring_get_active_head(struct intel_ring_buffer *ring)
 {
 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
-	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
-			RING_ACTHD(ring->mmio_base) : ACTHD;
+	u64 acthd;
 
-	return I915_READ(acthd_reg);
+	if (INTEL_INFO(ring->dev)->gen >= 8)
+		acthd = I915_READ64_2x32(RING_ACTHD(ring->mmio_base),
+					 RING_ACTHD_UDW(ring->mmio_base));
+	else if (INTEL_INFO(ring->dev)->gen >= 4)
+		acthd = I915_READ(RING_ACTHD(ring->mmio_base));
+	else
+		acthd = I915_READ(ACTHD);
+
+	return acthd;
 }
 
 static void ring_setup_phys_status_page(struct intel_ring_buffer *ring)
@@ -820,8 +827,11 @@ gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
 	/* Workaround to force correct ordering between irq and seqno writes on
 	 * ivb (and maybe also on snb) by reading from a CS register (like
 	 * ACTHD) before reading the status page. */
-	if (!lazy_coherency)
-		intel_ring_get_active_head(ring);
+	if (!lazy_coherency) {
+		struct drm_i915_private *dev_priv = ring->dev->dev_private;
+		POSTING_READ(RING_ACTHD(ring->mmio_base));
+	}
+
 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 69b9086..e2872c6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -46,11 +46,11 @@ enum intel_ring_hangcheck_action {
 #define HANGCHECK_SCORE_RING_HUNG 31
 
 struct intel_ring_hangcheck {
-	bool deadlock;
+	u64 acthd;
 	u32 seqno;
-	u32 acthd;
 	int score;
 	enum intel_ring_hangcheck_action action;
+	bool deadlock;
 };
 
 struct  intel_ring_buffer {
@@ -294,7 +294,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev);
 int intel_init_blt_ring_buffer(struct drm_device *dev);
 int intel_init_vebox_ring_buffer(struct drm_device *dev);
 
-u32 intel_ring_get_active_head(struct intel_ring_buffer *ring);
+u64 intel_ring_get_active_head(struct intel_ring_buffer *ring);
 void intel_ring_setup_status_page(struct intel_ring_buffer *ring);
 
 static inline u32 intel_ring_get_tail(struct intel_ring_buffer *ring)
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: Broadwell expands ACTHD to 64bit
  2014-03-21 12:41             ` Chris Wilson
@ 2014-03-25  2:41               ` Ben Widawsky
  2014-03-25  2:43                 ` Ben Widawsky
  2014-03-27  0:09               ` Ben Widawsky
  1 sibling, 1 reply; 22+ messages in thread
From: Ben Widawsky @ 2014-03-25  2:41 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Mar 21, 2014 at 12:41:53PM +0000, Chris Wilson wrote:
> As Broadwell has an increased virtual address size, it requires more
> than 32 bits to store offsets into its address space. This includes the
> debug registers to track the current HEAD of the individual rings, which
> may be anywhere within the per-process address spaces. In order to find
> the full location, we need to read the high bits from a second register.
> We then also need to expand our storage to keep track of the larger
> address.
> 
> v2: Carefully read the two registers to catch wraparound between
>     the reads.
> v3: Use a WARN_ON rather than loop indefinitely on an unstable
>     register read.
> 

I truly feel bad for saying this at v3, but we can probably simplify
this.  Unless I am missing something, we only actually care about the
value of acthd in:

if (ring->hangcheck.acthd != acthd)
	return HANGCHECK_ACTIVE;

I think therefore it would be safe to make hangcheck.acthd a u64.
Compare the lower dword. If they're not equal, then we're done. If they
are equal, compare the UDW. If UDW doesn't match, we're done. If UDW
does match, do another read of the LDW and call it good:

ring_stuck(u32 acthd)
{
	if (lower_32_bits(ring->hangcheck.acthd) != acthd)
		return HANGCHECK_ACTIVE;
	else if (upper_32_bits(ring->hangcheck.acthd) !=
			I915_READ(ACTHD_UDW))
		return HANGCHECK_ACTIVE
	else if (acthd != I915_READ(RING_ACTHD))
		return HANGCHECK_ACTIVE;
}

After writing that, I'm not really sure how much less complex it is, but I
think it gets the same job done. Potentially there is some MMIO savings,
but I'd guess it to be negligible.

Jesse, please request ACTHD is expanded to a proper 64b register for
gen100000000.


> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> Cc: Timo Aaltonen <tjaalton@ubuntu.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         |   13 ++++++++++++-
>  drivers/gpu/drm/i915/i915_gpu_error.c   |    2 +-
>  drivers/gpu/drm/i915/i915_irq.c         |    8 +++++---
>  drivers/gpu/drm/i915/i915_reg.h         |    1 +
>  drivers/gpu/drm/i915/intel_ringbuffer.c |   22 ++++++++++++++++------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |    6 +++---
>  6 files changed, 38 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5418253..4c09fb2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -354,12 +354,12 @@ struct drm_i915_error_state {
>  		u32 ipeir;
>  		u32 ipehr;
>  		u32 instdone;
> -		u32 acthd;
>  		u32 bbstate;
>  		u32 instpm;
>  		u32 instps;
>  		u32 seqno;
>  		u64 bbaddr;
> +		u64 acthd;
>  		u32 fault_reg;
>  		u32 faddr;
>  		u32 rc_psmi; /* sleep state */
> @@ -2786,6 +2786,17 @@ void vlv_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine);
>  #define I915_WRITE64(reg, val)	dev_priv->uncore.funcs.mmio_writeq(dev_priv, (reg), (val), true)
>  #define I915_READ64(reg)	dev_priv->uncore.funcs.mmio_readq(dev_priv, (reg), true)
>  
> +#define I915_READ64_2x32(lower_reg, upper_reg) ({			\
> +		u32 upper = I915_READ(upper_reg);			\
> +		u32 lower = I915_READ(lower_reg);			\
> +		u32 tmp = I915_READ(upper_reg);				\
> +		if (upper != tmp) {					\
> +			upper = tmp;					\
> +			lower = I915_READ(lower_reg);			\
> +			WARN_ON(I915_READ(upper_reg) != upper);		\
> +		}							\
> +		(u64)upper << 32 | lower; })
> +
>  #define POSTING_READ(reg)	(void)I915_READ_NOTRACE(reg)
>  #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)
>  
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index b153a16..9519aa2 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -248,7 +248,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
>  	err_printf(m, "  TAIL: 0x%08x\n", ring->tail);
>  	err_printf(m, "  CTL: 0x%08x\n", ring->ctl);
>  	err_printf(m, "  HWS: 0x%08x\n", ring->hws);
> -	err_printf(m, "  ACTHD: 0x%08x\n", ring->acthd);
> +	err_printf(m, "  ACTHD: 0x%08llx\n", ring->acthd);
>  	err_printf(m, "  IPEIR: 0x%08x\n", ring->ipeir);
>  	err_printf(m, "  IPEHR: 0x%08x\n", ring->ipehr);
>  	err_printf(m, "  INSTDONE: 0x%08x\n", ring->instdone);
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 77dbef6..9caae98 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2507,7 +2507,8 @@ static struct intel_ring_buffer *
>  semaphore_waits_for(struct intel_ring_buffer *ring, u32 *seqno)
>  {
>  	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> -	u32 cmd, ipehr, acthd, acthd_min;
> +	u64 acthd, acthd_min;
> +	u32 cmd, ipehr;
>  
>  	ipehr = I915_READ(RING_IPEHR(ring->mmio_base));
>  	if ((ipehr & ~(0x3 << 16)) !=
> @@ -2563,7 +2564,7 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
>  }
>  
>  static enum intel_ring_hangcheck_action
> -ring_stuck(struct intel_ring_buffer *ring, u32 acthd)
> +ring_stuck(struct intel_ring_buffer *ring, u64 acthd)
>  {
>  	struct drm_device *dev = ring->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -2631,7 +2632,8 @@ static void i915_hangcheck_elapsed(unsigned long data)
>  		return;
>  
>  	for_each_ring(ring, dev_priv, i) {
> -		u32 seqno, acthd;
> +		u64 acthd;
> +		u32 seqno;
>  		bool busy = true;
>  
>  		semaphore_clear_deadlocks(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index f010ff7..3c464d3 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -708,6 +708,7 @@ enum punit_power_well {
>  #define BLT_HWS_PGA_GEN7	(0x04280)
>  #define VEBOX_HWS_PGA_GEN7	(0x04380)
>  #define RING_ACTHD(base)	((base)+0x74)
> +#define RING_ACTHD_UDW(base)	((base)+0x5c)
>  #define RING_NOPID(base)	((base)+0x94)
>  #define RING_IMR(base)		((base)+0xa8)
>  #define RING_TIMESTAMP(base)	((base)+0x358)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 7a01911..ce8ddee 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -417,13 +417,20 @@ static void ring_write_tail(struct intel_ring_buffer *ring,
>  	I915_WRITE_TAIL(ring, value);
>  }
>  
> -u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
> +u64 intel_ring_get_active_head(struct intel_ring_buffer *ring)
>  {
>  	drm_i915_private_t *dev_priv = ring->dev->dev_private;
> -	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
> -			RING_ACTHD(ring->mmio_base) : ACTHD;
> +	u64 acthd;
>  
> -	return I915_READ(acthd_reg);
> +	if (INTEL_INFO(ring->dev)->gen >= 8)
> +		acthd = I915_READ64_2x32(RING_ACTHD(ring->mmio_base),
> +					 RING_ACTHD_UDW(ring->mmio_base));
> +	else if (INTEL_INFO(ring->dev)->gen >= 4)
> +		acthd = I915_READ(RING_ACTHD(ring->mmio_base));
> +	else
> +		acthd = I915_READ(ACTHD);
> +
> +	return acthd;
>  }
>  
>  static void ring_setup_phys_status_page(struct intel_ring_buffer *ring)
> @@ -820,8 +827,11 @@ gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
>  	/* Workaround to force correct ordering between irq and seqno writes on
>  	 * ivb (and maybe also on snb) by reading from a CS register (like
>  	 * ACTHD) before reading the status page. */
> -	if (!lazy_coherency)
> -		intel_ring_get_active_head(ring);
> +	if (!lazy_coherency) {
> +		struct drm_i915_private *dev_priv = ring->dev->dev_private;
> +		POSTING_READ(RING_ACTHD(ring->mmio_base));
> +	}
> +
>  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 69b9086..e2872c6 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -46,11 +46,11 @@ enum intel_ring_hangcheck_action {
>  #define HANGCHECK_SCORE_RING_HUNG 31
>  
>  struct intel_ring_hangcheck {
> -	bool deadlock;
> +	u64 acthd;
>  	u32 seqno;
> -	u32 acthd;
>  	int score;
>  	enum intel_ring_hangcheck_action action;
> +	bool deadlock;
>  };
>  
>  struct  intel_ring_buffer {
> @@ -294,7 +294,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev);
>  int intel_init_blt_ring_buffer(struct drm_device *dev);
>  int intel_init_vebox_ring_buffer(struct drm_device *dev);
>  
> -u32 intel_ring_get_active_head(struct intel_ring_buffer *ring);
> +u64 intel_ring_get_active_head(struct intel_ring_buffer *ring);
>  void intel_ring_setup_status_page(struct intel_ring_buffer *ring);
>  
>  static inline u32 intel_ring_get_tail(struct intel_ring_buffer *ring)
> -- 
> 1.7.9.5
> 

-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: Broadwell expands ACTHD to 64bit
  2014-03-25  2:41               ` Ben Widawsky
@ 2014-03-25  2:43                 ` Ben Widawsky
  2014-03-25  7:31                   ` Chris Wilson
  0 siblings, 1 reply; 22+ messages in thread
From: Ben Widawsky @ 2014-03-25  2:43 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx

On Mon, Mar 24, 2014 at 07:41:17PM -0700, Ben Widawsky wrote:
> On Fri, Mar 21, 2014 at 12:41:53PM +0000, Chris Wilson wrote:
> > As Broadwell has an increased virtual address size, it requires more
> > than 32 bits to store offsets into its address space. This includes the
> > debug registers to track the current HEAD of the individual rings, which
> > may be anywhere within the per-process address spaces. In order to find
> > the full location, we need to read the high bits from a second register.
> > We then also need to expand our storage to keep track of the larger
> > address.
> > 
> > v2: Carefully read the two registers to catch wraparound between
> >     the reads.
> > v3: Use a WARN_ON rather than loop indefinitely on an unstable
> >     register read.
> > 
> 
> I truly feel bad for saying this at v3, but we can probably simplify
> this.  Unless I am missing something, we only actually care about the
> value of acthd in:
> 
> if (ring->hangcheck.acthd != acthd)
> 	return HANGCHECK_ACTIVE;
> 
> I think therefore it would be safe to make hangcheck.acthd a u64.
> Compare the lower dword. If they're not equal, then we're done. If they
> are equal, compare the UDW. If UDW doesn't match, we're done. If UDW
> does match, do another read of the LDW and call it good:
> 
> ring_stuck(u32 acthd)
> {
> 	if (lower_32_bits(ring->hangcheck.acthd) != acthd)
> 		return HANGCHECK_ACTIVE;
> 	else if (upper_32_bits(ring->hangcheck.acthd) !=
> 			I915_READ(ACTHD_UDW))
> 		return HANGCHECK_ACTIVE
> 	else if (acthd != I915_READ(RING_ACTHD))
> 		return HANGCHECK_ACTIVE;
> }
> 
> After writing that, I'm not really sure how much less complex it is, but I
> think it gets the same job done. Potentially there is some MMIO savings,
> but I'd guess it to be negligible.
> 
> Jesse, please request ACTHD is expanded to a proper 64b register for
> gen100000000.

Right after I sent that, I realized that doesn't provide for potentially
corrupting ring->hangcheck.acthd. So I am back to thinking this method
is better.

> 
> 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> > Cc: Timo Aaltonen <tjaalton@ubuntu.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h         |   13 ++++++++++++-
> >  drivers/gpu/drm/i915/i915_gpu_error.c   |    2 +-
> >  drivers/gpu/drm/i915/i915_irq.c         |    8 +++++---
> >  drivers/gpu/drm/i915/i915_reg.h         |    1 +
> >  drivers/gpu/drm/i915/intel_ringbuffer.c |   22 ++++++++++++++++------
> >  drivers/gpu/drm/i915/intel_ringbuffer.h |    6 +++---
> >  6 files changed, 38 insertions(+), 14 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 5418253..4c09fb2 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -354,12 +354,12 @@ struct drm_i915_error_state {
> >  		u32 ipeir;
> >  		u32 ipehr;
> >  		u32 instdone;
> > -		u32 acthd;
> >  		u32 bbstate;
> >  		u32 instpm;
> >  		u32 instps;
> >  		u32 seqno;
> >  		u64 bbaddr;
> > +		u64 acthd;
> >  		u32 fault_reg;
> >  		u32 faddr;
> >  		u32 rc_psmi; /* sleep state */
> > @@ -2786,6 +2786,17 @@ void vlv_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine);
> >  #define I915_WRITE64(reg, val)	dev_priv->uncore.funcs.mmio_writeq(dev_priv, (reg), (val), true)
> >  #define I915_READ64(reg)	dev_priv->uncore.funcs.mmio_readq(dev_priv, (reg), true)
> >  
> > +#define I915_READ64_2x32(lower_reg, upper_reg) ({			\
> > +		u32 upper = I915_READ(upper_reg);			\
> > +		u32 lower = I915_READ(lower_reg);			\
> > +		u32 tmp = I915_READ(upper_reg);				\
> > +		if (upper != tmp) {					\
> > +			upper = tmp;					\
> > +			lower = I915_READ(lower_reg);			\
> > +			WARN_ON(I915_READ(upper_reg) != upper);		\
> > +		}							\
> > +		(u64)upper << 32 | lower; })
> > +
> >  #define POSTING_READ(reg)	(void)I915_READ_NOTRACE(reg)
> >  #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> > index b153a16..9519aa2 100644
> > --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> > @@ -248,7 +248,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
> >  	err_printf(m, "  TAIL: 0x%08x\n", ring->tail);
> >  	err_printf(m, "  CTL: 0x%08x\n", ring->ctl);
> >  	err_printf(m, "  HWS: 0x%08x\n", ring->hws);
> > -	err_printf(m, "  ACTHD: 0x%08x\n", ring->acthd);
> > +	err_printf(m, "  ACTHD: 0x%08llx\n", ring->acthd);
> >  	err_printf(m, "  IPEIR: 0x%08x\n", ring->ipeir);
> >  	err_printf(m, "  IPEHR: 0x%08x\n", ring->ipehr);
> >  	err_printf(m, "  INSTDONE: 0x%08x\n", ring->instdone);
> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > index 77dbef6..9caae98 100644
> > --- a/drivers/gpu/drm/i915/i915_irq.c
> > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > @@ -2507,7 +2507,8 @@ static struct intel_ring_buffer *
> >  semaphore_waits_for(struct intel_ring_buffer *ring, u32 *seqno)
> >  {
> >  	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> > -	u32 cmd, ipehr, acthd, acthd_min;
> > +	u64 acthd, acthd_min;
> > +	u32 cmd, ipehr;
> >  
> >  	ipehr = I915_READ(RING_IPEHR(ring->mmio_base));
> >  	if ((ipehr & ~(0x3 << 16)) !=
> > @@ -2563,7 +2564,7 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
> >  }
> >  
> >  static enum intel_ring_hangcheck_action
> > -ring_stuck(struct intel_ring_buffer *ring, u32 acthd)
> > +ring_stuck(struct intel_ring_buffer *ring, u64 acthd)
> >  {
> >  	struct drm_device *dev = ring->dev;
> >  	struct drm_i915_private *dev_priv = dev->dev_private;
> > @@ -2631,7 +2632,8 @@ static void i915_hangcheck_elapsed(unsigned long data)
> >  		return;
> >  
> >  	for_each_ring(ring, dev_priv, i) {
> > -		u32 seqno, acthd;
> > +		u64 acthd;
> > +		u32 seqno;
> >  		bool busy = true;
> >  
> >  		semaphore_clear_deadlocks(dev_priv);
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > index f010ff7..3c464d3 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -708,6 +708,7 @@ enum punit_power_well {
> >  #define BLT_HWS_PGA_GEN7	(0x04280)
> >  #define VEBOX_HWS_PGA_GEN7	(0x04380)
> >  #define RING_ACTHD(base)	((base)+0x74)
> > +#define RING_ACTHD_UDW(base)	((base)+0x5c)
> >  #define RING_NOPID(base)	((base)+0x94)
> >  #define RING_IMR(base)		((base)+0xa8)
> >  #define RING_TIMESTAMP(base)	((base)+0x358)
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > index 7a01911..ce8ddee 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > @@ -417,13 +417,20 @@ static void ring_write_tail(struct intel_ring_buffer *ring,
> >  	I915_WRITE_TAIL(ring, value);
> >  }
> >  
> > -u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
> > +u64 intel_ring_get_active_head(struct intel_ring_buffer *ring)
> >  {
> >  	drm_i915_private_t *dev_priv = ring->dev->dev_private;
> > -	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
> > -			RING_ACTHD(ring->mmio_base) : ACTHD;
> > +	u64 acthd;
> >  
> > -	return I915_READ(acthd_reg);
> > +	if (INTEL_INFO(ring->dev)->gen >= 8)
> > +		acthd = I915_READ64_2x32(RING_ACTHD(ring->mmio_base),
> > +					 RING_ACTHD_UDW(ring->mmio_base));
> > +	else if (INTEL_INFO(ring->dev)->gen >= 4)
> > +		acthd = I915_READ(RING_ACTHD(ring->mmio_base));
> > +	else
> > +		acthd = I915_READ(ACTHD);
> > +
> > +	return acthd;
> >  }
> >  
> >  static void ring_setup_phys_status_page(struct intel_ring_buffer *ring)
> > @@ -820,8 +827,11 @@ gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
> >  	/* Workaround to force correct ordering between irq and seqno writes on
> >  	 * ivb (and maybe also on snb) by reading from a CS register (like
> >  	 * ACTHD) before reading the status page. */
> > -	if (!lazy_coherency)
> > -		intel_ring_get_active_head(ring);
> > +	if (!lazy_coherency) {
> > +		struct drm_i915_private *dev_priv = ring->dev->dev_private;
> > +		POSTING_READ(RING_ACTHD(ring->mmio_base));
> > +	}
> > +
> >  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> >  }
> >  
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > index 69b9086..e2872c6 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > @@ -46,11 +46,11 @@ enum intel_ring_hangcheck_action {
> >  #define HANGCHECK_SCORE_RING_HUNG 31
> >  
> >  struct intel_ring_hangcheck {
> > -	bool deadlock;
> > +	u64 acthd;
> >  	u32 seqno;
> > -	u32 acthd;
> >  	int score;
> >  	enum intel_ring_hangcheck_action action;
> > +	bool deadlock;
> >  };
> >  
> >  struct  intel_ring_buffer {
> > @@ -294,7 +294,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev);
> >  int intel_init_blt_ring_buffer(struct drm_device *dev);
> >  int intel_init_vebox_ring_buffer(struct drm_device *dev);
> >  
> > -u32 intel_ring_get_active_head(struct intel_ring_buffer *ring);
> > +u64 intel_ring_get_active_head(struct intel_ring_buffer *ring);
> >  void intel_ring_setup_status_page(struct intel_ring_buffer *ring);
> >  
> >  static inline u32 intel_ring_get_tail(struct intel_ring_buffer *ring)
> > -- 
> > 1.7.9.5
> > 
> 
> -- 
> Ben Widawsky, Intel Open Source Technology Center
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: Broadwell expands ACTHD to 64bit
  2014-03-25  2:43                 ` Ben Widawsky
@ 2014-03-25  7:31                   ` Chris Wilson
  0 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2014-03-25  7:31 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx, Ben Widawsky

On Mon, Mar 24, 2014 at 07:43:48PM -0700, Ben Widawsky wrote:
> On Mon, Mar 24, 2014 at 07:41:17PM -0700, Ben Widawsky wrote:
> > On Fri, Mar 21, 2014 at 12:41:53PM +0000, Chris Wilson wrote:
> > > As Broadwell has an increased virtual address size, it requires more
> > > than 32 bits to store offsets into its address space. This includes the
> > > debug registers to track the current HEAD of the individual rings, which
> > > may be anywhere within the per-process address spaces. In order to find
> > > the full location, we need to read the high bits from a second register.
> > > We then also need to expand our storage to keep track of the larger
> > > address.
> > > 
> > > v2: Carefully read the two registers to catch wraparound between
> > >     the reads.
> > > v3: Use a WARN_ON rather than loop indefinitely on an unstable
> > >     register read.
> > > 
> > 
> > I truly feel bad for saying this at v3, but we can probably simplify
> > this.  Unless I am missing something, we only actually care about the
> > value of acthd in:
> > 
> > if (ring->hangcheck.acthd != acthd)
> > 	return HANGCHECK_ACTIVE;
> > 
> > I think therefore it would be safe to make hangcheck.acthd a u64.
> > Compare the lower dword. If they're not equal, then we're done. If they
> > are equal, compare the UDW. If UDW doesn't match, we're done. If UDW
> > does match, do another read of the LDW and call it good:
> > 
> > ring_stuck(u32 acthd)
> > {
> > 	if (lower_32_bits(ring->hangcheck.acthd) != acthd)
> > 		return HANGCHECK_ACTIVE;
> > 	else if (upper_32_bits(ring->hangcheck.acthd) !=
> > 			I915_READ(ACTHD_UDW))
> > 		return HANGCHECK_ACTIVE
> > 	else if (acthd != I915_READ(RING_ACTHD))
> > 		return HANGCHECK_ACTIVE;
> > }
> > 
> > After writing that, I'm not really sure how much less complex it is, but I
> > think it gets the same job done. Potentially there is some MMIO savings,
> > but I'd guess it to be negligible.
> > 
> > Jesse, please request ACTHD is expanded to a proper 64b register for
> > gen100000000.
> 
> Right after I sent that, I realized that doesn't provide for potentially
> corrupting ring->hangcheck.acthd. So I am back to thinking this method
> is better.

Plus having read64_2x32 should make it easier to dtrt next time.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: Broadwell expands ACTHD to 64bit
  2014-03-21 12:41             ` Chris Wilson
  2014-03-25  2:41               ` Ben Widawsky
@ 2014-03-27  0:09               ` Ben Widawsky
  2014-03-27  7:32                 ` Daniel Vetter
  2014-03-27  7:45                 ` Chris Wilson
  1 sibling, 2 replies; 22+ messages in thread
From: Ben Widawsky @ 2014-03-27  0:09 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Ben Widawsky

On Fri, Mar 21, 2014 at 12:41:53PM +0000, Chris Wilson wrote:
> As Broadwell has an increased virtual address size, it requires more
> than 32 bits to store offsets into its address space. This includes the
> debug registers to track the current HEAD of the individual rings, which
> may be anywhere within the per-process address spaces. In order to find
> the full location, we need to read the high bits from a second register.
> We then also need to expand our storage to keep track of the larger
> address.
> 
> v2: Carefully read the two registers to catch wraparound between
>     the reads.
> v3: Use a WARN_ON rather than loop indefinitely on an unstable
>     register read.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> Cc: Timo Aaltonen <tjaalton@ubuntu.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         |   13 ++++++++++++-
>  drivers/gpu/drm/i915/i915_gpu_error.c   |    2 +-
>  drivers/gpu/drm/i915/i915_irq.c         |    8 +++++---
>  drivers/gpu/drm/i915/i915_reg.h         |    1 +
>  drivers/gpu/drm/i915/intel_ringbuffer.c |   22 ++++++++++++++++------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |    6 +++---
>  6 files changed, 38 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5418253..4c09fb2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -354,12 +354,12 @@ struct drm_i915_error_state {
>  		u32 ipeir;
>  		u32 ipehr;
>  		u32 instdone;
> -		u32 acthd;
>  		u32 bbstate;
>  		u32 instpm;
>  		u32 instps;
>  		u32 seqno;
>  		u64 bbaddr;
> +		u64 acthd;
>  		u32 fault_reg;
>  		u32 faddr;
>  		u32 rc_psmi; /* sleep state */
> @@ -2786,6 +2786,17 @@ void vlv_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine);
>  #define I915_WRITE64(reg, val)	dev_priv->uncore.funcs.mmio_writeq(dev_priv, (reg), (val), true)
>  #define I915_READ64(reg)	dev_priv->uncore.funcs.mmio_readq(dev_priv, (reg), true)
>  
> +#define I915_READ64_2x32(lower_reg, upper_reg) ({			\
> +		u32 upper = I915_READ(upper_reg);			\
> +		u32 lower = I915_READ(lower_reg);			\
> +		u32 tmp = I915_READ(upper_reg);				\
> +		if (upper != tmp) {					\
> +			upper = tmp;					\
> +			lower = I915_READ(lower_reg);			\
> +			WARN_ON(I915_READ(upper_reg) != upper);		\
> +		}							\
> +		(u64)upper << 32 | lower; })
> +


May as well get the most recent value of upper:
		WARN_ON((tmp = I915_READ(upper_reg)) != upper);
	}
	return (u64)tmp << 32 | lower;

with or without:
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>

>  #define POSTING_READ(reg)	(void)I915_READ_NOTRACE(reg)
>  #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)
>  
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index b153a16..9519aa2 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -248,7 +248,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
>  	err_printf(m, "  TAIL: 0x%08x\n", ring->tail);
>  	err_printf(m, "  CTL: 0x%08x\n", ring->ctl);
>  	err_printf(m, "  HWS: 0x%08x\n", ring->hws);
> -	err_printf(m, "  ACTHD: 0x%08x\n", ring->acthd);
> +	err_printf(m, "  ACTHD: 0x%08llx\n", ring->acthd);
>  	err_printf(m, "  IPEIR: 0x%08x\n", ring->ipeir);
>  	err_printf(m, "  IPEHR: 0x%08x\n", ring->ipehr);
>  	err_printf(m, "  INSTDONE: 0x%08x\n", ring->instdone);
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 77dbef6..9caae98 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2507,7 +2507,8 @@ static struct intel_ring_buffer *
>  semaphore_waits_for(struct intel_ring_buffer *ring, u32 *seqno)
>  {
>  	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> -	u32 cmd, ipehr, acthd, acthd_min;
> +	u64 acthd, acthd_min;
> +	u32 cmd, ipehr;
>  
>  	ipehr = I915_READ(RING_IPEHR(ring->mmio_base));
>  	if ((ipehr & ~(0x3 << 16)) !=
> @@ -2563,7 +2564,7 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
>  }
>  
>  static enum intel_ring_hangcheck_action
> -ring_stuck(struct intel_ring_buffer *ring, u32 acthd)
> +ring_stuck(struct intel_ring_buffer *ring, u64 acthd)
>  {
>  	struct drm_device *dev = ring->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -2631,7 +2632,8 @@ static void i915_hangcheck_elapsed(unsigned long data)
>  		return;
>  
>  	for_each_ring(ring, dev_priv, i) {
> -		u32 seqno, acthd;
> +		u64 acthd;
> +		u32 seqno;
>  		bool busy = true;
>  
>  		semaphore_clear_deadlocks(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index f010ff7..3c464d3 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -708,6 +708,7 @@ enum punit_power_well {
>  #define BLT_HWS_PGA_GEN7	(0x04280)
>  #define VEBOX_HWS_PGA_GEN7	(0x04380)
>  #define RING_ACTHD(base)	((base)+0x74)
> +#define RING_ACTHD_UDW(base)	((base)+0x5c)
>  #define RING_NOPID(base)	((base)+0x94)
>  #define RING_IMR(base)		((base)+0xa8)
>  #define RING_TIMESTAMP(base)	((base)+0x358)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 7a01911..ce8ddee 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -417,13 +417,20 @@ static void ring_write_tail(struct intel_ring_buffer *ring,
>  	I915_WRITE_TAIL(ring, value);
>  }
>  
> -u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
> +u64 intel_ring_get_active_head(struct intel_ring_buffer *ring)
>  {
>  	drm_i915_private_t *dev_priv = ring->dev->dev_private;
> -	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
> -			RING_ACTHD(ring->mmio_base) : ACTHD;
> +	u64 acthd;
>  
> -	return I915_READ(acthd_reg);
> +	if (INTEL_INFO(ring->dev)->gen >= 8)
> +		acthd = I915_READ64_2x32(RING_ACTHD(ring->mmio_base),
> +					 RING_ACTHD_UDW(ring->mmio_base));
> +	else if (INTEL_INFO(ring->dev)->gen >= 4)
> +		acthd = I915_READ(RING_ACTHD(ring->mmio_base));
> +	else
> +		acthd = I915_READ(ACTHD);
> +
> +	return acthd;
>  }
>  
>  static void ring_setup_phys_status_page(struct intel_ring_buffer *ring)
> @@ -820,8 +827,11 @@ gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
>  	/* Workaround to force correct ordering between irq and seqno writes on
>  	 * ivb (and maybe also on snb) by reading from a CS register (like
>  	 * ACTHD) before reading the status page. */
> -	if (!lazy_coherency)
> -		intel_ring_get_active_head(ring);
> +	if (!lazy_coherency) {
> +		struct drm_i915_private *dev_priv = ring->dev->dev_private;
> +		POSTING_READ(RING_ACTHD(ring->mmio_base));
> +	}
> +
>  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 69b9086..e2872c6 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -46,11 +46,11 @@ enum intel_ring_hangcheck_action {
>  #define HANGCHECK_SCORE_RING_HUNG 31
>  
>  struct intel_ring_hangcheck {
> -	bool deadlock;
> +	u64 acthd;
>  	u32 seqno;
> -	u32 acthd;
>  	int score;
>  	enum intel_ring_hangcheck_action action;
> +	bool deadlock;
>  };
>  
>  struct  intel_ring_buffer {
> @@ -294,7 +294,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev);
>  int intel_init_blt_ring_buffer(struct drm_device *dev);
>  int intel_init_vebox_ring_buffer(struct drm_device *dev);
>  
> -u32 intel_ring_get_active_head(struct intel_ring_buffer *ring);
> +u64 intel_ring_get_active_head(struct intel_ring_buffer *ring);
>  void intel_ring_setup_status_page(struct intel_ring_buffer *ring);
>  
>  static inline u32 intel_ring_get_tail(struct intel_ring_buffer *ring)
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: Broadwell expands ACTHD to 64bit
  2014-03-27  0:09               ` Ben Widawsky
@ 2014-03-27  7:32                 ` Daniel Vetter
  2014-03-27  7:45                 ` Chris Wilson
  1 sibling, 0 replies; 22+ messages in thread
From: Daniel Vetter @ 2014-03-27  7:32 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx, Ben Widawsky

On Wed, Mar 26, 2014 at 05:09:53PM -0700, Ben Widawsky wrote:
> On Fri, Mar 21, 2014 at 12:41:53PM +0000, Chris Wilson wrote:
> > As Broadwell has an increased virtual address size, it requires more
> > than 32 bits to store offsets into its address space. This includes the
> > debug registers to track the current HEAD of the individual rings, which
> > may be anywhere within the per-process address spaces. In order to find
> > the full location, we need to read the high bits from a second register.
> > We then also need to expand our storage to keep track of the larger
> > address.
> > 
> > v2: Carefully read the two registers to catch wraparound between
> >     the reads.
> > v3: Use a WARN_ON rather than loop indefinitely on an unstable
> >     register read.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> > Cc: Timo Aaltonen <tjaalton@ubuntu.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h         |   13 ++++++++++++-
> >  drivers/gpu/drm/i915/i915_gpu_error.c   |    2 +-
> >  drivers/gpu/drm/i915/i915_irq.c         |    8 +++++---
> >  drivers/gpu/drm/i915/i915_reg.h         |    1 +
> >  drivers/gpu/drm/i915/intel_ringbuffer.c |   22 ++++++++++++++++------
> >  drivers/gpu/drm/i915/intel_ringbuffer.h |    6 +++---
> >  6 files changed, 38 insertions(+), 14 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 5418253..4c09fb2 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -354,12 +354,12 @@ struct drm_i915_error_state {
> >  		u32 ipeir;
> >  		u32 ipehr;
> >  		u32 instdone;
> > -		u32 acthd;
> >  		u32 bbstate;
> >  		u32 instpm;
> >  		u32 instps;
> >  		u32 seqno;
> >  		u64 bbaddr;
> > +		u64 acthd;
> >  		u32 fault_reg;
> >  		u32 faddr;
> >  		u32 rc_psmi; /* sleep state */
> > @@ -2786,6 +2786,17 @@ void vlv_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine);
> >  #define I915_WRITE64(reg, val)	dev_priv->uncore.funcs.mmio_writeq(dev_priv, (reg), (val), true)
> >  #define I915_READ64(reg)	dev_priv->uncore.funcs.mmio_readq(dev_priv, (reg), true)
> >  
> > +#define I915_READ64_2x32(lower_reg, upper_reg) ({			\
> > +		u32 upper = I915_READ(upper_reg);			\
> > +		u32 lower = I915_READ(lower_reg);			\
> > +		u32 tmp = I915_READ(upper_reg);				\
> > +		if (upper != tmp) {					\
> > +			upper = tmp;					\
> > +			lower = I915_READ(lower_reg);			\
> > +			WARN_ON(I915_READ(upper_reg) != upper);		\
> > +		}							\
> > +		(u64)upper << 32 | lower; })
> > +
> 
> 
> May as well get the most recent value of upper:
> 		WARN_ON((tmp = I915_READ(upper_reg)) != upper);
> 	}
> 	return (u64)tmp << 32 | lower;
> 
> with or without:
> Reviewed-by: Ben Widawsky <ben@bwidawsk.net>

I've punted on this and pulled this (and the follow-up polish patch in).

> 
> >  #define POSTING_READ(reg)	(void)I915_READ_NOTRACE(reg)
> >  #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> > index b153a16..9519aa2 100644
> > --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> > @@ -248,7 +248,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
> >  	err_printf(m, "  TAIL: 0x%08x\n", ring->tail);
> >  	err_printf(m, "  CTL: 0x%08x\n", ring->ctl);
> >  	err_printf(m, "  HWS: 0x%08x\n", ring->hws);
> > -	err_printf(m, "  ACTHD: 0x%08x\n", ring->acthd);
> > +	err_printf(m, "  ACTHD: 0x%08llx\n", ring->acthd);
> >  	err_printf(m, "  IPEIR: 0x%08x\n", ring->ipeir);
> >  	err_printf(m, "  IPEHR: 0x%08x\n", ring->ipehr);
> >  	err_printf(m, "  INSTDONE: 0x%08x\n", ring->instdone);
> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > index 77dbef6..9caae98 100644
> > --- a/drivers/gpu/drm/i915/i915_irq.c
> > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > @@ -2507,7 +2507,8 @@ static struct intel_ring_buffer *
> >  semaphore_waits_for(struct intel_ring_buffer *ring, u32 *seqno)
> >  {
> >  	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> > -	u32 cmd, ipehr, acthd, acthd_min;
> > +	u64 acthd, acthd_min;
> > +	u32 cmd, ipehr;

With a little conflict here - this hunk seems spurious.

Thanks, Daniel

> >  
> >  	ipehr = I915_READ(RING_IPEHR(ring->mmio_base));
> >  	if ((ipehr & ~(0x3 << 16)) !=
> > @@ -2563,7 +2564,7 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
> >  }
> >  
> >  static enum intel_ring_hangcheck_action
> > -ring_stuck(struct intel_ring_buffer *ring, u32 acthd)
> > +ring_stuck(struct intel_ring_buffer *ring, u64 acthd)
> >  {
> >  	struct drm_device *dev = ring->dev;
> >  	struct drm_i915_private *dev_priv = dev->dev_private;
> > @@ -2631,7 +2632,8 @@ static void i915_hangcheck_elapsed(unsigned long data)
> >  		return;
> >  
> >  	for_each_ring(ring, dev_priv, i) {
> > -		u32 seqno, acthd;
> > +		u64 acthd;
> > +		u32 seqno;
> >  		bool busy = true;
> >  
> >  		semaphore_clear_deadlocks(dev_priv);
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > index f010ff7..3c464d3 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -708,6 +708,7 @@ enum punit_power_well {
> >  #define BLT_HWS_PGA_GEN7	(0x04280)
> >  #define VEBOX_HWS_PGA_GEN7	(0x04380)
> >  #define RING_ACTHD(base)	((base)+0x74)
> > +#define RING_ACTHD_UDW(base)	((base)+0x5c)
> >  #define RING_NOPID(base)	((base)+0x94)
> >  #define RING_IMR(base)		((base)+0xa8)
> >  #define RING_TIMESTAMP(base)	((base)+0x358)
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > index 7a01911..ce8ddee 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > @@ -417,13 +417,20 @@ static void ring_write_tail(struct intel_ring_buffer *ring,
> >  	I915_WRITE_TAIL(ring, value);
> >  }
> >  
> > -u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
> > +u64 intel_ring_get_active_head(struct intel_ring_buffer *ring)
> >  {
> >  	drm_i915_private_t *dev_priv = ring->dev->dev_private;
> > -	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
> > -			RING_ACTHD(ring->mmio_base) : ACTHD;
> > +	u64 acthd;
> >  
> > -	return I915_READ(acthd_reg);
> > +	if (INTEL_INFO(ring->dev)->gen >= 8)
> > +		acthd = I915_READ64_2x32(RING_ACTHD(ring->mmio_base),
> > +					 RING_ACTHD_UDW(ring->mmio_base));
> > +	else if (INTEL_INFO(ring->dev)->gen >= 4)
> > +		acthd = I915_READ(RING_ACTHD(ring->mmio_base));
> > +	else
> > +		acthd = I915_READ(ACTHD);
> > +
> > +	return acthd;
> >  }
> >  
> >  static void ring_setup_phys_status_page(struct intel_ring_buffer *ring)
> > @@ -820,8 +827,11 @@ gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
> >  	/* Workaround to force correct ordering between irq and seqno writes on
> >  	 * ivb (and maybe also on snb) by reading from a CS register (like
> >  	 * ACTHD) before reading the status page. */
> > -	if (!lazy_coherency)
> > -		intel_ring_get_active_head(ring);
> > +	if (!lazy_coherency) {
> > +		struct drm_i915_private *dev_priv = ring->dev->dev_private;
> > +		POSTING_READ(RING_ACTHD(ring->mmio_base));
> > +	}
> > +
> >  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> >  }
> >  
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > index 69b9086..e2872c6 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > @@ -46,11 +46,11 @@ enum intel_ring_hangcheck_action {
> >  #define HANGCHECK_SCORE_RING_HUNG 31
> >  
> >  struct intel_ring_hangcheck {
> > -	bool deadlock;
> > +	u64 acthd;
> >  	u32 seqno;
> > -	u32 acthd;
> >  	int score;
> >  	enum intel_ring_hangcheck_action action;
> > +	bool deadlock;
> >  };
> >  
> >  struct  intel_ring_buffer {
> > @@ -294,7 +294,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev);
> >  int intel_init_blt_ring_buffer(struct drm_device *dev);
> >  int intel_init_vebox_ring_buffer(struct drm_device *dev);
> >  
> > -u32 intel_ring_get_active_head(struct intel_ring_buffer *ring);
> > +u64 intel_ring_get_active_head(struct intel_ring_buffer *ring);
> >  void intel_ring_setup_status_page(struct intel_ring_buffer *ring);
> >  
> >  static inline u32 intel_ring_get_tail(struct intel_ring_buffer *ring)
> > -- 
> > 1.7.9.5
> > 
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
> -- 
> Ben Widawsky, Intel Open Source Technology Center
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] drm/i915: Broadwell expands ACTHD to 64bit
  2014-03-27  0:09               ` Ben Widawsky
  2014-03-27  7:32                 ` Daniel Vetter
@ 2014-03-27  7:45                 ` Chris Wilson
  1 sibling, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2014-03-27  7:45 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx, Ben Widawsky

On Wed, Mar 26, 2014 at 05:09:53PM -0700, Ben Widawsky wrote:
> On Fri, Mar 21, 2014 at 12:41:53PM +0000, Chris Wilson wrote:
> > +#define I915_READ64_2x32(lower_reg, upper_reg) ({			\
> > +		u32 upper = I915_READ(upper_reg);			\
> > +		u32 lower = I915_READ(lower_reg);			\
> > +		u32 tmp = I915_READ(upper_reg);				\
> > +		if (upper != tmp) {					\
> > +			upper = tmp;					\
> > +			lower = I915_READ(lower_reg);			\
> > +			WARN_ON(I915_READ(upper_reg) != upper);		\
> > +		}							\
> > +		(u64)upper << 32 | lower; })
> > +
> 
> 
> May as well get the most recent value of upper:
> 		WARN_ON((tmp = I915_READ(upper_reg)) != upper);
> 	}
> 	return (u64)tmp << 32 | lower;

Bleh, I thought if the WARN ever fires, the result is so unstable that
it really doesn't matter what we return, or we go full-loop.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2014-03-27  7:45 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-03-19 21:54 [PATCH] drm/i915: bdw expands ACTHD to 64bit Chris Wilson
2014-03-19 23:06 ` Ben Widawsky
2014-03-20  7:54   ` Chris Wilson
2014-03-20 15:17     ` Daniel Vetter
2014-03-20 16:28       ` Chris Wilson
2014-03-20 16:34         ` Daniel Vetter
2014-03-20 16:56 ` Tvrtko Ursulin
2014-03-20 21:41   ` Chris Wilson
2014-03-20 21:48   ` [PATCH] drm/i915: Broadwell " Chris Wilson
2014-03-21 10:03     ` Tvrtko Ursulin
2014-03-21 10:14       ` Chris Wilson
2014-03-21 10:50         ` Tvrtko Ursulin
2014-03-21 12:00           ` Chris Wilson
2014-03-21 12:05             ` [PATCH] drm/i915: Split 64bit hexadecimal addresses to make them easier to read Chris Wilson
2014-03-21 12:19             ` [PATCH] drm/i915: Broadwell expands ACTHD to 64bit Tvrtko Ursulin
2014-03-21 12:41             ` Chris Wilson
2014-03-25  2:41               ` Ben Widawsky
2014-03-25  2:43                 ` Ben Widawsky
2014-03-25  7:31                   ` Chris Wilson
2014-03-27  0:09               ` Ben Widawsky
2014-03-27  7:32                 ` Daniel Vetter
2014-03-27  7:45                 ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.