All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/i915: Use Write-Through cacheing for the display plane on Iris
@ 2013-08-01 17:39 Chris Wilson
  2013-08-01 17:39 ` [PATCH 2/2] drm/i915: Tidy the macro casting by using an inline function Chris Wilson
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Chris Wilson @ 2013-08-01 17:39 UTC (permalink / raw)
  To: intel-gfx

Haswell GT3e has the unique feature of supporting Write-Through cacheing
of objects within the eLLC/LLC. The purpose of this is to enable the display
plane to remain coherent whilst objects lie resident in the eLLC/LLC - so
that we, in theory, get the best of both worlds, perfect display and fast
access.

However, we still need to be careful as the CPU does not see the WT when
accessing the cache. In particular, this means that we need to flush the
cache lines after writing to an object through the CPU, and on
transitioning from a cached state to WT.

v2: Actually do the clflush on transition to WT, nagging by Ville.
v3: Flush the CPU cache after writes into WT objects.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Kenneth Graunke <kenneth@whitecape.org>
---
 drivers/gpu/drm/i915/i915_dma.c     |  3 +++
 drivers/gpu/drm/i915/i915_drv.h     |  4 +++-
 drivers/gpu/drm/i915/i915_gem.c     | 34 +++++++++++++++++++++++-----------
 drivers/gpu/drm/i915/i915_gem_gtt.c | 11 ++++++++++-
 include/uapi/drm/i915_drm.h         |  1 +
 5 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 8da0b3d..75989fc 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -976,6 +976,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_LLC:
 		value = HAS_LLC(dev);
 		break;
+	case I915_PARAM_HAS_WT:
+		value = HAS_WT(dev);
+		break;
 	case I915_PARAM_HAS_ALIASING_PPGTT:
 		value = dev_priv->mm.aliasing_ppgtt ? 1 : 0;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 34d2b9d..d27a82a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -452,6 +452,7 @@ enum i915_cache_level {
 	I915_CACHE_NONE = 0,
 	I915_CACHE_LLC,
 	I915_CACHE_LLC_MLC, /* gen6+, in docs at least! */
+	I915_CACHE_WT,
 };
 
 typedef uint32_t gen6_gtt_pte_t;
@@ -1344,7 +1345,7 @@ struct drm_i915_gem_object {
 	unsigned int pending_fenced_gpu_access:1;
 	unsigned int fenced_gpu_access:1;
 
-	unsigned int cache_level:2;
+	unsigned int cache_level:3;
 
 	unsigned int has_aliasing_ppgtt_mapping:1;
 	unsigned int has_global_gtt_mapping:1;
@@ -1547,6 +1548,7 @@ struct drm_i915_file_private {
 #define HAS_BLT(dev)            (INTEL_INFO(dev)->has_blt_ring)
 #define HAS_VEBOX(dev)          (INTEL_INFO(dev)->has_vebox_ring)
 #define HAS_LLC(dev)            (INTEL_INFO(dev)->has_llc)
+#define HAS_WT(dev)            (IS_HASWELL(dev) && ((struct drm_i915_private *)(dev)->dev_private)->ellc_size)
 #define I915_NEED_GFX_HWS(dev)	(INTEL_INFO(dev)->need_gfx_hws)
 
 #define HAS_HW_CONTEXTS(dev)	(INTEL_INFO(dev)->gen >= 5)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 99362f7..12ff60a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -60,6 +60,16 @@ static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
 static void i915_gem_shrink_all(struct drm_i915_private *dev_priv);
 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
 
+static bool is_cpu_write_cached(enum i915_cache_level cl)
+{
+	return cl == I915_CACHE_LLC || cl == I915_CACHE_LLC_MLC;
+}
+
+static bool is_cpu_read_cached(enum i915_cache_level cl)
+{
+	return cl != I915_CACHE_NONE;
+}
+
 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
 {
 	if (obj->tiling_mode)
@@ -510,7 +520,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
 		 * read domain and manually flush cachelines (if required). This
 		 * optimizes for the case when the gpu will dirty the data
 		 * anyway again before the next pread happens. */
-		if (obj->cache_level == I915_CACHE_NONE)
+		if (!is_cpu_read_cached(obj->cache_level))
 			needs_clflush = 1;
 		if (i915_gem_obj_ggtt_bound(obj)) {
 			ret = i915_gem_object_set_to_gtt_domain(obj, false);
@@ -827,7 +837,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		 * write domain and manually flush cachelines (if required). This
 		 * optimizes for the case when the gpu will use the data
 		 * right away and we therefore have to clflush anyway. */
-		if (obj->cache_level == I915_CACHE_NONE)
+		if (!is_cpu_write_cached(obj->cache_level))
 			needs_clflush_after = 1;
 		if (i915_gem_obj_ggtt_bound(obj)) {
 			ret = i915_gem_object_set_to_gtt_domain(obj, true);
@@ -837,8 +847,8 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	}
 	/* Same trick applies for invalidate partially written cachelines before
 	 * writing.  */
-	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
-	    && obj->cache_level == I915_CACHE_NONE)
+	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU) &&
+	    !is_cpu_read_cached(obj->cache_level))
 		needs_clflush_before = 1;
 
 	ret = i915_gem_object_get_pages(obj);
@@ -996,7 +1006,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 		goto out;
 	}
 
-	if (obj->cache_level == I915_CACHE_NONE &&
+	if (!is_cpu_write_cached(obj->cache_level) &&
 	    obj->tiling_mode == I915_TILING_NONE &&
 	    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
 		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
@@ -1432,7 +1442,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	trace_i915_gem_object_fault(obj, page_offset, true, write);
 
 	/* Access to snoopable pages through the GTT is incoherent. */
-	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
+	if (is_cpu_read_cached(obj->cache_level) && !HAS_LLC(dev)) {
 		ret = -EINVAL;
 		goto unlock;
 	}
@@ -3286,11 +3296,10 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
 	 * snooping behaviour occurs naturally as the result of our domain
 	 * tracking.
 	 */
-	if (obj->cache_level != I915_CACHE_NONE)
+	if (is_cpu_write_cached(obj->cache_level))
 		return;
 
 	trace_i915_gem_object_clflush(obj);
-
 	drm_clflush_sg(obj->pages);
 }
 
@@ -3447,7 +3456,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 		i915_gem_obj_ggtt_set_color(obj, cache_level);
 	}
 
-	if (cache_level == I915_CACHE_NONE) {
+	if (is_cpu_write_cached(obj->cache_level) != is_cpu_write_cached(cache_level)) {
 		u32 old_read_domains, old_write_domain;
 
 		/* If we're coming from LLC cached, then we haven't
@@ -3565,7 +3574,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	 * of uncaching, which would allow us to flush all the LLC-cached data
 	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
 	 */
-	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
+	ret = i915_gem_object_set_cache_level(obj,
+					      HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
 	if (ret)
 		return ret;
 
@@ -3638,7 +3648,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 
 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-		i915_gem_clflush_object(obj);
+		if (obj->cache_level == I915_CACHE_NONE &&
+		    !HAS_LLC(obj->base.dev))
+			i915_gem_clflush_object(obj);
 
 		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0522d00..072a348 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -54,6 +54,7 @@
 					 (((bits) & 0x8) << (11 - 3)))
 #define HSW_WB_LLC_AGE0			HSW_CACHEABILITY_CONTROL(0x3)
 #define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
+#define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
 
 static gen6_gtt_pte_t gen6_pte_encode(dma_addr_t addr,
 				      enum i915_cache_level level)
@@ -116,8 +117,16 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
 	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
 	pte |= HSW_PTE_ADDR_ENCODE(addr);
 
-	if (level != I915_CACHE_NONE)
+	switch (level) {
+	case I915_CACHE_NONE:
+		break;
+	case I915_CACHE_WT:
+		pte |= HSW_WT_ELLC_LLC_AGE0;
+		break;
+	default:
 		pte |= HSW_WB_ELLC_LLC_AGE0;
+		break;
+	}
 
 	return pte;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e47cf00..e831292 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -338,6 +338,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_PINNED_BATCHES	 24
 #define I915_PARAM_HAS_EXEC_NO_RELOC	 25
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
+#define I915_PARAM_HAS_WT     	 	 27
 
 typedef struct drm_i915_getparam {
 	int param;
-- 
1.8.4.rc0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/2] drm/i915: Tidy the macro casting by using an inline function
  2013-08-01 17:39 [PATCH 1/2] drm/i915: Use Write-Through cacheing for the display plane on Iris Chris Wilson
@ 2013-08-01 17:39 ` Chris Wilson
  2013-08-02 16:22   ` Paulo Zanoni
  2013-08-02 14:51 ` [PATCH 1/2] drm/i915: Use Write-Through cacheing for the display plane on Iris Ville Syrjälä
  2013-08-02 18:45 ` Andy Lutomirski
  2 siblings, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2013-08-01 17:39 UTC (permalink / raw)
  To: intel-gfx

Some of our macros we trying to convert from an drm_device to a
drm_i915_private and then use the pointer inline. This is not only
cumbersome but prone to error. Replacing it with a typesafe function
should help catch those errors in future.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d27a82a..6d1736f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1224,6 +1224,11 @@ typedef struct drm_i915_private {
 	struct i915_ums_state ums;
 } drm_i915_private_t;
 
+inline static struct drm_i915_private *to_i915(const struct drm_device *dev)
+{
+	return dev->dev_private;
+}
+
 /* Iterate over initialised rings */
 #define for_each_ring(ring__, dev_priv__, i__) \
 	for ((i__) = 0; (i__) < I915_NUM_RINGS; (i__)++) \
@@ -1498,7 +1503,7 @@ struct drm_i915_file_private {
 	struct i915_ctx_hang_stats hang_stats;
 };
 
-#define INTEL_INFO(dev)	(((struct drm_i915_private *) (dev)->dev_private)->info)
+#define INTEL_INFO(dev)	(to_i915(dev)->info)
 
 #define IS_I830(dev)		((dev)->pci_device == 0x3577)
 #define IS_845G(dev)		((dev)->pci_device == 0x2562)
@@ -1548,7 +1553,7 @@ struct drm_i915_file_private {
 #define HAS_BLT(dev)            (INTEL_INFO(dev)->has_blt_ring)
 #define HAS_VEBOX(dev)          (INTEL_INFO(dev)->has_vebox_ring)
 #define HAS_LLC(dev)            (INTEL_INFO(dev)->has_llc)
-#define HAS_WT(dev)            (IS_HASWELL(dev) && ((struct drm_i915_private *)(dev)->dev_private)->ellc_size)
+#define HAS_WT(dev)            (IS_HASWELL(dev) && to_i915(dev)->ellc_size)
 #define I915_NEED_GFX_HWS(dev)	(INTEL_INFO(dev)->need_gfx_hws)
 
 #define HAS_HW_CONTEXTS(dev)	(INTEL_INFO(dev)->gen >= 5)
@@ -1593,7 +1598,7 @@ struct drm_i915_file_private {
 #define INTEL_PCH_LPT_DEVICE_ID_TYPE		0x8c00
 #define INTEL_PCH_LPT_LP_DEVICE_ID_TYPE		0x9c00
 
-#define INTEL_PCH_TYPE(dev) (((struct drm_i915_private *)(dev)->dev_private)->pch_type)
+#define INTEL_PCH_TYPE(dev) (to_i915(dev)->pch_type)
 #define HAS_PCH_LPT(dev) (INTEL_PCH_TYPE(dev) == PCH_LPT)
 #define HAS_PCH_CPT(dev) (INTEL_PCH_TYPE(dev) == PCH_CPT)
 #define HAS_PCH_IBX(dev) (INTEL_PCH_TYPE(dev) == PCH_IBX)
-- 
1.8.4.rc0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] drm/i915: Use Write-Through cacheing for the display plane on Iris
  2013-08-01 17:39 [PATCH 1/2] drm/i915: Use Write-Through cacheing for the display plane on Iris Chris Wilson
  2013-08-01 17:39 ` [PATCH 2/2] drm/i915: Tidy the macro casting by using an inline function Chris Wilson
@ 2013-08-02 14:51 ` Ville Syrjälä
  2013-08-02 20:03   ` Chris Wilson
  2013-08-02 18:45 ` Andy Lutomirski
  2 siblings, 1 reply; 11+ messages in thread
From: Ville Syrjälä @ 2013-08-02 14:51 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Thu, Aug 01, 2013 at 06:39:54PM +0100, Chris Wilson wrote:
> Haswell GT3e has the unique feature of supporting Write-Through cacheing
> of objects within the eLLC/LLC. The purpose of this is to enable the display
> plane to remain coherent whilst objects lie resident in the eLLC/LLC - so
> that we, in theory, get the best of both worlds, perfect display and fast
> access.
> 
> However, we still need to be careful as the CPU does not see the WT when
> accessing the cache. In particular, this means that we need to flush the
> cache lines after writing to an object through the CPU, and on
> transitioning from a cached state to WT.
> 
> v2: Actually do the clflush on transition to WT, nagging by Ville.
> v3: Flush the CPU cache after writes into WT objects.

v4?

> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Cc: Kenneth Graunke <kenneth@whitecape.org>
> ---
>  drivers/gpu/drm/i915/i915_dma.c     |  3 +++
>  drivers/gpu/drm/i915/i915_drv.h     |  4 +++-
>  drivers/gpu/drm/i915/i915_gem.c     | 34 +++++++++++++++++++++++-----------
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 11 ++++++++++-
>  include/uapi/drm/i915_drm.h         |  1 +
>  5 files changed, 40 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 8da0b3d..75989fc 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -976,6 +976,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>  	case I915_PARAM_HAS_LLC:
>  		value = HAS_LLC(dev);
>  		break;
> +	case I915_PARAM_HAS_WT:
> +		value = HAS_WT(dev);
> +		break;
>  	case I915_PARAM_HAS_ALIASING_PPGTT:
>  		value = dev_priv->mm.aliasing_ppgtt ? 1 : 0;
>  		break;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 34d2b9d..d27a82a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -452,6 +452,7 @@ enum i915_cache_level {
>  	I915_CACHE_NONE = 0,
>  	I915_CACHE_LLC,
>  	I915_CACHE_LLC_MLC, /* gen6+, in docs at least! */
> +	I915_CACHE_WT,
>  };
>  
>  typedef uint32_t gen6_gtt_pte_t;
> @@ -1344,7 +1345,7 @@ struct drm_i915_gem_object {
>  	unsigned int pending_fenced_gpu_access:1;
>  	unsigned int fenced_gpu_access:1;
>  
> -	unsigned int cache_level:2;
> +	unsigned int cache_level:3;
>  
>  	unsigned int has_aliasing_ppgtt_mapping:1;
>  	unsigned int has_global_gtt_mapping:1;
> @@ -1547,6 +1548,7 @@ struct drm_i915_file_private {
>  #define HAS_BLT(dev)            (INTEL_INFO(dev)->has_blt_ring)
>  #define HAS_VEBOX(dev)          (INTEL_INFO(dev)->has_vebox_ring)
>  #define HAS_LLC(dev)            (INTEL_INFO(dev)->has_llc)
> +#define HAS_WT(dev)            (IS_HASWELL(dev) && ((struct drm_i915_private *)(dev)->dev_private)->ellc_size)
>  #define I915_NEED_GFX_HWS(dev)	(INTEL_INFO(dev)->need_gfx_hws)
>  
>  #define HAS_HW_CONTEXTS(dev)	(INTEL_INFO(dev)->gen >= 5)
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 99362f7..12ff60a 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -60,6 +60,16 @@ static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
>  static void i915_gem_shrink_all(struct drm_i915_private *dev_priv);
>  static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
>  
> +static bool is_cpu_write_cached(enum i915_cache_level cl)
> +{
> +	return cl == I915_CACHE_LLC || cl == I915_CACHE_LLC_MLC;
> +}
> +
> +static bool is_cpu_read_cached(enum i915_cache_level cl)
> +{
> +	return cl != I915_CACHE_NONE;
> +}
> +
>  static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
>  {
>  	if (obj->tiling_mode)
> @@ -510,7 +520,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
>  		 * read domain and manually flush cachelines (if required). This
>  		 * optimizes for the case when the gpu will dirty the data
>  		 * anyway again before the next pread happens. */
> -		if (obj->cache_level == I915_CACHE_NONE)
> +		if (!is_cpu_read_cached(obj->cache_level))

Based on what we discussed on irc about GPU snooping even on UC
accesses, couldn't this even be:

!HAS_LLC(dev) && obj->cache_level == I915_CACHE_NONE

BTW I was toying around a bit w/ gem_cacheing. I modified it to map the
scratch bo just once and use DRM_I915_GEM_WAIT to wait for the GPU so
that I could avoid the set_domain stuff. Then I ran it w/ both cached
and uncached bo, and it passes on IVB in both cases. On ILK it fails
in the uncached bo case, but passes in the cached bo case. So that
result appears to support the notion that UC accesses snoop on LLC
platforms. But of course I didn't try other use cases than the BLT
vs. CPU that gem_cacheing tests.

>  			needs_clflush = 1;
>  		if (i915_gem_obj_ggtt_bound(obj)) {
>  			ret = i915_gem_object_set_to_gtt_domain(obj, false);
> @@ -827,7 +837,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  		 * write domain and manually flush cachelines (if required). This
>  		 * optimizes for the case when the gpu will use the data
>  		 * right away and we therefore have to clflush anyway. */
> -		if (obj->cache_level == I915_CACHE_NONE)
> +		if (!is_cpu_write_cached(obj->cache_level))
>  			needs_clflush_after = 1;

This makese sense. Although if we were to separate scanout UC from other
UC uses, even this clflush could be avoided for non-scanout cases
on LLC platforms.

>  		if (i915_gem_obj_ggtt_bound(obj)) {
>  			ret = i915_gem_object_set_to_gtt_domain(obj, true);
> @@ -837,8 +847,8 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  	}
>  	/* Same trick applies for invalidate partially written cachelines before
>  	 * writing.  */
> -	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
> -	    && obj->cache_level == I915_CACHE_NONE)
> +	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU) &&
> +	    !is_cpu_read_cached(obj->cache_level))

Again, I think this could be:
!HAS_LLC(dev) && obj->cache_level == I915_CACHE_NONE

>  		needs_clflush_before = 1;
>  
>  	ret = i915_gem_object_get_pages(obj);
> @@ -996,7 +1006,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>  		goto out;
>  	}
>  
> -	if (obj->cache_level == I915_CACHE_NONE &&
> +	if (!is_cpu_write_cached(obj->cache_level) &&
>  	    obj->tiling_mode == I915_TILING_NONE &&
>  	    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {

Might we want to skip the GTT path on LLC platforms completely?

>  		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
> @@ -1432,7 +1442,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	trace_i915_gem_object_fault(obj, page_offset, true, write);
>  
>  	/* Access to snoopable pages through the GTT is incoherent. */
> -	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
> +	if (is_cpu_read_cached(obj->cache_level) && !HAS_LLC(dev)) {
>  		ret = -EINVAL;
>  		goto unlock;
>  	}
> @@ -3286,11 +3296,10 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
>  	 * snooping behaviour occurs naturally as the result of our domain
>  	 * tracking.
>  	 */
> -	if (obj->cache_level != I915_CACHE_NONE)
> +	if (is_cpu_write_cached(obj->cache_level))
>  		return;
>  
>  	trace_i915_gem_object_clflush(obj);
> -
>  	drm_clflush_sg(obj->pages);
>  }
>  
> @@ -3447,7 +3456,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>  		i915_gem_obj_ggtt_set_color(obj, cache_level);
>  	}
>  
> -	if (cache_level == I915_CACHE_NONE) {
> +	if (is_cpu_write_cached(obj->cache_level) != is_cpu_write_cached(cache_level)) {
>  		u32 old_read_domains, old_write_domain;
>  
>  		/* If we're coming from LLC cached, then we haven't
> @@ -3565,7 +3574,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  	 * of uncaching, which would allow us to flush all the LLC-cached data
>  	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
>  	 */
> -	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
> +	ret = i915_gem_object_set_cache_level(obj,
> +					      HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
>  	if (ret)
>  		return ret;
>  
> @@ -3638,7 +3648,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
>  
>  	/* Flush the CPU cache if it's still invalid. */
>  	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
> -		i915_gem_clflush_object(obj);
> +		if (obj->cache_level == I915_CACHE_NONE &&
> +		    !HAS_LLC(obj->base.dev))
> +			i915_gem_clflush_object(obj);
>  
>  		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 0522d00..072a348 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -54,6 +54,7 @@
>  					 (((bits) & 0x8) << (11 - 3)))
>  #define HSW_WB_LLC_AGE0			HSW_CACHEABILITY_CONTROL(0x3)
>  #define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
> +#define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
>  
>  static gen6_gtt_pte_t gen6_pte_encode(dma_addr_t addr,
>  				      enum i915_cache_level level)
> @@ -116,8 +117,16 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
>  	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
>  	pte |= HSW_PTE_ADDR_ENCODE(addr);
>  
> -	if (level != I915_CACHE_NONE)
> +	switch (level) {
> +	case I915_CACHE_NONE:
> +		break;
> +	case I915_CACHE_WT:
> +		pte |= HSW_WT_ELLC_LLC_AGE0;
> +		break;
> +	default:
>  		pte |= HSW_WB_ELLC_LLC_AGE0;
> +		break;
> +	}
>  
>  	return pte;
>  }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index e47cf00..e831292 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -338,6 +338,7 @@ typedef struct drm_i915_irq_wait {
>  #define I915_PARAM_HAS_PINNED_BATCHES	 24
>  #define I915_PARAM_HAS_EXEC_NO_RELOC	 25
>  #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
> +#define I915_PARAM_HAS_WT     	 	 27
>  
>  typedef struct drm_i915_getparam {
>  	int param;
> -- 
> 1.8.4.rc0

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] drm/i915: Tidy the macro casting by using an inline function
  2013-08-01 17:39 ` [PATCH 2/2] drm/i915: Tidy the macro casting by using an inline function Chris Wilson
@ 2013-08-02 16:22   ` Paulo Zanoni
  2013-08-05  6:55     ` Daniel Vetter
  0 siblings, 1 reply; 11+ messages in thread
From: Paulo Zanoni @ 2013-08-02 16:22 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

2013/8/1 Chris Wilson <chris@chris-wilson.co.uk>:
> Some of our macros we trying to convert from an drm_device to a
> drm_i915_private and then use the pointer inline. This is not only
> cumbersome but prone to error. Replacing it with a typesafe function
> should help catch those errors in future.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>

> ---
>  drivers/gpu/drm/i915/i915_drv.h | 11 ++++++++---
>  1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index d27a82a..6d1736f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1224,6 +1224,11 @@ typedef struct drm_i915_private {
>         struct i915_ums_state ums;
>  } drm_i915_private_t;
>
> +inline static struct drm_i915_private *to_i915(const struct drm_device *dev)
> +{
> +       return dev->dev_private;
> +}
> +
>  /* Iterate over initialised rings */
>  #define for_each_ring(ring__, dev_priv__, i__) \
>         for ((i__) = 0; (i__) < I915_NUM_RINGS; (i__)++) \
> @@ -1498,7 +1503,7 @@ struct drm_i915_file_private {
>         struct i915_ctx_hang_stats hang_stats;
>  };
>
> -#define INTEL_INFO(dev)        (((struct drm_i915_private *) (dev)->dev_private)->info)
> +#define INTEL_INFO(dev)        (to_i915(dev)->info)
>
>  #define IS_I830(dev)           ((dev)->pci_device == 0x3577)
>  #define IS_845G(dev)           ((dev)->pci_device == 0x2562)
> @@ -1548,7 +1553,7 @@ struct drm_i915_file_private {
>  #define HAS_BLT(dev)            (INTEL_INFO(dev)->has_blt_ring)
>  #define HAS_VEBOX(dev)          (INTEL_INFO(dev)->has_vebox_ring)
>  #define HAS_LLC(dev)            (INTEL_INFO(dev)->has_llc)
> -#define HAS_WT(dev)            (IS_HASWELL(dev) && ((struct drm_i915_private *)(dev)->dev_private)->ellc_size)
> +#define HAS_WT(dev)            (IS_HASWELL(dev) && to_i915(dev)->ellc_size)
>  #define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws)
>
>  #define HAS_HW_CONTEXTS(dev)   (INTEL_INFO(dev)->gen >= 5)
> @@ -1593,7 +1598,7 @@ struct drm_i915_file_private {
>  #define INTEL_PCH_LPT_DEVICE_ID_TYPE           0x8c00
>  #define INTEL_PCH_LPT_LP_DEVICE_ID_TYPE                0x9c00
>
> -#define INTEL_PCH_TYPE(dev) (((struct drm_i915_private *)(dev)->dev_private)->pch_type)
> +#define INTEL_PCH_TYPE(dev) (to_i915(dev)->pch_type)
>  #define HAS_PCH_LPT(dev) (INTEL_PCH_TYPE(dev) == PCH_LPT)
>  #define HAS_PCH_CPT(dev) (INTEL_PCH_TYPE(dev) == PCH_CPT)
>  #define HAS_PCH_IBX(dev) (INTEL_PCH_TYPE(dev) == PCH_IBX)
> --
> 1.8.4.rc0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx



-- 
Paulo Zanoni

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] drm/i915: Use Write-Through cacheing for the display plane on Iris
  2013-08-01 17:39 [PATCH 1/2] drm/i915: Use Write-Through cacheing for the display plane on Iris Chris Wilson
  2013-08-01 17:39 ` [PATCH 2/2] drm/i915: Tidy the macro casting by using an inline function Chris Wilson
  2013-08-02 14:51 ` [PATCH 1/2] drm/i915: Use Write-Through cacheing for the display plane on Iris Ville Syrjälä
@ 2013-08-02 18:45 ` Andy Lutomirski
  2013-08-02 19:21   ` Ben Widawsky
  2 siblings, 1 reply; 11+ messages in thread
From: Andy Lutomirski @ 2013-08-02 18:45 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On 08/01/2013 10:39 AM, Chris Wilson wrote:
> Haswell GT3e has the unique feature of supporting Write-Through cacheing
> of objects within the eLLC/LLC. The purpose of this is to enable the display
> plane to remain coherent whilst objects lie resident in the eLLC/LLC - so
> that we, in theory, get the best of both worlds, perfect display and fast
> access.
> 
> However, we still need to be careful as the CPU does not see the WT when
> accessing the cache. In particular, this means that we need to flush the
> cache lines after writing to an object through the CPU, and on
> transitioning from a cached state to WT.
> 

I'm planning on adding ioremap_wt, etc sometime soon (for an unrelated
reason).  Would this be useful here?

If so, do you need it for real RAM (i.e. pages that the kernel considers
to be direct-mappable RAM) or just for MMIO space?

--Andy

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] drm/i915: Use Write-Through cacheing for the display plane on Iris
  2013-08-02 18:45 ` Andy Lutomirski
@ 2013-08-02 19:21   ` Ben Widawsky
  2013-08-02 19:47     ` Andy Lutomirski
  0 siblings, 1 reply; 11+ messages in thread
From: Ben Widawsky @ 2013-08-02 19:21 UTC (permalink / raw)
  To: Andy Lutomirski; +Cc: intel-gfx

On Fri, Aug 02, 2013 at 11:45:22AM -0700, Andy Lutomirski wrote:
> On 08/01/2013 10:39 AM, Chris Wilson wrote:
> > Haswell GT3e has the unique feature of supporting Write-Through cacheing
> > of objects within the eLLC/LLC. The purpose of this is to enable the display
> > plane to remain coherent whilst objects lie resident in the eLLC/LLC - so
> > that we, in theory, get the best of both worlds, perfect display and fast
> > access.
> > 
> > However, we still need to be careful as the CPU does not see the WT when
> > accessing the cache. In particular, this means that we need to flush the
> > cache lines after writing to an object through the CPU, and on
> > transitioning from a cached state to WT.
> > 
> 
> I'm planning on adding ioremap_wt, etc sometime soon (for an unrelated
> reason).  Would this be useful here?

I don't think so. We should never be ioremapping the buffers with these
mappings.

> 
> If so, do you need it for real RAM (i.e. pages that the kernel considers
> to be direct-mappable RAM) or just for MMIO space?
> 
> --Andy

It is for real RAM, but again, not something we should ever be
ioremapping.


-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] drm/i915: Use Write-Through cacheing for the display plane on Iris
  2013-08-02 19:21   ` Ben Widawsky
@ 2013-08-02 19:47     ` Andy Lutomirski
  2013-08-02 20:08       ` Chris Wilson
  0 siblings, 1 reply; 11+ messages in thread
From: Andy Lutomirski @ 2013-08-02 19:47 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx

On Fri, Aug 2, 2013 at 12:21 PM, Ben Widawsky <ben@bwidawsk.net> wrote:
> On Fri, Aug 02, 2013 at 11:45:22AM -0700, Andy Lutomirski wrote:
>> On 08/01/2013 10:39 AM, Chris Wilson wrote:
>> > Haswell GT3e has the unique feature of supporting Write-Through cacheing
>> > of objects within the eLLC/LLC. The purpose of this is to enable the display
>> > plane to remain coherent whilst objects lie resident in the eLLC/LLC - so
>> > that we, in theory, get the best of both worlds, perfect display and fast
>> > access.
>> >
>> > However, we still need to be careful as the CPU does not see the WT when
>> > accessing the cache. In particular, this means that we need to flush the
>> > cache lines after writing to an object through the CPU, and on
>> > transitioning from a cached state to WT.
>> >
>>
>> I'm planning on adding ioremap_wt, etc sometime soon (for an unrelated
>> reason).  Would this be useful here?
>
> I don't think so. We should never be ioremapping the buffers with these
> mappings.
>
>>
>> If so, do you need it for real RAM (i.e. pages that the kernel considers
>> to be direct-mappable RAM) or just for MMIO space?
>>
>> --Andy
>
> It is for real RAM, but again, not something we should ever be
> ioremapping.

I asked the question poorly.  The change I'm planning to make would be
to add WT mappings on a roughly equal footing with WC.  You could use
mmap protection bits (assuming there's a free bit in there),
set_memory_wt, or whatever the appropriate API is here.

(I know approximately nothing about what's going on in i915 -- I just
noticed someone else mentioning WT mappings.  For my use, I can get
away with skipping this on real ram, although it would be nice.  The
tricky part about using real ram is finding some space in struct
page.)

--Andy

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] drm/i915: Use Write-Through cacheing for the display plane on Iris
  2013-08-02 14:51 ` [PATCH 1/2] drm/i915: Use Write-Through cacheing for the display plane on Iris Ville Syrjälä
@ 2013-08-02 20:03   ` Chris Wilson
  2013-08-04 19:47     ` Daniel Vetter
  0 siblings, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2013-08-02 20:03 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx

On Fri, Aug 02, 2013 at 05:51:49PM +0300, Ville Syrjälä wrote:
> On Thu, Aug 01, 2013 at 06:39:54PM +0100, Chris Wilson wrote:
> > Haswell GT3e has the unique feature of supporting Write-Through cacheing
> > of objects within the eLLC/LLC. The purpose of this is to enable the display
> > plane to remain coherent whilst objects lie resident in the eLLC/LLC - so
> > that we, in theory, get the best of both worlds, perfect display and fast
> > access.
> > 
> > However, we still need to be careful as the CPU does not see the WT when
> > accessing the cache. In particular, this means that we need to flush the
> > cache lines after writing to an object through the CPU, and on
> > transitioning from a cached state to WT.
> > 
> > v2: Actually do the clflush on transition to WT, nagging by Ville.
> > v3: Flush the CPU cache after writes into WT objects.
> 
> v4?
v3.2: ditto

> >  {
> >  	if (obj->tiling_mode)
> > @@ -510,7 +520,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
> >  		 * read domain and manually flush cachelines (if required). This
> >  		 * optimizes for the case when the gpu will dirty the data
> >  		 * anyway again before the next pread happens. */
> > -		if (obj->cache_level == I915_CACHE_NONE)
> > +		if (!is_cpu_read_cached(obj->cache_level))
> 
> Based on what we discussed on irc about GPU snooping even on UC
> accesses, couldn't this even be:
> 
> !HAS_LLC(dev) && obj->cache_level == I915_CACHE_NONE

Yes, I came to that conclusion later on as well. I like having the
is_cached() functions to try and clarify the reason for the tests, but I
don't think I have the names right yet.
 
> BTW I was toying around a bit w/ gem_cacheing. I modified it to map the
> scratch bo just once and use DRM_I915_GEM_WAIT to wait for the GPU so
> that I could avoid the set_domain stuff. Then I ran it w/ both cached
> and uncached bo, and it passes on IVB in both cases. On ILK it fails
> in the uncached bo case, but passes in the cached bo case. So that
> result appears to support the notion that UC accesses snoop on LLC
> platforms. But of course I didn't try other use cases than the BLT
> vs. CPU that gem_cacheing tests.

[snipping similar comments]

> > -	if (obj->cache_level == I915_CACHE_NONE &&
> > +	if (!is_cpu_write_cached(obj->cache_level) &&
> >  	    obj->tiling_mode == I915_TILING_NONE &&
> >  	    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
> 
> Might we want to skip the GTT path on LLC platforms completely?

If we do need to clflush (e.g. uncached buffer for UC/WT scanouts), then
our experiments (last conducted on SNB iirc) suggest that we prefer to
use the GTT wc writes. But for real world cases of pwrite, we use shmem
on llc, and normally gtt with !llc.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] drm/i915: Use Write-Through cacheing for the display plane on Iris
  2013-08-02 19:47     ` Andy Lutomirski
@ 2013-08-02 20:08       ` Chris Wilson
  0 siblings, 0 replies; 11+ messages in thread
From: Chris Wilson @ 2013-08-02 20:08 UTC (permalink / raw)
  To: Andy Lutomirski; +Cc: Ben Widawsky, intel-gfx

On Fri, Aug 02, 2013 at 12:47:05PM -0700, Andy Lutomirski wrote:
> On Fri, Aug 2, 2013 at 12:21 PM, Ben Widawsky <ben@bwidawsk.net> wrote:
> > On Fri, Aug 02, 2013 at 11:45:22AM -0700, Andy Lutomirski wrote:
> >> On 08/01/2013 10:39 AM, Chris Wilson wrote:
> >> > Haswell GT3e has the unique feature of supporting Write-Through cacheing
> >> > of objects within the eLLC/LLC. The purpose of this is to enable the display
> >> > plane to remain coherent whilst objects lie resident in the eLLC/LLC - so
> >> > that we, in theory, get the best of both worlds, perfect display and fast
> >> > access.
> >> >
> >> > However, we still need to be careful as the CPU does not see the WT when
> >> > accessing the cache. In particular, this means that we need to flush the
> >> > cache lines after writing to an object through the CPU, and on
> >> > transitioning from a cached state to WT.
> >> >
> >>
> >> I'm planning on adding ioremap_wt, etc sometime soon (for an unrelated
> >> reason).  Would this be useful here?
> >
> > I don't think so. We should never be ioremapping the buffers with these
> > mappings.
> >
> >>
> >> If so, do you need it for real RAM (i.e. pages that the kernel considers
> >> to be direct-mappable RAM) or just for MMIO space?
> >>
> >> --Andy
> >
> > It is for real RAM, but again, not something we should ever be
> > ioremapping.
> 
> I asked the question poorly.  The change I'm planning to make would be
> to add WT mappings on a roughly equal footing with WC.  You could use
> mmap protection bits (assuming there's a free bit in there),
> set_memory_wt, or whatever the appropriate API is here.

No, the WT bit here is only applied to the GPU. From the CPU side, we
maintain our usual WB view of the physical pages, and a WC alias through
the aperture.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] drm/i915: Use Write-Through cacheing for the display plane on Iris
  2013-08-02 20:03   ` Chris Wilson
@ 2013-08-04 19:47     ` Daniel Vetter
  0 siblings, 0 replies; 11+ messages in thread
From: Daniel Vetter @ 2013-08-04 19:47 UTC (permalink / raw)
  To: Chris Wilson, Ville Syrjälä, intel-gfx, Kenneth Graunke

On Fri, Aug 2, 2013 at 10:03 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Fri, Aug 02, 2013 at 05:51:49PM +0300, Ville Syrjälä wrote:
>> On Thu, Aug 01, 2013 at 06:39:54PM +0100, Chris Wilson wrote:
>> > Haswell GT3e has the unique feature of supporting Write-Through cacheing
>> > of objects within the eLLC/LLC. The purpose of this is to enable the display
>> > plane to remain coherent whilst objects lie resident in the eLLC/LLC - so
>> > that we, in theory, get the best of both worlds, perfect display and fast
>> > access.
>> >
>> > However, we still need to be careful as the CPU does not see the WT when
>> > accessing the cache. In particular, this means that we need to flush the
>> > cache lines after writing to an object through the CPU, and on
>> > transitioning from a cached state to WT.
>> >
>> > v2: Actually do the clflush on transition to WT, nagging by Ville.
>> > v3: Flush the CPU cache after writes into WT objects.
>>
>> v4?
> v3.2: ditto

v3 (or whatever exactly ended up in Ben's tree) slighty changes the
get_caching_ioctl semantics: WT objects are set as cached, which I
think is a slight abi breakage. I didn't check what mesa exactly does
for scanout buffers (and if that concept is even consistent between
all the different winsys) but I think the easy cop-out is to just
treat WT as uncached in get_caching.

That option also avoids me asking for igt coverage before merging ;-)

>> >  {
>> >     if (obj->tiling_mode)
>> > @@ -510,7 +520,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
>> >              * read domain and manually flush cachelines (if required). This
>> >              * optimizes for the case when the gpu will dirty the data
>> >              * anyway again before the next pread happens. */
>> > -           if (obj->cache_level == I915_CACHE_NONE)
>> > +           if (!is_cpu_read_cached(obj->cache_level))
>>
>> Based on what we discussed on irc about GPU snooping even on UC
>> accesses, couldn't this even be:
>>
>> !HAS_LLC(dev) && obj->cache_level == I915_CACHE_NONE
>
> Yes, I came to that conclusion later on as well. I like having the
> is_cached() functions to try and clarify the reason for the tests, but I
> don't think I have the names right yet.

Another idea would be to split the cache_level from the detailed
cache_options (with stuff like aga, L3$ caching/what we currently
still call MLC, ...). Cpu flushing code would then only check for
cache_level.

I like the approach here though, so maybe s/_cached/_coherent/ for bikeshed.

btw I've asked Ben to resend his s/age0/age3/ patch also for Iris, so
yours would need to be rebased on top.

Cheers, Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] drm/i915: Tidy the macro casting by using an inline function
  2013-08-02 16:22   ` Paulo Zanoni
@ 2013-08-05  6:55     ` Daniel Vetter
  0 siblings, 0 replies; 11+ messages in thread
From: Daniel Vetter @ 2013-08-05  6:55 UTC (permalink / raw)
  To: Paulo Zanoni; +Cc: intel-gfx

On Fri, Aug 02, 2013 at 01:22:51PM -0300, Paulo Zanoni wrote:
> 2013/8/1 Chris Wilson <chris@chris-wilson.co.uk>:
> > Some of our macros we trying to convert from an drm_device to a
> > drm_i915_private and then use the pointer inline. This is not only
> > cumbersome but prone to error. Replacing it with a typesafe function
> > should help catch those errors in future.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>

Queued for -next, thanks for the patch.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2013-08-05  6:55 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-08-01 17:39 [PATCH 1/2] drm/i915: Use Write-Through cacheing for the display plane on Iris Chris Wilson
2013-08-01 17:39 ` [PATCH 2/2] drm/i915: Tidy the macro casting by using an inline function Chris Wilson
2013-08-02 16:22   ` Paulo Zanoni
2013-08-05  6:55     ` Daniel Vetter
2013-08-02 14:51 ` [PATCH 1/2] drm/i915: Use Write-Through cacheing for the display plane on Iris Ville Syrjälä
2013-08-02 20:03   ` Chris Wilson
2013-08-04 19:47     ` Daniel Vetter
2013-08-02 18:45 ` Andy Lutomirski
2013-08-02 19:21   ` Ben Widawsky
2013-08-02 19:47     ` Andy Lutomirski
2013-08-02 20:08       ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.