* [PATCH 0/4] Nonblocking maps
@ 2011-09-22 23:27 Ben Widawsky
  2011-09-22 23:27 ` [PATCH] drm/i915: IOCTL to query the cache level of a BO Ben Widawsky
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Ben Widawsky @ 2011-09-22 23:27 UTC (permalink / raw)
  To: intel-gfx; +Cc: Kilarski, Bernard R, Daniel Vetter

After going back and forth many times, I think Daniel and I have agreed
on the solution for non-blocking maps.

This new interface adds a call to map buffers without blocking when
possible. In practice it may still block, but it tracks whether the
buffer needs flushing and does the right thing for each use case. This
relies on a new IOCTL to determine the cache type of the object.
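
A client would use the new entry points roughly like this (just a
sketch using the names introduced in the libdrm patch; bo/data/size are
placeholders and error handling is omitted):

	int gtt_mapped;

	drm_intel_gem_bo_map_nonblocking(bo, &gtt_mapped);
	/* streaming write; skips the set_domain stall in the common case */
	memcpy(bo->virtual, data, size);
	drm_intel_gem_bo_unmap_nonblocking(bo);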

With the posted benchmark there are significant improvements on Gen5
in a very synthetic test designed to expose unnecessary blocking. The
test is posted in the series for reference, but isn't actually useful
in its current form other than to prove there is a potential
performance improvement. The test shows no performance regression on
Gen6.

Ben

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH] drm/i915: IOCTL to query the cache level of a BO.
  2011-09-22 23:27 [PATCH 0/4] Nonblocking maps Ben Widawsky
@ 2011-09-22 23:27 ` Ben Widawsky
  2011-09-23  8:04   ` Daniel Vetter
  2011-09-23  9:00   ` Chris Wilson
  2011-09-22 23:27 ` [PATCH] intel: non-blocking mmaps on the cheap Ben Widawsky
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 15+ messages in thread
From: Ben Widawsky @ 2011-09-22 23:27 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky, Kilarski, Bernard R

Querying a BO's cache level can help clients wishing to map the buffer.
For example, an Ironlake machine could query the buffer to determine it
should use GTT mappings, while a Sandybridge machine running the same
code would prefer to use regular CPU mapped buffers since the cache is
coherent.
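
Userspace consumption is a thin wrapper around the new ioctl, roughly
like the following (sketch only; bo_handle is a placeholder, and the
cache level values mirror the kernel's enum i915_cache_level):

	struct drm_i915_gem_get_cache_type cache = { .handle = bo_handle };

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_CACHE_TYPE, &cache) == 0 &&
	    cache.cache_level == I915_CACHE_LLC) {
		/* coherent: a regular CPU map is fine */
	} else {
		/* not coherent (or an old kernel): use a GTT map */
	}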

Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Eric Anholt <eric@anholt.net>
Cc: "Kilarski, Bernard R" <bernard.r.kilarski@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_dma.c |    1 +
 drivers/gpu/drm/i915/i915_drv.h |    2 ++
 drivers/gpu/drm/i915/i915_gem.c |   22 ++++++++++++++++++++++
 include/drm/i915_drm.h          |    7 +++++++
 4 files changed, 32 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 8a3942c..8178cbb 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -2292,6 +2292,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_GET_CACHE_TYPE, i915_gem_get_cache_type_ioctl, DRM_UNLOCKED),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7916bd9..2dfcb3d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1118,6 +1118,8 @@ int i915_gem_get_tiling(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
+int i915_gem_get_cache_type_ioctl(struct drm_device *dev, void *data,
+				  struct drm_file *file_priv);
 void i915_gem_load(struct drm_device *dev);
 int i915_gem_init_object(struct drm_gem_object *obj);
 int __must_check i915_gem_flush_ring(struct intel_ring_buffer *ring,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a546a71..a120a74 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3777,6 +3777,28 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 }
 
 int
+i915_gem_get_cache_type_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file_priv)
+{
+
+	struct drm_i915_gem_get_cache_type *args = data;
+	struct drm_i915_gem_object *obj;
+	int ret = 0;
+
+	obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
+	if (&obj->base == NULL) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	args->cache_level = obj->cache_level;
+	drm_gem_object_unreference_unlocked(&obj->base);
+
+out:
+	return ret;
+}
+
+int
 i915_gem_idle(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 28c0d11..a0dff8a 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -198,6 +198,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_OVERLAY_PUT_IMAGE	0x27
 #define DRM_I915_OVERLAY_ATTRS	0x28
 #define DRM_I915_GEM_EXECBUFFER2	0x29
+#define DRM_I915_GEM_GET_CACHE_TYPE	0x2a
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -239,6 +240,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_MADVISE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
 #define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_OVERLAY_PUT_IMAGE, struct drm_intel_overlay_put_image)
 #define DRM_IOCTL_I915_OVERLAY_ATTRS	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs)
+#define DRM_IOCTL_I915_GEM_GET_CACHE_TYPE	DRM_IOR  (DRM_COMMAND_BASE + DRM_I915_GEM_GET_CACHE_TYPE, struct drm_i915_gem_get_cache_type)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -844,4 +846,9 @@ struct drm_intel_overlay_attrs {
 	__u32 gamma5;
 };
 
+struct drm_i915_gem_get_cache_type {
+	__u32 handle;
+	__u32 cache_level;
+};
+
 #endif				/* _I915_DRM_H_ */
-- 
1.7.6.3

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH] intel: non-blocking mmaps on the cheap
  2011-09-22 23:27 [PATCH 0/4] Nonblocking maps Ben Widawsky
  2011-09-22 23:27 ` [PATCH] drm/i915: IOCTL to query the cache level of a BO Ben Widawsky
@ 2011-09-22 23:27 ` Ben Widawsky
  2011-09-22 23:35   ` Ben Widawsky
  2011-10-06 20:55   ` Eric Anholt
  2011-09-22 23:27 ` [PATCH] i965: use nonblocking maps MapRangeBuffer Ben Widawsky
  2011-09-22 23:27 ` [PATCH] gpu-tools: nonblocking map test Ben Widawsky
  3 siblings, 2 replies; 15+ messages in thread
From: Ben Widawsky @ 2011-09-22 23:27 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky, Kilarski, Bernard R

From: Daniel Vetter <daniel.vetter@ffwll.ch>

This adds a new mode to map gem objects in a non-blocking way.

The new kernel interface required to get the caching level/coherency
is not yet wired up. All the code to transparently choose between gtt
mappings or (if coherent) cpu mappings is already in place, though.

To make this usable for mesa, we need to allow other buffer access in
between non-blocking modes. It's not a big problem if this accidentally
blocks once when switching back to non-blocking maps, but domains need
to be correctly tracked.

I've gone through historical kernel versions and luckily the only
place we move a bo from the gtt domain to the cpu domain is in the
set_domain ioctl. pwrite/pread don't move objects into the cpu domain as long
as they're untiled or llc cached.

So track whether an object is in the gtt domain and invalidate this
only on access to cpu mappings.

[patch reworked by Ben]

Cc: Eric Anholt <eric@anholt.net>
Cc: "Kilarski, Bernard R" <bernard.r.kilarski@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 include/drm/i915_drm.h   |    7 ++
 intel/intel_bufmgr.h     |    3 +
 intel/intel_bufmgr_gem.c |  180 +++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 173 insertions(+), 17 deletions(-)

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index adc2392..4b62222 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -189,6 +189,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_OVERLAY_PUT_IMAGE	0x27
 #define DRM_I915_OVERLAY_ATTRS	0x28
 #define DRM_I915_GEM_EXECBUFFER2	0x29
+#define DRM_I915_GEM_GET_CACHE_TYPE	0x2a
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -230,6 +231,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_MADVISE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
 #define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE	DRM_IOW(DRM_COMMAND_BASE + DRM_IOCTL_I915_OVERLAY_ATTRS, struct drm_intel_overlay_put_image)
 #define DRM_IOCTL_I915_OVERLAY_ATTRS	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs)
+#define DRM_IOCTL_I915_GEM_GET_CACHE_TYPE	DRM_IOR  (DRM_COMMAND_BASE + DRM_I915_GEM_GET_CACHE_TYPE, struct drm_i915_gem_get_cache_type)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -835,4 +837,9 @@ struct drm_intel_overlay_attrs {
 	__u32 gamma5;
 };
 
+struct drm_i915_gem_get_cache_type {
+	__u32 handle;
+	__u32 cache_level;
+};
+
 #endif				/* _I915_DRM_H_ */
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 889ef46..116a9d7 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -149,6 +149,9 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo);
 int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo);
 void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable);
 
+int drm_intel_gem_bo_map_nonblocking(drm_intel_bo *bo, int *gtt_mapped);
+int drm_intel_gem_bo_unmap_nonblocking(drm_intel_bo *bo);
+
 int drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id);
 
 int drm_intel_get_aperture_sizes(int fd, size_t *mappable, size_t *total);
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 4f4de92..b868a1c 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -141,6 +141,8 @@ struct _drm_intel_bo_gem {
 	uint32_t swizzle_mode;
 	unsigned long stride;
 
+	unsigned cache_coherent : 1;
+
 	time_t free_time;
 
 	/** Array passed to the DRM containing relocation information. */
@@ -998,15 +1000,11 @@ static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
 	}
 }
 
-static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
+static int do_mmap_cpu(drm_intel_bufmgr_gem *bufmgr_gem,
+		       drm_intel_bo_gem *bo_gem)
 {
-	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
-	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
-	struct drm_i915_gem_set_domain set_domain;
 	int ret;
 
-	pthread_mutex_lock(&bufmgr_gem->lock);
-
 	/* Allow recursive mapping. Mesa may recursively map buffers with
 	 * nested display loops.
 	 */
@@ -1018,7 +1016,7 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
 		memset(&mmap_arg, 0, sizeof(mmap_arg));
 		mmap_arg.handle = bo_gem->gem_handle;
 		mmap_arg.offset = 0;
-		mmap_arg.size = bo->size;
+		mmap_arg.size = bo_gem->bo.size;
 		ret = drmIoctl(bufmgr_gem->fd,
 			       DRM_IOCTL_I915_GEM_MMAP,
 			       &mmap_arg);
@@ -1027,11 +1025,51 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
 			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
 			    __FILE__, __LINE__, bo_gem->gem_handle,
 			    bo_gem->name, strerror(errno));
-			pthread_mutex_unlock(&bufmgr_gem->lock);
 			return ret;
 		}
 		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
 	}
+	return 0;
+}
+
+enum i915_cache_level {
+	I915_CACHE_NONE,
+	I915_CACHE_LLC,
+	I915_CACHE_LLC_MLC, /* gen6+ */
+};
+
+static int get_cache_type(drm_intel_bo *bo)
+{
+	struct drm_i915_gem_get_cache_type cache = {0, 0};
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	int ret = 0;
+
+	cache.handle = bo_gem->gem_handle;
+	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_CACHE_TYPE,
+		       &cache);
+
+	/* This should maintain old behavior */
+	if (ret)
+		return 0;
+
+	return cache.cache_level;
+}
+
+static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	struct drm_i915_gem_set_domain set_domain;
+	int ret;
+
+	pthread_mutex_lock(&bufmgr_gem->lock);
+
+	ret = do_mmap_cpu(bufmgr_gem, bo_gem);
+	if (ret != 0) {
+		pthread_mutex_unlock(&bufmgr_gem->lock);
+		return ret;
+	}
 	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
 	    bo_gem->mem_virtual);
 	bo->virtual = bo_gem->mem_virtual;
@@ -1051,20 +1089,19 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
 		    strerror(errno));
 	}
 
+	if (get_cache_type(bo) != I915_CACHE_LLC)
+		bo_gem->cache_coherent = 0;
+
 	pthread_mutex_unlock(&bufmgr_gem->lock);
 
 	return 0;
 }
 
-int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
+static int do_mmap_gtt(drm_intel_bufmgr_gem *bufmgr_gem,
+		       drm_intel_bo_gem *bo_gem)
 {
-	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
-	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
-	struct drm_i915_gem_set_domain set_domain;
 	int ret;
 
-	pthread_mutex_lock(&bufmgr_gem->lock);
-
 	/* Get a mapping of the buffer if we haven't before. */
 	if (bo_gem->gtt_virtual == NULL) {
 		struct drm_i915_gem_mmap_gtt mmap_arg;
@@ -1085,12 +1122,11 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
 			    __FILE__, __LINE__,
 			    bo_gem->gem_handle, bo_gem->name,
 			    strerror(errno));
-			pthread_mutex_unlock(&bufmgr_gem->lock);
 			return ret;
 		}
 
 		/* and mmap it */
-		bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE,
+		bo_gem->gtt_virtual = mmap(0, bo_gem->bo.size, PROT_READ | PROT_WRITE,
 					   MAP_SHARED, bufmgr_gem->fd,
 					   mmap_arg.offset);
 		if (bo_gem->gtt_virtual == MAP_FAILED) {
@@ -1100,11 +1136,28 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
 			    __FILE__, __LINE__,
 			    bo_gem->gem_handle, bo_gem->name,
 			    strerror(errno));
-			pthread_mutex_unlock(&bufmgr_gem->lock);
 			return ret;
 		}
 	}
 
+	return 0;
+}
+
+int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	struct drm_i915_gem_set_domain set_domain;
+	int ret;
+
+	pthread_mutex_lock(&bufmgr_gem->lock);
+
+	ret = do_mmap_gtt(bufmgr_gem, bo_gem);
+	if (ret != 0) {
+		pthread_mutex_unlock(&bufmgr_gem->lock);
+		return ret;
+	}
+
 	bo->virtual = bo_gem->gtt_virtual;
 
 	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
@@ -1123,6 +1176,8 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
 		    strerror(errno));
 	}
 
+	bo_gem->cache_coherent = 1;
+
 	pthread_mutex_unlock(&bufmgr_gem->lock);
 
 	return 0;
@@ -1282,6 +1337,97 @@ drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
 		    set_domain.read_domains, set_domain.write_domain,
 		    strerror(errno));
 	}
+
+	bo_gem->cache_coherent = 1;
+}
+
+/**
+ * Map an object without blocking on the GPU if possible.
+ *
+ * This automatically chooses either the GTT mapping or if coherent and faster,
+ * the CPU mapping.
+ *
+ * Not allowed on tiled buffers (to prevent headaches with swizzling and
+ * tracking the gem domain) or to share these buffers with flink, though that's
+ * not currently tracked.
+ */
+int drm_intel_gem_bo_map_nonblocking(drm_intel_bo *bo, int *gtt_mapped)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	struct drm_i915_gem_set_domain set_domain;
+	int gpu_coherent_cpu_map = 0;
+	int ret;
+
+	pthread_mutex_lock(&bufmgr_gem->lock);
+	assert(bo_gem->tiling_mode == I915_TILING_NONE);
+
+	if (bufmgr_gem->gen >= 6)
+		gpu_coherent_cpu_map = 1;
+
+	if (gpu_coherent_cpu_map && get_cache_type(bo) == I915_CACHE_LLC) {
+		ret = do_mmap_cpu(bufmgr_gem, bo_gem);
+		if (ret != 0) {
+			pthread_mutex_unlock(&bufmgr_gem->lock);
+			return ret;
+		}
+
+		bo->virtual = bo_gem->mem_virtual;
+		*gtt_mapped = 0;
+	} else {
+		ret = do_mmap_gtt(bufmgr_gem, bo_gem);
+		if (ret != 0) {
+			pthread_mutex_unlock(&bufmgr_gem->lock);
+			return ret;
+		}
+
+		bo->virtual = bo_gem->gtt_virtual;
+		*gtt_mapped = 1;
+	}
+
+	DBG("bo_map_nonblocking: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
+	    bo->virtual);
+
+	/* Move it to the GTT domain in case it isn't there yet.
+	 * For coherent buffers, cache_coherent is set at map time.
+	 */
+	if (!bo_gem->cache_coherent) {
+		set_domain.handle = bo_gem->gem_handle;
+		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
+		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
+		ret = drmIoctl(bufmgr_gem->fd,
+			       DRM_IOCTL_I915_GEM_SET_DOMAIN,
+			       &set_domain);
+		if (ret != 0) {
+			DBG("%s:%d: Error setting domain %d: %s\n",
+			    __FILE__, __LINE__, bo_gem->gem_handle,
+			    strerror(errno));
+		}
+
+		bo_gem->cache_coherent = 1;
+	}
+
+	pthread_mutex_unlock(&bufmgr_gem->lock);
+
+	return 0;
+}
+
+/**
+ * unmap an object in the non-blocking mode
+ */
+int drm_intel_gem_bo_unmap_nonblocking(drm_intel_bo *bo)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	int ret = 0;
+
+	if (bo == NULL)
+		return 0;
+
+	pthread_mutex_lock(&bufmgr_gem->lock);
+	bo->virtual = NULL;
+	pthread_mutex_unlock(&bufmgr_gem->lock);
+
+	return ret;
 }
 
 static void
-- 
1.7.6.3

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH] i965: use nonblocking maps MapRangeBuffer
  2011-09-22 23:27 [PATCH 0/4] Nonblocking maps Ben Widawsky
  2011-09-22 23:27 ` [PATCH] drm/i915: IOCTL to query the cache level of a BO Ben Widawsky
  2011-09-22 23:27 ` [PATCH] intel: non-blocking mmaps on the cheap Ben Widawsky
@ 2011-09-22 23:27 ` Ben Widawsky
  2011-09-23 17:15   ` Eric Anholt
  2011-09-22 23:27 ` [PATCH] gpu-tools: nonblocking map test Ben Widawsky
  3 siblings, 1 reply; 15+ messages in thread
From: Ben Widawsky @ 2011-09-22 23:27 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ben Widawsky, Mesa Devs, Daniel Vetter

This makes the code a lot cleaner, and theoretically faster (not many
real-world tests use this GL extension).

Cc: Eric Anholt <eric@anholt.net>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Mesa Devs <mesa-dev@lists.freedesktop.org>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 src/mesa/drivers/dri/intel/intel_buffer_objects.c |   48 ++------------------
 1 files changed, 5 insertions(+), 43 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index d35a50e..91dddce 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -296,8 +296,6 @@ intel_bufferobj_get_subdata(struct gl_context * ctx,
    }
 }
 
-
-
 /**
  * Called via glMapBufferRange and glMapBuffer
  *
@@ -363,50 +361,14 @@ intel_bufferobj_map_range(struct gl_context * ctx,
       return NULL;
    }
 
-   /* If the user doesn't care about existing buffer contents and mapping
-    * would cause us to block, then throw out the old buffer.
-    */
-   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
-       (access & GL_MAP_INVALIDATE_BUFFER_BIT) &&
-       drm_intel_bo_busy(intel_obj->buffer)) {
-      drm_intel_bo_unreference(intel_obj->buffer);
-      intel_bufferobj_alloc_buffer(intel, intel_obj);
-   }
-
-   /* If the user is mapping a range of an active buffer object but
-    * doesn't require the current contents of that range, make a new
-    * BO, and we'll copy what they put in there out at unmap or
-    * FlushRange time.
-    */
    if ((access & GL_MAP_INVALIDATE_RANGE_BIT) &&
-       drm_intel_bo_busy(intel_obj->buffer)) {
-      if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
+       drm_intel_bo_busy(intel_obj->buffer) &&
+       (access & GL_MAP_FLUSH_EXPLICIT_BIT)) {
 	 intel_obj->range_map_buffer = malloc(length);
 	 obj->Pointer = intel_obj->range_map_buffer;
-      } else {
-	 intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr,
-						      "range map",
-						      length, 64);
-	 if (!(access & GL_MAP_READ_BIT)) {
-	    drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
-	    intel_obj->mapped_gtt = GL_TRUE;
-	 } else {
-	    drm_intel_bo_map(intel_obj->range_map_bo,
-			     (access & GL_MAP_WRITE_BIT) != 0);
-	    intel_obj->mapped_gtt = GL_FALSE;
-	 }
-	 obj->Pointer = intel_obj->range_map_bo->virtual;
-      }
-      return obj->Pointer;
-   }
-
-   if (!(access & GL_MAP_READ_BIT)) {
-      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
-      intel_obj->mapped_gtt = GL_TRUE;
-   } else {
-      drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0);
-      intel_obj->mapped_gtt = GL_FALSE;
-   }
+	 return obj->Pointer;
+   } else
+    drm_intel_gem_bo_map_nonblocking(intel_obj->buffer, &intel_obj->mapped_gtt);
 
    obj->Pointer = intel_obj->buffer->virtual + offset;
    return obj->Pointer;
-- 
1.7.6.3

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH] gpu-tools: nonblocking map test
  2011-09-22 23:27 [PATCH 0/4] Nonblocking maps Ben Widawsky
                   ` (2 preceding siblings ...)
  2011-09-22 23:27 ` [PATCH] i965: use nonblocking maps MapRangeBuffer Ben Widawsky
@ 2011-09-22 23:27 ` Ben Widawsky
  3 siblings, 0 replies; 15+ messages in thread
From: Ben Widawsky @ 2011-09-22 23:27 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky, Kilarski, Bernard R

Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: "Kilarski, Bernard R" <bernard.r.kilarski@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 tests/Makefile.am  |    1 +
 tests/gem_wo_map.c |  237 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/test.sh      |   16 ++++
 3 files changed, 254 insertions(+), 0 deletions(-)
 create mode 100644 tests/gem_wo_map.c
 create mode 100755 tests/test.sh

diff --git a/tests/Makefile.am b/tests/Makefile.am
index 4d5230c..b7020f1 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -51,6 +51,7 @@ TESTS = getversion \
 	gem_double_irq_loop \
 	gem_ring_sync_loop \
 	gem_pipe_control_store_loop \
+	gem_wo_map \
 	$(NULL)
 
 HANG = \
diff --git a/tests/gem_wo_map.c b/tests/gem_wo_map.c
new file mode 100644
index 0000000..7c2beb4
--- /dev/null
+++ b/tests/gem_wo_map.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <ben@bwidawsk.net>
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <pthread.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+drm_intel_bo *src, *dst;
+int fd;
+pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+int num_writes = 1;
+
+#define NUM_MAPS (128 * 1024)
+#define NUM_BLITS (128 * 1024)
+#define SURFACE_SIZE (1<<20)
+/* number of 4KiB pages in the surface */
+#define SURFACE_PAGES (SURFACE_SIZE >> 12)
+
+bool thread_ret;
+
+/* acts like a gpu memcpy */
+static void
+blit(void)
+{
+	uint32_t pitch = 4096;
+	uint32_t lines = SURFACE_SIZE / pitch;
+	uint32_t line_width = pitch;
+
+	BEGIN_BATCH(6);
+	OUT_BATCH((2<<29) | (0x43<<22) | (0x3<<20) | 4);
+	OUT_BATCH((0x3<<24) | (0xcc<<16) | pitch);
+	OUT_BATCH((lines<<16) | line_width);
+	OUT_RELOC(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+	OUT_BATCH(pitch);
+	OUT_RELOC(src, I915_GEM_DOMAIN_RENDER, 0, 0);
+	ADVANCE_BATCH();
+
+	intel_batchbuffer_flush(batch);
+}
+
+static inline int
+get_offset(void)
+{
+	return (rand() % SURFACE_PAGES) * 4096;
+}
+
+static void
+touch_page(char * const base)
+{
+	int i = 0;
+	for(i = 0; i < num_writes; i++) {
+		base[get_offset()] = 1;
+	}
+}
+
+static void *
+blits(void *arg)
+{
+	int i = 0;
+	for (i = 0; i < NUM_BLITS; i++) {
+		blit();
+		usleep(100);
+	}
+	pthread_exit(NULL);
+}
+
+static void *
+maps(void *arg)
+{
+	int i = 0;
+	for (i = 0; i < NUM_MAPS; i++) {
+		drm_intel_bo_map(src, 1);
+		touch_page(src->virtual);
+		drm_intel_bo_unmap(src);
+	}
+	pthread_exit(NULL);
+}
+
+static void *
+maps_nb(void *arg)
+{
+	int crap;
+	int i = 0;
+	for (i = 0; i < NUM_MAPS; i++) {
+		drm_intel_gem_bo_map_nonblocking(src, &crap);
+		touch_page(src->virtual);
+		drm_intel_gem_bo_unmap_nonblocking(src);
+	}
+	pthread_exit(NULL);
+}
+
+static void *
+functional_test_nb(void *arg)
+{
+	int crap;
+	drm_intel_gem_bo_map_nonblocking(src, &crap);
+	((char *)src->virtual)[1000] = 1;
+	drm_intel_gem_bo_unmap_nonblocking(src);
+	blit();
+	drm_intel_bo_wait_rendering(batch->bo);
+
+	drm_intel_bo_map(dst, 0);
+	thread_ret = (((char *)dst->virtual)[1000] == 1);
+	drm_intel_bo_unmap(dst);
+	pthread_exit(&thread_ret);
+}
+
+static void *
+functional_test(void *arg)
+{
+	drm_intel_bo_map(src, 1);
+	((char *)src->virtual)[1000] = 1;
+	drm_intel_bo_unmap(src);
+	blit();
+	drm_intel_bo_wait_rendering(batch->bo);
+
+	drm_intel_bo_map(dst, 0);
+	thread_ret = (((char *)dst->virtual)[1000] == 1);
+	drm_intel_bo_unmap(dst);
+	pthread_exit(&thread_ret);
+}
+
+typedef void * (*thread_function)(void *);
+thread_function ffuncs[2] = {functional_test, functional_test_nb};
+thread_function sfuncs[2] = {maps, maps_nb};
+
+static void usage(const char *name)
+{
+	fprintf(stderr, "Usage: %s [-n non blocking] "
+			"[-f function test] "
+			"[-c count]\n", name);
+}
+
+int main(int argc, char **argv)
+{
+	pthread_t blit_thread;
+	pthread_t map_thread;
+	int test = 0;
+	int non_block = 0;
+	int opt;
+	void *ret;
+
+	while ((opt = getopt(argc, argv, "nfc:")) != -1) {
+		switch(opt) {
+		case 'c':
+			num_writes = atoi(optarg);
+			break;
+		case 'f':
+			test = 2;
+			break;
+		case 'n':
+			non_block = 1;
+			break;
+		case 'h':
+		case '?':
+		default:
+			usage(argv[0]);
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	srandom(0xdeadbeef);
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+	src = drm_intel_bo_alloc(bufmgr, "src", SURFACE_SIZE, 4096);
+	dst = drm_intel_bo_alloc(bufmgr, "dst", SURFACE_SIZE, 4096);
+
+	switch (test) {
+	case 2:
+		pthread_create(&map_thread, NULL, ffuncs[non_block], NULL);
+		pthread_join(map_thread, &ret);
+		assert(*(bool *)ret == true);
+		break;
+	default:
+		pthread_create(&blit_thread, NULL, blits, NULL);
+		pthread_create(&map_thread, NULL, sfuncs[non_block], NULL);
+		pthread_join(map_thread, NULL);
+		/* We don't want to time blit performance for this benchmark,
+		 * though it is relevant.
+		 */
+		pthread_join(blit_thread, NULL);
+		break;
+	}
+
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/test.sh b/tests/test.sh
new file mode 100755
index 0000000..9ffdf3a
--- /dev/null
+++ b/tests/test.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+echo "Map non block"
+time ./gem_wo_map -n -c 500
+time ./gem_wo_map -n -c 500
+time ./gem_wo_map -n -c 500
+time ./gem_wo_map -n -c 500
+time ./gem_wo_map -n -c 500
+time ./gem_wo_map -n -c 500
+echo "Map"
+time ./gem_wo_map -c 500
+time ./gem_wo_map -c 500
+time ./gem_wo_map -c 500
+time ./gem_wo_map -c 500
+time ./gem_wo_map -c 500
+time ./gem_wo_map -c 500
-- 
1.7.6.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH] intel: non-blocking mmaps on the cheap
  2011-09-22 23:27 ` [PATCH] intel: non-blocking mmaps on the cheap Ben Widawsky
@ 2011-09-22 23:35   ` Ben Widawsky
  2011-09-23  8:52     ` Daniel Vetter
  2011-10-06 20:55   ` Eric Anholt
  1 sibling, 1 reply; 15+ messages in thread
From: Ben Widawsky @ 2011-09-22 23:35 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky, Kilarski, Bernard R

From: Daniel Vetter <daniel.vetter@ffwll.ch>

This adds a new mode to map gem objects in a non-blocking way.

The new kernel interface required to get the caching level/coherency
is not yet wired up. All the code to transparently choose between gtt
mappings or (if coherent) cpu mappings is already in place, though.

To make this usable for mesa, we need to allow other buffer access in
between non-blocking modes. It's not a big problem if this accidentally
blocks once when switching back to non-blocking maps, but domains need
to be correctly tracked.

I've gone through historical kernel versions and luckily the only
place we move a bo from the gtt domain to the cpu domain is in the
set_domain ioctl. pwrite/pread don't move objects into the cpu domain as long
as they're untiled or llc cached.

So track whether an object is in the gtt domain and invalidate this
only on access to cpu mappings.

[patch reworked by Ben]

Cc: Eric Anholt <eric@anholt.net>
Cc: "Kilarski, Bernard R" <bernard.r.kilarski@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 include/drm/i915_drm.h   |    7 ++
 intel/intel_bufmgr.h     |    3 +
 intel/intel_bufmgr_gem.c |  181 +++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 174 insertions(+), 17 deletions(-)

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index adc2392..4b62222 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -189,6 +189,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_OVERLAY_PUT_IMAGE	0x27
 #define DRM_I915_OVERLAY_ATTRS	0x28
 #define DRM_I915_GEM_EXECBUFFER2	0x29
+#define DRM_I915_GEM_GET_CACHE_TYPE	0x2a
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -230,6 +231,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_MADVISE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
 #define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE	DRM_IOW(DRM_COMMAND_BASE + DRM_IOCTL_I915_OVERLAY_ATTRS, struct drm_intel_overlay_put_image)
 #define DRM_IOCTL_I915_OVERLAY_ATTRS	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs)
+#define DRM_IOCTL_I915_GEM_GET_CACHE_TYPE	DRM_IOR  (DRM_COMMAND_BASE + DRM_I915_GEM_GET_CACHE_TYPE, struct drm_i915_gem_get_cache_type)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -835,4 +837,9 @@ struct drm_intel_overlay_attrs {
 	__u32 gamma5;
 };
 
+struct drm_i915_gem_get_cache_type {
+	__u32 handle;
+	__u32 cache_level;
+};
+
 #endif				/* _I915_DRM_H_ */
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 889ef46..116a9d7 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -149,6 +149,9 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo);
 int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo);
 void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable);
 
+int drm_intel_gem_bo_map_nonblocking(drm_intel_bo *bo, int *gtt_mapped);
+int drm_intel_gem_bo_unmap_nonblocking(drm_intel_bo *bo);
+
 int drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id);
 
 int drm_intel_get_aperture_sizes(int fd, size_t *mappable, size_t *total);
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 4f4de92..5b7e053 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -141,6 +141,8 @@ struct _drm_intel_bo_gem {
 	uint32_t swizzle_mode;
 	unsigned long stride;
 
+	unsigned cache_coherent : 1;
+
 	time_t free_time;
 
 	/** Array passed to the DRM containing relocation information. */
@@ -998,15 +1000,11 @@ static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
 	}
 }
 
-static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
+static int do_mmap_cpu(drm_intel_bufmgr_gem *bufmgr_gem,
+		       drm_intel_bo_gem *bo_gem)
 {
-	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
-	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
-	struct drm_i915_gem_set_domain set_domain;
 	int ret;
 
-	pthread_mutex_lock(&bufmgr_gem->lock);
-
 	/* Allow recursive mapping. Mesa may recursively map buffers with
 	 * nested display loops.
 	 */
@@ -1018,7 +1016,7 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
 		memset(&mmap_arg, 0, sizeof(mmap_arg));
 		mmap_arg.handle = bo_gem->gem_handle;
 		mmap_arg.offset = 0;
-		mmap_arg.size = bo->size;
+		mmap_arg.size = bo_gem->bo.size;
 		ret = drmIoctl(bufmgr_gem->fd,
 			       DRM_IOCTL_I915_GEM_MMAP,
 			       &mmap_arg);
@@ -1027,11 +1025,52 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
 			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
 			    __FILE__, __LINE__, bo_gem->gem_handle,
 			    bo_gem->name, strerror(errno));
-			pthread_mutex_unlock(&bufmgr_gem->lock);
 			return ret;
 		}
 		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
 	}
+	return 0;
+}
+
+enum i915_cache_level {
+	I915_CACHE_NONE,
+	I915_CACHE_LLC,
+	I915_CACHE_LLC_MLC, /* gen6+ */
+};
+
+static int get_cache_type(drm_intel_bo *bo)
+{
+	struct drm_i915_gem_get_cache_type cache = {0, 0};
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	int ret = 0;
+
+	cache.handle = bo_gem->gem_handle;
+	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_CACHE_TYPE,
+		       &cache);
+
+	/* This should maintain old behavior */
+	if (ret)
+		return I915_CACHE_NONE;
+
+#define CACHE_LEVEL 0xff
+	return cache.cache_level & CACHE_LEVEL;
+}
+
+static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	struct drm_i915_gem_set_domain set_domain;
+	int ret;
+
+	pthread_mutex_lock(&bufmgr_gem->lock);
+
+	ret = do_mmap_cpu(bufmgr_gem, bo_gem);
+	if (ret != 0) {
+		pthread_mutex_unlock(&bufmgr_gem->lock);
+		return ret;
+	}
 	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
 	    bo_gem->mem_virtual);
 	bo->virtual = bo_gem->mem_virtual;
@@ -1051,20 +1090,19 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
 		    strerror(errno));
 	}
 
+	if (get_cache_type(bo) != I915_CACHE_LLC)
+		bo_gem->cache_coherent = 0;
+
 	pthread_mutex_unlock(&bufmgr_gem->lock);
 
 	return 0;
 }
 
-int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
+static int do_mmap_gtt(drm_intel_bufmgr_gem *bufmgr_gem,
+		       drm_intel_bo_gem *bo_gem)
 {
-	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
-	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
-	struct drm_i915_gem_set_domain set_domain;
 	int ret;
 
-	pthread_mutex_lock(&bufmgr_gem->lock);
-
 	/* Get a mapping of the buffer if we haven't before. */
 	if (bo_gem->gtt_virtual == NULL) {
 		struct drm_i915_gem_mmap_gtt mmap_arg;
@@ -1085,12 +1123,11 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
 			    __FILE__, __LINE__,
 			    bo_gem->gem_handle, bo_gem->name,
 			    strerror(errno));
-			pthread_mutex_unlock(&bufmgr_gem->lock);
 			return ret;
 		}
 
 		/* and mmap it */
-		bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE,
+		bo_gem->gtt_virtual = mmap(0, bo_gem->bo.size, PROT_READ | PROT_WRITE,
 					   MAP_SHARED, bufmgr_gem->fd,
 					   mmap_arg.offset);
 		if (bo_gem->gtt_virtual == MAP_FAILED) {
@@ -1100,11 +1137,28 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
 			    __FILE__, __LINE__,
 			    bo_gem->gem_handle, bo_gem->name,
 			    strerror(errno));
-			pthread_mutex_unlock(&bufmgr_gem->lock);
 			return ret;
 		}
 	}
 
+	return 0;
+}
+
+int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	struct drm_i915_gem_set_domain set_domain;
+	int ret;
+
+	pthread_mutex_lock(&bufmgr_gem->lock);
+
+	ret = do_mmap_gtt(bufmgr_gem, bo_gem);
+	if (ret != 0) {
+		pthread_mutex_unlock(&bufmgr_gem->lock);
+		return ret;
+	}
+
 	bo->virtual = bo_gem->gtt_virtual;
 
 	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
@@ -1123,6 +1177,8 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
 		    strerror(errno));
 	}
 
+	bo_gem->cache_coherent = 1;
+
 	pthread_mutex_unlock(&bufmgr_gem->lock);
 
 	return 0;
@@ -1282,6 +1338,97 @@ drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
 		    set_domain.read_domains, set_domain.write_domain,
 		    strerror(errno));
 	}
+
+	bo_gem->cache_coherent = 1;
+}
+
+/**
+ * Map an object without blocking on the GPU if possible.
+ *
+ * This automatically chooses either the GTT mapping or if coherent and faster,
+ * the CPU mapping.
+ *
+ * Not allowed on tiled buffers (to prevent headaches with swizzling and
+ * tracking the gem domain) or to share these buffers with flink, though that's
+ * not currently tracked.
+ */
+int drm_intel_gem_bo_map_nonblocking(drm_intel_bo *bo, int *gtt_mapped)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	struct drm_i915_gem_set_domain set_domain;
+	int gpu_coherent_cpu_map = 0;
+	int ret;
+
+	pthread_mutex_lock(&bufmgr_gem->lock);
+	assert(bo_gem->tiling_mode == I915_TILING_NONE);
+
+	if (bufmgr_gem->gen >= 6)
+		gpu_coherent_cpu_map = 1;
+
+	if (gpu_coherent_cpu_map && get_cache_type(bo) == I915_CACHE_LLC) {
+		ret = do_mmap_cpu(bufmgr_gem, bo_gem);
+		if (ret != 0) {
+			pthread_mutex_unlock(&bufmgr_gem->lock);
+			return ret;
+		}
+
+		bo->virtual = bo_gem->mem_virtual;
+		*gtt_mapped = 0;
+	} else {
+		ret = do_mmap_gtt(bufmgr_gem, bo_gem);
+		if (ret != 0) {
+			pthread_mutex_unlock(&bufmgr_gem->lock);
+			return ret;
+		}
+
+		bo->virtual = bo_gem->gtt_virtual;
+		*gtt_mapped = 1;
+	}
+
+	DBG("bo_map_nonblocking: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
+	    bo->virtual);
+
+	/* Move it to the GTT domain in case it isn't there yet.
+	 * For coherent buffers, cache_coherent is set at map time.
+	 */
+	if (!bo_gem->cache_coherent) {
+		set_domain.handle = bo_gem->gem_handle;
+		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
+		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
+		ret = drmIoctl(bufmgr_gem->fd,
+			       DRM_IOCTL_I915_GEM_SET_DOMAIN,
+			       &set_domain);
+		if (ret != 0) {
+			DBG("%s:%d: Error setting domain %d: %s\n",
+			    __FILE__, __LINE__, bo_gem->gem_handle,
+			    strerror(errno));
+		}
+
+		bo_gem->cache_coherent = 1;
+	}
+
+	pthread_mutex_unlock(&bufmgr_gem->lock);
+
+	return 0;
+}
+
+/**
+ * unmap an object in the non-blocking mode
+ */
+int drm_intel_gem_bo_unmap_nonblocking(drm_intel_bo *bo)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	int ret = 0;
+
+	if (bo == NULL)
+		return 0;
+
+	pthread_mutex_lock(&bufmgr_gem->lock);
+	bo->virtual = NULL;
+	pthread_mutex_unlock(&bufmgr_gem->lock);
+
+	return ret;
 }
 
 static void
-- 
1.7.6.3

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/i915: IOCTL to query the cache level of a BO.
  2011-09-22 23:27 ` [PATCH] drm/i915: IOCTL to query the cache level of a BO Ben Widawsky
@ 2011-09-23  8:04   ` Daniel Vetter
  2011-09-23  9:00   ` Chris Wilson
  1 sibling, 0 replies; 15+ messages in thread
From: Daniel Vetter @ 2011-09-23  8:04 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Daniel Vetter, intel-gfx, Kilarski, Bernard R

On Thu, Sep 22, 2011 at 04:27:10PM -0700, Ben Widawsky wrote:
> Querying a BO's cache level can help clients wishing to map the buffer.
> For example, an Ironlake machine could query the buffer to determine it
> should use GTT mappings, while a Sandybridge machine running the same
> code would prefer to use regular CPU mapped buffers since the cache is
> coherent.
> 
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Eric Anholt <eric@anholt.net>
> Cc: "Kilarski, Bernard R" <bernard.r.kilarski@intel.com>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] intel: non-blocking mmaps on the cheap
  2011-09-22 23:35   ` Ben Widawsky
@ 2011-09-23  8:52     ` Daniel Vetter
  0 siblings, 0 replies; 15+ messages in thread
From: Daniel Vetter @ 2011-09-23  8:52 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Daniel Vetter, intel-gfx, Kilarski, Bernard R

On Thu, Sep 22, 2011 at 04:35:20PM -0700, Ben Widawsky wrote:
> From: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> This adds a new mode to map gem objects in a non-blocking way.
> 
> The new kernel interface required to get the caching level/coherency
> is not yet wired up. All the code to transparently choose between gtt
> mappings or (if coherent) cpu mappings is already in place, though.
> 
> To make this usable for mesa, we need to allow other buffer access in
> between non-blocking modes. It's not a big problem if this accidentally
> blocks once when switching back to non-blocking maps, but domains need
> to be correctly tracked.
> 
> I've gone through historical kernel versions and luckily the only
> place we move a bo from the gtt domain to the cpu domain is in the
> set_domain ioctl. pwrite/pread don't move objects into the cpu domain as long
> as they're untiled or llc cached.
> 
> So track whether an object is in the gtt domain and invalidate this
> only on access to cpu mappings.
> 
> [patch reworked by Ben]
> 
> Cc: Eric Anholt <eric@anholt.net>
> Cc: "Kilarski, Bernard R" <bernard.r.kilarski@intel.com>
> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>

A bit of bikeshedding below ;-)

> ---
>  include/drm/i915_drm.h   |    7 ++
>  intel/intel_bufmgr.h     |    3 +
>  intel/intel_bufmgr_gem.c |  181 +++++++++++++++++++++++++++++++++++++++++-----
>  3 files changed, 174 insertions(+), 17 deletions(-)
> 
> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
> index adc2392..4b62222 100644
> --- a/include/drm/i915_drm.h
> +++ b/include/drm/i915_drm.h
> @@ -189,6 +189,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_I915_OVERLAY_PUT_IMAGE	0x27
>  #define DRM_I915_OVERLAY_ATTRS	0x28
>  #define DRM_I915_GEM_EXECBUFFER2	0x29
> +#define DRM_I915_GEM_GET_CACHE_TYPE	0x2a
>  
>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -230,6 +231,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_GEM_MADVISE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
>  #define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE	DRM_IOW(DRM_COMMAND_BASE + DRM_IOCTL_I915_OVERLAY_ATTRS, struct drm_intel_overlay_put_image)
>  #define DRM_IOCTL_I915_OVERLAY_ATTRS	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs)
> +#define DRM_IOCTL_I915_GEM_GET_CACHE_TYPE	DRM_IOR  (DRM_COMMAND_BASE + DRM_I915_GEM_GET_CACHE_TYPE, struct drm_i915_gem_get_cache_type)
>  
>  /* Allow drivers to submit batchbuffers directly to hardware, relying
>   * on the security mechanisms provided by hardware.
> @@ -835,4 +837,9 @@ struct drm_intel_overlay_attrs {
>  	__u32 gamma5;
>  };
>  
> +struct drm_i915_gem_get_cache_type {
> +	__u32 handle;
> +	__u32 cache_level;
> +};
> +
>  #endif				/* _I915_DRM_H_ */
> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
> index 889ef46..116a9d7 100644
> --- a/intel/intel_bufmgr.h
> +++ b/intel/intel_bufmgr.h
> @@ -149,6 +149,9 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo);
>  int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo);
>  void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable);
>  
> +int drm_intel_gem_bo_map_nonblocking(drm_intel_bo *bo, int *gtt_mapped);
> +int drm_intel_gem_bo_unmap_nonblocking(drm_intel_bo *bo);
> +
>  int drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id);
>  
>  int drm_intel_get_aperture_sizes(int fd, size_t *mappable, size_t *total);
> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
> index 4f4de92..5b7e053 100644
> --- a/intel/intel_bufmgr_gem.c
> +++ b/intel/intel_bufmgr_gem.c
> @@ -141,6 +141,8 @@ struct _drm_intel_bo_gem {
>  	uint32_t swizzle_mode;
>  	unsigned long stride;
>  
> +	unsigned cache_coherent : 1;

I prefer my in_gtt_domain - it spells out clearly what it tracks, whereas
with cache_coherent I momentarily confused it with LLC coherency and went:
wtf, why do we need to call set_domain(gtt) for llc objects?

>  	time_t free_time;
>  
>  	/** Array passed to the DRM containing relocation information. */
> @@ -998,15 +1000,11 @@ static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
>  	}
>  }
>  
> -static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
> +static int do_mmap_cpu(drm_intel_bufmgr_gem *bufmgr_gem,
> +		       drm_intel_bo_gem *bo_gem)
>  {
> -	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> -	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> -	struct drm_i915_gem_set_domain set_domain;
>  	int ret;
>  
> -	pthread_mutex_lock(&bufmgr_gem->lock);
> -
>  	/* Allow recursive mapping. Mesa may recursively map buffers with
>  	 * nested display loops.
>  	 */
> @@ -1018,7 +1016,7 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
>  		memset(&mmap_arg, 0, sizeof(mmap_arg));
>  		mmap_arg.handle = bo_gem->gem_handle;
>  		mmap_arg.offset = 0;
> -		mmap_arg.size = bo->size;
> +		mmap_arg.size = bo_gem->bo.size;
>  		ret = drmIoctl(bufmgr_gem->fd,
>  			       DRM_IOCTL_I915_GEM_MMAP,
>  			       &mmap_arg);
> @@ -1027,11 +1025,52 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
>  			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
>  			    __FILE__, __LINE__, bo_gem->gem_handle,
>  			    bo_gem->name, strerror(errno));
> -			pthread_mutex_unlock(&bufmgr_gem->lock);
>  			return ret;
>  		}
>  		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
>  	}
> +	return 0;
> +}
> +
> +enum i915_cache_level {
> +	I915_CACHE_NONE,
> +	I915_CACHE_LLC,
> +	I915_CACHE_LLC_MLC, /* gen6+ */
> +};
> +
> +static int get_cache_type(drm_intel_bo *bo)
> +{
> +	struct drm_i915_gem_get_cache_type cache = {0, 0};
> +	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> +	int ret = 0;
> +
> +	cache.handle = bo_gem->gem_handle;
> +	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_CACHE_TYPE,
> +		       &cache);
> +
> +	/* This should maintain old behavior */
> +	if (ret)
> +		return I915_CACHE_NONE;
> +
> +#define CACHE_LEVEL 0xff
> +	return cache.cache_level & CACHE_LEVEL;
> +}
> +
> +static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
> +{
> +	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> +	struct drm_i915_gem_set_domain set_domain;
> +	int ret;
> +
> +	pthread_mutex_lock(&bufmgr_gem->lock);
> +
> +	ret = do_mmap_cpu(bufmgr_gem, bo_gem);
> +	if (ret != 0) {
> +		pthread_mutex_unlock(&bufmgr_gem->lock);
> +		return ret;
> +	}
>  	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
>  	    bo_gem->mem_virtual);
>  	bo->virtual = bo_gem->mem_virtual;
> @@ -1051,20 +1090,19 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
>  		    strerror(errno));
>  	}
>  
> +	if (get_cache_type(bo) != I915_CACHE_LLC)
> +		bo_gem->cache_coherent = 0;
> +
>  	pthread_mutex_unlock(&bufmgr_gem->lock);
>  
>  	return 0;
>  }
>  
> -int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
> +static int do_mmap_gtt(drm_intel_bufmgr_gem *bufmgr_gem,
> +		       drm_intel_bo_gem *bo_gem)
>  {
> -	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> -	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> -	struct drm_i915_gem_set_domain set_domain;
>  	int ret;
>  
> -	pthread_mutex_lock(&bufmgr_gem->lock);
> -
>  	/* Get a mapping of the buffer if we haven't before. */
>  	if (bo_gem->gtt_virtual == NULL) {
>  		struct drm_i915_gem_mmap_gtt mmap_arg;
> @@ -1085,12 +1123,11 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
>  			    __FILE__, __LINE__,
>  			    bo_gem->gem_handle, bo_gem->name,
>  			    strerror(errno));
> -			pthread_mutex_unlock(&bufmgr_gem->lock);
>  			return ret;
>  		}
>  
>  		/* and mmap it */
> -		bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE,
> +		bo_gem->gtt_virtual = mmap(0, bo_gem->bo.size, PROT_READ | PROT_WRITE,
>  					   MAP_SHARED, bufmgr_gem->fd,
>  					   mmap_arg.offset);
>  		if (bo_gem->gtt_virtual == MAP_FAILED) {
> @@ -1100,11 +1137,28 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
>  			    __FILE__, __LINE__,
>  			    bo_gem->gem_handle, bo_gem->name,
>  			    strerror(errno));
> -			pthread_mutex_unlock(&bufmgr_gem->lock);
>  			return ret;
>  		}
>  	}
>  
> +	return 0;
> +}
> +
> +int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
> +{
> +	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> +	struct drm_i915_gem_set_domain set_domain;
> +	int ret;
> +
> +	pthread_mutex_lock(&bufmgr_gem->lock);
> +
> +	ret = do_mmap_gtt(bufmgr_gem, bo_gem);
> +	if (ret != 0) {
> +		pthread_mutex_unlock(&bufmgr_gem->lock);
> +		return ret;
> +	}
> +
>  	bo->virtual = bo_gem->gtt_virtual;
>  
>  	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
> @@ -1123,6 +1177,8 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
>  		    strerror(errno));
>  	}
>  
> +	bo_gem->cache_coherent = 1;
> +
>  	pthread_mutex_unlock(&bufmgr_gem->lock);
>  
>  	return 0;
> @@ -1282,6 +1338,97 @@ drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
>  		    set_domain.read_domains, set_domain.write_domain,
>  		    strerror(errno));
>  	}
> +
> +	bo_gem->cache_coherent = 1;
> +}
> +
> +/**
> + * Map an object without blocking on the GPU if possible.
> + *
> + * This automatically chooses either the GTT mapping or if coherent and faster,
> + * the CPU mapping.
> + *
> + * Not allowed on tiled buffers (to prevent headaches with swizzling and
> + * tracking the gem domain) or to share these buffers with flink, though that's
> + * not currently tracked.
> + */
> +int drm_intel_gem_bo_map_nonblocking(drm_intel_bo *bo, int *gtt_mapped)

I don't quite see the need for gtt_mapped. mesa only uses this to call the
correct unmap, but
- they're all the same anyway
- you still add an unmap_nonblocking.
Also, I think map_nonblocking should only use cpu maps when they're
actually equivalent (coherency-wise) to gtt maps.

So I think the right thing to do is to kill this and teach mesa to keep
track of 3 different mappings.
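
Something like this on the mesa side (just a sketch, names made up):

	enum intel_map_mode {
		INTEL_MAP_CPU,
		INTEL_MAP_GTT,
		INTEL_MAP_NONBLOCKING,
	};

and then dispatch the matching unmap based on that, instead of
threading a gtt_mapped flag through the map call.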

> +{
> +	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> +	struct drm_i915_gem_set_domain set_domain;
> +	int gpu_coherent_cpu_map = 0;
> +	int ret;
> +
> +	pthread_mutex_lock(&bufmgr_gem->lock);
> +	assert(bo_gem->tiling_mode == I915_TILING_NONE);
> +
> +	if (bufmgr_gem->gen >= 6)
> +		gpu_coherent_cpu_map = 1;
> +
> +	if (gpu_coherent_cpu_map && get_cache_type(bo) == I915_CACHE_LLC) {
> +		ret = do_mmap_cpu(bufmgr_gem, bo_gem);
> +		if (ret != 0) {
> +			pthread_mutex_unlock(&bufmgr_gem->lock);
> +			return ret;
> +		}
> +
> +		bo->virtual = bo_gem->mem_virtual;
> +		*gtt_mapped = 0;
> +	} else {
> +		ret = do_mmap_gtt(bufmgr_gem, bo_gem);
> +		if (ret != 0) {
> +			pthread_mutex_unlock(&bufmgr_gem->lock);
> +			return ret;
> +		}
> +
> +		bo->virtual = bo_gem->gtt_virtual;
> +		*gtt_mapped = 1;
> +	}
> +
> +	DBG("bo_map_nonblocking: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
> +	    bo->virtual);
> +
> +	/* Move it to the GTT domain in case it isn't there yet.
> +	 * For coherent buffers, cache_coherent is set at map time.
> +	 */
> +	if (!bo_gem->cache_coherent) {
> +		set_domain.handle = bo_gem->gem_handle;
> +		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
> +		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
> +		ret = drmIoctl(bufmgr_gem->fd,
> +			       DRM_IOCTL_I915_GEM_SET_DOMAIN,
> +			       &set_domain);
> +		if (ret != 0) {
> +			DBG("%s:%d: Error setting domain %d: %s\n",
> +			    __FILE__, __LINE__, bo_gem->gem_handle,
> +			    strerror(errno));
> +		}
> +
> +		bo_gem->cache_coherent = 1;
> +	}
> +
> +	pthread_mutex_unlock(&bufmgr_gem->lock);
> +
> +	return 0;
> +}
> +
> +/**
> + * unmap an object in the non-blocking mode
> + */
> +int drm_intel_gem_bo_unmap_nonblocking(drm_intel_bo *bo)
> +{
> +	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +	int ret = 0;
> +
> +	if (bo == NULL)
> +		return 0;
> +
> +	pthread_mutex_lock(&bufmgr_gem->lock);
> +	bo->virtual = NULL;
> +	pthread_mutex_unlock(&bufmgr_gem->lock);
> +
> +	return ret;
>  }
>  
>  static void
> -- 
> 1.7.6.3
> 

-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/i915: IOCTL to query the cache level of a BO.
  2011-09-22 23:27 ` [PATCH] drm/i915: IOCTL to query the cache level of a BO Ben Widawsky
  2011-09-23  8:04   ` Daniel Vetter
@ 2011-09-23  9:00   ` Chris Wilson
  1 sibling, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2011-09-23  9:00 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky, Kilarski, Bernard R

On Thu, 22 Sep 2011 16:27:10 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> Querying a BO's cache level can help clients wishing to map the buffer.
> For example, an Ironlake machine could query the buffer to determine it
> should use GTT mappings, while a Sandybridge machine running the same
> code would prefer to use regular CPU mapped buffers since the cache is
> coherent.
> 
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Eric Anholt <eric@anholt.net>
> Cc: "Kilarski, Bernard R" <bernard.r.kilarski@intel.com>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>

This is indeed useful.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] i965: use nonblocking maps MapRangeBuffer
  2011-09-22 23:27 ` [PATCH] i965: use nonblocking maps MapRangeBuffer Ben Widawsky
@ 2011-09-23 17:15   ` Eric Anholt
  2011-09-23 18:46     ` Ben Widawsky
  0 siblings, 1 reply; 15+ messages in thread
From: Eric Anholt @ 2011-09-23 17:15 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky, Mesa Devs



On Thu, 22 Sep 2011 16:27:12 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> This makes the code a lot cleaner, and theoretically faster (not many
> real world tests use this GL extension).
> 
> Cc: Eric Anholt <eric@anholt.net>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Mesa Devs <mesa-dev@lists.freedesktop.org>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> ---
>  src/mesa/drivers/dri/intel/intel_buffer_objects.c |   48 ++------------------
>  1 files changed, 5 insertions(+), 43 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
> index d35a50e..91dddce 100644
> --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
> +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
> @@ -296,8 +296,6 @@ intel_bufferobj_get_subdata(struct gl_context * ctx,
>     }
>  }
>  
> -
> -
>  /**
>   * Called via glMapBufferRange and glMapBuffer
>   *
> @@ -363,50 +361,14 @@ intel_bufferobj_map_range(struct gl_context * ctx,
>        return NULL;
>     }
>  
> -   /* If the user doesn't care about existing buffer contents and mapping
> -    * would cause us to block, then throw out the old buffer.
> -    */
> -   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
> -       (access & GL_MAP_INVALIDATE_BUFFER_BIT) &&
> -       drm_intel_bo_busy(intel_obj->buffer)) {
> -      drm_intel_bo_unreference(intel_obj->buffer);
> -      intel_bufferobj_alloc_buffer(intel, intel_obj);
> -   }

Why did you delete this code?  Applications rely on this happening.

> -
> -   /* If the user is mapping a range of an active buffer object but
> -    * doesn't require the current contents of that range, make a new
> -    * BO, and we'll copy what they put in there out at unmap or
> -    * FlushRange time.
> -    */
>     if ((access & GL_MAP_INVALIDATE_RANGE_BIT) &&
> -       drm_intel_bo_busy(intel_obj->buffer)) {
> -      if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
> +       drm_intel_bo_busy(intel_obj->buffer) &&
> +       (access & GL_MAP_FLUSH_EXPLICIT_BIT)) {
>  	 intel_obj->range_map_buffer = malloc(length);
>  	 obj->Pointer = intel_obj->range_map_buffer;
> -      } else {
> -	 intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr,
> -						      "range map",
> -						      length, 64);
> -	 if (!(access & GL_MAP_READ_BIT)) {
> -	    drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
> -	    intel_obj->mapped_gtt = GL_TRUE;
> -	 } else {
> -	    drm_intel_bo_map(intel_obj->range_map_bo,
> -			     (access & GL_MAP_WRITE_BIT) != 0);
> -	    intel_obj->mapped_gtt = GL_FALSE;
> -	 }
> -	 obj->Pointer = intel_obj->range_map_bo->virtual;
> -      }
> -      return obj->Pointer;
> -   }
> -
> -   if (!(access & GL_MAP_READ_BIT)) {
> -      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
> -      intel_obj->mapped_gtt = GL_TRUE;
> -   } else {
> -      drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0);
> -      intel_obj->mapped_gtt = GL_FALSE;
> -   }
> +	 return obj->Pointer;
> +   } else
> +    drm_intel_gem_bo_map_nonblocking(intel_obj->buffer, &intel_obj->mapped_gtt);

So, if either INVALIDATE or FLUSH_EXPLICIT is not set, or the buffer is
idle, you use the nonblocking map instead of the normal blocking map?
How is that supposed to be correct?

This patch should be adding one new case and an early return.  If it
does something else, that's a bad sign.
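
Concretely, I'd expect something shaped like the sketch below (untested,
and the trigger condition is only an example; the point is one new case
plus an early return, with all the existing paths left alone):

   /* New case only: when the app explicitly opts out of
    * synchronization, the nonblocking map is safe to use. */
   if (access & GL_MAP_UNSYNCHRONIZED_BIT) {
      drm_intel_gem_bo_map_nonblocking(intel_obj->buffer,
                                       &intel_obj->mapped_gtt);
      obj->Pointer = intel_obj->buffer->virtual + offset;
      return obj->Pointer;
   }

   /* ... then the existing throw-out, range-map and blocking-map
    * paths, unchanged ... */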

>     obj->Pointer = intel_obj->buffer->virtual + offset;
>     return obj->Pointer;
> -- 
> 1.7.6.3
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] i965: use nonblocking maps MapRangeBuffer
  2011-09-23 17:15   ` Eric Anholt
@ 2011-09-23 18:46     ` Ben Widawsky
  2011-09-23 18:56       ` [Intel-gfx] " Ben Widawsky
  0 siblings, 1 reply; 15+ messages in thread
From: Ben Widawsky @ 2011-09-23 18:46 UTC (permalink / raw)
  To: Eric Anholt; +Cc: Daniel Vetter, intel-gfx, Mesa Devs

On Fri, 23 Sep 2011 10:15:02 -0700
Eric Anholt <eric@anholt.net> wrote:

> On Thu, 22 Sep 2011 16:27:12 -0700, Ben Widawsky <ben@bwidawsk.net>
> wrote:
> > This makes the code a lot cleaner, and theoretically faster (not
> > many real world tests use this GL extension).
> > 
> > Cc: Eric Anholt <eric@anholt.net>
> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > Cc: Mesa Devs <mesa-dev@lists.freedesktop.org>
> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> > ---
> >  src/mesa/drivers/dri/intel/intel_buffer_objects.c |   48
> > ++------------------ 1 files changed, 5 insertions(+), 43
> > deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
> > b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index
> > d35a50e..91dddce 100644 ---
> > a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++
> > b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -296,8
> > +296,6 @@ intel_bufferobj_get_subdata(struct gl_context * ctx, }
> >  }
> >  
> > -
> > -
> >  /**
> >   * Called via glMapBufferRange and glMapBuffer
> >   *
> > @@ -363,50 +361,14 @@ intel_bufferobj_map_range(struct gl_context *
> > ctx, return NULL;
> >     }
> >  
> > -   /* If the user doesn't care about existing buffer contents and
> > mapping
> > -    * would cause us to block, then throw out the old buffer.
> > -    */
> > -   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
> > -       (access & GL_MAP_INVALIDATE_BUFFER_BIT) &&
> > -       drm_intel_bo_busy(intel_obj->buffer)) {
> > -      drm_intel_bo_unreference(intel_obj->buffer);
> > -      intel_bufferobj_alloc_buffer(intel, intel_obj);
> > -   }
> 
> Why did you delete this code?  Applications rely on this happening.

Maybe I got confused by the order of operations, but I thought this
should be: if synchronized and invalidate and busy, just create a new
buffer. That *should* be handled correctly by the nonblocking call
below. Maybe I'm mistaken.

> 
> > -
> > -   /* If the user is mapping a range of an active buffer object but
> > -    * doesn't require the current contents of that range, make a
> > new
> > -    * BO, and we'll copy what they put in there out at unmap or
> > -    * FlushRange time.
> > -    */
> >     if ((access & GL_MAP_INVALIDATE_RANGE_BIT) &&
> > -       drm_intel_bo_busy(intel_obj->buffer)) {
> > -      if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
> > +       drm_intel_bo_busy(intel_obj->buffer) &&
> > +       (access & GL_MAP_FLUSH_EXPLICIT_BIT)) {
> >  	 intel_obj->range_map_buffer = malloc(length);
> >  	 obj->Pointer = intel_obj->range_map_buffer;
> > -      } else {
> > -	 intel_obj->range_map_bo =
> > drm_intel_bo_alloc(intel->bufmgr,
> > -						      "range map",
> > -						      length, 64);
> > -	 if (!(access & GL_MAP_READ_BIT)) {
> > -	    drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
> > -	    intel_obj->mapped_gtt = GL_TRUE;
> > -	 } else {
> > -	    drm_intel_bo_map(intel_obj->range_map_bo,
> > -			     (access & GL_MAP_WRITE_BIT) != 0);
> > -	    intel_obj->mapped_gtt = GL_FALSE;
> > -	 }
> > -	 obj->Pointer = intel_obj->range_map_bo->virtual;
> > -      }
> > -      return obj->Pointer;
> > -   }
> > -
> > -   if (!(access & GL_MAP_READ_BIT)) {
> > -      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
> > -      intel_obj->mapped_gtt = GL_TRUE;
> > -   } else {
> > -      drm_intel_bo_map(intel_obj->buffer, (access &
> > GL_MAP_WRITE_BIT) != 0);
> > -      intel_obj->mapped_gtt = GL_FALSE;
> > -   }
> > +	 return obj->Pointer;
> > +   } else
> > +    drm_intel_gem_bo_map_nonblocking(intel_obj->buffer,
> > &intel_obj->mapped_gtt);
> 
> So, if either INVALIDATE or FLUSH_EXPLICIT is not set, or the buffer
> is idle, you use the nonblocking map instead of the normal blocking
> map? How is that supposed to be correct?
> 
> This patch should be adding one new case and an early return.  If it
> does something else, that's a bad sign.
> 
> >     obj->Pointer = intel_obj->buffer->virtual + offset;
> >     return obj->Pointer;

First of all, I realize this diff is pretty awful, so for the sake of
readability let me put the relevant part here:

   if ((access & GL_MAP_INVALIDATE_RANGE_BIT) &&
       drm_intel_bo_busy(intel_obj->buffer) &&
       (access & GL_MAP_FLUSH_EXPLICIT_BIT)) {
	 intel_obj->range_map_buffer = malloc(length);
	 obj->Pointer = intel_obj->range_map_buffer;
>>>>>>>> return obj->Pointer;
   } else
       drm_intel_gem_bo_map_nonblocking(intel_obj->buffer,
	        &intel_obj->mapped_gtt);

   obj->Pointer = intel_obj->buffer->virtual + offset;
   return obj->Pointer;

The reason I think this works is that drm_intel_gem_bo_map_nonblocking()
is supposed to do the right thing either way (so the name is
misleading). Please look at the libdrm patch in the series and see
whether you agree.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Intel-gfx] [PATCH] i965: use nonblocking maps MapRangeBuffer
  2011-09-23 18:46     ` Ben Widawsky
@ 2011-09-23 18:56       ` Ben Widawsky
  2011-09-23 19:21         ` Ben Widawsky
  0 siblings, 1 reply; 15+ messages in thread
From: Ben Widawsky @ 2011-09-23 18:56 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx, Mesa Devs, Daniel Vetter

On Fri, 23 Sep 2011 11:46:41 -0700
Ben Widawsky <ben@bwidawsk.net> wrote:

> On Fri, 23 Sep 2011 10:15:02 -0700
> Eric Anholt <eric@anholt.net> wrote:
> 
> > On Thu, 22 Sep 2011 16:27:12 -0700, Ben Widawsky <ben@bwidawsk.net>
> > wrote:
> > > This makes the code a lot cleaner, and theoretically faster (not
> > > many real world tests use this GL extension).
> > > 
> > > Cc: Eric Anholt <eric@anholt.net>
> > > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > > Cc: Mesa Devs <mesa-dev@lists.freedesktop.org>
> > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> > > ---
> > >  src/mesa/drivers/dri/intel/intel_buffer_objects.c |   48
> > > ++------------------ 1 files changed, 5 insertions(+), 43
> > > deletions(-)
> > > 
> > > diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
> > > b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index
> > > d35a50e..91dddce 100644 ---
> > > a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++
> > > b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -296,8
> > > +296,6 @@ intel_bufferobj_get_subdata(struct gl_context * ctx, }
> > >  }
> > >  
> > > -
> > > -
> > >  /**
> > >   * Called via glMapBufferRange and glMapBuffer
> > >   *
> > > @@ -363,50 +361,14 @@ intel_bufferobj_map_range(struct gl_context
> > > * ctx, return NULL;
> > >     }
> > >  
> > > -   /* If the user doesn't care about existing buffer contents and
> > > mapping
> > > -    * would cause us to block, then throw out the old buffer.
> > > -    */
> > > -   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
> > > -       (access & GL_MAP_INVALIDATE_BUFFER_BIT) &&
> > > -       drm_intel_bo_busy(intel_obj->buffer)) {
> > > -      drm_intel_bo_unreference(intel_obj->buffer);
> > > -      intel_bufferobj_alloc_buffer(intel, intel_obj);
> > > -   }
> > 
> > Why did you delete this code?  Applications rely on this happening.
> 
> Maybe I got confused by the order of operations, but I thought this
> should be: if synchronized and invalidate and busy, just create a new
> buffer. That *should* be handled correctly by the nonblocking call
> below. Maybe I'm mistaken.

Sorry, I want to clarify this some more:

So we no longer ever create a new buffer to try to speed things up. I
was hoping the nonblocking stuff would cover this case. Actually this
patch was sort of my attempt to remove as much as possible and see what
you'd say. If you already know which cases REALLY need the new buffer
object, I'll add those back.

Ben

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] i965: use nonblocking maps MapRangeBuffer
  2011-09-23 18:56       ` [Intel-gfx] " Ben Widawsky
@ 2011-09-23 19:21         ` Ben Widawsky
  0 siblings, 0 replies; 15+ messages in thread
From: Ben Widawsky @ 2011-09-23 19:21 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Mesa Devs, intel-gfx, Daniel Vetter

On Fri, 23 Sep 2011 11:56:59 -0700
Ben Widawsky <ben@bwidawsk.net> wrote:

> On Fri, 23 Sep 2011 11:46:41 -0700
> Ben Widawsky <ben@bwidawsk.net> wrote:
> 
> > On Fri, 23 Sep 2011 10:15:02 -0700
> > Eric Anholt <eric@anholt.net> wrote:
> > 
> > > On Thu, 22 Sep 2011 16:27:12 -0700, Ben Widawsky
> > > <ben@bwidawsk.net> wrote:
> > > > This makes the code a lot cleaner, and theoretically faster (not
> > > > many real world tests use this GL extension).
> > > > 
> > > > Cc: Eric Anholt <eric@anholt.net>
> > > > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > > > Cc: Mesa Devs <mesa-dev@lists.freedesktop.org>
> > > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> > > > ---
> > > >  src/mesa/drivers/dri/intel/intel_buffer_objects.c |   48
> > > > ++------------------ 1 files changed, 5 insertions(+), 43
> > > > deletions(-)
> > > > 
> > > > diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
> > > > b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index
> > > > d35a50e..91dddce 100644 ---
> > > > a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++
> > > > b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -296,8
> > > > +296,6 @@ intel_bufferobj_get_subdata(struct gl_context * ctx, }
> > > >  }
> > > >  
> > > > -
> > > > -
> > > >  /**
> > > >   * Called via glMapBufferRange and glMapBuffer
> > > >   *
> > > > @@ -363,50 +361,14 @@ intel_bufferobj_map_range(struct
> > > > gl_context
> > > > * ctx, return NULL;
> > > >     }
> > > >  
> > > > -   /* If the user doesn't care about existing buffer contents
> > > > and mapping
> > > > -    * would cause us to block, then throw out the old buffer.
> > > > -    */
> > > > -   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
> > > > -       (access & GL_MAP_INVALIDATE_BUFFER_BIT) &&
> > > > -       drm_intel_bo_busy(intel_obj->buffer)) {
> > > > -      drm_intel_bo_unreference(intel_obj->buffer);
> > > > -      intel_bufferobj_alloc_buffer(intel, intel_obj);
> > > > -   }
> > > 
> > > Why did you delete this code?  Applications rely on this
> > > happening.
> > 
> > Maybe I got confused by the order of operations, but I thought this
> > should be: if synchronized and invalidate and busy, just create a
> > new buffer. That *should* be handled correctly by the nonblocking
> > call below. Maybe I'm mistaken.
> 
> Sorry, I want to clarify this some more:
> 
> So we no longer ever create a new buffer to try to speed things up. I
> was hoping the nonblocking stuff would cover this case. Actually this
> patch was sort of my attempt to remove as much as possible and see
> what you'd say. If you already know which cases REALLY need the new
> buffer object, I'll add those back.
> 
> Ben

Forget this, let me resubmit.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] intel: non-blocking mmaps on the cheap
  2011-09-22 23:27 ` [PATCH] intel: non-blocking mmaps on the cheap Ben Widawsky
  2011-09-22 23:35   ` Ben Widawsky
@ 2011-10-06 20:55   ` Eric Anholt
  2011-10-06 22:56     ` Ben Widawsky
  1 sibling, 1 reply; 15+ messages in thread
From: Eric Anholt @ 2011-10-06 20:55 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky, Kilarski, Bernard R

On Thu, 22 Sep 2011 16:27:11 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> +/**
> > + * Unmap an object that was mapped in non-blocking mode
> + */
> +int drm_intel_gem_bo_unmap_nonblocking(drm_intel_bo *bo)
> +{
> +	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +	int ret = 0;
> +
> +	if (bo == NULL)
> +		return 0;
> +
> +	pthread_mutex_lock(&bufmgr_gem->lock);
> +	bo->virtual = NULL;
> +	pthread_mutex_unlock(&bufmgr_gem->lock);
> +
> +	return ret;

You dereffed bo before checking for NULL, so the compiler will just drop
that NULL check.  I realize this is copy'n'paste from bo_unmap_gtt, but
I don't see why this new copy of bo_unmap_gtt exists anyway.
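
If the function stays, the NULL check has to come before the deref,
i.e. something like:

int drm_intel_gem_bo_unmap_nonblocking(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem;

	if (bo == NULL)
		return 0;

	/* Only dereference bo once we know it isn't NULL, so the check
	 * can't be discarded as dead code. */
	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;

	pthread_mutex_lock(&bufmgr_gem->lock);
	bo->virtual = NULL;
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return 0;
}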

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] intel: non-blocking mmaps on the cheap
  2011-10-06 20:55   ` Eric Anholt
@ 2011-10-06 22:56     ` Ben Widawsky
  0 siblings, 0 replies; 15+ messages in thread
From: Ben Widawsky @ 2011-10-06 22:56 UTC (permalink / raw)
  To: Eric Anholt; +Cc: Daniel Vetter, intel-gfx, Kilarski, Bernard R

On Thu, Oct 06, 2011 at 01:55:49PM -0700, Eric Anholt wrote:
> On Thu, 22 Sep 2011 16:27:11 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> > +/**
> > + * Unmap an object that was mapped in non-blocking mode
> > + */
> > +int drm_intel_gem_bo_unmap_nonblocking(drm_intel_bo *bo)
> > +{
> > +	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> > +	int ret = 0;
> > +
> > +	if (bo == NULL)
> > +		return 0;
> > +
> > +	pthread_mutex_lock(&bufmgr_gem->lock);
> > +	bo->virtual = NULL;
> > +	pthread_mutex_unlock(&bufmgr_gem->lock);
> > +
> > +	return ret;
> 
> You dereffed bo before checking for NULL, so the compiler will just drop
> that NULL check.  I realize this is copy'n'paste from bo_unmap_gtt, but
> I don't see why this new copy of bo_unmap_gtt exists anyway.
> 

My original patch did not have the extra unmap, but since we had separate
unmaps for the GTT and non-GTT cases, Danvet complained that it wasn't
symmetric.

I honestly care so little about what we decide here that I'd like you two
to fight it out.

Ben

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2011-10-06 22:52 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-22 23:27 [PATCH 0/4] Nonblocking maps Ben Widawsky
2011-09-22 23:27 ` [PATCH] drm/i915: IOCTL to query the cache level of a BO Ben Widawsky
2011-09-23  8:04   ` Daniel Vetter
2011-09-23  9:00   ` Chris Wilson
2011-09-22 23:27 ` [PATCH] intel: non-blocking mmaps on the cheap Ben Widawsky
2011-09-22 23:35   ` Ben Widawsky
2011-09-23  8:52     ` Daniel Vetter
2011-10-06 20:55   ` Eric Anholt
2011-10-06 22:56     ` Ben Widawsky
2011-09-22 23:27 ` [PATCH] i965: use nonblocking maps MapRangeBuffer Ben Widawsky
2011-09-23 17:15   ` Eric Anholt
2011-09-23 18:46     ` Ben Widawsky
2011-09-23 18:56       ` [Intel-gfx] " Ben Widawsky
2011-09-23 19:21         ` Ben Widawsky
2011-09-22 23:27 ` [PATCH] gpu-tools: nonblocking map test Ben Widawsky
