All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] intel: Add support for (possibly) unsynchronized maps.
@ 2012-02-25  3:53 Eric Anholt
  2012-02-25  3:53 ` [PATCH] intel: Make use of the new GPU-unsynchronized map functionality in libdrm Eric Anholt
                   ` (4 more replies)
  0 siblings, 5 replies; 11+ messages in thread
From: Eric Anholt @ 2012-02-25  3:53 UTC (permalink / raw)
  To: intel-gfx

This improves the performance of Mesa's GL_MAP_UNSYNCHRONIZED_BIT path
in GL_ARB_map_buffer_range.  Improves Unigine Tropics performance at
1024x768 by 2.06236% +/- 0.50272% (n=11).
---
 intel/intel_bufmgr.h     |    2 +
 intel/intel_bufmgr_gem.c |   72 +++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 85da8b9..e852eab 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -148,8 +148,10 @@ void drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr);
 void drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr);
 void drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr,
 					     int limit);
+int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo);
 int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo);
 int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo);
+
 int drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo);
 void drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start);
 void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable);
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 187e8ec..12641e1 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -1150,15 +1150,13 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
 	return 0;
 }
 
-int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
+static int
+map_gtt(drm_intel_bo *bo)
 {
 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
-	struct drm_i915_gem_set_domain set_domain;
 	int ret;
 
-	pthread_mutex_lock(&bufmgr_gem->lock);
-
 	if (bo_gem->map_count++ == 0)
 		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
 
@@ -1184,7 +1182,6 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
 			    strerror(errno));
 			if (--bo_gem->map_count == 0)
 				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
-			pthread_mutex_unlock(&bufmgr_gem->lock);
 			return ret;
 		}
 
@@ -1201,7 +1198,6 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
 			    strerror(errno));
 			if (--bo_gem->map_count == 0)
 				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
-			pthread_mutex_unlock(&bufmgr_gem->lock);
 			return ret;
 		}
 	}
@@ -1211,7 +1207,33 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
 	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
 	    bo_gem->gtt_virtual);
 
-	/* Now move it to the GTT domain so that the CPU caches are flushed */
+	return 0;
+}
+
+int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	struct drm_i915_gem_set_domain set_domain;
+	int ret;
+
+	pthread_mutex_lock(&bufmgr_gem->lock);
+
+	ret = map_gtt(bo);
+	if (ret) {
+		pthread_mutex_unlock(&bufmgr_gem->lock);
+		return ret;
+	}
+
+	/* Now move it to the GTT domain so that the GPU and CPU
+	 * caches are flushed and the GPU isn't actively using the
+	 * buffer.
+	 *
+	 * The pagefault handler does this domain change for us when
+	 * it has unbound the BO from the GTT, but it's up to us to
+	 * tell it when we're about to use things if we had done
+	 * rendering and it still happens to be bound to the GTT.
+	 */
 	set_domain.handle = bo_gem->gem_handle;
 	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
 	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
@@ -1229,6 +1251,42 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
 	return 0;
 }
 
+/**
+ * Performs a mapping of the buffer object like the normal GTT
+ * mapping, but avoiding waiting for the GPU to be done reading from
+ * or rendering to the buffer.
+ *
+ * This is used in the implementation of GL_ARB_map_buffer_range: The
+ * user asks to create a buffer, then does a mapping, fills some
+ * space, runs a drawing command, then asks to map it again without
+ * synchronizing because it guarantees that it won't write over the
+ * data that the GPU is busy using (or, more specifically, that if it
+ * does write over the data, it acknowledges that rendering is
+ * undefined).
+ */
+
+int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	int ret;
+
+	/* If the CPU cache isn't coherent with the GTT, then use a
+	 * regular synchronized mapping.  The problem is that we don't
+	 * track where the buffer was last used on the CPU side in
+	 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so
+	 * we would potentially corrupt the buffer even when the user
+	 * does reasonable things.
+	 */
+	if (!bufmgr_gem->has_llc)
+		return drm_intel_gem_bo_map_gtt(bo);
+
+	pthread_mutex_lock(&bufmgr_gem->lock);
+	ret = map_gtt(bo);
+	pthread_mutex_unlock(&bufmgr_gem->lock);
+
+	return ret;
+}
+
 static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
 {
 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
-- 
1.7.9.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH] intel: Make use of the new GPU-unsynchronized map functionality in libdrm.
  2012-02-25  3:53 [PATCH] intel: Add support for (possibly) unsynchronized maps Eric Anholt
@ 2012-02-25  3:53 ` Eric Anholt
  2012-02-25  8:55 ` [PATCH] intel: Add support for (possibly) unsynchronized maps Chris Wilson
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: Eric Anholt @ 2012-02-25  3:53 UTC (permalink / raw)
  To: intel-gfx

Improves Unigine Tropics performance at 1024x768 by 2.06236% +/-
0.50272% (n=11).

DO NOT PUSH: need to depend on a new libdrm release first.
---
 src/mesa/drivers/dri/intel/intel_buffer_objects.c |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 26f23fd0..d2a0709 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -367,7 +367,9 @@ intel_bufferobj_map_range(struct gl_context * ctx,
       return obj->Pointer;
    }
 
-   if (!(access & GL_MAP_READ_BIT)) {
+   if (access & GL_MAP_UNSYNCHRONIZED_BIT)
+      drm_intel_gem_bo_map_unsynchronized(intel_obj->buffer);
+   else if (!(access & GL_MAP_READ_BIT)) {
       drm_intel_gem_bo_map_gtt(intel_obj->buffer);
    } else {
       drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0);
-- 
1.7.9.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] intel: Add support for (possibly) unsynchronized maps.
  2012-02-25  3:53 [PATCH] intel: Add support for (possibly) unsynchronized maps Eric Anholt
  2012-02-25  3:53 ` [PATCH] intel: Make use of the new GPU-unsynchronized map functionality in libdrm Eric Anholt
@ 2012-02-25  8:55 ` Chris Wilson
  2012-02-25  9:03 ` Paul Menzel
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: Chris Wilson @ 2012-02-25  8:55 UTC (permalink / raw)
  To: Eric Anholt, intel-gfx

On Fri, 24 Feb 2012 19:53:22 -0800, Eric Anholt <eric@anholt.net> wrote:
> This improves the performance of Mesa's GL_MAP_UNSYNCHRONIZED_BIT path
> in GL_ARB_map_buffer_range.  Improves Unigine Tropics performance at
> 1024x768 by 2.06236% +/- 0.50272% (n=11).

Oh well, weakly coherent wins.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] intel: Add support for (possibly) unsynchronized maps.
  2012-02-25  3:53 [PATCH] intel: Add support for (possibly) unsynchronized maps Eric Anholt
  2012-02-25  3:53 ` [PATCH] intel: Make use of the new GPU-unsynchronized map functionality in libdrm Eric Anholt
  2012-02-25  8:55 ` [PATCH] intel: Add support for (possibly) unsynchronized maps Chris Wilson
@ 2012-02-25  9:03 ` Paul Menzel
  2012-02-25 11:00 ` Daniel Vetter
  2012-02-28  7:35 ` [PATCH 1/2] intel: Fix error check for I915_PARAM_HAS_LLC Eric Anholt
  4 siblings, 0 replies; 11+ messages in thread
From: Paul Menzel @ 2012-02-25  9:03 UTC (permalink / raw)
  To: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 856 bytes --]

Dear Eric,


Am Freitag, den 24.02.2012, 19:53 -0800 schrieb Eric Anholt:

[…]

> +/**
> + * Performs a mapping of the buffer object like the normal GTT
> + * mapping, but avoiding waiting for the GPU to be done reading from

s/avoiding/avoids/?

> + * or rendering to the buffer.
> + *
> + * This is used in the implementation of GL_ARB_map_buffer_range: The
> + * user asks to create a buffer, then does a mapping, fills some
> + * space, runs a drawing command, then asks to map it again without
> + * synchronizing because it guarantees that it won't write over the
> + * data that the GPU is busy using (or, more specifically, that if it
> + * does write over the data, it acknowledges that rendering is
> + * undefined).
> + */
> +
> +int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)

[…]


Thanks,

Paul

[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] intel: Add support for (possibly) unsynchronized maps.
  2012-02-25  3:53 [PATCH] intel: Add support for (possibly) unsynchronized maps Eric Anholt
                   ` (2 preceding siblings ...)
  2012-02-25  9:03 ` Paul Menzel
@ 2012-02-25 11:00 ` Daniel Vetter
  2012-02-25 11:16   ` Chris Wilson
                     ` (2 more replies)
  2012-02-28  7:35 ` [PATCH 1/2] intel: Fix error check for I915_PARAM_HAS_LLC Eric Anholt
  4 siblings, 3 replies; 11+ messages in thread
From: Daniel Vetter @ 2012-02-25 11:00 UTC (permalink / raw)
  To: Eric Anholt; +Cc: intel-gfx

On Fri, Feb 24, 2012 at 07:53:22PM -0800, Eric Anholt wrote:
> This improves the performance of Mesa's GL_MAP_UNSYNCHRONIZED_BIT path
> in GL_ARB_map_buffer_range.  Improves Unigine Tropics performance at
> 1024x768 by 2.06236% +/- 0.50272% (n=11).
> ---

A few questions:
- iirc Ben's non-blocking stuff also worked for non-llc machines - I guess
  you haven't looked into this because we don't have a non-llc platform
  that runs Unigine?

- in my pwrite experience, writing through cpu maps beats writing through
  the gtt on llc machines. This has the added benefit that it reduces
  pressure on the mappable gtt. Have you tried that, too?

Cheers, Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] intel: Add support for (possibly) unsynchronized maps.
  2012-02-25 11:00 ` Daniel Vetter
@ 2012-02-25 11:16   ` Chris Wilson
  2012-02-26 20:28   ` Kenneth Graunke
  2012-02-27 19:05   ` Eric Anholt
  2 siblings, 0 replies; 11+ messages in thread
From: Chris Wilson @ 2012-02-25 11:16 UTC (permalink / raw)
  To: Daniel Vetter, Eric Anholt; +Cc: intel-gfx

On Sat, 25 Feb 2012 12:00:07 +0100, Daniel Vetter <daniel@ffwll.ch> wrote:
> - in my pwrite experience, writing through cpu maps beats writing through
>   the gtt on llc machines. This has the added benefit that it reduces
>   pressure on the mappable gtt. Have you tried that, too?

Speaking of which, those wonderful pwrite patches are still MIA?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] intel: Add support for (possibly) unsynchronized maps.
  2012-02-25 11:00 ` Daniel Vetter
  2012-02-25 11:16   ` Chris Wilson
@ 2012-02-26 20:28   ` Kenneth Graunke
  2012-02-27 11:15     ` Ben Widawsky
  2012-02-27 19:05   ` Eric Anholt
  2 siblings, 1 reply; 11+ messages in thread
From: Kenneth Graunke @ 2012-02-26 20:28 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On 02/25/2012 03:00 AM, Daniel Vetter wrote:
> On Fri, Feb 24, 2012 at 07:53:22PM -0800, Eric Anholt wrote:
>> This improves the performance of Mesa's GL_MAP_UNSYNCHRONIZED_BIT path
>> in GL_ARB_map_buffer_range.  Improves Unigine Tropics performance at
>> 1024x768 by 2.06236% +/- 0.50272% (n=11).
>> ---
>
> A few questions:
> - iirc Ben's non-blocking stuff also worked for non-llc machines - I guess
>    you haven't looked into this because we don't have a non-llc platform
>    that runs Unigine?

Tropics works on Ironlake, too, it's just slow.  Haven't tried earlier.

> - in my pwrite experience, writing through cpu maps beats writing through
>    the gtt on llc machines. This has the added benefit that it reduces
>    pressure on the mappable gtt. Have you tried that, too?
>
> Cheers, Daniel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] intel: Add support for (possibly) unsynchronized maps.
  2012-02-26 20:28   ` Kenneth Graunke
@ 2012-02-27 11:15     ` Ben Widawsky
  0 siblings, 0 replies; 11+ messages in thread
From: Ben Widawsky @ 2012-02-27 11:15 UTC (permalink / raw)
  To: Kenneth Graunke; +Cc: intel-gfx

On Sun, 26 Feb 2012 12:28:09 -0800
Kenneth Graunke <kenneth@whitecape.org> wrote:

> On 02/25/2012 03:00 AM, Daniel Vetter wrote:
> > On Fri, Feb 24, 2012 at 07:53:22PM -0800, Eric Anholt wrote:
> >> This improves the performance of Mesa's GL_MAP_UNSYNCHRONIZED_BIT path
> >> in GL_ARB_map_buffer_range.  Improves Unigine Tropics performance at
> >> 1024x768 by 2.06236% +/- 0.50272% (n=11).
> >> ---
> >
> > A few questions:
> > - iirc Ben's non-blocking stuff also worked for non-llc machines - I guess
> >    you haven't looked into this because we don't have a non-llc platform
> > >    that runs Unigine?
> 
> Tropics works on Ironlake, too, it's just slow.  Haven't tried earlier.

iirc, Eric even started to review those patches.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] intel: Add support for (possibly) unsynchronized maps.
  2012-02-25 11:00 ` Daniel Vetter
  2012-02-25 11:16   ` Chris Wilson
  2012-02-26 20:28   ` Kenneth Graunke
@ 2012-02-27 19:05   ` Eric Anholt
  2 siblings, 0 replies; 11+ messages in thread
From: Eric Anholt @ 2012-02-27 19:05 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 1175 bytes --]

On Sat, 25 Feb 2012 12:00:07 +0100, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Fri, Feb 24, 2012 at 07:53:22PM -0800, Eric Anholt wrote:
> > This improves the performance of Mesa's GL_MAP_UNSYNCHRONIZED_BIT path
> > in GL_ARB_map_buffer_range.  Improves Unigine Tropics performance at
> > 1024x768 by 2.06236% +/- 0.50272% (n=11).
> > ---
> 
> A few questions:
> - iirc Ben's non-blocking stuff also worked for non-llc machines - I guess
>   you haven't looked into this because we don't have a non-llc platform
>   that runs Unigine?

I gave up on Ben's non-blocking stuff for being way too many patches all
smashed into one.  I wanted a simple fix that could be extended later if
other apps have other problems, while completely fixing this app (a
~21ms wait shortly after starting a new frame).

> - in my pwrite experience, writing through cpu maps beats writing through
>   the gtt on llc machines. This has the added benefit that it reduces
>   pressure on the mappable gtt. Have you tried that, too?

I haven't played with that, but it would be fun to try at some point once
we get there.  Right now, the CPU overhead of the app isn't in this path.

[-- Attachment #1.2: Type: application/pgp-signature, Size: 197 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/2] intel: Fix error check for I915_PARAM_HAS_LLC.
  2012-02-25  3:53 [PATCH] intel: Add support for (possibly) unsynchronized maps Eric Anholt
                   ` (3 preceding siblings ...)
  2012-02-25 11:00 ` Daniel Vetter
@ 2012-02-28  7:35 ` Eric Anholt
  2012-02-28  7:35   ` [PATCH 2/2] intel: Add support for (possibly) unsynchronized maps Eric Anholt
  4 siblings, 1 reply; 11+ messages in thread
From: Eric Anholt @ 2012-02-28  7:35 UTC (permalink / raw)
  To: intel-gfx

drmIoctl returns -1 on error with errno set to the error value.  Other
users of it in this file just check for != 0, and only use errno when
they need to send an error value on to the caller of the API.
---
 intel/intel_bufmgr_gem.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 0f33b71..b2b9951 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -2410,7 +2410,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
 
 	gp.param = I915_PARAM_HAS_LLC;
 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
-	if (ret == -EINVAL) {
+	if (ret != 0) {
 		/* Kernel does not supports HAS_LLC query, fallback to GPU
 		 * generation detection and assume that we have LLC on GEN6/7
 		 */
-- 
1.7.9.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/2] intel: Add support for (possibly) unsynchronized maps.
  2012-02-28  7:35 ` [PATCH 1/2] intel: Fix error check for I915_PARAM_HAS_LLC Eric Anholt
@ 2012-02-28  7:35   ` Eric Anholt
  0 siblings, 0 replies; 11+ messages in thread
From: Eric Anholt @ 2012-02-28  7:35 UTC (permalink / raw)
  To: intel-gfx

This improves the performance of Mesa's GL_MAP_UNSYNCHRONIZED_BIT path
in GL_ARB_map_buffer_range.  Improves Unigine Tropics performance at
1024x768 by 0.958822% +/- 0.118466% (n=48).

v2: Fix comment grammar.
---

Updated the commit message after retesting on top of the previous commit.
It turns out that last time I tested against 3 Mesa commits, the first of
which was a 2% win, while the drm change (once I'd fixed it up to skip
!LLC) didn't work.  With the LLC detection fix in place, the new numbers
are for the single incremental Mesa commit that uses this feature, and
perf confirmed that it avoids blocking entirely.

 intel/intel_bufmgr.h     |    2 +
 intel/intel_bufmgr_gem.c |   72 +++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 85da8b9..e852eab 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -148,8 +148,10 @@ void drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr);
 void drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr);
 void drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr,
 					     int limit);
+int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo);
 int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo);
 int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo);
+
 int drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo);
 void drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start);
 void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable);
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index b2b9951..b9985fb 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -1182,15 +1182,13 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
 	return 0;
 }
 
-int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
+static int
+map_gtt(drm_intel_bo *bo)
 {
 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
-	struct drm_i915_gem_set_domain set_domain;
 	int ret;
 
-	pthread_mutex_lock(&bufmgr_gem->lock);
-
 	if (bo_gem->map_count++ == 0)
 		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
 
@@ -1216,7 +1214,6 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
 			    strerror(errno));
 			if (--bo_gem->map_count == 0)
 				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
-			pthread_mutex_unlock(&bufmgr_gem->lock);
 			return ret;
 		}
 
@@ -1233,7 +1230,6 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
 			    strerror(errno));
 			if (--bo_gem->map_count == 0)
 				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
-			pthread_mutex_unlock(&bufmgr_gem->lock);
 			return ret;
 		}
 	}
@@ -1243,7 +1239,33 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
 	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
 	    bo_gem->gtt_virtual);
 
-	/* Now move it to the GTT domain so that the CPU caches are flushed */
+	return 0;
+}
+
+int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	struct drm_i915_gem_set_domain set_domain;
+	int ret;
+
+	pthread_mutex_lock(&bufmgr_gem->lock);
+
+	ret = map_gtt(bo);
+	if (ret) {
+		pthread_mutex_unlock(&bufmgr_gem->lock);
+		return ret;
+	}
+
+	/* Now move it to the GTT domain so that the GPU and CPU
+	 * caches are flushed and the GPU isn't actively using the
+	 * buffer.
+	 *
+	 * The pagefault handler does this domain change for us when
+	 * it has unbound the BO from the GTT, but it's up to us to
+	 * tell it when we're about to use things if we had done
+	 * rendering and it still happens to be bound to the GTT.
+	 */
 	VG_CLEAR(set_domain);
 	set_domain.handle = bo_gem->gem_handle;
 	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
@@ -1264,7 +1286,42 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
 	return 0;
 }
 
+/**
+ * Performs a mapping of the buffer object like the normal GTT
+ * mapping, but avoids waiting for the GPU to be done reading from
+ * or rendering to the buffer.
+ *
+ * This is used in the implementation of GL_ARB_map_buffer_range: The
+ * user asks to create a buffer, then does a mapping, fills some
+ * space, runs a drawing command, then asks to map it again without
+ * synchronizing because it guarantees that it won't write over the
+ * data that the GPU is busy using (or, more specifically, that if it
+ * does write over the data, it acknowledges that rendering is
+ * undefined).
+ */
+
+int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	int ret;
+
+	/* If the CPU cache isn't coherent with the GTT, then use a
+	 * regular synchronized mapping.  The problem is that we don't
+	 * track where the buffer was last used on the CPU side in
+	 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so
+	 * we would potentially corrupt the buffer even when the user
+	 * does reasonable things.
+	 */
+	if (!bufmgr_gem->has_llc)
+		return drm_intel_gem_bo_map_gtt(bo);
+
+	pthread_mutex_lock(&bufmgr_gem->lock);
+	ret = map_gtt(bo);
+	pthread_mutex_unlock(&bufmgr_gem->lock);
+
+	return ret;
+}
+
 static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
 {
 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
-- 
1.7.9.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2012-02-28  7:35 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-02-25  3:53 [PATCH] intel: Add support for (possibly) unsynchronized maps Eric Anholt
2012-02-25  3:53 ` [PATCH] intel: Make use of the new GPU-unsynchronized map functionality in libdrm Eric Anholt
2012-02-25  8:55 ` [PATCH] intel: Add support for (possibly) unsynchronized maps Chris Wilson
2012-02-25  9:03 ` Paul Menzel
2012-02-25 11:00 ` Daniel Vetter
2012-02-25 11:16   ` Chris Wilson
2012-02-26 20:28   ` Kenneth Graunke
2012-02-27 11:15     ` Ben Widawsky
2012-02-27 19:05   ` Eric Anholt
2012-02-28  7:35 ` [PATCH 1/2] intel: Fix error check for I915_PARAM_HAS_LLC Eric Anholt
2012-02-28  7:35   ` [PATCH 2/2] intel: Add support for (possibly) unsynchronized maps Eric Anholt

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.