intel-gfx.lists.freedesktop.org archive mirror
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 05/14] drm/i915: Avoid forcing relocations through the mappable GTT or CPU
Date: Mon,  3 Dec 2012 11:49:03 +0000
Message-ID: <1354535352-3506-6-git-send-email-chris@chris-wilson.co.uk>
In-Reply-To: <1354535352-3506-1-git-send-email-chris@chris-wilson.co.uk>

If the object lies outside of the mappable GTT aperture, do not force
the whole object into the CPU domain (nor rebind it into the mappable
aperture) just to perform relocations; instead, write each relocation
through the CPU, flushing the affected cachelines as we go, and then
queue a chipset flush.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
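[Note for review, not part of the commit: the GTT-side relocation write
path introduced by the diff below (i.e. the branch taken when
use_cpu_reloc() is false) boils down to the following condensed sketch.
write_reloc() is a hypothetical helper used only for illustration here;
gtt_offset_to_page(), dev_priv->mm.gtt_mappable_end and
drm_clflush_virt_range() are the identifiers used in the patch itself.]

static void
write_reloc(struct drm_i915_private *dev_priv,
	    struct drm_i915_gem_object *obj,
	    u32 gtt_offset, u32 value)
{
	unsigned page_offset = offset_in_page(gtt_offset);

	if (gtt_offset < dev_priv->mm.gtt_mappable_end) {
		/* Inside the mappable aperture: WC write through the GTT. */
		void __iomem *reloc_page;

		reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
						      gtt_offset & PAGE_MASK);
		iowrite32(value, reloc_page + page_offset);
		io_mapping_unmap_atomic(reloc_page);
	} else {
		/* Outside the aperture: write via the CPU, clflushing the
		 * affected cacheline before and after so the GPU sees the
		 * update, and mark the object so a chipset flush is queued
		 * before execution.
		 */
		char *vaddr = kmap_atomic(gtt_offset_to_page(obj, gtt_offset));

		drm_clflush_virt_range(vaddr + page_offset, 4);
		*(u32 *)(vaddr + page_offset) = value;
		drm_clflush_virt_range(vaddr + page_offset, 4);
		kunmap_atomic(vaddr);

		obj->base.pending_write_domain |= I915_GEM_DOMAIN_CPU;
	}
}
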
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   85 ++++++++++++++++------------
 1 file changed, 50 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 802d925..c77a57d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -33,6 +33,9 @@
 #include "intel_drv.h"
 #include <linux/dma_remapping.h>
 
+#define __EXEC_OBJECT_HAS_PIN (1<<31)
+#define __EXEC_OBJECT_HAS_FENCE (1<<30)
+
 struct eb_objects {
 	int and;
 	struct hlist_head buckets[0];
@@ -95,10 +98,16 @@ eb_destroy(struct eb_objects *eb)
 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
 {
 	return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
-		!obj->map_and_fenceable ||
 		obj->cache_level != I915_CACHE_NONE);
 }
 
+static inline struct page *
+gtt_offset_to_page(struct drm_i915_gem_object *obj, u32 offset)
+{
+	offset -= obj->gtt_space->start;
+	return i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
+}
+
 static int
 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 				   struct eb_objects *eb,
@@ -193,22 +202,20 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 		return -EFAULT;
 
 	reloc->delta += target_offset;
+	reloc->offset += obj->gtt_offset;
 	if (use_cpu_reloc(obj)) {
-		uint32_t page_offset = reloc->offset & ~PAGE_MASK;
 		char *vaddr;
 
-		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+		ret = i915_gem_object_set_to_cpu_domain(obj, true);
 		if (ret)
 			return ret;
 
-		vaddr = kmap_atomic(i915_gem_object_get_page(obj,
-							     reloc->offset >> PAGE_SHIFT));
-		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
+		vaddr = kmap_atomic(gtt_offset_to_page(obj, reloc->offset));
+		*(uint32_t *)(vaddr + offset_in_page(reloc->offset)) = reloc->delta;
 		kunmap_atomic(vaddr);
 	} else {
 		struct drm_i915_private *dev_priv = dev->dev_private;
-		uint32_t __iomem *reloc_entry;
-		void __iomem *reloc_page;
+		unsigned page_offset;
 
 		ret = i915_gem_object_set_to_gtt_domain(obj, true);
 		if (ret)
@@ -219,13 +226,28 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 			return ret;
 
 		/* Map the page containing the relocation we're going to perform.  */
-		reloc->offset += obj->gtt_offset;
-		reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
-						      reloc->offset & PAGE_MASK);
-		reloc_entry = (uint32_t __iomem *)
-			(reloc_page + (reloc->offset & ~PAGE_MASK));
-		iowrite32(reloc->delta, reloc_entry);
-		io_mapping_unmap_atomic(reloc_page);
+		page_offset = offset_in_page(reloc->offset);
+
+		if (reloc->offset < dev_priv->mm.gtt_mappable_end) {
+			void __iomem *reloc_page;
+
+			reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
+							      reloc->offset & PAGE_MASK);
+			iowrite32(reloc->delta, reloc_page + page_offset);
+			io_mapping_unmap_atomic(reloc_page);
+		} else {
+			char *vaddr;
+
+			vaddr = kmap_atomic(gtt_offset_to_page(obj, reloc->offset));
+
+			drm_clflush_virt_range(vaddr + page_offset, 4);
+			*(uint32_t *)(vaddr + page_offset) = reloc->delta;
+			drm_clflush_virt_range(vaddr + page_offset, 4);
+
+			kunmap_atomic(vaddr);
+
+			obj->base.pending_write_domain |= I915_GEM_DOMAIN_CPU;
+		}
 	}
 
 	/* and update the user's relocation entry */
@@ -323,16 +345,6 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
 	return ret;
 }
 
-#define  __EXEC_OBJECT_HAS_PIN (1<<31)
-#define  __EXEC_OBJECT_HAS_FENCE (1<<30)
-
-static int
-need_reloc_mappable(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
-	return entry->relocation_count && !use_cpu_reloc(obj);
-}
-
 static int
 i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
 				   struct intel_ring_buffer *ring)
@@ -340,16 +352,15 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
 	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
-	bool need_fence, need_mappable;
+	bool need_fence;
 	int ret;
 
 	need_fence =
 		has_fenced_gpu_access &&
 		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 		obj->tiling_mode != I915_TILING_NONE;
-	need_mappable = need_fence || need_reloc_mappable(obj);
 
-	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false);
+	ret = i915_gem_object_pin(obj, entry->alignment, need_fence, false);
 	if (ret)
 		return ret;
 
@@ -412,7 +423,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 	INIT_LIST_HEAD(&ordered_objects);
 	while (!list_empty(objects)) {
 		struct drm_i915_gem_exec_object2 *entry;
-		bool need_fence, need_mappable;
+		bool need_fence;
 
 		obj = list_first_entry(objects,
 				       struct drm_i915_gem_object,
@@ -423,9 +434,8 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			has_fenced_gpu_access &&
 			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 			obj->tiling_mode != I915_TILING_NONE;
-		need_mappable = need_fence || need_reloc_mappable(obj);
 
-		if (need_mappable)
+		if (need_fence)
 			list_move(&obj->exec_list, &ordered_objects);
 		else
 			list_move_tail(&obj->exec_list, &ordered_objects);
@@ -455,7 +465,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 		/* Unbind any ill-fitting objects or pin. */
 		list_for_each_entry(obj, objects, exec_list) {
 			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
-			bool need_fence, need_mappable;
+			bool need_fence;
 
 			if (!obj->gtt_space)
 				continue;
@@ -464,10 +474,9 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 				has_fenced_gpu_access &&
 				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 				obj->tiling_mode != I915_TILING_NONE;
-			need_mappable = need_fence || need_reloc_mappable(obj);
 
 			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
-			    (need_mappable && !obj->map_and_fenceable))
+			    (need_fence && !obj->map_and_fenceable))
 				ret = i915_gem_object_unbind(obj);
 			else
 				ret = i915_gem_execbuffer_reserve_object(obj, ring);
@@ -614,10 +623,16 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 		if (ret)
 			return ret;
 
+		flush_domains |= obj->base.write_domain;
+
 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
 			i915_gem_clflush_object(obj);
 
-		flush_domains |= obj->base.write_domain;
+		/* Used as an internal marker during relocation processing */
+		if (obj->base.pending_write_domain & ~I915_GEM_GPU_DOMAINS) {
+			flush_domains |= obj->base.pending_write_domain & ~I915_GEM_GPU_DOMAINS;
+			obj->base.pending_write_domain &= I915_GEM_GPU_DOMAINS;
+		}
 	}
 
 	if (flush_domains & I915_GEM_DOMAIN_CPU)
-- 
1.7.10.4


Thread overview: 19+ messages
2012-12-03 11:48 A bunch of random execbuffer patches Chris Wilson
2012-12-03 11:48 ` [PATCH 01/14] drm/i915: Move the get_pages assertions up to the right layer Chris Wilson
2012-12-03 11:49 ` [PATCH 02/14] drm/i915: Decouple the object from the unbound list before freeing pages Chris Wilson
2012-12-03 16:16   ` Daniel Vetter
2012-12-03 11:49 ` [PATCH 03/14] drm/i915: Bail if we attempt to allocate pages for a purged object Chris Wilson
2012-12-03 11:49 ` [PATCH 04/14] drm/i915: Defer the unbind for a fence change until the next get_fence() Chris Wilson
2012-12-03 11:49 ` Chris Wilson [this message]
2012-12-03 11:49 ` [PATCH 06/14] drm: Optionally create mm blocks from top-to-bottom Chris Wilson
2012-12-03 11:49 ` [PATCH 07/14] drm/i915: Preferentially allocate mappable GTT space to uncached bo Chris Wilson
2012-12-03 11:49 ` [PATCH 08/14] drm/i915: Tighten the checks for invalid relocation domains Chris Wilson
2012-12-03 11:49 ` [PATCH 09/14] drm/i915: Remove check for conflicting relocation write-domains Chris Wilson
2012-12-03 19:18   ` Daniel Vetter
2012-12-03 21:03     ` [PATCH] drm/i915: Reduce memory pressure during shrinker by preallocating swizzle pages Chris Wilson
2012-12-07  0:16       ` Daniel Vetter
2012-12-03 11:49 ` [PATCH 10/14] drm/i915: Take the handle idr spinlock once for looking up the exec objects Chris Wilson
2012-12-03 11:49 ` [PATCH 11/14] drm/i915: Move the execbuffer objects list from the stack into the tracker Chris Wilson
2012-12-03 11:49 ` [PATCH 12/14] drm/i915: Allow userspace to hint that the relocations were known Chris Wilson
2012-12-03 11:49 ` [PATCH 13/14] drm/i915: Use the reloc.handle as an index into the execbuffer array Chris Wilson
2012-12-03 11:49 ` [PATCH 14/14] drm/i915: Allow userspace to request an object at a specific offset Chris Wilson
