From mboxrd@z Thu Jan 1 00:00:00 1970 From: Chris Wilson Subject: [PATCH 05/14] drm/i915: Avoid forcing relocations through the mappable GTT or CPU Date: Mon, 3 Dec 2012 11:49:03 +0000 Message-ID: <1354535352-3506-6-git-send-email-chris@chris-wilson.co.uk> References: <1354535352-3506-1-git-send-email-chris@chris-wilson.co.uk> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: Received: from relay.fireflyinternet.com (relay1.fireflyinternet.com [217.160.24.105]) by gabe.freedesktop.org (Postfix) with ESMTP id 1B141E5D25 for ; Mon, 3 Dec 2012 03:50:42 -0800 (PST) Received: from fireflyinternet.com (unverified [109.228.6.235]) by relay.fireflyinternet.com (FireflyRelay1) with ESMTP id 782817-2000100 for ; Mon, 03 Dec 2012 11:52:42 +0000 In-Reply-To: <1354535352-3506-1-git-send-email-chris@chris-wilson.co.uk> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: intel-gfx-bounces+gcfxdi-intel-gfx=m.gmane.org@lists.freedesktop.org Errors-To: intel-gfx-bounces+gcfxdi-intel-gfx=m.gmane.org@lists.freedesktop.org To: intel-gfx@lists.freedesktop.org List-Id: intel-gfx@lists.freedesktop.org If the object lies outside of the mappable GTT aperture, do not force it through the CPU domain for relocations, but simply flush the writes as we perform them and then queue a chipset flush. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 85 ++++++++++++++++------------ 1 file changed, 50 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 802d925..c77a57d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -33,6 +33,9 @@ #include "intel_drv.h" #include +#define __EXEC_OBJECT_HAS_PIN (1<<31) +#define __EXEC_OBJECT_HAS_FENCE (1<<30) + struct eb_objects { int and; struct hlist_head buckets[0]; @@ -95,10 +98,16 @@ eb_destroy(struct eb_objects *eb) static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) { return (obj->base.write_domain == I915_GEM_DOMAIN_CPU || - !obj->map_and_fenceable || obj->cache_level != I915_CACHE_NONE); } +static inline struct page * +gtt_offset_to_page(struct drm_i915_gem_object *obj, u32 offset) +{ + offset -= obj->gtt_space->start; + return i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); +} + static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_objects *eb, @@ -193,22 +202,20 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, return -EFAULT; reloc->delta += target_offset; + reloc->offset += obj->gtt_offset; if (use_cpu_reloc(obj)) { - uint32_t page_offset = reloc->offset & ~PAGE_MASK; char *vaddr; - ret = i915_gem_object_set_to_cpu_domain(obj, 1); + ret = i915_gem_object_set_to_cpu_domain(obj, true); if (ret) return ret; - vaddr = kmap_atomic(i915_gem_object_get_page(obj, - reloc->offset >> PAGE_SHIFT)); - *(uint32_t *)(vaddr + page_offset) = reloc->delta; + vaddr = kmap_atomic(gtt_offset_to_page(obj, reloc->offset)); + *(uint32_t *)(vaddr + offset_in_page(reloc->offset)) = reloc->delta; kunmap_atomic(vaddr); } else { struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t __iomem *reloc_entry; - void __iomem *reloc_page; + unsigned page_offset; ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) @@ -219,13 +226,28 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, return ret; /* Map the page containing the relocation we're going to perform. */ - reloc->offset += obj->gtt_offset; - reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, - reloc->offset & PAGE_MASK); - reloc_entry = (uint32_t __iomem *) - (reloc_page + (reloc->offset & ~PAGE_MASK)); - iowrite32(reloc->delta, reloc_entry); - io_mapping_unmap_atomic(reloc_page); + page_offset = offset_in_page(reloc->offset); + + if (reloc->offset < dev_priv->mm.gtt_mappable_end) { + void __iomem *reloc_page; + + reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, + reloc->offset & PAGE_MASK); + iowrite32(reloc->delta, reloc_page + page_offset); + io_mapping_unmap_atomic(reloc_page); + } else { + char *vaddr; + + vaddr = kmap_atomic(gtt_offset_to_page(obj, reloc->offset)); + + drm_clflush_virt_range(vaddr + page_offset, 4); + *(uint32_t *)(vaddr + page_offset) = reloc->delta; + drm_clflush_virt_range(vaddr + page_offset, 4); + + kunmap_atomic(vaddr); + + obj->base.pending_write_domain |= I915_GEM_DOMAIN_CPU; + } } /* and update the user's relocation entry */ @@ -323,16 +345,6 @@ i915_gem_execbuffer_relocate(struct drm_device *dev, return ret; } -#define __EXEC_OBJECT_HAS_PIN (1<<31) -#define __EXEC_OBJECT_HAS_FENCE (1<<30) - -static int -need_reloc_mappable(struct drm_i915_gem_object *obj) -{ - struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; - return entry->relocation_count && !use_cpu_reloc(obj); -} - static int i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, struct intel_ring_buffer *ring) @@ -340,16 +352,15 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, struct drm_i915_private *dev_priv = obj->base.dev->dev_private; struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; - bool need_fence, need_mappable; + bool need_fence; int ret; need_fence = has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; - need_mappable = need_fence || need_reloc_mappable(obj); - ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false); + ret = i915_gem_object_pin(obj, entry->alignment, need_fence, false); if (ret) return ret; @@ -412,7 +423,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, INIT_LIST_HEAD(&ordered_objects); while (!list_empty(objects)) { struct drm_i915_gem_exec_object2 *entry; - bool need_fence, need_mappable; + bool need_fence; obj = list_first_entry(objects, struct drm_i915_gem_object, @@ -423,9 +434,8 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; - need_mappable = need_fence || need_reloc_mappable(obj); - if (need_mappable) + if (need_fence) list_move(&obj->exec_list, &ordered_objects); else list_move_tail(&obj->exec_list, &ordered_objects); @@ -455,7 +465,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, /* Unbind any ill-fitting objects or pin. */ list_for_each_entry(obj, objects, exec_list) { struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; - bool need_fence, need_mappable; + bool need_fence; if (!obj->gtt_space) continue; @@ -464,10 +474,9 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; - need_mappable = need_fence || need_reloc_mappable(obj); if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) || - (need_mappable && !obj->map_and_fenceable)) + (need_fence && !obj->map_and_fenceable)) ret = i915_gem_object_unbind(obj); else ret = i915_gem_execbuffer_reserve_object(obj, ring); @@ -614,10 +623,16 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, if (ret) return ret; + flush_domains |= obj->base.write_domain; + if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) i915_gem_clflush_object(obj); - flush_domains |= obj->base.write_domain; + /* Used as an internal marker during relocation processing */ + if (obj->base.pending_write_domain & ~I915_GEM_GPU_DOMAINS) { + flush_domains |= obj->base.pending_write_domain & ~I915_GEM_GPU_DOMAINS; + obj->base.pending_write_domain &= I915_GEM_GPU_DOMAINS; + } } if (flush_domains & I915_GEM_DOMAIN_CPU) -- 1.7.10.4