All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>,
	Ben Widawsky <benjamin.widawsky@intel.com>
Subject: [PATCH 1/3] drm/i915: Fix obj->map_and_fenceable for ppgtt
Date: Thu, 15 May 2014 16:55:25 +0100	[thread overview]
Message-ID: <1400169327-29383-1-git-send-email-chris@chris-wilson.co.uk> (raw)

With ppgtt, it is no longer correct to mark an object as
map_and_fenceable if we simply unbind it from the global gtt. This has
consequences during execbuffer where we simply use
obj->map_and_fenceable in use_cpu_reloc() to decide which access method
to use for writing into the object. Now for a ppgtt object,
map_and_fenceable will be true when it is not bound into the ggtt but
only in a ppgtt, leading to an invalid access on a non-llc architecture.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c            |   8 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 146 +++++++++++++++--------------
 2 files changed, 79 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f8b2c16745f8..844ea6048321 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2807,9 +2807,8 @@ int i915_vma_unbind(struct i915_vma *vma)
 	i915_gem_gtt_finish_object(obj);
 
 	list_del_init(&vma->mm_list);
-	/* Avoid an unnecessary call to unbind on rebind. */
 	if (i915_is_ggtt(vma->vm))
-		obj->map_and_fenceable = true;
+		obj->map_and_fenceable = false;
 
 	drm_mm_remove_node(&vma->node);
 	i915_gem_vma_destroy(vma);
@@ -3159,6 +3158,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
 			return 0;
 		}
 	} else if (enable) {
+		if (WARN_ON(!obj->map_and_fenceable))
+			return -EINVAL;
+
 		reg = i915_find_fence_reg(dev);
 		if (IS_ERR(reg))
 			return PTR_ERR(reg);
@@ -4170,8 +4172,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
 	obj->fence_reg = I915_FENCE_REG_NONE;
 	obj->madv = I915_MADV_WILLNEED;
-	/* Avoid an unnecessary call to unbind on the first bind. */
-	obj->map_and_fenceable = true;
 
 	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c7ee1e3013ac..b4d8c31c0919 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -35,6 +35,7 @@
 
 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
+#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
 
 struct eb_vmas {
 	struct list_head vmas;
@@ -532,14 +533,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
 }
 
 static int
-need_reloc_mappable(struct i915_vma *vma)
-{
-	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
-		i915_is_ggtt(vma->vm);
-}
-
-static int
 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 				struct intel_ring_buffer *ring,
 				bool *need_reloc)
@@ -547,19 +540,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 	struct drm_i915_gem_object *obj = vma->obj;
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
-	bool need_fence;
 	unsigned flags;
 	int ret;
 
 	flags = 0;
-
-	need_fence =
-		has_fenced_gpu_access &&
-		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-		obj->tiling_mode != I915_TILING_NONE;
-	if (need_fence || need_reloc_mappable(vma))
+	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
 		flags |= PIN_MAPPABLE;
-
 	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
 		flags |= PIN_GLOBAL;
 
@@ -595,6 +581,27 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 	return 0;
 }
 
+static bool
+need_reloc_mappable(struct i915_vma *vma)
+{
+	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+
+	if (entry->relocation_count == 0)
+		return false;
+
+	if (!i915_is_ggtt(vma->vm))
+		return false;
+
+	/* See also use_cpu_reloc() */
+	if (HAS_LLC(vma->obj->base.dev))
+		return false;
+
+	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+		return false;
+
+	return true;
+}
+
 static int
 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    struct list_head *vmas,
@@ -627,9 +634,10 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			obj->tiling_mode != I915_TILING_NONE;
 		need_mappable = need_fence || need_reloc_mappable(vma);
 
-		if (need_mappable)
+		if (need_mappable) {
+			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
 			list_move(&vma->exec_list, &ordered_vmas);
-		else
+		} else
 			list_move_tail(&vma->exec_list, &ordered_vmas);
 
 		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
@@ -657,25 +665,14 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 		/* Unbind any ill-fitting objects or pin. */
 		list_for_each_entry(vma, vmas, exec_list) {
 			struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-			bool need_fence, need_mappable;
 
 			obj = vma->obj;
 
 			if (!drm_mm_node_allocated(&vma->node))
 				continue;
 
-			need_fence =
-				has_fenced_gpu_access &&
-				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-				obj->tiling_mode != I915_TILING_NONE;
-			need_mappable = need_fence || need_reloc_mappable(vma);
-
-			WARN_ON((need_mappable || need_fence) &&
-			       !i915_is_ggtt(vma->vm));
-
-			if ((entry->alignment &&
-			     vma->node.start & (entry->alignment - 1)) ||
-			    (need_mappable && !obj->map_and_fenceable))
+			if ((entry->alignment && vma->node.start & (entry->alignment - 1)) ||
+			    (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable))
 				ret = i915_vma_unbind(vma);
 			else
 				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
@@ -776,9 +773,9 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 		 * relocations were valid.
 		 */
 		for (j = 0; j < exec[i].relocation_count; j++) {
-			if (copy_to_user(&user_relocs[j].presumed_offset,
-					 &invalid_offset,
-					 sizeof(invalid_offset))) {
+			if (__copy_to_user(&user_relocs[j].presumed_offset,
+					   &invalid_offset,
+					   sizeof(invalid_offset))) {
 				ret = -EFAULT;
 				mutex_lock(&dev->struct_mutex);
 				goto err;
@@ -1268,33 +1265,28 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
 	 * hsw should have this fixed, but bdw mucks it up again. */
-	if (flags & I915_DISPATCH_SECURE &&
-	    !batch_obj->has_global_gtt_mapping) {
-		/* When we have multiple VMs, we'll need to make sure that we
-		 * allocate space first */
-		struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
-		BUG_ON(!vma);
-		vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
-	}
+	if (flags & I915_DISPATCH_SECURE) {
+		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+		if (ret)
+			goto err;
 
-	if (flags & I915_DISPATCH_SECURE)
 		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
-	else
+	} else
 		exec_start += i915_gem_obj_offset(batch_obj, vm);
 
 	ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas);
 	if (ret)
-		goto err;
+		goto err_batch;
 
 	ret = i915_switch_context(ring, ctx);
 	if (ret)
-		goto err;
+		goto err_batch;
 
 	if (ring == &dev_priv->ring[RCS] &&
 	    mode != dev_priv->relative_constants_mode) {
 		ret = intel_ring_begin(ring, 4);
 		if (ret)
-				goto err;
+			goto err_batch;
 
 		intel_ring_emit(ring, MI_NOOP);
 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
@@ -1308,30 +1300,29 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
 		ret = i915_reset_gen7_sol_offsets(dev, ring);
 		if (ret)
-			goto err;
+			goto err_batch;
 	}
 
-
 	exec_len = args->batch_len;
 	if (cliprects) {
 		for (i = 0; i < args->num_cliprects; i++) {
 			ret = i915_emit_box(dev, &cliprects[i],
 					    args->DR1, args->DR4);
 			if (ret)
-				goto err;
+				goto err_batch;
 
 			ret = ring->dispatch_execbuffer(ring,
 							exec_start, exec_len,
 							flags);
 			if (ret)
-				goto err;
+				goto err_batch;
 		}
 	} else {
 		ret = ring->dispatch_execbuffer(ring,
 						exec_start, exec_len,
 						flags);
 		if (ret)
-			goto err;
+			goto err_batch;
 	}
 
 	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
@@ -1339,6 +1330,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
 	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
 
+err_batch:
+	if (flags & I915_DISPATCH_SECURE)
+		i915_gem_object_ggtt_unpin(batch_obj);
 err:
 	/* the request owns the ref now */
 	i915_gem_context_unreference(ctx);
@@ -1420,18 +1414,21 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 
 	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
 	if (!ret) {
+		struct drm_i915_gem_exec_object __user *user_exec_list =
+			to_user_ptr(args->buffers_ptr);
+
 		/* Copy the new buffer offsets back to the user's exec list. */
-		for (i = 0; i < args->buffer_count; i++)
-			exec_list[i].offset = exec2_list[i].offset;
-		/* ... and back out to userspace */
-		ret = copy_to_user(to_user_ptr(args->buffers_ptr),
-				   exec_list,
-				   sizeof(*exec_list) * args->buffer_count);
-		if (ret) {
-			ret = -EFAULT;
-			DRM_DEBUG("failed to copy %d exec entries "
-				  "back to user (%d)\n",
-				  args->buffer_count, ret);
+		for (i = 0; i < args->buffer_count; i++) {
+			ret = __copy_to_user(&user_exec_list[i].offset,
+					     &exec2_list[i].offset,
+					     sizeof(user_exec_list[i].offset));
+			if (ret) {
+				ret = -EFAULT;
+				DRM_DEBUG("failed to copy %d exec entries "
+					  "back to user (%d)\n",
+					  args->buffer_count, ret);
+				break;
+			}
 		}
 	}
 
@@ -1482,14 +1479,21 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
 	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
 	if (!ret) {
 		/* Copy the new buffer offsets back to the user's exec list. */
-		ret = copy_to_user(to_user_ptr(args->buffers_ptr),
-				   exec2_list,
-				   sizeof(*exec2_list) * args->buffer_count);
-		if (ret) {
-			ret = -EFAULT;
-			DRM_DEBUG("failed to copy %d exec entries "
-				  "back to user (%d)\n",
-				  args->buffer_count, ret);
+		struct drm_i915_gem_exec_object2 *user_exec_list =
+				   to_user_ptr(args->buffers_ptr);
+		int i;
+
+		for (i = 0; i < args->buffer_count; i++) {
+			ret = __copy_to_user(&user_exec_list[i].offset,
+					     &exec2_list[i].offset,
+					     sizeof(user_exec_list[i].offset));
+			if (ret) {
+				ret = -EFAULT;
+				DRM_DEBUG("failed to copy %d exec entries "
+					  "back to user\n",
+					  args->buffer_count);
+				break;
+			}
 		}
 	}
 
-- 
2.0.0.rc2

             reply	other threads:[~2014-05-15 15:55 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-05-15 15:55 Chris Wilson [this message]
2014-05-15 15:55 ` [PATCH 2/3] drm/i915: Prevent negative relocation deltas from wrapping Chris Wilson
2014-05-15 15:55 ` [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer Chris Wilson
2014-05-15 21:12 ` [PATCH 1/3] drm/i915: Fix obj->map_and_fenceable for ppgtt Daniel Vetter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1400169327-29383-1-git-send-email-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=benjamin.widawsky@intel.com \
    --cc=daniel.vetter@ffwll.ch \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.