All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 12/14] drm/i915: Allow userspace to hint that the relocations were known
Date: Mon,  3 Dec 2012 11:49:10 +0000	[thread overview]
Message-ID: <1354535352-3506-13-git-send-email-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <1354535352-3506-1-git-send-email-chris@chris-wilson.co.uk>

Userspace is able to hint to the kernel that its command stream and
auxiliary state buffers already hold the correct presumed addresses and
so the relocation process may be skipped if the kernel does not need to
move any buffers in preparation for the execbuffer. Thus for the common
case where the allotment of buffers is static between batches, we can
avoid the overhead of individually checking the relocation entries.

Note that this requires userspace to supply the domain tracking and
requests for workarounds itself that would otherwise be computed based
upon the relocation entries.

Using copywinwin10 as an example that is dependent upon emitting a lot
of relocations (2 per operation), we see improvements of:

c2d/gm45: 618000.0/sec to 632000.0/sec.
i3-330m: 748000.0/sec to 830000.0/sec.

(measured relative to a baseline with neither optimisations applied).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_dma.c            |    3 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   62 ++++++++++++++++++----------
 include/uapi/drm/i915_drm.h                |   14 +++++++
 3 files changed, 57 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 2635ee6..9da7863 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -989,6 +989,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_SECURE_BATCHES:
 		value = capable(CAP_SYS_ADMIN);
 		break;
+	case I915_PARAM_HAS_EXEC_NO_RELOC:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG_DRIVER("Unknown parameter %d\n",
 				 param->param);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 90e704e..62a1489 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -385,7 +385,8 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
 
 static int
 i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
-				   struct intel_ring_buffer *ring)
+				   struct intel_ring_buffer *ring,
+				   bool *need_reloc)
 {
 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
 	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
@@ -425,7 +426,18 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
 		obj->has_aliasing_ppgtt_mapping = 1;
 	}
 
-	entry->offset = obj->gtt_offset;
+	if (entry->offset != obj->gtt_offset) {
+		entry->offset = obj->gtt_offset;
+		*need_reloc = true;
+	}
+
+	if (entry->flags & EXEC_OBJECT_WRITE)
+		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
+
+	if (entry->flags & EXEC_OBJECT_NEEDS_GTT &&
+	    !obj->has_global_gtt_mapping)
+		i915_gem_gtt_bind_object(obj, obj->cache_level);
+
 	return 0;
 }
 
@@ -451,7 +463,8 @@ i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
 static int
 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    struct drm_file *file,
-			    struct list_head *objects)
+			    struct list_head *objects,
+			    bool *need_relocs)
 {
 	struct drm_i915_gem_object *obj;
 	struct list_head ordered_objects;
@@ -478,7 +491,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 		else
 			list_move_tail(&obj->exec_list, &ordered_objects);
 
-		obj->base.pending_read_domains = 0;
+		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
 		obj->base.pending_write_domain = 0;
 		obj->pending_fenced_gpu_access = false;
 	}
@@ -517,7 +530,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    (need_fence && !obj->map_and_fenceable))
 				ret = i915_gem_object_unbind(obj);
 			else
-				ret = i915_gem_execbuffer_reserve_object(obj, ring);
+				ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
 			if (ret)
 				goto err;
 		}
@@ -527,7 +540,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			if (obj->gtt_space)
 				continue;
 
-			ret = i915_gem_execbuffer_reserve_object(obj, ring);
+			ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
 			if (ret)
 				goto err;
 		}
@@ -547,16 +560,18 @@ err:		/* Decrement pin count for bound objects */
 
 static int
 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
+				  struct drm_i915_gem_execbuffer2 *args,
 				  struct drm_file *file,
 				  struct intel_ring_buffer *ring,
 				  struct eb_objects *eb,
-				  struct drm_i915_gem_exec_object2 *exec,
-				  int count)
+				  struct drm_i915_gem_exec_object2 *exec)
 {
 	struct drm_i915_gem_relocation_entry *reloc;
 	struct drm_i915_gem_object *obj;
+	bool need_relocs;
 	int *reloc_offset;
 	int i, total, ret;
+	int count = args->buffer_count;
 
 	/* We may process another execbuffer during the unlock... */
 	while (!list_empty(&eb->objects)) {
@@ -611,7 +626,8 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 	if (ret)
 		goto err;
 
-	ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects);
+	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
+	ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects, &need_relocs);
 	if (ret)
 		goto err;
 
@@ -675,6 +691,9 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 static bool
 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 {
+	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
+		return false;
+
 	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
 }
 
@@ -688,6 +707,9 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
 		char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
 		int length; /* limited by fault_in_pages_readable() */
 
+		if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS)
+			return -EINVAL;
+
 		/* First check for malicious input causing overflow */
 		if (exec[i].relocation_count >
 		    INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
@@ -695,9 +717,6 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
 
 		length = exec[i].relocation_count *
 			sizeof(struct drm_i915_gem_relocation_entry);
-		if (!access_ok(VERIFY_READ, ptr, length))
-			return -EFAULT;
-
 		/* we may also need to update the presumed offsets */
 		if (!access_ok(VERIFY_WRITE, ptr, length))
 			return -EFAULT;
@@ -785,14 +804,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	struct intel_ring_buffer *ring;
 	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
 	u32 exec_start, exec_len;
-	u32 mask;
-	u32 flags;
+	u32 mask, flags;
 	int ret, mode, i;
+	bool need_relocs;
 
-	if (!i915_gem_check_execbuffer(args)) {
-		DRM_DEBUG("execbuf with invalid offset/length\n");
+	if (!i915_gem_check_execbuffer(args))
 		return -EINVAL;
-	}
 
 	ret = validate_exec_list(exec, args->buffer_count);
 	if (ret)
@@ -929,17 +946,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 			       exec_list);
 
 	/* Move the objects en-masse into the GTT, evicting if necessary. */
-	ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects);
+	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
+	ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects, &need_relocs);
 	if (ret)
 		goto err;
 
 	/* The objects are in their final locations, apply the relocations. */
-	ret = i915_gem_execbuffer_relocate(dev, eb);
+	if (need_relocs)
+		ret = i915_gem_execbuffer_relocate(dev, eb);
 	if (ret) {
 		if (ret == -EFAULT) {
-			ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
-								eb, exec,
-								args->buffer_count);
+			ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
+								eb, exec);
 			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
 		}
 		if (ret)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index b746a3c..8057515 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -307,6 +307,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_PRIME_VMAP_FLUSH	 21
 #define I915_PARAM_RSVD_FOR_FUTURE_USE	 22
 #define I915_PARAM_HAS_SECURE_BATCHES	 23
+#define I915_PARAM_HAS_EXEC_NO_RELOC	 24
 
 typedef struct drm_i915_getparam {
 	int param;
@@ -627,7 +628,11 @@ struct drm_i915_gem_exec_object2 {
 	__u64 offset;
 
 #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
+#define EXEC_OBJECT_NEEDS_GTT	(1<<1)
+#define EXEC_OBJECT_WRITE	(1<<2)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
 	__u64 flags;
+
 	__u64 rsvd1;
 	__u64 rsvd2;
 };
@@ -677,6 +682,15 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_SECURE		(1<<9)
 
+/** Provide a hint to the kernel that the command stream and auxilliary
+ * state buffers already holds the correct presumed addresses and so the
+ * relocation process may be skipped if no buffers need to be moved in
+ * preparation for the execbuffer.
+ */
+#define I915_EXEC_NO_RELOC		(1<<10)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_NO_RELOC<<1)
+
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
 	(eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK
-- 
1.7.10.4

  parent reply	other threads:[~2012-12-03 11:50 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-12-03 11:48 A bunch of random execbuffer patches Chris Wilson
2012-12-03 11:48 ` [PATCH 01/14] drm/i915: Move the get_pages assertions up to the right layer Chris Wilson
2012-12-03 11:49 ` [PATCH 02/14] drm/i915: Decouple the object from the unbound list before freeing pages Chris Wilson
2012-12-03 16:16   ` Daniel Vetter
2012-12-03 11:49 ` [PATCH 03/14] drm/i915: Bail if we attempt to allocate pages for a purged object Chris Wilson
2012-12-03 11:49 ` [PATCH 04/14] drm/i915: Defer the unbind for a fence change until the next get_fence() Chris Wilson
2012-12-03 11:49 ` [PATCH 05/14] drm/i915: Avoid forcing relocations through the mappable GTT or CPU Chris Wilson
2012-12-03 11:49 ` [PATCH 06/14] drm: Optionally create mm blocks from top-to-bottom Chris Wilson
2012-12-03 11:49 ` [PATCH 07/14] drm/i915: Preferentially allocate mappable GTT space to uncached bo Chris Wilson
2012-12-03 11:49 ` [PATCH 08/14] drm/i915: Tighten the checks for invalid relocation domains Chris Wilson
2012-12-03 11:49 ` [PATCH 09/14] drm/i915: Remove check for conflicting relocation write-domains Chris Wilson
2012-12-03 19:18   ` Daniel Vetter
2012-12-03 21:03     ` [PATCH] drm/i915: Reduce memory pressure during shrinker by preallocating swizzle pages Chris Wilson
2012-12-07  0:16       ` Daniel Vetter
2012-12-03 11:49 ` [PATCH 10/14] drm/i915: Take the handle idr spinlock once for looking up the exec objects Chris Wilson
2012-12-03 11:49 ` [PATCH 11/14] drm/i915: Move the execbuffer objects list from the stack into the tracker Chris Wilson
2012-12-03 11:49 ` Chris Wilson [this message]
2012-12-03 11:49 ` [PATCH 13/14] drm/i915: Use the reloc.handle as an index into the execbuffer array Chris Wilson
2012-12-03 11:49 ` [PATCH 14/14] drm/i915: Allow userspace to request an object at a specific offset Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1354535352-3506-13-git-send-email-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.