All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 11/41] drm/i915: Defer active reference until required
Date: Thu, 20 Oct 2016 16:03:53 +0100	[thread overview]
Message-ID: <20161020150423.4560-12-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20161020150423.4560-1-chris@chris-wilson.co.uk>

We only need the active reference to keep the object alive after the
handle has been deleted (so as to prevent a synchronous gem_close). Why
then pay the price of a kref on every execbuf when we can insert that
final active ref just in time for the handle deletion?

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h              | 28 ++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem.c              | 22 +++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_batch_pool.c   |  2 +-
 drivers/gpu/drm/i915/i915_gem_context.c      |  2 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  2 --
 drivers/gpu/drm/i915/i915_gem_gtt.c          |  7 ++++++-
 drivers/gpu/drm/i915/i915_gem_render_state.c |  3 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.c      | 15 ++++++++++++---
 8 files changed, 71 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6a308ad5e5a6..4e93c3797d90 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2234,6 +2234,12 @@ struct drm_i915_gem_object {
 	((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)
 
 	/**
+	 * Have we taken a reference for the object for incomplete GPU
+	 * activity?
+	 */
+#define I915_BO_ACTIVE_REF (I915_BO_ACTIVE_SHIFT + I915_NUM_ENGINES)
+
+	/**
 	 * This is set if the object has been written to since last bound
 	 * to the GTT
 	 */
@@ -2394,6 +2400,28 @@ i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
 	return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT);
 }
 
+static inline bool
+i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj)
+{
+	return test_bit(I915_BO_ACTIVE_REF, &obj->flags);
+}
+
+static inline void
+i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj)
+{
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	__set_bit(I915_BO_ACTIVE_REF, &obj->flags);
+}
+
+static inline void
+i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj)
+{
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	__clear_bit(I915_BO_ACTIVE_REF, &obj->flags);
+}
+
+void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj);
+
 static inline unsigned int
 i915_gem_object_get_tiling(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 90af27d88980..ee8d1405013e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2664,7 +2664,10 @@ i915_gem_object_retire__read(struct i915_gem_active *active,
 		list_move_tail(&obj->global_list,
 			       &request->i915->mm.bound_list);
 
-	i915_gem_object_put(obj);
+	if (i915_gem_object_has_active_reference(obj)) {
+		i915_gem_object_clear_active_reference(obj);
+		i915_gem_object_put(obj);
+	}
 }
 
 static bool i915_context_is_banned(const struct i915_gem_context *ctx)
@@ -2938,6 +2941,12 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 	list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
 		if (vma->vm->file == fpriv)
 			i915_vma_close(vma);
+
+	if (i915_gem_object_is_active(obj) &&
+	    !i915_gem_object_has_active_reference(obj)) {
+		i915_gem_object_set_active_reference(obj);
+		i915_gem_object_get(obj);
+	}
 	mutex_unlock(&obj->base.dev->struct_mutex);
 }
 
@@ -4476,6 +4485,17 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	intel_runtime_pm_put(dev_priv);
 }
 
+void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
+{
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	GEM_BUG_ON(i915_gem_object_has_active_reference(obj));
+	if (i915_gem_object_is_active(obj))
+		i915_gem_object_set_active_reference(obj);
+	else
+		i915_gem_object_put(obj);
+}
+
 int i915_gem_suspend(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index ed989596d9a3..cb25cad3318c 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -73,7 +73,7 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
 		list_for_each_entry_safe(obj, next,
 					 &pool->cache_list[n],
 					 batch_pool_link)
-			i915_gem_object_put(obj);
+			__i915_gem_object_release_unless_active(obj);
 
 		INIT_LIST_HEAD(&pool->cache_list[n]);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 5dca32ac1c67..47e888cc721f 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -155,7 +155,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
 		if (ce->ring)
 			intel_ring_free(ce->ring);
 
-		i915_vma_put(ce->state);
+		__i915_gem_object_release_unless_active(ce->state->obj);
 	}
 
 	put_pid(ctx->pid);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index cdd7b3d49a41..1afa6dbe8472 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1298,8 +1298,6 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	if (!i915_gem_object_is_active(obj))
-		i915_gem_object_get(obj);
 	i915_gem_object_set_active(obj, idx);
 	i915_gem_active_set(&obj->last_read[idx], req);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 947d5ad51fb7..a3a364478a89 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3734,11 +3734,16 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 void i915_vma_unpin_and_release(struct i915_vma **p_vma)
 {
 	struct i915_vma *vma;
+	struct drm_i915_gem_object *obj;
 
 	vma = fetch_and_zero(p_vma);
 	if (!vma)
 		return;
 
+	obj = vma->obj;
+
 	i915_vma_unpin(vma);
-	i915_vma_put(vma);
+	i915_vma_close(vma);
+
+	__i915_gem_object_release_unless_active(obj);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index a98c0f42badd..e7c3dbcc6c81 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -224,7 +224,8 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
 	i915_vma_move_to_active(so.vma, req, 0);
 err_unpin:
 	i915_vma_unpin(so.vma);
+	i915_vma_close(so.vma);
 err_obj:
-	i915_gem_object_put(obj);
+	__i915_gem_object_release_unless_active(obj);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 2396c651bd0f..e29e84813234 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1764,14 +1764,19 @@ static void cleanup_phys_status_page(struct intel_engine_cs *engine)
 static void cleanup_status_page(struct intel_engine_cs *engine)
 {
 	struct i915_vma *vma;
+	struct drm_i915_gem_object *obj;
 
 	vma = fetch_and_zero(&engine->status_page.vma);
 	if (!vma)
 		return;
 
+	obj = vma->obj;
+
 	i915_vma_unpin(vma);
-	i915_gem_object_unpin_map(vma->obj);
-	i915_vma_put(vma);
+	i915_vma_close(vma);
+
+	i915_gem_object_unpin_map(obj);
+	__i915_gem_object_release_unless_active(obj);
 }
 
 static int init_status_page(struct intel_engine_cs *engine)
@@ -1969,7 +1974,11 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 void
 intel_ring_free(struct intel_ring *ring)
 {
-	i915_vma_put(ring->vma);
+	struct drm_i915_gem_object *obj = ring->vma->obj;
+
+	i915_vma_close(ring->vma);
+	__i915_gem_object_release_unless_active(obj);
+
 	kfree(ring);
 }
 
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2016-10-20 15:04 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-10-20 15:03 Multiple timelines, multiple times Chris Wilson
2016-10-20 15:03 ` [PATCH 01/41] drm/i915: Move user fault tracking to a separate list Chris Wilson
2016-10-20 15:03 ` [PATCH 02/41] drm/i915: Use RPM as the barrier for controlling user mmap access Chris Wilson
2016-10-20 15:03 ` [PATCH 03/41] drm/i915: Remove superfluous locking around userfault_list Chris Wilson
2016-10-20 15:03 ` [PATCH 04/41] drm/i915: Remove RPM sequence checking Chris Wilson
2016-10-21 10:19   ` Imre Deak
2016-10-20 15:03 ` [PATCH 05/41] drm/i915: Move fence cancellation to runtime suspend Chris Wilson
2016-10-21 12:48   ` Imre Deak
2016-10-21 13:52     ` Chris Wilson
2016-10-21 14:05     ` [PATCH v2] " Chris Wilson
2016-10-24  9:55       ` Imre Deak
2016-10-20 15:03 ` [PATCH 06/41] drm/i915: Support asynchronous waits on struct fence from i915_gem_request Chris Wilson
2016-10-20 15:03 ` [PATCH 07/41] drm/i915: Allow i915_sw_fence_await_sw_fence() to allocate Chris Wilson
2016-10-20 15:03 ` [PATCH 08/41] drm/i915: Remove superfluous wait_for_error() from throttle-ioctl Chris Wilson
2016-10-20 15:41   ` Joonas Lahtinen
2016-10-20 15:03 ` [PATCH 09/41] drm/i915: Rearrange i915_wait_request() accounting with callers Chris Wilson
2016-10-20 15:03 ` [PATCH 10/41] drm/i915: Remove unused i915_gem_active_wait() in favour of _unlocked() Chris Wilson
2016-10-20 15:03 ` Chris Wilson [this message]
2016-10-20 15:03 ` [PATCH 12/41] drm/i915: Introduce an internal allocator for disposable private objects Chris Wilson
2016-10-20 16:22   ` Tvrtko Ursulin
2016-10-20 20:36     ` Chris Wilson
2016-10-21  7:21       ` Tvrtko Ursulin
2016-10-21  7:50         ` Chris Wilson
2016-10-21  7:53           ` Tvrtko Ursulin
2016-10-21  7:56   ` [PATCH v4] " Chris Wilson
2016-10-21  8:07     ` Tvrtko Ursulin
2016-10-20 15:03 ` [PATCH 13/41] drm/i915: Reuse the active golden render state batch Chris Wilson
2016-10-20 15:03 ` [PATCH 14/41] drm/i915: Markup GEM API with lockdep asserts Chris Wilson
2016-10-20 15:03 ` [PATCH 15/41] drm/i915: Use a radixtree for random access to the object's backing storage Chris Wilson
2016-10-20 15:55   ` Tvrtko Ursulin
2016-10-20 15:03 ` [PATCH 16/41] drm/i915: Use radixtree to jump start intel_partial_pages() Chris Wilson
2016-10-20 15:03 ` [PATCH 17/41] drm/i915: Refactor object page API Chris Wilson
2016-10-20 15:04 ` [PATCH 18/41] drm/i915: Pass around sg_table to get_pages/put_pages backend Chris Wilson
2016-10-20 15:04 ` [PATCH 19/41] drm/i915: Move object backing storage manipulation to its own locking Chris Wilson
2016-10-20 15:04 ` [PATCH 20/41] drm/i915/dmabuf: Acquire the backing storage outside of struct_mutex Chris Wilson
2016-10-20 15:04 ` [PATCH 21/41] drm/i915: Implement pread without struct-mutex Chris Wilson
2016-10-20 15:04 ` [PATCH 22/41] drm/i915: Implement pwrite " Chris Wilson
2016-10-20 15:04 ` [PATCH 23/41] drm/i915: Acquire the backing storage outside of struct_mutex in set-domain Chris Wilson
2016-10-20 15:04 ` [PATCH 24/41] drm/i915: Move object release to a freelist + worker Chris Wilson
2016-10-20 15:04 ` [PATCH 25/41] drm/i915: Use lockless object free Chris Wilson
2016-10-20 15:04 ` [PATCH 26/41] drm/i915: Move GEM activity tracking into a common struct reservation_object Chris Wilson
2016-10-20 15:04 ` [PATCH 27/41] drm/i915: Restore nonblocking awaits for modesetting Chris Wilson
2016-10-20 15:04 ` [PATCH 28/41] drm/i915: Combine seqno + tracking into a global timeline struct Chris Wilson
2016-10-20 15:04 ` [PATCH 29/41] drm/i915: Queue the idling context switch after all other timelines Chris Wilson
2016-10-20 15:04 ` [PATCH 30/41] drm/i915: Wait first for submission, before waiting for request completion Chris Wilson
2016-10-20 15:04 ` [PATCH 31/41] drm/i915: Introduce a global_seqno for each request Chris Wilson
2016-10-20 15:04 ` [PATCH 32/41] drm/i915: Rename ->emit_request to ->emit_breadcrumb Chris Wilson
2016-10-20 15:04 ` [PATCH 33/41] drm/i915: Record space required for breadcrumb emission Chris Wilson
2016-10-20 15:04 ` [PATCH 34/41] drm/i915: Defer " Chris Wilson
2016-10-20 15:04 ` [PATCH 35/41] drm/i915: Move the global sync optimisation to the timeline Chris Wilson
2016-10-20 15:04 ` [PATCH 36/41] drm/i915: Create a unique name for the context Chris Wilson
2016-10-20 15:04 ` [PATCH 37/41] drm/i915: Reserve space in the global seqno during request allocation Chris Wilson
2016-10-20 15:04 ` [PATCH 38/41] drm/i915: Defer setting of global seqno on request to submission Chris Wilson
2016-10-20 15:04 ` [PATCH 39/41] drm/i915: Enable multiple timelines Chris Wilson
2016-10-20 15:04 ` [PATCH 40/41] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
2016-10-20 15:04 ` [PATCH 41/41] drm/i915: Support explicit fencing for execbuf Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161020150423.4560-12-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.