* [PATCH 1/4] drm/i915/bdw: Clean up execlist queue items in retire_work
From: Thomas Daniel @ 2014-10-29  9:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: shuang.he

No longer create a work item to clean each execlist queue item.
Instead, move retired execlist requests to a queue and clean up the
items during retire_requests.

v2: Fix legacy ring path broken during overzealous cleanup

v3: Update idle detection to take execlists queue into account

Issue: VIZ-4274
Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
---
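In outline, this replaces the per-item work handler with a two-stage
hand-off; a condensed sketch of the pattern, with names and declarations
as in the diff below (error handling elided):

    /* Producer side (irq context, ring->execlist_lock held): a finished
     * request is moved onto a retired list instead of spawning a work
     * item per request. */
    list_del(&req->execlist_link);
    list_add_tail(&req->execlist_link, &ring->execlist_retired_req_list);

    /* Consumer side (retire_requests, struct_mutex held): grab a private
     * copy of the retired list under the spinlock, then do the
     * heavyweight cleanup outside it. */
    spin_lock_irqsave(&ring->execlist_lock, flags);
    list_replace_init(&ring->execlist_retired_req_list, &retired_list);
    spin_unlock_irqrestore(&ring->execlist_lock, flags);

    list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
            intel_runtime_pm_put(dev_priv);
            i915_gem_context_unreference(req->ctx);
            list_del(&req->execlist_link);
    }
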
 drivers/gpu/drm/i915/i915_gem.c         |    4 +++
 drivers/gpu/drm/i915/intel_lrc.c        |   52 ++++++++++++++++++-------------
 drivers/gpu/drm/i915/intel_lrc.h        |    2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h |    1 +
 4 files changed, 36 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 827edb5..df28202 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2718,6 +2718,10 @@ i915_gem_retire_requests(struct drm_device *dev)
 	for_each_ring(ring, dev_priv, i) {
 		i915_gem_retire_requests_ring(ring);
 		idle &= list_empty(&ring->request_list);
+		if (i915.enable_execlists) {
+			idle &= list_empty(&ring->execlist_queue);
+			intel_execlists_retire_requests(ring);
+		}
 	}
 
 	if (idle)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cd74e5c..87ce445 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -386,7 +386,6 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
 {
 	struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
 	struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 
 	assert_spin_locked(&ring->execlist_lock);
 
@@ -403,7 +402,8 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
 			 * will update tail past first request's workload */
 			cursor->elsp_submitted = req0->elsp_submitted;
 			list_del(&req0->execlist_link);
-			queue_work(dev_priv->wq, &req0->work);
+			list_add_tail(&req0->execlist_link,
+				&ring->execlist_retired_req_list);
 			req0 = cursor;
 		} else {
 			req1 = cursor;
@@ -425,7 +425,6 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
 static bool execlists_check_remove_request(struct intel_engine_cs *ring,
 					   u32 request_id)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct intel_ctx_submit_request *head_req;
 
 	assert_spin_locked(&ring->execlist_lock);
@@ -443,7 +442,8 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring,
 
 			if (--head_req->elsp_submitted <= 0) {
 				list_del(&head_req->execlist_link);
-				queue_work(dev_priv->wq, &head_req->work);
+				list_add_tail(&head_req->execlist_link,
+					&ring->execlist_retired_req_list);
 				return true;
 			}
 		}
@@ -512,22 +512,6 @@ void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring)
 		   ((u32)ring->next_context_status_buffer & 0x07) << 8);
 }
 
-static void execlists_free_request_task(struct work_struct *work)
-{
-	struct intel_ctx_submit_request *req =
-		container_of(work, struct intel_ctx_submit_request, work);
-	struct drm_device *dev = req->ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	intel_runtime_pm_put(dev_priv);
-
-	mutex_lock(&dev->struct_mutex);
-	i915_gem_context_unreference(req->ctx);
-	mutex_unlock(&dev->struct_mutex);
-
-	kfree(req);
-}
-
 static int execlists_context_queue(struct intel_engine_cs *ring,
 				   struct intel_context *to,
 				   u32 tail)
@@ -544,7 +528,6 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
 	i915_gem_context_reference(req->ctx);
 	req->ring = ring;
 	req->tail = tail;
-	INIT_WORK(&req->work, execlists_free_request_task);
 
 	intel_runtime_pm_get(dev_priv);
 
@@ -565,7 +548,8 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
 			WARN(tail_req->elsp_submitted != 0,
 			     "More than 2 already-submitted reqs queued\n");
 			list_del(&tail_req->execlist_link);
-			queue_work(dev_priv->wq, &tail_req->work);
+			list_add_tail(&tail_req->execlist_link,
+				&ring->execlist_retired_req_list);
 		}
 	}
 
@@ -733,6 +717,29 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
 	return 0;
 }
 
+void intel_execlists_retire_requests(struct intel_engine_cs *ring)
+{
+	struct intel_ctx_submit_request *req, *tmp;
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	unsigned long flags;
+	struct list_head retired_list;
+
+	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+	if (list_empty(&ring->execlist_retired_req_list))
+		return;
+
+	INIT_LIST_HEAD(&retired_list);
+	spin_lock_irqsave(&ring->execlist_lock, flags);
+	list_replace_init(&ring->execlist_retired_req_list, &retired_list);
+	spin_unlock_irqrestore(&ring->execlist_lock, flags);
+
+	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
+		intel_runtime_pm_put(dev_priv);
+		i915_gem_context_unreference(req->ctx);
+		list_del(&req->execlist_link);
+	}
+}
+
 void intel_logical_ring_stop(struct intel_engine_cs *ring)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -1248,6 +1255,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
 	init_waitqueue_head(&ring->irq_queue);
 
 	INIT_LIST_HEAD(&ring->execlist_queue);
+	INIT_LIST_HEAD(&ring->execlist_retired_req_list);
 	spin_lock_init(&ring->execlist_lock);
 	ring->next_context_status_buffer = 0;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 33c3b4b..84bbf19 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -104,11 +104,11 @@ struct intel_ctx_submit_request {
 	u32 tail;
 
 	struct list_head execlist_link;
-	struct work_struct work;
 
 	int elsp_submitted;
 };
 
 void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring);
+void intel_execlists_retire_requests(struct intel_engine_cs *ring);
 
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 96479c8..8c002d2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -235,6 +235,7 @@ struct  intel_engine_cs {
 	/* Execlists */
 	spinlock_t execlist_lock;
 	struct list_head execlist_queue;
+	struct list_head execlist_retired_req_list;
 	u8 next_context_status_buffer;
 	u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
 	int		(*emit_request)(struct intel_ringbuffer *ringbuf);
-- 
1.7.9.5

* [PATCH 2/4] drm/i915/bdw: Setup global hardware status page in execlists mode
From: Thomas Daniel @ 2014-10-29  9:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: shuang.he

Write the HWS_PGA address even in execlists mode, as the global hardware
status page is still required.  This address was previously uninitialized and
HWSP writes would clobber whatever buffer happened to reside at GGTT
address 0.

v2: Break out hardware status page setup into a separate function.

Issue: VIZ-2020
Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
---
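The crux of the fix: the status page still lives at offset 0 of the
default context object in execlists mode, so program HWS_PGA with its
real GGTT address instead of leaving the register at 0.  A condensed
sketch, with names as in the diff below (error handling elided):

    ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj);
    ring->status_page.page_addr = kmap(sg_page(default_ctx_obj->pages->sgl));
    ring->status_page.obj = default_ctx_obj;

    I915_WRITE(RING_HWS_PGA(ring->mmio_base), (u32)ring->status_page.gfx_addr);
    POSTING_READ(RING_HWS_PGA(ring->mmio_base));
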
 drivers/gpu/drm/i915/intel_lrc.c |   34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 87ce445..6b8bf0d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1657,6 +1657,27 @@ static uint32_t get_lr_context_size(struct intel_engine_cs *ring)
 	return ret;
 }
 
+static int lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
+		struct drm_i915_gem_object *default_ctx_obj)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+
+	/* The status page is offset 0 from the default context object
+	 * in LRC mode. */
+	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj);
+	ring->status_page.page_addr =
+			kmap(sg_page(default_ctx_obj->pages->sgl));
+	if (ring->status_page.page_addr == NULL)
+		return -ENOMEM;
+	ring->status_page.obj = default_ctx_obj;
+
+	I915_WRITE(RING_HWS_PGA(ring->mmio_base),
+			(u32)ring->status_page.gfx_addr);
+	POSTING_READ(RING_HWS_PGA(ring->mmio_base));
+
+	return 0;
+}
+
 /**
  * intel_lr_context_deferred_create() - create the LRC specific bits of a context
  * @ctx: LR context to create.
@@ -1742,14 +1763,11 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 	ctx->engine[ring->id].state = ctx_obj;
 
 	if (ctx == ring->default_context) {
-		/* The status page is offset 0 from the default context object
-		 * in LRC mode. */
-		ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(ctx_obj);
-		ring->status_page.page_addr =
-				kmap(sg_page(ctx_obj->pages->sgl));
-		if (ring->status_page.page_addr == NULL)
-			return -ENOMEM;
-		ring->status_page.obj = ctx_obj;
+		ret = lrc_setup_hardware_status_page(ring, ctx_obj);
+		if (ret) {
+			DRM_ERROR("Failed to setup hardware status page\n");
+			goto error;
+		}
 	}
 
 	if (ring->id == RCS && !ctx->rcs_initialized) {
-- 
1.7.9.5

* [PATCH 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
From: Thomas Daniel @ 2014-10-29  9:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: shuang.he

From: Oscar Mateo <oscar.mateo@intel.com>

Up until now, we have pinned every logical ring context backing object
during creation, and left it pinned until destruction. This made my life
easier, but it's a harmful thing to do, because we cause fragmentation
of the GGTT (and, eventually, we would run out of space).

This patch makes the pinning on-demand: the backing objects of the two
contexts that are written to the ELSP are pinned right before submission
and unpinned once the hardware is done with them. The only context that
is still pinned regardless is the global default one, so that the HWS can
still be accessed in the same way (ring->status_page).

v2: In the early version of this patch, we were pinning the context as
we put it into the ELSP: on the one hand, this is very efficient because
only a maximum two contexts are pinned at any given time, but on the other
hand, we cannot really pin in interrupt time :(

v3: Use a mutex rather than atomic_t to protect pin count to avoid races.
Do not unpin default context in free_request.

v4: Break out pin and unpin into functions.  Fix style problems reported
by checkpatch.

Issue: VIZ-4277
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
---
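The pinning discipline introduced here is a mutex-protected pin count
per (context, engine) pair; a condensed sketch, with names as in the
diff below (error handling elided):

    /* Pin: the first user takes the GGTT pin. */
    mutex_lock(&ctx->engine[ring->id].unpin_lock);
    if (ctx->engine[ring->id].unpin_count++ == 0)
            ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
    mutex_unlock(&ctx->engine[ring->id].unpin_lock);

    /* Unpin: the last user drops the GGTT pin. */
    mutex_lock(&ctx->engine[ring->id].unpin_lock);
    if (--ctx->engine[ring->id].unpin_count == 0)
            i915_gem_object_ggtt_unpin(ctx_obj);
    mutex_unlock(&ctx->engine[ring->id].unpin_lock);
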
 drivers/gpu/drm/i915/i915_debugfs.c |   12 +++++-
 drivers/gpu/drm/i915/i915_drv.h     |    2 +
 drivers/gpu/drm/i915/i915_gem.c     |   39 ++++++++++++-------
 drivers/gpu/drm/i915/intel_lrc.c    |   73 +++++++++++++++++++++++++++++------
 drivers/gpu/drm/i915/intel_lrc.h    |    4 ++
 5 files changed, 103 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e60d5c2..6eaf813 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1799,10 +1799,16 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
 				continue;
 
 			if (ctx_obj) {
-				struct page *page = i915_gem_object_get_page(ctx_obj, 1);
-				uint32_t *reg_state = kmap_atomic(page);
+				struct page *page;
+				uint32_t *reg_state;
 				int j;
 
+				i915_gem_obj_ggtt_pin(ctx_obj,
+						GEN8_LR_CONTEXT_ALIGN, 0);
+
+				page = i915_gem_object_get_page(ctx_obj, 1);
+				reg_state = kmap_atomic(page);
+
 				seq_printf(m, "CONTEXT: %s %u\n", ring->name,
 						intel_execlists_ctx_id(ctx_obj));
 
@@ -1814,6 +1820,8 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
 				}
 				kunmap_atomic(reg_state);
 
+				i915_gem_object_ggtt_unpin(ctx_obj);
+
 				seq_putc(m, '\n');
 			}
 		}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 059330c..632b88d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -655,6 +655,8 @@ struct intel_context {
 	struct {
 		struct drm_i915_gem_object *state;
 		struct intel_ringbuffer *ringbuf;
+		int unpin_count;
+		struct mutex unpin_lock;
 	} engine[I915_NUM_RINGS];
 
 	struct list_head link;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index df28202..8a00dea 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2494,12 +2494,18 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
 
 static void i915_gem_free_request(struct drm_i915_gem_request *request)
 {
+	struct intel_context *ctx = request->ctx;
+
 	list_del(&request->list);
 	i915_gem_request_remove_from_client(request);
 
-	if (request->ctx)
-		i915_gem_context_unreference(request->ctx);
+	if (i915.enable_execlists && ctx) {
+		struct intel_engine_cs *ring = request->ring;
 
+		if (ctx != ring->default_context)
+			intel_lr_context_unpin(ring, ctx);
+		i915_gem_context_unreference(ctx);
+	}
 	kfree(request);
 }
 
@@ -2554,6 +2560,23 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 	}
 
 	/*
+	 * Clear the execlists queue up before freeing the requests, as those
+	 * are the ones that keep the context and ringbuffer backing objects
+	 * pinned in place.
+	 */
+	while (!list_empty(&ring->execlist_queue)) {
+		struct intel_ctx_submit_request *submit_req;
+
+		submit_req = list_first_entry(&ring->execlist_queue,
+				struct intel_ctx_submit_request,
+				execlist_link);
+		list_del(&submit_req->execlist_link);
+		intel_runtime_pm_put(dev_priv);
+		i915_gem_context_unreference(submit_req->ctx);
+		kfree(submit_req);
+	}
+
+	/*
 	 * We must free the requests after all the corresponding objects have
 	 * been moved off active lists. Which is the same order as the normal
 	 * retire_requests function does. This is important if object hold
@@ -2570,18 +2593,6 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 		i915_gem_free_request(request);
 	}
 
-	while (!list_empty(&ring->execlist_queue)) {
-		struct intel_ctx_submit_request *submit_req;
-
-		submit_req = list_first_entry(&ring->execlist_queue,
-				struct intel_ctx_submit_request,
-				execlist_link);
-		list_del(&submit_req->execlist_link);
-		intel_runtime_pm_put(dev_priv);
-		i915_gem_context_unreference(submit_req->ctx);
-		kfree(submit_req);
-	}
-
 	/* These may not have been flush before the reset, do so now */
 	kfree(ring->preallocated_lazy_request);
 	ring->preallocated_lazy_request = NULL;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6b8bf0d..7950357 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -139,8 +139,6 @@
 #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
 #define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
 
-#define GEN8_LR_CONTEXT_ALIGN 4096
-
 #define RING_EXECLIST_QFULL		(1 << 0x2)
 #define RING_EXECLIST1_VALID		(1 << 0x3)
 #define RING_EXECLIST0_VALID		(1 << 0x4)
@@ -800,9 +798,42 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
 	execlists_context_queue(ring, ctx, ringbuf->tail);
 }
 
+static int intel_lr_context_pin(struct intel_engine_cs *ring,
+		struct intel_context *ctx)
+{
+	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+	int ret = 0;
+
+	mutex_lock(&ctx->engine[ring->id].unpin_lock);
+	if (ctx->engine[ring->id].unpin_count++ == 0) {
+		ret = i915_gem_obj_ggtt_pin(ctx_obj,
+				GEN8_LR_CONTEXT_ALIGN, 0);
+		if (ret)
+			ctx->engine[ring->id].unpin_count = 0;
+	}
+	mutex_unlock(&ctx->engine[ring->id].unpin_lock);
+
+	return ret;
+}
+
+void intel_lr_context_unpin(struct intel_engine_cs *ring,
+		struct intel_context *ctx)
+{
+	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+
+	if (ctx_obj) {
+		mutex_lock(&ctx->engine[ring->id].unpin_lock);
+		if (--ctx->engine[ring->id].unpin_count == 0)
+			i915_gem_object_ggtt_unpin(ctx_obj);
+		mutex_unlock(&ctx->engine[ring->id].unpin_lock);
+	}
+}
+
 static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
 				    struct intel_context *ctx)
 {
+	int ret;
+
 	if (ring->outstanding_lazy_seqno)
 		return 0;
 
@@ -813,6 +844,14 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
 		if (request == NULL)
 			return -ENOMEM;
 
+		if (ctx != ring->default_context) {
+			ret = intel_lr_context_pin(ring, ctx);
+			if (ret) {
+				kfree(request);
+				return ret;
+			}
+		}
+
 		/* Hold a reference to the context this request belongs to
 		 * (we will need it when the time comes to emit/retire the
 		 * request).
@@ -1625,13 +1664,18 @@ void intel_lr_context_free(struct intel_context *ctx)
 
 	for (i = 0; i < I915_NUM_RINGS; i++) {
 		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
-		struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
 
 		if (ctx_obj) {
+			struct intel_ringbuffer *ringbuf =
+					ctx->engine[i].ringbuf;
+			struct intel_engine_cs *ring = ringbuf->ring;
+
 			intel_destroy_ringbuffer_obj(ringbuf);
 			kfree(ringbuf);
-			i915_gem_object_ggtt_unpin(ctx_obj);
+			if (ctx == ring->default_context)
+				i915_gem_object_ggtt_unpin(ctx_obj);
 			drm_gem_object_unreference(&ctx_obj->base);
+			mutex_destroy(&ctx->engine[i].unpin_lock);
 		}
 	}
 }
@@ -1694,6 +1738,7 @@ static int lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
 int intel_lr_context_deferred_create(struct intel_context *ctx,
 				     struct intel_engine_cs *ring)
 {
+	const bool is_global_default_ctx = (ctx == ring->default_context);
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_gem_object *ctx_obj;
 	uint32_t context_size;
@@ -1713,18 +1758,22 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 		return ret;
 	}
 
-	ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n", ret);
-		drm_gem_object_unreference(&ctx_obj->base);
-		return ret;
+	if (is_global_default_ctx) {
+		ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
+		if (ret) {
+			DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n",
+					ret);
+			drm_gem_object_unreference(&ctx_obj->base);
+			return ret;
+		}
 	}
 
 	ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
 	if (!ringbuf) {
 		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
 				ring->name);
-		i915_gem_object_ggtt_unpin(ctx_obj);
+		if (is_global_default_ctx)
+			i915_gem_object_ggtt_unpin(ctx_obj);
 		drm_gem_object_unreference(&ctx_obj->base);
 		ret = -ENOMEM;
 		return ret;
@@ -1761,6 +1810,7 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 
 	ctx->engine[ring->id].ringbuf = ringbuf;
 	ctx->engine[ring->id].state = ctx_obj;
+	mutex_init(&ctx->engine[ring->id].unpin_lock);
 
 	if (ctx == ring->default_context) {
 		ret = lrc_setup_hardware_status_page(ring, ctx_obj);
@@ -1786,7 +1836,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 
 error:
 	kfree(ringbuf);
-	i915_gem_object_ggtt_unpin(ctx_obj);
+	if (is_global_default_ctx)
+		i915_gem_object_ggtt_unpin(ctx_obj);
 	drm_gem_object_unreference(&ctx_obj->base);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 84bbf19..14b216b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -24,6 +24,8 @@
 #ifndef _INTEL_LRC_H_
 #define _INTEL_LRC_H_
 
+#define GEN8_LR_CONTEXT_ALIGN 4096
+
 /* Execlists regs */
 #define RING_ELSP(ring)			((ring)->mmio_base+0x230)
 #define RING_EXECLIST_STATUS(ring)	((ring)->mmio_base+0x234)
@@ -67,6 +69,8 @@ int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
 void intel_lr_context_free(struct intel_context *ctx);
 int intel_lr_context_deferred_create(struct intel_context *ctx,
 				     struct intel_engine_cs *ring);
+void intel_lr_context_unpin(struct intel_engine_cs *ring,
+		struct intel_context *ctx);
 
 /* Execlists */
 int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
-- 
1.7.9.5

* [PATCH 4/4] drm/i915/bdw: Pin the ringbuffer backing object to GGTT on-demand
From: Thomas Daniel @ 2014-10-29  9:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: shuang.he

Same as with the context, pinning to GGTT regardless is harmful (it
badly fragments the GGTT and can even exhaust it).

Unfortunately, this case is also more complex than the previous one
because we need to map and access the ringbuffer in several places
along the execbuffer path (and we cannot make do by leaving the
default ringbuffer pinned, as before). Also, the context object
itself contains a pointer to the ringbuffer address that we have to
keep updated if we are going to allow the ringbuffer to move around.

v2: Same as with the context pinning, we cannot really do it during
an interrupt. Also, pin the default ringbuffer objects regardless
(makes error capture a lot easier).

v3: Rebased. Take a pin reference on the ringbuffer for each item
in the execlist request queue, because the hardware may still be using
the ringbuffer after the MI_USER_INTERRUPT that notifies the seqno
update has executed.  The ringbuffer must remain pinned until the
context save is complete.  No longer pin and unpin the ringbuffer in
populate_lr_context() - this transient address is meaningless and the
pinning can cause a sleep while atomic.

v4: Moved ringbuffer pin and unpin into the lr_context_pin functions.
Downgraded pinning check BUG_ONs to WARN_ONs.

Issue: VIZ-4277
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
---
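Mapping is now split out of allocation into an explicit pin-and-map /
unmap-and-unpin pairing, and since the ringbuffer may now move, its GGTT
address is rewritten into the context image at submission time
(execlists_update_context() in the diff).  A condensed sketch of the
pairing, with names as in the diff below (error handling elided):

    /* Pin into the mappable GGTT aperture and map through it. */
    i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
    i915_gem_object_set_to_gtt_domain(obj, true);
    ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
                    i915_gem_obj_ggtt_offset(obj), ringbuf->size);

    /* Reverse order when the last pin reference is dropped. */
    iounmap(ringbuf->virtual_start);
    i915_gem_object_ggtt_unpin(ringbuf->obj);
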
 drivers/gpu/drm/i915/intel_lrc.c        |  110 ++++++++++++++++++++++---------
 drivers/gpu/drm/i915/intel_lrc.h        |    1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |   85 ++++++++++++++----------
 drivers/gpu/drm/i915/intel_ringbuffer.h |    3 +
 4 files changed, 133 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7950357..b5ae4fa 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -202,6 +202,9 @@ enum {
 };
 #define GEN8_CTX_ID_SHIFT 32
 
+static int intel_lr_context_pin(struct intel_engine_cs *ring,
+		struct intel_context *ctx);
+
 /**
  * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
  * @dev: DRM device.
@@ -339,7 +342,9 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
 }
 
-static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tail)
+static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
+				    struct drm_i915_gem_object *ring_obj,
+				    u32 tail)
 {
 	struct page *page;
 	uint32_t *reg_state;
@@ -348,6 +353,7 @@ static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tai
 	reg_state = kmap_atomic(page);
 
 	reg_state[CTX_RING_TAIL+1] = tail;
+	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
 
 	kunmap_atomic(reg_state);
 
@@ -358,21 +364,25 @@ static int execlists_submit_context(struct intel_engine_cs *ring,
 				    struct intel_context *to0, u32 tail0,
 				    struct intel_context *to1, u32 tail1)
 {
-	struct drm_i915_gem_object *ctx_obj0;
+	struct drm_i915_gem_object *ctx_obj0 = to0->engine[ring->id].state;
+	struct intel_ringbuffer *ringbuf0 = to0->engine[ring->id].ringbuf;
 	struct drm_i915_gem_object *ctx_obj1 = NULL;
+	struct intel_ringbuffer *ringbuf1 = NULL;
 
-	ctx_obj0 = to0->engine[ring->id].state;
 	BUG_ON(!ctx_obj0);
 	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0));
+	WARN_ON(!i915_gem_obj_is_pinned(ringbuf0->obj));
 
-	execlists_ctx_write_tail(ctx_obj0, tail0);
+	execlists_update_context(ctx_obj0, ringbuf0->obj, tail0);
 
 	if (to1) {
+		ringbuf1 = to1->engine[ring->id].ringbuf;
 		ctx_obj1 = to1->engine[ring->id].state;
 		BUG_ON(!ctx_obj1);
 		WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1));
+		WARN_ON(!i915_gem_obj_is_pinned(ringbuf1->obj));
 
-		execlists_ctx_write_tail(ctx_obj1, tail1);
+		execlists_update_context(ctx_obj1, ringbuf1->obj, tail1);
 	}
 
 	execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
@@ -435,9 +445,9 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring,
 		struct drm_i915_gem_object *ctx_obj =
 				head_req->ctx->engine[ring->id].state;
 		if (intel_execlists_ctx_id(ctx_obj) == request_id) {
-			WARN(head_req->elsp_submitted == 0,
-			     "Never submitted head request\n");
 
+			/* If the request has been merged, it is possible to get
+			 * here with an unsubmitted request. */
 			if (--head_req->elsp_submitted <= 0) {
 				list_del(&head_req->execlist_link);
 				list_add_tail(&head_req->execlist_link,
@@ -485,8 +495,7 @@ void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring)
 
 		if (status & GEN8_CTX_STATUS_PREEMPTED) {
 			if (status & GEN8_CTX_STATUS_LITE_RESTORE) {
-				if (execlists_check_remove_request(ring, status_id))
-					WARN(1, "Lite Restored request removed from queue\n");
+				execlists_check_remove_request(ring, status_id);
 			} else
 				WARN(1, "Preemption without Lite Restore\n");
 		}
@@ -524,6 +533,10 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
 		return -ENOMEM;
 	req->ctx = to;
 	i915_gem_context_reference(req->ctx);
+
+	if (to != ring->default_context)
+		intel_lr_context_pin(ring, to);
+
 	req->ring = ring;
 	req->tail = tail;
 
@@ -544,7 +557,7 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
 
 		if (to == tail_req->ctx) {
 			WARN(tail_req->elsp_submitted != 0,
-			     "More than 2 already-submitted reqs queued\n");
+				"More than 2 already-submitted reqs queued\n");
 			list_del(&tail_req->execlist_link);
 			list_add_tail(&tail_req->execlist_link,
 				&ring->execlist_retired_req_list);
@@ -732,6 +745,12 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
 	spin_unlock_irqrestore(&ring->execlist_lock, flags);
 
 	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
+		struct intel_context *ctx = req->ctx;
+		struct drm_i915_gem_object *ctx_obj =
+				ctx->engine[ring->id].state;
+
+		if (ctx_obj && (ctx != ring->default_context))
+			intel_lr_context_unpin(ring, ctx);
 		intel_runtime_pm_put(dev_priv);
 		i915_gem_context_unreference(req->ctx);
 		list_del(&req->execlist_link);
@@ -802,6 +821,7 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring,
 		struct intel_context *ctx)
 {
 	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
 	int ret = 0;
 
 	mutex_lock(&ctx->engine[ring->id].unpin_lock);
@@ -809,22 +829,37 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring,
 		ret = i915_gem_obj_ggtt_pin(ctx_obj,
 				GEN8_LR_CONTEXT_ALIGN, 0);
 		if (ret)
-			ctx->engine[ring->id].unpin_count = 0;
+			goto reset_unpin_count;
+
+		ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
+		if (ret)
+			goto unpin_ctx_obj;
 	}
 	mutex_unlock(&ctx->engine[ring->id].unpin_lock);
 
 	return ret;
+
+unpin_ctx_obj:
+	i915_gem_object_ggtt_unpin(ctx_obj);
+reset_unpin_count:
+	ctx->engine[ring->id].unpin_count = 0;
+	mutex_unlock(&ctx->engine[ring->id].unpin_lock);
+
+	return ret;
 }
 
 void intel_lr_context_unpin(struct intel_engine_cs *ring,
 		struct intel_context *ctx)
 {
 	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
 
 	if (ctx_obj) {
 		mutex_lock(&ctx->engine[ring->id].unpin_lock);
-		if (--ctx->engine[ring->id].unpin_count == 0)
+		if (--ctx->engine[ring->id].unpin_count == 0) {
+			intel_unpin_ringbuffer_obj(ringbuf);
 			i915_gem_object_ggtt_unpin(ctx_obj);
+		}
 		mutex_unlock(&ctx->engine[ring->id].unpin_lock);
 	}
 }
@@ -1542,7 +1577,6 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_object *ring_obj = ringbuf->obj;
 	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
 	struct page *page;
 	uint32_t *reg_state;
@@ -1588,7 +1622,9 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
 	reg_state[CTX_RING_TAIL+1] = 0;
 	reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
-	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
+	/* Ring buffer start address is not known until the buffer is pinned.
+	 * It is written to the context image in execlists_update_context()
+	 */
 	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
 	reg_state[CTX_RING_BUFFER_CONTROL+1] =
 			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID;
@@ -1670,10 +1706,12 @@ void intel_lr_context_free(struct intel_context *ctx)
 					ctx->engine[i].ringbuf;
 			struct intel_engine_cs *ring = ringbuf->ring;
 
+			if (ctx == ring->default_context) {
+				intel_unpin_ringbuffer_obj(ringbuf);
+				i915_gem_object_ggtt_unpin(ctx_obj);
+			}
 			intel_destroy_ringbuffer_obj(ringbuf);
 			kfree(ringbuf);
-			if (ctx == ring->default_context)
-				i915_gem_object_ggtt_unpin(ctx_obj);
 			drm_gem_object_unreference(&ctx_obj->base);
 			mutex_destroy(&ctx->engine[i].unpin_lock);
 		}
@@ -1772,11 +1810,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 	if (!ringbuf) {
 		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
 				ring->name);
-		if (is_global_default_ctx)
-			i915_gem_object_ggtt_unpin(ctx_obj);
-		drm_gem_object_unreference(&ctx_obj->base);
 		ret = -ENOMEM;
-		return ret;
+		goto error_unpin_ctx;
 	}
 
 	ringbuf->ring = ring;
@@ -1789,22 +1824,30 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 	ringbuf->space = ringbuf->size;
 	ringbuf->last_retired_head = -1;
 
-	/* TODO: For now we put this in the mappable region so that we can reuse
-	 * the existing ringbuffer code which ioremaps it. When we start
-	 * creating many contexts, this will no longer work and we must switch
-	 * to a kmapish interface.
-	 */
-	ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n",
+	if (ringbuf->obj == NULL) {
+		ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
+		if (ret) {
+			DRM_DEBUG_DRIVER(
+				"Failed to allocate ringbuffer obj %s: %d\n",
 				ring->name, ret);
-		goto error;
+			goto error_free_rbuf;
+		}
+
+		if (is_global_default_ctx) {
+			ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
+			if (ret) {
+				DRM_ERROR(
+					"Failed to pin and map ringbuffer %s: %d\n",
+					ring->name, ret);
+				goto error_destroy_rbuf;
+			}
+		}
+
 	}
 
 	ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
 	if (ret) {
 		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
-		intel_destroy_ringbuffer_obj(ringbuf);
 		goto error;
 	}
 
@@ -1826,7 +1869,6 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 			DRM_ERROR("Init render state failed: %d\n", ret);
 			ctx->engine[ring->id].ringbuf = NULL;
 			ctx->engine[ring->id].state = NULL;
-			intel_destroy_ringbuffer_obj(ringbuf);
 			goto error;
 		}
 		ctx->rcs_initialized = true;
@@ -1835,7 +1877,13 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 	return 0;
 
 error:
+	if (is_global_default_ctx)
+		intel_unpin_ringbuffer_obj(ringbuf);
+error_destroy_rbuf:
+	intel_destroy_ringbuffer_obj(ringbuf);
+error_free_rbuf:
 	kfree(ringbuf);
+error_unpin_ctx:
 	if (is_global_default_ctx)
 		i915_gem_object_ggtt_unpin(ctx_obj);
 	drm_gem_object_unreference(&ctx_obj->base);
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 14b216b..21233a0 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -110,6 +110,7 @@ struct intel_ctx_submit_request {
 	struct list_head execlist_link;
 
 	int elsp_submitted;
+	bool need_unpin;
 };
 
 void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index a8f72e8..0c4aab1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1721,13 +1721,42 @@ static int init_phys_status_page(struct intel_engine_cs *ring)
 	return 0;
 }
 
-void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
+void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
 {
-	if (!ringbuf->obj)
-		return;
-
 	iounmap(ringbuf->virtual_start);
+	ringbuf->virtual_start = NULL;
 	i915_gem_object_ggtt_unpin(ringbuf->obj);
+}
+
+int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
+				     struct intel_ringbuffer *ringbuf)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_gem_object *obj = ringbuf->obj;
+	int ret;
+
+	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_object_set_to_gtt_domain(obj, true);
+	if (ret) {
+		i915_gem_object_ggtt_unpin(obj);
+		return ret;
+	}
+
+	ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
+			i915_gem_obj_ggtt_offset(obj), ringbuf->size);
+	if (ringbuf->virtual_start == NULL) {
+		i915_gem_object_ggtt_unpin(obj);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
+{
 	drm_gem_object_unreference(&ringbuf->obj->base);
 	ringbuf->obj = NULL;
 }
@@ -1735,12 +1764,7 @@ void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
 int intel_alloc_ringbuffer_obj(struct drm_device *dev,
 			       struct intel_ringbuffer *ringbuf)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_gem_object *obj;
-	int ret;
-
-	if (ringbuf->obj)
-		return 0;
 
 	obj = NULL;
 	if (!HAS_LLC(dev))
@@ -1753,30 +1777,9 @@ int intel_alloc_ringbuffer_obj(struct drm_device *dev,
 	/* mark ring buffers as read-only from GPU side by default */
 	obj->gt_ro = 1;
 
-	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
-	if (ret)
-		goto err_unref;
-
-	ret = i915_gem_object_set_to_gtt_domain(obj, true);
-	if (ret)
-		goto err_unpin;
-
-	ringbuf->virtual_start =
-		ioremap_wc(dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj),
-				ringbuf->size);
-	if (ringbuf->virtual_start == NULL) {
-		ret = -EINVAL;
-		goto err_unpin;
-	}
-
 	ringbuf->obj = obj;
-	return 0;
 
-err_unpin:
-	i915_gem_object_ggtt_unpin(obj);
-err_unref:
-	drm_gem_object_unreference(&obj->base);
-	return ret;
+	return 0;
 }
 
 static int intel_init_ring_buffer(struct drm_device *dev,
@@ -1813,10 +1816,21 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 			goto error;
 	}
 
-	ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
-	if (ret) {
-		DRM_ERROR("Failed to allocate ringbuffer %s: %d\n", ring->name, ret);
-		goto error;
+	if (ringbuf->obj == NULL) {
+		ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
+		if (ret) {
+			DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
+					ring->name, ret);
+			goto error;
+		}
+
+		ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
+		if (ret) {
+			DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
+					ring->name, ret);
+			intel_destroy_ringbuffer_obj(ringbuf);
+			goto error;
+		}
 	}
 
 	/* Workaround an erratum on the i830 which causes a hang if
@@ -1854,6 +1868,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
 	intel_stop_ring_buffer(ring);
 	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
 
+	intel_unpin_ringbuffer_obj(ringbuf);
 	intel_destroy_ringbuffer_obj(ringbuf);
 	ring->preallocated_lazy_request = NULL;
 	ring->outstanding_lazy_seqno = 0;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 8c002d2..365854ad 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -382,6 +382,9 @@ intel_write_status_page(struct intel_engine_cs *ring,
 #define I915_GEM_HWS_SCRATCH_INDEX	0x30
 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 
+void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
+int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
+				     struct intel_ringbuffer *ringbuf);
 void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
 int intel_alloc_ringbuffer_obj(struct drm_device *dev,
 			       struct intel_ringbuffer *ringbuf);
-- 
1.7.9.5

* Re: [PATCH 4/4] drm/i915/bdw: Pin the ringbuffer backing
From: shuang.he @ 2014-10-29 14:38 UTC (permalink / raw)
  To: shuang.he, intel-gfx, thomas.daniel

Tested-By: PRC QA PRTS (Patch Regression Test System; contact: shuang.he@intel.com)
-------------------------------------Summary-------------------------------------
Platform: baseline_drm_intel_nightly_pass_rate->patch_applied_pass_rate
BYT: pass/total=354/354->353/354
PNV: pass/total=331/331->331/331
ILK: pass/total=341/342->338/342
IVB: pass/total=271/271->271/271
SNB: pass/total=271/271->271/271
HSW: pass/total=271/271->271/271
BDW: pass/total=271/271->271/271
-------------------------------------Detailed-------------------------------------
test_platform: test_suite, test_case, result_with_drm_intel_nightly->result_with_patch_applied
BYT: Intel_gpu_tools, igt_gem_bad_reloc_negative-reloc-lut, PASS->NSPT
ILK: Intel_gpu_tools, igt_drv_suspend_debugfs-reader, PASS->DMESG_WARN
ILK: Intel_gpu_tools, igt_kms_flip_wf_vblank-interruptible, PASS->DMESG_WARN
ILK: Intel_gpu_tools, igt_kms_flip_wf_vblank-vs-modeset, PASS->DMESG_WARN

* Re: [PATCH 1/4] drm/i915/bdw: Clean up execlist queue items in retire_work
From: Daniel Vetter @ 2014-11-03 15:33 UTC (permalink / raw)
  To: Thomas Daniel; +Cc: intel-gfx, shuang.he

On Wed, Oct 29, 2014 at 09:52:50AM +0000, Thomas Daniel wrote:
> No longer create a work item to clean each execlist queue item.
> Instead, move retired execlist requests to a queue and clean up the
> items during retire_requests.
> 
> v2: Fix legacy ring path broken during overzealous cleanup
> 
> v3: Update idle detection to take execlists queue into account
> 
> Issue: VIZ-4274
> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c         |    4 +++
>  drivers/gpu/drm/i915/intel_lrc.c        |   52 ++++++++++++++++++-------------
>  drivers/gpu/drm/i915/intel_lrc.h        |    2 +-
>  drivers/gpu/drm/i915/intel_ringbuffer.h |    1 +
>  4 files changed, 36 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 827edb5..df28202 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2718,6 +2718,10 @@ i915_gem_retire_requests(struct drm_device *dev)
>  	for_each_ring(ring, dev_priv, i) {
>  		i915_gem_retire_requests_ring(ring);
>  		idle &= list_empty(&ring->request_list);
> +		if (i915.enable_execlists) {
> +			idle &= list_empty(&ring->execlist_queue);
> +			intel_execlists_retire_requests(ring);

This needs to be the other way round I think - we care about idleness
after all the currently processed stuff is retired, not before. Otherwise
we might notice the busy->idle transition one invocation too late.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: [PATCH 2/4] drm/i915/bdw: Setup global hardware status page in execlists mode
From: Daniel Vetter @ 2014-11-03 15:47 UTC (permalink / raw)
  To: Thomas Daniel; +Cc: intel-gfx, shuang.he

On Wed, Oct 29, 2014 at 09:52:51AM +0000, Thomas Daniel wrote:
> Write HWS_PGA address even in execlists mode as the global hardware status
> page is still required.  This address was previously uninitialized and
> HWSP writes would clobber whatever buffer happened to reside at GGTT
> address 0.
> 
> v2: Break out hardware status page setup into a separate function.
> 
> Issue: VIZ-2020
> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>

Queued for -next, thanks for the patch.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: [PATCH 1/4] drm/i915/bdw: Clean up execlist queue items in retire_work
From: Daniel, Thomas @ 2014-11-03 16:05 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

> -----Original Message-----
> From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of Daniel
> Vetter
> Sent: Monday, November 03, 2014 3:33 PM
> To: Daniel, Thomas
> Cc: intel-gfx@lists.freedesktop.org; shuang.he@linux.intel.com
> Subject: Re: [Intel-gfx] [PATCH 1/4] drm/i915/bdw: Clean up execlist queue
> items in retire_work
> 
> On Wed, Oct 29, 2014 at 09:52:50AM +0000, Thomas Daniel wrote:
> > No longer create a work item to clean each execlist queue item.
> > Instead, move retired execlist requests to a queue and clean up the
> > items during retire_requests.
> >
> > v2: Fix legacy ring path broken during overzealous cleanup
> >
> > v3: Update idle detection to take execlists queue into account
> >
> > Issue: VIZ-4274
> > Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c         |    4 +++
> >  drivers/gpu/drm/i915/intel_lrc.c        |   52 ++++++++++++++++++-----------
> --
> >  drivers/gpu/drm/i915/intel_lrc.h        |    2 +-
> >  drivers/gpu/drm/i915/intel_ringbuffer.h |    1 +
> >  4 files changed, 36 insertions(+), 23 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c
> > b/drivers/gpu/drm/i915/i915_gem.c index 827edb5..df28202 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -2718,6 +2718,10 @@ i915_gem_retire_requests(struct drm_device
> *dev)
> >  	for_each_ring(ring, dev_priv, i) {
> >  		i915_gem_retire_requests_ring(ring);
> >  		idle &= list_empty(&ring->request_list);
> > +		if (i915.enable_execlists) {
> > +			idle &= list_empty(&ring->execlist_queue);
> > +			intel_execlists_retire_requests(ring);
> 
> This needs to be the other way round I think - we care about idleness after all
> the currently processed stuff is retired, not before. Otherwise we might
> notice the busy->idle transition one invocation too late.
I thought for a while about this.  The GPU will be idle when the
execlist_queues are empty.
intel_execlists_retire_requests() only cleans up requests which have
already finished, so it is more conservative (in terms of reported
idleness) to check the queue beforehand.  I thought this would be more
desirable than potentially reporting idleness early...
intel_execlists_retire_requests() cannot affect the state of the queue.
And there is no point checking the execlist_retired_req_list, because
intel_execlists_retire_requests() always empties it.
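
To spell out the ordering argument on the loop as posted (the comment is
added here for illustration, it is not in the patch):

    for_each_ring(ring, dev_priv, i) {
            i915_gem_retire_requests_ring(ring);
            idle &= list_empty(&ring->request_list);
            if (i915.enable_execlists) {
                    /* Sample the queue before draining the retired
                     * list: at worst the busy->idle transition is
                     * reported one invocation late, never early. */
                    idle &= list_empty(&ring->execlist_queue);
                    intel_execlists_retire_requests(ring);
            }
    }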

Thomas.

> -Daniel
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: [PATCH 1/4] drm/i915/bdw: Clean up execlist queue items in retire_work
From: Daniel Vetter @ 2014-11-03 16:17 UTC (permalink / raw)
  To: Daniel, Thomas; +Cc: intel-gfx

On Mon, Nov 03, 2014 at 04:05:03PM +0000, Daniel, Thomas wrote:
> > -----Original Message-----
> > From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of Daniel
> > Vetter
> > Sent: Monday, November 03, 2014 3:33 PM
> > To: Daniel, Thomas
> > Cc: intel-gfx@lists.freedesktop.org; shuang.he@linux.intel.com
> > Subject: Re: [Intel-gfx] [PATCH 1/4] drm/i915/bdw: Clean up execlist queue
> > items in retire_work
> > 
> > On Wed, Oct 29, 2014 at 09:52:50AM +0000, Thomas Daniel wrote:
> > > No longer create a work item to clean each execlist queue item.
> > > Instead, move retired execlist requests to a queue and clean up the
> > > items during retire_requests.
> > >
> > > v2: Fix legacy ring path broken during overzealous cleanup
> > >
> > > v3: Update idle detection to take execlists queue into account
> > >
> > > Issue: VIZ-4274
> > > Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/i915_gem.c         |    4 +++
> > >  drivers/gpu/drm/i915/intel_lrc.c        |   52 ++++++++++++++++++-----------
> > --
> > >  drivers/gpu/drm/i915/intel_lrc.h        |    2 +-
> > >  drivers/gpu/drm/i915/intel_ringbuffer.h |    1 +
> > >  4 files changed, 36 insertions(+), 23 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/i915_gem.c
> > > b/drivers/gpu/drm/i915/i915_gem.c index 827edb5..df28202 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > > @@ -2718,6 +2718,10 @@ i915_gem_retire_requests(struct drm_device
> > *dev)
> > >  	for_each_ring(ring, dev_priv, i) {
> > >  		i915_gem_retire_requests_ring(ring);
> > >  		idle &= list_empty(&ring->request_list);
> > > +		if (i915.enable_execlists) {
> > > +			idle &= list_empty(&ring->execlist_queue);
> > > +			intel_execlists_retire_requests(ring);
> > 
> > This needs to be the other way round I think - we care about idleness after all
> > the currently processed stuff is retired, not before. Otherwise we might
> > notice the busy->idle transition one invocation too late.
> I thought for a while about this.  The GPU will be idle when the
> execlist_queues are empty.
> Intel_execlists_retire_requests() cleans up requests which have already
> finished so it is more conservative (in terms of CPU idleness) to check the
> queue beforehand.  I thought this would be more desirable than
> potentially reporting idleness early...
> Intel_execlists_retire_requests() can not affect the state of the queue.
> And there is no point checking the execlist_retired_req_list because
> execlists_retire_requests() always empties it.

Ok, I mixed things up without looking ;-)

But that means you access the execlist_queue, which is also accessed from
irq code, without holding the required locks? This is all a bit confusing
to poor me ...
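
Presumably the idle check would need to sample the queue under the lock,
i.e. something like this (hypothetical, not what the patch does):

    spin_lock_irqsave(&ring->execlist_lock, flags);
    idle &= list_empty(&ring->execlist_queue);
    spin_unlock_irqrestore(&ring->execlist_lock, flags);
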
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: [PATCH 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
From: Daniel Vetter @ 2014-11-03 16:54 UTC (permalink / raw)
  To: Thomas Daniel; +Cc: intel-gfx, shuang.he

On Wed, Oct 29, 2014 at 09:52:52AM +0000, Thomas Daniel wrote:
> From: Oscar Mateo <oscar.mateo@intel.com>
> 
> Up until now, we have pinned every logical ring context backing object
> during creation, and left it pinned until destruction. This made my life
> easier, but it's a harmful thing to do, because we cause fragmentation
> of the GGTT (and, eventually, we would run out of space).
> 
> This patch makes the pinning on-demand: the backing objects of the two
> contexts that are written to the ELSP are pinned right before submission
> and unpinned once the hardware is done with them. The only context that
> is still pinned regardless is the global default one, so that the HWS can
> still be accessed in the same way (ring->status_page).
> 
> v2: In the early version of this patch, we were pinning the context as
> we put it into the ELSP: on the one hand, this is very efficient because
> only a maximum of two contexts are pinned at any given time, but on the other
> hand, we cannot really pin in interrupt time :(
> 
> v3: Use a mutex rather than atomic_t to protect pin count to avoid races.
> Do not unpin default context in free_request.
> 
> v4: Break out pin and unpin into functions.  Fix style problems reported
> by checkpatch
> 
> Issue: VIZ-4277

This doesn't really do the full task since the integration with the
shrinker and related igt testcases are missing. What's your plan here?
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: [PATCH 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
From: Daniel, Thomas @ 2014-11-03 17:00 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

> -----Original Message-----
> From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of Daniel
> Vetter
> Sent: Monday, November 03, 2014 4:54 PM
> To: Daniel, Thomas
> Cc: intel-gfx@lists.freedesktop.org; shuang.he@linux.intel.com
> Subject: Re: [Intel-gfx] [PATCH 3/4] drm/i915/bdw: Pin the context backing
> objects to GGTT on-demand
> 
> On Wed, Oct 29, 2014 at 09:52:52AM +0000, Thomas Daniel wrote:
> > From: Oscar Mateo <oscar.mateo@intel.com>
> >
> > Up until now, we have pinned every logical ring context backing object
> > during creation, and left it pinned until destruction. This made my
> > life easier, but it's a harmful thing to do, because we cause
> > fragmentation of the GGTT (and, eventually, we would run out of space).
> >
> > This patch makes the pinning on-demand: the backing objects of the two
> > contexts that are written to the ELSP are pinned right before
> > submission and unpinned once the hardware is done with them. The only
> > context that is still pinned regardless is the global default one, so
> > that the HWS can still be accessed in the same way (ring->status_page).
> >
> > v2: In the early version of this patch, we were pinning the context as
> > we put it into the ELSP: on the one hand, this is very efficient
> > because only a maximum of two contexts are pinned at any given time, but
> > on the other hand, we cannot really pin in interrupt time :(
> >
> > v3: Use a mutex rather than atomic_t to protect pin count to avoid races.
> > Do not unpin default context in free_request.
> >
> > v4: Break out pin and unpin into functions.  Fix style problems
> > reported by checkpatch
> >
> > Issue: VIZ-4277
> 
> This doesn't really do the full task since the integration with the shrinker and
> related igt testcases are missing. What's your plan here?
This is a rebase and bug fix of the original patch to unblock execlists
enabling.  The plan is to address the rest of the issues after the big
seqno->request rearchitecting change goes in.

Thomas.

> -Daniel
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: [PATCH 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
From: Daniel Vetter @ 2014-11-03 17:11 UTC (permalink / raw)
  To: Daniel, Thomas; +Cc: intel-gfx

On Mon, Nov 03, 2014 at 05:00:35PM +0000, Daniel, Thomas wrote:
> > -----Original Message-----
> > From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of Daniel
> > Vetter
> > Sent: Monday, November 03, 2014 4:54 PM
> > To: Daniel, Thomas
> > Cc: intel-gfx@lists.freedesktop.org; shuang.he@linux.intel.com
> > Subject: Re: [Intel-gfx] [PATCH 3/4] drm/i915/bdw: Pin the context backing
> > objects to GGTT on-demand
> > 
> > On Wed, Oct 29, 2014 at 09:52:52AM +0000, Thomas Daniel wrote:
> > > From: Oscar Mateo <oscar.mateo@intel.com>
> > >
> > > Up until now, we have pinned every logical ring context backing object
> > > during creation, and left it pinned until destruction. This made my
> > > life easier, but it's a harmful thing to do, because we cause
> > > fragmentation of the GGTT (and, eventually, we would run out of space).
> > >
> > > This patch makes the pinning on-demand: the backing objects of the two
> > > contexts that are written to the ELSP are pinned right before
> > > submission and unpinned once the hardware is done with them. The only
> > > context that is still pinned regardless is the global default one, so
> > > that the HWS can still be accessed in the same way (ring->status_page).
> > >
> > > v2: In the early version of this patch, we were pinning the context as
> > > we put it into the ELSP: on the one hand, this is very efficient
> > > because only a maximum two contexts are pinned at any given time, but
> > > on the other hand, we cannot really pin in interrupt time :(
> > >
> > > v3: Use a mutex rather than atomic_t to protect pin count to avoid races.
> > > Do not unpin default context in free_request.
> > >
> > > v4: Break out pin and unpin into functions.  Fix style problems
> > > reported by checkpatch
> > >
> > > Issue: VIZ-4277
> > 
> > This doesn't really do the full task since the integration with the shrinker and
> > related igt testcases are missing. What's your plan here?
> This is a rebase and bug fix of the original patch to unblock execlists
> enabling.  The plan is to address the rest of the issues after the big
> seqno->request rearchitecting change goes in.

Hm, ok, makes sense. Please find a review victim for the remaining 3
patches, preferably someone who digs around in gem too and is not from the
vpg london team (to spread the knowledge of all this a bit).

Thanks, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: [PATCH 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
  2014-11-03 16:54   ` Daniel Vetter
  2014-11-03 17:00     ` Daniel, Thomas
@ 2014-11-03 21:04     ` Chris Wilson
  1 sibling, 0 replies; 42+ messages in thread
From: Chris Wilson @ 2014-11-03 21:04 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx, shuang.he

On Mon, Nov 03, 2014 at 05:54:16PM +0100, Daniel Vetter wrote:
> On Wed, Oct 29, 2014 at 09:52:52AM +0000, Thomas Daniel wrote:
> > From: Oscar Mateo <oscar.mateo@intel.com>
> > 
> > Up until now, we have pinned every logical ring context backing object
> > during creation, and left it pinned until destruction. This made my life
> > easier, but it's a harmful thing to do, because we cause fragmentation
> > of the GGTT (and, eventually, we would run out of space).
> > 
> > This patch makes the pinning on-demand: the backing objects of the two
> > contexts that are written to the ELSP are pinned right before submission
> > and unpinned once the hardware is done with them. The only context that
> > is still pinned regardless is the global default one, so that the HWS can
> > still be accessed in the same way (ring->status_page).
> > 
> > v2: In the early version of this patch, we were pinning the context as
> > we put it into the ELSP: on the one hand, this is very efficient because
> > only a maximum of two contexts are pinned at any given time, but on the other
> > hand, we cannot really pin in interrupt time :(
> > 
> > v3: Use a mutex rather than atomic_t to protect pin count to avoid races.
> > Do not unpin default context in free_request.
> > 
> > v4: Break out pin and unpin into functions.  Fix style problems reported
> > by checkpatch
> > 
> > Issue: VIZ-4277
> 
> This doesn't really do the full task since the integration with the
> shrinker and related igt testcases are missing. What's your plan here?

Oh, I have patches for that. It is remarkably simple.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 1/4] drm/i915/bdw: Clean up execlist queue items in retire_work
  2014-10-29  9:52 [PATCH 1/4] drm/i915/bdw: Clean up execlist queue items in retire_work Thomas Daniel
                   ` (3 preceding siblings ...)
  2014-11-03 15:33 ` [PATCH 1/4] drm/i915/bdw: Clean up execlist queue items in retire_work Daniel Vetter
@ 2014-11-04  9:11 ` Chris Wilson
  2014-11-07 14:55 ` [PATCH v4 " Thomas Daniel
  5 siblings, 0 replies; 42+ messages in thread
From: Chris Wilson @ 2014-11-04  9:11 UTC (permalink / raw)
  To: Thomas Daniel; +Cc: intel-gfx, shuang.he

On Wed, Oct 29, 2014 at 09:52:50AM +0000, Thomas Daniel wrote:
> No longer create a work item to clean each execlist queue item.
> Instead, move retired execlist requests to a queue and clean up the
> items during retire_requests.
> 
> v2: Fix legacy ring path broken during overzealous cleanup
> 
> v3: Update idle detection to take execlists queue into account
> 
> Issue: VIZ-4274
> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c         |    4 +++
>  drivers/gpu/drm/i915/intel_lrc.c        |   52 ++++++++++++++++++-------------
>  drivers/gpu/drm/i915/intel_lrc.h        |    2 +-
>  drivers/gpu/drm/i915/intel_ringbuffer.h |    1 +
>  4 files changed, 36 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 827edb5..df28202 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2718,6 +2718,10 @@ i915_gem_retire_requests(struct drm_device *dev)
>  	for_each_ring(ring, dev_priv, i) {
>  		i915_gem_retire_requests_ring(ring);
>  		idle &= list_empty(&ring->request_list);
> +		if (i915.enable_execlists) {

Every time you do this, a kitten dies.

If only we had an intel_engine_cs that could abstract the differences
between retirement on the various submission ports and encapsulate that
away from the core GEM buffer/request handling.

If only I hadn't already sent a patch showing exactly how to do that.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
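
For context, a minimal sketch of the kind of abstraction being suggested
here, assuming a hypothetical retire hook in intel_engine_cs (the hook name
and wiring are illustrative only, not the patch Chris mentions):

    /* Hypothetical: each submission backend retires its own bookkeeping,
     * so core GEM never has to test i915.enable_execlists. */
    struct intel_engine_cs {
            /* ... existing members ... */
            void (*retire)(struct intel_engine_cs *ring);
    };

    /* i915_gem_retire_requests() then stays submission-agnostic: */
    for_each_ring(ring, dev_priv, i) {
            i915_gem_retire_requests_ring(ring);
            idle &= list_empty(&ring->request_list);
            if (ring->retire)
                    ring->retire(ring); /* execlists drains its retired queue */
    }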

* [PATCH v4 1/4] drm/i915/bdw: Clean up execlist queue items in retire_work
  2014-10-29  9:52 [PATCH 1/4] drm/i915/bdw: Clean up execlist queue items in retire_work Thomas Daniel
                   ` (4 preceding siblings ...)
  2014-11-04  9:11 ` Chris Wilson
@ 2014-11-07 14:55 ` Thomas Daniel
  2014-11-13 10:27   ` [PATCH v5 " Thomas Daniel
  5 siblings, 1 reply; 42+ messages in thread
From: Thomas Daniel @ 2014-11-07 14:55 UTC (permalink / raw)
  To: intel-gfx

No longer create a work item to clean each execlist queue item.
Instead, move retired execlist requests to a queue and clean up the
items during retire_requests.

v2: Fix legacy ring path broken during overzealous cleanup

v3: Update idle detection to take execlists queue into account

v4: Grab execlist lock when checking queue state

Issue: VIZ-4274
Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c         |    9 ++++++
 drivers/gpu/drm/i915/intel_lrc.c        |   52 ++++++++++++++++++-------------
 drivers/gpu/drm/i915/intel_lrc.h        |    2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h |    1 +
 4 files changed, 41 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 827edb5..408afe7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2718,6 +2718,15 @@ i915_gem_retire_requests(struct drm_device *dev)
 	for_each_ring(ring, dev_priv, i) {
 		i915_gem_retire_requests_ring(ring);
 		idle &= list_empty(&ring->request_list);
+		if (i915.enable_execlists) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&ring->execlist_lock, flags);
+			idle &= list_empty(&ring->execlist_queue);
+			spin_unlock_irqrestore(&ring->execlist_lock, flags);
+
+			intel_execlists_retire_requests(ring);
+		}
 	}
 
 	if (idle)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cd74e5c..87ce445 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -386,7 +386,6 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
 {
 	struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
 	struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 
 	assert_spin_locked(&ring->execlist_lock);
 
@@ -403,7 +402,8 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
 			 * will update tail past first request's workload */
 			cursor->elsp_submitted = req0->elsp_submitted;
 			list_del(&req0->execlist_link);
-			queue_work(dev_priv->wq, &req0->work);
+			list_add_tail(&req0->execlist_link,
+				&ring->execlist_retired_req_list);
 			req0 = cursor;
 		} else {
 			req1 = cursor;
@@ -425,7 +425,6 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
 static bool execlists_check_remove_request(struct intel_engine_cs *ring,
 					   u32 request_id)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct intel_ctx_submit_request *head_req;
 
 	assert_spin_locked(&ring->execlist_lock);
@@ -443,7 +442,8 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring,
 
 			if (--head_req->elsp_submitted <= 0) {
 				list_del(&head_req->execlist_link);
-				queue_work(dev_priv->wq, &head_req->work);
+				list_add_tail(&head_req->execlist_link,
+					&ring->execlist_retired_req_list);
 				return true;
 			}
 		}
@@ -512,22 +512,6 @@ void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring)
 		   ((u32)ring->next_context_status_buffer & 0x07) << 8);
 }
 
-static void execlists_free_request_task(struct work_struct *work)
-{
-	struct intel_ctx_submit_request *req =
-		container_of(work, struct intel_ctx_submit_request, work);
-	struct drm_device *dev = req->ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	intel_runtime_pm_put(dev_priv);
-
-	mutex_lock(&dev->struct_mutex);
-	i915_gem_context_unreference(req->ctx);
-	mutex_unlock(&dev->struct_mutex);
-
-	kfree(req);
-}
-
 static int execlists_context_queue(struct intel_engine_cs *ring,
 				   struct intel_context *to,
 				   u32 tail)
@@ -544,7 +528,6 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
 	i915_gem_context_reference(req->ctx);
 	req->ring = ring;
 	req->tail = tail;
-	INIT_WORK(&req->work, execlists_free_request_task);
 
 	intel_runtime_pm_get(dev_priv);
 
@@ -565,7 +548,8 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
 			WARN(tail_req->elsp_submitted != 0,
 			     "More than 2 already-submitted reqs queued\n");
 			list_del(&tail_req->execlist_link);
-			queue_work(dev_priv->wq, &tail_req->work);
+			list_add_tail(&tail_req->execlist_link,
+				&ring->execlist_retired_req_list);
 		}
 	}
 
@@ -733,6 +717,29 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
 	return 0;
 }
 
+void intel_execlists_retire_requests(struct intel_engine_cs *ring)
+{
+	struct intel_ctx_submit_request *req, *tmp;
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	unsigned long flags;
+	struct list_head retired_list;
+
+	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+	if (list_empty(&ring->execlist_retired_req_list))
+		return;
+
+	INIT_LIST_HEAD(&retired_list);
+	spin_lock_irqsave(&ring->execlist_lock, flags);
+	list_replace_init(&ring->execlist_retired_req_list, &retired_list);
+	spin_unlock_irqrestore(&ring->execlist_lock, flags);
+
+	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
+		intel_runtime_pm_put(dev_priv);
+		i915_gem_context_unreference(req->ctx);
+		list_del(&req->execlist_link);
+	}
+}
+
 void intel_logical_ring_stop(struct intel_engine_cs *ring)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -1248,6 +1255,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
 	init_waitqueue_head(&ring->irq_queue);
 
 	INIT_LIST_HEAD(&ring->execlist_queue);
+	INIT_LIST_HEAD(&ring->execlist_retired_req_list);
 	spin_lock_init(&ring->execlist_lock);
 	ring->next_context_status_buffer = 0;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 33c3b4b..84bbf19 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -104,11 +104,11 @@ struct intel_ctx_submit_request {
 	u32 tail;
 
 	struct list_head execlist_link;
-	struct work_struct work;
 
 	int elsp_submitted;
 };
 
 void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring);
+void intel_execlists_retire_requests(struct intel_engine_cs *ring);
 
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 96479c8..8c002d2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -235,6 +235,7 @@ struct  intel_engine_cs {
 	/* Execlists */
 	spinlock_t execlist_lock;
 	struct list_head execlist_queue;
+	struct list_head execlist_retired_req_list;
 	u8 next_context_status_buffer;
 	u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
 	int		(*emit_request)(struct intel_ringbuffer *ringbuf);
-- 
1.7.9.5
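
A note on the v4 change: ring->execlist_lock is also taken in interrupt
context by intel_execlists_handle_ctx_events(), so the retire-side peek
must use the irqsave variant or the IRQ could fire on the same CPU while
the lock is held and spin forever. Condensed from the patch above:

    /* Process context (i915_gem_retire_requests, struct_mutex held): */
    unsigned long flags;

    spin_lock_irqsave(&ring->execlist_lock, flags); /* blocks the IRQ path */
    idle &= list_empty(&ring->execlist_queue);
    spin_unlock_irqrestore(&ring->execlist_lock, flags);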


* [PATCH v5 1/4] drm/i915/bdw: Clean up execlist queue items in retire_work
  2014-11-07 14:55 ` [PATCH v4 " Thomas Daniel
@ 2014-11-13 10:27   ` Thomas Daniel
  2014-11-18  6:29     ` Deepak S
  0 siblings, 1 reply; 42+ messages in thread
From: Thomas Daniel @ 2014-11-13 10:27 UTC (permalink / raw)
  To: intel-gfx

No longer create a work item to clean each execlist queue item.
Instead, move retired execlist requests to a queue and clean up the
items during retire_requests.

v2: Fix legacy ring path broken during overzealous cleanup

v3: Update idle detection to take execlists queue into account

v4: Grab execlist lock when checking queue state

v5: Fix leaking requests by freeing in execlists_retire_requests.

Issue: VIZ-4274
Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c         |    9 ++++++
 drivers/gpu/drm/i915/intel_lrc.c        |   53 ++++++++++++++++++-------------
 drivers/gpu/drm/i915/intel_lrc.h        |    2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h |    1 +
 4 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 827edb5..408afe7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2718,6 +2718,15 @@ i915_gem_retire_requests(struct drm_device *dev)
 	for_each_ring(ring, dev_priv, i) {
 		i915_gem_retire_requests_ring(ring);
 		idle &= list_empty(&ring->request_list);
+		if (i915.enable_execlists) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&ring->execlist_lock, flags);
+			idle &= list_empty(&ring->execlist_queue);
+			spin_unlock_irqrestore(&ring->execlist_lock, flags);
+
+			intel_execlists_retire_requests(ring);
+		}
 	}
 
 	if (idle)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cd74e5c..d920297 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -386,7 +386,6 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
 {
 	struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
 	struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 
 	assert_spin_locked(&ring->execlist_lock);
 
@@ -403,7 +402,8 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
 			 * will update tail past first request's workload */
 			cursor->elsp_submitted = req0->elsp_submitted;
 			list_del(&req0->execlist_link);
-			queue_work(dev_priv->wq, &req0->work);
+			list_add_tail(&req0->execlist_link,
+				&ring->execlist_retired_req_list);
 			req0 = cursor;
 		} else {
 			req1 = cursor;
@@ -425,7 +425,6 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
 static bool execlists_check_remove_request(struct intel_engine_cs *ring,
 					   u32 request_id)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct intel_ctx_submit_request *head_req;
 
 	assert_spin_locked(&ring->execlist_lock);
@@ -443,7 +442,8 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring,
 
 			if (--head_req->elsp_submitted <= 0) {
 				list_del(&head_req->execlist_link);
-				queue_work(dev_priv->wq, &head_req->work);
+				list_add_tail(&head_req->execlist_link,
+					&ring->execlist_retired_req_list);
 				return true;
 			}
 		}
@@ -512,22 +512,6 @@ void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring)
 		   ((u32)ring->next_context_status_buffer & 0x07) << 8);
 }
 
-static void execlists_free_request_task(struct work_struct *work)
-{
-	struct intel_ctx_submit_request *req =
-		container_of(work, struct intel_ctx_submit_request, work);
-	struct drm_device *dev = req->ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	intel_runtime_pm_put(dev_priv);
-
-	mutex_lock(&dev->struct_mutex);
-	i915_gem_context_unreference(req->ctx);
-	mutex_unlock(&dev->struct_mutex);
-
-	kfree(req);
-}
-
 static int execlists_context_queue(struct intel_engine_cs *ring,
 				   struct intel_context *to,
 				   u32 tail)
@@ -544,7 +528,6 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
 	i915_gem_context_reference(req->ctx);
 	req->ring = ring;
 	req->tail = tail;
-	INIT_WORK(&req->work, execlists_free_request_task);
 
 	intel_runtime_pm_get(dev_priv);
 
@@ -565,7 +548,8 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
 			WARN(tail_req->elsp_submitted != 0,
 			     "More than 2 already-submitted reqs queued\n");
 			list_del(&tail_req->execlist_link);
-			queue_work(dev_priv->wq, &tail_req->work);
+			list_add_tail(&tail_req->execlist_link,
+				&ring->execlist_retired_req_list);
 		}
 	}
 
@@ -733,6 +717,30 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
 	return 0;
 }
 
+void intel_execlists_retire_requests(struct intel_engine_cs *ring)
+{
+	struct intel_ctx_submit_request *req, *tmp;
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	unsigned long flags;
+	struct list_head retired_list;
+
+	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+	if (list_empty(&ring->execlist_retired_req_list))
+		return;
+
+	INIT_LIST_HEAD(&retired_list);
+	spin_lock_irqsave(&ring->execlist_lock, flags);
+	list_replace_init(&ring->execlist_retired_req_list, &retired_list);
+	spin_unlock_irqrestore(&ring->execlist_lock, flags);
+
+	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
+		intel_runtime_pm_put(dev_priv);
+		i915_gem_context_unreference(req->ctx);
+		list_del(&req->execlist_link);
+		kfree(req);
+	}
+}
+
 void intel_logical_ring_stop(struct intel_engine_cs *ring)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -1248,6 +1256,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
 	init_waitqueue_head(&ring->irq_queue);
 
 	INIT_LIST_HEAD(&ring->execlist_queue);
+	INIT_LIST_HEAD(&ring->execlist_retired_req_list);
 	spin_lock_init(&ring->execlist_lock);
 	ring->next_context_status_buffer = 0;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 33c3b4b..84bbf19 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -104,11 +104,11 @@ struct intel_ctx_submit_request {
 	u32 tail;
 
 	struct list_head execlist_link;
-	struct work_struct work;
 
 	int elsp_submitted;
 };
 
 void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring);
+void intel_execlists_retire_requests(struct intel_engine_cs *ring);
 
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 96479c8..8c002d2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -235,6 +235,7 @@ struct  intel_engine_cs {
 	/* Execlists */
 	spinlock_t execlist_lock;
 	struct list_head execlist_queue;
+	struct list_head execlist_retired_req_list;
 	u8 next_context_status_buffer;
 	u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
 	int		(*emit_request)(struct intel_ringbuffer *ringbuf);
-- 
1.7.9.5
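
The v5 fix also shows why the retired list is drained in two phases: the
spinlock only guards the list itself, while the teardown needs struct_mutex
(for i915_gem_context_unreference) and must never run under a spinlock.
Condensed from the patch above, with explanatory comments added:

    /* Phase 1: steal the whole list while holding execlist_lock. */
    spin_lock_irqsave(&ring->execlist_lock, flags);
    list_replace_init(&ring->execlist_retired_req_list, &retired_list);
    spin_unlock_irqrestore(&ring->execlist_lock, flags);

    /* Phase 2: heavyweight cleanup outside the spinlock, under
     * struct_mutex (enforced by the WARN_ON at function entry).
     * The kfree(req) is the leak fix v5 adds over v4. */
    list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
            intel_runtime_pm_put(dev_priv);
            i915_gem_context_unreference(req->ctx);
            list_del(&req->execlist_link);
            kfree(req);
    }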


* [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
  2014-10-29  9:52 ` [PATCH 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand Thomas Daniel
  2014-11-03 16:54   ` Daniel Vetter
@ 2014-11-13 10:28   ` Thomas Daniel
  2014-11-17 14:38     ` akash goel
                       ` (3 more replies)
  1 sibling, 4 replies; 42+ messages in thread
From: Thomas Daniel @ 2014-11-13 10:28 UTC (permalink / raw)
  To: intel-gfx

From: Oscar Mateo <oscar.mateo@intel.com>

Up until now, we have pinned every logical ring context backing object
during creation, and left it pinned until destruction. This made my life
easier, but it's a harmful thing to do, because we cause fragmentation
of the GGTT (and, eventually, we would run out of space).

This patch makes the pinning on-demand: the backing objects of the two
contexts that are written to the ELSP are pinned right before submission
and unpinned once the hardware is done with them. The only context that
is still pinned regardless is the global default one, so that the HWS can
still be accessed in the same way (ring->status_page).

v2: In the early version of this patch, we were pinning the context as
we put it into the ELSP: on the one hand, this is very efficient because
only a maximum of two contexts are pinned at any given time, but on the other
hand, we cannot really pin in interrupt time :(

v3: Use a mutex rather than atomic_t to protect pin count to avoid races.
Do not unpin default context in free_request.

v4: Break out pin and unpin into functions.  Fix style problems reported
by checkpatch

v5: Remove unpin_lock as all pinning and unpinning is done with the struct
mutex already locked.  Add WARN_ONs to make sure this is the case in future.

Issue: VIZ-4277
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c |   12 +++++-
 drivers/gpu/drm/i915/i915_drv.h     |    1 +
 drivers/gpu/drm/i915/i915_gem.c     |   39 +++++++++++++-------
 drivers/gpu/drm/i915/intel_lrc.c    |   69 +++++++++++++++++++++++++++++------
 drivers/gpu/drm/i915/intel_lrc.h    |    4 ++
 5 files changed, 98 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e60d5c2..6eaf813 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1799,10 +1799,16 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
 				continue;
 
 			if (ctx_obj) {
-				struct page *page = i915_gem_object_get_page(ctx_obj, 1);
-				uint32_t *reg_state = kmap_atomic(page);
+				struct page *page;
+				uint32_t *reg_state;
 				int j;
 
+				i915_gem_obj_ggtt_pin(ctx_obj,
+						GEN8_LR_CONTEXT_ALIGN, 0);
+
+				page = i915_gem_object_get_page(ctx_obj, 1);
+				reg_state = kmap_atomic(page);
+
 				seq_printf(m, "CONTEXT: %s %u\n", ring->name,
 						intel_execlists_ctx_id(ctx_obj));
 
@@ -1814,6 +1820,8 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
 				}
 				kunmap_atomic(reg_state);
 
+				i915_gem_object_ggtt_unpin(ctx_obj);
+
 				seq_putc(m, '\n');
 			}
 		}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 059330c..3c7299d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -655,6 +655,7 @@ struct intel_context {
 	struct {
 		struct drm_i915_gem_object *state;
 		struct intel_ringbuffer *ringbuf;
+		int unpin_count;
 	} engine[I915_NUM_RINGS];
 
 	struct list_head link;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 408afe7..2ee6996 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2494,12 +2494,18 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
 
 static void i915_gem_free_request(struct drm_i915_gem_request *request)
 {
+	struct intel_context *ctx = request->ctx;
+
 	list_del(&request->list);
 	i915_gem_request_remove_from_client(request);
 
-	if (request->ctx)
-		i915_gem_context_unreference(request->ctx);
+	if (i915.enable_execlists && ctx) {
+		struct intel_engine_cs *ring = request->ring;
 
+		if (ctx != ring->default_context)
+			intel_lr_context_unpin(ring, ctx);
+		i915_gem_context_unreference(ctx);
+	}
 	kfree(request);
 }
 
@@ -2554,6 +2560,23 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 	}
 
 	/*
+	 * Clear the execlists queue up before freeing the requests, as those
+	 * are the ones that keep the context and ringbuffer backing objects
+	 * pinned in place.
+	 */
+	while (!list_empty(&ring->execlist_queue)) {
+		struct intel_ctx_submit_request *submit_req;
+
+		submit_req = list_first_entry(&ring->execlist_queue,
+				struct intel_ctx_submit_request,
+				execlist_link);
+		list_del(&submit_req->execlist_link);
+		intel_runtime_pm_put(dev_priv);
+		i915_gem_context_unreference(submit_req->ctx);
+		kfree(submit_req);
+	}
+
+	/*
 	 * We must free the requests after all the corresponding objects have
 	 * been moved off active lists. Which is the same order as the normal
 	 * retire_requests function does. This is important if object hold
@@ -2570,18 +2593,6 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 		i915_gem_free_request(request);
 	}
 
-	while (!list_empty(&ring->execlist_queue)) {
-		struct intel_ctx_submit_request *submit_req;
-
-		submit_req = list_first_entry(&ring->execlist_queue,
-				struct intel_ctx_submit_request,
-				execlist_link);
-		list_del(&submit_req->execlist_link);
-		intel_runtime_pm_put(dev_priv);
-		i915_gem_context_unreference(submit_req->ctx);
-		kfree(submit_req);
-	}
-
 	/* These may not have been flush before the reset, do so now */
 	kfree(ring->preallocated_lazy_request);
 	ring->preallocated_lazy_request = NULL;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 906b985..f7fa0f7 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -139,8 +139,6 @@
 #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
 #define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
 
-#define GEN8_LR_CONTEXT_ALIGN 4096
-
 #define RING_EXECLIST_QFULL		(1 << 0x2)
 #define RING_EXECLIST1_VALID		(1 << 0x3)
 #define RING_EXECLIST0_VALID		(1 << 0x4)
@@ -801,9 +799,40 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
 	execlists_context_queue(ring, ctx, ringbuf->tail);
 }
 
+static int intel_lr_context_pin(struct intel_engine_cs *ring,
+		struct intel_context *ctx)
+{
+	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+	int ret = 0;
+
+	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+	if (ctx->engine[ring->id].unpin_count++ == 0) {
+		ret = i915_gem_obj_ggtt_pin(ctx_obj,
+				GEN8_LR_CONTEXT_ALIGN, 0);
+		if (ret)
+			ctx->engine[ring->id].unpin_count = 0;
+	}
+
+	return ret;
+}
+
+void intel_lr_context_unpin(struct intel_engine_cs *ring,
+		struct intel_context *ctx)
+{
+	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+
+	if (ctx_obj) {
+		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+		if (--ctx->engine[ring->id].unpin_count == 0)
+			i915_gem_object_ggtt_unpin(ctx_obj);
+	}
+}
+
 static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
 				    struct intel_context *ctx)
 {
+	int ret;
+
 	if (ring->outstanding_lazy_seqno)
 		return 0;
 
@@ -814,6 +843,14 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
 		if (request == NULL)
 			return -ENOMEM;
 
+		if (ctx != ring->default_context) {
+			ret = intel_lr_context_pin(ring, ctx);
+			if (ret) {
+				kfree(request);
+				return ret;
+			}
+		}
+
 		/* Hold a reference to the context this request belongs to
 		 * (we will need it when the time comes to emit/retire the
 		 * request).
@@ -1626,12 +1663,16 @@ void intel_lr_context_free(struct intel_context *ctx)
 
 	for (i = 0; i < I915_NUM_RINGS; i++) {
 		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
-		struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
 
 		if (ctx_obj) {
+			struct intel_ringbuffer *ringbuf =
+					ctx->engine[i].ringbuf;
+			struct intel_engine_cs *ring = ringbuf->ring;
+
 			intel_destroy_ringbuffer_obj(ringbuf);
 			kfree(ringbuf);
-			i915_gem_object_ggtt_unpin(ctx_obj);
+			if (ctx == ring->default_context)
+				i915_gem_object_ggtt_unpin(ctx_obj);
 			drm_gem_object_unreference(&ctx_obj->base);
 		}
 	}
@@ -1695,6 +1736,7 @@ static int lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
 int intel_lr_context_deferred_create(struct intel_context *ctx,
 				     struct intel_engine_cs *ring)
 {
+	const bool is_global_default_ctx = (ctx == ring->default_context);
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_gem_object *ctx_obj;
 	uint32_t context_size;
@@ -1714,18 +1756,22 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 		return ret;
 	}
 
-	ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n", ret);
-		drm_gem_object_unreference(&ctx_obj->base);
-		return ret;
+	if (is_global_default_ctx) {
+		ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
+		if (ret) {
+			DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n",
+					ret);
+			drm_gem_object_unreference(&ctx_obj->base);
+			return ret;
+		}
 	}
 
 	ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
 	if (!ringbuf) {
 		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
 				ring->name);
-		i915_gem_object_ggtt_unpin(ctx_obj);
+		if (is_global_default_ctx)
+			i915_gem_object_ggtt_unpin(ctx_obj);
 		drm_gem_object_unreference(&ctx_obj->base);
 		ret = -ENOMEM;
 		return ret;
@@ -1787,7 +1833,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 
 error:
 	kfree(ringbuf);
-	i915_gem_object_ggtt_unpin(ctx_obj);
+	if (is_global_default_ctx)
+		i915_gem_object_ggtt_unpin(ctx_obj);
 	drm_gem_object_unreference(&ctx_obj->base);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 84bbf19..14b216b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -24,6 +24,8 @@
 #ifndef _INTEL_LRC_H_
 #define _INTEL_LRC_H_
 
+#define GEN8_LR_CONTEXT_ALIGN 4096
+
 /* Execlists regs */
 #define RING_ELSP(ring)			((ring)->mmio_base+0x230)
 #define RING_EXECLIST_STATUS(ring)	((ring)->mmio_base+0x234)
@@ -67,6 +69,8 @@ int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
 void intel_lr_context_free(struct intel_context *ctx);
 int intel_lr_context_deferred_create(struct intel_context *ctx,
 				     struct intel_engine_cs *ring);
+void intel_lr_context_unpin(struct intel_engine_cs *ring,
+		struct intel_context *ctx);
 
 /* Execlists */
 int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
-- 
1.7.9.5
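
On the v5 locking note: the dedicated unpin_lock could be dropped because
every pin/unpin call site already runs under struct_mutex, so the plain int
counter is serialized by that mutex alone. The invariant, condensed from
the patch above (comments are ours):

    /* All callers hold struct_mutex, so unpin_count needs no lock of
     * its own; the WARN_ONs enforce this at runtime. */
    WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
    if (ctx->engine[ring->id].unpin_count++ == 0)  /* first pin maps */
            ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);

    /* ... and on the unpin side: */
    if (--ctx->engine[ring->id].unpin_count == 0)  /* last unpin releases */
            i915_gem_object_ggtt_unpin(ctx_obj);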


* [PATCH v5 4/4] drm/i915/bdw: Pin the ringbuffer backing object to GGTT on-demand
  2014-10-29  9:52 ` [PATCH 4/4] drm/i915/bdw: Pin the ringbuffer backing object " Thomas Daniel
  2014-10-29 14:38   ` [PATCH 4/4] drm/i915/bdw: Pin the ringbuffer backing shuang.he
@ 2014-11-13 10:28   ` Thomas Daniel
  2014-11-18  5:18     ` akash goel
  2014-11-18  6:37     ` Deepak S
  1 sibling, 2 replies; 42+ messages in thread
From: Thomas Daniel @ 2014-11-13 10:28 UTC (permalink / raw)
  To: intel-gfx

Same as with the context, pinning to GGTT regardless is harmful (it
badly fragments the GGTT and can even exhaust it).

Unfortunately, this case is also more complex than the previous one
because we need to map and access the ringbuffer in several places
along the execbuffer path (and we cannot make do by leaving the
default ringbuffer pinned, as before). Also, the context object
itself contains a pointer to the ringbuffer address that we have to
keep updated if we are going to allow the ringbuffer to move around.

v2: Same as with the context pinning, we cannot really do it during
an interrupt. Also, pin the default ringbuffer objects regardless
(makes error capture a lot easier).

v3: Rebased. Take a pin reference of the ringbuffer for each item
in the execlist request queue because the hardware may still be using
the ringbuffer after the MI_USER_INTERRUPT that notifies of the seqno
update has executed.  The ringbuffer must remain pinned until the context save
is complete.  No longer pin and unpin ringbuffer in
populate_lr_context() - this transient address is meaningless and the
pinning can cause a sleep while atomic.

v4: Moved ringbuffer pin and unpin into the lr_context_pin functions.
Downgraded pinning check BUG_ONs to WARN_ONs.

v5: Reinstated WARN_ONs for unexpected execlist states.  Removed unused
variable.

Issue: VIZ-4277
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c        |  102 +++++++++++++++++++++++--------
 drivers/gpu/drm/i915/intel_ringbuffer.c |   85 +++++++++++++++-----------
 drivers/gpu/drm/i915/intel_ringbuffer.h |    3 +
 3 files changed, 128 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f7fa0f7..ca20f91 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -202,6 +202,9 @@ enum {
 };
 #define GEN8_CTX_ID_SHIFT 32
 
+static int intel_lr_context_pin(struct intel_engine_cs *ring,
+		struct intel_context *ctx);
+
 /**
  * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
  * @dev: DRM device.
@@ -339,7 +342,9 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
 }
 
-static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tail)
+static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
+				    struct drm_i915_gem_object *ring_obj,
+				    u32 tail)
 {
 	struct page *page;
 	uint32_t *reg_state;
@@ -348,6 +353,7 @@ static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tai
 	reg_state = kmap_atomic(page);
 
 	reg_state[CTX_RING_TAIL+1] = tail;
+	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
 
 	kunmap_atomic(reg_state);
 
@@ -358,21 +364,25 @@ static int execlists_submit_context(struct intel_engine_cs *ring,
 				    struct intel_context *to0, u32 tail0,
 				    struct intel_context *to1, u32 tail1)
 {
-	struct drm_i915_gem_object *ctx_obj0;
+	struct drm_i915_gem_object *ctx_obj0 = to0->engine[ring->id].state;
+	struct intel_ringbuffer *ringbuf0 = to0->engine[ring->id].ringbuf;
 	struct drm_i915_gem_object *ctx_obj1 = NULL;
+	struct intel_ringbuffer *ringbuf1 = NULL;
 
-	ctx_obj0 = to0->engine[ring->id].state;
 	BUG_ON(!ctx_obj0);
 	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0));
+	WARN_ON(!i915_gem_obj_is_pinned(ringbuf0->obj));
 
-	execlists_ctx_write_tail(ctx_obj0, tail0);
+	execlists_update_context(ctx_obj0, ringbuf0->obj, tail0);
 
 	if (to1) {
+		ringbuf1 = to1->engine[ring->id].ringbuf;
 		ctx_obj1 = to1->engine[ring->id].state;
 		BUG_ON(!ctx_obj1);
 		WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1));
+		WARN_ON(!i915_gem_obj_is_pinned(ringbuf1->obj));
 
-		execlists_ctx_write_tail(ctx_obj1, tail1);
+		execlists_update_context(ctx_obj1, ringbuf1->obj, tail1);
 	}
 
 	execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
@@ -524,6 +534,10 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
 		return -ENOMEM;
 	req->ctx = to;
 	i915_gem_context_reference(req->ctx);
+
+	if (to != ring->default_context)
+		intel_lr_context_pin(ring, to);
+
 	req->ring = ring;
 	req->tail = tail;
 
@@ -544,7 +558,7 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
 
 		if (to == tail_req->ctx) {
 			WARN(tail_req->elsp_submitted != 0,
-			     "More than 2 already-submitted reqs queued\n");
+				"More than 2 already-submitted reqs queued\n");
 			list_del(&tail_req->execlist_link);
 			list_add_tail(&tail_req->execlist_link,
 				&ring->execlist_retired_req_list);
@@ -732,6 +746,12 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
 	spin_unlock_irqrestore(&ring->execlist_lock, flags);
 
 	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
+		struct intel_context *ctx = req->ctx;
+		struct drm_i915_gem_object *ctx_obj =
+				ctx->engine[ring->id].state;
+
+		if (ctx_obj && (ctx != ring->default_context))
+			intel_lr_context_unpin(ring, ctx);
 		intel_runtime_pm_put(dev_priv);
 		i915_gem_context_unreference(req->ctx);
 		list_del(&req->execlist_link);
@@ -803,6 +823,7 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring,
 		struct intel_context *ctx)
 {
 	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
 	int ret = 0;
 
 	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
@@ -810,21 +831,35 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring,
 		ret = i915_gem_obj_ggtt_pin(ctx_obj,
 				GEN8_LR_CONTEXT_ALIGN, 0);
 		if (ret)
-			ctx->engine[ring->id].unpin_count = 0;
+			goto reset_unpin_count;
+
+		ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
+		if (ret)
+			goto unpin_ctx_obj;
 	}
 
 	return ret;
+
+unpin_ctx_obj:
+	i915_gem_object_ggtt_unpin(ctx_obj);
+reset_unpin_count:
+	ctx->engine[ring->id].unpin_count = 0;
+
+	return ret;
 }
 
 void intel_lr_context_unpin(struct intel_engine_cs *ring,
 		struct intel_context *ctx)
 {
 	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
 
 	if (ctx_obj) {
 		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
-		if (--ctx->engine[ring->id].unpin_count == 0)
+		if (--ctx->engine[ring->id].unpin_count == 0) {
+			intel_unpin_ringbuffer_obj(ringbuf);
 			i915_gem_object_ggtt_unpin(ctx_obj);
+		}
 	}
 }
 
@@ -1541,7 +1576,6 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_object *ring_obj = ringbuf->obj;
 	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
 	struct page *page;
 	uint32_t *reg_state;
@@ -1587,7 +1621,9 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
 	reg_state[CTX_RING_TAIL+1] = 0;
 	reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
-	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
+	/* Ring buffer start address is not known until the buffer is pinned.
+	 * It is written to the context image in execlists_update_context()
+	 */
 	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
 	reg_state[CTX_RING_BUFFER_CONTROL+1] =
 			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID;
@@ -1669,10 +1705,12 @@ void intel_lr_context_free(struct intel_context *ctx)
 					ctx->engine[i].ringbuf;
 			struct intel_engine_cs *ring = ringbuf->ring;
 
+			if (ctx == ring->default_context) {
+				intel_unpin_ringbuffer_obj(ringbuf);
+				i915_gem_object_ggtt_unpin(ctx_obj);
+			}
 			intel_destroy_ringbuffer_obj(ringbuf);
 			kfree(ringbuf);
-			if (ctx == ring->default_context)
-				i915_gem_object_ggtt_unpin(ctx_obj);
 			drm_gem_object_unreference(&ctx_obj->base);
 		}
 	}
@@ -1770,11 +1808,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 	if (!ringbuf) {
 		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
 				ring->name);
-		if (is_global_default_ctx)
-			i915_gem_object_ggtt_unpin(ctx_obj);
-		drm_gem_object_unreference(&ctx_obj->base);
 		ret = -ENOMEM;
-		return ret;
+		goto error_unpin_ctx;
 	}
 
 	ringbuf->ring = ring;
@@ -1787,22 +1822,30 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 	ringbuf->space = ringbuf->size;
 	ringbuf->last_retired_head = -1;
 
-	/* TODO: For now we put this in the mappable region so that we can reuse
-	 * the existing ringbuffer code which ioremaps it. When we start
-	 * creating many contexts, this will no longer work and we must switch
-	 * to a kmapish interface.
-	 */
-	ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n",
+	if (ringbuf->obj == NULL) {
+		ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
+		if (ret) {
+			DRM_DEBUG_DRIVER(
+				"Failed to allocate ringbuffer obj %s: %d\n",
 				ring->name, ret);
-		goto error;
+			goto error_free_rbuf;
+		}
+
+		if (is_global_default_ctx) {
+			ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
+			if (ret) {
+				DRM_ERROR(
+					"Failed to pin and map ringbuffer %s: %d\n",
+					ring->name, ret);
+				goto error_destroy_rbuf;
+			}
+		}
+
 	}
 
 	ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
 	if (ret) {
 		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
-		intel_destroy_ringbuffer_obj(ringbuf);
 		goto error;
 	}
 
@@ -1823,7 +1866,6 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 			DRM_ERROR("Init render state failed: %d\n", ret);
 			ctx->engine[ring->id].ringbuf = NULL;
 			ctx->engine[ring->id].state = NULL;
-			intel_destroy_ringbuffer_obj(ringbuf);
 			goto error;
 		}
 		ctx->rcs_initialized = true;
@@ -1832,7 +1874,13 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 	return 0;
 
 error:
+	if (is_global_default_ctx)
+		intel_unpin_ringbuffer_obj(ringbuf);
+error_destroy_rbuf:
+	intel_destroy_ringbuffer_obj(ringbuf);
+error_free_rbuf:
 	kfree(ringbuf);
+error_unpin_ctx:
 	if (is_global_default_ctx)
 		i915_gem_object_ggtt_unpin(ctx_obj);
 	drm_gem_object_unreference(&ctx_obj->base);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index a8f72e8..0c4aab1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1721,13 +1721,42 @@ static int init_phys_status_page(struct intel_engine_cs *ring)
 	return 0;
 }
 
-void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
+void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
 {
-	if (!ringbuf->obj)
-		return;
-
 	iounmap(ringbuf->virtual_start);
+	ringbuf->virtual_start = NULL;
 	i915_gem_object_ggtt_unpin(ringbuf->obj);
+}
+
+int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
+				     struct intel_ringbuffer *ringbuf)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_gem_object *obj = ringbuf->obj;
+	int ret;
+
+	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_object_set_to_gtt_domain(obj, true);
+	if (ret) {
+		i915_gem_object_ggtt_unpin(obj);
+		return ret;
+	}
+
+	ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
+			i915_gem_obj_ggtt_offset(obj), ringbuf->size);
+	if (ringbuf->virtual_start == NULL) {
+		i915_gem_object_ggtt_unpin(obj);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
+{
 	drm_gem_object_unreference(&ringbuf->obj->base);
 	ringbuf->obj = NULL;
 }
@@ -1735,12 +1764,7 @@ void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
 int intel_alloc_ringbuffer_obj(struct drm_device *dev,
 			       struct intel_ringbuffer *ringbuf)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_gem_object *obj;
-	int ret;
-
-	if (ringbuf->obj)
-		return 0;
 
 	obj = NULL;
 	if (!HAS_LLC(dev))
@@ -1753,30 +1777,9 @@ int intel_alloc_ringbuffer_obj(struct drm_device *dev,
 	/* mark ring buffers as read-only from GPU side by default */
 	obj->gt_ro = 1;
 
-	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
-	if (ret)
-		goto err_unref;
-
-	ret = i915_gem_object_set_to_gtt_domain(obj, true);
-	if (ret)
-		goto err_unpin;
-
-	ringbuf->virtual_start =
-		ioremap_wc(dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj),
-				ringbuf->size);
-	if (ringbuf->virtual_start == NULL) {
-		ret = -EINVAL;
-		goto err_unpin;
-	}
-
 	ringbuf->obj = obj;
-	return 0;
 
-err_unpin:
-	i915_gem_object_ggtt_unpin(obj);
-err_unref:
-	drm_gem_object_unreference(&obj->base);
-	return ret;
+	return 0;
 }
 
 static int intel_init_ring_buffer(struct drm_device *dev,
@@ -1813,10 +1816,21 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 			goto error;
 	}
 
-	ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
-	if (ret) {
-		DRM_ERROR("Failed to allocate ringbuffer %s: %d\n", ring->name, ret);
-		goto error;
+	if (ringbuf->obj == NULL) {
+		ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
+		if (ret) {
+			DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
+					ring->name, ret);
+			goto error;
+		}
+
+		ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
+		if (ret) {
+			DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
+					ring->name, ret);
+			intel_destroy_ringbuffer_obj(ringbuf);
+			goto error;
+		}
 	}
 
 	/* Workaround an erratum on the i830 which causes a hang if
@@ -1854,6 +1868,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
 	intel_stop_ring_buffer(ring);
 	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
 
+	intel_unpin_ringbuffer_obj(ringbuf);
 	intel_destroy_ringbuffer_obj(ringbuf);
 	ring->preallocated_lazy_request = NULL;
 	ring->outstanding_lazy_seqno = 0;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 8c002d2..365854ad 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -382,6 +382,9 @@ intel_write_status_page(struct intel_engine_cs *ring,
 #define I915_GEM_HWS_SCRATCH_INDEX	0x30
 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 
+void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
+int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
+				     struct intel_ringbuffer *ringbuf);
 void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
 int intel_alloc_ringbuffer_obj(struct drm_device *dev,
 			       struct intel_ringbuffer *ringbuf);
-- 
1.7.9.5
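
To make the v3 note concrete: every item on the execlist queue effectively
holds a pin on both the context object and its ringbuffer from submission
until the context-save-complete event is retired, which is why the unpin
lives in intel_execlists_retire_requests() rather than in the interrupt
handler. The pairing, condensed from the patches above:

    /* Submission (struct_mutex held): pin before the ELSP can see it. */
    if (to != ring->default_context)
            intel_lr_context_pin(ring, to);    /* ctx obj + ringbuffer obj */

    /* Retire (process context, struct_mutex held): the context save has
     * completed, so the backing objects may move in the GGTT again. */
    if (ctx_obj && ctx != ring->default_context)
            intel_lr_context_unpin(ring, ctx); /* last unpin releases GGTT */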


* Re: [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
  2014-11-18  6:40     ` Deepak S
@ 2014-11-17 14:23       ` Daniel Vetter
  2014-11-18 14:27         ` Deepak S
  0 siblings, 1 reply; 42+ messages in thread
From: Daniel Vetter @ 2014-11-17 14:23 UTC (permalink / raw)
  To: Deepak S; +Cc: intel-gfx

On Tue, Nov 18, 2014 at 12:10:51PM +0530, Deepak S wrote:
> On Thursday 13 November 2014 03:58 PM, Thomas Daniel wrote:
> >diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >index 906b985..f7fa0f7 100644
> >--- a/drivers/gpu/drm/i915/intel_lrc.c
> >+++ b/drivers/gpu/drm/i915/intel_lrc.c
> >@@ -139,8 +139,6 @@
> >  #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
> >  #define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
> >-#define GEN8_LR_CONTEXT_ALIGN 4096
> >-
> >  #define RING_EXECLIST_QFULL		(1 << 0x2)
> >  #define RING_EXECLIST1_VALID		(1 << 0x3)
> >  #define RING_EXECLIST0_VALID		(1 << 0x4)
> >@@ -801,9 +799,40 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
> >  	execlists_context_queue(ring, ctx, ringbuf->tail);
> >  }
> >+static int intel_lr_context_pin(struct intel_engine_cs *ring,
> >+		struct intel_context *ctx)
> >+{
> >+	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> >+	int ret = 0;
> >+
> >+	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> 
> With pin specific mutex from previous patch set removed.

Pardon my ignorance, but I'm completely lost on this review comment.
Deepak, can you please elaborate on what kind of lock in which exact
version of the previous patch you mean? I didn't find any locking at all
in the preceding patch here ...

Thanks, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: [PATCH v5 4/4] drm/i915/bdw: Pin the ringbuffer backing object to GGTT on-demand
  2014-11-18  6:39       ` Deepak S
@ 2014-11-17 14:29         ` Daniel Vetter
  2014-11-18 14:30           ` Deepak S
  0 siblings, 1 reply; 42+ messages in thread
From: Daniel Vetter @ 2014-11-17 14:29 UTC (permalink / raw)
  To: Deepak S; +Cc: intel-gfx

On Tue, Nov 18, 2014 at 12:09:54PM +0530, Deepak S wrote:
> On Tuesday 18 November 2014 12:07 PM, Deepak S wrote:
> >With pin specific mutex from previous patch set removed
> 
> Oops, this comment was for the previous patch in the series :( Since I
> reviewed the patch offline, the comments got mixed up :)

Please forward these comments from the private discussion to the mailing
list. Review isn't just about code correctness, but about communication -
yes, I (and domain experts) actually read all this stuff that floats
around and will jump into the discussion if there's something important or
tricky being discussed.

The second reason for public review is that the important part about the
r-b tag isn't that review happened, but by whom. So this is all about
reputation building and playing to people's various strengths. And if you
do review in private, none of that can happen, which makes the review a
lot less useful. So let's extract the most value from all the engineering
time we invest into reviewing and _always_ do the review in public.

Thanks, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
  2014-11-13 10:28   ` [PATCH v5 " Thomas Daniel
@ 2014-11-17 14:38     ` akash goel
  2014-11-17 14:55       ` Daniel, Thomas
  2014-11-17 18:09     ` Daniel Vetter
                       ` (2 subsequent siblings)
  3 siblings, 1 reply; 42+ messages in thread
From: akash goel @ 2014-11-17 14:38 UTC (permalink / raw)
  To: Thomas Daniel; +Cc: intel-gfx


Reviewed the patch & it looks fine.
Reviewed-by: Akash Goel <akash.goels@gmail.com>

On Thu, Nov 13, 2014 at 3:58 PM, Thomas Daniel <thomas.daniel@intel.com> wrote:

> From: Oscar Mateo <oscar.mateo@intel.com>
>
> Up until now, we have pinned every logical ring context backing object
> during creation, and left it pinned until destruction. This made my life
> easier, but it's a harmful thing to do, because we cause fragmentation
> of the GGTT (and, eventually, we would run out of space).
>
> This patch makes the pinning on-demand: the backing objects of the two
> contexts that are written to the ELSP are pinned right before submission
> and unpinned once the hardware is done with them. The only context that
> is still pinned regardless is the global default one, so that the HWS can
> still be accessed in the same way (ring->status_page).
>
> v2: In the early version of this patch, we were pinning the context as
> we put it into the ELSP: on the one hand, this is very efficient because
> only a maximum two contexts are pinned at any given time, but on the other
> hand, we cannot really pin in interrupt time :(
>
> v3: Use a mutex rather than atomic_t to protect pin count to avoid races.
> Do not unpin default context in free_request.
>
> v4: Break out pin and unpin into functions.  Fix style problems reported
> by checkpatch
>
> v5: Remove unpin_lock as all pinning and unpinning is done with the struct
> mutex already locked.  Add WARN_ONs to make sure this is the case in
> future.
>
> Issue: VIZ-4277
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c |   12 +++++-
>  drivers/gpu/drm/i915/i915_drv.h     |    1 +
>  drivers/gpu/drm/i915/i915_gem.c     |   39 +++++++++++++-------
>  drivers/gpu/drm/i915/intel_lrc.c    |   69
> +++++++++++++++++++++++++++++------
>  drivers/gpu/drm/i915/intel_lrc.h    |    4 ++
>  5 files changed, 98 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
> b/drivers/gpu/drm/i915/i915_debugfs.c
> index e60d5c2..6eaf813 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1799,10 +1799,16 @@ static int i915_dump_lrc(struct seq_file *m, void
> *unused)
>                                 continue;
>
>                         if (ctx_obj) {
> -                               struct page *page =
> i915_gem_object_get_page(ctx_obj, 1);
> -                               uint32_t *reg_state = kmap_atomic(page);
> +                               struct page *page;
> +                               uint32_t *reg_state;
>                                 int j;
>
> +                               i915_gem_obj_ggtt_pin(ctx_obj,
> +                                               GEN8_LR_CONTEXT_ALIGN, 0);
> +
> +                               page = i915_gem_object_get_page(ctx_obj,
> 1);
> +                               reg_state = kmap_atomic(page);
> +
>                                 seq_printf(m, "CONTEXT: %s %u\n",
> ring->name,
>
> intel_execlists_ctx_id(ctx_obj));
>
> @@ -1814,6 +1820,8 @@ static int i915_dump_lrc(struct seq_file *m, void
> *unused)
>                                 }
>                                 kunmap_atomic(reg_state);
>
> +                               i915_gem_object_ggtt_unpin(ctx_obj);
> +
>                                 seq_putc(m, '\n');
>                         }
>                 }
> diff --git a/drivers/gpu/drm/i915/i915_drv.h
> b/drivers/gpu/drm/i915/i915_drv.h
> index 059330c..3c7299d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -655,6 +655,7 @@ struct intel_context {
>         struct {
>                 struct drm_i915_gem_object *state;
>                 struct intel_ringbuffer *ringbuf;
> +               int unpin_count;
>         } engine[I915_NUM_RINGS];
>
>         struct list_head link;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c
> b/drivers/gpu/drm/i915/i915_gem.c
> index 408afe7..2ee6996 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2494,12 +2494,18 @@ static void i915_set_reset_status(struct
> drm_i915_private *dev_priv,
>
>  static void i915_gem_free_request(struct drm_i915_gem_request *request)
>  {
> +       struct intel_context *ctx = request->ctx;
> +
>         list_del(&request->list);
>         i915_gem_request_remove_from_client(request);
>
> -       if (request->ctx)
> -               i915_gem_context_unreference(request->ctx);
> +       if (i915.enable_execlists && ctx) {
> +               struct intel_engine_cs *ring = request->ring;
>
> +               if (ctx != ring->default_context)
> +                       intel_lr_context_unpin(ring, ctx);
> +               i915_gem_context_unreference(ctx);
> +       }
>         kfree(request);
>  }
>
> @@ -2554,6 +2560,23 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
>         }
>
>         /*
> +        * Clear the execlists queue up before freeing the requests, as those
> +        * are the ones that keep the context and ringbuffer backing objects
> +        * pinned in place.
> +        */
> +       while (!list_empty(&ring->execlist_queue)) {
> +               struct intel_ctx_submit_request *submit_req;
> +
> +               submit_req = list_first_entry(&ring->execlist_queue,
> +                               struct intel_ctx_submit_request,
> +                               execlist_link);
> +               list_del(&submit_req->execlist_link);
> +               intel_runtime_pm_put(dev_priv);
> +               i915_gem_context_unreference(submit_req->ctx);
> +               kfree(submit_req);
> +       }
> +
> +       /*
>          * We must free the requests after all the corresponding objects have
>          * been moved off active lists. Which is the same order as the normal
>          * retire_requests function does. This is important if object hold
> @@ -2570,18 +2593,6 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
>                 i915_gem_free_request(request);
>         }
>
> -       while (!list_empty(&ring->execlist_queue)) {
> -               struct intel_ctx_submit_request *submit_req;
> -
> -               submit_req = list_first_entry(&ring->execlist_queue,
> -                               struct intel_ctx_submit_request,
> -                               execlist_link);
> -               list_del(&submit_req->execlist_link);
> -               intel_runtime_pm_put(dev_priv);
> -               i915_gem_context_unreference(submit_req->ctx);
> -               kfree(submit_req);
> -       }
> -
>         /* These may not have been flush before the reset, do so now */
>         kfree(ring->preallocated_lazy_request);
>         ring->preallocated_lazy_request = NULL;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 906b985..f7fa0f7 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -139,8 +139,6 @@
>  #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
>  #define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
>
> -#define GEN8_LR_CONTEXT_ALIGN 4096
> -
>  #define RING_EXECLIST_QFULL            (1 << 0x2)
>  #define RING_EXECLIST1_VALID           (1 << 0x3)
>  #define RING_EXECLIST0_VALID           (1 << 0x4)
> @@ -801,9 +799,40 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
>         execlists_context_queue(ring, ctx, ringbuf->tail);
>  }
>
> +static int intel_lr_context_pin(struct intel_engine_cs *ring,
> +               struct intel_context *ctx)
> +{
> +       struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> +       int ret = 0;
> +
> +       WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> +       if (ctx->engine[ring->id].unpin_count++ == 0) {
> +               ret = i915_gem_obj_ggtt_pin(ctx_obj,
> +                               GEN8_LR_CONTEXT_ALIGN, 0);
> +               if (ret)
> +                       ctx->engine[ring->id].unpin_count = 0;
> +       }
> +
> +       return ret;
> +}
> +
> +void intel_lr_context_unpin(struct intel_engine_cs *ring,
> +               struct intel_context *ctx)
> +{
> +       struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> +
> +       if (ctx_obj) {
> +               WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> +               if (--ctx->engine[ring->id].unpin_count == 0)
> +                       i915_gem_object_ggtt_unpin(ctx_obj);
> +       }
> +}
> +
>  static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
>                                     struct intel_context *ctx)
>  {
> +       int ret;
> +
>         if (ring->outstanding_lazy_seqno)
>                 return 0;
>
> @@ -814,6 +843,14 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
>                 if (request == NULL)
>                         return -ENOMEM;
>
> +               if (ctx != ring->default_context) {
> +                       ret = intel_lr_context_pin(ring, ctx);
> +                       if (ret) {
> +                               kfree(request);
> +                               return ret;
> +                       }
> +               }
> +
>                 /* Hold a reference to the context this request belongs to
>                  * (we will need it when the time comes to emit/retire the
>                  * request).
> @@ -1626,12 +1663,16 @@ void intel_lr_context_free(struct intel_context *ctx)
>
>         for (i = 0; i < I915_NUM_RINGS; i++) {
>                 struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
> -               struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
>
>                 if (ctx_obj) {
> +                       struct intel_ringbuffer *ringbuf =
> +                                       ctx->engine[i].ringbuf;
> +                       struct intel_engine_cs *ring = ringbuf->ring;
> +
>                         intel_destroy_ringbuffer_obj(ringbuf);
>                         kfree(ringbuf);
> -                       i915_gem_object_ggtt_unpin(ctx_obj);
> +                       if (ctx == ring->default_context)
> +                               i915_gem_object_ggtt_unpin(ctx_obj);
>                         drm_gem_object_unreference(&ctx_obj->base);
>                 }
>         }
> @@ -1695,6 +1736,7 @@ static int lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
>  int intel_lr_context_deferred_create(struct intel_context *ctx,
>                                      struct intel_engine_cs *ring)
>  {
> +       const bool is_global_default_ctx = (ctx == ring->default_context);
>         struct drm_device *dev = ring->dev;
>         struct drm_i915_gem_object *ctx_obj;
>         uint32_t context_size;
> @@ -1714,18 +1756,22 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>                 return ret;
>         }
>
> -       ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
> -       if (ret) {
> -               DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n", ret);
> -               drm_gem_object_unreference(&ctx_obj->base);
> -               return ret;
> +       if (is_global_default_ctx) {
> +               ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
> +               if (ret) {
> +                       DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n",
> +                                       ret);
> +                       drm_gem_object_unreference(&ctx_obj->base);
> +                       return ret;
> +               }
>         }
>
>         ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
>         if (!ringbuf) {
>                 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
>                                 ring->name);
> -               i915_gem_object_ggtt_unpin(ctx_obj);
> +               if (is_global_default_ctx)
> +                       i915_gem_object_ggtt_unpin(ctx_obj);
>                 drm_gem_object_unreference(&ctx_obj->base);
>                 ret = -ENOMEM;
>                 return ret;
> @@ -1787,7 +1833,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>
>  error:
>         kfree(ringbuf);
> -       i915_gem_object_ggtt_unpin(ctx_obj);
> +       if (is_global_default_ctx)
> +               i915_gem_object_ggtt_unpin(ctx_obj);
>         drm_gem_object_unreference(&ctx_obj->base);
>         return ret;
>  }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 84bbf19..14b216b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -24,6 +24,8 @@
>  #ifndef _INTEL_LRC_H_
>  #define _INTEL_LRC_H_
>
> +#define GEN8_LR_CONTEXT_ALIGN 4096
> +
>  /* Execlists regs */
>  #define RING_ELSP(ring)                        ((ring)->mmio_base+0x230)
>  #define RING_EXECLIST_STATUS(ring)     ((ring)->mmio_base+0x234)
> @@ -67,6 +69,8 @@ int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
>  void intel_lr_context_free(struct intel_context *ctx);
>  int intel_lr_context_deferred_create(struct intel_context *ctx,
>                                      struct intel_engine_cs *ring);
> +void intel_lr_context_unpin(struct intel_engine_cs *ring,
> +               struct intel_context *ctx);
>
>  /* Execlists */
>  int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
> --
> 1.7.9.5
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
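The core of the patch above is on-demand pinning with a per-engine reference count: the first pin of a context's backing object reserves its GGTT space, later pins only bump the counter, and only the final unpin releases the mapping. Below is a minimal userspace model of that refcounting; the struct and helper names are illustrative stand-ins, not the driver's real API.

#include <assert.h>
#include <stdio.h>

/*
 * Toy stand-ins for the per-engine state in the patch: unpin_count is
 * the pin reference count, ggtt_pinned models the expensive resource.
 */
struct engine_ctx {
	int unpin_count;
	int ggtt_pinned;
};

/* First reference does the real GGTT pin; later ones just count. */
static void ctx_pin(struct engine_ctx *e)
{
	if (e->unpin_count++ == 0)
		e->ggtt_pinned = 1;	/* i915_gem_obj_ggtt_pin() in the patch */
}

/* Only the last reference releases the GGTT mapping. */
static void ctx_unpin(struct engine_ctx *e)
{
	assert(e->unpin_count > 0);
	if (--e->unpin_count == 0)
		e->ggtt_pinned = 0;	/* i915_gem_object_ggtt_unpin() */
}

int main(void)
{
	struct engine_ctx e = { 0, 0 };

	ctx_pin(&e);		/* request creation takes a reference */
	ctx_pin(&e);		/* a second request against the same context */
	ctx_unpin(&e);		/* first request retires: still pinned */
	printf("pinned after one unpin: %d\n", e.ggtt_pinned);		/* 1 */
	ctx_unpin(&e);		/* last request retires: GGTT space released */
	printf("pinned after both unpins: %d\n", e.ggtt_pinned);	/* 0 */
	return 0;
}

The count is what lets non-default contexts come and go from the GGTT while the global default context stays pinned for the HWS.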


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 1/4] drm/i915/bdw: Clean up execlist queue items in retire_work
  2014-11-18  6:29     ` Deepak S
@ 2014-11-17 14:41       ` akash goel
  0 siblings, 0 replies; 42+ messages in thread
From: akash goel @ 2014-11-17 14:41 UTC (permalink / raw)
  To: Deepak S; +Cc: intel-gfx



Reviewed the patch and it looks fine.
Reviewed-by: Akash Goel <akash.goels@gmail.com>

On Tue, Nov 18, 2014 at 11:59 AM, Deepak S <deepak.s@intel.com> wrote:

>
> On Thursday 13 November 2014 03:57 PM, Thomas Daniel wrote:
>
>> No longer create a work item to clean each execlist queue item.
>> Instead, move retired execlist requests to a queue and clean up the
>> items during retire_requests.
>>
>> v2: Fix legacy ring path broken during overzealous cleanup
>>
>> v3: Update idle detection to take execlists queue into account
>>
>> v4: Grab execlist lock when checking queue state
>>
>> v5: Fix leaking requests by freeing in execlists_retire_requests.
>>
>> Issue: VIZ-4274
>> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_gem.c         |    9 ++++++
>>   drivers/gpu/drm/i915/intel_lrc.c        |   53 ++++++++++++++++++-------------
>>   drivers/gpu/drm/i915/intel_lrc.h        |    2 +-
>>   drivers/gpu/drm/i915/intel_ringbuffer.h |    1 +
>>   4 files changed, 42 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index 827edb5..408afe7 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -2718,6 +2718,15 @@ i915_gem_retire_requests(struct drm_device *dev)
>>         for_each_ring(ring, dev_priv, i) {
>>                 i915_gem_retire_requests_ring(ring);
>>                 idle &= list_empty(&ring->request_list);
>> +               if (i915.enable_execlists) {
>> +                       unsigned long flags;
>> +
>> +                       spin_lock_irqsave(&ring->execlist_lock, flags);
>> +                       idle &= list_empty(&ring->execlist_queue);
>> +                       spin_unlock_irqrestore(&ring->execlist_lock, flags);
>> +
>> +                       intel_execlists_retire_requests(ring);
>> +               }
>>         }
>>         if (idle)
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index cd74e5c..d920297 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -386,7 +386,6 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
>>   {
>>         struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
>>         struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
>> -       struct drm_i915_private *dev_priv = ring->dev->dev_private;
>>         assert_spin_locked(&ring->execlist_lock);
>>
>> @@ -403,7 +402,8 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
>>                          * will update tail past first request's workload */
>>                         cursor->elsp_submitted = req0->elsp_submitted;
>>                         list_del(&req0->execlist_link);
>> -                       queue_work(dev_priv->wq, &req0->work);
>> +                       list_add_tail(&req0->execlist_link,
>> +                               &ring->execlist_retired_req_list);
>>                         req0 = cursor;
>>                 } else {
>>                         req1 = cursor;
>> @@ -425,7 +425,6 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
>>   static bool execlists_check_remove_request(struct intel_engine_cs *ring,
>>                                            u32 request_id)
>>   {
>> -       struct drm_i915_private *dev_priv = ring->dev->dev_private;
>>         struct intel_ctx_submit_request *head_req;
>>         assert_spin_locked(&ring->execlist_lock);
>> @@ -443,7 +442,8 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring,
>>                         if (--head_req->elsp_submitted <= 0) {
>>                                 list_del(&head_req->execlist_link);
>> -                               queue_work(dev_priv->wq, &head_req->work);
>> +                               list_add_tail(&head_req->execlist_link,
>> +                                       &ring->execlist_retired_req_list);
>>                                 return true;
>>                         }
>>                 }
>> @@ -512,22 +512,6 @@ void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring)
>>                    ((u32)ring->next_context_status_buffer & 0x07) << 8);
>>   }
>>
>> -static void execlists_free_request_task(struct work_struct *work)
>> -{
>> -       struct intel_ctx_submit_request *req =
>> -               container_of(work, struct intel_ctx_submit_request, work);
>> -       struct drm_device *dev = req->ring->dev;
>> -       struct drm_i915_private *dev_priv = dev->dev_private;
>> -
>> -       intel_runtime_pm_put(dev_priv);
>> -
>> -       mutex_lock(&dev->struct_mutex);
>> -       i915_gem_context_unreference(req->ctx);
>> -       mutex_unlock(&dev->struct_mutex);
>> -
>> -       kfree(req);
>> -}
>> -
>>   static int execlists_context_queue(struct intel_engine_cs *ring,
>>                                    struct intel_context *to,
>>                                    u32 tail)
>> @@ -544,7 +528,6 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
>>         i915_gem_context_reference(req->ctx);
>>         req->ring = ring;
>>         req->tail = tail;
>> -       INIT_WORK(&req->work, execlists_free_request_task);
>>         intel_runtime_pm_get(dev_priv);
>>
>> @@ -565,7 +548,8 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
>>                         WARN(tail_req->elsp_submitted != 0,
>>                              "More than 2 already-submitted reqs
>> queued\n");
>>                         list_del(&tail_req->execlist_link);
>> -                       queue_work(dev_priv->wq, &tail_req->work);
>> +                       list_add_tail(&tail_req->execlist_link,
>> +                               &ring->execlist_retired_req_list);
>>                 }
>>         }
>>
>> @@ -733,6 +717,30 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
>>         return 0;
>>   }
>>
>> +void intel_execlists_retire_requests(struct intel_engine_cs *ring)
>> +{
>> +       struct intel_ctx_submit_request *req, *tmp;
>> +       struct drm_i915_private *dev_priv = ring->dev->dev_private;
>> +       unsigned long flags;
>> +       struct list_head retired_list;
>> +
>> +       WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
>> +       if (list_empty(&ring->execlist_retired_req_list))
>> +               return;
>> +
>> +       INIT_LIST_HEAD(&retired_list);
>> +       spin_lock_irqsave(&ring->execlist_lock, flags);
>> +       list_replace_init(&ring->execlist_retired_req_list, &retired_list);
>> +       spin_unlock_irqrestore(&ring->execlist_lock, flags);
>> +
>> +       list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
>> +               intel_runtime_pm_put(dev_priv);
>> +               i915_gem_context_unreference(req->ctx);
>> +               list_del(&req->execlist_link);
>> +               kfree(req);
>>
>
> Hi Thomas,
>
> I am fine with the current changes after v5.
> Reviewed-by: Deepak S <deepak.s@linux.intel.com>
>
> Thanks
> Deepak
>
>
>  +       }
>> +}
>> +
>>   void intel_logical_ring_stop(struct intel_engine_cs *ring)
>>   {
>>         struct drm_i915_private *dev_priv = ring->dev->dev_private;
>> @@ -1248,6 +1256,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
>>         init_waitqueue_head(&ring->irq_queue);
>>         INIT_LIST_HEAD(&ring->execlist_queue);
>> +       INIT_LIST_HEAD(&ring->execlist_retired_req_list);
>>         spin_lock_init(&ring->execlist_lock);
>>         ring->next_context_status_buffer = 0;
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
>> index 33c3b4b..84bbf19 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.h
>> +++ b/drivers/gpu/drm/i915/intel_lrc.h
>> @@ -104,11 +104,11 @@ struct intel_ctx_submit_request {
>>         u32 tail;
>>         struct list_head execlist_link;
>> -       struct work_struct work;
>>         int elsp_submitted;
>>   };
>>     void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring);
>> +void intel_execlists_retire_requests(struct intel_engine_cs *ring);
>>     #endif /* _INTEL_LRC_H_ */
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> index 96479c8..8c002d2 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> @@ -235,6 +235,7 @@ struct  intel_engine_cs {
>>         /* Execlists */
>>         spinlock_t execlist_lock;
>>         struct list_head execlist_queue;
>> +       struct list_head execlist_retired_req_list;
>>         u8 next_context_status_buffer;
>>         u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
>>         int             (*emit_request)(struct intel_ringbuffer *ringbuf);
>>
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
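The retire path reviewed above replaces per-item work-queue cleanup with a deferred list: the interrupt-side code only relinks a finished request onto execlist_retired_req_list under the execlist spinlock, and intel_execlists_retire_requests() later splices that list out under the lock, then frees the entries with the lock dropped. A small userspace sketch of that splice-then-free pattern follows; the types and a pthread mutex are stand-ins for the kernel's list and spinlock APIs.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct req {
	int id;
	struct req *next;
};

static pthread_mutex_t execlist_lock = PTHREAD_MUTEX_INITIALIZER;
static struct req *retired_head;	/* ring->execlist_retired_req_list */

/* Interrupt-side analogue: just relink, never free under the lock. */
static void retire_request(struct req *r)
{
	pthread_mutex_lock(&execlist_lock);
	r->next = retired_head;
	retired_head = r;
	pthread_mutex_unlock(&execlist_lock);
}

/* retire_requests analogue: splice under the lock, free outside it. */
static void retire_requests(void)
{
	struct req *list;

	pthread_mutex_lock(&execlist_lock);
	list = retired_head;		/* list_replace_init() in the patch */
	retired_head = NULL;
	pthread_mutex_unlock(&execlist_lock);

	while (list) {
		struct req *next = list->next;
		printf("freeing request %d\n", list->id);
		free(list);		/* context unref + kfree in the patch */
		list = next;
	}
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct req *r = malloc(sizeof(*r));
		r->id = i;
		retire_request(r);
	}
	retire_requests();
	return 0;
}

Keeping the free (and, in the real driver, the context unreference and runtime-pm put, which may sleep) outside the spinlock is the point of the splice.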


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
  2014-11-17 14:38     ` akash goel
@ 2014-11-17 14:55       ` Daniel, Thomas
  2014-11-19 17:59         ` Daniel, Thomas
  0 siblings, 1 reply; 42+ messages in thread
From: Daniel, Thomas @ 2014-11-17 14:55 UTC (permalink / raw)
  To: intel-gfx

Here is the actual review...

_____________________________________________
From: Daniel, Thomas 
Sent: Wednesday, November 12, 2014 8:52 PM
To: Goel, Akash
Subject: RE: Execlists patches code review


Hi Akash,

I will put the WARN messages back in and remove the need_unpin.
The elsp_submitted count does not behave exactly as you would expect because of a race condition.
Have a look at the patch “Avoid non-lite-restore preemptions” by Oscar Mateo for a description.

Thanks,
Thomas.
_____________________________________________
From: Goel, Akash 
Sent: Tuesday, November 11, 2014 4:37 PM
To: Daniel, Thomas
Subject: RE: Execlists patches code review


Hi Thomas,

A few comments on http://patchwork.freedesktop.org/patch/35830/

	int elsp_submitted;
+	bool need_unpin;

This new field has not been used anywhere.


		if (intel_execlists_ctx_id(ctx_obj) == request_id) {
-			WARN(head_req->elsp_submitted == 0,
-			     "Never submitted head request\n");

Sorry, I couldn't follow this change. Even if a request has been merged, the elsp_submitted count should not be 0 here, since this function is executed on arrival of the context switch interrupt. When a new request is merged with a previously submitted request, the original value of elsp_submitted is retained.
 
+			/* If the request has been merged, it is possible to get
+			 * here with an unsubmitted request. */
 			if (--head_req->elsp_submitted <= 0) {




		if (status & GEN8_CTX_STATUS_PREEMPTED) {
 			if (status & GEN8_CTX_STATUS_LITE_RESTORE) {
-				if (execlists_check_remove_request(ring, status_id))
-					WARN(1, "Lite Restored request removed from queue\n");
+				execlists_check_remove_request(ring, status_id);

Same doubt here: I thought that for an interrupt due to preemption (lite restore), which occurs when the context submitted is the same one already being executed by the HW, the count will not drop to 0. The count will only drop to 0 when the context switch interrupt is generated subsequently.

Best regards
Akash
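For readers following the elsp_submitted exchange above, here is a toy model of the counting being described. The event names and ordering are illustrative only; the real accounting lives in the execlists context-event handler.

#include <stdio.h>

int main(void)
{
	int elsp_submitted = 0;

	elsp_submitted++;	/* context written to the ELSP port */
	elsp_submitted++;	/* same context lite-restored with more work */

	/* Preempted + lite-restore CSB event: drops the count by one,
	 * so the request is not removed yet. */
	if (--elsp_submitted <= 0)
		printf("removed at lite restore (unexpected)\n");

	/* Element-complete CSB event at the end of the workload: now the
	 * count reaches zero and the request can be retired. */
	if (--elsp_submitted <= 0)
		printf("removed at final context switch\n");

	return 0;
}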
_____________________________________________
From: Goel, Akash 
Sent: Tuesday, November 11, 2014 8:58 PM
To: Daniel, Thomas
Subject: RE: Execlists patches code review


Hi Thomas, 

I was out of the office today; I will post this review comment on the GFX mailing list tomorrow.

Best regards
Akash
_____________________________________________
From: Daniel, Thomas 
Sent: Monday, November 10, 2014 10:41 PM
To: Goel, Akash
Subject: RE: Execlists patches code review


Hi Akash,

Please post this comment to the mailing list.
Assuming nobody else comments, I will remove the unpin_lock and replace the mutex_lock(&unpin_lock) with WARN_ON(!mutex_is_locked(&dev->struct_mutex)).

Cheers,
Thomas.

_____________________________________________
From: Goel, Akash 
Sent: Monday, November 10, 2014 11:19 AM
To: Daniel, Thomas
Subject: RE: Execlists patches code review


In context of the 3rd patch  http://patchwork.freedesktop.org/patch/35829/
intel_lr_context_pin is called from the logical_ring_alloc_seqno function, and intel_lr_context_unpin is called from the i915_gem_free_request and i915_gem_reset_ring_cleanup functions.

All three paths are already protected by dev->struct_mutex (the global lock), so they will always execute sequentially with respect to each other.

Do we need a new lock?
+		struct mutex unpin_lock;

Best regards
Akash
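The point being debated, modeled in ordinary userspace terms: when every caller already runs under one outer mutex, an inner lock adds nothing, and an assertion that the outer lock is held documents the requirement instead. This is a sketch only, with a pthread mutex standing in for dev->struct_mutex.

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t struct_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for WARN_ON(!mutex_is_locked(&dev->struct_mutex)): trylock
 * on a held (normal, non-recursive) mutex returns EBUSY, so a zero
 * return would mean the caller did not hold it. Demo logic only. */
static void assert_struct_mutex_held(void)
{
	assert(pthread_mutex_trylock(&struct_mutex) != 0);
}

static void lr_context_pin(void)
{
	assert_struct_mutex_held();
	/* ... pin refcount manipulation, serialized by struct_mutex ... */
}

int main(void)
{
	pthread_mutex_lock(&struct_mutex);
	lr_context_pin();
	pthread_mutex_unlock(&struct_mutex);
	printf("pin called with struct_mutex held\n");
	return 0;
}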
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
  2014-11-13 10:28   ` [PATCH v5 " Thomas Daniel
  2014-11-17 14:38     ` akash goel
@ 2014-11-17 18:09     ` Daniel Vetter
  2014-11-18  9:27       ` Daniel, Thomas
  2014-11-18  6:40     ` Deepak S
  2014-11-24 14:24     ` Daniel Vetter
  3 siblings, 1 reply; 42+ messages in thread
From: Daniel Vetter @ 2014-11-17 18:09 UTC (permalink / raw)
  To: Thomas Daniel; +Cc: intel-gfx

On Thu, Nov 13, 2014 at 10:28:10AM +0000, Thomas Daniel wrote:
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 059330c..3c7299d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -655,6 +655,7 @@ struct intel_context {
>  	struct {
>  		struct drm_i915_gem_object *state;
>  		struct intel_ringbuffer *ringbuf;
> +		int unpin_count;

Pinning is already refcounted. Why this additional refcount?

And yes I've only realized this now that you've supplied the review
comments from Akash. I really rely upon the review discussions to spot
such low-level implementation details.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 4/4] drm/i915/bdw: Pin the ringbuffer backing object to GGTT on-demand
  2014-11-13 10:28   ` [PATCH v5 4/4] drm/i915/bdw: Pin the ringbuffer backing object to GGTT on-demand Thomas Daniel
@ 2014-11-18  5:18     ` akash goel
  2014-11-18  6:37     ` Deepak S
  1 sibling, 0 replies; 42+ messages in thread
From: akash goel @ 2014-11-18  5:18 UTC (permalink / raw)
  To: Thomas Daniel; +Cc: intel-gfx



Reviewed the patch and it looks fine.
Reviewed-by: Akash Goel <akash.goels@gmail.com>


On Thu, Nov 13, 2014 at 3:58 PM, Thomas Daniel <thomas.daniel@intel.com> wrote:

> Same as with the context, pinning to GGTT regardless is harmful (it
> badly fragments the GGTT and can even exhaust it).
>
> Unfortunately, this case is also more complex than the previous one
> because we need to map and access the ringbuffer in several places
> along the execbuffer path (and we cannot make do by leaving the
> default ringbuffer pinned, as before). Also, the context object
> itself contains a pointer to the ringbuffer address that we have to
> keep updated if we are going to allow the ringbuffer to move around.
>
> v2: Same as with the context pinning, we cannot really do it during
> an interrupt. Also, pin the default ringbuffers objects regardless
> (makes error capture a lot easier).
>
> v3: Rebased. Take a pin reference of the ringbuffer for each item
> in the execlist request queue because the hardware may still be using
> the ringbuffer after the MI_USER_INTERRUPT to notify the seqno update
> is executed.  The ringbuffer must remain pinned until the context save
> is complete.  No longer pin and unpin ringbuffer in
> populate_lr_context() - this transient address is meaningless and the
> pinning can cause a sleep while atomic.
>
> v4: Moved ringbuffer pin and unpin into the lr_context_pin functions.
> Downgraded pinning check BUG_ONs to WARN_ONs.
>
> v5: Reinstated WARN_ONs for unexpected execlist states.  Removed unused
> variable.
>
> Issue: VIZ-4277
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c        |  102 +++++++++++++++++++++++--------
>  drivers/gpu/drm/i915/intel_ringbuffer.c |   85 +++++++++++++++-----------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |    3 +
>  3 files changed, 128 insertions(+), 62 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index f7fa0f7..ca20f91 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -202,6 +202,9 @@ enum {
>  };
>  #define GEN8_CTX_ID_SHIFT 32
>
> +static int intel_lr_context_pin(struct intel_engine_cs *ring,
> +               struct intel_context *ctx);
> +
>  /**
>   * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
>   * @dev: DRM device.
> @@ -339,7 +342,9 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
>         spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
>  }
>
> -static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tail)
> +static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
> +                                   struct drm_i915_gem_object *ring_obj,
> +                                   u32 tail)
>  {
>         struct page *page;
>         uint32_t *reg_state;
> @@ -348,6 +353,7 @@ static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tai
>         reg_state = kmap_atomic(page);
>
>         reg_state[CTX_RING_TAIL+1] = tail;
> +       reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
>
>         kunmap_atomic(reg_state);
>
> @@ -358,21 +364,25 @@ static int execlists_submit_context(struct intel_engine_cs *ring,
>                                     struct intel_context *to0, u32 tail0,
>                                     struct intel_context *to1, u32 tail1)
>  {
> -       struct drm_i915_gem_object *ctx_obj0;
> +       struct drm_i915_gem_object *ctx_obj0 = to0->engine[ring->id].state;
> +       struct intel_ringbuffer *ringbuf0 = to0->engine[ring->id].ringbuf;
>         struct drm_i915_gem_object *ctx_obj1 = NULL;
> +       struct intel_ringbuffer *ringbuf1 = NULL;
>
> -       ctx_obj0 = to0->engine[ring->id].state;
>         BUG_ON(!ctx_obj0);
>         WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0));
> +       WARN_ON(!i915_gem_obj_is_pinned(ringbuf0->obj));
>
> -       execlists_ctx_write_tail(ctx_obj0, tail0);
> +       execlists_update_context(ctx_obj0, ringbuf0->obj, tail0);
>
>         if (to1) {
> +               ringbuf1 = to1->engine[ring->id].ringbuf;
>                 ctx_obj1 = to1->engine[ring->id].state;
>                 BUG_ON(!ctx_obj1);
>                 WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1));
> +               WARN_ON(!i915_gem_obj_is_pinned(ringbuf1->obj));
>
> -               execlists_ctx_write_tail(ctx_obj1, tail1);
> +               execlists_update_context(ctx_obj1, ringbuf1->obj, tail1);
>         }
>
>         execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
> @@ -524,6 +534,10 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
>                 return -ENOMEM;
>         req->ctx = to;
>         i915_gem_context_reference(req->ctx);
> +
> +       if (to != ring->default_context)
> +               intel_lr_context_pin(ring, to);
> +
>         req->ring = ring;
>         req->tail = tail;
>
> @@ -544,7 +558,7 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
>
>                 if (to == tail_req->ctx) {
>                         WARN(tail_req->elsp_submitted != 0,
> -                            "More than 2 already-submitted reqs
> queued\n");
> +                               "More than 2 already-submitted reqs
> queued\n");
>                         list_del(&tail_req->execlist_link);
>                         list_add_tail(&tail_req->execlist_link,
>                                 &ring->execlist_retired_req_list);
> @@ -732,6 +746,12 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
>         spin_unlock_irqrestore(&ring->execlist_lock, flags);
>
>         list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> +               struct intel_context *ctx = req->ctx;
> +               struct drm_i915_gem_object *ctx_obj =
> +                               ctx->engine[ring->id].state;
> +
> +               if (ctx_obj && (ctx != ring->default_context))
> +                       intel_lr_context_unpin(ring, ctx);
>                 intel_runtime_pm_put(dev_priv);
>                 i915_gem_context_unreference(req->ctx);
>                 list_del(&req->execlist_link);
> @@ -803,6 +823,7 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring,
>                 struct intel_context *ctx)
>  {
>         struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> +       struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
>         int ret = 0;
>
>         WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> @@ -810,21 +831,35 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring,
>                 ret = i915_gem_obj_ggtt_pin(ctx_obj,
>                                 GEN8_LR_CONTEXT_ALIGN, 0);
>                 if (ret)
> -                       ctx->engine[ring->id].unpin_count = 0;
> +                       goto reset_unpin_count;
> +
> +               ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
> +               if (ret)
> +                       goto unpin_ctx_obj;
>         }
>
>         return ret;
> +
> +unpin_ctx_obj:
> +       i915_gem_object_ggtt_unpin(ctx_obj);
> +reset_unpin_count:
> +       ctx->engine[ring->id].unpin_count = 0;
> +
> +       return ret;
>  }
>
>  void intel_lr_context_unpin(struct intel_engine_cs *ring,
>                 struct intel_context *ctx)
>  {
>         struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> +       struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
>
>         if (ctx_obj) {
>                 WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> -               if (--ctx->engine[ring->id].unpin_count == 0)
> +               if (--ctx->engine[ring->id].unpin_count == 0) {
> +                       intel_unpin_ringbuffer_obj(ringbuf);
>                         i915_gem_object_ggtt_unpin(ctx_obj);
> +               }
>         }
>  }
>
> @@ -1541,7 +1576,6 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
>  {
>         struct drm_device *dev = ring->dev;
>         struct drm_i915_private *dev_priv = dev->dev_private;
> -       struct drm_i915_gem_object *ring_obj = ringbuf->obj;
>         struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
>         struct page *page;
>         uint32_t *reg_state;
> @@ -1587,7 +1621,9 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
>         reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
>         reg_state[CTX_RING_TAIL+1] = 0;
>         reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
> -       reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
> +       /* Ring buffer start address is not known until the buffer is pinned.
> +        * It is written to the context image in execlists_update_context()
> +        */
>         reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
>         reg_state[CTX_RING_BUFFER_CONTROL+1] =
> +                       ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID;
> @@ -1669,10 +1705,12 @@ void intel_lr_context_free(struct intel_context *ctx)
>                                         ctx->engine[i].ringbuf;
>                         struct intel_engine_cs *ring = ringbuf->ring;
>
> +                       if (ctx == ring->default_context) {
> +                               intel_unpin_ringbuffer_obj(ringbuf);
> +                               i915_gem_object_ggtt_unpin(ctx_obj);
> +                       }
>                         intel_destroy_ringbuffer_obj(ringbuf);
>                         kfree(ringbuf);
> -                       if (ctx == ring->default_context)
> -                               i915_gem_object_ggtt_unpin(ctx_obj);
>                         drm_gem_object_unreference(&ctx_obj->base);
>                 }
>         }
> @@ -1770,11 +1808,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>         if (!ringbuf) {
>                 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
>                                 ring->name);
> -               if (is_global_default_ctx)
> -                       i915_gem_object_ggtt_unpin(ctx_obj);
> -               drm_gem_object_unreference(&ctx_obj->base);
>                 ret = -ENOMEM;
> -               return ret;
> +               goto error_unpin_ctx;
>         }
>
>         ringbuf->ring = ring;
> @@ -1787,22 +1822,30 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>         ringbuf->space = ringbuf->size;
>         ringbuf->last_retired_head = -1;
>
> -       /* TODO: For now we put this in the mappable region so that we can reuse
> -        * the existing ringbuffer code which ioremaps it. When we start
> -        * creating many contexts, this will no longer work and we must switch
> -        * to a kmapish interface.
> -        */
> -       ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
> -       if (ret) {
> -               DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s:
> %d\n",
> +       if (ringbuf->obj == NULL) {
> +               ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
> +               if (ret) {
> +                       DRM_DEBUG_DRIVER(
> +                               "Failed to allocate ringbuffer obj %s:
> %d\n",
>                                 ring->name, ret);
> -               goto error;
> +                       goto error_free_rbuf;
> +               }
> +
> +               if (is_global_default_ctx) {
> +                       ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
> +                       if (ret) {
> +                               DRM_ERROR(
> +                                       "Failed to pin and map ringbuffer
> %s: %d\n",
> +                                       ring->name, ret);
> +                               goto error_destroy_rbuf;
> +                       }
> +               }
> +
>         }
>
>         ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
>         if (ret) {
>                 DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
> -               intel_destroy_ringbuffer_obj(ringbuf);
>                 goto error;
>         }
>
> @@ -1823,7 +1866,6 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>                         DRM_ERROR("Init render state failed: %d\n", ret);
>                         ctx->engine[ring->id].ringbuf = NULL;
>                         ctx->engine[ring->id].state = NULL;
> -                       intel_destroy_ringbuffer_obj(ringbuf);
>                         goto error;
>                 }
>                 ctx->rcs_initialized = true;
> @@ -1832,7 +1874,13 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>         return 0;
>
>  error:
> +       if (is_global_default_ctx)
> +               intel_unpin_ringbuffer_obj(ringbuf);
> +error_destroy_rbuf:
> +       intel_destroy_ringbuffer_obj(ringbuf);
> +error_free_rbuf:
>         kfree(ringbuf);
> +error_unpin_ctx:
>         if (is_global_default_ctx)
>                 i915_gem_object_ggtt_unpin(ctx_obj);
>         drm_gem_object_unreference(&ctx_obj->base);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index a8f72e8..0c4aab1 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1721,13 +1721,42 @@ static int init_phys_status_page(struct intel_engine_cs *ring)
>         return 0;
>  }
>
> -void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
> +void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
>  {
> -       if (!ringbuf->obj)
> -               return;
> -
>         iounmap(ringbuf->virtual_start);
> +       ringbuf->virtual_start = NULL;
>         i915_gem_object_ggtt_unpin(ringbuf->obj);
> +}
> +
> +int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
> +                                    struct intel_ringbuffer *ringbuf)
> +{
> +       struct drm_i915_private *dev_priv = to_i915(dev);
> +       struct drm_i915_gem_object *obj = ringbuf->obj;
> +       int ret;
> +
> +       ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
> +       if (ret)
> +               return ret;
> +
> +       ret = i915_gem_object_set_to_gtt_domain(obj, true);
> +       if (ret) {
> +               i915_gem_object_ggtt_unpin(obj);
> +               return ret;
> +       }
> +
> +       ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
> +                       i915_gem_obj_ggtt_offset(obj), ringbuf->size);
> +       if (ringbuf->virtual_start == NULL) {
> +               i915_gem_object_ggtt_unpin(obj);
> +               return -EINVAL;
> +       }
> +
> +       return 0;
> +}
> +
> +void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
> +{
>         drm_gem_object_unreference(&ringbuf->obj->base);
>         ringbuf->obj = NULL;
>  }
> @@ -1735,12 +1764,7 @@ void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
>  int intel_alloc_ringbuffer_obj(struct drm_device *dev,
>                                struct intel_ringbuffer *ringbuf)
>  {
> -       struct drm_i915_private *dev_priv = to_i915(dev);
>         struct drm_i915_gem_object *obj;
> -       int ret;
> -
> -       if (ringbuf->obj)
> -               return 0;
>
>         obj = NULL;
>         if (!HAS_LLC(dev))
> @@ -1753,30 +1777,9 @@ int intel_alloc_ringbuffer_obj(struct drm_device *dev,
>         /* mark ring buffers as read-only from GPU side by default */
>         obj->gt_ro = 1;
>
> -       ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
> -       if (ret)
> -               goto err_unref;
> -
> -       ret = i915_gem_object_set_to_gtt_domain(obj, true);
> -       if (ret)
> -               goto err_unpin;
> -
> -       ringbuf->virtual_start =
> -               ioremap_wc(dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj),
> -                               ringbuf->size);
> -       if (ringbuf->virtual_start == NULL) {
> -               ret = -EINVAL;
> -               goto err_unpin;
> -       }
> -
>         ringbuf->obj = obj;
> -       return 0;
>
> -err_unpin:
> -       i915_gem_object_ggtt_unpin(obj);
> -err_unref:
> -       drm_gem_object_unreference(&obj->base);
> -       return ret;
> +       return 0;
>  }
>
>  static int intel_init_ring_buffer(struct drm_device *dev,
> @@ -1813,10 +1816,21 @@ static int intel_init_ring_buffer(struct drm_device *dev,
>                         goto error;
>         }
>
> -       ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
> -       if (ret) {
> -               DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
> ring->name, ret);
> -               goto error;
> +       if (ringbuf->obj == NULL) {
> +               ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
> +               if (ret) {
> +                       DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
> +                                       ring->name, ret);
> +                       goto error;
> +               }
> +
> +               ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
> +               if (ret) {
> +                       DRM_ERROR("Failed to pin and map ringbuffer %s:
> %d\n",
> +                                       ring->name, ret);
> +                       intel_destroy_ringbuffer_obj(ringbuf);
> +                       goto error;
> +               }
>         }
>
>         /* Workaround an erratum on the i830 which causes a hang if
> @@ -1854,6 +1868,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
>         intel_stop_ring_buffer(ring);
>         WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
>
> +       intel_unpin_ringbuffer_obj(ringbuf);
>         intel_destroy_ringbuffer_obj(ringbuf);
>         ring->preallocated_lazy_request = NULL;
>         ring->outstanding_lazy_seqno = 0;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 8c002d2..365854ad 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -382,6 +382,9 @@ intel_write_status_page(struct intel_engine_cs *ring,
>  #define I915_GEM_HWS_SCRATCH_INDEX     0x30
>  #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
>
> +void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
> +int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
> +                                    struct intel_ringbuffer *ringbuf);
>  void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
>  int intel_alloc_ringbuffer_obj(struct drm_device *dev,
>                                struct intel_ringbuffer *ringbuf);
> --
> 1.7.9.5
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
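The error path this patch restructures is the classic staged-unwind idiom: each acquired resource gets a label, and a failure jumps to the label that releases everything acquired so far, in reverse order. Here is a compact userspace sketch of the idiom; the malloc'd resources and label names are stand-ins for the context pin, ringbuffer, and mapping in the patch.

#include <stdio.h>
#include <stdlib.h>

static int setup(void)
{
	void *ctx, *ringbuf, *mapping;
	int ret = 0;

	ctx = malloc(16);		/* models pinning the context object */
	if (!ctx)
		return -1;

	ringbuf = malloc(16);		/* models allocating the ringbuffer */
	if (!ringbuf) {
		ret = -1;
		goto error_unpin_ctx;
	}

	mapping = malloc(16);		/* models pin-and-map of the ringbuffer */
	if (!mapping) {
		ret = -1;
		goto error_free_rbuf;
	}

	/* ... populate the context; later failures unwind in reverse ... */
	free(mapping);
	free(ringbuf);
	free(ctx);
	return 0;

error_free_rbuf:
	free(ringbuf);			/* error_destroy_rbuf in the patch */
error_unpin_ctx:
	free(ctx);			/* the is_global_default_ctx unpin */
	return ret;
}

int main(void)
{
	printf("setup: %d\n", setup());
	return 0;
}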


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 1/4] drm/i915/bdw: Clean up execlist queue items in retire_work
  2014-11-13 10:27   ` [PATCH v5 " Thomas Daniel
@ 2014-11-18  6:29     ` Deepak S
  2014-11-17 14:41       ` akash goel
  0 siblings, 1 reply; 42+ messages in thread
From: Deepak S @ 2014-11-18  6:29 UTC (permalink / raw)
  To: intel-gfx


On Thursday 13 November 2014 03:57 PM, Thomas Daniel wrote:
> No longer create a work item to clean each execlist queue item.
> Instead, move retired execlist requests to a queue and clean up the
> items during retire_requests.
>
> v2: Fix legacy ring path broken during overzealous cleanup
>
> v3: Update idle detection to take execlists queue into account
>
> v4: Grab execlist lock when checking queue state
>
> v5: Fix leaking requests by freeing in execlists_retire_requests.
>
> Issue: VIZ-4274
> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.c         |    9 ++++++
>   drivers/gpu/drm/i915/intel_lrc.c        |   53 ++++++++++++++++++-------------
>   drivers/gpu/drm/i915/intel_lrc.h        |    2 +-
>   drivers/gpu/drm/i915/intel_ringbuffer.h |    1 +
>   4 files changed, 42 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 827edb5..408afe7 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2718,6 +2718,15 @@ i915_gem_retire_requests(struct drm_device *dev)
>   	for_each_ring(ring, dev_priv, i) {
>   		i915_gem_retire_requests_ring(ring);
>   		idle &= list_empty(&ring->request_list);
> +		if (i915.enable_execlists) {
> +			unsigned long flags;
> +
> +			spin_lock_irqsave(&ring->execlist_lock, flags);
> +			idle &= list_empty(&ring->execlist_queue);
> +			spin_unlock_irqrestore(&ring->execlist_lock, flags);
> +
> +			intel_execlists_retire_requests(ring);
> +		}
>   	}
>   
>   	if (idle)
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index cd74e5c..d920297 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -386,7 +386,6 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
>   {
>   	struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
>   	struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
> -	struct drm_i915_private *dev_priv = ring->dev->dev_private;
>   
>   	assert_spin_locked(&ring->execlist_lock);
>   
> @@ -403,7 +402,8 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
>   			 * will update tail past first request's workload */
>   			cursor->elsp_submitted = req0->elsp_submitted;
>   			list_del(&req0->execlist_link);
> -			queue_work(dev_priv->wq, &req0->work);
> +			list_add_tail(&req0->execlist_link,
> +				&ring->execlist_retired_req_list);
>   			req0 = cursor;
>   		} else {
>   			req1 = cursor;
> @@ -425,7 +425,6 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
>   static bool execlists_check_remove_request(struct intel_engine_cs *ring,
>   					   u32 request_id)
>   {
> -	struct drm_i915_private *dev_priv = ring->dev->dev_private;
>   	struct intel_ctx_submit_request *head_req;
>   
>   	assert_spin_locked(&ring->execlist_lock);
> @@ -443,7 +442,8 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring,
>   
>   			if (--head_req->elsp_submitted <= 0) {
>   				list_del(&head_req->execlist_link);
> -				queue_work(dev_priv->wq, &head_req->work);
> +				list_add_tail(&head_req->execlist_link,
> +					&ring->execlist_retired_req_list);
>   				return true;
>   			}
>   		}
> @@ -512,22 +512,6 @@ void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring)
>   		   ((u32)ring->next_context_status_buffer & 0x07) << 8);
>   }
>   
> -static void execlists_free_request_task(struct work_struct *work)
> -{
> -	struct intel_ctx_submit_request *req =
> -		container_of(work, struct intel_ctx_submit_request, work);
> -	struct drm_device *dev = req->ring->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -
> -	intel_runtime_pm_put(dev_priv);
> -
> -	mutex_lock(&dev->struct_mutex);
> -	i915_gem_context_unreference(req->ctx);
> -	mutex_unlock(&dev->struct_mutex);
> -
> -	kfree(req);
> -}
> -
>   static int execlists_context_queue(struct intel_engine_cs *ring,
>   				   struct intel_context *to,
>   				   u32 tail)
> @@ -544,7 +528,6 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
>   	i915_gem_context_reference(req->ctx);
>   	req->ring = ring;
>   	req->tail = tail;
> -	INIT_WORK(&req->work, execlists_free_request_task);
>   
>   	intel_runtime_pm_get(dev_priv);
>   
> @@ -565,7 +548,8 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
>   			WARN(tail_req->elsp_submitted != 0,
>   			     "More than 2 already-submitted reqs queued\n");
>   			list_del(&tail_req->execlist_link);
> -			queue_work(dev_priv->wq, &tail_req->work);
> +			list_add_tail(&tail_req->execlist_link,
> +				&ring->execlist_retired_req_list);
>   		}
>   	}
>   
> @@ -733,6 +717,30 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
>   	return 0;
>   }
>   
> +void intel_execlists_retire_requests(struct intel_engine_cs *ring)
> +{
> +	struct intel_ctx_submit_request *req, *tmp;
> +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> +	unsigned long flags;
> +	struct list_head retired_list;
> +
> +	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> +	if (list_empty(&ring->execlist_retired_req_list))
> +		return;
> +
> +	INIT_LIST_HEAD(&retired_list);
> +	spin_lock_irqsave(&ring->execlist_lock, flags);
> +	list_replace_init(&ring->execlist_retired_req_list, &retired_list);
> +	spin_unlock_irqrestore(&ring->execlist_lock, flags);
> +
> +	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> +		intel_runtime_pm_put(dev_priv);
> +		i915_gem_context_unreference(req->ctx);
> +		list_del(&req->execlist_link);
> +		kfree(req);

Hi Thomas,

I am fine with the current changes after v5.
Reviewed-by: Deepak S <deepak.s@linux.intel.com>

Thanks
Deepak

> +	}
> +}
> +
>   void intel_logical_ring_stop(struct intel_engine_cs *ring)
>   {
>   	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> @@ -1248,6 +1256,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
>   	init_waitqueue_head(&ring->irq_queue);
>   
>   	INIT_LIST_HEAD(&ring->execlist_queue);
> +	INIT_LIST_HEAD(&ring->execlist_retired_req_list);
>   	spin_lock_init(&ring->execlist_lock);
>   	ring->next_context_status_buffer = 0;
>   
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 33c3b4b..84bbf19 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -104,11 +104,11 @@ struct intel_ctx_submit_request {
>   	u32 tail;
>   
>   	struct list_head execlist_link;
> -	struct work_struct work;
>   
>   	int elsp_submitted;
>   };
>   
>   void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring);
> +void intel_execlists_retire_requests(struct intel_engine_cs *ring);
>   
>   #endif /* _INTEL_LRC_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 96479c8..8c002d2 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -235,6 +235,7 @@ struct  intel_engine_cs {
>   	/* Execlists */
>   	spinlock_t execlist_lock;
>   	struct list_head execlist_queue;
> +	struct list_head execlist_retired_req_list;
>   	u8 next_context_status_buffer;
>   	u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
>   	int		(*emit_request)(struct intel_ringbuffer *ringbuf);

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 4/4] drm/i915/bdw: Pin the ringbuffer backing object to GGTT on-demand
  2014-11-13 10:28   ` [PATCH v5 4/4] drm/i915/bdw: Pin the ringbuffer backing object to GGTT on-demand Thomas Daniel
  2014-11-18  5:18     ` akash goel
@ 2014-11-18  6:37     ` Deepak S
  2014-11-18  6:39       ` Deepak S
  1 sibling, 1 reply; 42+ messages in thread
From: Deepak S @ 2014-11-18  6:37 UTC (permalink / raw)
  To: intel-gfx


On Thursday 13 November 2014 03:58 PM, Thomas Daniel wrote:
> Same as with the context, pinning to GGTT regardless is harmful (it
> badly fragments the GGTT and can even exhaust it).
>
> Unfortunately, this case is also more complex than the previous one
> because we need to map and access the ringbuffer in several places
> along the execbuffer path (and we cannot make do by leaving the
> default ringbuffer pinned, as before). Also, the context object
> itself contains a pointer to the ringbuffer address that we have to
> keep updated if we are going to allow the ringbuffer to move around.
>
> v2: Same as with the context pinning, we cannot really do it during
> an interrupt. Also, pin the default ringbuffers objects regardless
> (makes error capture a lot easier).
>
> v3: Rebased. Take a pin reference of the ringbuffer for each item
> in the execlist request queue because the hardware may still be using
> the ringbuffer after the MI_USER_INTERRUPT to notify the seqno update
> is executed.  The ringbuffer must remain pinned until the context save
> is complete.  No longer pin and unpin ringbuffer in
> populate_lr_context() - this transient address is meaningless and the
> pinning can cause a sleep while atomic.
>
> v4: Moved ringbuffer pin and unpin into the lr_context_pin functions.
> Downgraded pinning check BUG_ONs to WARN_ONs.
>
> v5: Reinstated WARN_ONs for unexpected execlist states.  Removed unused
> variable.
>
> Issue: VIZ-4277
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c        |  102 +++++++++++++++++++++++--------
>   drivers/gpu/drm/i915/intel_ringbuffer.c |   85 +++++++++++++++-----------
>   drivers/gpu/drm/i915/intel_ringbuffer.h |    3 +
>   3 files changed, 128 insertions(+), 62 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index f7fa0f7..ca20f91 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -202,6 +202,9 @@ enum {
>   };
>   #define GEN8_CTX_ID_SHIFT 32
>   
> +static int intel_lr_context_pin(struct intel_engine_cs *ring,
> +		struct intel_context *ctx);
> +
>   /**
>    * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
>    * @dev: DRM device.
> @@ -339,7 +342,9 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
>   	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
>   }
>   
> -static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tail)
> +static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
> +				    struct drm_i915_gem_object *ring_obj,
> +				    u32 tail)
>   {
>   	struct page *page;
>   	uint32_t *reg_state;
> @@ -348,6 +353,7 @@ static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tai
>   	reg_state = kmap_atomic(page);
>   
>   	reg_state[CTX_RING_TAIL+1] = tail;
> +	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
>   
>   	kunmap_atomic(reg_state);
>   
> @@ -358,21 +364,25 @@ static int execlists_submit_context(struct intel_engine_cs *ring,
>   				    struct intel_context *to0, u32 tail0,
>   				    struct intel_context *to1, u32 tail1)
>   {
> -	struct drm_i915_gem_object *ctx_obj0;
> +	struct drm_i915_gem_object *ctx_obj0 = to0->engine[ring->id].state;
> +	struct intel_ringbuffer *ringbuf0 = to0->engine[ring->id].ringbuf;
>   	struct drm_i915_gem_object *ctx_obj1 = NULL;
> +	struct intel_ringbuffer *ringbuf1 = NULL;
>   
> -	ctx_obj0 = to0->engine[ring->id].state;
>   	BUG_ON(!ctx_obj0);
>   	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0));
> +	WARN_ON(!i915_gem_obj_is_pinned(ringbuf0->obj));
>   
> -	execlists_ctx_write_tail(ctx_obj0, tail0);
> +	execlists_update_context(ctx_obj0, ringbuf0->obj, tail0);
>   
>   	if (to1) {
> +		ringbuf1 = to1->engine[ring->id].ringbuf;
>   		ctx_obj1 = to1->engine[ring->id].state;
>   		BUG_ON(!ctx_obj1);
>   		WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1));
> +		WARN_ON(!i915_gem_obj_is_pinned(ringbuf1->obj));
>   
> -		execlists_ctx_write_tail(ctx_obj1, tail1);
> +		execlists_update_context(ctx_obj1, ringbuf1->obj, tail1);
>   	}
>   
>   	execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
> @@ -524,6 +534,10 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
>   		return -ENOMEM;
>   	req->ctx = to;
>   	i915_gem_context_reference(req->ctx);
> +
> +	if (to != ring->default_context)
> +		intel_lr_context_pin(ring, to);
> +
>   	req->ring = ring;
>   	req->tail = tail;
>   
> @@ -544,7 +558,7 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
>   
>   		if (to == tail_req->ctx) {
>   			WARN(tail_req->elsp_submitted != 0,
> -			     "More than 2 already-submitted reqs queued\n");
> +				"More than 2 already-submitted reqs queued\n");
>   			list_del(&tail_req->execlist_link);
>   			list_add_tail(&tail_req->execlist_link,
>   				&ring->execlist_retired_req_list);
> @@ -732,6 +746,12 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
>   	spin_unlock_irqrestore(&ring->execlist_lock, flags);
>   
>   	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> +		struct intel_context *ctx = req->ctx;
> +		struct drm_i915_gem_object *ctx_obj =
> +				ctx->engine[ring->id].state;
> +
> +		if (ctx_obj && (ctx != ring->default_context))
> +			intel_lr_context_unpin(ring, ctx);
>   		intel_runtime_pm_put(dev_priv);
>   		i915_gem_context_unreference(req->ctx);
>   		list_del(&req->execlist_link);
> @@ -803,6 +823,7 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring,
>   		struct intel_context *ctx)
>   {
>   	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> +	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
>   	int ret = 0;
>   
>   	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> @@ -810,21 +831,35 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring,
>   		ret = i915_gem_obj_ggtt_pin(ctx_obj,
>   				GEN8_LR_CONTEXT_ALIGN, 0);
>   		if (ret)
> -			ctx->engine[ring->id].unpin_count = 0;
> +			goto reset_unpin_count;
> +
> +		ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
> +		if (ret)
> +			goto unpin_ctx_obj;
>   	}
>   
>   	return ret;
> +
> +unpin_ctx_obj:
> +	i915_gem_object_ggtt_unpin(ctx_obj);
> +reset_unpin_count:
> +	ctx->engine[ring->id].unpin_count = 0;
> +
> +	return ret;
>   }
>   
>   void intel_lr_context_unpin(struct intel_engine_cs *ring,
>   		struct intel_context *ctx)
>   {
>   	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> +	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
>   
>   	if (ctx_obj) {
>   		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));

With the pin-specific mutex from the previous patch set removed,
Reviewed-by: Deepak S <deepak.s@linux.intel.com>

> -		if (--ctx->engine[ring->id].unpin_count == 0)
> +		if (--ctx->engine[ring->id].unpin_count == 0) {
> +			intel_unpin_ringbuffer_obj(ringbuf);
>   			i915_gem_object_ggtt_unpin(ctx_obj);
> +		}
>   	}
>   }
>   
> @@ -1541,7 +1576,6 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
>   {
>   	struct drm_device *dev = ring->dev;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct drm_i915_gem_object *ring_obj = ringbuf->obj;
>   	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
>   	struct page *page;
>   	uint32_t *reg_state;
> @@ -1587,7 +1621,9 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
>   	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
>   	reg_state[CTX_RING_TAIL+1] = 0;
>   	reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
> -	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
> +	/* Ring buffer start address is not known until the buffer is pinned.
> +	 * It is written to the context image in execlists_update_context()
> +	 */
>   	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
>   	reg_state[CTX_RING_BUFFER_CONTROL+1] =
>   			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID;
> @@ -1669,10 +1705,12 @@ void intel_lr_context_free(struct intel_context *ctx)
>   					ctx->engine[i].ringbuf;
>   			struct intel_engine_cs *ring = ringbuf->ring;
>   
> +			if (ctx == ring->default_context) {
> +				intel_unpin_ringbuffer_obj(ringbuf);
> +				i915_gem_object_ggtt_unpin(ctx_obj);
> +			}
>   			intel_destroy_ringbuffer_obj(ringbuf);
>   			kfree(ringbuf);
> -			if (ctx == ring->default_context)
> -				i915_gem_object_ggtt_unpin(ctx_obj);
>   			drm_gem_object_unreference(&ctx_obj->base);
>   		}
>   	}
> @@ -1770,11 +1808,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>   	if (!ringbuf) {
>   		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
>   				ring->name);
> -		if (is_global_default_ctx)
> -			i915_gem_object_ggtt_unpin(ctx_obj);
> -		drm_gem_object_unreference(&ctx_obj->base);
>   		ret = -ENOMEM;
> -		return ret;
> +		goto error_unpin_ctx;
>   	}
>   
>   	ringbuf->ring = ring;
> @@ -1787,22 +1822,30 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>   	ringbuf->space = ringbuf->size;
>   	ringbuf->last_retired_head = -1;
>   
> -	/* TODO: For now we put this in the mappable region so that we can reuse
> -	 * the existing ringbuffer code which ioremaps it. When we start
> -	 * creating many contexts, this will no longer work and we must switch
> -	 * to a kmapish interface.
> -	 */
> -	ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
> -	if (ret) {
> -		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n",
> +	if (ringbuf->obj == NULL) {
> +		ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
> +		if (ret) {
> +			DRM_DEBUG_DRIVER(
> +				"Failed to allocate ringbuffer obj %s: %d\n",
>   				ring->name, ret);
> -		goto error;
> +			goto error_free_rbuf;
> +		}
> +
> +		if (is_global_default_ctx) {
> +			ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
> +			if (ret) {
> +				DRM_ERROR(
> +					"Failed to pin and map ringbuffer %s: %d\n",
> +					ring->name, ret);
> +				goto error_destroy_rbuf;
> +			}
> +		}
> +
>   	}
>   
>   	ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
>   	if (ret) {
>   		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
> -		intel_destroy_ringbuffer_obj(ringbuf);
>   		goto error;
>   	}
>   
> @@ -1823,7 +1866,6 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>   			DRM_ERROR("Init render state failed: %d\n", ret);
>   			ctx->engine[ring->id].ringbuf = NULL;
>   			ctx->engine[ring->id].state = NULL;
> -			intel_destroy_ringbuffer_obj(ringbuf);
>   			goto error;
>   		}
>   		ctx->rcs_initialized = true;
> @@ -1832,7 +1874,13 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>   	return 0;
>   
>   error:
> +	if (is_global_default_ctx)
> +		intel_unpin_ringbuffer_obj(ringbuf);
> +error_destroy_rbuf:
> +	intel_destroy_ringbuffer_obj(ringbuf);
> +error_free_rbuf:
>   	kfree(ringbuf);
> +error_unpin_ctx:
>   	if (is_global_default_ctx)
>   		i915_gem_object_ggtt_unpin(ctx_obj);
>   	drm_gem_object_unreference(&ctx_obj->base);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index a8f72e8..0c4aab1 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1721,13 +1721,42 @@ static int init_phys_status_page(struct intel_engine_cs *ring)
>   	return 0;
>   }
>   
> -void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
> +void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
>   {
> -	if (!ringbuf->obj)
> -		return;
> -
>   	iounmap(ringbuf->virtual_start);
> +	ringbuf->virtual_start = NULL;
>   	i915_gem_object_ggtt_unpin(ringbuf->obj);
> +}
> +
> +int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
> +				     struct intel_ringbuffer *ringbuf)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(dev);
> +	struct drm_i915_gem_object *obj = ringbuf->obj;
> +	int ret;
> +
> +	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
> +	if (ret)
> +		return ret;
> +
> +	ret = i915_gem_object_set_to_gtt_domain(obj, true);
> +	if (ret) {
> +		i915_gem_object_ggtt_unpin(obj);
> +		return ret;
> +	}
> +
> +	ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
> +			i915_gem_obj_ggtt_offset(obj), ringbuf->size);
> +	if (ringbuf->virtual_start == NULL) {
> +		i915_gem_object_ggtt_unpin(obj);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
> +{
>   	drm_gem_object_unreference(&ringbuf->obj->base);
>   	ringbuf->obj = NULL;
>   }
> @@ -1735,12 +1764,7 @@ void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
>   int intel_alloc_ringbuffer_obj(struct drm_device *dev,
>   			       struct intel_ringbuffer *ringbuf)
>   {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
>   	struct drm_i915_gem_object *obj;
> -	int ret;
> -
> -	if (ringbuf->obj)
> -		return 0;
>   
>   	obj = NULL;
>   	if (!HAS_LLC(dev))
> @@ -1753,30 +1777,9 @@ int intel_alloc_ringbuffer_obj(struct drm_device *dev,
>   	/* mark ring buffers as read-only from GPU side by default */
>   	obj->gt_ro = 1;
>   
> -	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
> -	if (ret)
> -		goto err_unref;
> -
> -	ret = i915_gem_object_set_to_gtt_domain(obj, true);
> -	if (ret)
> -		goto err_unpin;
> -
> -	ringbuf->virtual_start =
> -		ioremap_wc(dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj),
> -				ringbuf->size);
> -	if (ringbuf->virtual_start == NULL) {
> -		ret = -EINVAL;
> -		goto err_unpin;
> -	}
> -
>   	ringbuf->obj = obj;
> -	return 0;
>   
> -err_unpin:
> -	i915_gem_object_ggtt_unpin(obj);
> -err_unref:
> -	drm_gem_object_unreference(&obj->base);
> -	return ret;
> +	return 0;
>   }
>   
>   static int intel_init_ring_buffer(struct drm_device *dev,
> @@ -1813,10 +1816,21 @@ static int intel_init_ring_buffer(struct drm_device *dev,
>   			goto error;
>   	}
>   
> -	ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
> -	if (ret) {
> -		DRM_ERROR("Failed to allocate ringbuffer %s: %d\n", ring->name, ret);
> -		goto error;
> +	if (ringbuf->obj == NULL) {
> +		ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
> +		if (ret) {
> +			DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
> +					ring->name, ret);
> +			goto error;
> +		}
> +
> +		ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
> +		if (ret) {
> +			DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
> +					ring->name, ret);
> +			intel_destroy_ringbuffer_obj(ringbuf);
> +			goto error;
> +		}
>   	}
>   
>   	/* Workaround an erratum on the i830 which causes a hang if
> @@ -1854,6 +1868,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
>   	intel_stop_ring_buffer(ring);
>   	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
>   
> +	intel_unpin_ringbuffer_obj(ringbuf);
>   	intel_destroy_ringbuffer_obj(ringbuf);
>   	ring->preallocated_lazy_request = NULL;
>   	ring->outstanding_lazy_seqno = 0;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 8c002d2..365854ad 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -382,6 +382,9 @@ intel_write_status_page(struct intel_engine_cs *ring,
>   #define I915_GEM_HWS_SCRATCH_INDEX	0x30
>   #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
>   
> +void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
> +int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
> +				     struct intel_ringbuffer *ringbuf);
>   void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
>   int intel_alloc_ringbuffer_obj(struct drm_device *dev,
>   			       struct intel_ringbuffer *ringbuf);


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 4/4] drm/i915/bdw: Pin the ringbuffer backing object to GGTT on-demand
  2014-11-18  6:37     ` Deepak S
@ 2014-11-18  6:39       ` Deepak S
  2014-11-17 14:29         ` Daniel Vetter
  0 siblings, 1 reply; 42+ messages in thread
From: Deepak S @ 2014-11-18  6:39 UTC (permalink / raw)
  To: intel-gfx


On Tuesday 18 November 2014 12:07 PM, Deepak S wrote:
>
> On Thursday 13 November 2014 03:58 PM, Thomas Daniel wrote:
>> Same as with the context, pinning to GGTT regardless is harmful (it
>> badly fragments the GGTT and can even exhaust it).
>>
>> [snip]
>>
>>   void intel_lr_context_unpin(struct intel_engine_cs *ring,
>>           struct intel_context *ctx)
>>   {
>>       struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
>> +    struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
>>
>>       if (ctx_obj) {
>>           WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
>
> With the pin-specific mutex from the previous patch set removed,

Oops, this comment was for the previous patch in the series :( Since I reviewed the patch offline, the comments got mixed up :)
Anyway, your patch looks fine. Reviewed-by: Deepak S <deepak.s@linux.intel.com>

> Reviewed-by: Deepak S <deepak.s@linux.intel.com>
>
>> [snip]


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
  2014-11-13 10:28   ` [PATCH v5 " Thomas Daniel
  2014-11-17 14:38     ` akash goel
  2014-11-17 18:09     ` Daniel Vetter
@ 2014-11-18  6:40     ` Deepak S
  2014-11-17 14:23       ` Daniel Vetter
  2014-11-24 14:24     ` Daniel Vetter
  3 siblings, 1 reply; 42+ messages in thread
From: Deepak S @ 2014-11-18  6:40 UTC (permalink / raw)
  To: intel-gfx


On Thursday 13 November 2014 03:58 PM, Thomas Daniel wrote:
> From: Oscar Mateo <oscar.mateo@intel.com>
>
> Up until now, we have pinned every logical ring context backing object
> during creation, and left it pinned until destruction. This made my life
> easier, but it's a harmful thing to do, because we cause fragmentation
> of the GGTT (and, eventually, we would run out of space).
>
> This patch makes the pinning on-demand: the backing objects of the two
> contexts that are written to the ELSP are pinned right before submission
> and unpinned once the hardware is done with them. The only context that
> is still pinned regardless is the global default one, so that the HWS can
> still be accessed in the same way (ring->status_page).
>
> v2: In the early version of this patch, we were pinning the context as
> we put it into the ELSP: on the one hand, this is very efficient because
> only a maximum two contexts are pinned at any given time, but on the other
> hand, we cannot really pin in interrupt time :(
>
> v3: Use a mutex rather than atomic_t to protect pin count to avoid races.
> Do not unpin default context in free_request.
>
> v4: Break out pin and unpin into functions.  Fix style problems reported
> by checkpatch
>
> v5: Remove unpin_lock as all pinning and unpinning is done with the struct
> mutex already locked.  Add WARN_ONs to make sure this is the case in future.
>
> Issue: VIZ-4277
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c |   12 +++++-
>   drivers/gpu/drm/i915/i915_drv.h     |    1 +
>   drivers/gpu/drm/i915/i915_gem.c     |   39 +++++++++++++-------
>   drivers/gpu/drm/i915/intel_lrc.c    |   69 +++++++++++++++++++++++++++++------
>   drivers/gpu/drm/i915/intel_lrc.h    |    4 ++
>   5 files changed, 98 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index e60d5c2..6eaf813 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1799,10 +1799,16 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
>   				continue;
>   
>   			if (ctx_obj) {
> -				struct page *page = i915_gem_object_get_page(ctx_obj, 1);
> -				uint32_t *reg_state = kmap_atomic(page);
> +				struct page *page;
> +				uint32_t *reg_state;
>   				int j;
>   
> +				i915_gem_obj_ggtt_pin(ctx_obj,
> +						GEN8_LR_CONTEXT_ALIGN, 0);
> +
> +				page = i915_gem_object_get_page(ctx_obj, 1);
> +				reg_state = kmap_atomic(page);
> +
>   				seq_printf(m, "CONTEXT: %s %u\n", ring->name,
>   						intel_execlists_ctx_id(ctx_obj));
>   
> @@ -1814,6 +1820,8 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
>   				}
>   				kunmap_atomic(reg_state);
>   
> +				i915_gem_object_ggtt_unpin(ctx_obj);
> +
>   				seq_putc(m, '\n');
>   			}
>   		}
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 059330c..3c7299d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -655,6 +655,7 @@ struct intel_context {
>   	struct {
>   		struct drm_i915_gem_object *state;
>   		struct intel_ringbuffer *ringbuf;
> +		int unpin_count;
>   	} engine[I915_NUM_RINGS];
>   
>   	struct list_head link;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 408afe7..2ee6996 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2494,12 +2494,18 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
>   
>   static void i915_gem_free_request(struct drm_i915_gem_request *request)
>   {
> +	struct intel_context *ctx = request->ctx;
> +
>   	list_del(&request->list);
>   	i915_gem_request_remove_from_client(request);
>   
> -	if (request->ctx)
> -		i915_gem_context_unreference(request->ctx);
> +	if (i915.enable_execlists && ctx) {
> +		struct intel_engine_cs *ring = request->ring;
>   
> +		if (ctx != ring->default_context)
> +			intel_lr_context_unpin(ring, ctx);
> +		i915_gem_context_unreference(ctx);
> +	}
>   	kfree(request);
>   }
>   
> @@ -2554,6 +2560,23 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
>   	}
>   
>   	/*
> +	 * Clear the execlists queue up before freeing the requests, as those
> +	 * are the ones that keep the context and ringbuffer backing objects
> +	 * pinned in place.
> +	 */
> +	while (!list_empty(&ring->execlist_queue)) {
> +		struct intel_ctx_submit_request *submit_req;
> +
> +		submit_req = list_first_entry(&ring->execlist_queue,
> +				struct intel_ctx_submit_request,
> +				execlist_link);
> +		list_del(&submit_req->execlist_link);
> +		intel_runtime_pm_put(dev_priv);
> +		i915_gem_context_unreference(submit_req->ctx);
> +		kfree(submit_req);
> +	}
> +
> +	/*
>   	 * We must free the requests after all the corresponding objects have
>   	 * been moved off active lists. Which is the same order as the normal
>   	 * retire_requests function does. This is important if object hold
> @@ -2570,18 +2593,6 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
>   		i915_gem_free_request(request);
>   	}
>   
> -	while (!list_empty(&ring->execlist_queue)) {
> -		struct intel_ctx_submit_request *submit_req;
> -
> -		submit_req = list_first_entry(&ring->execlist_queue,
> -				struct intel_ctx_submit_request,
> -				execlist_link);
> -		list_del(&submit_req->execlist_link);
> -		intel_runtime_pm_put(dev_priv);
> -		i915_gem_context_unreference(submit_req->ctx);
> -		kfree(submit_req);
> -	}
> -
>   	/* These may not have been flush before the reset, do so now */
>   	kfree(ring->preallocated_lazy_request);
>   	ring->preallocated_lazy_request = NULL;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 906b985..f7fa0f7 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -139,8 +139,6 @@
>   #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
>   #define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
>   
> -#define GEN8_LR_CONTEXT_ALIGN 4096
> -
>   #define RING_EXECLIST_QFULL		(1 << 0x2)
>   #define RING_EXECLIST1_VALID		(1 << 0x3)
>   #define RING_EXECLIST0_VALID		(1 << 0x4)
> @@ -801,9 +799,40 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
>   	execlists_context_queue(ring, ctx, ringbuf->tail);
>   }
>   
> +static int intel_lr_context_pin(struct intel_engine_cs *ring,
> +		struct intel_context *ctx)
> +{
> +	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> +	int ret = 0;
> +
> +	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));

With the pin-specific mutex from the previous patch set removed.
Reviewed-by: Deepak S <deepak.s@linux.intel.com>

> +	if (ctx->engine[ring->id].unpin_count++ == 0) {
> +		ret = i915_gem_obj_ggtt_pin(ctx_obj,
> +				GEN8_LR_CONTEXT_ALIGN, 0);
> +		if (ret)
> +			ctx->engine[ring->id].unpin_count = 0;
> +	}
> +
> +	return ret;
> +}
> +
> +void intel_lr_context_unpin(struct intel_engine_cs *ring,
> +		struct intel_context *ctx)
> +{
> +	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> +
> +	if (ctx_obj) {
> +		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> +		if (--ctx->engine[ring->id].unpin_count == 0)
> +			i915_gem_object_ggtt_unpin(ctx_obj);
> +	}
> +}
> +
>   static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
>   				    struct intel_context *ctx)
>   {
> +	int ret;
> +
>   	if (ring->outstanding_lazy_seqno)
>   		return 0;
>   
> @@ -814,6 +843,14 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
>   		if (request == NULL)
>   			return -ENOMEM;
>   
> +		if (ctx != ring->default_context) {
> +			ret = intel_lr_context_pin(ring, ctx);
> +			if (ret) {
> +				kfree(request);
> +				return ret;
> +			}
> +		}
> +
>   		/* Hold a reference to the context this request belongs to
>   		 * (we will need it when the time comes to emit/retire the
>   		 * request).
> @@ -1626,12 +1663,16 @@ void intel_lr_context_free(struct intel_context *ctx)
>   
>   	for (i = 0; i < I915_NUM_RINGS; i++) {
>   		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
> -		struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
>   
>   		if (ctx_obj) {
> +			struct intel_ringbuffer *ringbuf =
> +					ctx->engine[i].ringbuf;
> +			struct intel_engine_cs *ring = ringbuf->ring;
> +
>   			intel_destroy_ringbuffer_obj(ringbuf);
>   			kfree(ringbuf);
> -			i915_gem_object_ggtt_unpin(ctx_obj);
> +			if (ctx == ring->default_context)
> +				i915_gem_object_ggtt_unpin(ctx_obj);
>   			drm_gem_object_unreference(&ctx_obj->base);
>   		}
>   	}
> @@ -1695,6 +1736,7 @@ static int lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
>   int intel_lr_context_deferred_create(struct intel_context *ctx,
>   				     struct intel_engine_cs *ring)
>   {
> +	const bool is_global_default_ctx = (ctx == ring->default_context);
>   	struct drm_device *dev = ring->dev;
>   	struct drm_i915_gem_object *ctx_obj;
>   	uint32_t context_size;
> @@ -1714,18 +1756,22 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>   		return ret;
>   	}
>   
> -	ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
> -	if (ret) {
> -		DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n", ret);
> -		drm_gem_object_unreference(&ctx_obj->base);
> -		return ret;
> +	if (is_global_default_ctx) {
> +		ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
> +		if (ret) {
> +			DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n",
> +					ret);
> +			drm_gem_object_unreference(&ctx_obj->base);
> +			return ret;
> +		}
>   	}
>   
>   	ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
>   	if (!ringbuf) {
>   		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
>   				ring->name);
> -		i915_gem_object_ggtt_unpin(ctx_obj);
> +		if (is_global_default_ctx)
> +			i915_gem_object_ggtt_unpin(ctx_obj);
>   		drm_gem_object_unreference(&ctx_obj->base);
>   		ret = -ENOMEM;
>   		return ret;
> @@ -1787,7 +1833,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>   
>   error:
>   	kfree(ringbuf);
> -	i915_gem_object_ggtt_unpin(ctx_obj);
> +	if (is_global_default_ctx)
> +		i915_gem_object_ggtt_unpin(ctx_obj);
>   	drm_gem_object_unreference(&ctx_obj->base);
>   	return ret;
>   }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 84bbf19..14b216b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -24,6 +24,8 @@
>   #ifndef _INTEL_LRC_H_
>   #define _INTEL_LRC_H_
>   
> +#define GEN8_LR_CONTEXT_ALIGN 4096
> +
>   /* Execlists regs */
>   #define RING_ELSP(ring)			((ring)->mmio_base+0x230)
>   #define RING_EXECLIST_STATUS(ring)	((ring)->mmio_base+0x234)
> @@ -67,6 +69,8 @@ int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
>   void intel_lr_context_free(struct intel_context *ctx);
>   int intel_lr_context_deferred_create(struct intel_context *ctx,
>   				     struct intel_engine_cs *ring);
> +void intel_lr_context_unpin(struct intel_engine_cs *ring,
> +		struct intel_context *ctx);
>   
>   /* Execlists */
>   int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
  2014-11-17 18:09     ` Daniel Vetter
@ 2014-11-18  9:27       ` Daniel, Thomas
  2014-11-18 10:48         ` Daniel, Thomas
  0 siblings, 1 reply; 42+ messages in thread
From: Daniel, Thomas @ 2014-11-18  9:27 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

> -----Original Message-----
> From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of Daniel
> Vetter
> Sent: Monday, November 17, 2014 6:09 PM
> To: Daniel, Thomas
> Cc: intel-gfx@lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the context
> backing objects to GGTT on-demand
> 
> On Thu, Nov 13, 2014 at 10:28:10AM +0000, Thomas Daniel wrote:
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > b/drivers/gpu/drm/i915/i915_drv.h index 059330c..3c7299d 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -655,6 +655,7 @@ struct intel_context {
> >  	struct {
> >  		struct drm_i915_gem_object *state;
> >  		struct intel_ringbuffer *ringbuf;
> > +		int unpin_count;
> 
> Pinning is already refcounted. Why this additional refcount?

The vma.pin_count is only allocated 4 bits of storage.  If this restriction can be lifted then I can use that.
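
For reference, this is roughly the shape of the constraint as I understand it (a sketch, not necessarily the exact contents of the tree):

	struct i915_vma {
		...
		/* only 4 bits, so at most 15 simultaneous pins can be tracked */
		unsigned int pin_count:4;
	#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
	};

and with execlists we can easily have far more than 15 requests outstanding against a single context.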

> And yes I've only realized this now that you've supplied the review
> comments from Akash. I really rely upon the review discussions to spot such
> low-level implementation details.

I know, and I explicitly asked the guys to post comments to the mailing list.

Cheers,
Thomas.

> -Daniel

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
  2014-11-18  9:27       ` Daniel, Thomas
@ 2014-11-18 10:48         ` Daniel, Thomas
  2014-11-18 14:33           ` Daniel Vetter
  0 siblings, 1 reply; 42+ messages in thread
From: Daniel, Thomas @ 2014-11-18 10:48 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

> [snip]
> >
> > On Thu, Nov 13, 2014 at 10:28:10AM +0000, Thomas Daniel wrote:
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > > b/drivers/gpu/drm/i915/i915_drv.h index 059330c..3c7299d 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -655,6 +655,7 @@ struct intel_context {
> > >  	struct {
> > >  		struct drm_i915_gem_object *state;
> > >  		struct intel_ringbuffer *ringbuf;
> > > +		int unpin_count;
> >
> > Pinning is already refcounted. Why this additional refcount?
> 
> The vma.pin_count is only allocated 4 bits of storage.  If this restriction can be
> lifted then I can use that.

Actually I just tried to implement this, and it causes a problem for patch 4 of this set: the unpin_count is also used for the ringbuffer object, which has an ioremap as well as a GGTT pin.
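
In patch 4 the same counter has to balance both resources on the unpin side (this is the unpin path from that patch):

	if (--ctx->engine[ring->id].unpin_count == 0) {
		intel_unpin_ringbuffer_obj(ringbuf);
		i915_gem_object_ggtt_unpin(ctx_obj);
	}

so simply switching the context object over to vma.pin_count would leave the ringbuffer map/unmap side unpaired.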

Thomas.


> > And yes I've only realized this now that you've supplied the review
> > comments from Akash. I really rely upon the review discussions to spot
> > such low-level implementation details.
> 
> I know, and I explicitly asked the guys to post comments to the mailing list.
> 
> Cheers,
> Thomas.
> 
> > -Daniel

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
  2014-11-17 14:23       ` Daniel Vetter
@ 2014-11-18 14:27         ` Deepak S
  0 siblings, 0 replies; 42+ messages in thread
From: Deepak S @ 2014-11-18 14:27 UTC (permalink / raw)
  To: intel-gfx


On Monday 17 November 2014 07:53 PM, Daniel Vetter wrote:
> On Tue, Nov 18, 2014 at 12:10:51PM +0530, Deepak S wrote:
>> On Thursday 13 November 2014 03:58 PM, Thomas Daniel wrote:
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>> index 906b985..f7fa0f7 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -139,8 +139,6 @@
>>>   #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
>>>   #define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
>>> -#define GEN8_LR_CONTEXT_ALIGN 4096
>>> -
>>>   #define RING_EXECLIST_QFULL		(1 << 0x2)
>>>   #define RING_EXECLIST1_VALID		(1 << 0x3)
>>>   #define RING_EXECLIST0_VALID		(1 << 0x4)
>>> @@ -801,9 +799,40 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
>>>   	execlists_context_queue(ring, ctx, ringbuf->tail);
>>>   }
>>> +static int intel_lr_context_pin(struct intel_engine_cs *ring,
>>> +		struct intel_context *ctx)
>>> +{
>>> +	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
>>> +	int ret = 0;
>>> +
>>> +	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
>> With the pin-specific mutex from the previous patch set removed.
> Pardon my ignorance but I'm completely lost on this review comment here.
> Deepak, can you please elaborate on which lock you mean, and in which
> exact version of the previous patch? I didn't find any locking at all in
> the preceding patch here ...
>
> Thanks, Daniel

Hi Daniel,

+static int intel_lr_context_pin(struct intel_engine_cs *ring,
+		struct intel_context *ctx)
+{
+	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+	int ret = 0;
+
+	mutex_lock(&ctx->engine[ring->id].unpin_lock);
+	if (ctx->engine[ring->id].unpin_count++ == 0) {
+		ret = i915_gem_obj_ggtt_pin(ctx_obj,
+				GEN8_LR_CONTEXT_ALIGN, 0);
+		if (ret)
+			ctx->engine[ring->id].unpin_count = 0;
+	}
+	mutex_unlock(&ctx->engine[ring->id].unpin_lock);
+
+	return ret;
+}

In the previous patch set we had a "mutex_lock(&ctx->engine[ring->id].unpin_lock);".

Since intel_lr_context_pin() is already called under the struct mutex, we don't need the unpin_lock. That was the change in the latest patch set :)

Thanks
Deepak




^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 4/4] drm/i915/bdw: Pin the ringbuffer backing object to GGTT on-demand
  2014-11-17 14:29         ` Daniel Vetter
@ 2014-11-18 14:30           ` Deepak S
  0 siblings, 0 replies; 42+ messages in thread
From: Deepak S @ 2014-11-18 14:30 UTC (permalink / raw)
  To: intel-gfx


On Monday 17 November 2014 07:59 PM, Daniel Vetter wrote:
> On Tue, Nov 18, 2014 at 12:09:54PM +0530, Deepak S wrote:
>> On Tuesday 18 November 2014 12:07 PM, Deepak S wrote:
>>> With the pin-specific mutex from the previous patch set removed,
>> Oops, this comment was for the previous patch in the series :( Since I
>> reviewed the patch offline, the comments got mixed up :)
> Please forward these comments from the private discussion to the mailing
> list. Review isn't just about code correctness, but about communication -
> yes, I (and domain experts) actually read all this stuff that floats
> around and will jump into the discussion if there's something important or
> tricky being discussed.
>
> Second reason for public review is that the important part about the r-b
> tag isn't that review happened, but by whom. So this is all about
> reputation building and playing to people's various strengths. And if you
> do review in private, none of that can happen, which makes the review a
> lot less useful. So let's extract the most value from all that engineering
> time we invest into reviewing and _always_ do the review in public.
>
> Thanks, Daniel

Thanks, Daniel. I will make sure to add the comments to the mailing list :)



^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
  2014-11-18 10:48         ` Daniel, Thomas
@ 2014-11-18 14:33           ` Daniel Vetter
  2014-11-18 14:51             ` Daniel, Thomas
  0 siblings, 1 reply; 42+ messages in thread
From: Daniel Vetter @ 2014-11-18 14:33 UTC (permalink / raw)
  To: Daniel, Thomas; +Cc: intel-gfx

On Tue, Nov 18, 2014 at 10:48:09AM +0000, Daniel, Thomas wrote:
> > -----Original Message-----
> > From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On Behalf
> > Of Daniel, Thomas
> > Sent: Tuesday, November 18, 2014 9:28 AM
> > To: Daniel Vetter
> > Cc: intel-gfx@lists.freedesktop.org
> > Subject: Re: [Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the context
> > backing objects to GGTT on-demand
> > 
> > > -----Original Message-----
> > > From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of
> > > Daniel Vetter
> > > Sent: Monday, November 17, 2014 6:09 PM
> > > To: Daniel, Thomas
> > > Cc: intel-gfx@lists.freedesktop.org
> > > Subject: Re: [Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the context
> > > backing objects to GGTT on-demand
> > >
> > > On Thu, Nov 13, 2014 at 10:28:10AM +0000, Thomas Daniel wrote:
> > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > > > b/drivers/gpu/drm/i915/i915_drv.h index 059330c..3c7299d 100644
> > > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > > @@ -655,6 +655,7 @@ struct intel_context {
> > > >  	struct {
> > > >  		struct drm_i915_gem_object *state;
> > > >  		struct intel_ringbuffer *ringbuf;
> > > > +		int unpin_count;
> > >
> > > Pinning is already refcounted. Why this additional refcount?
> > 
> > The vma.pin_count is only allocated 4 bits of storage.  If this restriction can be
> > lifted then I can use that.

Those 4 bits are good enough for legacy contexts, so I wonder a bit what's
so massively different for execlist contexts.
 
> Actually I just tried to implement this, and it causes a problem for patch
> 4 of this set: the unpin_count is also used for the ringbuffer object,
> which has an ioremap as well as a GGTT pin.

Yeah, ioremap needs to be redone every time we pin/unpin. But on sane
archs it's almost no overhead really. And if this does start to matter
(shudder for 32bit kernels on gen8) then we can fix it ...
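
Concretely, with patch 4 each pin/unpin cycle of a non-default context costs one remap pair, along these lines (condensed from the patch, not new code):

	/* pin: */
	ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
			i915_gem_obj_ggtt_offset(obj), ringbuf->size);
	/* unpin: */
	iounmap(ringbuf->virtual_start);

which is cheap on 64-bit kernels.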
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
  2014-11-18 14:33           ` Daniel Vetter
@ 2014-11-18 14:51             ` Daniel, Thomas
  2014-11-18 15:11               ` Daniel Vetter
  0 siblings, 1 reply; 42+ messages in thread
From: Daniel, Thomas @ 2014-11-18 14:51 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

> -----Original Message-----
> From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of Daniel
> Vetter
> Sent: Tuesday, November 18, 2014 2:33 PM
> To: Daniel, Thomas
> Cc: Daniel Vetter; intel-gfx@lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the context
> backing objects to GGTT on-demand
> 
> On Tue, Nov 18, 2014 at 10:48:09AM +0000, Daniel, Thomas wrote:
> > [snip]
> > > >
> > > > On Thu, Nov 13, 2014 at 10:28:10AM +0000, Thomas Daniel wrote:
> > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > > > > b/drivers/gpu/drm/i915/i915_drv.h index 059330c..3c7299d 100644
> > > > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > > > @@ -655,6 +655,7 @@ struct intel_context {
> > > > >  	struct {
> > > > >  		struct drm_i915_gem_object *state;
> > > > >  		struct intel_ringbuffer *ringbuf;
> > > > > +		int unpin_count;
> > > >
> > > > Pinning is already refcounted. Why this additional refcount?
> > >
> > > The vma.pin_count is only allocated 4 bits of storage.  If this
> > > restriction can be lifted then I can use that.
> 
> Those 4 bits are good enough for legacy contexts, so I wonder a bit what's so
> massively different for execlist contexts.
With execlists, in order to dynamically unpin the LRC backing object and
ring buffer object when not required we take a reference for each
execlist request that uses them (remember that the execlist request
lifecycle is currently different from the execbuffer request).  This can
be a lot, especially in some of the less sane i-g-t tests.
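
For illustration, the pin/unpin refcounting being discussed looks roughly
like this (a sketch only; the helper and constant names are assumed from
the v5 patches and the i915 code of the time, not quoted verbatim):

static int intel_lr_context_pin(struct intel_engine_cs *ring,
				struct intel_context *ctx)
{
	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
	int ret = 0;

	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));

	/* Only the first reference takes the actual GGTT pin; later
	 * references just bump unpin_count, so the 4-bit vma.pin_count
	 * is never exhausted by thousands of outstanding requests. */
	if (ctx->engine[ring->id].unpin_count++ == 0)
		ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);

	return ret;
}

static void intel_lr_context_unpin(struct intel_engine_cs *ring,
				   struct intel_context *ctx)
{
	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;

	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));

	/* The last reference drops the actual GGTT pin. */
	if (--ctx->engine[ring->id].unpin_count == 0)
		i915_gem_object_ggtt_unpin(ctx_obj);
}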

> > Actually I just tried to implement this, it causes a problem for patch
> > 4 of this set as the unpin_count is also used for the ringbuffer
> > object which has an ioremap as well as a ggtt pin.
> 
> Yeah, ioremap needs to be redone every time we pin/unpin. But on sane
> archs it's almost no overhead really. And if this does start to matter (shudder
> for 32bit kernels on gen8) then we can fix it ...
Hm, so the CPU vaddr of the ring buffer will move around as more
requests reference it, which I suppose is not a problem.  We will use a
lot of address space (again, especially with the i-g-t stress tests
which can submit tens of thousands of requests in a very short space of
time).  What would the fix be?  An extra reference count for the
ioremap?  Looks familiar :)

I still think it's best to keep the context unpin_count for execlists mode.

Thomas.

> -Daniel
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
  2014-11-18 14:51             ` Daniel, Thomas
@ 2014-11-18 15:11               ` Daniel Vetter
  2014-11-18 15:32                 ` Daniel, Thomas
  0 siblings, 1 reply; 42+ messages in thread
From: Daniel Vetter @ 2014-11-18 15:11 UTC (permalink / raw)
  To: Daniel, Thomas; +Cc: intel-gfx

On Tue, Nov 18, 2014 at 02:51:52PM +0000, Daniel, Thomas wrote:
> > -----Original Message-----
> > From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of Daniel
> > Vetter
> > Sent: Tuesday, November 18, 2014 2:33 PM
> > To: Daniel, Thomas
> > Cc: Daniel Vetter; intel-gfx@lists.freedesktop.org
> > Subject: Re: [Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the context
> > backing objects to GGTT on-demand
> > 
> > On Tue, Nov 18, 2014 at 10:48:09AM +0000, Daniel, Thomas wrote:
> > > > -----Original Message-----
> > > > From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On
> > > > Behalf Of Daniel, Thomas
> > > > Sent: Tuesday, November 18, 2014 9:28 AM
> > > > To: Daniel Vetter
> > > > Cc: intel-gfx@lists.freedesktop.org
> > > > Subject: Re: [Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the
> > > > context backing objects to GGTT on-demand
> > > >
> > > > > -----Original Message-----
> > > > > From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of
> > > > > Daniel Vetter
> > > > > Sent: Monday, November 17, 2014 6:09 PM
> > > > > To: Daniel, Thomas
> > > > > Cc: intel-gfx@lists.freedesktop.org
> > > > > Subject: Re: [Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the
> > > > > context backing objects to GGTT on-demand
> > > > >
> > > > > On Thu, Nov 13, 2014 at 10:28:10AM +0000, Thomas Daniel wrote:
> > > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > > > > > b/drivers/gpu/drm/i915/i915_drv.h index 059330c..3c7299d 100644
> > > > > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > > > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > > > > @@ -655,6 +655,7 @@ struct intel_context {
> > > > > >  	struct {
> > > > > >  		struct drm_i915_gem_object *state;
> > > > > >  		struct intel_ringbuffer *ringbuf;
> > > > > > +		int unpin_count;
> > > > >
> > > > > Pinning is already refcounted. Why this additional refcount?
> > > >
> > > > The vma.pin_count is only allocated 4 bits of storage.  If this
> > > > restriction can be lifted then I can use that.
> > 
> > Those 4 bits are good enough for legacy contexts, so I wonder a bit what's so
> > massively different for execlist contexts.
> With execlists, in order to dynamically unpin the LRC backing object and
> ring buffer object when not required we take a reference for each
> execlist request that uses them (remember that the execlist request
> lifecycle is currently different from the execbuffer request).  This can
> be a lot, especially in some of the less sane i-g-t tests.

Why?

Presuming the buffer object is properly pushed onto the active list, you
only need to pin while doing the command submission up to the point where
you've committed the buffer object to the active list.
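
Schematically, that pin-until-committed pattern would look like this (a
sketch only; the helper names are assumed from the i915 code of the time):

static int submit_and_track(struct intel_engine_cs *ring,
			    struct drm_i915_gem_object *obj, u32 alignment)
{
	int ret;

	/* Pin only for the duration of command submission. */
	ret = i915_gem_obj_ggtt_pin(obj, alignment, 0);
	if (ret)
		return ret;

	/* ... emit the commands that reference obj ... */

	/* The active list keeps the object resident until the request
	 * retires, so the submission-time pin can be dropped at once. */
	i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring);
	i915_gem_object_ggtt_unpin(obj);

	return 0;
}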

I know documentation sucks for this stuff since I have this discussion
with roughly everyone ever touching anything related to active buffers :(
If you want some recent examples the cmd parser's shadow batch should
serve well (including the entire evolution from reinvented wheel to just
using the active list, although the latest patches are only 90% there and
still have 1-2 misplaced pieces).

> > > Actually I just tried to implement this, it causes a problem for patch
> > > 4 of this set as the unpin_count is also used for the ringbuffer
> > > object which has an ioremap as well as a ggtt pin.
> > 
> > Yeah, ioremap needs to be redone every time we pin/unpin. But on sane
> > archs it's almost no overhead really. And if this does start to matter (shudder
> > for 32bit kernels on gen8) then we can fix it ...
> Hm, so the CPU vaddr of the ring buffer will move around as more
> requests reference it which I suppose is not a problem.  We will use a
> lot of address space (again, especially with the i-g-t stress tests
> which can submit tens of thousands of requests in a very short space of
> time).  What would the fix be?  An extra reference count for the
> ioremap?  Looks familiar :)

ioremap always gives you the same linear address on 64bit kernels. On
32bit it makes a new one, but if you ioremap for each request it'll fall
over anyway. The solution would be to ioremap just the required pages
using the atomic kmap stuff wrapped up into the io_mapping stuff.
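
In outline, that would be something like the following (a sketch only; the
io_mapping would be created once with io_mapping_create_wc(base, size)
over the GGTT aperture, and the names here are placeholders):

#include <linux/io.h>
#include <linux/io-mapping.h>

static void copy_to_ring_page(struct io_mapping *map,
			      unsigned long page_offset,
			      const void *src, size_t len)
{
	/* Atomically map just the page being written; on 64-bit this is
	 * effectively free, and on 32-bit it avoids burning address
	 * space for the whole ring on every pin. */
	void __iomem *vaddr = io_mapping_map_atomic_wc(map, page_offset);

	memcpy_toio(vaddr, src, len);
	io_mapping_unmap_atomic(vaddr);
}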

> I still think it's best to keep the context unpin_count for execlists mode.

Well just means the todo-list to fix up execlist grows longer.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
  2014-11-18 15:11               ` Daniel Vetter
@ 2014-11-18 15:32                 ` Daniel, Thomas
  2014-11-19  9:53                   ` Daniel Vetter
  0 siblings, 1 reply; 42+ messages in thread
From: Daniel, Thomas @ 2014-11-18 15:32 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

> -----Original Message-----
> From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of Daniel
> Vetter
> Sent: Tuesday, November 18, 2014 3:11 PM
> To: Daniel, Thomas
> Cc: Daniel Vetter; intel-gfx@lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the context
> backing objects to GGTT on-demand
> 
> On Tue, Nov 18, 2014 at 02:51:52PM +0000, Daniel, Thomas wrote:
> > > -----Original Message-----
> > > From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of
> > > Daniel Vetter
> > > Sent: Tuesday, November 18, 2014 2:33 PM
> > > To: Daniel, Thomas
> > > Cc: Daniel Vetter; intel-gfx@lists.freedesktop.org
> > > Subject: Re: [Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the
> > > context backing objects to GGTT on-demand
> > >
> > > On Tue, Nov 18, 2014 at 10:48:09AM +0000, Daniel, Thomas wrote:
> > > > > -----Original Message-----
> > > > > From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org]
> > > > > On Behalf Of Daniel, Thomas
> > > > > Sent: Tuesday, November 18, 2014 9:28 AM
> > > > > To: Daniel Vetter
> > > > > Cc: intel-gfx@lists.freedesktop.org
> > > > > Subject: Re: [Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the
> > > > > context backing objects to GGTT on-demand
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf
> > > > > > Of Daniel Vetter
> > > > > > Sent: Monday, November 17, 2014 6:09 PM
> > > > > > To: Daniel, Thomas
> > > > > > Cc: intel-gfx@lists.freedesktop.org
> > > > > > Subject: Re: [Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the
> > > > > > context backing objects to GGTT on-demand
> > > > > >
> > > > > > On Thu, Nov 13, 2014 at 10:28:10AM +0000, Thomas Daniel wrote:
> > > > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > > > > > > b/drivers/gpu/drm/i915/i915_drv.h index 059330c..3c7299d
> > > > > > > 100644
> > > > > > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > > > > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > > > > > @@ -655,6 +655,7 @@ struct intel_context {
> > > > > > >  	struct {
> > > > > > >  		struct drm_i915_gem_object *state;
> > > > > > >  		struct intel_ringbuffer *ringbuf;
> > > > > > > +		int unpin_count;
> > > > > >
> > > > > > Pinning is already refcounted. Why this additional refcount?
> > > > >
> > > > > The vma.pin_count is only allocated 4 bits of storage.  If this
> > > > > restriction can be lifted then I can use that.
> > >
> > > Those 4 bits are good enough for legacy contexts, so I wonder a bit
> > > what's so massively different for execlist contexts.
> > With execlists, in order to dynamically unpin the LRC backing object
> > and ring buffer object when not required we take a reference for each
> > execlist request that uses them (remember that the execlist request
> > lifecycle is currently different from the execbuffer request).  This
> > can be a lot, especially in some of the less sane i-g-t tests.
> 
> Why?
> 
> Presuming the buffer object is properly pushed onto the active list, you only
> need to pin while doing the command submission up to the point where
> you've committed the buffer object to the active list.
This is not currently the case.  Using the active list for context
object management is one of the refactoring tasks, as we agreed.

> I know documentation sucks for this stuff since I have this discussion with
> roughly everyone ever touching anything related to active buffers :( If you
> want some recent examples the cmd parser's shadow batch should serve
> well (including the entire evolution from reinvented wheel to just using the
> active list, although the latest patches are only 90% there and still have 1-2
> misplaced pieces).
> 
> > > > Actually I just tried to implement this, it causes a problem for
> > > > patch
> > > > 4 of this set as the unpin_count is also used for the ringbuffer
> > > > object which has an ioremap as well as a ggtt pin.
> > >
> > > Yeah, ioremap needs to be redone every time we pin/unpin. But on
> > > sane archs it's almost no overhead really. And if this does start to
> > > matter (shudder for 32bit kernels on gen8) then we can fix it ...
> > Hm, so the CPU vaddr of the ring buffer will move around as more
> > requests reference it which I suppose is not a problem.  We will use a
> > lot of address space (again, especially with the i-g-t stress tests
> > which can submit tens of thousands of requests in a very short space
> > of time).  What would the fix be?  An extra reference count for the
> > ioremap?  Looks familiar :)
> 
> ioremap always gives you the same linear address on 64bit kernels. On 32bit
> it makes a new one, but if you ioremap for each request it'll fall over anyway.
Ah, I didn't know that ioremap behaved like that.

> The solution would be to ioremap just the required pages using the atomic
> kmap stuff wrapped up into the io_mapping stuff.
> 
> > I still think it's best to keep the context unpin_count for execlists mode.
> 
> Well just means the todo-list to fix up execlist grows longer.
That's OK from my point of view; this may go away anyway with some of
the refactoring.  The (strong) direction I'm getting from management is
that they want these merged ASAP.

Cheers,
Thomas.

> -Daniel
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
  2014-11-18 15:32                 ` Daniel, Thomas
@ 2014-11-19  9:53                   ` Daniel Vetter
  0 siblings, 0 replies; 42+ messages in thread
From: Daniel Vetter @ 2014-11-19  9:53 UTC (permalink / raw)
  To: Daniel, Thomas; +Cc: intel-gfx

On Tue, Nov 18, 2014 at 03:32:46PM +0000, Daniel, Thomas wrote:
> > -----Original Message-----
> > From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of Daniel
> > Vetter
> > Sent: Tuesday, November 18, 2014 3:11 PM
> > To: Daniel, Thomas
> > Cc: Daniel Vetter; intel-gfx@lists.freedesktop.org
> > Subject: Re: [Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the context
> > backing objects to GGTT on-demand
> > 
> > On Tue, Nov 18, 2014 at 02:51:52PM +0000, Daniel, Thomas wrote:
> > > > -----Original Message-----
> > > > From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of
> > > > Daniel Vetter
> > > > Sent: Tuesday, November 18, 2014 2:33 PM
> > > > To: Daniel, Thomas
> > > > Cc: Daniel Vetter; intel-gfx@lists.freedesktop.org
> > > > Subject: Re: [Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the
> > > > context backing objects to GGTT on-demand
> > > >
> > > > On Tue, Nov 18, 2014 at 10:48:09AM +0000, Daniel, Thomas wrote:
> > > > > > -----Original Message-----
> > > > > > From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org]
> > > > > > On Behalf Of Daniel, Thomas
> > > > > > Sent: Tuesday, November 18, 2014 9:28 AM
> > > > > > To: Daniel Vetter
> > > > > > Cc: intel-gfx@lists.freedesktop.org
> > > > > > Subject: Re: [Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the
> > > > > > context backing objects to GGTT on-demand
> > > > > >
> > > > > > > -----Original Message-----
> > > > > > > From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf
> > > > > > > Of Daniel Vetter
> > > > > > > Sent: Monday, November 17, 2014 6:09 PM
> > > > > > > To: Daniel, Thomas
> > > > > > > Cc: intel-gfx@lists.freedesktop.org
> > > > > > > Subject: Re: [Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the
> > > > > > > context backing objects to GGTT on-demand
> > > > > > >
> > > > > > > On Thu, Nov 13, 2014 at 10:28:10AM +0000, Thomas Daniel wrote:
> > > > > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > > > > > > > b/drivers/gpu/drm/i915/i915_drv.h index 059330c..3c7299d
> > > > > > > > 100644
> > > > > > > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > > > > > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > > > > > > @@ -655,6 +655,7 @@ struct intel_context {
> > > > > > > >  	struct {
> > > > > > > >  		struct drm_i915_gem_object *state;
> > > > > > > >  		struct intel_ringbuffer *ringbuf;
> > > > > > > > +		int unpin_count;
> > > > > > >
> > > > > > > Pinning is already refcounted. Why this additional refcount?
> > > > > >
> > > > > > The vma.pin_count is only allocated 4 bits of storage.  If this
> > > > > > restriction can be lifted then I can use that.
> > > >
> > > > Those 4 bits are good enough for legacy contexts, so I wonder a bit
> > > > what's so massively different for execlist contexts.
> > > With execlists, in order to dynamically unpin the LRC backing object
> > > and ring buffer object when not required we take a reference for each
> > > execlist request that uses them (remember that the execlist request
> > > lifecycle is currently different from the execbuffer request).  This
> > > can be a lot, especially in some of the less sane i-g-t tests.
> > 
> > Why?
> > 
> > Presuming the buffer object is properly pushed onto the active list, you only
> > need to pin while doing the command submission up to the point where
> > you've committed the buffer object to the active list.
> This is not currently the case.  Using the active list for context
> object management is one of the refactoring tasks, as we agreed.

Actually I even lied: you need to pin the current context, and you can
only unpin the old one you've just switched out. Because the request for
the next batch/ctx combo will complete after the switch has happened,
this all works out.
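
In sketch form (a sketch only, reusing the pin/unpin helper names from
the patches):

static void execlists_ctx_switch(struct intel_engine_cs *ring,
				 struct intel_context *to,
				 struct intel_context *from)
{
	/* The incoming context must be pinned before its address goes
	 * into the ELSP, and it stays pinned while the HW may use it. */
	intel_lr_context_pin(ring, to);

	/* Only the context just switched out may be unpinned: its
	 * request completes after the switch, so by the time it retires
	 * the HW is guaranteed to be done with it. */
	if (from)
		intel_lr_context_unpin(ring, from);
}
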
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
  2014-11-17 14:55       ` Daniel, Thomas
@ 2014-11-19 17:59         ` Daniel, Thomas
  0 siblings, 0 replies; 42+ messages in thread
From: Daniel, Thomas @ 2014-11-19 17:59 UTC (permalink / raw)
  To: intel-gfx, Daniel Vetter (daniel@ffwll.ch)

For the avoidance of confusion, I want to make it clear that the latest
revisions to the patches in this set posted to the list (v5) address all
the review comments from the VPG guys.

[v5 1/4] http://patchwork.freedesktop.org/patch/36716/
[2/4] already accepted
[v5 3/4] http://patchwork.freedesktop.org/patch/36717/
[v5 4/4] http://patchwork.freedesktop.org/patch/36718/

Thomas.

> -----Original Message-----
> From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Daniel, Thomas
> Sent: Monday, November 17, 2014 2:56 PM
> To: intel-gfx@lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the context
> backing objects to GGTT on-demand
> 
> Here is the actual review...
> 
> _____________________________________________
> From: Daniel, Thomas
> Sent: Wednesday, November 12, 2014 8:52 PM
> To: Goel, Akash
> Subject: RE: Execlists patches code review
> 
> 
> Hi Akash,
> 
> I will put the WARN messages back in and remove the need_unpin.
> The elsp_submitted count does not behave exactly as you would expect
> because of some race condition.
> Have a look at the patch “Avoid non-lite-restore preemptions” by Oscar
> Mateo for a description.
> 
> Thanks,
> Thomas.
> _____________________________________________
> From: Goel, Akash
> Sent: Tuesday, November 11, 2014 4:37 PM
> To: Daniel, Thomas
> Subject: RE: Execlists patches code review
> 
> 
> Hi Thomas,
> 
> A few comments on http://patchwork.freedesktop.org/patch/35830/
> 
> 	int elsp_submitted;
> +	bool need_unpin;
> 
> This new field has not been used anywhere.
> 
> 
> 		if (intel_execlists_ctx_id(ctx_obj) == request_id) {
> -			WARN(head_req->elsp_submitted == 0,
> -			     "Never submitted head request\n");
> 
> Sorry, I couldn't follow this change. Even if a request has been merged, the
> elsp_submitted count should still not be 0 here when this function is
> executed on arrival of the context switch interrupt. When a new request is
> merged with a previously submitted request, the original value of
> elsp_submitted is still retained.
> 
> +			/* If the request has been merged, it is possible to get
> +			 * here with an unsubmitted request. */
>  			if (--head_req->elsp_submitted <= 0) {
> 
> 
> 
> 
> 		if (status & GEN8_CTX_STATUS_PREEMPTED) {
>  			if (status & GEN8_CTX_STATUS_LITE_RESTORE) {
> -				if (execlists_check_remove_request(ring, status_id))
> -					WARN(1, "Lite Restored request removed from queue\n");
> +				execlists_check_remove_request(ring, status_id);
> 
> Same doubt here: I thought that in this case of an interrupt due to
> preemption (lite restore), which will occur when the same context is
> submitted as the one already being executed by the HW, the count will not
> drop to 0. The count will drop to 0 when the context switch interrupt is
> generated subsequently.
> 
> Best regards
> Akash
> _____________________________________________
> From: Goel, Akash
> Sent: Tuesday, November 11, 2014 8:58 PM
> To: Daniel, Thomas
> Subject: RE: Execlists patches code review
> 
> 
> Hi Thomas,
> 
> I was OOP today, I will provide this review comment tomorrow on the GFX
> mailing list.
> 
> Best regards
> Akash
> _____________________________________________
> From: Daniel, Thomas
> Sent: Monday, November 10, 2014 10:41 PM
> To: Goel, Akash
> Subject: RE: Execlists patches code review
> 
> 
> Hi Akash,
> 
> Please post this comment to the mailing list.
> Assuming nobody else comments, I will remove the unpin_lock and replace
> the mutex_lock(&unpin_lock) with
> WARN_ON(!mutex_is_locked(&dev->struct_mutex)).
> 
> Cheers,
> Thomas.
> 
> _____________________________________________
> From: Goel, Akash
> Sent: Monday, November 10, 2014 11:19 AM
> To: Daniel, Thomas
> Subject: RE: Execlists patches code review
> 
> 
> In the context of the 3rd patch, http://patchwork.freedesktop.org/patch/35829/:
> intel_lr_context_pin is called from the logical_ring_alloc_seqno function,
> and intel_lr_context_unpin is called from the i915_gem_free_request and
> i915_gem_reset_ring_cleanup functions.
> 
> All these 3 paths are already protected by dev->struct_mutex (Global lock),
> so they will always execute sequentially with respect to each other.
> 
> Do we need to have a new lock?
> +		struct mutex unpin_lock;
> 
> Best regards
> Akash
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
  2014-11-13 10:28   ` [PATCH v5 " Thomas Daniel
                       ` (2 preceding siblings ...)
  2014-11-18  6:40     ` Deepak S
@ 2014-11-24 14:24     ` Daniel Vetter
  2014-11-24 17:14       ` Daniel, Thomas
  3 siblings, 1 reply; 42+ messages in thread
From: Daniel Vetter @ 2014-11-24 14:24 UTC (permalink / raw)
  To: Thomas Daniel; +Cc: intel-gfx

On Thu, Nov 13, 2014 at 10:28:10AM +0000, Thomas Daniel wrote:
> From: Oscar Mateo <oscar.mateo@intel.com>
> 
> Up until now, we have pinned every logical ring context backing object
> during creation, and left it pinned until destruction. This made my life
> easier, but it's a harmful thing to do, because we cause fragmentation
> of the GGTT (and, eventually, we would run out of space).
> 
> This patch makes the pinning on-demand: the backing objects of the two
> contexts that are written to the ELSP are pinned right before submission
> and unpinned once the hardware is done with them. The only context that
> is still pinned regardless is the global default one, so that the HWS can
> still be accessed in the same way (ring->status_page).
> 
> v2: In the early version of this patch, we were pinning the context as
> we put it into the ELSP: on the one hand, this is very efficient because
> only a maximum two contexts are pinned at any given time, but on the other
> hand, we cannot really pin in interrupt time :(
> 
> v3: Use a mutex rather than atomic_t to protect pin count to avoid races.
> Do not unpin default context in free_request.
> 
> v4: Break out pin and unpin into functions.  Fix style problems reported
> by checkpatch
> 
> v5: Remove unpin_lock as all pinning and unpinning is done with the struct
> mutex already locked.  Add WARN_ONs to make sure this is the case in future.
> 
> Issue: VIZ-4277
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>

This patch here scored a regression (a leak in the module unload path);
please address it asap. The deadline for regressions should be 1 week;
after that I'll just drop the patch or apply the revert. That includes
review and everything.

https://bugs.freedesktop.org/show_bug.cgi?id=86507

Thanks,
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
  2014-11-24 14:24     ` Daniel Vetter
@ 2014-11-24 17:14       ` Daniel, Thomas
  2014-11-24 20:15         ` Daniel Vetter
  0 siblings, 1 reply; 42+ messages in thread
From: Daniel, Thomas @ 2014-11-24 17:14 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

> -----Original Message-----
> From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of Daniel
> Vetter
> Sent: Monday, November 24, 2014 2:25 PM
> To: Daniel, Thomas
> Cc: intel-gfx@lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the context
> backing objects to GGTT on-demand
> 
> On Thu, Nov 13, 2014 at 10:28:10AM +0000, Thomas Daniel wrote:
> > From: Oscar Mateo <oscar.mateo@intel.com>
> >
> > Up until now, we have pinned every logical ring context backing object
> > during creation, and left it pinned until destruction. This made my
> > life easier, but it's a harmful thing to do, because we cause
> > fragmentation of the GGTT (and, eventually, we would run out of space).
> >
> > This patch makes the pinning on-demand: the backing objects of the two
> > contexts that are written to the ELSP are pinned right before
> > submission and unpinned once the hardware is done with them. The only
> > context that is still pinned regardless is the global default one, so
> > that the HWS can still be accessed in the same way (ring->status_page).
> >
> > v2: In the early version of this patch, we were pinning the context as
> > we put it into the ELSP: on the one hand, this is very efficient
> > because only a maximum two contexts are pinned at any given time, but
> > on the other hand, we cannot really pin in interrupt time :(
> >
> > v3: Use a mutex rather than atomic_t to protect pin count to avoid races.
> > Do not unpin default context in free_request.
> >
> > v4: Break out pin and unpin into functions.  Fix style problems
> > reported by checkpatch
> >
> > v5: Remove unpin_lock as all pinning and unpinning is done with the
> > struct mutex already locked.  Add WARN_ONs to make sure this is the case
> in future.
> >
> > Issue: VIZ-4277
> > Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> > Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
> 
> This patch here scored a regression (a leak in the module unload path);
> please address it asap. The deadline for regressions should be 1 week; after
> that I'll just drop the patch or apply the revert. That includes review and
> everything.
> 
> https://bugs.freedesktop.org/show_bug.cgi?id=86507 

Leak identified.  The fix is simple.
Do you want a v6 or a follow-up patch?

Cheers,
Thomas.

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
  2014-11-24 17:14       ` Daniel, Thomas
@ 2014-11-24 20:15         ` Daniel Vetter
  0 siblings, 0 replies; 42+ messages in thread
From: Daniel Vetter @ 2014-11-24 20:15 UTC (permalink / raw)
  To: Daniel, Thomas; +Cc: intel-gfx

On Mon, Nov 24, 2014 at 6:14 PM, Daniel, Thomas <thomas.daniel@intel.com> wrote:
>> This patch here scored a regression (leak in the module unload path), please
>> address it asap. Deadline for regressions should be 1 week, then I'll just drop
>> the patch or apply the revert. That includes review and everything.
>>
>> https://bugs.freedesktop.org/show_bug.cgi?id=86507
>
> Leak identified.  The fix is simple.
> Do you want a v6 or a follow-up patch?

The tree is already tagged so no rebasing, hence a full-blown patch with
all the bells and whistles please. In general I always prefer the
follow-up patch when I've merged the original one already - squashing in
is easy if still possible, but untangling if a freeze point happened
(like here) is more of a pain.

Thanks, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

end of thread, other threads:[~2014-11-24 20:15 UTC | newest]

Thread overview: 42+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-29  9:52 [PATCH 1/4] drm/i915/bdw: Clean up execlist queue items in retire_work Thomas Daniel
2014-10-29  9:52 ` [PATCH 2/4] drm/i915/bdw: Setup global hardware status page in execlists mode Thomas Daniel
2014-11-03 15:47   ` Daniel Vetter
2014-10-29  9:52 ` [PATCH 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand Thomas Daniel
2014-11-03 16:54   ` Daniel Vetter
2014-11-03 17:00     ` Daniel, Thomas
2014-11-03 17:11       ` Daniel Vetter
2014-11-03 21:04     ` Chris Wilson
2014-11-13 10:28   ` [PATCH v5 " Thomas Daniel
2014-11-17 14:38     ` akash goel
2014-11-17 14:55       ` Daniel, Thomas
2014-11-19 17:59         ` Daniel, Thomas
2014-11-17 18:09     ` Daniel Vetter
2014-11-18  9:27       ` Daniel, Thomas
2014-11-18 10:48         ` Daniel, Thomas
2014-11-18 14:33           ` Daniel Vetter
2014-11-18 14:51             ` Daniel, Thomas
2014-11-18 15:11               ` Daniel Vetter
2014-11-18 15:32                 ` Daniel, Thomas
2014-11-19  9:53                   ` Daniel Vetter
2014-11-18  6:40     ` Deepak S
2014-11-17 14:23       ` Daniel Vetter
2014-11-18 14:27         ` Deepak S
2014-11-24 14:24     ` Daniel Vetter
2014-11-24 17:14       ` Daniel, Thomas
2014-11-24 20:15         ` Daniel Vetter
2014-10-29  9:52 ` [PATCH 4/4] drm/i915/bdw: Pin the ringbuffer backing object " Thomas Daniel
2014-10-29 14:38   ` [PATCH 4/4] drm/i915/bdw: Pin the ringbuffer backing shuang.he
2014-11-13 10:28   ` [PATCH v5 4/4] drm/i915/bdw: Pin the ringbuffer backing object to GGTT on-demand Thomas Daniel
2014-11-18  5:18     ` akash goel
2014-11-18  6:37     ` Deepak S
2014-11-18  6:39       ` Deepak S
2014-11-17 14:29         ` Daniel Vetter
2014-11-18 14:30           ` Deepak S
2014-11-03 15:33 ` [PATCH 1/4] drm/i915/bdw: Clean up execlist queue items in retire_work Daniel Vetter
2014-11-03 16:05   ` Daniel, Thomas
2014-11-03 16:17     ` Daniel Vetter
2014-11-04  9:11 ` Chris Wilson
2014-11-07 14:55 ` [PATCH v4 " Thomas Daniel
2014-11-13 10:27   ` [PATCH v5 " Thomas Daniel
2014-11-18  6:29     ` Deepak S
2014-11-17 14:41       ` akash goel
