All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] drm/i915: Sanitize GuC client initialization
@ 2017-02-14 13:53 Joonas Lahtinen
  2017-02-14 17:52 ` ✓ Fi.CI.BAT: success for drm/i915: Sanitize GuC client initialization (rev2) Patchwork
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Joonas Lahtinen @ 2017-02-14 13:53 UTC (permalink / raw)
  To: Intel graphics driver community testing & development

Started adding proper teardown to guc_client_alloc, ended up removing
quite a few dead ends where errors communicating with the GuC were
silently ignored. There also seemed to be quite a few erronous
teardown actions performed in case of an error (ordering wrong).

v2:
	- Increase function symmetry/proximity (Michal/Daniele)
	- Fix __reserve_doorbell accounting for high priority (Daniele)
	- Call __update_doorbell_desc! (Daniele)
	- Isolate __guc_{,de}allocate_doorbell (Michal/Daniele)

Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
Cc: Oscar Mateo <oscar.mateo@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |   4 +-
 drivers/gpu/drm/i915/i915_guc_submission.c | 371 ++++++++++++++++-------------
 drivers/gpu/drm/i915/intel_guc_fwif.h      |   4 +-
 drivers/gpu/drm/i915/intel_uc.h            |  11 +-
 4 files changed, 215 insertions(+), 175 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index cd023fd..f3972cf 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2492,7 +2492,7 @@ static void i915_guc_client_info(struct seq_file *m,
 
 	seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n",
 		client->priority, client->ctx_index, client->proc_desc_offset);
-	seq_printf(m, "\tDoorbell id %d, offset: 0x%x, cookie 0x%x\n",
+	seq_printf(m, "\tDoorbell id %d, offset: 0x%lx, cookie 0x%x\n",
 		client->doorbell_id, client->doorbell_offset, client->doorbell_cookie);
 	seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
 		client->wq_size, client->wq_offset, client->wq_tail);
@@ -2527,7 +2527,7 @@ static int i915_guc_info(struct seq_file *m, void *data)
 	}
 
 	seq_printf(m, "Doorbell map:\n");
-	seq_printf(m, "\t%*pb\n", GUC_MAX_DOORBELLS, guc->doorbell_bitmap);
+	seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap);
 	seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc->db_cacheline);
 
 	seq_printf(m, "GuC total action count: %llu\n", guc->action_count);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 8ced9e2..258fca3 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -62,27 +62,73 @@
  *
  */
 
+static inline bool is_high_priority(struct i915_guc_client* client)
+{
+	return client->priority <= GUC_CTX_PRIORITY_HIGH;
+}
+
+static int __reserve_doorbell(struct i915_guc_client *client)
+{
+	unsigned long offset;
+	unsigned long end;
+	u16 id;
+
+	GEM_BUG_ON(client->doorbell_id != GUC_DOORBELL_INVALID);
+
+	/*
+	 * The bitmap tracks which doorbell registers are currently in use.
+	 * It is split into two halves; the first half is used for normal
+	 * priority contexts, the second half for high-priority ones.
+	 * Note that logically higher priorities are numerically less than
+	 * normal ones, so the test below means "is it high-priority?"
+	 */
+
+	offset = 0;
+	end = GUC_NUM_DOORBELLS/2;
+	if (is_high_priority(client)) {
+		offset = end;
+		end += offset;
+        }
+
+	id = find_next_zero_bit(client->guc->doorbell_bitmap, offset, end);
+	if (id == end)
+		return -ENOSPC;
+
+	__set_bit(id, client->guc->doorbell_bitmap);
+	client->doorbell_id = id;
+	DRM_DEBUG_DRIVER("client %u (high prio=%s) reserved doorbell %d: \n",
+			 client->ctx_index, yesno(is_high_priority(client)),
+			 id);
+	return 0;
+}
+
+static void __unreserve_doorbell(struct i915_guc_client *client)
+{
+	GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID);
+
+	__clear_bit(client->doorbell_id, client->guc->doorbell_bitmap);
+	client->doorbell_id = GUC_DOORBELL_INVALID;
+}
+
 /*
  * Tell the GuC to allocate or deallocate a specific doorbell
  */
 
-static int guc_allocate_doorbell(struct intel_guc *guc,
-				 struct i915_guc_client *client)
+static int __guc_allocate_doorbell(struct intel_guc *guc, u32 ctx_index)
 {
 	u32 action[] = {
 		INTEL_GUC_ACTION_ALLOCATE_DOORBELL,
-		client->ctx_index
+		ctx_index
 	};
 
 	return intel_guc_send(guc, action, ARRAY_SIZE(action));
 }
 
-static int guc_release_doorbell(struct intel_guc *guc,
-				struct i915_guc_client *client)
+static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 ctx_index)
 {
 	u32 action[] = {
 		INTEL_GUC_ACTION_DEALLOCATE_DOORBELL,
-		client->ctx_index
+		ctx_index
 	};
 
 	return intel_guc_send(guc, action, ARRAY_SIZE(action));
@@ -95,104 +141,98 @@ static int guc_release_doorbell(struct intel_guc *guc,
  * client object which contains the page being used for the doorbell
  */
 
-static int guc_update_doorbell_id(struct intel_guc *guc,
-				  struct i915_guc_client *client,
-				  u16 new_id)
+static int __update_doorbell_desc(struct i915_guc_client *client, u16 new_id)
 {
-	struct sg_table *sg = guc->ctx_pool_vma->pages;
-	void *doorbell_bitmap = guc->doorbell_bitmap;
-	struct guc_doorbell_info *doorbell;
+	struct sg_table *sg = client->guc->ctx_pool_vma->pages;
 	struct guc_context_desc desc;
 	size_t len;
 
-	doorbell = client->vaddr + client->doorbell_offset;
-
-	if (client->doorbell_id != GUC_INVALID_DOORBELL_ID &&
-	    test_bit(client->doorbell_id, doorbell_bitmap)) {
-		/* Deactivate the old doorbell */
-		doorbell->db_status = GUC_DOORBELL_DISABLED;
-		(void)guc_release_doorbell(guc, client);
-		__clear_bit(client->doorbell_id, doorbell_bitmap);
-	}
-
 	/* Update the GuC's idea of the doorbell ID */
 	len = sg_pcopy_to_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
-			     sizeof(desc) * client->ctx_index);
+				 sizeof(desc) * client->ctx_index);
 	if (len != sizeof(desc))
 		return -EFAULT;
+
 	desc.db_id = new_id;
 	len = sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
-			     sizeof(desc) * client->ctx_index);
+				   sizeof(desc) * client->ctx_index);
 	if (len != sizeof(desc))
 		return -EFAULT;
 
-	client->doorbell_id = new_id;
-	if (new_id == GUC_INVALID_DOORBELL_ID)
-		return 0;
+	return 0;
+}
+
+static struct guc_doorbell_info *__get_doorbell(struct i915_guc_client *client)
+{
+	return client->vaddr + client->doorbell_offset;
+}
+
+static bool has_doorbell(struct i915_guc_client *client)
+{
+	if (client->doorbell_id == GUC_DOORBELL_INVALID)
+		return false;
 
-	/* Activate the new doorbell */
-	__set_bit(new_id, doorbell_bitmap);
+	return test_bit(client->doorbell_id, client->guc->doorbell_bitmap);
+}
+
+static int __create_doorbell(struct i915_guc_client *client)
+{
+	struct guc_doorbell_info *doorbell;
+	int err;
+
+	doorbell = __get_doorbell(client);
 	doorbell->db_status = GUC_DOORBELL_ENABLED;
 	doorbell->cookie = client->doorbell_cookie;
-	return guc_allocate_doorbell(guc, client);
+
+	err = __guc_allocate_doorbell(client->guc, client->ctx_index);
+	if (err) {
+		doorbell->db_status = GUC_DOORBELL_DISABLED;
+		doorbell->cookie = 0;
+	}
+	return err;
 }
 
-static void guc_disable_doorbell(struct intel_guc *guc,
-				 struct i915_guc_client *client)
+static int __destroy_doorbell(struct i915_guc_client *client)
 {
-	(void)guc_update_doorbell_id(guc, client, GUC_INVALID_DOORBELL_ID);
+	struct guc_doorbell_info *doorbell;
 
-	/* XXX: wait for any interrupts */
-	/* XXX: wait for workqueue to drain */
+	doorbell = __get_doorbell(client);
+	doorbell->db_status = GUC_DOORBELL_DISABLED;
+	doorbell->cookie = 0;
+
+	return __guc_deallocate_doorbell(client->guc, client->ctx_index);
 }
 
-static uint16_t
-select_doorbell_register(struct intel_guc *guc, uint32_t priority)
+static int destroy_doorbell(struct i915_guc_client *client)
 {
-	/*
-	 * The bitmap tracks which doorbell registers are currently in use.
-	 * It is split into two halves; the first half is used for normal
-	 * priority contexts, the second half for high-priority ones.
-	 * Note that logically higher priorities are numerically less than
-	 * normal ones, so the test below means "is it high-priority?"
-	 */
-	const bool hi_pri = (priority <= GUC_CTX_PRIORITY_HIGH);
-	const uint16_t half = GUC_MAX_DOORBELLS / 2;
-	const uint16_t start = hi_pri ? half : 0;
-	const uint16_t end = start + half;
-	uint16_t id;
+	int err;
 
-	id = find_next_zero_bit(guc->doorbell_bitmap, end, start);
-	if (id == end)
-		id = GUC_INVALID_DOORBELL_ID;
+	GEM_BUG_ON(!has_doorbell(client));
 
-	DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n",
-			hi_pri ? "high" : "normal", id);
+	/* XXX: wait for any interrupts */
+	/* XXX: wait for workqueue to drain */
 
-	return id;
-}
+	err = __destroy_doorbell(client);
+	if (err)
+		return err;
 
-/*
- * Select, assign and relase doorbell cachelines
- *
- * These functions track which doorbell cachelines are in use.
- * The data they manipulate is protected by the intel_guc_send lock.
- */
+	__unreserve_doorbell(client);
 
-static uint32_t select_doorbell_cacheline(struct intel_guc *guc)
+	return 0;
+}
+
+static unsigned long __reserve_cacheline(struct intel_guc* guc)
 {
-	const uint32_t cacheline_size = cache_line_size();
-	uint32_t offset;
+	unsigned long offset;
 
 	/* Doorbell uses a single cache line within a page */
 	offset = offset_in_page(guc->db_cacheline);
 
 	/* Moving to next cache line to reduce contention */
-	guc->db_cacheline += cacheline_size;
-
-	DRM_DEBUG_DRIVER("selected doorbell cacheline 0x%x, next 0x%x, linesize %u\n",
-			offset, guc->db_cacheline, cacheline_size);
+	guc->db_cacheline += cache_line_size();
 
+	DRM_DEBUG_DRIVER("reserved cacheline 0x%lx, next 0x%x, linesize %u\n",
+			offset, guc->db_cacheline, cache_line_size());
 	return offset;
 }
 
@@ -584,93 +624,84 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
 	return vma;
 }
 
-static void
-guc_client_free(struct drm_i915_private *dev_priv,
-		struct i915_guc_client *client)
+static void guc_client_free(struct i915_guc_client *client)
 {
-	struct intel_guc *guc = &dev_priv->guc;
-
-	if (!client)
-		return;
-
 	/*
 	 * XXX: wait for any outstanding submissions before freeing memory.
 	 * Be sure to drop any locks
 	 */
-
-	if (client->vaddr) {
-		/*
-		 * If we got as far as setting up a doorbell, make sure we
-		 * shut it down before unmapping & deallocating the memory.
-		 */
-		guc_disable_doorbell(guc, client);
-
-		i915_gem_object_unpin_map(client->vma->obj);
-	}
-
+	WARN_ON(destroy_doorbell(client));
+	guc_ctx_desc_fini(client->guc, client);
+	i915_gem_object_unpin_map(client->vma->obj);
 	i915_vma_unpin_and_release(&client->vma);
-
-	if (client->ctx_index != GUC_INVALID_CTX_ID) {
-		guc_ctx_desc_fini(guc, client);
-		ida_simple_remove(&guc->ctx_ids, client->ctx_index);
-	}
-
+	ida_simple_remove(&client->guc->ctx_ids, client->ctx_index);
 	kfree(client);
 }
 
 /* Check that a doorbell register is in the expected state */
-static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id)
+static bool doorbell_ok(struct intel_guc *guc, u16 db_id)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	i915_reg_t drbreg = GEN8_DRBREGL(db_id);
-	uint32_t value = I915_READ(drbreg);
-	bool enabled = (value & GUC_DOORBELL_ENABLED) != 0;
-	bool expected = test_bit(db_id, guc->doorbell_bitmap);
+	u32 drbregl;
+	bool valid;
+
+	GEM_BUG_ON(db_id >= GUC_DOORBELL_INVALID);
 
-	if (enabled == expected)
+	drbregl = I915_READ(GEN8_DRBREGL(db_id));
+	valid = drbregl & GEN8_DRB_VALID;
+
+	if (test_bit(db_id, guc->doorbell_bitmap) == valid)
 		return true;
 
-	DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) 0x%x, should be %s\n",
-			 db_id, drbreg.reg, value,
-			 expected ? "active" : "inactive");
+	DRM_DEBUG_DRIVER("Doorbell %d has unexpected state (0x%x): valid=%s\n",
+			 db_id, drbregl, yesno(valid));
 
 	return false;
 }
 
+static int __reset_doorbell(struct i915_guc_client* client, u16 db_id)
+{
+	int err;
+
+	err = __update_doorbell_desc(client, db_id);
+	if (!err)
+		err = __create_doorbell(client);
+	if (!err)
+		err = __destroy_doorbell(client);
+
+	return err;
+}
+
 /*
  * Borrow the first client to set up & tear down each unused doorbell
  * in turn, to ensure that all doorbell h/w is (re)initialised.
  */
-static void guc_init_doorbell_hw(struct intel_guc *guc)
+static int guc_init_doorbell_hw(struct intel_guc *guc)
 {
 	struct i915_guc_client *client = guc->execbuf_client;
-	uint16_t db_id;
-	int i, err;
+	int err;
+	int i;
 
-	guc_disable_doorbell(guc, client);
+	if (has_doorbell(client))
+		destroy_doorbell(client);
 
-	for (i = 0; i < GUC_MAX_DOORBELLS; ++i) {
-		/* Skip if doorbell is OK */
-		if (guc_doorbell_check(guc, i))
+	for (i = 0; i < GUC_NUM_DOORBELLS; ++i) {
+		if (doorbell_ok(guc, i))
 			continue;
 
-		err = guc_update_doorbell_id(guc, client, i);
-		if (err)
-			DRM_DEBUG_DRIVER("Doorbell %d update failed, err %d\n",
-					i, err);
+		err = __reset_doorbell(client, i);
+		WARN(err, "Doorbell %d reset failed, err %d\n", i, err);
 	}
 
-	db_id = select_doorbell_register(guc, client->priority);
-	WARN_ON(db_id == GUC_INVALID_DOORBELL_ID);
-
-	err = guc_update_doorbell_id(guc, client, db_id);
+	err = __reserve_doorbell(client);
 	if (err)
-		DRM_WARN("Failed to restore doorbell to %d, err %d\n",
-			 db_id, err);
+		return err;
 
 	/* Read back & verify all doorbell registers */
-	for (i = 0; i < GUC_MAX_DOORBELLS; ++i)
-		(void)guc_doorbell_check(guc, i);
+	for (i = 0; i < GUC_NUM_DOORBELLS; ++i)
+		WARN_ON(!doorbell_ok(guc, i));
+
+	return 0;
 }
 
 /**
@@ -696,49 +727,46 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
 	struct intel_guc *guc = &dev_priv->guc;
 	struct i915_vma *vma;
 	void *vaddr;
-	uint16_t db_id;
+	int ret;
 
 	client = kzalloc(sizeof(*client), GFP_KERNEL);
 	if (!client)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
-	client->owner = ctx;
 	client->guc = guc;
+	client->owner = ctx;
 	client->engines = engines;
 	client->priority = priority;
-	client->doorbell_id = GUC_INVALID_DOORBELL_ID;
+	client->doorbell_id = GUC_DOORBELL_INVALID;
+	client->wq_offset = GUC_DB_SIZE;
+	client->wq_size = GUC_WQ_SIZE;
+	spin_lock_init(&client->wq_lock);
 
-	client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0,
-			GUC_MAX_GPU_CONTEXTS, GFP_KERNEL);
-	if (client->ctx_index >= GUC_MAX_GPU_CONTEXTS) {
-		client->ctx_index = GUC_INVALID_CTX_ID;
-		goto err;
-	}
+	ret = ida_simple_get(&guc->ctx_ids, 0, GUC_MAX_GPU_CONTEXTS,
+				GFP_KERNEL);
+	if (ret < 0)
+		goto err_client;
+
+	client->ctx_index = ret;
 
 	/* The first page is doorbell/proc_desc. Two followed pages are wq. */
 	vma = intel_guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE);
-	if (IS_ERR(vma))
-		goto err;
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_id;
+	}
 
 	/* We'll keep just the first (doorbell/proc) page permanently kmap'd. */
 	client->vma = vma;
 
 	vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
-	if (IS_ERR(vaddr))
-		goto err;
-
+	if (IS_ERR(vaddr)) {
+		ret = PTR_ERR(vaddr);
+		goto err_vma;
+	}
 	client->vaddr = vaddr;
 
-	spin_lock_init(&client->wq_lock);
-	client->wq_offset = GUC_DB_SIZE;
-	client->wq_size = GUC_WQ_SIZE;
-
-	db_id = select_doorbell_register(guc, client->priority);
-	if (db_id == GUC_INVALID_DOORBELL_ID)
-		/* XXX: evict a doorbell instead? */
-		goto err;
-
-	client->doorbell_offset = select_doorbell_cacheline(guc);
+	client->doorbell_offset = __reserve_cacheline(guc);
 
 	/*
 	 * Since the doorbell only requires a single cacheline, we can save
@@ -753,27 +781,35 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
 	guc_proc_desc_init(guc, client);
 	guc_ctx_desc_init(guc, client);
 
-	/* For runtime client allocation we need to enable the doorbell. Not
-	 * required yet for the static execbuf_client as this special kernel
-	 * client is enabled from i915_guc_submission_enable().
-	 *
-	 * guc_update_doorbell_id(guc, client, db_id);
-	 */
+	/* For runtime client allocation we need to enable the doorbell. */
+	ret = __update_doorbell_desc(client, client->doorbell_id);
+	if (ret)
+		goto err_vaddr;
+
+	ret = __create_doorbell(client);
+	if (ret)
+		goto err_db;
 
 	DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: ctx_index %u\n",
-		priority, client, client->engines, client->ctx_index);
-	DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%x\n",
-		client->doorbell_id, client->doorbell_offset);
+			 priority, client, client->engines, client->ctx_index);
+	DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%lx\n",
+			 client->doorbell_id, client->doorbell_offset);
 
 	return client;
+err_db:
+	__update_doorbell_desc(client, GUC_DOORBELL_INVALID);
+err_vaddr:
+	i915_gem_object_unpin_map(client->vma->obj);
+err_vma:
+	i915_vma_unpin_and_release(&client->vma);
+err_id:
+	ida_simple_remove(&guc->ctx_ids, client->ctx_index);
+err_client:
+	kfree(client);
 
-err:
-	guc_client_free(dev_priv, client);
-	return NULL;
+	return ERR_PTR(ret);
 }
 
-
-
 static void guc_policies_init(struct guc_policies *policies)
 {
 	struct guc_policy *policy;
@@ -881,7 +917,7 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv)
 		return 0;
 
 	/* Wipe bitmap & delete client in case of reinitialisation */
-	bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS);
+	bitmap_clear(guc->doorbell_bitmap, 0, GUC_NUM_DOORBELLS);
 	i915_guc_submission_disable(dev_priv);
 
 	if (!i915.enable_guc_submission)
@@ -932,14 +968,19 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 	struct i915_guc_client *client = guc->execbuf_client;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	int err;
 
 	if (!client)
 		return -ENODEV;
 
-	intel_guc_sample_forcewake(guc);
+	err = intel_guc_sample_forcewake(guc);
+	if (err)
+		return err;
 
 	guc_reset_wq(client);
-	guc_init_doorbell_hw(guc);
+	err = guc_init_doorbell_hw(guc);
+	if (err)
+		return err;
 
 	/* Take over from manual control of ELSP (execlists) */
 	for_each_engine(engine, dev_priv, id) {
@@ -978,7 +1019,7 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
 	if (!client)
 		return;
 
-	guc_client_free(dev_priv, client);
+	guc_client_free(client);
 
 	i915_vma_unpin_and_release(&guc->ads_vma);
 	i915_vma_unpin_and_release(&guc->log.vma);
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
index 25691f0..3ae8cef 100644
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -241,8 +241,8 @@ union guc_doorbell_qw {
 	u64 value_qw;
 } __packed;
 
-#define GUC_MAX_DOORBELLS		256
-#define GUC_INVALID_DOORBELL_ID		(GUC_MAX_DOORBELLS)
+#define GUC_NUM_DOORBELLS	256
+#define GUC_DOORBELL_INVALID	(GUC_NUM_DOORBELLS)
 
 #define GUC_DB_SIZE			(PAGE_SIZE)
 #define GUC_WQ_SIZE			(PAGE_SIZE * 2)
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index d74f4d3..604e71e 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -72,13 +72,12 @@ struct i915_guc_client {
 
 	uint32_t engines;		/* bitmap of (host) engine ids	*/
 	uint32_t priority;
-	uint32_t ctx_index;
+	u32 ctx_index;
 	uint32_t proc_desc_offset;
 
-	uint32_t doorbell_offset;
-	uint32_t doorbell_cookie;
-	uint16_t doorbell_id;
-	uint16_t padding[3];		/* Maintain alignment		*/
+	u16 doorbell_id;
+	unsigned long doorbell_offset;
+	u32 doorbell_cookie;
 
 	spinlock_t wq_lock;
 	uint32_t wq_offset;
@@ -159,7 +158,7 @@ struct intel_guc {
 
 	struct i915_guc_client *execbuf_client;
 
-	DECLARE_BITMAP(doorbell_bitmap, GUC_MAX_DOORBELLS);
+	DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS);
 	uint32_t db_cacheline;		/* Cyclic counter mod pagesize	*/
 
 	/* Action status & statistics */
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915: Sanitize GuC client initialization (rev2)
  2017-02-14 13:53 [PATCH v2] drm/i915: Sanitize GuC client initialization Joonas Lahtinen
@ 2017-02-14 17:52 ` Patchwork
  2017-02-16  2:28 ` [PATCH v2] drm/i915: Sanitize GuC client initialization Daniele Ceraolo Spurio
  2017-02-16 10:50 ` Oscar Mateo
  2 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2017-02-14 17:52 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Sanitize GuC client initialization (rev2)
URL   : https://patchwork.freedesktop.org/series/19452/
State : success

== Summary ==

Series 19452v2 drm/i915: Sanitize GuC client initialization
https://patchwork.freedesktop.org/api/1.0/series/19452/revisions/2/mbox/

fi-bdw-5557u     total:252  pass:241  dwarn:0   dfail:0   fail:0   skip:11 
fi-bsw-n3050     total:252  pass:213  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:252  pass:233  dwarn:0   dfail:0   fail:0   skip:19 
fi-bxt-t5700     total:83   pass:70   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:252  pass:225  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:252  pass:221  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:252  pass:236  dwarn:0   dfail:0   fail:0   skip:16 
fi-hsw-4770r     total:252  pass:236  dwarn:0   dfail:0   fail:0   skip:16 
fi-ilk-650       total:252  pass:202  dwarn:0   dfail:0   fail:0   skip:50 
fi-ivb-3520m     total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-ivb-3770      total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-kbl-7500u     total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-skl-6260u     total:252  pass:242  dwarn:0   dfail:0   fail:0   skip:10 
fi-skl-6700hq    total:252  pass:235  dwarn:0   dfail:0   fail:0   skip:17 
fi-skl-6700k     total:252  pass:230  dwarn:4   dfail:0   fail:0   skip:18 
fi-skl-6770hq    total:252  pass:242  dwarn:0   dfail:0   fail:0   skip:10 
fi-snb-2520m     total:252  pass:224  dwarn:0   dfail:0   fail:0   skip:28 
fi-snb-2600      total:252  pass:223  dwarn:0   dfail:0   fail:0   skip:29 

7b80cb5504b3b3cebaea944d35f25e27415e89a6 drm-tip: 2017y-02m-14d-14h-36m-43s UTC integration manifest
d57e3d1 drm/i915: Sanitize GuC client initialization

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3807/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] drm/i915: Sanitize GuC client initialization
  2017-02-14 13:53 [PATCH v2] drm/i915: Sanitize GuC client initialization Joonas Lahtinen
  2017-02-14 17:52 ` ✓ Fi.CI.BAT: success for drm/i915: Sanitize GuC client initialization (rev2) Patchwork
@ 2017-02-16  2:28 ` Daniele Ceraolo Spurio
  2017-02-16  7:44   ` Chris Wilson
  2017-02-21 15:38   ` Joonas Lahtinen
  2017-02-16 10:50 ` Oscar Mateo
  2 siblings, 2 replies; 8+ messages in thread
From: Daniele Ceraolo Spurio @ 2017-02-16  2:28 UTC (permalink / raw)
  To: Joonas Lahtinen,
	Intel graphics driver community testing & development



On 14/02/17 05:53, Joonas Lahtinen wrote:
> Started adding proper teardown to guc_client_alloc, ended up removing
> quite a few dead ends where errors communicating with the GuC were
> silently ignored. There also seemed to be quite a few erronous
> teardown actions performed in case of an error (ordering wrong).
>
> v2:
> 	- Increase function symmetry/proximity (Michal/Daniele)
> 	- Fix __reserve_doorbell accounting for high priority (Daniele)
> 	- Call __update_doorbell_desc! (Daniele)
> 	- Isolate __guc_{,de}allocate_doorbell (Michal/Daniele)
>
> Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
> Cc: Oscar Mateo <oscar.mateo@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c        |   4 +-
>  drivers/gpu/drm/i915/i915_guc_submission.c | 371 ++++++++++++++++-------------
>  drivers/gpu/drm/i915/intel_guc_fwif.h      |   4 +-
>  drivers/gpu/drm/i915/intel_uc.h            |  11 +-
>  4 files changed, 215 insertions(+), 175 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index cd023fd..f3972cf 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2492,7 +2492,7 @@ static void i915_guc_client_info(struct seq_file *m,
>
>  	seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n",
>  		client->priority, client->ctx_index, client->proc_desc_offset);
> -	seq_printf(m, "\tDoorbell id %d, offset: 0x%x, cookie 0x%x\n",
> +	seq_printf(m, "\tDoorbell id %d, offset: 0x%lx, cookie 0x%x\n",
>  		client->doorbell_id, client->doorbell_offset, client->doorbell_cookie);
>  	seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
>  		client->wq_size, client->wq_offset, client->wq_tail);
> @@ -2527,7 +2527,7 @@ static int i915_guc_info(struct seq_file *m, void *data)
>  	}
>
>  	seq_printf(m, "Doorbell map:\n");
> -	seq_printf(m, "\t%*pb\n", GUC_MAX_DOORBELLS, guc->doorbell_bitmap);
> +	seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap);
>  	seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc->db_cacheline);
>
>  	seq_printf(m, "GuC total action count: %llu\n", guc->action_count);
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 8ced9e2..258fca3 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -62,27 +62,73 @@
>   *
>   */
>
> +static inline bool is_high_priority(struct i915_guc_client* client)
> +{
> +	return client->priority <= GUC_CTX_PRIORITY_HIGH;
> +}
> +
> +static int __reserve_doorbell(struct i915_guc_client *client)
> +{
> +	unsigned long offset;
> +	unsigned long end;
> +	u16 id;
> +
> +	GEM_BUG_ON(client->doorbell_id != GUC_DOORBELL_INVALID);
> +
> +	/*
> +	 * The bitmap tracks which doorbell registers are currently in use.
> +	 * It is split into two halves; the first half is used for normal
> +	 * priority contexts, the second half for high-priority ones.
> +	 * Note that logically higher priorities are numerically less than
> +	 * normal ones, so the test below means "is it high-priority?"
> +	 */
> +
> +	offset = 0;
> +	end = GUC_NUM_DOORBELLS/2;
> +	if (is_high_priority(client)) {
> +		offset = end;
> +		end += offset;
> +        }
> +
> +	id = find_next_zero_bit(client->guc->doorbell_bitmap, offset, end);
> +	if (id == end)
> +		return -ENOSPC;
> +
> +	__set_bit(id, client->guc->doorbell_bitmap);
> +	client->doorbell_id = id;
> +	DRM_DEBUG_DRIVER("client %u (high prio=%s) reserved doorbell %d: \n",
> +			 client->ctx_index, yesno(is_high_priority(client)),
> +			 id);
> +	return 0;
> +}
> +
> +static void __unreserve_doorbell(struct i915_guc_client *client)
> +{
> +	GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID);
> +
> +	__clear_bit(client->doorbell_id, client->guc->doorbell_bitmap);
> +	client->doorbell_id = GUC_DOORBELL_INVALID;
> +}
> +
>  /*
>   * Tell the GuC to allocate or deallocate a specific doorbell
>   */
>
> -static int guc_allocate_doorbell(struct intel_guc *guc,
> -				 struct i915_guc_client *client)
> +static int __guc_allocate_doorbell(struct intel_guc *guc, u32 ctx_index)
>  {
>  	u32 action[] = {
>  		INTEL_GUC_ACTION_ALLOCATE_DOORBELL,
> -		client->ctx_index
> +		ctx_index
>  	};
>
>  	return intel_guc_send(guc, action, ARRAY_SIZE(action));
>  }
>
> -static int guc_release_doorbell(struct intel_guc *guc,
> -				struct i915_guc_client *client)
> +static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 ctx_index)
>  {
>  	u32 action[] = {
>  		INTEL_GUC_ACTION_DEALLOCATE_DOORBELL,
> -		client->ctx_index
> +		ctx_index
>  	};
>
>  	return intel_guc_send(guc, action, ARRAY_SIZE(action));
> @@ -95,104 +141,98 @@ static int guc_release_doorbell(struct intel_guc *guc,
>   * client object which contains the page being used for the doorbell
>   */
>
> -static int guc_update_doorbell_id(struct intel_guc *guc,
> -				  struct i915_guc_client *client,
> -				  u16 new_id)
> +static int __update_doorbell_desc(struct i915_guc_client *client, u16 new_id)
>  {
> -	struct sg_table *sg = guc->ctx_pool_vma->pages;
> -	void *doorbell_bitmap = guc->doorbell_bitmap;
> -	struct guc_doorbell_info *doorbell;
> +	struct sg_table *sg = client->guc->ctx_pool_vma->pages;
>  	struct guc_context_desc desc;
>  	size_t len;
>
> -	doorbell = client->vaddr + client->doorbell_offset;
> -
> -	if (client->doorbell_id != GUC_INVALID_DOORBELL_ID &&
> -	    test_bit(client->doorbell_id, doorbell_bitmap)) {
> -		/* Deactivate the old doorbell */
> -		doorbell->db_status = GUC_DOORBELL_DISABLED;
> -		(void)guc_release_doorbell(guc, client);
> -		__clear_bit(client->doorbell_id, doorbell_bitmap);
> -	}
> -
>  	/* Update the GuC's idea of the doorbell ID */
>  	len = sg_pcopy_to_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
> -			     sizeof(desc) * client->ctx_index);
> +				 sizeof(desc) * client->ctx_index);
>  	if (len != sizeof(desc))
>  		return -EFAULT;
> +
>  	desc.db_id = new_id;
>  	len = sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
> -			     sizeof(desc) * client->ctx_index);
> +				   sizeof(desc) * client->ctx_index);
>  	if (len != sizeof(desc))
>  		return -EFAULT;
>
> -	client->doorbell_id = new_id;
> -	if (new_id == GUC_INVALID_DOORBELL_ID)
> -		return 0;
> +	return 0;
> +}
> +
> +static struct guc_doorbell_info *__get_doorbell(struct i915_guc_client *client)
> +{
> +	return client->vaddr + client->doorbell_offset;
> +}
> +
> +static bool has_doorbell(struct i915_guc_client *client)
> +{
> +	if (client->doorbell_id == GUC_DOORBELL_INVALID)
> +		return false;
>
> -	/* Activate the new doorbell */
> -	__set_bit(new_id, doorbell_bitmap);
> +	return test_bit(client->doorbell_id, client->guc->doorbell_bitmap);

Should we warn/gem_bug if client->doorbell_id != GUC_DOORBELL_INVALID 
and the bit in guc->doorbell_bitmap is not set? Not sure if you plan to 
decouple them more in the future, but with the current code it should 
always be impossible to have a valid doorbell without the bit in the 
bitmask being set. Maybe we can just return client->doorbell_id != 
GUC_DOORBELL_INVALID here if we have no plan for a case where they can 
be out of sync.

> +}
> +
> +static int __create_doorbell(struct i915_guc_client *client)
> +{
> +	struct guc_doorbell_info *doorbell;
> +	int err;
> +
> +	doorbell = __get_doorbell(client);
>  	doorbell->db_status = GUC_DOORBELL_ENABLED;
>  	doorbell->cookie = client->doorbell_cookie;
> -	return guc_allocate_doorbell(guc, client);
> +
> +	err = __guc_allocate_doorbell(client->guc, client->ctx_index);
> +	if (err) {
> +		doorbell->db_status = GUC_DOORBELL_DISABLED;
> +		doorbell->cookie = 0;
> +	}
> +	return err;
>  }
>
> -static void guc_disable_doorbell(struct intel_guc *guc,
> -				 struct i915_guc_client *client)
> +static int __destroy_doorbell(struct i915_guc_client *client)
>  {
> -	(void)guc_update_doorbell_id(guc, client, GUC_INVALID_DOORBELL_ID);
> +	struct guc_doorbell_info *doorbell;
>
> -	/* XXX: wait for any interrupts */
> -	/* XXX: wait for workqueue to drain */
> +	doorbell = __get_doorbell(client);
> +	doorbell->db_status = GUC_DOORBELL_DISABLED;
> +	doorbell->cookie = 0;
> +

Not from this patch (but since we're at it...), I did a bit of digging 
and I've found that doorbell release flow requires SW to poll the 
GEN8_DRBREGL(db_id) register after updating doorbell->db_status to wait 
for the GEN8_DRB_VALID bit to go to zero. This ensures that any pending 
events are processed before we call into GuC to release the doorbell. 
Note that GuC will fail the DEALLOCATE_DOORBELL action if the bit has 
not gone to zero yet. This is currently not an issue, probably because 
we use a single doorbell and we don't ring it near release time, but the 
situation could change in the future so I believe it is worth to fix it 
now. I can follow up with an additional patch if you want to keep this 
one as refactoring only.

> +	return __guc_deallocate_doorbell(client->guc, client->ctx_index);
>  }
>
> -static uint16_t
> -select_doorbell_register(struct intel_guc *guc, uint32_t priority)
> +static int destroy_doorbell(struct i915_guc_client *client)
>  {
> -	/*
> -	 * The bitmap tracks which doorbell registers are currently in use.
> -	 * It is split into two halves; the first half is used for normal
> -	 * priority contexts, the second half for high-priority ones.
> -	 * Note that logically higher priorities are numerically less than
> -	 * normal ones, so the test below means "is it high-priority?"
> -	 */
> -	const bool hi_pri = (priority <= GUC_CTX_PRIORITY_HIGH);
> -	const uint16_t half = GUC_MAX_DOORBELLS / 2;
> -	const uint16_t start = hi_pri ? half : 0;
> -	const uint16_t end = start + half;
> -	uint16_t id;
> +	int err;
>
> -	id = find_next_zero_bit(guc->doorbell_bitmap, end, start);
> -	if (id == end)
> -		id = GUC_INVALID_DOORBELL_ID;
> +	GEM_BUG_ON(!has_doorbell(client));
>
> -	DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n",
> -			hi_pri ? "high" : "normal", id);
> +	/* XXX: wait for any interrupts */
> +	/* XXX: wait for workqueue to drain */
>
> -	return id;
> -}
> +	err = __destroy_doorbell(client);
> +	if (err)
> +		return err;
>
> -/*
> - * Select, assign and relase doorbell cachelines
> - *
> - * These functions track which doorbell cachelines are in use.
> - * The data they manipulate is protected by the intel_guc_send lock.
> - */
> +	__unreserve_doorbell(client);
>
> -static uint32_t select_doorbell_cacheline(struct intel_guc *guc)
> +	return 0;
> +}
> +
> +static unsigned long __reserve_cacheline(struct intel_guc* guc)

"reserve_cacheline" doesn't really reflect what happens, because more 
doorbells can use the same cacheline (while they are on different 
physical pages) and there is no concept of unreserving the cacheline. 
The idea is to try to avoid having lots of doorbells on the same 
cacheline if possible to make the monitoring more efficient on the HW, 
so I'd keep the "select_cacheline" naming for this function. We should 
probably also look at modifying the function to use something more 
elaborated than a simple round robin scheme to ensure doorbells are 
equally distribuited on cachelines, but that can probably wait until we 
use more doorbells.

>  {
> -	const uint32_t cacheline_size = cache_line_size();
> -	uint32_t offset;
> +	unsigned long offset;
>
>  	/* Doorbell uses a single cache line within a page */
>  	offset = offset_in_page(guc->db_cacheline);
>
>  	/* Moving to next cache line to reduce contention */
> -	guc->db_cacheline += cacheline_size;
> -
> -	DRM_DEBUG_DRIVER("selected doorbell cacheline 0x%x, next 0x%x, linesize %u\n",
> -			offset, guc->db_cacheline, cacheline_size);
> +	guc->db_cacheline += cache_line_size();
>
> +	DRM_DEBUG_DRIVER("reserved cacheline 0x%lx, next 0x%x, linesize %u\n",
> +			offset, guc->db_cacheline, cache_line_size());
>  	return offset;
>  }
>
> @@ -584,93 +624,84 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
>  	return vma;
>  }
>
> -static void
> -guc_client_free(struct drm_i915_private *dev_priv,
> -		struct i915_guc_client *client)
> +static void guc_client_free(struct i915_guc_client *client)
>  {
> -	struct intel_guc *guc = &dev_priv->guc;
> -
> -	if (!client)
> -		return;
> -
>  	/*
>  	 * XXX: wait for any outstanding submissions before freeing memory.
>  	 * Be sure to drop any locks
>  	 */
> -
> -	if (client->vaddr) {
> -		/*
> -		 * If we got as far as setting up a doorbell, make sure we
> -		 * shut it down before unmapping & deallocating the memory.
> -		 */
> -		guc_disable_doorbell(guc, client);
> -
> -		i915_gem_object_unpin_map(client->vma->obj);
> -	}
> -
> +	WARN_ON(destroy_doorbell(client));
> +	guc_ctx_desc_fini(client->guc, client);
> +	i915_gem_object_unpin_map(client->vma->obj);
>  	i915_vma_unpin_and_release(&client->vma);
> -
> -	if (client->ctx_index != GUC_INVALID_CTX_ID) {
> -		guc_ctx_desc_fini(guc, client);
> -		ida_simple_remove(&guc->ctx_ids, client->ctx_index);
> -	}
> -
> +	ida_simple_remove(&client->guc->ctx_ids, client->ctx_index);
>  	kfree(client);
>  }
>
>  /* Check that a doorbell register is in the expected state */
> -static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id)
> +static bool doorbell_ok(struct intel_guc *guc, u16 db_id)
>  {
>  	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> -	i915_reg_t drbreg = GEN8_DRBREGL(db_id);
> -	uint32_t value = I915_READ(drbreg);
> -	bool enabled = (value & GUC_DOORBELL_ENABLED) != 0;
> -	bool expected = test_bit(db_id, guc->doorbell_bitmap);
> +	u32 drbregl;
> +	bool valid;
> +
> +	GEM_BUG_ON(db_id >= GUC_DOORBELL_INVALID);
>
> -	if (enabled == expected)
> +	drbregl = I915_READ(GEN8_DRBREGL(db_id));
> +	valid = drbregl & GEN8_DRB_VALID;
> +
> +	if (test_bit(db_id, guc->doorbell_bitmap) == valid)
>  		return true;
>
> -	DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) 0x%x, should be %s\n",
> -			 db_id, drbreg.reg, value,
> -			 expected ? "active" : "inactive");
> +	DRM_DEBUG_DRIVER("Doorbell %d has unexpected state (0x%x): valid=%s\n",
> +			 db_id, drbregl, yesno(valid));
>
>  	return false;
>  }
>
> +static int __reset_doorbell(struct i915_guc_client* client, u16 db_id)
> +{
> +	int err;
> +
> +	err = __update_doorbell_desc(client, db_id);
> +	if (!err)
> +		err = __create_doorbell(client);
> +	if (!err)
> +		err = __destroy_doorbell(client);
> +
> +	return err;
> +}
> +
>  /*
>   * Borrow the first client to set up & tear down each unused doorbell
>   * in turn, to ensure that all doorbell h/w is (re)initialised.
>   */
> -static void guc_init_doorbell_hw(struct intel_guc *guc)
> +static int guc_init_doorbell_hw(struct intel_guc *guc)
>  {
>  	struct i915_guc_client *client = guc->execbuf_client;
> -	uint16_t db_id;
> -	int i, err;
> +	int err;
> +	int i;
>
> -	guc_disable_doorbell(guc, client);
> +	if (has_doorbell(client))
> +		destroy_doorbell(client);
>
> -	for (i = 0; i < GUC_MAX_DOORBELLS; ++i) {
> -		/* Skip if doorbell is OK */
> -		if (guc_doorbell_check(guc, i))
> +	for (i = 0; i < GUC_NUM_DOORBELLS; ++i) {
> +		if (doorbell_ok(guc, i))
>  			continue;
>

As mentioned in the previous patch version, here we assume that all the 
doorbells should be disabled an we want to reset HW that has been left 
enabled from previous users, so the doorbell_bitmap should be clear. 
Maybe adding a simple

	GEM_BUG_ON(test_bit(i, guc->doorbell_bitmap));

will help making sure that we're not leaving bits set when 
releasing/resetting.

> -		err = guc_update_doorbell_id(guc, client, i);
> -		if (err)
> -			DRM_DEBUG_DRIVER("Doorbell %d update failed, err %d\n",
> -					i, err);
> +		err = __reset_doorbell(client, i);

If the reset fails the doorbell is in a bad state, so it might be worth 
to ensure that the bit is set in the bitmask to make sure we don't 
assign that doorbell to any client, but we'd have to make sure to clear 
the bitmask on GuC reset (see comment above).

> +		WARN(err, "Doorbell %d reset failed, err %d\n", i, err);
>  	}
>
> -	db_id = select_doorbell_register(guc, client->priority);
> -	WARN_ON(db_id == GUC_INVALID_DOORBELL_ID);
> -
> -	err = guc_update_doorbell_id(guc, client, db_id);
> +	err = __reserve_doorbell(client);
>  	if (err)
> -		DRM_WARN("Failed to restore doorbell to %d, err %d\n",
> -			 db_id, err);
> +		return err;

Continuing the discussion from the previous patch revision:

<snip>
 > Shouldn't this be create_doorbell() instead of reserve?

That would end up calling the __update_doorbell, which we can't as the
desc is for some strange reason not mapped yet.
</snip>

As far as I can see, everything should already be allocated and mapped 
here. Aren't we anyway already calling __update_doorbell both in the 
client_alloc and in __reset_doorbell above?
Also, where are we re-creating the doorbell that we destroyed at the 
beginning of the function?

>
>  	/* Read back & verify all doorbell registers */
> -	for (i = 0; i < GUC_MAX_DOORBELLS; ++i)
> -		(void)guc_doorbell_check(guc, i);
> +	for (i = 0; i < GUC_NUM_DOORBELLS; ++i)
> +		WARN_ON(!doorbell_ok(guc, i));
> +
> +	return 0;
>  }
>
>  /**
> @@ -696,49 +727,46 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
>  	struct intel_guc *guc = &dev_priv->guc;
>  	struct i915_vma *vma;
>  	void *vaddr;
> -	uint16_t db_id;
> +	int ret;
>
>  	client = kzalloc(sizeof(*client), GFP_KERNEL);
>  	if (!client)
> -		return NULL;
> +		return ERR_PTR(-ENOMEM);
>
> -	client->owner = ctx;
>  	client->guc = guc;
> +	client->owner = ctx;
>  	client->engines = engines;
>  	client->priority = priority;
> -	client->doorbell_id = GUC_INVALID_DOORBELL_ID;
> +	client->doorbell_id = GUC_DOORBELL_INVALID;
> +	client->wq_offset = GUC_DB_SIZE;
> +	client->wq_size = GUC_WQ_SIZE;
> +	spin_lock_init(&client->wq_lock);
>
> -	client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0,
> -			GUC_MAX_GPU_CONTEXTS, GFP_KERNEL);
> -	if (client->ctx_index >= GUC_MAX_GPU_CONTEXTS) {
> -		client->ctx_index = GUC_INVALID_CTX_ID;
> -		goto err;
> -	}
> +	ret = ida_simple_get(&guc->ctx_ids, 0, GUC_MAX_GPU_CONTEXTS,
> +				GFP_KERNEL);
> +	if (ret < 0)
> +		goto err_client;
> +
> +	client->ctx_index = ret;
>
>  	/* The first page is doorbell/proc_desc. Two followed pages are wq. */
>  	vma = intel_guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE);
> -	if (IS_ERR(vma))
> -		goto err;
> +	if (IS_ERR(vma)) {
> +		ret = PTR_ERR(vma);
> +		goto err_id;
> +	}
>
>  	/* We'll keep just the first (doorbell/proc) page permanently kmap'd. */
>  	client->vma = vma;
>
>  	vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
> -	if (IS_ERR(vaddr))
> -		goto err;
> -
> +	if (IS_ERR(vaddr)) {
> +		ret = PTR_ERR(vaddr);
> +		goto err_vma;
> +	}
>  	client->vaddr = vaddr;
>
> -	spin_lock_init(&client->wq_lock);
> -	client->wq_offset = GUC_DB_SIZE;
> -	client->wq_size = GUC_WQ_SIZE;
> -
> -	db_id = select_doorbell_register(guc, client->priority);
> -	if (db_id == GUC_INVALID_DOORBELL_ID)
> -		/* XXX: evict a doorbell instead? */
> -		goto err;
> -
> -	client->doorbell_offset = select_doorbell_cacheline(guc);
> +	client->doorbell_offset = __reserve_cacheline(guc);
>
>  	/*
>  	 * Since the doorbell only requires a single cacheline, we can save
> @@ -753,27 +781,35 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
>  	guc_proc_desc_init(guc, client);
>  	guc_ctx_desc_init(guc, client);
>
> -	/* For runtime client allocation we need to enable the doorbell. Not
> -	 * required yet for the static execbuf_client as this special kernel
> -	 * client is enabled from i915_guc_submission_enable().
> -	 *
> -	 * guc_update_doorbell_id(guc, client, db_id);
> -	 */
> +	/* For runtime client allocation we need to enable the doorbell. */

this comment is a bit unclear now because you're not deferring the 
initialization of execbuf_client to guc_init_doorbell_hw anymore, so 
there is no difference between execbuf_client and "runtime" clients. 
Maybe we can just remove the comment.
Note that creating the doorbell here will cause it to be destroyed and 
re-allocated when i915_guc_submission_enable is called. A worth 
compromise IMO to avoid special cases, but worth pointing out :)

Thanks,
Daniele

> +	ret = __update_doorbell_desc(client, client->doorbell_id);
> +	if (ret)
> +		goto err_vaddr;
> +
> +	ret = __create_doorbell(client);
> +	if (ret)
> +		goto err_db;
>
>  	DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: ctx_index %u\n",
> -		priority, client, client->engines, client->ctx_index);
> -	DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%x\n",
> -		client->doorbell_id, client->doorbell_offset);
> +			 priority, client, client->engines, client->ctx_index);
> +	DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%lx\n",
> +			 client->doorbell_id, client->doorbell_offset);
>
>  	return client;
> +err_db:
> +	__update_doorbell_desc(client, GUC_DOORBELL_INVALID);
> +err_vaddr:
> +	i915_gem_object_unpin_map(client->vma->obj);
> +err_vma:
> +	i915_vma_unpin_and_release(&client->vma);
> +err_id:
> +	ida_simple_remove(&guc->ctx_ids, client->ctx_index);
> +err_client:
> +	kfree(client);
>
> -err:
> -	guc_client_free(dev_priv, client);
> -	return NULL;
> +	return ERR_PTR(ret);
>  }
>
> -
> -
>  static void guc_policies_init(struct guc_policies *policies)
>  {
>  	struct guc_policy *policy;
> @@ -881,7 +917,7 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv)
>  		return 0;
>
>  	/* Wipe bitmap & delete client in case of reinitialisation */
> -	bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS);
> +	bitmap_clear(guc->doorbell_bitmap, 0, GUC_NUM_DOORBELLS);
>  	i915_guc_submission_disable(dev_priv);
>
>  	if (!i915.enable_guc_submission)
> @@ -932,14 +968,19 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
>  	struct i915_guc_client *client = guc->execbuf_client;
>  	struct intel_engine_cs *engine;
>  	enum intel_engine_id id;
> +	int err;
>
>  	if (!client)
>  		return -ENODEV;
>
> -	intel_guc_sample_forcewake(guc);
> +	err = intel_guc_sample_forcewake(guc);
> +	if (err)
> +		return err;
>
>  	guc_reset_wq(client);
> -	guc_init_doorbell_hw(guc);
> +	err = guc_init_doorbell_hw(guc);
> +	if (err)
> +		return err;
>
>  	/* Take over from manual control of ELSP (execlists) */
>  	for_each_engine(engine, dev_priv, id) {
> @@ -978,7 +1019,7 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
>  	if (!client)
>  		return;
>
> -	guc_client_free(dev_priv, client);
> +	guc_client_free(client);
>
>  	i915_vma_unpin_and_release(&guc->ads_vma);
>  	i915_vma_unpin_and_release(&guc->log.vma);
> diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
> index 25691f0..3ae8cef 100644
> --- a/drivers/gpu/drm/i915/intel_guc_fwif.h
> +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
> @@ -241,8 +241,8 @@ union guc_doorbell_qw {
>  	u64 value_qw;
>  } __packed;
>
> -#define GUC_MAX_DOORBELLS		256
> -#define GUC_INVALID_DOORBELL_ID		(GUC_MAX_DOORBELLS)
> +#define GUC_NUM_DOORBELLS	256
> +#define GUC_DOORBELL_INVALID	(GUC_NUM_DOORBELLS)
>
>  #define GUC_DB_SIZE			(PAGE_SIZE)
>  #define GUC_WQ_SIZE			(PAGE_SIZE * 2)
> diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
> index d74f4d3..604e71e 100644
> --- a/drivers/gpu/drm/i915/intel_uc.h
> +++ b/drivers/gpu/drm/i915/intel_uc.h
> @@ -72,13 +72,12 @@ struct i915_guc_client {
>
>  	uint32_t engines;		/* bitmap of (host) engine ids	*/
>  	uint32_t priority;
> -	uint32_t ctx_index;
> +	u32 ctx_index;
>  	uint32_t proc_desc_offset;
>
> -	uint32_t doorbell_offset;
> -	uint32_t doorbell_cookie;
> -	uint16_t doorbell_id;
> -	uint16_t padding[3];		/* Maintain alignment		*/
> +	u16 doorbell_id;
> +	unsigned long doorbell_offset;
> +	u32 doorbell_cookie;
>
>  	spinlock_t wq_lock;
>  	uint32_t wq_offset;
> @@ -159,7 +158,7 @@ struct intel_guc {
>
>  	struct i915_guc_client *execbuf_client;
>
> -	DECLARE_BITMAP(doorbell_bitmap, GUC_MAX_DOORBELLS);
> +	DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS);
>  	uint32_t db_cacheline;		/* Cyclic counter mod pagesize	*/
>
>  	/* Action status & statistics */
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] drm/i915: Sanitize GuC client initialization
  2017-02-16  2:28 ` [PATCH v2] drm/i915: Sanitize GuC client initialization Daniele Ceraolo Spurio
@ 2017-02-16  7:44   ` Chris Wilson
  2017-02-16 16:22     ` Daniele Ceraolo Spurio
  2017-02-21 15:38   ` Joonas Lahtinen
  1 sibling, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2017-02-16  7:44 UTC (permalink / raw)
  To: Daniele Ceraolo Spurio
  Cc: Intel graphics driver community testing & development

On Wed, Feb 15, 2017 at 06:28:59PM -0800, Daniele Ceraolo Spurio wrote:
> On 14/02/17 05:53, Joonas Lahtinen wrote:
> >-static void guc_disable_doorbell(struct intel_guc *guc,
> >-				 struct i915_guc_client *client)
> >+static int __destroy_doorbell(struct i915_guc_client *client)
> > {
> >-	(void)guc_update_doorbell_id(guc, client, GUC_INVALID_DOORBELL_ID);
> >+	struct guc_doorbell_info *doorbell;
> >
> >-	/* XXX: wait for any interrupts */
> >-	/* XXX: wait for workqueue to drain */
> >+	doorbell = __get_doorbell(client);
> >+	doorbell->db_status = GUC_DOORBELL_DISABLED;
> >+	doorbell->cookie = 0;
> >+
> 
> Not from this patch (but since we're at it...), I did a bit of
> digging and I've found that doorbell release flow requires SW to
> poll the GEN8_DRBREGL(db_id) register after updating
> doorbell->db_status to wait for the GEN8_DRB_VALID bit to go to
> zero. This ensures that any pending events are processed before we
> call into GuC to release the doorbell. Note that GuC will fail the
> DEALLOCATE_DOORBELL action if the bit has not gone to zero yet. This
> is currently not an issue, probably because we use a single doorbell
> and we don't ring it near release time, but the situation could
> change in the future so I believe it is worth to fix it now. I can
> follow up with an additional patch if you want to keep this one as
> refactoring only.

Just keep in mind that we currently do a disable after GPU reset. The guc
may not know what we are talking about :)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] drm/i915: Sanitize GuC client initialization
  2017-02-14 13:53 [PATCH v2] drm/i915: Sanitize GuC client initialization Joonas Lahtinen
  2017-02-14 17:52 ` ✓ Fi.CI.BAT: success for drm/i915: Sanitize GuC client initialization (rev2) Patchwork
  2017-02-16  2:28 ` [PATCH v2] drm/i915: Sanitize GuC client initialization Daniele Ceraolo Spurio
@ 2017-02-16 10:50 ` Oscar Mateo
  2 siblings, 0 replies; 8+ messages in thread
From: Oscar Mateo @ 2017-02-16 10:50 UTC (permalink / raw)
  To: Joonas Lahtinen,
	Intel graphics driver community testing & development



On 02/14/2017 05:53 AM, Joonas Lahtinen wrote:
> Started adding proper teardown to guc_client_alloc, ended up removing
> quite a few dead ends where errors communicating with the GuC were
> silently ignored. There also seemed to be quite a few erronous
> teardown actions performed in case of an error (ordering wrong).
>
> v2:
> 	- Increase function symmetry/proximity (Michal/Daniele)
> 	- Fix __reserve_doorbell accounting for high priority (Daniele)
> 	- Call __update_doorbell_desc! (Daniele)
> 	- Isolate __guc_{,de}allocate_doorbell (Michal/Daniele)
>
> Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
> Cc: Oscar Mateo <oscar.mateo@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c        |   4 +-
>   drivers/gpu/drm/i915/i915_guc_submission.c | 371 ++++++++++++++++-------------
>   drivers/gpu/drm/i915/intel_guc_fwif.h      |   4 +-
>   drivers/gpu/drm/i915/intel_uc.h            |  11 +-
>   4 files changed, 215 insertions(+), 175 deletions(-)
<SNIP>
>   
>   	/*
>   	 * Since the doorbell only requires a single cacheline, we can save
> @@ -753,27 +781,35 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
>   	guc_proc_desc_init(guc, client);
>   	guc_ctx_desc_init(guc, client);
> -	/* For runtime client allocation we need to enable the doorbell. Not
> -	 * required yet for the static execbuf_client as this special kernel
> -	 * client is enabled from i915_guc_submission_enable().
> -	 *
> -	 * guc_update_doorbell_id(guc, client, db_id);
> -	 */
> +	/* For runtime client allocation we need to enable the doorbell. */
> +	ret = __update_doorbell_desc(client, client->doorbell_id);
> +	if (ret)
> +		goto err_vaddr;
> +
> +	ret = __create_doorbell(client);
> +	if (ret)
> +		goto err_db;
At this point, client->doorbell_id is still invalid (__reserve_doorbell 
is not called until later from guc_init_doorbell_hw), so the 
__create_doorbell fails (and from there, the whole thing falls over: see 
my next comment below). CI.BAT didn't catch it because GuC is disabled 
by default.

>   	DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: ctx_index %u\n",
> -		priority, client, client->engines, client->ctx_index);
> -	DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%x\n",
> -		client->doorbell_id, client->doorbell_offset);
> +			 priority, client, client->engines, client->ctx_index);
> +	DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%lx\n",
> +			 client->doorbell_id, client->doorbell_offset);
>   
>   	return client;
> +err_db:
> +	__update_doorbell_desc(client, GUC_DOORBELL_INVALID);
> +err_vaddr:
> +	i915_gem_object_unpin_map(client->vma->obj);
> +err_vma:
> +	i915_vma_unpin_and_release(&client->vma);
> +err_id:
> +	ida_simple_remove(&guc->ctx_ids, client->ctx_index);
> +err_client:
> +	kfree(client);
>   
> -err:
> -	guc_client_free(dev_priv, client);
> -	return NULL;
> +	return ERR_PTR(ret);
>   }
>
I know you are leaving i915_guc_submission_init to me, but this patch 
should as a minimum check the return code from guc_client_alloc, 
otherwise we might end up with an invalid guc->execbuf_client without 
noticing.

Something like this should suffice, and I can take it from there:

@@ -939,7 +939,7 @@ int i915_guc_submission_init(struct drm_i915_private 
*dev_priv)
INTEL_INFO(dev_priv)->ring_mask,
GUC_CTX_PRIORITY_KMD_NORMAL,
dev_priv->kernel_context);
-       if (!guc->execbuf_client) {
+       if (IS_ERR(guc->execbuf_client)) {
                 DRM_ERROR("Failed to create GuC client for execbuf!\n");
                 goto err;
         }
@@ -1016,10 +1016,8 @@ void i915_guc_submission_fini(struct 
drm_i915_private *dev_priv)
         struct i915_guc_client *client;

         client = fetch_and_zero(&guc->execbuf_client);
-       if (!client)
-               return;
-
-       guc_client_free(client);
+       if (client && !IS_ERR(client))
+               guc_client_free(client);

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] drm/i915: Sanitize GuC client initialization
  2017-02-16  7:44   ` Chris Wilson
@ 2017-02-16 16:22     ` Daniele Ceraolo Spurio
  0 siblings, 0 replies; 8+ messages in thread
From: Daniele Ceraolo Spurio @ 2017-02-16 16:22 UTC (permalink / raw)
  To: Chris Wilson, Joonas Lahtinen,
	Intel graphics driver community testing & development,
	Michal Wajdeczko, Arkadiusz Hiler, Oscar Mateo, Tvrtko Ursulin



On 15/02/17 23:44, Chris Wilson wrote:
> On Wed, Feb 15, 2017 at 06:28:59PM -0800, Daniele Ceraolo Spurio wrote:
>> On 14/02/17 05:53, Joonas Lahtinen wrote:
>>> -static void guc_disable_doorbell(struct intel_guc *guc,
>>> -				 struct i915_guc_client *client)
>>> +static int __destroy_doorbell(struct i915_guc_client *client)
>>> {
>>> -	(void)guc_update_doorbell_id(guc, client, GUC_INVALID_DOORBELL_ID);
>>> +	struct guc_doorbell_info *doorbell;
>>>
>>> -	/* XXX: wait for any interrupts */
>>> -	/* XXX: wait for workqueue to drain */
>>> +	doorbell = __get_doorbell(client);
>>> +	doorbell->db_status = GUC_DOORBELL_DISABLED;
>>> +	doorbell->cookie = 0;
>>> +
>>
>> Not from this patch (but since we're at it...), I did a bit of
>> digging and I've found that doorbell release flow requires SW to
>> poll the GEN8_DRBREGL(db_id) register after updating
>> doorbell->db_status to wait for the GEN8_DRB_VALID bit to go to
>> zero. This ensures that any pending events are processed before we
>> call into GuC to release the doorbell. Note that GuC will fail the
>> DEALLOCATE_DOORBELL action if the bit has not gone to zero yet. This
>> is currently not an issue, probably because we use a single doorbell
>> and we don't ring it near release time, but the situation could
>> change in the future so I believe it is worth to fix it now. I can
>> follow up with an additional patch if you want to keep this one as
>> refactoring only.
>
> Just keep in mind that we currently do a disable after GPU reset. The guc
> may not know what we are talking about :)
> -Chris
>

Yep, we should move the release to before reset or, if the 
client/doorbell page stays pinned across reset, limit ourselves to just 
updating the db status in memory (which will cause the validity bit to 
be updated accordingly) without notifying GuC, because as you said GuC 
will have no idea of what we're referring to.

I'm also seeing a doorbell release failure during driver unload:

[drm] INTEL_GUC_SEND: Action 0x20 failed; ret=-110 status=0x00000020 
response=0x00000000

I haven't looked int it yet (I was waiting for this patch to go in 
first), but when I reload the driver the status of the doorbells seems 
to be ok so probably just a communication issue with GuC.

Thanks,
Daniele
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] drm/i915: Sanitize GuC client initialization
  2017-02-16  2:28 ` [PATCH v2] drm/i915: Sanitize GuC client initialization Daniele Ceraolo Spurio
  2017-02-16  7:44   ` Chris Wilson
@ 2017-02-21 15:38   ` Joonas Lahtinen
  2017-02-22 11:44     ` Joonas Lahtinen
  1 sibling, 1 reply; 8+ messages in thread
From: Joonas Lahtinen @ 2017-02-21 15:38 UTC (permalink / raw)
  To: Daniele Ceraolo Spurio,
	Intel graphics driver community testing & development

On ke, 2017-02-15 at 18:28 -0800, Daniele Ceraolo Spurio wrote:
> 
> On 14/02/17 05:53, Joonas Lahtinen wrote:
> > 
> > Started adding proper teardown to guc_client_alloc, ended up removing
> > quite a few dead ends where errors communicating with the GuC were
> > silently ignored. There also seemed to be quite a few erronous
> > teardown actions performed in case of an error (ordering wrong).
> > 
> > v2:
> > 	- Increase function symmetry/proximity (Michal/Daniele)
> > 	- Fix __reserve_doorbell accounting for high priority (Daniele)
> > 	- Call __update_doorbell_desc! (Daniele)
> > 	- Isolate __guc_{,de}allocate_doorbell (Michal/Daniele)
> > 
> > Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> > Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
> > Cc: Oscar Mateo <oscar.mateo@intel.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

<SNIP>

> > +static bool has_doorbell(struct i915_guc_client *client)
> > +{
> > +	if (client->doorbell_id == GUC_DOORBELL_INVALID)
> > +		return false;
> > 
> > -	/* Activate the new doorbell */
> > -	__set_bit(new_id, doorbell_bitmap);
> > +	return test_bit(client->doorbell_id, client->guc->doorbell_bitmap);
> 
> Should we warn/gem_bug if client->doorbell_id != GUC_DOORBELL_INVALID 
> and the bit in guc->doorbell_bitmap is not set? Not sure if you plan to 
> decouple them more in the future, but with the current code it should 
> always be impossible to have a valid doorbell without the bit in the 
> bitmask being set. Maybe we can just return client->doorbell_id != 
> GUC_DOORBELL_INVALID here if we have no plan for a case where they can 
> be out of sync.

Kinda a leftover from restructuring, the selection and reservation were
a different thing at some point. Changed into GEM_BUG_ON.

> > +static int __destroy_doorbell(struct i915_guc_client *client)
> >  {
> > -	(void)guc_update_doorbell_id(guc, client, GUC_INVALID_DOORBELL_ID);
> > +	struct guc_doorbell_info *doorbell;
> > 
> > -	/* XXX: wait for any interrupts */
> > -	/* XXX: wait for workqueue to drain */
> > +	doorbell = __get_doorbell(client);
> > +	doorbell->db_status = GUC_DOORBELL_DISABLED;
> > +	doorbell->cookie = 0;
> > +
> 
> Not from this patch (but since we're at it...), I did a bit of digging 
> and I've found that doorbell release flow requires SW to poll the 
> GEN8_DRBREGL(db_id) register after updating doorbell->db_status to wait 
> for the GEN8_DRB_VALID bit to go to zero. This ensures that any pending 
> events are processed before we call into GuC to release the doorbell. 
> Note that GuC will fail the DEALLOCATE_DOORBELL action if the bit has 
> not gone to zero yet. This is currently not an issue, probably because 
> we use a single doorbell and we don't ring it near release time, but the 
> situation could change in the future so I believe it is worth to fix it 
> now. I can follow up with an additional patch if you want to keep this 
> one as refactoring only.

Ack, add a follow-up on top of your series.

> > -static uint32_t select_doorbell_cacheline(struct intel_guc *guc)
> > +	return 0;
> > +}
> > +
> > +static unsigned long __reserve_cacheline(struct intel_guc* guc)
> 
> "reserve_cacheline" doesn't really reflect what happens, because more 
> doorbells can use the same cacheline (while they are on different 
> physical pages) and there is no concept of unreserving the cacheline. 
> The idea is to try to avoid having lots of doorbells on the same 
> cacheline if possible to make the monitoring more efficient on the HW, 
> so I'd keep the "select_cacheline" naming for this function. We should 
> probably also look at modifying the function to use something more 
> elaborated than a simple round robin scheme to ensure doorbells are 
> equally distribuited on cachelines, but that can probably wait until we 
> use more doorbells.

Renamed.


> >  /*
> >   * Borrow the first client to set up & tear down each unused doorbell
> >   * in turn, to ensure that all doorbell h/w is (re)initialised.
> >   */
> > -static void guc_init_doorbell_hw(struct intel_guc *guc)
> > +static int guc_init_doorbell_hw(struct intel_guc *guc)
> >  {
> >  	struct i915_guc_client *client = guc->execbuf_client;
> > -	uint16_t db_id;
> > -	int i, err;
> > +	int err;
> > +	int i;
> > 
> > -	guc_disable_doorbell(guc, client);
> > +	if (has_doorbell(client))
> > +		destroy_doorbell(client);
> > 
> > -	for (i = 0; i < GUC_MAX_DOORBELLS; ++i) {
> > -		/* Skip if doorbell is OK */
> > -		if (guc_doorbell_check(guc, i))
> > +	for (i = 0; i < GUC_NUM_DOORBELLS; ++i) {
> > +		if (doorbell_ok(guc, i))
> >  			continue;
> > 
> 
> As mentioned in the previous patch version, here we assume that all the 
> doorbells should be disabled an we want to reset HW that has been left 
> enabled from previous users, so the doorbell_bitmap should be clear. 
> Maybe adding a simple
> 
> 	GEM_BUG_ON(test_bit(i, guc->doorbell_bitmap));
> 
> will help making sure that we're not leaving bits set when 
> releasing/resetting.

The functions don't actually even touch doorbell_bitmap, they just nuke
all active clients. That's what the previous code did, as I read it.


> > -		err = guc_update_doorbell_id(guc, client, i);
> > -		if (err)
> > -			DRM_DEBUG_DRIVER("Doorbell %d update failed, err %d\n",
> > -					i, err);
> > +		err = __reset_doorbell(client, i);
> 
> If the reset fails the doorbell is in a bad state, so it might be worth 
> to ensure that the bit is set in the bitmask to make sure we don't 
> assign that doorbell to any client, but we'd have to make sure to clear 
> the bitmask on GuC reset (see comment above).

Clearing the bits now in guc_init_doorbell_hw().


> > +		WARN(err, "Doorbell %d reset failed, err %d\n", i, err);
> >  	}
> > 
> > -	db_id = select_doorbell_register(guc, client->priority);
> > -	WARN_ON(db_id == GUC_INVALID_DOORBELL_ID);
> > -
> > -	err = guc_update_doorbell_id(guc, client, db_id);
> > +	err = __reserve_doorbell(client);
> >  	if (err)
> > -		DRM_WARN("Failed to restore doorbell to %d, err %d\n",
> > -			 db_id, err);
> > +		return err;
> 
> Continuing the discussion from the previous patch revision:
> 
> <snip>
>  > Shouldn't this be create_doorbell() instead of reserve?
> 
> That would end up calling the __update_doorbell, which we can't as the
> desc is for some strange reason not mapped yet.
> </snip>
> 
> As far as I can see, everything should already be allocated and mapped 
> here. Aren't we anyway already calling __update_doorbell both in the 
> client_alloc and in __reset_doorbell above?
> Also, where are we re-creating the doorbell that we destroyed at the 
> beginning of the function?
> 

Hmm, refactored the extra destroy cycle out.

<SNIP>

> > @@ -753,27 +781,35 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
> >  	guc_proc_desc_init(guc, client);
> >  	guc_ctx_desc_init(guc, client);
> > 
> > -	/* For runtime client allocation we need to enable the doorbell. Not
> > -	 * required yet for the static execbuf_client as this special kernel
> > -	 * client is enabled from i915_guc_submission_enable().
> > -	 *
> > -	 * guc_update_doorbell_id(guc, client, db_id);
> > -	 */
> > +	/* For runtime client allocation we need to enable the doorbell. */
> 
> this comment is a bit unclear now because you're not deferring the 
> initialization of execbuf_client to guc_init_doorbell_hw anymore, so 
> there is no difference between execbuf_client and "runtime" clients. 
> Maybe we can just remove the comment.

Removed.

> Note that creating the doorbell here will cause it to be destroyed and 
> re-allocated when i915_guc_submission_enable is called. A worth 
> compromise IMO to avoid special cases, but worth pointing out :)

Refactored a bit.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] drm/i915: Sanitize GuC client initialization
  2017-02-21 15:38   ` Joonas Lahtinen
@ 2017-02-22 11:44     ` Joonas Lahtinen
  0 siblings, 0 replies; 8+ messages in thread
From: Joonas Lahtinen @ 2017-02-22 11:44 UTC (permalink / raw)
  To: Daniele Ceraolo Spurio,
	Intel graphics driver community testing & development

Hi,

Seems like there were couple too many steps away from "just reordering
code", and indeed it's not working with GuC enabled, so have to spend
some time to see why.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2017-02-22 11:44 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-14 13:53 [PATCH v2] drm/i915: Sanitize GuC client initialization Joonas Lahtinen
2017-02-14 17:52 ` ✓ Fi.CI.BAT: success for drm/i915: Sanitize GuC client initialization (rev2) Patchwork
2017-02-16  2:28 ` [PATCH v2] drm/i915: Sanitize GuC client initialization Daniele Ceraolo Spurio
2017-02-16  7:44   ` Chris Wilson
2017-02-16 16:22     ` Daniele Ceraolo Spurio
2017-02-21 15:38   ` Joonas Lahtinen
2017-02-22 11:44     ` Joonas Lahtinen
2017-02-16 10:50 ` Oscar Mateo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.