All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/5] drm/i915/guc: add enable_guc_loading parameter
@ 2016-04-27 18:03 Dave Gordon
  2016-04-27 18:03 ` [PATCH v2 2/5] drm/i915/guc: pass request (not client) to i915_guc_{wq_check_space, submit}() Dave Gordon
                   ` (5 more replies)
  0 siblings, 6 replies; 24+ messages in thread
From: Dave Gordon @ 2016-04-27 18:03 UTC (permalink / raw)
  To: intel-gfx

Split the function of "enable_guc_submission" into two separate
options.  The new one ("enable_guc_loading") controls only the
*fetching and loading* of the GuC firmware image. The existing
one is redefined to control only the *use* of the GuC for batch
submission once the firmware is loaded.

In addition, the degree of control has been refined from a simple
bool to an integer key, allowing several options:
 -1 (default)     whatever the platform default is
  0  DISABLE      don't load/use the GuC
  1  BEST EFFORT  try to load/use the GuC, fallback if not available
  2  REQUIRE      must load/use the GuC, else leave the GPU wedged

The new platform default (as coded here) will be to attempt to
load the GuC iff the device has a GuC that requires firmware,
but not yet to use it for submission. A later patch will change
to enable it if appropriate.

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c            |  1 -
 drivers/gpu/drm/i915/i915_guc_submission.c |  4 +-
 drivers/gpu/drm/i915/i915_params.c         | 14 ++++-
 drivers/gpu/drm/i915/i915_params.h         |  3 +-
 drivers/gpu/drm/i915/intel_guc_loader.c    | 98 ++++++++++++++++--------------
 drivers/gpu/drm/i915/intel_uncore.c        |  2 +-
 6 files changed, 70 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d493e79..b04effc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4910,7 +4910,6 @@ int i915_gem_init_engines(struct drm_device *dev)
 		ret = intel_guc_ucode_load(dev);
 		if (ret) {
 			DRM_ERROR("Failed to initialize GuC, error %d\n", ret);
-			ret = -EIO;
 			goto out;
 		}
 	}
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 72d6665..42d2efa 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -970,7 +970,7 @@ int intel_guc_suspend(struct drm_device *dev)
 	struct intel_context *ctx;
 	u32 data[3];
 
-	if (!i915.enable_guc_submission)
+	if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS)
 		return 0;
 
 	ctx = dev_priv->kernel_context;
@@ -996,7 +996,7 @@ int intel_guc_resume(struct drm_device *dev)
 	struct intel_context *ctx;
 	u32 data[3];
 
-	if (!i915.enable_guc_submission)
+	if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS)
 		return 0;
 
 	ctx = dev_priv->kernel_context;
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 383c076..6a5578c 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -54,7 +54,8 @@ struct i915_params i915 __read_mostly = {
 	.verbose_state_checks = 1,
 	.nuclear_pageflip = 0,
 	.edp_vswing = 0,
-	.enable_guc_submission = false,
+	.enable_guc_loading = -1,
+	.enable_guc_submission = 0,
 	.guc_log_level = -1,
 	.enable_dp_mst = true,
 	.inject_load_failure = 0,
@@ -198,8 +199,15 @@ struct i915_params i915 __read_mostly = {
 		 "(0=use value from vbt [default], 1=low power swing(200mV),"
 		 "2=default swing(400mV))");
 
-module_param_named_unsafe(enable_guc_submission, i915.enable_guc_submission, bool, 0400);
-MODULE_PARM_DESC(enable_guc_submission, "Enable GuC submission (default:false)");
+module_param_named_unsafe(enable_guc_loading, i915.enable_guc_loading, int, 0400);
+MODULE_PARM_DESC(enable_guc_loading,
+		"Enable GuC firmware loading "
+		"(-1=auto [default], 0=never, 1=if available, 2=required)");
+
+module_param_named_unsafe(enable_guc_submission, i915.enable_guc_submission, int, 0400);
+MODULE_PARM_DESC(enable_guc_submission,
+		"Enable GuC submission "
+		"(-1=auto, 0=never [default], 1=if available, 2=required)");
 
 module_param_named(guc_log_level, i915.guc_log_level, int, 0400);
 MODULE_PARM_DESC(guc_log_level,
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 65e73dd..1323261 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -45,6 +45,8 @@ struct i915_params {
 	int enable_ips;
 	int invert_brightness;
 	int enable_cmd_parser;
+	int enable_guc_loading;
+	int enable_guc_submission;
 	int guc_log_level;
 	int use_mmio_flip;
 	int mmio_debug;
@@ -57,7 +59,6 @@ struct i915_params {
 	bool load_detect_test;
 	bool reset;
 	bool disable_display;
-	bool enable_guc_submission;
 	bool verbose_state_checks;
 	bool nuclear_pageflip;
 	bool enable_dp_mst;
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 876e5da..2ec9cf1 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -387,49 +387,37 @@ int intel_guc_ucode_load(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
+	const char *fw_path = guc_fw->guc_fw_path;
 	int retries, err = 0;
 
-	if (!i915.enable_guc_submission)
-		return 0;
-
-	DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n",
+	DRM_DEBUG_DRIVER("GuC fw status: path %s, fetch %s, load %s\n",
+		fw_path,
 		intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status),
 		intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));
 
-	direct_interrupts_to_host(dev_priv);
+	/* Loading forbidden, or no firmware to load? */
+	if (!i915.enable_guc_loading)
+		goto fail;
+	if (fw_path == NULL)
+		goto fail;
+	if (*fw_path == '\0') {
+		DRM_ERROR("No GuC firmware known for this platform\n");
+		goto fail;
+	}
 
-	if (guc_fw->guc_fw_fetch_status == GUC_FIRMWARE_NONE)
-		return 0;
+	/* Fetch failed, or already fetched but failed to load? */
+	if (guc_fw->guc_fw_fetch_status != GUC_FIRMWARE_SUCCESS)
+		goto fail;
+	if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_FAIL)
+		goto fail;
 
-	if (guc_fw->guc_fw_fetch_status == GUC_FIRMWARE_SUCCESS &&
-	    guc_fw->guc_fw_load_status == GUC_FIRMWARE_FAIL)
-		return -ENOEXEC;
+	direct_interrupts_to_host(dev_priv);
 
 	guc_fw->guc_fw_load_status = GUC_FIRMWARE_PENDING;
 
-	DRM_DEBUG_DRIVER("GuC fw fetch status %s\n",
-		intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status));
-
-	switch (guc_fw->guc_fw_fetch_status) {
-	case GUC_FIRMWARE_FAIL:
-		/* something went wrong :( */
-		err = -EIO;
-		goto fail;
-
-	case GUC_FIRMWARE_NONE:
-	case GUC_FIRMWARE_PENDING:
-	default:
-		/* "can't happen" */
-		WARN_ONCE(1, "GuC fw %s invalid guc_fw_fetch_status %s [%d]\n",
-			guc_fw->guc_fw_path,
-			intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status),
-			guc_fw->guc_fw_fetch_status);
-		err = -ENXIO;
-		goto fail;
-
-	case GUC_FIRMWARE_SUCCESS:
-		break;
-	}
+	DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n",
+		intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status),
+		intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));
 
 	err = i915_guc_submission_init(dev);
 	if (err)
@@ -483,6 +471,7 @@ int intel_guc_ucode_load(struct drm_device *dev)
 
 fail:
 	DRM_ERROR("GuC firmware load failed, err %d\n", err);
+
 	if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_PENDING)
 		guc_fw->guc_fw_load_status = GUC_FIRMWARE_FAIL;
 
@@ -490,6 +479,29 @@ int intel_guc_ucode_load(struct drm_device *dev)
 	i915_guc_submission_disable(dev);
 	i915_guc_submission_fini(dev);
 
+	/*
+	 * We've failed to load the firmware :(
+	 *
+	 * Decide whether to disable GuC submission and fall back to
+	 * execlist mode, and whether to hide the error by returning
+	 * zero or to return -EIO, which the caller will treat as a
+	 * nonfatal error (i.e. it doesn't prevent driver load, but
+	 * marks the GPU as wedged until reset).
+	 */
+	if (i915.enable_guc_loading > 1) {
+		err = -EIO;
+	} else if (HAS_GUC_SCHED(dev) && !HAS_GUC_UCODE(dev)) {
+		return 0;
+	} else if (i915.enable_guc_submission > 1) {
+		err = -EIO;
+	} else {
+		err = 0;
+	}
+
+	i915.enable_guc_submission = 0;
+
+	DRM_DEBUG_DRIVER("falling back to execlist mode, err %d\n", err);
+
 	return err;
 }
 
@@ -631,8 +643,11 @@ void intel_guc_ucode_init(struct drm_device *dev)
 	struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
 	const char *fw_path;
 
-	if (!HAS_GUC_SCHED(dev))
-		i915.enable_guc_submission = false;
+	/* A negative value means "use platform default" */
+	if (i915.enable_guc_loading < 0)
+		i915.enable_guc_loading = HAS_GUC_UCODE(dev);
+	if (i915.enable_guc_submission < 0)
+		i915.enable_guc_submission = HAS_GUC_SCHED(dev);
 
 	if (!HAS_GUC_UCODE(dev)) {
 		fw_path = NULL;
@@ -641,26 +656,21 @@ void intel_guc_ucode_init(struct drm_device *dev)
 		guc_fw->guc_fw_major_wanted = 6;
 		guc_fw->guc_fw_minor_wanted = 1;
 	} else {
-		i915.enable_guc_submission = false;
 		fw_path = "";	/* unknown device */
 	}
 
-	if (!i915.enable_guc_submission)
-		return;
-
 	guc_fw->guc_dev = dev;
 	guc_fw->guc_fw_path = fw_path;
 	guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_NONE;
 	guc_fw->guc_fw_load_status = GUC_FIRMWARE_NONE;
 
+	/* Early (and silent) return if GuC loading is disabled */
+	if (!i915.enable_guc_loading)
+		return;
 	if (fw_path == NULL)
 		return;
-
-	if (*fw_path == '\0') {
-		DRM_ERROR("No GuC firmware known for this platform\n");
-		guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_FAIL;
+	if (*fw_path == '\0')
 		return;
-	}
 
 	guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_PENDING;
 	DRM_DEBUG_DRIVER("GuC firmware pending, path %s\n", fw_path);
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 4f1dfe6..df698d7 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1758,7 +1758,7 @@ int intel_guc_reset(struct drm_i915_private *dev_priv)
 	int ret;
 	unsigned long irqflags;
 
-	if (!i915.enable_guc_submission)
+	if (!HAS_GUC_UCODE(dev_priv))
 		return -EINVAL;
 
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH v2 2/5] drm/i915/guc: pass request (not client) to i915_guc_{wq_check_space, submit}()
  2016-04-27 18:03 [PATCH v2 1/5] drm/i915/guc: add enable_guc_loading parameter Dave Gordon
@ 2016-04-27 18:03 ` Dave Gordon
  2016-04-29 15:08   ` Tvrtko Ursulin
  2016-04-27 18:03 ` [PATCH v2 3/5] drm/i915/guc: don't spinwait if the GuC's workqueue is full Dave Gordon
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 24+ messages in thread
From: Dave Gordon @ 2016-04-27 18:03 UTC (permalink / raw)
  To: intel-gfx

The knowledge of how to derive the relevant client from the request
should be localised within i915_guc_submission.c; the LRC code shouldn't
have to know about the internal details of the GuC submission process.
And all the information the GuC code needs should be encapsulated in (or
reachable from) the request.

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 11 ++++++-----
 drivers/gpu/drm/i915/intel_guc.h           |  5 ++---
 drivers/gpu/drm/i915/intel_lrc.c           |  9 +++------
 3 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 42d2efa..66af5ce 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -451,10 +451,11 @@ static void guc_fini_ctx_desc(struct intel_guc *guc,
 			     sizeof(desc) * client->ctx_index);
 }
 
-int i915_guc_wq_check_space(struct i915_guc_client *gc)
+int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
 {
+	const size_t size = sizeof(struct guc_wq_item);
+	struct i915_guc_client *gc = request->i915->guc.execbuf_client;
 	struct guc_process_desc *desc;
-	u32 size = sizeof(struct guc_wq_item);
 	int ret = -ETIMEDOUT, timeout_counter = 200;
 
 	if (!gc)
@@ -537,11 +538,11 @@ static int guc_add_workqueue_item(struct i915_guc_client *gc,
  *
  * Return:	0 if succeed
  */
-int i915_guc_submit(struct i915_guc_client *client,
-		    struct drm_i915_gem_request *rq)
+int i915_guc_submit(struct drm_i915_gem_request *rq)
 {
-	struct intel_guc *guc = client->guc;
 	unsigned int engine_id = rq->engine->guc_id;
+	struct intel_guc *guc = &rq->i915->guc;
+	struct i915_guc_client *client = guc->execbuf_client;
 	int q_ret, b_ret;
 
 	q_ret = guc_add_workqueue_item(client, rq);
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 9d79c4c..b37c731 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -148,10 +148,9 @@ extern int intel_guc_resume(struct drm_device *dev);
 /* i915_guc_submission.c */
 int i915_guc_submission_init(struct drm_device *dev);
 int i915_guc_submission_enable(struct drm_device *dev);
-int i915_guc_submit(struct i915_guc_client *client,
-		    struct drm_i915_gem_request *rq);
+int i915_guc_wq_check_space(struct drm_i915_gem_request *rq);
+int i915_guc_submit(struct drm_i915_gem_request *rq);
 void i915_guc_submission_disable(struct drm_device *dev);
 void i915_guc_submission_fini(struct drm_device *dev);
-int i915_guc_wq_check_space(struct i915_guc_client *client);
 
 #endif
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 2b7e6bb..b8ec8c6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -708,9 +708,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
 		 * going any further, as the i915_add_request() call
 		 * later on mustn't fail ...
 		 */
-		struct intel_guc *guc = &request->i915->guc;
-
-		ret = i915_guc_wq_check_space(guc->execbuf_client);
+		ret = i915_guc_wq_check_space(request);
 		if (ret)
 			return ret;
 	}
@@ -776,7 +774,6 @@ static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
 intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
 {
 	struct intel_ringbuffer *ringbuf = request->ringbuf;
-	struct drm_i915_private *dev_priv = request->i915;
 	struct intel_engine_cs *engine = request->engine;
 
 	intel_logical_ring_advance(ringbuf);
@@ -806,8 +803,8 @@ static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
 		}
 	}
 
-	if (dev_priv->guc.execbuf_client)
-		i915_guc_submit(dev_priv->guc.execbuf_client, request);
+	if (i915.enable_guc_submission)
+		i915_guc_submit(request);
 	else
 		execlists_context_queue(request);
 
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH v2 3/5] drm/i915/guc: don't spinwait if the GuC's workqueue is full
  2016-04-27 18:03 [PATCH v2 1/5] drm/i915/guc: add enable_guc_loading parameter Dave Gordon
  2016-04-27 18:03 ` [PATCH v2 2/5] drm/i915/guc: pass request (not client) to i915_guc_{wq_check_space, submit}() Dave Gordon
@ 2016-04-27 18:03 ` Dave Gordon
  2016-04-29 15:17   ` Tvrtko Ursulin
  2016-04-29 15:45   ` Tvrtko Ursulin
  2016-04-27 18:03 ` [PATCH v2 4/5] drm/i915/guc: rework guc_add_workqueue_item() Dave Gordon
                   ` (3 subsequent siblings)
  5 siblings, 2 replies; 24+ messages in thread
From: Dave Gordon @ 2016-04-27 18:03 UTC (permalink / raw)
  To: intel-gfx

Rather than wait to see whether more space becomes available in the GuC
submission workqueue, we can just return -EAGAIN and let the caller try
again in a little while. This gets rid of an uninterruptable sleep in
the polling code :)

We'll also add a counter to the GuC client statistics, to see how often
we find the WQ full.

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  1 +
 drivers/gpu/drm/i915/i915_guc_submission.c | 16 +++++-----------
 drivers/gpu/drm/i915/intel_guc.h           |  8 ++++----
 3 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 8b8d6f0..1024947 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2509,6 +2509,7 @@ static void i915_guc_client_info(struct seq_file *m,
 	seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
 		client->wq_size, client->wq_offset, client->wq_tail);
 
+	seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
 	seq_printf(m, "\tFailed to queue: %u\n", client->q_fail);
 	seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
 	seq_printf(m, "\tLast submission result: %d\n", client->retcode);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 66af5ce..6626eff 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -453,27 +453,21 @@ static void guc_fini_ctx_desc(struct intel_guc *guc,
 
 int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
 {
-	const size_t size = sizeof(struct guc_wq_item);
+	const size_t wqi_size = sizeof(struct guc_wq_item);
 	struct i915_guc_client *gc = request->i915->guc.execbuf_client;
 	struct guc_process_desc *desc;
-	int ret = -ETIMEDOUT, timeout_counter = 200;
 
 	if (!gc)
 		return 0;
 
 	desc = gc->client_base + gc->proc_desc_offset;
 
-	while (timeout_counter-- > 0) {
-		if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
-			ret = 0;
-			break;
-		}
+	if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= wqi_size)
+		return 0;
 
-		if (timeout_counter)
-			usleep_range(1000, 2000);
-	};
+	gc->no_wq_space += 1;
 
-	return ret;
+	return -EAGAIN;
 }
 
 static int guc_add_workqueue_item(struct i915_guc_client *gc,
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index b37c731..436f2d6 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -73,10 +73,10 @@ struct i915_guc_client {
 
 	/* GuC submission statistics & status */
 	uint64_t submissions[GUC_MAX_ENGINES_NUM];
-	uint32_t q_fail;
-	uint32_t b_fail;
-	int retcode;
-	int spare;			/* pad to 32 DWords		*/
+	uint32_t no_wq_space;		/* Space pre-check failed	*/
+	uint32_t q_fail;		/* Failed to queue (MBZ)	*/
+	uint32_t b_fail;		/* Doorbell failure (MBZ)	*/
+	int retcode;			/* Result of last guc_submit()	*/
 };
 
 enum intel_guc_fw_status {
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH v2 4/5] drm/i915/guc: rework guc_add_workqueue_item()
  2016-04-27 18:03 [PATCH v2 1/5] drm/i915/guc: add enable_guc_loading parameter Dave Gordon
  2016-04-27 18:03 ` [PATCH v2 2/5] drm/i915/guc: pass request (not client) to i915_guc_{wq_check_space, submit}() Dave Gordon
  2016-04-27 18:03 ` [PATCH v2 3/5] drm/i915/guc: don't spinwait if the GuC's workqueue is full Dave Gordon
@ 2016-04-27 18:03 ` Dave Gordon
  2016-04-29 15:44   ` Tvrtko Ursulin
  2016-04-27 18:03 ` [PATCH v2 5/5] DO NOT MERGE: change default to using GuC submission if possible Dave Gordon
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 24+ messages in thread
From: Dave Gordon @ 2016-04-27 18:03 UTC (permalink / raw)
  To: intel-gfx

Mostly little optimisations; for instance, if the driver is correctly
following the submission protocol, the "out of space" condition is
impossible, so the previous runtime WARN_ON() is promoted to a
GEM_BUG_ON() for a more dramatic effect in development and less impact
in end-user systems.

Similarly we can replace other WARN_ON() conditions that don't relate to
the hardware state with either BUILD_BUG_ON() for compile-time-
detectable issues, or GEM_BUG_ON() for logical "can't happen" errors.

With those changes, we can convert it to void, as suggested by Chris
Wilson, and update the calling code appropriately.

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>

---
 drivers/gpu/drm/i915/i915_guc_submission.c | 69 +++++++++++++++---------------
 drivers/gpu/drm/i915/intel_guc.h           |  2 +-
 drivers/gpu/drm/i915/intel_guc_fwif.h      |  3 +-
 3 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 6626eff..4d2ea84 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -470,23 +470,28 @@ int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
 	return -EAGAIN;
 }
 
-static int guc_add_workqueue_item(struct i915_guc_client *gc,
-				  struct drm_i915_gem_request *rq)
+static void guc_add_workqueue_item(struct i915_guc_client *gc,
+				   struct drm_i915_gem_request *rq)
 {
+	/* wqi_len is in DWords, and does not include the one-word header */
+	const size_t wqi_size = sizeof(struct guc_wq_item);
+	const u32 wqi_len = wqi_size/sizeof(u32) - 1;
 	struct guc_process_desc *desc;
 	struct guc_wq_item *wqi;
 	void *base;
-	u32 tail, wq_len, wq_off, space;
+	u32 space, tail, wq_off, wq_page;
 
 	desc = gc->client_base + gc->proc_desc_offset;
+
+	/* Space was checked earlier, in i915_guc_wq_check_space() above */
 	space = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
-	if (WARN_ON(space < sizeof(struct guc_wq_item)))
-		return -ENOSPC; /* shouldn't happen */
+	GEM_BUG_ON(space < wqi_size);
 
-	/* postincrement WQ tail for next time */
-	wq_off = gc->wq_tail;
-	gc->wq_tail += sizeof(struct guc_wq_item);
-	gc->wq_tail &= gc->wq_size - 1;
+	/* The GuC firmware wants the tail index in QWords, not bytes */
+	tail = rq->tail;
+	GEM_BUG_ON(tail & 7);
+	tail >>= 3;
+	GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
 
 	/* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
 	 * should not have the case where structure wqi is across page, neither
@@ -495,19 +500,23 @@ static int guc_add_workqueue_item(struct i915_guc_client *gc,
 	 * XXX: if not the case, we need save data to a temp wqi and copy it to
 	 * workqueue buffer dw by dw.
 	 */
-	WARN_ON(sizeof(struct guc_wq_item) != 16);
-	WARN_ON(wq_off & 3);
+	BUILD_BUG_ON(wqi_size != 16);
 
-	/* wq starts from the page after doorbell / process_desc */
-	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj,
-			(wq_off + GUC_DB_SIZE) >> PAGE_SHIFT));
+	/* postincrement WQ tail for next time */
+	wq_off = gc->wq_tail;
+	gc->wq_tail += wqi_size;
+	gc->wq_tail &= gc->wq_size - 1;
+	GEM_BUG_ON(wq_off & (wqi_size - 1));
+
+	/* WQ starts from the page after doorbell / process_desc */
+	wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT;
 	wq_off &= PAGE_SIZE - 1;
+	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, wq_page));
 	wqi = (struct guc_wq_item *)((char *)base + wq_off);
 
-	/* len does not include the header */
-	wq_len = sizeof(struct guc_wq_item) / sizeof(u32) - 1;
+	/* Now fill in the 4-word work queue item */
 	wqi->header = WQ_TYPE_INORDER |
-			(wq_len << WQ_LEN_SHIFT) |
+			(wqi_len << WQ_LEN_SHIFT) |
 			(rq->engine->guc_id << WQ_TARGET_SHIFT) |
 			WQ_NO_WCFLUSH_WAIT;
 
@@ -515,14 +524,10 @@ static int guc_add_workqueue_item(struct i915_guc_client *gc,
 	wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx,
 							     rq->engine);
 
-	/* The GuC firmware wants the tail index in QWords, not bytes */
-	tail = rq->ringbuf->tail >> 3;
 	wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
-	wqi->fence_id = 0; /*XXX: what fence to be here */
+	wqi->fence_id = rq->seqno;
 
 	kunmap_atomic(base);
-
-	return 0;
 }
 
 /**
@@ -537,26 +542,20 @@ int i915_guc_submit(struct drm_i915_gem_request *rq)
 	unsigned int engine_id = rq->engine->guc_id;
 	struct intel_guc *guc = &rq->i915->guc;
 	struct i915_guc_client *client = guc->execbuf_client;
-	int q_ret, b_ret;
+	int b_ret;
 
-	q_ret = guc_add_workqueue_item(client, rq);
-	if (q_ret == 0)
-		b_ret = guc_ring_doorbell(client);
+	guc_add_workqueue_item(client, rq);
+	b_ret = guc_ring_doorbell(client);
 
 	client->submissions[engine_id] += 1;
-	if (q_ret) {
-		client->q_fail += 1;
-		client->retcode = q_ret;
-	} else if (b_ret) {
+	client->retcode = b_ret;
+	if (b_ret)
 		client->b_fail += 1;
-		client->retcode = q_ret = b_ret;
-	} else {
-		client->retcode = 0;
-	}
+
 	guc->submissions[engine_id] += 1;
 	guc->last_seqno[engine_id] = rq->seqno;
 
-	return q_ret;
+	return b_ret;
 }
 
 /*
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 436f2d6..10e1d5e 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -74,7 +74,7 @@ struct i915_guc_client {
 	/* GuC submission statistics & status */
 	uint64_t submissions[GUC_MAX_ENGINES_NUM];
 	uint32_t no_wq_space;		/* Space pre-check failed	*/
-	uint32_t q_fail;		/* Failed to queue (MBZ)	*/
+	uint32_t q_fail;		/* No longer used		*/
 	uint32_t b_fail;		/* Doorbell failure (MBZ)	*/
 	int retcode;			/* Result of last guc_submit()	*/
 };
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
index 2de57ff..944786d 100644
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -71,7 +71,8 @@
 #define   WQ_WORKLOAD_TOUCH		(2 << WQ_WORKLOAD_SHIFT)
 
 #define WQ_RING_TAIL_SHIFT		20
-#define WQ_RING_TAIL_MASK		(0x7FF << WQ_RING_TAIL_SHIFT)
+#define WQ_RING_TAIL_MAX		0x7FF	/* 2^11 QWords */
+#define WQ_RING_TAIL_MASK		(WQ_RING_TAIL_MAX << WQ_RING_TAIL_SHIFT)
 
 #define GUC_DOORBELL_ENABLED		1
 #define GUC_DOORBELL_DISABLED		0
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH v2 5/5] DO NOT MERGE: change default to using GuC submission if possible
  2016-04-27 18:03 [PATCH v2 1/5] drm/i915/guc: add enable_guc_loading parameter Dave Gordon
                   ` (2 preceding siblings ...)
  2016-04-27 18:03 ` [PATCH v2 4/5] drm/i915/guc: rework guc_add_workqueue_item() Dave Gordon
@ 2016-04-27 18:03 ` Dave Gordon
  2016-04-28  6:24 ` ✓ Fi.CI.BAT: success for series starting with [v2,1/5] drm/i915/guc: add enable_guc_loading parameter Patchwork
  2016-04-29 15:03 ` [PATCH v2 1/5] " Tvrtko Ursulin
  5 siblings, 0 replies; 24+ messages in thread
From: Dave Gordon @ 2016-04-27 18:03 UTC (permalink / raw)
  To: intel-gfx

This patch simply changes the default value of "enable_guc_submission"
from 0 (never) to -1 (auto). This means that GuC submission will be
used if the platform has a GuC, the GuC supports the request submission
protocol, and any required GuC firmwware was successfully loaded. If any
of these conditions are not met, the driver will fall back to using
execlist mode.

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_params.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 6a5578c..573e787 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -55,7 +55,7 @@ struct i915_params i915 __read_mostly = {
 	.nuclear_pageflip = 0,
 	.edp_vswing = 0,
 	.enable_guc_loading = -1,
-	.enable_guc_submission = 0,
+	.enable_guc_submission = -1,
 	.guc_log_level = -1,
 	.enable_dp_mst = true,
 	.inject_load_failure = 0,
@@ -207,7 +207,7 @@ struct i915_params i915 __read_mostly = {
 module_param_named_unsafe(enable_guc_submission, i915.enable_guc_submission, int, 0400);
 MODULE_PARM_DESC(enable_guc_submission,
 		"Enable GuC submission "
-		"(-1=auto, 0=never [default], 1=if available, 2=required)");
+		"(-1=auto [default], 0=never, 1=if available, 2=required)");
 
 module_param_named(guc_log_level, i915.guc_log_level, int, 0400);
 MODULE_PARM_DESC(guc_log_level,
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [v2,1/5] drm/i915/guc: add enable_guc_loading parameter
  2016-04-27 18:03 [PATCH v2 1/5] drm/i915/guc: add enable_guc_loading parameter Dave Gordon
                   ` (3 preceding siblings ...)
  2016-04-27 18:03 ` [PATCH v2 5/5] DO NOT MERGE: change default to using GuC submission if possible Dave Gordon
@ 2016-04-28  6:24 ` Patchwork
  2016-04-29 15:03 ` [PATCH v2 1/5] " Tvrtko Ursulin
  5 siblings, 0 replies; 24+ messages in thread
From: Patchwork @ 2016-04-28  6:24 UTC (permalink / raw)
  To: Dave Gordon; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2,1/5] drm/i915/guc: add enable_guc_loading parameter
URL   : https://patchwork.freedesktop.org/series/6431/
State : success

== Summary ==

Series 6431v1 Series without cover letter
http://patchwork.freedesktop.org/api/1.0/series/6431/revisions/1/mbox/


bdw-nuci7        total:201  pass:189  dwarn:0   dfail:0   fail:0   skip:12 
bdw-ultra        total:201  pass:176  dwarn:0   dfail:0   fail:0   skip:25 
byt-nuc          total:200  pass:159  dwarn:0   dfail:0   fail:0   skip:41 
hsw-brixbox      total:201  pass:175  dwarn:0   dfail:0   fail:0   skip:26 
hsw-gt2          total:201  pass:179  dwarn:0   dfail:0   fail:1   skip:21 
ilk-hp8440p      total:201  pass:140  dwarn:0   dfail:0   fail:0   skip:61 
ivb-t430s        total:201  pass:170  dwarn:0   dfail:0   fail:0   skip:31 
skl-i7k-2        total:201  pass:174  dwarn:0   dfail:0   fail:0   skip:27 
skl-nuci5        total:201  pass:190  dwarn:0   dfail:0   fail:0   skip:11 
snb-dellxps      total:201  pass:159  dwarn:0   dfail:0   fail:0   skip:42 

Results at /archive/results/CI_IGT_test/Patchwork_2096/

c5c937220f88dcadac8da2377e141cf998968efc drm-intel-nightly: 2016y-04m-27d-19h-36m-40s UTC integration manifest
effe8ae DO NOT MERGE: change default to using GuC submission if possible
7d64bb7 drm/i915/guc: rework guc_add_workqueue_item()
243a367 drm/i915/guc: don't spinwait if the GuC's workqueue is full
8965bf0 drm/i915/guc: pass request (not client) to i915_guc_{wq_check_space, submit}()
0c4b94d drm/i915/guc: add enable_guc_loading parameter

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 1/5] drm/i915/guc: add enable_guc_loading parameter
  2016-04-27 18:03 [PATCH v2 1/5] drm/i915/guc: add enable_guc_loading parameter Dave Gordon
                   ` (4 preceding siblings ...)
  2016-04-28  6:24 ` ✓ Fi.CI.BAT: success for series starting with [v2,1/5] drm/i915/guc: add enable_guc_loading parameter Patchwork
@ 2016-04-29 15:03 ` Tvrtko Ursulin
  2016-05-06 16:39   ` Dave Gordon
  5 siblings, 1 reply; 24+ messages in thread
From: Tvrtko Ursulin @ 2016-04-29 15:03 UTC (permalink / raw)
  To: Dave Gordon, intel-gfx


Hi,

On 27/04/16 19:03, Dave Gordon wrote:
> Split the function of "enable_guc_submission" into two separate
> options.  The new one ("enable_guc_loading") controls only the
> *fetching and loading* of the GuC firmware image. The existing
> one is redefined to control only the *use* of the GuC for batch
> submission once the firmware is loaded.
>
> In addition, the degree of control has been refined from a simple
> bool to an integer key, allowing several options:
>   -1 (default)     whatever the platform default is
>    0  DISABLE      don't load/use the GuC
>    1  BEST EFFORT  try to load/use the GuC, fallback if not available
>    2  REQUIRE      must load/use the GuC, else leave the GPU wedged
>
> The new platform default (as coded here) will be to attempt to
> load the GuC iff the device has a GuC that requires firmware,
> but not yet to use it for submission. A later patch will change
> to enable it if appropriate.
>
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.c            |  1 -
>   drivers/gpu/drm/i915/i915_guc_submission.c |  4 +-
>   drivers/gpu/drm/i915/i915_params.c         | 14 ++++-
>   drivers/gpu/drm/i915/i915_params.h         |  3 +-
>   drivers/gpu/drm/i915/intel_guc_loader.c    | 98 ++++++++++++++++--------------
>   drivers/gpu/drm/i915/intel_uncore.c        |  2 +-
>   6 files changed, 70 insertions(+), 52 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index d493e79..b04effc 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4910,7 +4910,6 @@ int i915_gem_init_engines(struct drm_device *dev)
>   		ret = intel_guc_ucode_load(dev);
>   		if (ret) {
>   			DRM_ERROR("Failed to initialize GuC, error %d\n", ret);
> -			ret = -EIO;
>   			goto out;
>   		}
>   	}
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 72d6665..42d2efa 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -970,7 +970,7 @@ int intel_guc_suspend(struct drm_device *dev)
>   	struct intel_context *ctx;
>   	u32 data[3];
>
> -	if (!i915.enable_guc_submission)
> +	if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS)
>   		return 0;
>
>   	ctx = dev_priv->kernel_context;
> @@ -996,7 +996,7 @@ int intel_guc_resume(struct drm_device *dev)
>   	struct intel_context *ctx;
>   	u32 data[3];
>
> -	if (!i915.enable_guc_submission)
> +	if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS)
>   		return 0;

Not terribly important and probably predates your work, but just spotted 
how this reads very redundant - guc->guc_fw.guc_fw_something, while it 
could be much more readable as guc->fw.load_status. Observation only.

>
>   	ctx = dev_priv->kernel_context;
> diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
> index 383c076..6a5578c 100644
> --- a/drivers/gpu/drm/i915/i915_params.c
> +++ b/drivers/gpu/drm/i915/i915_params.c
> @@ -54,7 +54,8 @@ struct i915_params i915 __read_mostly = {
>   	.verbose_state_checks = 1,
>   	.nuclear_pageflip = 0,
>   	.edp_vswing = 0,
> -	.enable_guc_submission = false,
> +	.enable_guc_loading = -1,
> +	.enable_guc_submission = 0,
>   	.guc_log_level = -1,
>   	.enable_dp_mst = true,
>   	.inject_load_failure = 0,
> @@ -198,8 +199,15 @@ struct i915_params i915 __read_mostly = {
>   		 "(0=use value from vbt [default], 1=low power swing(200mV),"
>   		 "2=default swing(400mV))");
>
> -module_param_named_unsafe(enable_guc_submission, i915.enable_guc_submission, bool, 0400);
> -MODULE_PARM_DESC(enable_guc_submission, "Enable GuC submission (default:false)");
> +module_param_named_unsafe(enable_guc_loading, i915.enable_guc_loading, int, 0400);
> +MODULE_PARM_DESC(enable_guc_loading,
> +		"Enable GuC firmware loading "
> +		"(-1=auto [default], 0=never, 1=if available, 2=required)");
> +
> +module_param_named_unsafe(enable_guc_submission, i915.enable_guc_submission, int, 0400);
> +MODULE_PARM_DESC(enable_guc_submission,
> +		"Enable GuC submission "
> +		"(-1=auto, 0=never [default], 1=if available, 2=required)");
>
>   module_param_named(guc_log_level, i915.guc_log_level, int, 0400);
>   MODULE_PARM_DESC(guc_log_level,
> diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
> index 65e73dd..1323261 100644
> --- a/drivers/gpu/drm/i915/i915_params.h
> +++ b/drivers/gpu/drm/i915/i915_params.h
> @@ -45,6 +45,8 @@ struct i915_params {
>   	int enable_ips;
>   	int invert_brightness;
>   	int enable_cmd_parser;
> +	int enable_guc_loading;
> +	int enable_guc_submission;
>   	int guc_log_level;
>   	int use_mmio_flip;
>   	int mmio_debug;
> @@ -57,7 +59,6 @@ struct i915_params {
>   	bool load_detect_test;
>   	bool reset;
>   	bool disable_display;
> -	bool enable_guc_submission;
>   	bool verbose_state_checks;
>   	bool nuclear_pageflip;
>   	bool enable_dp_mst;
> diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
> index 876e5da..2ec9cf1 100644
> --- a/drivers/gpu/drm/i915/intel_guc_loader.c
> +++ b/drivers/gpu/drm/i915/intel_guc_loader.c
> @@ -387,49 +387,37 @@ int intel_guc_ucode_load(struct drm_device *dev)
>   {
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
> +	const char *fw_path = guc_fw->guc_fw_path;
>   	int retries, err = 0;
>
> -	if (!i915.enable_guc_submission)
> -		return 0;
> -
> -	DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n",
> +	DRM_DEBUG_DRIVER("GuC fw status: path %s, fetch %s, load %s\n",
> +		fw_path,
>   		intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status),
>   		intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));

Should load status be anything other than GUC_FIRMWARE_NONE here?

> -	direct_interrupts_to_host(dev_priv);
> +	/* Loading forbidden, or no firmware to load? */
> +	if (!i915.enable_guc_loading)

Nitpick, == 0 would perhaps make it more obvious this is not a boolean.

> +		goto fail;
> +	if (fw_path == NULL)
> +		goto fail;
> +	if (*fw_path == '\0') {
> +		DRM_ERROR("No GuC firmware known for this platform\n");

It is not an error unless i915.enable_guc_loading == 2, no? And if best 
effort then it is probably debug or informational.

Also, don't the checks against fw_path (together with the error or debug 
message) belong in the fw fetch function? If they are invalid fw fetch 
would have failed and this function would be able to inspect the high 
level status of that step here, no?

> +		goto fail;
> +	}
>
> -	if (guc_fw->guc_fw_fetch_status == GUC_FIRMWARE_NONE)
> -		return 0;
> +	/* Fetch failed, or already fetched but failed to load? */
> +	if (guc_fw->guc_fw_fetch_status != GUC_FIRMWARE_SUCCESS)
> +		goto fail;
> +	if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_FAIL)
> +		goto fail;

Leads back to the question of load status in this function. So it is 
expected we always enter here with load status of none? Is it possible 
to get here with the firmware already loaded already?

>
> -	if (guc_fw->guc_fw_fetch_status == GUC_FIRMWARE_SUCCESS &&
> -	    guc_fw->guc_fw_load_status == GUC_FIRMWARE_FAIL)
> -		return -ENOEXEC;
> +	direct_interrupts_to_host(dev_priv);
>
>   	guc_fw->guc_fw_load_status = GUC_FIRMWARE_PENDING;
>
> -	DRM_DEBUG_DRIVER("GuC fw fetch status %s\n",
> -		intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status));
> -
> -	switch (guc_fw->guc_fw_fetch_status) {
> -	case GUC_FIRMWARE_FAIL:
> -		/* something went wrong :( */
> -		err = -EIO;
> -		goto fail;
> -
> -	case GUC_FIRMWARE_NONE:
> -	case GUC_FIRMWARE_PENDING:
> -	default:
> -		/* "can't happen" */
> -		WARN_ONCE(1, "GuC fw %s invalid guc_fw_fetch_status %s [%d]\n",
> -			guc_fw->guc_fw_path,
> -			intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status),
> -			guc_fw->guc_fw_fetch_status);
> -		err = -ENXIO;
> -		goto fail;
> -
> -	case GUC_FIRMWARE_SUCCESS:
> -		break;
> -	}
> +	DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n",
> +		intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status),
> +		intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));
>
>   	err = i915_guc_submission_init(dev);
>   	if (err)
> @@ -483,6 +471,7 @@ int intel_guc_ucode_load(struct drm_device *dev)
>
>   fail:
>   	DRM_ERROR("GuC firmware load failed, err %d\n", err);

Same as above I think error must be dependent on the requested mode. 
Some customers are very sensitive to errors which are not really errors 
so it is bad to log them when they are not.

> +
>   	if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_PENDING)
>   		guc_fw->guc_fw_load_status = GUC_FIRMWARE_FAIL;
>
> @@ -490,6 +479,29 @@ int intel_guc_ucode_load(struct drm_device *dev)
>   	i915_guc_submission_disable(dev);
>   	i915_guc_submission_fini(dev);
>
> +	/*
> +	 * We've failed to load the firmware :(
> +	 *
> +	 * Decide whether to disable GuC submission and fall back to
> +	 * execlist mode, and whether to hide the error by returning
> +	 * zero or to return -EIO, which the caller will treat as a
> +	 * nonfatal error (i.e. it doesn't prevent driver load, but
> +	 * marks the GPU as wedged until reset).
> +	 */
> +	if (i915.enable_guc_loading > 1) {
> +		err = -EIO;
> +	} else if (HAS_GUC_SCHED(dev) && !HAS_GUC_UCODE(dev)) {
> +		return 0;

i915_gem_init_hw already guards the call to intel_guc_ucode_load with 
HAS_GUC_UCODE so at the moment at least this is a dead branch.

I don't even understand what is this branch supposed to do? How can 
there be a platform with no guc fw but guc scheduling?

> +	} else if (i915.enable_guc_submission > 1) {
> +		err = -EIO;
> +	} else {
> +		err = 0;
> +	}
> +
> +	i915.enable_guc_submission = 0;
> +
> +	DRM_DEBUG_DRIVER("falling back to execlist mode, err %d\n", err);
> +

This would log when i915.enable_guc_loading is set to 0 which would be 
confusing. I think in this case the function should bail out much earlier.

>   	return err;
>   }
>
> @@ -631,8 +643,11 @@ void intel_guc_ucode_init(struct drm_device *dev)
>   	struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
>   	const char *fw_path;
>
> -	if (!HAS_GUC_SCHED(dev))
> -		i915.enable_guc_submission = false;
> +	/* A negative value means "use platform default" */
> +	if (i915.enable_guc_loading < 0)
> +		i915.enable_guc_loading = HAS_GUC_UCODE(dev);
> +	if (i915.enable_guc_submission < 0)
> +		i915.enable_guc_submission = HAS_GUC_SCHED(dev);

With this setup currently there is no difference between -1 and 1. But I 
can assume maybe in the future we could have -1 mean 2 on some platform 
which would then justify having four possible values for each?

>
>   	if (!HAS_GUC_UCODE(dev)) {
>   		fw_path = NULL;
> @@ -641,26 +656,21 @@ void intel_guc_ucode_init(struct drm_device *dev)
>   		guc_fw->guc_fw_major_wanted = 6;
>   		guc_fw->guc_fw_minor_wanted = 1;
>   	} else {
> -		i915.enable_guc_submission = false;
>   		fw_path = "";	/* unknown device */
>   	}

Confusing block, HAS_GUC_UCODE is defined as (IS_GEN9(dev) && 
!IS_KABYLAKE(dev)) but then here we only support SKL here. Why the 
former is then not just IS_SKYLAKE?

When BXT support is added this still needs to be modified and would only 
save touching HAS_GUC_UCODE in the header. But it must be a better reason?

>
> -	if (!i915.enable_guc_submission)
> -		return;
> -
>   	guc_fw->guc_dev = dev;
>   	guc_fw->guc_fw_path = fw_path;
>   	guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_NONE;
>   	guc_fw->guc_fw_load_status = GUC_FIRMWARE_NONE;
>
> +	/* Early (and silent) return if GuC loading is disabled */
> +	if (!i915.enable_guc_loading)
> +		return;
>   	if (fw_path == NULL)
>   		return;
> -
> -	if (*fw_path == '\0') {
> -		DRM_ERROR("No GuC firmware known for this platform\n");
> -		guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_FAIL;
> +	if (*fw_path == '\0')
>   		return;
> -	}

I also do not understand the complications with fw_path (either NULL or 
""). In the two cases fetch status will be either none or fail, 
respectively, which will equally cause intel_guc_ucode_load to hit the 
failure path (fw_fetch_status != success).

So Is it really required for the fw_path to can either be NULL or ""?

>
>   	guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_PENDING;
>   	DRM_DEBUG_DRIVER("GuC firmware pending, path %s\n", fw_path);
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 4f1dfe6..df698d7 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1758,7 +1758,7 @@ int intel_guc_reset(struct drm_i915_private *dev_priv)
>   	int ret;
>   	unsigned long irqflags;
>
> -	if (!i915.enable_guc_submission)
> +	if (!HAS_GUC_UCODE(dev_priv))
>   		return -EINVAL;

What if HAS_GUC_UCODE is true but the i915.load_guc_firmware has been 
set to zero? Should it skip the reset in that case as well?

>
>   	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 2/5] drm/i915/guc: pass request (not client) to i915_guc_{wq_check_space, submit}()
  2016-04-27 18:03 ` [PATCH v2 2/5] drm/i915/guc: pass request (not client) to i915_guc_{wq_check_space, submit}() Dave Gordon
@ 2016-04-29 15:08   ` Tvrtko Ursulin
  2016-05-03 19:22     ` Dave Gordon
  0 siblings, 1 reply; 24+ messages in thread
From: Tvrtko Ursulin @ 2016-04-29 15:08 UTC (permalink / raw)
  To: Dave Gordon, intel-gfx


On 27/04/16 19:03, Dave Gordon wrote:
> The knowledge of how to derive the relevant client from the request
> should be localised within i915_guc_submission.c; the LRC code shouldn't
> have to know about the internal details of the GuC submission process.
> And all the information the GuC code needs should be encapsulated in (or
> reachable from) the request.
>
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_guc_submission.c | 11 ++++++-----
>   drivers/gpu/drm/i915/intel_guc.h           |  5 ++---
>   drivers/gpu/drm/i915/intel_lrc.c           |  9 +++------
>   3 files changed, 11 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 42d2efa..66af5ce 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -451,10 +451,11 @@ static void guc_fini_ctx_desc(struct intel_guc *guc,
>   			     sizeof(desc) * client->ctx_index);
>   }
>
> -int i915_guc_wq_check_space(struct i915_guc_client *gc)
> +int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
>   {
> +	const size_t size = sizeof(struct guc_wq_item);

All the other variables passed to CIRC_SPACE are u32 so this is for the 
worse I think.

> +	struct i915_guc_client *gc = request->i915->guc.execbuf_client;
>   	struct guc_process_desc *desc;
> -	u32 size = sizeof(struct guc_wq_item);
>   	int ret = -ETIMEDOUT, timeout_counter = 200;
>
>   	if (!gc)
> @@ -537,11 +538,11 @@ static int guc_add_workqueue_item(struct i915_guc_client *gc,
>    *
>    * Return:	0 if succeed
>    */
> -int i915_guc_submit(struct i915_guc_client *client,
> -		    struct drm_i915_gem_request *rq)
> +int i915_guc_submit(struct drm_i915_gem_request *rq)
>   {
> -	struct intel_guc *guc = client->guc;
>   	unsigned int engine_id = rq->engine->guc_id;
> +	struct intel_guc *guc = &rq->i915->guc;
> +	struct i915_guc_client *client = guc->execbuf_client;
>   	int q_ret, b_ret;
>
>   	q_ret = guc_add_workqueue_item(client, rq);
> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
> index 9d79c4c..b37c731 100644
> --- a/drivers/gpu/drm/i915/intel_guc.h
> +++ b/drivers/gpu/drm/i915/intel_guc.h
> @@ -148,10 +148,9 @@ extern int intel_guc_resume(struct drm_device *dev);
>   /* i915_guc_submission.c */
>   int i915_guc_submission_init(struct drm_device *dev);
>   int i915_guc_submission_enable(struct drm_device *dev);
> -int i915_guc_submit(struct i915_guc_client *client,
> -		    struct drm_i915_gem_request *rq);
> +int i915_guc_wq_check_space(struct drm_i915_gem_request *rq);
> +int i915_guc_submit(struct drm_i915_gem_request *rq);
>   void i915_guc_submission_disable(struct drm_device *dev);
>   void i915_guc_submission_fini(struct drm_device *dev);
> -int i915_guc_wq_check_space(struct i915_guc_client *client);
>
>   #endif
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 2b7e6bb..b8ec8c6 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -708,9 +708,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
>   		 * going any further, as the i915_add_request() call
>   		 * later on mustn't fail ...
>   		 */
> -		struct intel_guc *guc = &request->i915->guc;
> -
> -		ret = i915_guc_wq_check_space(guc->execbuf_client);
> +		ret = i915_guc_wq_check_space(request);
>   		if (ret)
>   			return ret;
>   	}
> @@ -776,7 +774,6 @@ static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
>   intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
>   {
>   	struct intel_ringbuffer *ringbuf = request->ringbuf;
> -	struct drm_i915_private *dev_priv = request->i915;
>   	struct intel_engine_cs *engine = request->engine;
>
>   	intel_logical_ring_advance(ringbuf);
> @@ -806,8 +803,8 @@ static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
>   		}
>   	}
>
> -	if (dev_priv->guc.execbuf_client)
> -		i915_guc_submit(dev_priv->guc.execbuf_client, request);
> +	if (i915.enable_guc_submission)
> +		i915_guc_submit(request);
>   	else
>   		execlists_context_queue(request);
>
>

With the datatype fix (revert) in i915_guc_wq_check_space:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 3/5] drm/i915/guc: don't spinwait if the GuC's workqueue is full
  2016-04-27 18:03 ` [PATCH v2 3/5] drm/i915/guc: don't spinwait if the GuC's workqueue is full Dave Gordon
@ 2016-04-29 15:17   ` Tvrtko Ursulin
  2016-05-06 17:12     ` Dave Gordon
  2016-04-29 15:45   ` Tvrtko Ursulin
  1 sibling, 1 reply; 24+ messages in thread
From: Tvrtko Ursulin @ 2016-04-29 15:17 UTC (permalink / raw)
  To: Dave Gordon, intel-gfx


On 27/04/16 19:03, Dave Gordon wrote:
> Rather than wait to see whether more space becomes available in the GuC
> submission workqueue, we can just return -EAGAIN and let the caller try
> again in a little while. This gets rid of an uninterruptable sleep in
> the polling code :)
>
> We'll also add a counter to the GuC client statistics, to see how often
> we find the WQ full.
>
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c        |  1 +
>   drivers/gpu/drm/i915/i915_guc_submission.c | 16 +++++-----------
>   drivers/gpu/drm/i915/intel_guc.h           |  8 ++++----
>   3 files changed, 10 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 8b8d6f0..1024947 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2509,6 +2509,7 @@ static void i915_guc_client_info(struct seq_file *m,
>   	seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
>   		client->wq_size, client->wq_offset, client->wq_tail);
>
> +	seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
>   	seq_printf(m, "\tFailed to queue: %u\n", client->q_fail);
>   	seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
>   	seq_printf(m, "\tLast submission result: %d\n", client->retcode);
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 66af5ce..6626eff 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -453,27 +453,21 @@ static void guc_fini_ctx_desc(struct intel_guc *guc,
>
>   int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
>   {
> -	const size_t size = sizeof(struct guc_wq_item);
> +	const size_t wqi_size = sizeof(struct guc_wq_item);
>   	struct i915_guc_client *gc = request->i915->guc.execbuf_client;
>   	struct guc_process_desc *desc;
> -	int ret = -ETIMEDOUT, timeout_counter = 200;
>
>   	if (!gc)
>   		return 0;

Not part of this patch but spotted it - can this really happen, and if 
so, does it warrant a WARN_ONCE, GEM_BUG_ON or something?

>
>   	desc = gc->client_base + gc->proc_desc_offset;
>
> -	while (timeout_counter-- > 0) {
> -		if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
> -			ret = 0;
> -			break;
> -		}
> +	if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= wqi_size)
> +		return 0;

Could stick a likely here to lay out the code for the expected case but 
it could be just OCD. :)

>
> -		if (timeout_counter)
> -			usleep_range(1000, 2000);
> -	};
> +	gc->no_wq_space += 1;
>
> -	return ret;
> +	return -EAGAIN;

I suppose it is OK to make userspace pay the cost since this should be 
extremely rare, correct?

>   }
>
>   static int guc_add_workqueue_item(struct i915_guc_client *gc,
> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
> index b37c731..436f2d6 100644
> --- a/drivers/gpu/drm/i915/intel_guc.h
> +++ b/drivers/gpu/drm/i915/intel_guc.h
> @@ -73,10 +73,10 @@ struct i915_guc_client {
>
>   	/* GuC submission statistics & status */
>   	uint64_t submissions[GUC_MAX_ENGINES_NUM];
> -	uint32_t q_fail;
> -	uint32_t b_fail;
> -	int retcode;
> -	int spare;			/* pad to 32 DWords		*/
> +	uint32_t no_wq_space;		/* Space pre-check failed	*/
> +	uint32_t q_fail;		/* Failed to queue (MBZ)	*/
> +	uint32_t b_fail;		/* Doorbell failure (MBZ)	*/
> +	int retcode;			/* Result of last guc_submit()	*/
>   };
>
>   enum intel_guc_fw_status {
>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 4/5] drm/i915/guc: rework guc_add_workqueue_item()
  2016-04-27 18:03 ` [PATCH v2 4/5] drm/i915/guc: rework guc_add_workqueue_item() Dave Gordon
@ 2016-04-29 15:44   ` Tvrtko Ursulin
  2016-04-29 16:10     ` Chris Wilson
  2016-05-05 18:38     ` Dave Gordon
  0 siblings, 2 replies; 24+ messages in thread
From: Tvrtko Ursulin @ 2016-04-29 15:44 UTC (permalink / raw)
  To: Dave Gordon, intel-gfx


On 27/04/16 19:03, Dave Gordon wrote:
> Mostly little optimisations; for instance, if the driver is correctly
> following the submission protocol, the "out of space" condition is
> impossible, so the previous runtime WARN_ON() is promoted to a
> GEM_BUG_ON() for a more dramatic effect in development and less impact
> in end-user systems.
>
> Similarly we can replace other WARN_ON() conditions that don't relate to
> the hardware state with either BUILD_BUG_ON() for compile-time-
> detectable issues, or GEM_BUG_ON() for logical "can't happen" errors.
>
> With those changes, we can convert it to void, as suggested by Chris
> Wilson, and update the calling code appropriately.
>
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>
> ---
>   drivers/gpu/drm/i915/i915_guc_submission.c | 69 +++++++++++++++---------------
>   drivers/gpu/drm/i915/intel_guc.h           |  2 +-
>   drivers/gpu/drm/i915/intel_guc_fwif.h      |  3 +-
>   3 files changed, 37 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 6626eff..4d2ea84 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -470,23 +470,28 @@ int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
>   	return -EAGAIN;
>   }
>
> -static int guc_add_workqueue_item(struct i915_guc_client *gc,
> -				  struct drm_i915_gem_request *rq)
> +static void guc_add_workqueue_item(struct i915_guc_client *gc,
> +				   struct drm_i915_gem_request *rq)
>   {
> +	/* wqi_len is in DWords, and does not include the one-word header */
> +	const size_t wqi_size = sizeof(struct guc_wq_item);

Again, u32 is correct I think.

> +	const u32 wqi_len = wqi_size/sizeof(u32) - 1;
>   	struct guc_process_desc *desc;
>   	struct guc_wq_item *wqi;
>   	void *base;
> -	u32 tail, wq_len, wq_off, space;
> +	u32 space, tail, wq_off, wq_page;
>
>   	desc = gc->client_base + gc->proc_desc_offset;
> +
> +	/* Space was checked earlier, in i915_guc_wq_check_space() above */

It may be above in the file, but the two do not call one another so I 
recommend saying exactly who called it.

>   	space = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
> -	if (WARN_ON(space < sizeof(struct guc_wq_item)))
> -		return -ENOSPC; /* shouldn't happen */
> +	GEM_BUG_ON(space < wqi_size);

It is impossible to hit this only because of the struct_mutex guarding 
the whole time window from request creation to submission. If in the 
future, near or far, that gets fixed, then this will need reworking.

I don't have any better ideas though.

But a WARN_ON and return would be almost as good. Everything is better 
than a dead machine one can't ssh into...

So I appeal to make this a WARN_ON and return. Nothing bad would happen 
apart from software thinking GPU has hung.

>
> -	/* postincrement WQ tail for next time */
> -	wq_off = gc->wq_tail;
> -	gc->wq_tail += sizeof(struct guc_wq_item);
> -	gc->wq_tail &= gc->wq_size - 1;
> +	/* The GuC firmware wants the tail index in QWords, not bytes */
> +	tail = rq->tail;

Used to be sampled from rq->ringbuf->tail - are those the same?

> +	GEM_BUG_ON(tail & 7);
> +	tail >>= 3;
> +	GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
>
>   	/* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
>   	 * should not have the case where structure wqi is across page, neither
> @@ -495,19 +500,23 @@ static int guc_add_workqueue_item(struct i915_guc_client *gc,
>   	 * XXX: if not the case, we need save data to a temp wqi and copy it to
>   	 * workqueue buffer dw by dw.
>   	 */
> -	WARN_ON(sizeof(struct guc_wq_item) != 16);
> -	WARN_ON(wq_off & 3);
> +	BUILD_BUG_ON(wqi_size != 16);
>
> -	/* wq starts from the page after doorbell / process_desc */
> -	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj,
> -			(wq_off + GUC_DB_SIZE) >> PAGE_SHIFT));
> +	/* postincrement WQ tail for next time */
> +	wq_off = gc->wq_tail;
> +	gc->wq_tail += wqi_size;
> +	gc->wq_tail &= gc->wq_size - 1;
> +	GEM_BUG_ON(wq_off & (wqi_size - 1));

Use to be wq_off & 3, now is wq_off & 15, which one is correct?

> +
> +	/* WQ starts from the page after doorbell / process_desc */
> +	wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT;
>   	wq_off &= PAGE_SIZE - 1;
> +	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, wq_page));
>   	wqi = (struct guc_wq_item *)((char *)base + wq_off);
>
> -	/* len does not include the header */
> -	wq_len = sizeof(struct guc_wq_item) / sizeof(u32) - 1;
> +	/* Now fill in the 4-word work queue item */
>   	wqi->header = WQ_TYPE_INORDER |
> -			(wq_len << WQ_LEN_SHIFT) |
> +			(wqi_len << WQ_LEN_SHIFT) |
>   			(rq->engine->guc_id << WQ_TARGET_SHIFT) |
>   			WQ_NO_WCFLUSH_WAIT;
>
> @@ -515,14 +524,10 @@ static int guc_add_workqueue_item(struct i915_guc_client *gc,
>   	wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx,
>   							     rq->engine);
>
> -	/* The GuC firmware wants the tail index in QWords, not bytes */
> -	tail = rq->ringbuf->tail >> 3;
>   	wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
> -	wqi->fence_id = 0; /*XXX: what fence to be here */
> +	wqi->fence_id = rq->seqno;

Not mentioned in the commit?

>
>   	kunmap_atomic(base);
> -
> -	return 0;
>   }
>
>   /**
> @@ -537,26 +542,20 @@ int i915_guc_submit(struct drm_i915_gem_request *rq)
>   	unsigned int engine_id = rq->engine->guc_id;
>   	struct intel_guc *guc = &rq->i915->guc;
>   	struct i915_guc_client *client = guc->execbuf_client;
> -	int q_ret, b_ret;
> +	int b_ret;

What is b_ret out of interest, door*B*ell return code ?

>
> -	q_ret = guc_add_workqueue_item(client, rq);
> -	if (q_ret == 0)
> -		b_ret = guc_ring_doorbell(client);
> +	guc_add_workqueue_item(client, rq);
> +	b_ret = guc_ring_doorbell(client);
>
>   	client->submissions[engine_id] += 1;
> -	if (q_ret) {
> -		client->q_fail += 1;
> -		client->retcode = q_ret;
> -	} else if (b_ret) {
> +	client->retcode = b_ret;

I wanted to ask for what is this for but then found it is for debugfs.

> +	if (b_ret)
>   		client->b_fail += 1;
> -		client->retcode = q_ret = b_ret;
> -	} else {
> -		client->retcode = 0;
> -	}
> +
>   	guc->submissions[engine_id] += 1;
>   	guc->last_seqno[engine_id] = rq->seqno;
>
> -	return q_ret;
> +	return b_ret;

Could also kill the return value from this one, the only caller, 
intel_logical_ring_advance_and_submit does not use it. That function 
itself does not look like it needs a return value at the moment. :)

>   }
>
>   /*
> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
> index 436f2d6..10e1d5e 100644
> --- a/drivers/gpu/drm/i915/intel_guc.h
> +++ b/drivers/gpu/drm/i915/intel_guc.h
> @@ -74,7 +74,7 @@ struct i915_guc_client {
>   	/* GuC submission statistics & status */
>   	uint64_t submissions[GUC_MAX_ENGINES_NUM];
>   	uint32_t no_wq_space;		/* Space pre-check failed	*/
> -	uint32_t q_fail;		/* Failed to queue (MBZ)	*/
> +	uint32_t q_fail;		/* No longer used		*/

Get rid of it then?

>   	uint32_t b_fail;		/* Doorbell failure (MBZ)	*/
>   	int retcode;			/* Result of last guc_submit()	*/
>   };
> diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
> index 2de57ff..944786d 100644
> --- a/drivers/gpu/drm/i915/intel_guc_fwif.h
> +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
> @@ -71,7 +71,8 @@
>   #define   WQ_WORKLOAD_TOUCH		(2 << WQ_WORKLOAD_SHIFT)
>
>   #define WQ_RING_TAIL_SHIFT		20
> -#define WQ_RING_TAIL_MASK		(0x7FF << WQ_RING_TAIL_SHIFT)
> +#define WQ_RING_TAIL_MAX		0x7FF	/* 2^11 QWords */
> +#define WQ_RING_TAIL_MASK		(WQ_RING_TAIL_MAX << WQ_RING_TAIL_SHIFT)
>
>   #define GUC_DOORBELL_ENABLED		1
>   #define GUC_DOORBELL_DISABLED		0
>

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 3/5] drm/i915/guc: don't spinwait if the GuC's workqueue is full
  2016-04-27 18:03 ` [PATCH v2 3/5] drm/i915/guc: don't spinwait if the GuC's workqueue is full Dave Gordon
  2016-04-29 15:17   ` Tvrtko Ursulin
@ 2016-04-29 15:45   ` Tvrtko Ursulin
  2016-05-06 15:17     ` Dave Gordon
  1 sibling, 1 reply; 24+ messages in thread
From: Tvrtko Ursulin @ 2016-04-29 15:45 UTC (permalink / raw)
  To: Dave Gordon, intel-gfx


One late comment:

On 27/04/16 19:03, Dave Gordon wrote:
> Rather than wait to see whether more space becomes available in the GuC
> submission workqueue, we can just return -EAGAIN and let the caller try
> again in a little while. This gets rid of an uninterruptable sleep in
> the polling code :)
>
> We'll also add a counter to the GuC client statistics, to see how often
> we find the WQ full.
>
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c        |  1 +
>   drivers/gpu/drm/i915/i915_guc_submission.c | 16 +++++-----------
>   drivers/gpu/drm/i915/intel_guc.h           |  8 ++++----
>   3 files changed, 10 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 8b8d6f0..1024947 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2509,6 +2509,7 @@ static void i915_guc_client_info(struct seq_file *m,
>   	seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
>   		client->wq_size, client->wq_offset, client->wq_tail);
>
> +	seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
>   	seq_printf(m, "\tFailed to queue: %u\n", client->q_fail);
>   	seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
>   	seq_printf(m, "\tLast submission result: %d\n", client->retcode);
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 66af5ce..6626eff 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -453,27 +453,21 @@ static void guc_fini_ctx_desc(struct intel_guc *guc,
>
>   int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
>   {
> -	const size_t size = sizeof(struct guc_wq_item);
> +	const size_t wqi_size = sizeof(struct guc_wq_item);
>   	struct i915_guc_client *gc = request->i915->guc.execbuf_client;
>   	struct guc_process_desc *desc;
> -	int ret = -ETIMEDOUT, timeout_counter = 200;
>
>   	if (!gc)
>   		return 0;
>
>   	desc = gc->client_base + gc->proc_desc_offset;
>
> -	while (timeout_counter-- > 0) {
> -		if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
> -			ret = 0;
> -			break;
> -		}
> +	if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= wqi_size)
> +		return 0;
>
> -		if (timeout_counter)
> -			usleep_range(1000, 2000);
> -	};
> +	gc->no_wq_space += 1;
>
> -	return ret;
> +	return -EAGAIN;
>   }
>
>   static int guc_add_workqueue_item(struct i915_guc_client *gc,
> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
> index b37c731..436f2d6 100644
> --- a/drivers/gpu/drm/i915/intel_guc.h
> +++ b/drivers/gpu/drm/i915/intel_guc.h
> @@ -73,10 +73,10 @@ struct i915_guc_client {
>
>   	/* GuC submission statistics & status */
>   	uint64_t submissions[GUC_MAX_ENGINES_NUM];
> -	uint32_t q_fail;
> -	uint32_t b_fail;
> -	int retcode;
> -	int spare;			/* pad to 32 DWords		*/
> +	uint32_t no_wq_space;		/* Space pre-check failed	*/
> +	uint32_t q_fail;		/* Failed to queue (MBZ)	*/
> +	uint32_t b_fail;		/* Doorbell failure (MBZ)	*/

Why MBZ? It is not all used in this context so this will just confuse 
people.

> +	int retcode;			/* Result of last guc_submit()	*/
>   };
>
>   enum intel_guc_fw_status {
>


Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 4/5] drm/i915/guc: rework guc_add_workqueue_item()
  2016-04-29 15:44   ` Tvrtko Ursulin
@ 2016-04-29 16:10     ` Chris Wilson
  2016-05-03 10:26       ` Tvrtko Ursulin
  2016-05-05 18:38     ` Dave Gordon
  1 sibling, 1 reply; 24+ messages in thread
From: Chris Wilson @ 2016-04-29 16:10 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Fri, Apr 29, 2016 at 04:44:24PM +0100, Tvrtko Ursulin wrote:
> 
> On 27/04/16 19:03, Dave Gordon wrote:
> >Mostly little optimisations; for instance, if the driver is correctly
> >following the submission protocol, the "out of space" condition is
> >impossible, so the previous runtime WARN_ON() is promoted to a
> >GEM_BUG_ON() for a more dramatic effect in development and less impact
> >in end-user systems.
> >
> >Similarly we can replace other WARN_ON() conditions that don't relate to
> >the hardware state with either BUILD_BUG_ON() for compile-time-
> >detectable issues, or GEM_BUG_ON() for logical "can't happen" errors.
> >
> >With those changes, we can convert it to void, as suggested by Chris
> >Wilson, and update the calling code appropriately.
> >
> >Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> >Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >
> >---
> >  drivers/gpu/drm/i915/i915_guc_submission.c | 69 +++++++++++++++---------------
> >  drivers/gpu/drm/i915/intel_guc.h           |  2 +-
> >  drivers/gpu/drm/i915/intel_guc_fwif.h      |  3 +-
> >  3 files changed, 37 insertions(+), 37 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> >index 6626eff..4d2ea84 100644
> >--- a/drivers/gpu/drm/i915/i915_guc_submission.c
> >+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> >@@ -470,23 +470,28 @@ int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
> >  	return -EAGAIN;
> >  }
> >
> >-static int guc_add_workqueue_item(struct i915_guc_client *gc,
> >-				  struct drm_i915_gem_request *rq)
> >+static void guc_add_workqueue_item(struct i915_guc_client *gc,
> >+				   struct drm_i915_gem_request *rq)
> >  {
> >+	/* wqi_len is in DWords, and does not include the one-word header */
> >+	const size_t wqi_size = sizeof(struct guc_wq_item);
> 
> Again, u32 is correct I think.
> 
> >+	const u32 wqi_len = wqi_size/sizeof(u32) - 1;
> >  	struct guc_process_desc *desc;
> >  	struct guc_wq_item *wqi;
> >  	void *base;
> >-	u32 tail, wq_len, wq_off, space;
> >+	u32 space, tail, wq_off, wq_page;
> >
> >  	desc = gc->client_base + gc->proc_desc_offset;
> >+
> >+	/* Space was checked earlier, in i915_guc_wq_check_space() above */
> 
> It may be above in the file, but the two do not call one another so
> I recommend saying exactly who called it.
> 
> >  	space = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
> >-	if (WARN_ON(space < sizeof(struct guc_wq_item)))
> >-		return -ENOSPC; /* shouldn't happen */
> >+	GEM_BUG_ON(space < wqi_size);
> 
> It is impossible to hit this only because of the struct_mutex
> guarding the whole time window from request creation to submission.
> If in the future, near or far, that gets fixed, then this will need
> reworking.

Request submission will still have to serialised by a "ring" mutex, 
from the time we allocate the request to the time we add it to whatever
submission queue. It should still hold that we can pin all the required
resources (ringbuffer, context state, vm page tables, workqueues) up
front and take any errors early and then rely on our preallocation when
submitting the request.

> I don't have any better ideas though.
> 
> But a WARN_ON and return would be almost as good. Everything is
> better than a dead machine one can't ssh into...
> 
> So I appeal to make this a WARN_ON and return. Nothing bad would
> happen apart from software thinking GPU has hung.

Hence why not make it a bug? If you can't ssh in because the driver died
inside GEM, something is very wrong.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 4/5] drm/i915/guc: rework guc_add_workqueue_item()
  2016-04-29 16:10     ` Chris Wilson
@ 2016-05-03 10:26       ` Tvrtko Ursulin
  0 siblings, 0 replies; 24+ messages in thread
From: Tvrtko Ursulin @ 2016-05-03 10:26 UTC (permalink / raw)
  To: Chris Wilson, Dave Gordon, intel-gfx


On 29/04/16 17:10, Chris Wilson wrote:
> On Fri, Apr 29, 2016 at 04:44:24PM +0100, Tvrtko Ursulin wrote:
>>
>> On 27/04/16 19:03, Dave Gordon wrote:
>>> Mostly little optimisations; for instance, if the driver is correctly
>>> following the submission protocol, the "out of space" condition is
>>> impossible, so the previous runtime WARN_ON() is promoted to a
>>> GEM_BUG_ON() for a more dramatic effect in development and less impact
>>> in end-user systems.
>>>
>>> Similarly we can replace other WARN_ON() conditions that don't relate to
>>> the hardware state with either BUILD_BUG_ON() for compile-time-
>>> detectable issues, or GEM_BUG_ON() for logical "can't happen" errors.
>>>
>>> With those changes, we can convert it to void, as suggested by Chris
>>> Wilson, and update the calling code appropriately.
>>>
>>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>
>>> ---
>>>   drivers/gpu/drm/i915/i915_guc_submission.c | 69 +++++++++++++++---------------
>>>   drivers/gpu/drm/i915/intel_guc.h           |  2 +-
>>>   drivers/gpu/drm/i915/intel_guc_fwif.h      |  3 +-
>>>   3 files changed, 37 insertions(+), 37 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
>>> index 6626eff..4d2ea84 100644
>>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>>> @@ -470,23 +470,28 @@ int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
>>>   	return -EAGAIN;
>>>   }
>>>
>>> -static int guc_add_workqueue_item(struct i915_guc_client *gc,
>>> -				  struct drm_i915_gem_request *rq)
>>> +static void guc_add_workqueue_item(struct i915_guc_client *gc,
>>> +				   struct drm_i915_gem_request *rq)
>>>   {
>>> +	/* wqi_len is in DWords, and does not include the one-word header */
>>> +	const size_t wqi_size = sizeof(struct guc_wq_item);
>>
>> Again, u32 is correct I think.
>>
>>> +	const u32 wqi_len = wqi_size/sizeof(u32) - 1;
>>>   	struct guc_process_desc *desc;
>>>   	struct guc_wq_item *wqi;
>>>   	void *base;
>>> -	u32 tail, wq_len, wq_off, space;
>>> +	u32 space, tail, wq_off, wq_page;
>>>
>>>   	desc = gc->client_base + gc->proc_desc_offset;
>>> +
>>> +	/* Space was checked earlier, in i915_guc_wq_check_space() above */
>>
>> It may be above in the file, but the two do not call one another so
>> I recommend saying exactly who called it.
>>
>>>   	space = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
>>> -	if (WARN_ON(space < sizeof(struct guc_wq_item)))
>>> -		return -ENOSPC; /* shouldn't happen */
>>> +	GEM_BUG_ON(space < wqi_size);
>>
>> It is impossible to hit this only because of the struct_mutex
>> guarding the whole time window from request creation to submission.
>> If in the future, near or far, that gets fixed, then this will need
>> reworking.
>
> Request submission will still have to serialised by a "ring" mutex,
> from the time we allocate the request to the time we add it to whatever
> submission queue. It should still hold that we can pin all the required
> resources (ringbuffer, context state, vm page tables, workqueues) up
> front and take any errors early and then rely on our preallocation when
> submitting the request.
>
>> I don't have any better ideas though.
>>
>> But a WARN_ON and return would be almost as good. Everything is
>> better than a dead machine one can't ssh into...
>>
>> So I appeal to make this a WARN_ON and return. Nothing bad would
>> happen apart from software thinking GPU has hung.
>
> Hence why not make it a bug? If you can't ssh in because the driver died
> inside GEM, something is very wrong.

I was sure bugs are kernel panics, looks like I've been running with 
panic on oops for too long. :(

GEM_BUG_ON is ok then.

Regards,

Tvrtko


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 2/5] drm/i915/guc: pass request (not client) to i915_guc_{wq_check_space, submit}()
  2016-04-29 15:08   ` Tvrtko Ursulin
@ 2016-05-03 19:22     ` Dave Gordon
  2016-05-04  8:33       ` Tvrtko Ursulin
  0 siblings, 1 reply; 24+ messages in thread
From: Dave Gordon @ 2016-05-03 19:22 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

On 29/04/16 16:08, Tvrtko Ursulin wrote:
>
> On 27/04/16 19:03, Dave Gordon wrote:
>> The knowledge of how to derive the relevant client from the request
>> should be localised within i915_guc_submission.c; the LRC code shouldn't
>> have to know about the internal details of the GuC submission process.
>> And all the information the GuC code needs should be encapsulated in (or
>> reachable from) the request.
>>
>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_guc_submission.c | 11 ++++++-----
>>   drivers/gpu/drm/i915/intel_guc.h           |  5 ++---
>>   drivers/gpu/drm/i915/intel_lrc.c           |  9 +++------
>>   3 files changed, 11 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index 42d2efa..66af5ce 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -451,10 +451,11 @@ static void guc_fini_ctx_desc(struct intel_guc
>> *guc,
>>                    sizeof(desc) * client->ctx_index);
>>   }
>>
>> -int i915_guc_wq_check_space(struct i915_guc_client *gc)
>> +int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
>>   {
>> +    const size_t size = sizeof(struct guc_wq_item);
>
> All the other variables passed to CIRC_SPACE are u32 so this is for the
> worse I think.

This isn't passed to CIRC_SPACE; this is what the result of CIRC_SPACE 
is compared against. As it's a constant, gcc knows how big it needs to 
be to correctly represent its actual value, so whether this is the 
logically correct "size_t" (as it represents a size) or the practical 
"u32" or even "u8" (as we know it will fit), the generated code is 
identical (and I'd hope the compiler would warn us if it made a 
difference, e.g. using u8 for a sizeof() expression greater than 256).

Interestingly, the expected value "sizeof(struct work_queue_item)" is 
16, but that doesn't appear in the code at all -- instead gcc compares 
against 15 so it can use "jbe" (Jump if Below or Equal) to get 
unsigned-comparison semantics.

.Dave.

>> +    struct i915_guc_client *gc = request->i915->guc.execbuf_client;
>>       struct guc_process_desc *desc;
>> -    u32 size = sizeof(struct guc_wq_item);
>>       int ret = -ETIMEDOUT, timeout_counter = 200;
>>
>>       if (!gc)
>> @@ -537,11 +538,11 @@ static int guc_add_workqueue_item(struct
>> i915_guc_client *gc,
>>    *
>>    * Return:    0 if succeed
>>    */
>> -int i915_guc_submit(struct i915_guc_client *client,
>> -            struct drm_i915_gem_request *rq)
>> +int i915_guc_submit(struct drm_i915_gem_request *rq)
>>   {
>> -    struct intel_guc *guc = client->guc;
>>       unsigned int engine_id = rq->engine->guc_id;
>> +    struct intel_guc *guc = &rq->i915->guc;
>> +    struct i915_guc_client *client = guc->execbuf_client;
>>       int q_ret, b_ret;
>>
>>       q_ret = guc_add_workqueue_item(client, rq);
>> diff --git a/drivers/gpu/drm/i915/intel_guc.h
>> b/drivers/gpu/drm/i915/intel_guc.h
>> index 9d79c4c..b37c731 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>> @@ -148,10 +148,9 @@ extern int intel_guc_resume(struct drm_device *dev);
>>   /* i915_guc_submission.c */
>>   int i915_guc_submission_init(struct drm_device *dev);
>>   int i915_guc_submission_enable(struct drm_device *dev);
>> -int i915_guc_submit(struct i915_guc_client *client,
>> -            struct drm_i915_gem_request *rq);
>> +int i915_guc_wq_check_space(struct drm_i915_gem_request *rq);
>> +int i915_guc_submit(struct drm_i915_gem_request *rq);
>>   void i915_guc_submission_disable(struct drm_device *dev);
>>   void i915_guc_submission_fini(struct drm_device *dev);
>> -int i915_guc_wq_check_space(struct i915_guc_client *client);
>>
>>   #endif
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index 2b7e6bb..b8ec8c6 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -708,9 +708,7 @@ int intel_logical_ring_alloc_request_extras(struct
>> drm_i915_gem_request *request
>>            * going any further, as the i915_add_request() call
>>            * later on mustn't fail ...
>>            */
>> -        struct intel_guc *guc = &request->i915->guc;
>> -
>> -        ret = i915_guc_wq_check_space(guc->execbuf_client);
>> +        ret = i915_guc_wq_check_space(request);
>>           if (ret)
>>               return ret;
>>       }
>> @@ -776,7 +774,6 @@ static int logical_ring_wait_for_space(struct
>> drm_i915_gem_request *req,
>>   intel_logical_ring_advance_and_submit(struct drm_i915_gem_request
>> *request)
>>   {
>>       struct intel_ringbuffer *ringbuf = request->ringbuf;
>> -    struct drm_i915_private *dev_priv = request->i915;
>>       struct intel_engine_cs *engine = request->engine;
>>
>>       intel_logical_ring_advance(ringbuf);
>> @@ -806,8 +803,8 @@ static int logical_ring_wait_for_space(struct
>> drm_i915_gem_request *req,
>>           }
>>       }
>>
>> -    if (dev_priv->guc.execbuf_client)
>> -        i915_guc_submit(dev_priv->guc.execbuf_client, request);
>> +    if (i915.enable_guc_submission)
>> +        i915_guc_submit(request);
>>       else
>>           execlists_context_queue(request);
>>
>>
>
> With the datatype fix (revert) in i915_guc_wq_check_space:
>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Regards,
>
> Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 2/5] drm/i915/guc: pass request (not client) to i915_guc_{wq_check_space, submit}()
  2016-05-03 19:22     ` Dave Gordon
@ 2016-05-04  8:33       ` Tvrtko Ursulin
  0 siblings, 0 replies; 24+ messages in thread
From: Tvrtko Ursulin @ 2016-05-04  8:33 UTC (permalink / raw)
  To: Dave Gordon, intel-gfx


On 03/05/16 20:22, Dave Gordon wrote:
> On 29/04/16 16:08, Tvrtko Ursulin wrote:
>>
>> On 27/04/16 19:03, Dave Gordon wrote:
>>> The knowledge of how to derive the relevant client from the request
>>> should be localised within i915_guc_submission.c; the LRC code shouldn't
>>> have to know about the internal details of the GuC submission process.
>>> And all the information the GuC code needs should be encapsulated in (or
>>> reachable from) the request.
>>>
>>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_guc_submission.c | 11 ++++++-----
>>>   drivers/gpu/drm/i915/intel_guc.h           |  5 ++---
>>>   drivers/gpu/drm/i915/intel_lrc.c           |  9 +++------
>>>   3 files changed, 11 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>>> index 42d2efa..66af5ce 100644
>>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>>> @@ -451,10 +451,11 @@ static void guc_fini_ctx_desc(struct intel_guc
>>> *guc,
>>>                    sizeof(desc) * client->ctx_index);
>>>   }
>>>
>>> -int i915_guc_wq_check_space(struct i915_guc_client *gc)
>>> +int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
>>>   {
>>> +    const size_t size = sizeof(struct guc_wq_item);
>>
>> All the other variables passed to CIRC_SPACE are u32 so this is for the
>> worse I think.
>
> This isn't passed to CIRC_SPACE; this is what the result of CIRC_SPACE
> is compared against. As it's a constant, gcc knows how big it needs to

Oh you are right, how did I see it passed in no idea now.

Regards,

Tvrtko

> be to correctly represent its actual value, so whether this is the
> logically correct "size_t" (as it represents a size) or the practical
> "u32" or even "u8" (as we know it will fit), the generated code is
> identical (and I'd hope the compiler would warn us if it made a
> difference, e.g. using u8 for a sizeof() expression greater than 256).
>
> Interestingly, the expected value "sizeof(struct work_queue_item)" is
> 16, but that doesn't appear in the code at all -- instead gcc compares
> against 15 so it can use "jbe" (Jump if Below or Equal) to get
> unsigned-comparison semantics.
>
> .Dave.
>
>>> +    struct i915_guc_client *gc = request->i915->guc.execbuf_client;
>>>       struct guc_process_desc *desc;
>>> -    u32 size = sizeof(struct guc_wq_item);
>>>       int ret = -ETIMEDOUT, timeout_counter = 200;
>>>
>>>       if (!gc)
>>> @@ -537,11 +538,11 @@ static int guc_add_workqueue_item(struct
>>> i915_guc_client *gc,
>>>    *
>>>    * Return:    0 if succeed
>>>    */
>>> -int i915_guc_submit(struct i915_guc_client *client,
>>> -            struct drm_i915_gem_request *rq)
>>> +int i915_guc_submit(struct drm_i915_gem_request *rq)
>>>   {
>>> -    struct intel_guc *guc = client->guc;
>>>       unsigned int engine_id = rq->engine->guc_id;
>>> +    struct intel_guc *guc = &rq->i915->guc;
>>> +    struct i915_guc_client *client = guc->execbuf_client;
>>>       int q_ret, b_ret;
>>>
>>>       q_ret = guc_add_workqueue_item(client, rq);
>>> diff --git a/drivers/gpu/drm/i915/intel_guc.h
>>> b/drivers/gpu/drm/i915/intel_guc.h
>>> index 9d79c4c..b37c731 100644
>>> --- a/drivers/gpu/drm/i915/intel_guc.h
>>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>>> @@ -148,10 +148,9 @@ extern int intel_guc_resume(struct drm_device
>>> *dev);
>>>   /* i915_guc_submission.c */
>>>   int i915_guc_submission_init(struct drm_device *dev);
>>>   int i915_guc_submission_enable(struct drm_device *dev);
>>> -int i915_guc_submit(struct i915_guc_client *client,
>>> -            struct drm_i915_gem_request *rq);
>>> +int i915_guc_wq_check_space(struct drm_i915_gem_request *rq);
>>> +int i915_guc_submit(struct drm_i915_gem_request *rq);
>>>   void i915_guc_submission_disable(struct drm_device *dev);
>>>   void i915_guc_submission_fini(struct drm_device *dev);
>>> -int i915_guc_wq_check_space(struct i915_guc_client *client);
>>>
>>>   #endif
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c
>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>> index 2b7e6bb..b8ec8c6 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -708,9 +708,7 @@ int intel_logical_ring_alloc_request_extras(struct
>>> drm_i915_gem_request *request
>>>            * going any further, as the i915_add_request() call
>>>            * later on mustn't fail ...
>>>            */
>>> -        struct intel_guc *guc = &request->i915->guc;
>>> -
>>> -        ret = i915_guc_wq_check_space(guc->execbuf_client);
>>> +        ret = i915_guc_wq_check_space(request);
>>>           if (ret)
>>>               return ret;
>>>       }
>>> @@ -776,7 +774,6 @@ static int logical_ring_wait_for_space(struct
>>> drm_i915_gem_request *req,
>>>   intel_logical_ring_advance_and_submit(struct drm_i915_gem_request
>>> *request)
>>>   {
>>>       struct intel_ringbuffer *ringbuf = request->ringbuf;
>>> -    struct drm_i915_private *dev_priv = request->i915;
>>>       struct intel_engine_cs *engine = request->engine;
>>>
>>>       intel_logical_ring_advance(ringbuf);
>>> @@ -806,8 +803,8 @@ static int logical_ring_wait_for_space(struct
>>> drm_i915_gem_request *req,
>>>           }
>>>       }
>>>
>>> -    if (dev_priv->guc.execbuf_client)
>>> -        i915_guc_submit(dev_priv->guc.execbuf_client, request);
>>> +    if (i915.enable_guc_submission)
>>> +        i915_guc_submit(request);
>>>       else
>>>           execlists_context_queue(request);
>>>
>>>
>>
>> With the datatype fix (revert) in i915_guc_wq_check_space:
>>
>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Regards,
>>
>> Tvrtko
>
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 4/5] drm/i915/guc: rework guc_add_workqueue_item()
  2016-04-29 15:44   ` Tvrtko Ursulin
  2016-04-29 16:10     ` Chris Wilson
@ 2016-05-05 18:38     ` Dave Gordon
  2016-05-06  8:55       ` Tvrtko Ursulin
  1 sibling, 1 reply; 24+ messages in thread
From: Dave Gordon @ 2016-05-05 18:38 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

On 29/04/2016 16:44, Tvrtko Ursulin wrote:
>
> On 27/04/16 19:03, Dave Gordon wrote:
>> Mostly little optimisations; for instance, if the driver is correctly
>> following the submission protocol, the "out of space" condition is
>> impossible, so the previous runtime WARN_ON() is promoted to a
>> GEM_BUG_ON() for a more dramatic effect in development and less impact
>> in end-user systems.
>>
>> Similarly we can replace other WARN_ON() conditions that don't relate to
>> the hardware state with either BUILD_BUG_ON() for compile-time-
>> detectable issues, or GEM_BUG_ON() for logical "can't happen" errors.
>>
>> With those changes, we can convert it to void, as suggested by Chris
>> Wilson, and update the calling code appropriately.
>>
>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>
>> ---
>>   drivers/gpu/drm/i915/i915_guc_submission.c | 69 
>> +++++++++++++++---------------
>>   drivers/gpu/drm/i915/intel_guc.h           |  2 +-
>>   drivers/gpu/drm/i915/intel_guc_fwif.h      |  3 +-
>>   3 files changed, 37 insertions(+), 37 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c 
>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index 6626eff..4d2ea84 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -470,23 +470,28 @@ int i915_guc_wq_check_space(struct 
>> drm_i915_gem_request *request)
>>       return -EAGAIN;
>>   }
>>
>> -static int guc_add_workqueue_item(struct i915_guc_client *gc,
>> -                  struct drm_i915_gem_request *rq)
>> +static void guc_add_workqueue_item(struct i915_guc_client *gc,
>> +                   struct drm_i915_gem_request *rq)
>>   {
>> +    /* wqi_len is in DWords, and does not include the one-word 
>> header */
>> +    const size_t wqi_size = sizeof(struct guc_wq_item);
>
> Again, u32 is correct I think.
Nope, it's a sizeof(), but the compiler will check that it fits in u32 
when we convert it to DWords below.
>
>> +    const u32 wqi_len = wqi_size/sizeof(u32) - 1;
>>       struct guc_process_desc *desc;
>>       struct guc_wq_item *wqi;
>>       void *base;
>> -    u32 tail, wq_len, wq_off, space;
>> +    u32 space, tail, wq_off, wq_page;
>>
>>       desc = gc->client_base + gc->proc_desc_offset;
>> +
>> +    /* Space was checked earlier, in i915_guc_wq_check_space() above */
>
> It may be above in the file, but the two do not call one another so I 
> recommend saying exactly who called it.
I don't really mind who called it, as long as it was called sometime 
earlier in the request submission protocol -- the callsite may get moved 
around a bit. Use cscope(1) or your favourite IDE to find it.
>
>>       space = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
>> -    if (WARN_ON(space < sizeof(struct guc_wq_item)))
>> -        return -ENOSPC; /* shouldn't happen */
>> +    GEM_BUG_ON(space < wqi_size);
>
> It is impossible to hit this only because of the struct_mutex guarding 
> the whole time window from request creation to submission. If in the 
> future, near or far, that gets fixed, then this will need reworking.
>
> I don't have any better ideas though.
>
> But a WARN_ON and return would be almost as good. Everything is better 
> than a dead machine one can't ssh into...
>
> So I appeal to make this a WARN_ON and return. Nothing bad would 
> happen apart from software thinking GPU has hung.
If the driver violates the sequencing of check+submit then it does 
indeed require reworking -- that would be a bug. Hence (GEM_)BUG_ON().
>
>>
>> -    /* postincrement WQ tail for next time */
>> -    wq_off = gc->wq_tail;
>> -    gc->wq_tail += sizeof(struct guc_wq_item);
>> -    gc->wq_tail &= gc->wq_size - 1;
>> +    /* The GuC firmware wants the tail index in QWords, not bytes */
>> +    tail = rq->tail;
>
> Used to be sampled from rq->ringbuf->tail - are those the same?
It should always have been rq->tail; they're the same at present because 
it was copied from ringbuffer as part of the submission process, but it 
might not be the same with the scheduler & preemption. So let's get it 
right before it becomes a mysterious bug.
>
>> +    GEM_BUG_ON(tail & 7);
>> +    tail >>= 3;
>> +    GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
>>
>>       /* For now workqueue item is 4 DWs; workqueue buffer is 2 
>> pages. So we
>>        * should not have the case where structure wqi is across page, 
>> neither
>> @@ -495,19 +500,23 @@ static int guc_add_workqueue_item(struct 
>> i915_guc_client *gc,
>>        * XXX: if not the case, we need save data to a temp wqi and 
>> copy it to
>>        * workqueue buffer dw by dw.
>>        */
>> -    WARN_ON(sizeof(struct guc_wq_item) != 16);
>> -    WARN_ON(wq_off & 3);
>> +    BUILD_BUG_ON(wqi_size != 16);
>>
>> -    /* wq starts from the page after doorbell / process_desc */
>> -    base = kmap_atomic(i915_gem_object_get_page(gc->client_obj,
>> -            (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT));
>> +    /* postincrement WQ tail for next time */
>> +    wq_off = gc->wq_tail;
>> +    gc->wq_tail += wqi_size;
>> +    gc->wq_tail &= gc->wq_size - 1;
>> +    GEM_BUG_ON(wq_off & (wqi_size - 1));
>
> Use to be wq_off & 3, now is wq_off & 15, which one is correct?
The new one :) I made it more stringent, so we check for correct 
alignment of wq_tail to sizeof(queue item), not just DWord.
>
>> +
>> +    /* WQ starts from the page after doorbell / process_desc */
>> +    wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT;
>>       wq_off &= PAGE_SIZE - 1;
>> +    base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 
>> wq_page));
>>       wqi = (struct guc_wq_item *)((char *)base + wq_off);
>>
>> -    /* len does not include the header */
>> -    wq_len = sizeof(struct guc_wq_item) / sizeof(u32) - 1;
>> +    /* Now fill in the 4-word work queue item */
>>       wqi->header = WQ_TYPE_INORDER |
>> -            (wq_len << WQ_LEN_SHIFT) |
>> +            (wqi_len << WQ_LEN_SHIFT) |
>>               (rq->engine->guc_id << WQ_TARGET_SHIFT) |
>>               WQ_NO_WCFLUSH_WAIT;
>>
>> @@ -515,14 +524,10 @@ static int guc_add_workqueue_item(struct 
>> i915_guc_client *gc,
>>       wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx,
>>                                    rq->engine);
>>
>> -    /* The GuC firmware wants the tail index in QWords, not bytes */
>> -    tail = rq->ringbuf->tail >> 3;
>>       wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
>> -    wqi->fence_id = 0; /*XXX: what fence to be here */
>> +    wqi->fence_id = rq->seqno;
>
> Not mentioned in the commit?
Not actually used either, but I expect it was provided for a Windows 
fence ID, so our equivalent is a seqno.
It may show up in a GuC log, in which case seqno provides more helpful 
information.
>
>>
>>       kunmap_atomic(base);
>> -
>> -    return 0;
>>   }
>>
>>   /**
>> @@ -537,26 +542,20 @@ int i915_guc_submit(struct drm_i915_gem_request 
>> *rq)
>>       unsigned int engine_id = rq->engine->guc_id;
>>       struct intel_guc *guc = &rq->i915->guc;
>>       struct i915_guc_client *client = guc->execbuf_client;
>> -    int q_ret, b_ret;
>> +    int b_ret;
>
> What is b_ret out of interest, door*B*ell return code ?
>
Yes, originally q_ret & b_ret, for return from Queuing and Bellringing, 
respectively. Queuing can no longer return an error, but ringing the 
doorbell can, even if only in the event of hardware not behaving as 
specified.
>>
>> -    q_ret = guc_add_workqueue_item(client, rq);
>> -    if (q_ret == 0)
>> -        b_ret = guc_ring_doorbell(client);
>> +    guc_add_workqueue_item(client, rq);
>> +    b_ret = guc_ring_doorbell(client);
>>
>>       client->submissions[engine_id] += 1;
>> -    if (q_ret) {
>> -        client->q_fail += 1;
>> -        client->retcode = q_ret;
>> -    } else if (b_ret) {
>> +    client->retcode = b_ret;
>
> I wanted to ask for what is this for but then found it is for debugfs.
>
>> +    if (b_ret)
>>           client->b_fail += 1;
>> -        client->retcode = q_ret = b_ret;
>> -    } else {
>> -        client->retcode = 0;
>> -    }
>> +
>>       guc->submissions[engine_id] += 1;
>>       guc->last_seqno[engine_id] = rq->seqno;
>>
>> -    return q_ret;
>> +    return b_ret;
>
> Could also kill the return value from this one, the only caller, 
> intel_logical_ring_advance_and_submit does not use it. That function 
> itself does not look like it needs a return value at the moment. :)
That's really only because we don't have an exit strategy for submission 
failure. I would rather see all parts of the submission process able to 
return errors (e.g. hardware not responding) and let the top-level code 
implement some sort of recovery strategy. Anyway, I'm leaving this here 
until someone changes the call signature to void.
>
>>   }
>>
>>   /*
>> diff --git a/drivers/gpu/drm/i915/intel_guc.h 
>> b/drivers/gpu/drm/i915/intel_guc.h
>> index 436f2d6..10e1d5e 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>> @@ -74,7 +74,7 @@ struct i915_guc_client {
>>       /* GuC submission statistics & status */
>>       uint64_t submissions[GUC_MAX_ENGINES_NUM];
>>       uint32_t no_wq_space;        /* Space pre-check failed */
>> -    uint32_t q_fail;        /* Failed to queue (MBZ)    */
>> +    uint32_t q_fail;        /* No longer used        */
>
> Get rid of it then?
No, it keeps things aligned. It can be reused for the next thing we need 
to add.

.Dave.

>
>>       uint32_t b_fail;        /* Doorbell failure (MBZ)    */
>>       int retcode;            /* Result of last guc_submit() */
>>   };
>> diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h 
>> b/drivers/gpu/drm/i915/intel_guc_fwif.h
>> index 2de57ff..944786d 100644
>> --- a/drivers/gpu/drm/i915/intel_guc_fwif.h
>> +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
>> @@ -71,7 +71,8 @@
>>   #define   WQ_WORKLOAD_TOUCH        (2 << WQ_WORKLOAD_SHIFT)
>>
>>   #define WQ_RING_TAIL_SHIFT        20
>> -#define WQ_RING_TAIL_MASK        (0x7FF << WQ_RING_TAIL_SHIFT)
>> +#define WQ_RING_TAIL_MAX        0x7FF    /* 2^11 QWords */
>> +#define WQ_RING_TAIL_MASK        (WQ_RING_TAIL_MAX << 
>> WQ_RING_TAIL_SHIFT)
>>
>>   #define GUC_DOORBELL_ENABLED        1
>>   #define GUC_DOORBELL_DISABLED        0
>>
>
> Regards,
>
> Tvrtko
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 4/5] drm/i915/guc: rework guc_add_workqueue_item()
  2016-05-05 18:38     ` Dave Gordon
@ 2016-05-06  8:55       ` Tvrtko Ursulin
  2016-05-06 15:06         ` Dave Gordon
  0 siblings, 1 reply; 24+ messages in thread
From: Tvrtko Ursulin @ 2016-05-06  8:55 UTC (permalink / raw)
  To: Dave Gordon, intel-gfx


On 05/05/16 19:38, Dave Gordon wrote:
> On 29/04/2016 16:44, Tvrtko Ursulin wrote:
>>
>> On 27/04/16 19:03, Dave Gordon wrote:
>>> Mostly little optimisations; for instance, if the driver is correctly
>>> following the submission protocol, the "out of space" condition is
>>> impossible, so the previous runtime WARN_ON() is promoted to a
>>> GEM_BUG_ON() for a more dramatic effect in development and less impact
>>> in end-user systems.
>>>
>>> Similarly we can replace other WARN_ON() conditions that don't relate to
>>> the hardware state with either BUILD_BUG_ON() for compile-time-
>>> detectable issues, or GEM_BUG_ON() for logical "can't happen" errors.
>>>
>>> With those changes, we can convert it to void, as suggested by Chris
>>> Wilson, and update the calling code appropriately.
>>>
>>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>
>>> ---
>>>   drivers/gpu/drm/i915/i915_guc_submission.c | 69
>>> +++++++++++++++---------------
>>>   drivers/gpu/drm/i915/intel_guc.h           |  2 +-
>>>   drivers/gpu/drm/i915/intel_guc_fwif.h      |  3 +-
>>>   3 files changed, 37 insertions(+), 37 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>>> index 6626eff..4d2ea84 100644
>>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>>> @@ -470,23 +470,28 @@ int i915_guc_wq_check_space(struct
>>> drm_i915_gem_request *request)
>>>       return -EAGAIN;
>>>   }
>>>
>>> -static int guc_add_workqueue_item(struct i915_guc_client *gc,
>>> -                  struct drm_i915_gem_request *rq)
>>> +static void guc_add_workqueue_item(struct i915_guc_client *gc,
>>> +                   struct drm_i915_gem_request *rq)
>>>   {
>>> +    /* wqi_len is in DWords, and does not include the one-word
>>> header */
>>> +    const size_t wqi_size = sizeof(struct guc_wq_item);
>>
>> Again, u32 is correct I think.
> Nope, it's a sizeof(), but the compiler will check that it fits in u32
> when we convert it to DWords below.

I already wrote in this same thread few days ago this was my oversight.

>>
>>> +    const u32 wqi_len = wqi_size/sizeof(u32) - 1;
>>>       struct guc_process_desc *desc;
>>>       struct guc_wq_item *wqi;
>>>       void *base;
>>> -    u32 tail, wq_len, wq_off, space;
>>> +    u32 space, tail, wq_off, wq_page;
>>>
>>>       desc = gc->client_base + gc->proc_desc_offset;
>>> +
>>> +    /* Space was checked earlier, in i915_guc_wq_check_space() above */
>>
>> It may be above in the file, but the two do not call one another so I
>> recommend saying exactly who called it.
> I don't really mind who called it, as long as it was called sometime
> earlier in the request submission protocol -- the callsite may get moved
> around a bit. Use cscope(1) or your favourite IDE to find it.

So what does this comment supposed to tell the reader? 
guc_add_workqueue_item does not call i915_guc_wq_check_space so "above" 
what? Above in the file? Or maybe *earlier* in the call sequence of 
what? Might as well put an useful comment in when you are adding one.

>>
>>>       space = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
>>> -    if (WARN_ON(space < sizeof(struct guc_wq_item)))
>>> -        return -ENOSPC; /* shouldn't happen */
>>> +    GEM_BUG_ON(space < wqi_size);
>>
>> It is impossible to hit this only because of the struct_mutex guarding
>> the whole time window from request creation to submission. If in the
>> future, near or far, that gets fixed, then this will need reworking.
>>
>> I don't have any better ideas though.
>>
>> But a WARN_ON and return would be almost as good. Everything is better
>> than a dead machine one can't ssh into...
>>
>> So I appeal to make this a WARN_ON and return. Nothing bad would
>> happen apart from software thinking GPU has hung.
> If the driver violates the sequencing of check+submit then it does
> indeed require reworking -- that would be a bug. Hence (GEM_)BUG_ON().

This also was clarified in this same thread few days ago and I conceded 
the point.

>>
>>>
>>> -    /* postincrement WQ tail for next time */
>>> -    wq_off = gc->wq_tail;
>>> -    gc->wq_tail += sizeof(struct guc_wq_item);
>>> -    gc->wq_tail &= gc->wq_size - 1;
>>> +    /* The GuC firmware wants the tail index in QWords, not bytes */
>>> +    tail = rq->tail;
>>
>> Used to be sampled from rq->ringbuf->tail - are those the same?
> It should always have been rq->tail; they're the same at present because
> it was copied from ringbuffer as part of the submission process, but it
> might not be the same with the scheduler & preemption. So let's get it
> right before it becomes a mysterious bug.
>>
>>> +    GEM_BUG_ON(tail & 7);
>>> +    tail >>= 3;
>>> +    GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
>>>
>>>       /* For now workqueue item is 4 DWs; workqueue buffer is 2
>>> pages. So we
>>>        * should not have the case where structure wqi is across page,
>>> neither
>>> @@ -495,19 +500,23 @@ static int guc_add_workqueue_item(struct
>>> i915_guc_client *gc,
>>>        * XXX: if not the case, we need save data to a temp wqi and
>>> copy it to
>>>        * workqueue buffer dw by dw.
>>>        */
>>> -    WARN_ON(sizeof(struct guc_wq_item) != 16);
>>> -    WARN_ON(wq_off & 3);
>>> +    BUILD_BUG_ON(wqi_size != 16);
>>>
>>> -    /* wq starts from the page after doorbell / process_desc */
>>> -    base = kmap_atomic(i915_gem_object_get_page(gc->client_obj,
>>> -            (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT));
>>> +    /* postincrement WQ tail for next time */
>>> +    wq_off = gc->wq_tail;
>>> +    gc->wq_tail += wqi_size;
>>> +    gc->wq_tail &= gc->wq_size - 1;
>>> +    GEM_BUG_ON(wq_off & (wqi_size - 1));
>>
>> Use to be wq_off & 3, now is wq_off & 15, which one is correct?
> The new one :) I made it more stringent, so we check for correct
> alignment of wq_tail to sizeof(queue item), not just DWord.

Worth mentioning in the commit message? Since it begins with "Mostly 
little optimisations;" readers/reviewers expect to see no functional 
changes then.

>>
>>> +
>>> +    /* WQ starts from the page after doorbell / process_desc */
>>> +    wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT;
>>>       wq_off &= PAGE_SIZE - 1;
>>> +    base = kmap_atomic(i915_gem_object_get_page(gc->client_obj,
>>> wq_page));
>>>       wqi = (struct guc_wq_item *)((char *)base + wq_off);
>>>
>>> -    /* len does not include the header */
>>> -    wq_len = sizeof(struct guc_wq_item) / sizeof(u32) - 1;
>>> +    /* Now fill in the 4-word work queue item */
>>>       wqi->header = WQ_TYPE_INORDER |
>>> -            (wq_len << WQ_LEN_SHIFT) |
>>> +            (wqi_len << WQ_LEN_SHIFT) |
>>>               (rq->engine->guc_id << WQ_TARGET_SHIFT) |
>>>               WQ_NO_WCFLUSH_WAIT;
>>>
>>> @@ -515,14 +524,10 @@ static int guc_add_workqueue_item(struct
>>> i915_guc_client *gc,
>>>       wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx,
>>>                                    rq->engine);
>>>
>>> -    /* The GuC firmware wants the tail index in QWords, not bytes */
>>> -    tail = rq->ringbuf->tail >> 3;
>>>       wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
>>> -    wqi->fence_id = 0; /*XXX: what fence to be here */
>>> +    wqi->fence_id = rq->seqno;
>>
>> Not mentioned in the commit?
> Not actually used either, but I expect it was provided for a Windows
> fence ID, so our equivalent is a seqno.
> It may show up in a GuC log, in which case seqno provides more helpful
> information.

Best to mention it in the commit imho, again, not to confuse 
readers/reviewers.

>>
>>>
>>>       kunmap_atomic(base);
>>> -
>>> -    return 0;
>>>   }
>>>
>>>   /**
>>> @@ -537,26 +542,20 @@ int i915_guc_submit(struct drm_i915_gem_request
>>> *rq)
>>>       unsigned int engine_id = rq->engine->guc_id;
>>>       struct intel_guc *guc = &rq->i915->guc;
>>>       struct i915_guc_client *client = guc->execbuf_client;
>>> -    int q_ret, b_ret;
>>> +    int b_ret;
>>
>> What is b_ret out of interest, door*B*ell return code ?
>>
> Yes, originally q_ret & b_ret, for return from Queuing and Bellringing,
> respectively. Queuing can no longer return an error, but ringing the
> doorbell can, even if only in the event of hardware not behaving as
> specified.
>>>
>>> -    q_ret = guc_add_workqueue_item(client, rq);
>>> -    if (q_ret == 0)
>>> -        b_ret = guc_ring_doorbell(client);
>>> +    guc_add_workqueue_item(client, rq);
>>> +    b_ret = guc_ring_doorbell(client);
>>>
>>>       client->submissions[engine_id] += 1;
>>> -    if (q_ret) {
>>> -        client->q_fail += 1;
>>> -        client->retcode = q_ret;
>>> -    } else if (b_ret) {
>>> +    client->retcode = b_ret;
>>
>> I wanted to ask for what is this for but then found it is for debugfs.
>>
>>> +    if (b_ret)
>>>           client->b_fail += 1;
>>> -        client->retcode = q_ret = b_ret;
>>> -    } else {
>>> -        client->retcode = 0;
>>> -    }
>>> +
>>>       guc->submissions[engine_id] += 1;
>>>       guc->last_seqno[engine_id] = rq->seqno;
>>>
>>> -    return q_ret;
>>> +    return b_ret;
>>
>> Could also kill the return value from this one, the only caller,
>> intel_logical_ring_advance_and_submit does not use it. That function
>> itself does not look like it needs a return value at the moment. :)
> That's really only because we don't have an exit strategy for submission
> failure. I would rather see all parts of the submission process able to
> return errors (e.g. hardware not responding) and let the top-level code
> implement some sort of recovery strategy. Anyway, I'm leaving this here
> until someone changes the call signature to void.
>>
>>>   }
>>>
>>>   /*
>>> diff --git a/drivers/gpu/drm/i915/intel_guc.h
>>> b/drivers/gpu/drm/i915/intel_guc.h
>>> index 436f2d6..10e1d5e 100644
>>> --- a/drivers/gpu/drm/i915/intel_guc.h
>>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>>> @@ -74,7 +74,7 @@ struct i915_guc_client {
>>>       /* GuC submission statistics & status */
>>>       uint64_t submissions[GUC_MAX_ENGINES_NUM];
>>>       uint32_t no_wq_space;        /* Space pre-check failed */
>>> -    uint32_t q_fail;        /* Failed to queue (MBZ)    */
>>> +    uint32_t q_fail;        /* No longer used        */
>>
>> Get rid of it then?
> No, it keeps things aligned. It can be reused for the next thing we need
> to add.

AFAIR u32s need to be 4-byte aligned on x86_64 so I don't get it but 
whatever.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 4/5] drm/i915/guc: rework guc_add_workqueue_item()
  2016-05-06  8:55       ` Tvrtko Ursulin
@ 2016-05-06 15:06         ` Dave Gordon
  0 siblings, 0 replies; 24+ messages in thread
From: Dave Gordon @ 2016-05-06 15:06 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

On 06/05/16 09:55, Tvrtko Ursulin wrote:
>
> On 05/05/16 19:38, Dave Gordon wrote:
>> On 29/04/2016 16:44, Tvrtko Ursulin wrote:
>>>
>>> On 27/04/16 19:03, Dave Gordon wrote:
>>>> Mostly little optimisations; for instance, if the driver is correctly
>>>> following the submission protocol, the "out of space" condition is
>>>> impossible, so the previous runtime WARN_ON() is promoted to a
>>>> GEM_BUG_ON() for a more dramatic effect in development and less impact
>>>> in end-user systems.
>>>>
>>>> Similarly we can replace other WARN_ON() conditions that don't
>>>> relate to
>>>> the hardware state with either BUILD_BUG_ON() for compile-time-
>>>> detectable issues, or GEM_BUG_ON() for logical "can't happen" errors.
>>>>
>>>> With those changes, we can convert it to void, as suggested by Chris
>>>> Wilson, and update the calling code appropriately.
>>>>
>>>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>>
>>>> ---
>>>>   drivers/gpu/drm/i915/i915_guc_submission.c | 69
>>>> +++++++++++++++---------------
>>>>   drivers/gpu/drm/i915/intel_guc.h           |  2 +-
>>>>   drivers/gpu/drm/i915/intel_guc_fwif.h      |  3 +-
>>>>   3 files changed, 37 insertions(+), 37 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>>>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>>>> index 6626eff..4d2ea84 100644
>>>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>>>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>>>> @@ -470,23 +470,28 @@ int i915_guc_wq_check_space(struct
>>>> drm_i915_gem_request *request)
>>>>       return -EAGAIN;
>>>>   }
>>>>
>>>> -static int guc_add_workqueue_item(struct i915_guc_client *gc,
>>>> -                  struct drm_i915_gem_request *rq)
>>>> +static void guc_add_workqueue_item(struct i915_guc_client *gc,
>>>> +                   struct drm_i915_gem_request *rq)
>>>>   {
>>>> +    /* wqi_len is in DWords, and does not include the one-word
>>>> header */
>>>> +    const size_t wqi_size = sizeof(struct guc_wq_item);
>>>
>>> Again, u32 is correct I think.
>> Nope, it's a sizeof(), but the compiler will check that it fits in u32
>> when we convert it to DWords below.
>
> I already wrote in this same thread few days ago this was my oversight.

Yeah, I'm answering messages out of sequence :)

>>>> +    const u32 wqi_len = wqi_size/sizeof(u32) - 1;
>>>>       struct guc_process_desc *desc;
>>>>       struct guc_wq_item *wqi;
>>>>       void *base;
>>>> -    u32 tail, wq_len, wq_off, space;
>>>> +    u32 space, tail, wq_off, wq_page;
>>>>
>>>>       desc = gc->client_base + gc->proc_desc_offset;
>>>> +
>>>> +    /* Space was checked earlier, in i915_guc_wq_check_space()
>>>> above */
>>>
>>> It may be above in the file, but the two do not call one another so I
>>> recommend saying exactly who called it.
 >>>
>> I don't really mind who called it, as long as it was called sometime
>> earlier in the request submission protocol -- the callsite may get moved
>> around a bit. Use cscope(1) or your favourite IDE to find it.
>
> So what does this comment supposed to tell the reader?

It tells the reader that we "cannot" be out of space, because we already 
checked that there was enough. Unless, of course, somebody removed the 
check. The commit message says that, too.

> guc_add_workqueue_item does not call i915_guc_wq_check_space so "above"
> what? Above in the file? Or maybe *earlier* in the call sequence of
> what? Might as well put an useful comment in when you are adding one.

It already has both the words "earlier" (as in, before in time, in 
calling sequence), and "above" (meaning, in textual order, in this 
file). So yes, both "above" AND "earlier". Anyway, this one-liner is 
only here to justify the lack of any check-and-wait code. OTOH I could 
add some kerneldoc elsewhere to explain that the caller must call 
check_space() before calling submit().

>>>>       space = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
>>>> -    if (WARN_ON(space < sizeof(struct guc_wq_item)))
>>>> -        return -ENOSPC; /* shouldn't happen */
>>>> +    GEM_BUG_ON(space < wqi_size);
>>>
>>> It is impossible to hit this only because of the struct_mutex guarding
>>> the whole time window from request creation to submission. If in the
>>> future, near or far, that gets fixed, then this will need reworking.
>>>
>>> I don't have any better ideas though.
>>>
>>> But a WARN_ON and return would be almost as good. Everything is better
>>> than a dead machine one can't ssh into...
>>>
>>> So I appeal to make this a WARN_ON and return. Nothing bad would
>>> happen apart from software thinking GPU has hung.
 >>
>> If the driver violates the sequencing of check+submit then it does
>> indeed require reworking -- that would be a bug. Hence (GEM_)BUG_ON().
>
> This also was clarified in this same thread few days ago and I conceded
> the point.
>
>>>>
>>>> -    /* postincrement WQ tail for next time */
>>>> -    wq_off = gc->wq_tail;
>>>> -    gc->wq_tail += sizeof(struct guc_wq_item);
>>>> -    gc->wq_tail &= gc->wq_size - 1;
>>>> +    /* The GuC firmware wants the tail index in QWords, not bytes */
>>>> +    tail = rq->tail;
>>>
>>> Used to be sampled from rq->ringbuf->tail - are those the same?
 >>
>> It should always have been rq->tail; they're the same at present because
>> it was copied from ringbuffer as part of the submission process, but it
>> might not be the same with the scheduler & preemption. So let's get it
>> right before it becomes a mysterious bug.
>>
>>>> +    GEM_BUG_ON(tail & 7);
>>>> +    tail >>= 3;
>>>> +    GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
>>>>
>>>>       /* For now workqueue item is 4 DWs; workqueue buffer is 2
>>>> pages. So we
>>>>        * should not have the case where structure wqi is across page,
>>>> neither
>>>> @@ -495,19 +500,23 @@ static int guc_add_workqueue_item(struct
>>>> i915_guc_client *gc,
>>>>        * XXX: if not the case, we need save data to a temp wqi and
>>>> copy it to
>>>>        * workqueue buffer dw by dw.
>>>>        */
>>>> -    WARN_ON(sizeof(struct guc_wq_item) != 16);
>>>> -    WARN_ON(wq_off & 3);
>>>> +    BUILD_BUG_ON(wqi_size != 16);
>>>>
>>>> -    /* wq starts from the page after doorbell / process_desc */
>>>> -    base = kmap_atomic(i915_gem_object_get_page(gc->client_obj,
>>>> -            (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT));
>>>> +    /* postincrement WQ tail for next time */
>>>> +    wq_off = gc->wq_tail;
>>>> +    gc->wq_tail += wqi_size;
>>>> +    gc->wq_tail &= gc->wq_size - 1;
>>>> +    GEM_BUG_ON(wq_off & (wqi_size - 1));
>>>
>>> Use to be wq_off & 3, now is wq_off & 15, which one is correct?
>> The new one :) I made it more stringent, so we check for correct
>> alignment of wq_tail to sizeof(queue item), not just DWord.
>
> Worth mentioning in the commit message? Since it begins with "Mostly
> little optimisations;" readers/reviewers expect to see no functional
> changes then.

It *isn't* a functional change. It *should* be impossible for the value 
ever NOT to be a multiple of 16, and neither the old version nor the new 
version has ever been triggered. It's just future-proofing for the day 
when someone else changes this code and introduces a bug :)

>>>> +
>>>> +    /* WQ starts from the page after doorbell / process_desc */
>>>> +    wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT;
>>>>       wq_off &= PAGE_SIZE - 1;
>>>> +    base = kmap_atomic(i915_gem_object_get_page(gc->client_obj,
>>>> wq_page));
>>>>       wqi = (struct guc_wq_item *)((char *)base + wq_off);
>>>>
>>>> -    /* len does not include the header */
>>>> -    wq_len = sizeof(struct guc_wq_item) / sizeof(u32) - 1;
>>>> +    /* Now fill in the 4-word work queue item */
>>>>       wqi->header = WQ_TYPE_INORDER |
>>>> -            (wq_len << WQ_LEN_SHIFT) |
>>>> +            (wqi_len << WQ_LEN_SHIFT) |
>>>>               (rq->engine->guc_id << WQ_TARGET_SHIFT) |
>>>>               WQ_NO_WCFLUSH_WAIT;
>>>>
>>>> @@ -515,14 +524,10 @@ static int guc_add_workqueue_item(struct
>>>> i915_guc_client *gc,
>>>>       wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx,
>>>>                                    rq->engine);
>>>>
>>>> -    /* The GuC firmware wants the tail index in QWords, not bytes */
>>>> -    tail = rq->ringbuf->tail >> 3;
>>>>       wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
>>>> -    wqi->fence_id = 0; /*XXX: what fence to be here */
>>>> +    wqi->fence_id = rq->seqno;
>>>
>>> Not mentioned in the commit?
 >>
>> Not actually used either, but I expect it was provided for a Windows
>> fence ID, so our equivalent is a seqno.
>> It may show up in a GuC log, in which case seqno provides more helpful
>> information.
>
> Best to mention it in the commit imho, again, not to confuse
> readers/reviewers.

As the (deleted) comment says, we didn't really know what to put here, 
so no one can have been relying on an *absence* of useful information. 
Maybe next week we'll put the system timestamp in this field instead. 
Also, I'm not guaranteeing that this value *will* show up anywhere at 
all; for now at least, GuC logging is an uncommitted interface. So 
noting that we're setting a previously-unused value to a new value that 
has no functional effect and isn't visible through any stable interface 
seems a little pointless.

(Of course, *next* time somebody changes it there will probably be a 
good reason, like "timestamp is more useful than seqno in GuC log").

>>>>       kunmap_atomic(base);
>>>> -
>>>> -    return 0;
>>>>   }
>>>>
>>>>   /**
>>>> @@ -537,26 +542,20 @@ int i915_guc_submit(struct drm_i915_gem_request
>>>> *rq)
>>>>       unsigned int engine_id = rq->engine->guc_id;
>>>>       struct intel_guc *guc = &rq->i915->guc;
>>>>       struct i915_guc_client *client = guc->execbuf_client;
>>>> -    int q_ret, b_ret;
>>>> +    int b_ret;
>>>
>>> What is b_ret out of interest, door*B*ell return code ?
>>>
>> Yes, originally q_ret & b_ret, for return from Queuing and Bellringing,
>> respectively. Queuing can no longer return an error, but ringing the
>> doorbell can, even if only in the event of hardware not behaving as
>> specified.
>>>
>>>> -    q_ret = guc_add_workqueue_item(client, rq);
>>>> -    if (q_ret == 0)
>>>> -        b_ret = guc_ring_doorbell(client);
>>>> +    guc_add_workqueue_item(client, rq);
>>>> +    b_ret = guc_ring_doorbell(client);
>>>>
>>>>       client->submissions[engine_id] += 1;
>>>> -    if (q_ret) {
>>>> -        client->q_fail += 1;
>>>> -        client->retcode = q_ret;
>>>> -    } else if (b_ret) {
>>>> +    client->retcode = b_ret;
>>>
>>> I wanted to ask for what is this for but then found it is for debugfs.
>>>
>>>> +    if (b_ret)
>>>>           client->b_fail += 1;
>>>> -        client->retcode = q_ret = b_ret;
>>>> -    } else {
>>>> -        client->retcode = 0;
>>>> -    }
>>>> +
>>>>       guc->submissions[engine_id] += 1;
>>>>       guc->last_seqno[engine_id] = rq->seqno;
>>>>
>>>> -    return q_ret;
>>>> +    return b_ret;
>>>
>>> Could also kill the return value from this one, the only caller,
>>> intel_logical_ring_advance_and_submit does not use it. That function
>>> itself does not look like it needs a return value at the moment. :)
 >>
>> That's really only because we don't have an exit strategy for submission
>> failure. I would rather see all parts of the submission process able to
>> return errors (e.g. hardware not responding) and let the top-level code
>> implement some sort of recovery strategy. Anyway, I'm leaving this here
>> until someone changes the call signature to void.
>>
>>>>   }
>>>>
>>>>   /*
>>>> diff --git a/drivers/gpu/drm/i915/intel_guc.h
>>>> b/drivers/gpu/drm/i915/intel_guc.h
>>>> index 436f2d6..10e1d5e 100644
>>>> --- a/drivers/gpu/drm/i915/intel_guc.h
>>>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>>>> @@ -74,7 +74,7 @@ struct i915_guc_client {
>>>>       /* GuC submission statistics & status */
>>>>       uint64_t submissions[GUC_MAX_ENGINES_NUM];
>>>>       uint32_t no_wq_space;        /* Space pre-check failed */
>>>> -    uint32_t q_fail;        /* Failed to queue (MBZ)    */
>>>> +    uint32_t q_fail;        /* No longer used        */
>>>
>>> Get rid of it then?
>> No, it keeps things aligned. It can be reused for the next thing we need
>> to add.
>
> AFAIR u32s need to be 4-byte aligned on x86_64 so I don't get it but
> whatever.
>
> Regards,
> Tvrtko

It's not just about keeping things DWord aligned, but also optimising 
cache-line placement. One recommendation (from an Intel-internal site) 
suggests anything up to 16 bytes be aligned to the power-of-two value 
equal to or greater than its own size, and anything over 64 bytes (in 
aggregate) should be aligned to 64 bytes (1 cacheline). At present, this 
structure exactly fits a multiple of 64, so we might as well just leave 
the spare word for reuse later.

.Dave.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 3/5] drm/i915/guc: don't spinwait if the GuC's workqueue is full
  2016-04-29 15:45   ` Tvrtko Ursulin
@ 2016-05-06 15:17     ` Dave Gordon
  2016-05-10 14:44       ` Tvrtko Ursulin
  0 siblings, 1 reply; 24+ messages in thread
From: Dave Gordon @ 2016-05-06 15:17 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

On 29/04/16 16:45, Tvrtko Ursulin wrote:
>
> One late comment:
>
> On 27/04/16 19:03, Dave Gordon wrote:
>> Rather than wait to see whether more space becomes available in the GuC
>> submission workqueue, we can just return -EAGAIN and let the caller try
>> again in a little while. This gets rid of an uninterruptable sleep in
>> the polling code :)
>>
>> We'll also add a counter to the GuC client statistics, to see how often
>> we find the WQ full.
>>
>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_debugfs.c        |  1 +
>>   drivers/gpu/drm/i915/i915_guc_submission.c | 16 +++++-----------
>>   drivers/gpu/drm/i915/intel_guc.h           |  8 ++++----
>>   3 files changed, 10 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>> b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 8b8d6f0..1024947 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -2509,6 +2509,7 @@ static void i915_guc_client_info(struct seq_file
>> *m,
>>       seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
>>           client->wq_size, client->wq_offset, client->wq_tail);
>>
>> +    seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
>>       seq_printf(m, "\tFailed to queue: %u\n", client->q_fail);
>>       seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
>>       seq_printf(m, "\tLast submission result: %d\n", client->retcode);
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index 66af5ce..6626eff 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -453,27 +453,21 @@ static void guc_fini_ctx_desc(struct intel_guc
>> *guc,
>>
>>   int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
>>   {
>> -    const size_t size = sizeof(struct guc_wq_item);
>> +    const size_t wqi_size = sizeof(struct guc_wq_item);
>>       struct i915_guc_client *gc = request->i915->guc.execbuf_client;
>>       struct guc_process_desc *desc;
>> -    int ret = -ETIMEDOUT, timeout_counter = 200;
>>
>>       if (!gc)
>>           return 0;
>>
>>       desc = gc->client_base + gc->proc_desc_offset;
>>
>> -    while (timeout_counter-- > 0) {
>> -        if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
>> -            ret = 0;
>> -            break;
>> -        }
>> +    if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= wqi_size)
>> +        return 0;
>>
>> -        if (timeout_counter)
>> -            usleep_range(1000, 2000);
>> -    };
>> +    gc->no_wq_space += 1;
>>
>> -    return ret;
>> +    return -EAGAIN;
>>   }
>>
>>   static int guc_add_workqueue_item(struct i915_guc_client *gc,
>> diff --git a/drivers/gpu/drm/i915/intel_guc.h
>> b/drivers/gpu/drm/i915/intel_guc.h
>> index b37c731..436f2d6 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>> @@ -73,10 +73,10 @@ struct i915_guc_client {
>>
>>       /* GuC submission statistics & status */
>>       uint64_t submissions[GUC_MAX_ENGINES_NUM];
>> -    uint32_t q_fail;
>> -    uint32_t b_fail;
>> -    int retcode;
>> -    int spare;            /* pad to 32 DWords        */
>> +    uint32_t no_wq_space;        /* Space pre-check failed    */
>> +    uint32_t q_fail;        /* Failed to queue (MBZ)    */
>> +    uint32_t b_fail;        /* Doorbell failure (MBZ)    */
>
> Why MBZ? It is not all used in this context so this will just confuse
> people.

MBZ => Must Be Zero. As in, we can't really deal with the events that 
cause these counters to be incremented, so if they're nonzero, something 
is broken and the driver may or may not recover :(

If the call protocol is changed, the MBZ variables may go away entirely.

.Dave.

>> +    int retcode;            /* Result of last guc_submit()    */
>>   };
>>
>>   enum intel_guc_fw_status {
>
>
> Regards,
>
> Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 1/5] drm/i915/guc: add enable_guc_loading parameter
  2016-04-29 15:03 ` [PATCH v2 1/5] " Tvrtko Ursulin
@ 2016-05-06 16:39   ` Dave Gordon
  2016-05-10 14:37     ` Tvrtko Ursulin
  0 siblings, 1 reply; 24+ messages in thread
From: Dave Gordon @ 2016-05-06 16:39 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

On 29/04/16 16:03, Tvrtko Ursulin wrote:
>
> Hi,
>
> On 27/04/16 19:03, Dave Gordon wrote:
>> Split the function of "enable_guc_submission" into two separate
>> options.  The new one ("enable_guc_loading") controls only the
>> *fetching and loading* of the GuC firmware image. The existing
>> one is redefined to control only the *use* of the GuC for batch
>> submission once the firmware is loaded.
>>
>> In addition, the degree of control has been refined from a simple
>> bool to an integer key, allowing several options:
>>   -1 (default)     whatever the platform default is
>>    0  DISABLE      don't load/use the GuC
>>    1  BEST EFFORT  try to load/use the GuC, fallback if not available
>>    2  REQUIRE      must load/use the GuC, else leave the GPU wedged
>>
>> The new platform default (as coded here) will be to attempt to
>> load the GuC iff the device has a GuC that requires firmware,
>> but not yet to use it for submission. A later patch will change
>> to enable it if appropriate.
>>
>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_gem.c            |  1 -
>>   drivers/gpu/drm/i915/i915_guc_submission.c |  4 +-
>>   drivers/gpu/drm/i915/i915_params.c         | 14 ++++-
>>   drivers/gpu/drm/i915/i915_params.h         |  3 +-
>>   drivers/gpu/drm/i915/intel_guc_loader.c    | 98
>> ++++++++++++++++--------------
>>   drivers/gpu/drm/i915/intel_uncore.c        |  2 +-
>>   6 files changed, 70 insertions(+), 52 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c
>> b/drivers/gpu/drm/i915/i915_gem.c
>> index d493e79..b04effc 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -4910,7 +4910,6 @@ int i915_gem_init_engines(struct drm_device *dev)
>>           ret = intel_guc_ucode_load(dev);
>>           if (ret) {
>>               DRM_ERROR("Failed to initialize GuC, error %d\n", ret);
>> -            ret = -EIO;
>>               goto out;
>>           }
>>       }
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index 72d6665..42d2efa 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -970,7 +970,7 @@ int intel_guc_suspend(struct drm_device *dev)
>>       struct intel_context *ctx;
>>       u32 data[3];
>>
>> -    if (!i915.enable_guc_submission)
>> +    if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS)
>>           return 0;
>>
>>       ctx = dev_priv->kernel_context;
>> @@ -996,7 +996,7 @@ int intel_guc_resume(struct drm_device *dev)
>>       struct intel_context *ctx;
>>       u32 data[3];
>>
>> -    if (!i915.enable_guc_submission)
>> +    if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS)
>>           return 0;
>
> Not terribly important and probably predates your work, but just spotted
> how this reads very redundant - guc->guc_fw.guc_fw_something, while it
> could be much more readable as guc->fw.load_status. Observation only.

That's almost the naming we had in the days when we had a generic 
(non-GuC-specific) firmware loader. But when that was rejected, each 
custom version had all the variables qualified with the specific target, 
hence the redundant naming :(

[snip]

>> --- a/drivers/gpu/drm/i915/intel_guc_loader.c
>> +++ b/drivers/gpu/drm/i915/intel_guc_loader.c
>> @@ -387,49 +387,37 @@ int intel_guc_ucode_load(struct drm_device *dev)
>>   {
>>       struct drm_i915_private *dev_priv = dev->dev_private;
>>       struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
>> +    const char *fw_path = guc_fw->guc_fw_path;
>>       int retries, err = 0;
>>
>> -    if (!i915.enable_guc_submission)
>> -        return 0;
>> -
>> -    DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n",
>> +    DRM_DEBUG_DRIVER("GuC fw status: path %s, fetch %s, load %s\n",
>> +        fw_path,
>>           intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status),
>>           intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));
>
> Should load status be anything other than GUC_FIRMWARE_NONE here?

During resume, it *could* be FAIL, meaning we've tried to load it before 
and it didn't work then. In that case we won't try again, as presumably 
it still won't work.

>> -    direct_interrupts_to_host(dev_priv);
>> +    /* Loading forbidden, or no firmware to load? */
>> +    if (!i915.enable_guc_loading)
>
> Nitpick, == 0 would perhaps make it more obvious this is not a boolean.

But here we are specifically choosing to treat it as a boolean. This 
reads: if GuC loading is NOT enabled (i.e. IS disabled) ...

The value 0 for disabled was not accidental. And note we don't create 
#defines for this sort of kernel parameter, because you have to give a 
literal constant on the kernel command line.

>> +        goto fail;
>> +    if (fw_path == NULL)
>> +        goto fail;
>> +    if (*fw_path == '\0') {
>> +        DRM_ERROR("No GuC firmware known for this platform\n");
>
> It is not an error unless i915.enable_guc_loading == 2, no? And if best
> effort then it is probably debug or informational.

No, it's still an ERROR. You're running the driver on a platform for 
which we don't know what firmware is required. That probably means an 
old driver on new hardware, so it might not work at all. You can 
suppress the error by setting i915.enable_guc_loading=0 if you want to 
try this version of the driver anyway. Also note the difference between 
path == NULL (no GuC, or no firmware required => not an error) vs. path 
== "" (has GuC, presumably needs firmware, but we don't know where to 
look => ERROR).

> Also, don't the checks against fw_path (together with the error or debug
> message) belong in the fw fetch function? If they are invalid fw fetch
> would have failed and this function would be able to inspect the high
> level status of that step here, no?

The checks are done in intel_guc_ucode_init(), before fw_fetch() is even 
called; but that function is void, so can't return failure. (Also, we 
originally supported asynchronous loading, which also can't return 
failure). So this function will get called even when we already know 
that we haven't got any firmware to load, and these tests are indeed 
checking the high-level status from _init().

>> +        goto fail;
>> +    }
>>
>> -    if (guc_fw->guc_fw_fetch_status == GUC_FIRMWARE_NONE)
>> -        return 0;
>> +    /* Fetch failed, or already fetched but failed to load? */
>> +    if (guc_fw->guc_fw_fetch_status != GUC_FIRMWARE_SUCCESS)
>> +        goto fail;
>> +    if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_FAIL)
>> +        goto fail;
>
> Leads back to the question of load status in this function. So it is
> expected we always enter here with load status of none? Is it possible
> to get here with the firmware already loaded already?

Not *actually* loaded, because it's been erased by poweroff. But the 
status tracking variables are persistent, so they reflect the last 
attempt. So on resume, we actually expect "SUCCESS" at this point, and 
therefore change it back to PENDING below.

>> -    if (guc_fw->guc_fw_fetch_status == GUC_FIRMWARE_SUCCESS &&
>> -        guc_fw->guc_fw_load_status == GUC_FIRMWARE_FAIL)
>> -        return -ENOEXEC;
>> +    direct_interrupts_to_host(dev_priv);
>>
>>       guc_fw->guc_fw_load_status = GUC_FIRMWARE_PENDING;
>>
>> -    DRM_DEBUG_DRIVER("GuC fw fetch status %s\n",
>> -        intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status));
>> -
>> -    switch (guc_fw->guc_fw_fetch_status) {
>> -    case GUC_FIRMWARE_FAIL:
>> -        /* something went wrong :( */
>> -        err = -EIO;
>> -        goto fail;
>> -
>> -    case GUC_FIRMWARE_NONE:
>> -    case GUC_FIRMWARE_PENDING:
>> -    default:
>> -        /* "can't happen" */
>> -        WARN_ONCE(1, "GuC fw %s invalid guc_fw_fetch_status %s [%d]\n",
>> -            guc_fw->guc_fw_path,
>> -            intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status),
>> -            guc_fw->guc_fw_fetch_status);
>> -        err = -ENXIO;
>> -        goto fail;
>> -
>> -    case GUC_FIRMWARE_SUCCESS:
>> -        break;
>> -    }
>> +    DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n",
>> +        intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status),
>> +        intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));
>>
>>       err = i915_guc_submission_init(dev);
>>       if (err)
>> @@ -483,6 +471,7 @@ int intel_guc_ucode_load(struct drm_device *dev)
>>
>>   fail:
>>       DRM_ERROR("GuC firmware load failed, err %d\n", err);
>
> Same as above I think error must be dependent on the requested mode.
> Some customers are very sensitive to errors which are not really errors
> so it is bad to log them when they are not.

No, it's still an ERROR. enable_guc_loading must be nonzero, so we've 
been asked to *try* to load the GuC. If the load fails, that means 
broken hardware or a corrupt firmware blob, or some other form of system 
misconfiguration. Even if we're going to fall back to execlist mode, 
that needs to be reported; for example, it may mean that SLPC is 
disabled. The user can avoid the error by booting with GuC loading 
disabled, but they should probably fix the problem instead.

>> +
>>       if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_PENDING)
>>           guc_fw->guc_fw_load_status = GUC_FIRMWARE_FAIL;
>>
>> @@ -490,6 +479,29 @@ int intel_guc_ucode_load(struct drm_device *dev)
>>       i915_guc_submission_disable(dev);
>>       i915_guc_submission_fini(dev);
>>
>> +    /*
>> +     * We've failed to load the firmware :(
>> +     *
>> +     * Decide whether to disable GuC submission and fall back to
>> +     * execlist mode, and whether to hide the error by returning
>> +     * zero or to return -EIO, which the caller will treat as a
>> +     * nonfatal error (i.e. it doesn't prevent driver load, but
>> +     * marks the GPU as wedged until reset).
>> +     */
>> +    if (i915.enable_guc_loading > 1) {
>> +        err = -EIO;
>> +    } else if (HAS_GUC_SCHED(dev) && !HAS_GUC_UCODE(dev)) {
>> +        return 0;
>
> i915_gem_init_hw already guards the call to intel_guc_ucode_load with
> HAS_GUC_UCODE so at the moment at least this is a dead branch.
>
> I don't even understand what is this branch supposed to do? How can
> there be a platform with no guc fw but guc scheduling?

Imagine a GuC with firmware in ROM :) Or at least flash ...

(it already has a BootROM)

>> +    } else if (i915.enable_guc_submission > 1) {
>> +        err = -EIO;
>> +    } else {
>> +        err = 0;
>> +    }
>> +
>> +    i915.enable_guc_submission = 0;
>> +
>> +    DRM_DEBUG_DRIVER("falling back to execlist mode, err %d\n", err);
>> +
>
> This would log when i915.enable_guc_loading is set to 0 which would be
> confusing. I think in this case the function should bail out much earlier.

That was tested right back at the top of the function! It bailed out 
very early, so you can't get here with GuC loading disabled.

Also, that's a DEBUG message, so users won't see it by default.

>>       return err;
>>   }
>>
>> @@ -631,8 +643,11 @@ void intel_guc_ucode_init(struct drm_device *dev)
>>       struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
>>       const char *fw_path;
>>
>> -    if (!HAS_GUC_SCHED(dev))
>> -        i915.enable_guc_submission = false;
>> +    /* A negative value means "use platform default" */
>> +    if (i915.enable_guc_loading < 0)
>> +        i915.enable_guc_loading = HAS_GUC_UCODE(dev);
>> +    if (i915.enable_guc_submission < 0)
>> +        i915.enable_guc_submission = HAS_GUC_SCHED(dev);
>
> With this setup currently there is no difference between -1 and 1. But I
> can assume maybe in the future we could have -1 mean 2 on some platform
> which would then justify having four possible values for each?

Yes. 0, 1, and 2 are "user preference". -1 is "system's choice". So 
there *is* a difference between -1 and 1, because -1 means the same as 0 
on non-GuC platforms, but the same as 1 on those that have a GuC, or 
could mean the same as 2 on GuC-only platforms.

>>       if (!HAS_GUC_UCODE(dev)) {
>>           fw_path = NULL;
>> @@ -641,26 +656,21 @@ void intel_guc_ucode_init(struct drm_device *dev)
>>           guc_fw->guc_fw_major_wanted = 6;
>>           guc_fw->guc_fw_minor_wanted = 1;
>>       } else {
>> -        i915.enable_guc_submission = false;
>>           fw_path = "";    /* unknown device */
>>       }
>
> Confusing block, HAS_GUC_UCODE is defined as (IS_GEN9(dev) &&
> !IS_KABYLAKE(dev)) but then here we only support SKL here. Why the
> former is then not just IS_SKYLAKE?
>
> When BXT support is added this still needs to be modified and would only
> save touching HAS_GUC_UCODE in the header. But it must be a better reason?

I don't see anything confusing. The logic does not depend on how 
somebody has defined HAS_GUC_UCODE(), and we shouldn't assume any 
relation between it and the platform macros. What it says here is:

1.  if this platform *doesn't* have GuC firmware, fw_path is NULL
2a. else if this is SKYLAKE, look for f/w version 6.1
2b. (repeat 2a for each supported platform)
3.  (else) unknown device, path is "" which triggers ERROR later.

Imagine a SKL version with GuC uCode in ROM - the HAS_GUC_UCODE() test 
must take precedence.

>> -    if (!i915.enable_guc_submission)
>> -        return;
>> -
>>       guc_fw->guc_dev = dev;
>>       guc_fw->guc_fw_path = fw_path;
>>       guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_NONE;
>>       guc_fw->guc_fw_load_status = GUC_FIRMWARE_NONE;
>>
>> +    /* Early (and silent) return if GuC loading is disabled */
>> +    if (!i915.enable_guc_loading)
>> +        return;
>>       if (fw_path == NULL)
>>           return;
>> -
>> -    if (*fw_path == '\0') {
>> -        DRM_ERROR("No GuC firmware known for this platform\n");
>> -        guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_FAIL;
>> +    if (*fw_path == '\0')
>>           return;
>> -    }
>
> I also do not understand the complications with fw_path (either NULL or
> ""). In the two cases fetch status will be either none or fail,
> respectively, which will equally cause intel_guc_ucode_load to hit the
> failure path (fw_fetch_status != success).
>
> So Is it really required for the fw_path to can either be NULL or ""?

As mentioned before, it's so that the status gets propagated from 
_init() to _load(). A Previous Reviewer didn't like issuing errors here, 
so the error report got deferred until we actually try to load the 
firmware. (Also, someday, we might reenable asynchronous (deferred) 
firmware loading. especially on Android).

>>       guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_PENDING;
>>       DRM_DEBUG_DRIVER("GuC firmware pending, path %s\n", fw_path);
>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c
>> b/drivers/gpu/drm/i915/intel_uncore.c
>> index 4f1dfe6..df698d7 100644
>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>> @@ -1758,7 +1758,7 @@ int intel_guc_reset(struct drm_i915_private
>> *dev_priv)
>>       int ret;
>>       unsigned long irqflags;
>>
>> -    if (!i915.enable_guc_submission)
>> +    if (!HAS_GUC_UCODE(dev_priv))
>>           return -EINVAL;
>
> What if HAS_GUC_UCODE is true but the i915.load_guc_firmware has been
> set to zero? Should it skip the reset in that case as well?

Probably not. This test should probably be HAS_GUC(), regardless of 
whether it has f/w or whether you want the f/w loaded. But there isn't 
any such macro, so this was the closest.

.Dave.

>>       intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
>>
>
> Regards,
>
> Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 3/5] drm/i915/guc: don't spinwait if the GuC's workqueue is full
  2016-04-29 15:17   ` Tvrtko Ursulin
@ 2016-05-06 17:12     ` Dave Gordon
  0 siblings, 0 replies; 24+ messages in thread
From: Dave Gordon @ 2016-05-06 17:12 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

On 29/04/16 16:17, Tvrtko Ursulin wrote:
>
> On 27/04/16 19:03, Dave Gordon wrote:
>> Rather than wait to see whether more space becomes available in the GuC
>> submission workqueue, we can just return -EAGAIN and let the caller try
>> again in a little while. This gets rid of an uninterruptable sleep in
>> the polling code :)
>>
>> We'll also add a counter to the GuC client statistics, to see how often
>> we find the WQ full.
>>
>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_debugfs.c        |  1 +
>>   drivers/gpu/drm/i915/i915_guc_submission.c | 16 +++++-----------
>>   drivers/gpu/drm/i915/intel_guc.h           |  8 ++++----
>>   3 files changed, 10 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>> b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 8b8d6f0..1024947 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -2509,6 +2509,7 @@ static void i915_guc_client_info(struct seq_file
>> *m,
>>       seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
>>           client->wq_size, client->wq_offset, client->wq_tail);
>>
>> +    seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
>>       seq_printf(m, "\tFailed to queue: %u\n", client->q_fail);
>>       seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
>>       seq_printf(m, "\tLast submission result: %d\n", client->retcode);
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index 66af5ce..6626eff 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -453,27 +453,21 @@ static void guc_fini_ctx_desc(struct intel_guc
>> *guc,
>>
>>   int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
>>   {
>> -    const size_t size = sizeof(struct guc_wq_item);
>> +    const size_t wqi_size = sizeof(struct guc_wq_item);
>>       struct i915_guc_client *gc = request->i915->guc.execbuf_client;
>>       struct guc_process_desc *desc;
>> -    int ret = -ETIMEDOUT, timeout_counter = 200;
>>
>>       if (!gc)
>>           return 0;
>
> Not part of this patch but spotted it - can this really happen, and if
> so, does it warrant a WARN_ONCE, GEM_BUG_ON or something?

It certainly shouldn't, as this function shouldn't be called unless 
enable_guc_submission is nonzero, and it's set to zero if client 
allocation fails. So it could be a GEM_BUG_ON(), and it should be part 
of the previous patch, which changed this from a parameter to getting it 
out of the dev_private.

>>       desc = gc->client_base + gc->proc_desc_offset;
>>
>> -    while (timeout_counter-- > 0) {
>> -        if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
>> -            ret = 0;
>> -            break;
>> -        }
>> +    if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= wqi_size)
>> +        return 0;
>
> Could stick a likely here to lay out the code for the expected case but
> it could be just OCD. :)

OK, though it makes the line too long. :-/
I'll just calculate the freespace, then compare separately.

>> -        if (timeout_counter)
>> -            usleep_range(1000, 2000);
>> -    };
>> +    gc->no_wq_space += 1;
>>
>> -    return ret;
>> +    return -EAGAIN;
>
> I suppose it is OK to make userspace pay the cost since this should be
> extremely rare, correct?

It *should* be rare. It could mean the GuC has hung, or the GPU has hung 
so the GuC can't dequeue anything. Whatever it means, someone needs to 
back off. Then maybe TDR will cut in -- or maybe stuff will complete 
after all.

If the errno gets back to userland, it can just retry, or wait for some 
other request to complete first. It doesn't have to be userland, though; 
the scheduler will also back off and try again later in this situation.

Either way, anything is better than sleeping (uninterruptibly) in the 
driver!

>>   }
>>
>>   static int guc_add_workqueue_item(struct i915_guc_client *gc,
>> diff --git a/drivers/gpu/drm/i915/intel_guc.h
>> b/drivers/gpu/drm/i915/intel_guc.h
>> index b37c731..436f2d6 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>> @@ -73,10 +73,10 @@ struct i915_guc_client {
>>
>>       /* GuC submission statistics & status */
>>       uint64_t submissions[GUC_MAX_ENGINES_NUM];
>> -    uint32_t q_fail;
>> -    uint32_t b_fail;
>> -    int retcode;
>> -    int spare;            /* pad to 32 DWords        */
>> +    uint32_t no_wq_space;        /* Space pre-check failed    */
>> +    uint32_t q_fail;        /* Failed to queue (MBZ)    */
>> +    uint32_t b_fail;        /* Doorbell failure (MBZ)    */
>> +    int retcode;            /* Result of last guc_submit()    */
>>   };
>>
>>   enum intel_guc_fw_status {
>>
>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Regards,
> Tvrtko

Thanks,
.Dave.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 1/5] drm/i915/guc: add enable_guc_loading parameter
  2016-05-06 16:39   ` Dave Gordon
@ 2016-05-10 14:37     ` Tvrtko Ursulin
  2016-05-13 14:36       ` Dave Gordon
  0 siblings, 1 reply; 24+ messages in thread
From: Tvrtko Ursulin @ 2016-05-10 14:37 UTC (permalink / raw)
  To: Dave Gordon, intel-gfx


On 06/05/16 17:39, Dave Gordon wrote:
> On 29/04/16 16:03, Tvrtko Ursulin wrote:

[snip]

>>> +        goto fail;
>>> +    if (fw_path == NULL)
>>> +        goto fail;
>>> +    if (*fw_path == '\0') {
>>> +        DRM_ERROR("No GuC firmware known for this platform\n");
>>
>> It is not an error unless i915.enable_guc_loading == 2, no? And if best
>> effort then it is probably debug or informational.
>
> No, it's still an ERROR. You're running the driver on a platform for
> which we don't know what firmware is required. That probably means an
> old driver on new hardware, so it might not work at all. You can
> suppress the error by setting i915.enable_guc_loading=0 if you want to
> try this version of the driver anyway. Also note the difference between
> path == NULL (no GuC, or no firmware required => not an error) vs. path
> == "" (has GuC, presumably needs firmware, but we don't know where to
> look => ERROR).

I think that if i915.enable_guc_loading == 1 then no error should be 
logged. Documentation says that value meand "try to load/use the GuC, 
fallback if not available" and to me that means it is not an error and 
an informational message only should be logged.

>> Also, don't the checks against fw_path (together with the error or debug
>> message) belong in the fw fetch function? If they are invalid fw fetch
>> would have failed and this function would be able to inspect the high
>> level status of that step here, no?
>
> The checks are done in intel_guc_ucode_init(), before fw_fetch() is even
> called; but that function is void, so can't return failure. (Also, we
> originally supported asynchronous loading, which also can't return
> failure). So this function will get called even when we already know
> that we haven't got any firmware to load, and these tests are indeed
> checking the high-level status from _init().

Anyhow the special meanings conveyed in fw_path == NULL and *fw_path == 
0 are imho just too hard to follow. I see it is not your code, but 
reworking this looked like an opportunity to clean that up. Never mind.

>>> +        goto fail;
>>> +    }
>>>
>>> -    if (guc_fw->guc_fw_fetch_status == GUC_FIRMWARE_NONE)
>>> -        return 0;
>>> +    /* Fetch failed, or already fetched but failed to load? */
>>> +    if (guc_fw->guc_fw_fetch_status != GUC_FIRMWARE_SUCCESS)
>>> +        goto fail;
>>> +    if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_FAIL)
>>> +        goto fail;
>>
>> Leads back to the question of load status in this function. So it is
>> expected we always enter here with load status of none? Is it possible
>> to get here with the firmware already loaded already?
>
> Not *actually* loaded, because it's been erased by poweroff. But the
> status tracking variables are persistent, so they reflect the last
> attempt. So on resume, we actually expect "SUCCESS" at this point, and
> therefore change it back to PENDING below.

Shouldn't the code then update the status variables on suspend/whatever? 
Same as above, I find it very hard to follow the logic here.

>>> -    if (guc_fw->guc_fw_fetch_status == GUC_FIRMWARE_SUCCESS &&
>>> -        guc_fw->guc_fw_load_status == GUC_FIRMWARE_FAIL)
>>> -        return -ENOEXEC;
>>> +    direct_interrupts_to_host(dev_priv);
>>>
>>>       guc_fw->guc_fw_load_status = GUC_FIRMWARE_PENDING;
>>>
>>> -    DRM_DEBUG_DRIVER("GuC fw fetch status %s\n",
>>> -        intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status));
>>> -
>>> -    switch (guc_fw->guc_fw_fetch_status) {
>>> -    case GUC_FIRMWARE_FAIL:
>>> -        /* something went wrong :( */
>>> -        err = -EIO;
>>> -        goto fail;
>>> -
>>> -    case GUC_FIRMWARE_NONE:
>>> -    case GUC_FIRMWARE_PENDING:
>>> -    default:
>>> -        /* "can't happen" */
>>> -        WARN_ONCE(1, "GuC fw %s invalid guc_fw_fetch_status %s [%d]\n",
>>> -            guc_fw->guc_fw_path,
>>> -            intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status),
>>> -            guc_fw->guc_fw_fetch_status);
>>> -        err = -ENXIO;
>>> -        goto fail;
>>> -
>>> -    case GUC_FIRMWARE_SUCCESS:
>>> -        break;
>>> -    }
>>> +    DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n",
>>> +        intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status),
>>> +        intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));
>>>
>>>       err = i915_guc_submission_init(dev);
>>>       if (err)
>>> @@ -483,6 +471,7 @@ int intel_guc_ucode_load(struct drm_device *dev)
>>>
>>>   fail:
>>>       DRM_ERROR("GuC firmware load failed, err %d\n", err);
>>
>> Same as above I think error must be dependent on the requested mode.
>> Some customers are very sensitive to errors which are not really errors
>> so it is bad to log them when they are not.
>
> No, it's still an ERROR. enable_guc_loading must be nonzero, so we've
> been asked to *try* to load the GuC. If the load fails, that means
> broken hardware or a corrupt firmware blob, or some other form of system
> misconfiguration. Even if we're going to fall back to execlist mode,
> that needs to be reported; for example, it may mean that SLPC is
> disabled. The user can avoid the error by booting with GuC loading
> disabled, but they should probably fix the problem instead.

Yes it needs to be reported, but if we are in best effort mode it should 
be informational I think. Depends how you view the errors in the kernel 
log - it may be a firmware loading error, but from the point of view of 
the system log, it is not an error. Everything still works as intended, 
more so, the user has asked us to try and carry on with the alternative. 
So the system (log) experienced no error.

>>> +
>>>       if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_PENDING)
>>>           guc_fw->guc_fw_load_status = GUC_FIRMWARE_FAIL;
>>>
>>> @@ -490,6 +479,29 @@ int intel_guc_ucode_load(struct drm_device *dev)
>>>       i915_guc_submission_disable(dev);
>>>       i915_guc_submission_fini(dev);
>>>
>>> +    /*
>>> +     * We've failed to load the firmware :(
>>> +     *
>>> +     * Decide whether to disable GuC submission and fall back to
>>> +     * execlist mode, and whether to hide the error by returning
>>> +     * zero or to return -EIO, which the caller will treat as a
>>> +     * nonfatal error (i.e. it doesn't prevent driver load, but
>>> +     * marks the GPU as wedged until reset).
>>> +     */
>>> +    if (i915.enable_guc_loading > 1) {
>>> +        err = -EIO;
>>> +    } else if (HAS_GUC_SCHED(dev) && !HAS_GUC_UCODE(dev)) {
>>> +        return 0;
>>
>> i915_gem_init_hw already guards the call to intel_guc_ucode_load with
>> HAS_GUC_UCODE so at the moment at least this is a dead branch.
>>
>> I don't even understand what is this branch supposed to do? How can
>> there be a platform with no guc fw but guc scheduling?
>
> Imagine a GuC with firmware in ROM :) Or at least flash ...
>
> (it already has a BootROM)

Ok but it is still a dead branch, no? Should the HAS_GUC_UCODE guard in 
i915_gem_init_hw be removed and let be handled by this code only?

>>> +    } else if (i915.enable_guc_submission > 1) {
>>> +        err = -EIO;
>>> +    } else {
>>> +        err = 0;
>>> +    }
>>> +
>>> +    i915.enable_guc_submission = 0;
>>> +
>>> +    DRM_DEBUG_DRIVER("falling back to execlist mode, err %d\n", err);
>>> +
>>
>> This would log when i915.enable_guc_loading is set to 0 which would be
>> confusing. I think in this case the function should bail out much
>> earlier.
>
> That was tested right back at the top of the function! It bailed out
> very early, so you can't get here with GuC loading disabled.

AFAICS the patch removes the early bailout?!

> Also, that's a DEBUG message, so users won't see it by default.

So it is OK to confuse fellow developers? :D

>>>       if (!HAS_GUC_UCODE(dev)) {
>>>           fw_path = NULL;
>>> @@ -641,26 +656,21 @@ void intel_guc_ucode_init(struct drm_device *dev)
>>>           guc_fw->guc_fw_major_wanted = 6;
>>>           guc_fw->guc_fw_minor_wanted = 1;
>>>       } else {
>>> -        i915.enable_guc_submission = false;
>>>           fw_path = "";    /* unknown device */
>>>       }
>>
>> Confusing block, HAS_GUC_UCODE is defined as (IS_GEN9(dev) &&
>> !IS_KABYLAKE(dev)) but then here we only support SKL here. Why the
>> former is then not just IS_SKYLAKE?
>>
>> When BXT support is added this still needs to be modified and would only
>> save touching HAS_GUC_UCODE in the header. But it must be a better
>> reason?
>
> I don't see anything confusing. The logic does not depend on how
> somebody has defined HAS_GUC_UCODE(), and we shouldn't assume any
> relation between it and the platform macros. What it says here is:
>
> 1.  if this platform *doesn't* have GuC firmware, fw_path is NULL
> 2a. else if this is SKYLAKE, look for f/w version 6.1
> 2b. (repeat 2a for each supported platform)
> 3.  (else) unknown device, path is "" which triggers ERROR later.
>
> Imagine a SKL version with GuC uCode in ROM - the HAS_GUC_UCODE() test
> must take precedence.

My question was, why isn't HAS_GUC_UCODE == IS_SKYLAKE to start with ? 
Then when BXT support is added, which will need code changes anyway, we 
change HAS_GUC_UCODE only.

>>> -    if (!i915.enable_guc_submission)
>>> -        return;
>>> -
>>>       guc_fw->guc_dev = dev;
>>>       guc_fw->guc_fw_path = fw_path;
>>>       guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_NONE;
>>>       guc_fw->guc_fw_load_status = GUC_FIRMWARE_NONE;
>>>
>>> +    /* Early (and silent) return if GuC loading is disabled */
>>> +    if (!i915.enable_guc_loading)
>>> +        return;
>>>       if (fw_path == NULL)
>>>           return;
>>> -
>>> -    if (*fw_path == '\0') {
>>> -        DRM_ERROR("No GuC firmware known for this platform\n");
>>> -        guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_FAIL;
>>> +    if (*fw_path == '\0')
>>>           return;
>>> -    }
>>
>> I also do not understand the complications with fw_path (either NULL or
>> ""). In the two cases fetch status will be either none or fail,
>> respectively, which will equally cause intel_guc_ucode_load to hit the
>> failure path (fw_fetch_status != success).
>>
>> So Is it really required for the fw_path to can either be NULL or ""?
>
> As mentioned before, it's so that the status gets propagated from
> _init() to _load(). A Previous Reviewer didn't like issuing errors here,
> so the error report got deferred until we actually try to load the
> firmware. (Also, someday, we might reenable asynchronous (deferred)
> firmware loading. especially on Android).

It is all very complicated, do we have an IGT to test all these new 
options? I would trust that more than my ability to figure out the logic 
here.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 3/5] drm/i915/guc: don't spinwait if the GuC's workqueue is full
  2016-05-06 15:17     ` Dave Gordon
@ 2016-05-10 14:44       ` Tvrtko Ursulin
  0 siblings, 0 replies; 24+ messages in thread
From: Tvrtko Ursulin @ 2016-05-10 14:44 UTC (permalink / raw)
  To: Dave Gordon, intel-gfx


On 06/05/16 16:17, Dave Gordon wrote:
> On 29/04/16 16:45, Tvrtko Ursulin wrote:
>>
>> One late comment:
>>
>> On 27/04/16 19:03, Dave Gordon wrote:
>>> Rather than wait to see whether more space becomes available in the GuC
>>> submission workqueue, we can just return -EAGAIN and let the caller try
>>> again in a little while. This gets rid of an uninterruptable sleep in
>>> the polling code :)
>>>
>>> We'll also add a counter to the GuC client statistics, to see how often
>>> we find the WQ full.
>>>
>>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_debugfs.c        |  1 +
>>>   drivers/gpu/drm/i915/i915_guc_submission.c | 16 +++++-----------
>>>   drivers/gpu/drm/i915/intel_guc.h           |  8 ++++----
>>>   3 files changed, 10 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>>> b/drivers/gpu/drm/i915/i915_debugfs.c
>>> index 8b8d6f0..1024947 100644
>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>> @@ -2509,6 +2509,7 @@ static void i915_guc_client_info(struct seq_file
>>> *m,
>>>       seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
>>>           client->wq_size, client->wq_offset, client->wq_tail);
>>>
>>> +    seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
>>>       seq_printf(m, "\tFailed to queue: %u\n", client->q_fail);
>>>       seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
>>>       seq_printf(m, "\tLast submission result: %d\n", client->retcode);
>>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>>> index 66af5ce..6626eff 100644
>>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>>> @@ -453,27 +453,21 @@ static void guc_fini_ctx_desc(struct intel_guc
>>> *guc,
>>>
>>>   int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
>>>   {
>>> -    const size_t size = sizeof(struct guc_wq_item);
>>> +    const size_t wqi_size = sizeof(struct guc_wq_item);
>>>       struct i915_guc_client *gc = request->i915->guc.execbuf_client;
>>>       struct guc_process_desc *desc;
>>> -    int ret = -ETIMEDOUT, timeout_counter = 200;
>>>
>>>       if (!gc)
>>>           return 0;
>>>
>>>       desc = gc->client_base + gc->proc_desc_offset;
>>>
>>> -    while (timeout_counter-- > 0) {
>>> -        if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
>>> -            ret = 0;
>>> -            break;
>>> -        }
>>> +    if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= wqi_size)
>>> +        return 0;
>>>
>>> -        if (timeout_counter)
>>> -            usleep_range(1000, 2000);
>>> -    };
>>> +    gc->no_wq_space += 1;
>>>
>>> -    return ret;
>>> +    return -EAGAIN;
>>>   }
>>>
>>>   static int guc_add_workqueue_item(struct i915_guc_client *gc,
>>> diff --git a/drivers/gpu/drm/i915/intel_guc.h
>>> b/drivers/gpu/drm/i915/intel_guc.h
>>> index b37c731..436f2d6 100644
>>> --- a/drivers/gpu/drm/i915/intel_guc.h
>>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>>> @@ -73,10 +73,10 @@ struct i915_guc_client {
>>>
>>>       /* GuC submission statistics & status */
>>>       uint64_t submissions[GUC_MAX_ENGINES_NUM];
>>> -    uint32_t q_fail;
>>> -    uint32_t b_fail;
>>> -    int retcode;
>>> -    int spare;            /* pad to 32 DWords        */
>>> +    uint32_t no_wq_space;        /* Space pre-check failed    */
>>> +    uint32_t q_fail;        /* Failed to queue (MBZ)    */
>>> +    uint32_t b_fail;        /* Doorbell failure (MBZ)    */
>>
>> Why MBZ? It is not all used in this context so this will just confuse
>> people.
>
> MBZ => Must Be Zero. As in, we can't really deal with the events that
> cause these counters to be incremented, so if they're nonzero, something
> is broken and the driver may or may not recover :(
>
> If the call protocol is changed, the MBZ variables may go away entirely.

My objection is that when someone sees MBZ they'll wrongly think this 
structure is shared with the hardware. Since it is just a software 
counter MBZ is a confusing marker to use.

Instead these fields should probably just be unsigned ints with comments 
saying something to the effect of what you wrote above.

Regards,

Tvrtko



_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 1/5] drm/i915/guc: add enable_guc_loading parameter
  2016-05-10 14:37     ` Tvrtko Ursulin
@ 2016-05-13 14:36       ` Dave Gordon
  0 siblings, 0 replies; 24+ messages in thread
From: Dave Gordon @ 2016-05-13 14:36 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

On 10/05/16 15:37, Tvrtko Ursulin wrote:
>
> On 06/05/16 17:39, Dave Gordon wrote:
>> On 29/04/16 16:03, Tvrtko Ursulin wrote:
>
> [snip]
>
>>>> +        goto fail;
>>>> +    if (fw_path == NULL)
>>>> +        goto fail;
>>>> +    if (*fw_path == '\0') {
>>>> +        DRM_ERROR("No GuC firmware known for this platform\n");
>>>
>>> It is not an error unless i915.enable_guc_loading == 2, no? And if best
>>> effort then it is probably debug or informational.
>>
>> No, it's still an ERROR. You're running the driver on a platform for
>> which we don't know what firmware is required. That probably means an
>> old driver on new hardware, so it might not work at all. You can
>> suppress the error by setting i915.enable_guc_loading=0 if you want to
>> try this version of the driver anyway. Also note the difference between
>> path == NULL (no GuC, or no firmware required => not an error) vs. path
>> == "" (has GuC, presumably needs firmware, but we don't know where to
>> look => ERROR).
>
> I think that if i915.enable_guc_loading == 1 then no error should be
> logged. Documentation says that value meand "try to load/use the GuC,
> fallback if not available" and to me that means it is not an error and
> an informational message only should be logged.

OK, this message is now DRM_INFO

>>> Also, don't the checks against fw_path (together with the error or debug
>>> message) belong in the fw fetch function? If they are invalid fw fetch
>>> would have failed and this function would be able to inspect the high
>>> level status of that step here, no?
>>
>> The checks are done in intel_guc_ucode_init(), before fw_fetch() is even
>> called; but that function is void, so can't return failure. (Also, we
>> originally supported asynchronous loading, which also can't return
>> failure). So this function will get called even when we already know
>> that we haven't got any firmware to load, and these tests are indeed
>> checking the high-level status from _init().
>
> Anyhow the special meanings conveyed in fw_path == NULL and *fw_path ==
> 0 are imho just too hard to follow. I see it is not your code, but
> reworking this looked like an opportunity to clean that up. Never mind.

For fw_path:
== NULL  means no firmware needed, not an error.
== ""    means the driver actually doesn't know what to load in this
          situation - the hardware is not recognised. IMHO this should
          be an ERROR, but I've made it just informational now.
== path  the file we want.

Those are the only three possibilities, it's set just once according the 
hardware type and is constant thereafter. It doesn't depend on the 
actual load status. That really isn't very complicated.

>>>> +        goto fail;
>>>> +    }
>>>>
>>>> -    if (guc_fw->guc_fw_fetch_status == GUC_FIRMWARE_NONE)
>>>> -        return 0;
>>>> +    /* Fetch failed, or already fetched but failed to load? */
>>>> +    if (guc_fw->guc_fw_fetch_status != GUC_FIRMWARE_SUCCESS)
>>>> +        goto fail;
>>>> +    if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_FAIL)
>>>> +        goto fail;
>>>
>>> Leads back to the question of load status in this function. So it is
>>> expected we always enter here with load status of none? Is it possible
>>> to get here with the firmware already loaded already?
>>
>> Not *actually* loaded, because it's been erased by poweroff. But the
>> status tracking variables are persistent, so they reflect the last
>> attempt. So on resume, we actually expect "SUCCESS" at this point, and
>> therefore change it back to PENDING below.
>
> Shouldn't the code then update the status variables on suspend/whatever?
> Same as above, I find it very hard to follow the logic here.

It proceeds by identifying all the ways in which the loading process can 
fail; what's left must be The Path To Success (tm) :)

1. Loading forbidden => fail
2. No firmware wanted => fail
3. No firmware known => fail
4. Firmware not fetched => fail
5. Previous load failed => fail

At this point we commit to trying to load the firmware into the h/w.
Then we try three times to reset-and-load the GuC.

>>>> -    if (guc_fw->guc_fw_fetch_status == GUC_FIRMWARE_SUCCESS &&
>>>> -        guc_fw->guc_fw_load_status == GUC_FIRMWARE_FAIL)
>>>> -        return -ENOEXEC;
>>>> +    direct_interrupts_to_host(dev_priv);
>>>>
>>>>       guc_fw->guc_fw_load_status = GUC_FIRMWARE_PENDING;
>>>>
>>>> -    DRM_DEBUG_DRIVER("GuC fw fetch status %s\n",
>>>> -        intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status));
>>>> -
>>>> -    switch (guc_fw->guc_fw_fetch_status) {
>>>> -    case GUC_FIRMWARE_FAIL:
>>>> -        /* something went wrong :( */
>>>> -        err = -EIO;
>>>> -        goto fail;
>>>> -
>>>> -    case GUC_FIRMWARE_NONE:
>>>> -    case GUC_FIRMWARE_PENDING:
>>>> -    default:
>>>> -        /* "can't happen" */
>>>> -        WARN_ONCE(1, "GuC fw %s invalid guc_fw_fetch_status %s
>>>> [%d]\n",
>>>> -            guc_fw->guc_fw_path,
>>>> -            intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status),
>>>> -            guc_fw->guc_fw_fetch_status);
>>>> -        err = -ENXIO;
>>>> -        goto fail;
>>>> -
>>>> -    case GUC_FIRMWARE_SUCCESS:
>>>> -        break;
>>>> -    }

Look how many lines of complicated logic this patch removes :)

>>>> +    DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n",
>>>> +        intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status),
>>>> +        intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));
>>>>
>>>>       err = i915_guc_submission_init(dev);
>>>>       if (err)
>>>> @@ -483,6 +471,7 @@ int intel_guc_ucode_load(struct drm_device *dev)
>>>>
>>>>   fail:
>>>>       DRM_ERROR("GuC firmware load failed, err %d\n", err);
>>>
>>> Same as above I think error must be dependent on the requested mode.
>>> Some customers are very sensitive to errors which are not really errors
>>> so it is bad to log them when they are not.
>>
>> No, it's still an ERROR. enable_guc_loading must be nonzero, so we've
>> been asked to *try* to load the GuC. If the load fails, that means
>> broken hardware or a corrupt firmware blob, or some other form of system
>> misconfiguration. Even if we're going to fall back to execlist mode,
>> that needs to be reported; for example, it may mean that SLPC is
>> disabled. The user can avoid the error by booting with GuC loading
>> disabled, but they should probably fix the problem instead.
>
> Yes it needs to be reported, but if we are in best effort mode it should
> be informational I think. Depends how you view the errors in the kernel
> log - it may be a firmware loading error, but from the point of view of
> the system log, it is not an error. Everything still works as intended,
> more so, the user has asked us to try and carry on with the alternative.
> So the system (log) experienced no error.

OK, I've moved it later, and it's now DRM_ERROR if we're going to return 
-EIO (GPU wedged) and DRM_INFO otherwise.

>>>> +
>>>>       if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_PENDING)
>>>>           guc_fw->guc_fw_load_status = GUC_FIRMWARE_FAIL;
>>>>
>>>> @@ -490,6 +479,29 @@ int intel_guc_ucode_load(struct drm_device *dev)
>>>>       i915_guc_submission_disable(dev);
>>>>       i915_guc_submission_fini(dev);
>>>>
>>>> +    /*
>>>> +     * We've failed to load the firmware :(
>>>> +     *
>>>> +     * Decide whether to disable GuC submission and fall back to
>>>> +     * execlist mode, and whether to hide the error by returning
>>>> +     * zero or to return -EIO, which the caller will treat as a
>>>> +     * nonfatal error (i.e. it doesn't prevent driver load, but
>>>> +     * marks the GPU as wedged until reset).
>>>> +     */
>>>> +    if (i915.enable_guc_loading > 1) {
>>>> +        err = -EIO;
>>>> +    } else if (HAS_GUC_SCHED(dev) && !HAS_GUC_UCODE(dev)) {
>>>> +        return 0;
>>>
>>> i915_gem_init_hw already guards the call to intel_guc_ucode_load with
>>> HAS_GUC_UCODE so at the moment at least this is a dead branch.
>>>
>>> I don't even understand what is this branch supposed to do? How can
>>> there be a platform with no guc fw but guc scheduling?
>>
>> Imagine a GuC with firmware in ROM :) Or at least flash ...
>>
>> (it already has a BootROM)
>
> Ok but it is still a dead branch, no? Should the HAS_GUC_UCODE guard in
> i915_gem_init_hw be removed and let be handled by this code only?

Yep, I've changed the call condition :)

>>>> +    } else if (i915.enable_guc_submission > 1) {
>>>> +        err = -EIO;
>>>> +    } else {
>>>> +        err = 0;
>>>> +    }
>>>> +
>>>> +    i915.enable_guc_submission = 0;
>>>> +
>>>> +    DRM_DEBUG_DRIVER("falling back to execlist mode, err %d\n", err);
>>>
>>> This would log when i915.enable_guc_loading is set to 0 which would be
>>> confusing. I think in this case the function should bail out much
>>> earlier.
>>
>> That was tested right back at the top of the function! It bailed out
>> very early, so you can't get here with GuC loading disabled.
>
> AFAICS the patch removes the early bailout?!
>
>> Also, that's a DEBUG message, so users won't see it by default.
>
> So it is OK to confuse fellow developers? :D

With the new reporting as described above, this message is promoted to 
DRM_INFO, but only if enable_guc_submission is not already zero. If GuC 
submission is already disabled, we won't report that we're falling back 
to execlist mode.

You can still confuse yourself by disabling GuC loading *without* also 
disabling GuC submission, but in that case the message should help you 
realise you asked for something rather strange :)

>>>>       if (!HAS_GUC_UCODE(dev)) {
>>>>           fw_path = NULL;
>>>> @@ -641,26 +656,21 @@ void intel_guc_ucode_init(struct drm_device *dev)
>>>>           guc_fw->guc_fw_major_wanted = 6;
>>>>           guc_fw->guc_fw_minor_wanted = 1;
>>>>       } else {
>>>> -        i915.enable_guc_submission = false;
>>>>           fw_path = "";    /* unknown device */
>>>>       }
>>>
>>> Confusing block, HAS_GUC_UCODE is defined as (IS_GEN9(dev) &&
>>> !IS_KABYLAKE(dev)) but then here we only support SKL here. Why the
>>> former is then not just IS_SKYLAKE?
>>>
>>> When BXT support is added this still needs to be modified and would only
>>> save touching HAS_GUC_UCODE in the header. But it must be a better
>>> reason?
>>
>> I don't see anything confusing. The logic does not depend on how
>> somebody has defined HAS_GUC_UCODE(), and we shouldn't assume any
>> relation between it and the platform macros. What it says here is:
>>
>> 1.  if this platform *doesn't* have GuC firmware, fw_path is NULL
>> 2a. else if this is SKYLAKE, look for f/w version 6.1
>> 2b. (repeat 2a for each supported platform)
>> 3.  (else) unknown device, path is "" which triggers ERROR later.
>>
>> Imagine a SKL version with GuC uCode in ROM - the HAS_GUC_UCODE() test
>> must take precedence.
>
> My question was, why isn't HAS_GUC_UCODE == IS_SKYLAKE to start with ?
> Then when BXT support is added, which will need code changes anyway, we
> change HAS_GUC_UCODE only.

Probably because the original definition of HAS_GUC_UCODE() was simply 
IS_GEN9() -- POR was that *ALL* Gen9 platforms should have a GuC and 
firmware for it. Then someone else added KBL as an exception.

Anyway, we now have BXT support, so the if-else-if condition ladder has 
a more obvious structure now :)

.Dave.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2016-05-13 14:36 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-27 18:03 [PATCH v2 1/5] drm/i915/guc: add enable_guc_loading parameter Dave Gordon
2016-04-27 18:03 ` [PATCH v2 2/5] drm/i915/guc: pass request (not client) to i915_guc_{wq_check_space, submit}() Dave Gordon
2016-04-29 15:08   ` Tvrtko Ursulin
2016-05-03 19:22     ` Dave Gordon
2016-05-04  8:33       ` Tvrtko Ursulin
2016-04-27 18:03 ` [PATCH v2 3/5] drm/i915/guc: don't spinwait if the GuC's workqueue is full Dave Gordon
2016-04-29 15:17   ` Tvrtko Ursulin
2016-05-06 17:12     ` Dave Gordon
2016-04-29 15:45   ` Tvrtko Ursulin
2016-05-06 15:17     ` Dave Gordon
2016-05-10 14:44       ` Tvrtko Ursulin
2016-04-27 18:03 ` [PATCH v2 4/5] drm/i915/guc: rework guc_add_workqueue_item() Dave Gordon
2016-04-29 15:44   ` Tvrtko Ursulin
2016-04-29 16:10     ` Chris Wilson
2016-05-03 10:26       ` Tvrtko Ursulin
2016-05-05 18:38     ` Dave Gordon
2016-05-06  8:55       ` Tvrtko Ursulin
2016-05-06 15:06         ` Dave Gordon
2016-04-27 18:03 ` [PATCH v2 5/5] DO NOT MERGE: change default to using GuC submission if possible Dave Gordon
2016-04-28  6:24 ` ✓ Fi.CI.BAT: success for series starting with [v2,1/5] drm/i915/guc: add enable_guc_loading parameter Patchwork
2016-04-29 15:03 ` [PATCH v2 1/5] " Tvrtko Ursulin
2016-05-06 16:39   ` Dave Gordon
2016-05-10 14:37     ` Tvrtko Ursulin
2016-05-13 14:36       ` Dave Gordon

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.