* [PATCH v3 1/3] drm/i915/guc: keep GuC doorbell & process descriptor mapped in kernel
@ 2016-04-19 11:45 Dave Gordon
  2016-04-19 11:45 ` [PATCH v3 2/3] drm/i915/guc: drop cached copy of 'wq_head' Dave Gordon
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Dave Gordon @ 2016-04-19 11:45 UTC (permalink / raw)
  To: intel-gfx

Don't use kmap_atomic() for doorbell or process-descriptor access.
This fixes the BUG shown below, where a thread slept while holding a
kmap_atomic mapping (kmap_atomic() disables preemption, so sleeping
under it is not allowed). Rather than call kmap_atomic() in this code
path at all, we now set up a permanent kernel mapping of the shared
doorbell/process-descriptor page, and use that in all doorbell- and
process-descriptor-related code.

  BUG: scheduling while atomic: gem_close_race/1941/0x00000002
  Modules linked in: hid_generic usbhid i915 asix usbnet libphy mii
    i2c_algo_bit drm_kms_helper cfbfillrect syscopyarea cfbimgblt
    sysfillrect sysimgblt fb_sys_fops cfbcopyarea drm coretemp i2c_hid
    hid video pinctrl_sunrisepoint pinctrl_intel acpi_pad nls_iso8859_1
    e1000e ptp psmouse pps_core ahci libahci
  CPU: 0 PID: 1941 Comm: gem_close_race Tainted: G     U 4.4.0-160121+ #123
  Hardware name: Intel Corporation Skylake Client platform/Skylake AIO
    DDR3L RVP10, BIOS SKLSE2R1.R00.X100.B01.1509220551 09/22/2015
    0000000000013e40 ffff880166c27a78 ffffffff81280d02 ffff880172c13e40
    ffff880166c27a88 ffffffff810c203a ffff880166c27ac8 ffffffff814ec808
    ffff88016b7c6000 ffff880166c28000 00000000000f4240 0000000000000001
  Call Trace:
    [<ffffffff81280d02>] dump_stack+0x4b/0x79
    [<ffffffff810c203a>] __schedule_bug+0x41/0x4f
    [<ffffffff814ec808>] __schedule+0x5a8/0x690
    [<ffffffff814ec927>] schedule+0x37/0x80
    [<ffffffff814ef3fd>] schedule_hrtimeout_range_clock+0xad/0x130
    [<ffffffff81090be0>] ? hrtimer_init+0x10/0x10
    [<ffffffff814ef3f1>] ?  schedule_hrtimeout_range_clock+0xa1/0x130
    [<ffffffff814ef48e>] schedule_hrtimeout_range+0xe/0x10
    [<ffffffff814eef9b>] usleep_range+0x3b/0x40
    [<ffffffffa01ec109>] i915_guc_wq_check_space+0x119/0x210 [i915]
    [<ffffffffa01da47c>] intel_logical_ring_alloc_request_extras+0x5c/0x70 [i915]
    [<ffffffffa01cdbf1>] i915_gem_request_alloc+0x91/0x170 [i915]
    [<ffffffffa01c1c07>] i915_gem_do_execbuffer.isra.25+0xbc7/0x12a0 [i915]
    [<ffffffffa01cb785>] ?  i915_gem_object_get_pages_gtt+0x225/0x3c0 [i915]
    [<ffffffffa01d1fb6>] ? i915_gem_pwrite_ioctl+0xd6/0x9f0 [i915]
    [<ffffffffa01c2e68>] i915_gem_execbuffer2+0xa8/0x250 [i915]
    [<ffffffffa00f65d8>] drm_ioctl+0x258/0x4f0 [drm]
    [<ffffffffa01c2dc0>] ? i915_gem_execbuffer+0x340/0x340 [i915]
    [<ffffffff8111590d>] do_vfs_ioctl+0x2cd/0x4a0
    [<ffffffff8111eac2>] ? __fget+0x72/0xb0
    [<ffffffff81115b1c>] SyS_ioctl+0x3c/0x70
    [<ffffffff814effd7>] entry_SYSCALL_64_fastpath+0x12/0x6a
  ------------[ cut here ]------------
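
The shape of the change, as a minimal sketch (illustrative only; the
exact code is in the diff below, and all the names used here are taken
from it):

  /* Before: kmap_atomic() disables preemption, so anything that might
   * sleep while the page is mapped (e.g. the usleep_range() call in
   * i915_guc_wq_check_space()) triggers "scheduling while atomic".
   */
  void *base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0));
  struct guc_process_desc *desc = base + client->proc_desc_offset;
  /* ... anything that may sleep here is a BUG ... */
  kunmap_atomic(base);

  /* After: kmap() the first page exactly once, when the client is
   * created, and keep that mapping until the client is freed; code
   * using it is then free to sleep.
   */
  client->client_base = kmap(i915_gem_object_get_page(client->client_obj, 0));
  /* ... any later access, from any context, even one that sleeps ... */
  desc = client->client_base + client->proc_desc_offset;
  /* ... and only when the client is torn down ... */
  kunmap(kmap_to_page(client->client_base));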

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93847
Original-version-by: Alex Dai <yu.dai@intel.com>
Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 40 +++++++++---------------------
 drivers/gpu/drm/i915/intel_guc.h           |  2 ++
 2 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index da86bdb..98af8ca 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -179,15 +179,11 @@ static void guc_init_doorbell(struct intel_guc *guc,
 			      struct i915_guc_client *client)
 {
 	struct guc_doorbell_info *doorbell;
-	void *base;
 
-	base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0));
-	doorbell = base + client->doorbell_offset;
+	doorbell = client->client_base + client->doorbell_offset;
 
-	doorbell->db_status = 1;
+	doorbell->db_status = GUC_DOORBELL_ENABLED;
 	doorbell->cookie = 0;
-
-	kunmap_atomic(base);
 }
 
 static int guc_ring_doorbell(struct i915_guc_client *gc)
@@ -195,11 +191,9 @@ static int guc_ring_doorbell(struct i915_guc_client *gc)
 	struct guc_process_desc *desc;
 	union guc_doorbell_qw db_cmp, db_exc, db_ret;
 	union guc_doorbell_qw *db;
-	void *base;
 	int attempt = 2, ret = -EAGAIN;
 
-	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
-	desc = base + gc->proc_desc_offset;
+	desc = gc->client_base + gc->proc_desc_offset;
 
 	/* Update the tail so it is visible to GuC */
 	desc->tail = gc->wq_tail;
@@ -215,7 +209,7 @@ static int guc_ring_doorbell(struct i915_guc_client *gc)
 		db_exc.cookie = 1;
 
 	/* pointer of current doorbell cacheline */
-	db = base + gc->doorbell_offset;
+	db = gc->client_base + gc->doorbell_offset;
 
 	while (attempt--) {
 		/* lets ring the doorbell */
@@ -247,7 +241,6 @@ static int guc_ring_doorbell(struct i915_guc_client *gc)
 	/* Finally, update the cached copy of the GuC's WQ head */
 	gc->wq_head = desc->head;
 
-	kunmap_atomic(base);
 	return ret;
 }
 
@@ -256,16 +249,12 @@ static void guc_disable_doorbell(struct intel_guc *guc,
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	struct guc_doorbell_info *doorbell;
-	void *base;
 	i915_reg_t drbreg = GEN8_DRBREGL(client->doorbell_id);
 	int value;
 
-	base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0));
-	doorbell = base + client->doorbell_offset;
-
-	doorbell->db_status = 0;
+	doorbell = client->client_base + client->doorbell_offset;
 
-	kunmap_atomic(base);
+	doorbell->db_status = GUC_DOORBELL_DISABLED;
 
 	I915_WRITE(drbreg, I915_READ(drbreg) & ~GEN8_DRB_VALID);
 
@@ -341,10 +330,8 @@ static void guc_init_proc_desc(struct intel_guc *guc,
 			       struct i915_guc_client *client)
 {
 	struct guc_process_desc *desc;
-	void *base;
 
-	base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0));
-	desc = base + client->proc_desc_offset;
+	desc = client->client_base + client->proc_desc_offset;
 
 	memset(desc, 0, sizeof(*desc));
 
@@ -361,8 +348,6 @@ static void guc_init_proc_desc(struct intel_guc *guc,
 	desc->wq_size_bytes = client->wq_size;
 	desc->wq_status = WQ_STATUS_ACTIVE;
 	desc->priority = client->priority;
-
-	kunmap_atomic(base);
 }
 
 /*
@@ -474,7 +459,6 @@ static void guc_fini_ctx_desc(struct intel_guc *guc,
 int i915_guc_wq_check_space(struct i915_guc_client *gc)
 {
 	struct guc_process_desc *desc;
-	void *base;
 	u32 size = sizeof(struct guc_wq_item);
 	int ret = -ETIMEDOUT, timeout_counter = 200;
 
@@ -486,8 +470,7 @@ int i915_guc_wq_check_space(struct i915_guc_client *gc)
 	if (CIRC_SPACE(gc->wq_tail, gc->wq_head, gc->wq_size) >= size)
 		return 0;
 
-	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
-	desc = base + gc->proc_desc_offset;
+	desc = gc->client_base + gc->proc_desc_offset;
 
 	while (timeout_counter-- > 0) {
 		gc->wq_head = desc->head;
@@ -501,8 +484,6 @@ int i915_guc_wq_check_space(struct i915_guc_client *gc)
 			usleep_range(1000, 2000);
 	};
 
-	kunmap_atomic(base);
-
 	return ret;
 }
 
@@ -676,6 +657,7 @@ static void guc_client_free(struct drm_device *dev,
 	 * Be sure to drop any locks
 	 */
 
+	kunmap(kmap_to_page(client->client_base));
 	gem_release_guc_obj(client->client_obj);
 
 	if (client->ctx_index != GUC_INVALID_CTX_ID) {
@@ -696,7 +678,7 @@ static void guc_client_free(struct drm_device *dev,
  * @ctx:	the context that owns the client (we use the default render
  * 		context)
  *
- * Return:	An i915_guc_client object if success.
+ * Return:	An i915_guc_client object if success, else NULL.
  */
 static struct i915_guc_client *guc_client_alloc(struct drm_device *dev,
 						uint32_t priority,
@@ -728,7 +710,9 @@ static struct i915_guc_client *guc_client_alloc(struct drm_device *dev,
 	if (!obj)
 		goto err;
 
+	/* We'll keep just the first (doorbell/proc) page permanently kmap'd. */
 	client->client_obj = obj;
+	client->client_base = kmap(i915_gem_object_get_page(obj, 0));
 	client->wq_offset = GUC_DB_SIZE;
 	client->wq_size = GUC_WQ_SIZE;
 
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 3bb85b1..06050c241a 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -31,6 +31,7 @@ struct drm_i915_gem_request;
 
 struct i915_guc_client {
 	struct drm_i915_gem_object *client_obj;
+	void *client_base;		/* first page (only) of above	*/
 	struct intel_context *owner;
 	struct intel_guc *guc;
 	uint32_t priority;
@@ -52,6 +53,7 @@ struct i915_guc_client {
 	uint32_t q_fail;
 	uint32_t b_fail;
 	int retcode;
+	int spare;			/* pad to 32 DWords		*/
 };
 
 enum intel_guc_fw_status {
-- 
1.9.1


* [PATCH v3 2/3] drm/i915/guc: drop cached copy of 'wq_head'
  2016-04-19 11:45 [PATCH v3 1/3] drm/i915/guc: keep GuC doorbell & process descriptor mapped in kernel Dave Gordon
@ 2016-04-19 11:45 ` Dave Gordon
  2016-04-19 11:45 ` [PATCH v3 3/3] drm/i915/guc: local optimisations and updating comments Dave Gordon
  2016-04-19 13:24 ` ✗ Fi.CI.BAT: failure for series starting with [v3,1/3] drm/i915/guc: keep GuC doorbell & process descriptor mapped in kernel Patchwork
  2 siblings, 0 replies; 7+ messages in thread
From: Dave Gordon @ 2016-04-19 11:45 UTC (permalink / raw)
  To: intel-gfx

From: Alex Dai <yu.dai@intel.com>

Now that the GuC client's process descriptor is kept permanently
mapped, there is no need to cache a local copy of the GuC's
work-queue head: the code can simply read desc->head directly,
which simplifies it a little.
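
As a concrete sketch (illustrative only, using the names from the diff
below rather than the exact code), a work-queue space check now looks
like this:

  struct guc_process_desc *desc = gc->client_base + gc->proc_desc_offset;
  u32 space;

  /* desc->head is advanced by the GuC as it consumes work items;
   * CIRC_SPACE() gives the bytes still free between tail and head.
   */
  space = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);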

Signed-off-by: Alex Dai <yu.dai@intel.com>
Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 16 ++++------------
 drivers/gpu/drm/i915/intel_guc.h           |  2 +-
 2 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 98af8ca..135f094 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -238,9 +238,6 @@ static int guc_ring_doorbell(struct i915_guc_client *gc)
 			db_exc.cookie = 1;
 	}
 
-	/* Finally, update the cached copy of the GuC's WQ head */
-	gc->wq_head = desc->head;
-
 	return ret;
 }
 
@@ -465,17 +462,10 @@ int i915_guc_wq_check_space(struct i915_guc_client *gc)
 	if (!gc)
 		return 0;
 
-	/* Quickly return if wq space is available since last time we cache the
-	 * head position. */
-	if (CIRC_SPACE(gc->wq_tail, gc->wq_head, gc->wq_size) >= size)
-		return 0;
-
 	desc = gc->client_base + gc->proc_desc_offset;
 
 	while (timeout_counter-- > 0) {
-		gc->wq_head = desc->head;
-
-		if (CIRC_SPACE(gc->wq_tail, gc->wq_head, gc->wq_size) >= size) {
+		if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
 			ret = 0;
 			break;
 		}
@@ -490,11 +480,13 @@ int i915_guc_wq_check_space(struct i915_guc_client *gc)
 static int guc_add_workqueue_item(struct i915_guc_client *gc,
 				  struct drm_i915_gem_request *rq)
 {
+	struct guc_process_desc *desc;
 	struct guc_wq_item *wqi;
 	void *base;
 	u32 tail, wq_len, wq_off, space;
 
-	space = CIRC_SPACE(gc->wq_tail, gc->wq_head, gc->wq_size);
+	desc = gc->client_base + gc->proc_desc_offset;
+	space = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
 	if (WARN_ON(space < sizeof(struct guc_wq_item)))
 		return -ENOSPC; /* shouldn't happen */
 
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 06050c241a..19ca593 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -46,7 +46,7 @@ struct i915_guc_client {
 	uint32_t wq_offset;
 	uint32_t wq_size;
 	uint32_t wq_tail;
-	uint32_t wq_head;
+	uint32_t unused;		/* Was 'wq_head'		*/
 
 	/* GuC submission statistics & status */
 	uint64_t submissions[GUC_MAX_ENGINES_NUM];
-- 
1.9.1


* [PATCH v3 3/3] drm/i915/guc: local optimisations and updating comments
  2016-04-19 11:45 [PATCH v3 1/3] drm/i915/guc: keep GuC doorbell & process descriptor mapped in kernel Dave Gordon
  2016-04-19 11:45 ` [PATCH v3 2/3] drm/i915/guc: drop cached copy of 'wq_head' Dave Gordon
@ 2016-04-19 11:45 ` Dave Gordon
  2016-04-19 15:08   ` Tvrtko Ursulin
  2016-04-19 13:24 ` ✗ Fi.CI.BAT: failure for series starting with [v3,1/3] drm/i915/guc: keep GuC doorbell & process descriptor mapped in kernel Patchwork
  2 siblings, 1 reply; 7+ messages in thread
From: Dave Gordon @ 2016-04-19 11:45 UTC (permalink / raw)
  To: intel-gfx

Tidying up guc_init_proc_desc() and adding commentary to the client
structure after the recent change in GuC page mapping strategy.
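
The one functional change is that db_trigger_cpu is now filled in
rather than left as zero. For reference, the descriptor assignments
from the hunk below, annotated by address space (same code as in the
diff; the comments are added here for illustration only):

  gfx_addr = i915_gem_obj_ggtt_offset(client_obj);
  /* One doorbell cacheline, described in three address spaces: */
  desc.db_trigger_phy = sg_dma_address(client_obj->pages->sgl) +
  			client->doorbell_offset;	/* bus/DMA address */
  desc.db_trigger_cpu = (uintptr_t)client->client_base +
  			client->doorbell_offset;	/* kernel CPU address */
  desc.db_trigger_uk = gfx_addr + client->doorbell_offset;	/* GGTT address */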

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 38 ++++++++++++++----------------
 drivers/gpu/drm/i915/intel_guc.h           | 23 ++++++++++++++++++
 2 files changed, 41 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 135f094..5bbb13b 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -358,12 +358,14 @@ static void guc_init_proc_desc(struct intel_guc *guc,
 static void guc_init_ctx_desc(struct intel_guc *guc,
 			      struct i915_guc_client *client)
 {
+	struct drm_i915_gem_object *client_obj = client->client_obj;
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	struct intel_engine_cs *engine;
 	struct intel_context *ctx = client->owner;
 	struct guc_context_desc desc;
 	struct sg_table *sg;
 	enum intel_engine_id id;
+	u32 gfx_addr;
 
 	memset(&desc, 0, sizeof(desc));
 
@@ -392,16 +394,17 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
 		lrc->context_desc = (u32)ctx_desc;
 
 		/* The state page is after PPHWSP */
-		lrc->ring_lcra = i915_gem_obj_ggtt_offset(obj) +
-				LRC_STATE_PN * PAGE_SIZE;
+		gfx_addr = i915_gem_obj_ggtt_offset(obj);
+		lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE;
 		lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
 				(engine->guc_id << GUC_ELC_ENGINE_OFFSET);
 
 		obj = ctx->engine[id].ringbuf->obj;
+		gfx_addr = i915_gem_obj_ggtt_offset(obj);
 
-		lrc->ring_begin = i915_gem_obj_ggtt_offset(obj);
-		lrc->ring_end = lrc->ring_begin + obj->base.size - 1;
-		lrc->ring_next_free_location = lrc->ring_begin;
+		lrc->ring_begin = gfx_addr;
+		lrc->ring_end = gfx_addr + obj->base.size - 1;
+		lrc->ring_next_free_location = gfx_addr;
 		lrc->ring_current_tail_pointer_value = 0;
 
 		desc.engines_used |= (1 << engine->guc_id);
@@ -410,22 +413,17 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
 	WARN_ON(desc.engines_used == 0);
 
 	/*
-	 * The CPU address is only needed at certain points, so kmap_atomic on
-	 * demand instead of storing it in the ctx descriptor.
-	 * XXX: May make debug easier to have it mapped
+	 * The doorbell, process descriptor, and workqueue are all parts
+	 * of the client object, which the GuC will reference via the GGTT
 	 */
-	desc.db_trigger_cpu = 0;
-	desc.db_trigger_uk = client->doorbell_offset +
-		i915_gem_obj_ggtt_offset(client->client_obj);
-	desc.db_trigger_phy = client->doorbell_offset +
-		sg_dma_address(client->client_obj->pages->sgl);
-
-	desc.process_desc = client->proc_desc_offset +
-		i915_gem_obj_ggtt_offset(client->client_obj);
-
-	desc.wq_addr = client->wq_offset +
-		i915_gem_obj_ggtt_offset(client->client_obj);
-
+	gfx_addr = i915_gem_obj_ggtt_offset(client_obj);
+	desc.db_trigger_phy = sg_dma_address(client_obj->pages->sgl) +
+				client->doorbell_offset;
+	desc.db_trigger_cpu = (uintptr_t)client->client_base +
+				client->doorbell_offset;
+	desc.db_trigger_uk = gfx_addr + client->doorbell_offset;
+	desc.process_desc = gfx_addr + client->proc_desc_offset;
+	desc.wq_addr = gfx_addr + client->wq_offset;
 	desc.wq_size = client->wq_size;
 
 	/*
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 19ca593..c503ccd 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -29,6 +29,29 @@
 
 struct drm_i915_gem_request;
 
+/*
+ * This structure primarily describes the GEM object shared with the GuC.
+ * The GEM object is held for the entire lifetime of our interaction with
+ * the GuC, being allocated before the GuC is loaded with its firmware. 
+ * Because there's no way to update the address used by the GuC after
+ * initialisation, the shared object must stay pinned into the GGTT as
+ * long as the GuC is in use. We also keep the first page (only) mapped
+ * into kernel address space, as it includes shared data that must be
+ * updated on every request submission.
+ *
+ * The single GEM object described here is actually made up of several
+ * separate areas, as far as the GuC is concerned. The first page (kept
+ * kmap'd) includes the "process decriptor" which holds sequence data for
+ * the doorbell, and one cacheline which actually *is* the doorbell; a
+ * write to this will "ring" the doorbell (i.e. send an interrupt to the
+ * GuC). The subsequent  pages of the client object constitute the work
+ * queue (a circular array of work items), again described in the process
+ * descriptor. Work queue pages are mapped momentarily as required.
+ *
+ * Finally, we also keep a few statistics here, including the number of
+ * submissions to each engine, and a record of the last submission failure
+ * (if any).
+ */
 struct i915_guc_client {
 	struct drm_i915_gem_object *client_obj;
 	void *client_base;		/* first page (only) of above	*/
-- 
1.9.1


* ✗ Fi.CI.BAT: failure for series starting with [v3,1/3] drm/i915/guc: keep GuC doorbell & process descriptor mapped in kernel
  2016-04-19 11:45 [PATCH v3 1/3] drm/i915/guc: keep GuC doorbell & process descriptor mapped in kernel Dave Gordon
  2016-04-19 11:45 ` [PATCH v3 2/3] drm/i915/guc: drop cached copy of 'wq_head' Dave Gordon
  2016-04-19 11:45 ` [PATCH v3 3/3] drm/i915/guc: local optimisations and updating comments Dave Gordon
@ 2016-04-19 13:24 ` Patchwork
  2 siblings, 0 replies; 7+ messages in thread
From: Patchwork @ 2016-04-19 13:24 UTC (permalink / raw)
  To: Dave Gordon; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v3,1/3] drm/i915/guc: keep GuC doorbell & process descriptor mapped in kernel
URL   : https://patchwork.freedesktop.org/series/5927/
State : failure

== Summary ==

Series 5927v1 Series without cover letter
http://patchwork.freedesktop.org/api/1.0/series/5927/revisions/1/mbox/


bdw-nuci7        total:192  pass:180  dwarn:0   dfail:0   fail:0   skip:12 
bdw-ultra        total:127  pass:105  dwarn:0   dfail:0   fail:0   skip:22 
bsw-nuc-2        total:191  pass:152  dwarn:0   dfail:0   fail:0   skip:39 
byt-nuc          total:191  pass:153  dwarn:0   dfail:0   fail:0   skip:38 
hsw-brixbox      total:192  pass:168  dwarn:0   dfail:0   fail:0   skip:24 
hsw-gt2          total:192  pass:173  dwarn:0   dfail:0   fail:0   skip:19 
ivb-t430s        total:192  pass:164  dwarn:0   dfail:0   fail:0   skip:28 
skl-i7k-2        total:192  pass:167  dwarn:0   dfail:0   fail:0   skip:25 
skl-nuci5        total:192  pass:181  dwarn:0   dfail:0   fail:0   skip:11 
snb-dellxps      total:192  pass:154  dwarn:0   dfail:0   fail:0   skip:38 
snb-x220t        total:192  pass:154  dwarn:0   dfail:0   fail:1   skip:37 
BOOT FAILED for ilk-hp8440p

Results at /archive/results/CI_IGT_test/Patchwork_1941/

83dde235b9d8bbe1cabf7ad002a6c48ff5a699fc drm-intel-nightly: 2016y-04m-19d-11h-58m-43s UTC integration manifest
2183673 drm/i915/guc: local optimisations and updating comments
a4c57e5 drm/i915/guc: drop cached copy of 'wq_head'
4d18066 drm/i915/guc: keep GuC doorbell & process descriptor mapped in kernel


* Re: [PATCH v3 3/3] drm/i915/guc: local optimisations and updating comments
  2016-04-19 11:45 ` [PATCH v3 3/3] drm/i915/guc: local optimisations and updating comments Dave Gordon
@ 2016-04-19 15:08   ` Tvrtko Ursulin
  2016-04-19 16:38     ` Dave Gordon
  0 siblings, 1 reply; 7+ messages in thread
From: Tvrtko Ursulin @ 2016-04-19 15:08 UTC (permalink / raw)
  To: Dave Gordon, intel-gfx


On 19/04/16 12:45, Dave Gordon wrote:
> Tidying up guc_init_proc_desc() and adding commentary to the client
> structure after the recent change in GuC page mapping strategy.
>
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_guc_submission.c | 38 ++++++++++++++----------------
>   drivers/gpu/drm/i915/intel_guc.h           | 23 ++++++++++++++++++
>   2 files changed, 41 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 135f094..5bbb13b 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -358,12 +358,14 @@ static void guc_init_proc_desc(struct intel_guc *guc,
>   static void guc_init_ctx_desc(struct intel_guc *guc,
>   			      struct i915_guc_client *client)
>   {
> +	struct drm_i915_gem_object *client_obj = client->client_obj;
>   	struct drm_i915_private *dev_priv = guc_to_i915(guc);
>   	struct intel_engine_cs *engine;
>   	struct intel_context *ctx = client->owner;
>   	struct guc_context_desc desc;
>   	struct sg_table *sg;
>   	enum intel_engine_id id;
> +	u32 gfx_addr;
>
>   	memset(&desc, 0, sizeof(desc));
>
> @@ -392,16 +394,17 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
>   		lrc->context_desc = (u32)ctx_desc;
>
>   		/* The state page is after PPHWSP */
> -		lrc->ring_lcra = i915_gem_obj_ggtt_offset(obj) +
> -				LRC_STATE_PN * PAGE_SIZE;
> +		gfx_addr = i915_gem_obj_ggtt_offset(obj);
> +		lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE;
>   		lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
>   				(engine->guc_id << GUC_ELC_ENGINE_OFFSET);
>
>   		obj = ctx->engine[id].ringbuf->obj;
> +		gfx_addr = i915_gem_obj_ggtt_offset(obj);
>
> -		lrc->ring_begin = i915_gem_obj_ggtt_offset(obj);
> -		lrc->ring_end = lrc->ring_begin + obj->base.size - 1;
> -		lrc->ring_next_free_location = lrc->ring_begin;
> +		lrc->ring_begin = gfx_addr;
> +		lrc->ring_end = gfx_addr + obj->base.size - 1;
> +		lrc->ring_next_free_location = gfx_addr;
>   		lrc->ring_current_tail_pointer_value = 0;
>
>   		desc.engines_used |= (1 << engine->guc_id);
> @@ -410,22 +413,17 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
>   	WARN_ON(desc.engines_used == 0);
>
>   	/*
> -	 * The CPU address is only needed at certain points, so kmap_atomic on
> -	 * demand instead of storing it in the ctx descriptor.
> -	 * XXX: May make debug easier to have it mapped
> +	 * The doorbell, process descriptor, and workqueue are all parts
> +	 * of the client object, which the GuC will reference via the GGTT
>   	 */
> -	desc.db_trigger_cpu = 0;
> -	desc.db_trigger_uk = client->doorbell_offset +
> -		i915_gem_obj_ggtt_offset(client->client_obj);
> -	desc.db_trigger_phy = client->doorbell_offset +
> -		sg_dma_address(client->client_obj->pages->sgl);
> -
> -	desc.process_desc = client->proc_desc_offset +
> -		i915_gem_obj_ggtt_offset(client->client_obj);
> -
> -	desc.wq_addr = client->wq_offset +
> -		i915_gem_obj_ggtt_offset(client->client_obj);
> -
> +	gfx_addr = i915_gem_obj_ggtt_offset(client_obj);
> +	desc.db_trigger_phy = sg_dma_address(client_obj->pages->sgl) +
> +				client->doorbell_offset;
> +	desc.db_trigger_cpu = (uintptr_t)client->client_base +
> +				client->doorbell_offset;

This changed from zero to the address. Was it buggy before?

> +	desc.db_trigger_uk = gfx_addr + client->doorbell_offset;
> +	desc.process_desc = gfx_addr + client->proc_desc_offset;
> +	desc.wq_addr = gfx_addr + client->wq_offset;
>   	desc.wq_size = client->wq_size;
>
>   	/*
> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
> index 19ca593..c503ccd 100644
> --- a/drivers/gpu/drm/i915/intel_guc.h
> +++ b/drivers/gpu/drm/i915/intel_guc.h
> @@ -29,6 +29,29 @@
>
>   struct drm_i915_gem_request;
>
> +/*
> + * This structure primarily describes the GEM object shared with the GuC.
> + * The GEM object is held for the entire lifetime of our interaction with
> + * the GuC, being allocated before the GuC is loaded with its firmware.
> + * Because there's no way to update the address used by the GuC after
> + * initialisation, the shared object must stay pinned into the GGTT as
> + * long as the GuC is in use. We also keep the first page (only) mapped
> + * into kernel address space, as it includes shared data that must be
> + * updated on every request submission.
> + *
> + * The single GEM object described here is actually made up of several
> + * separate areas, as far as the GuC is concerned. The first page (kept
> + * kmap'd) includes the "process decriptor" which holds sequence data for

Typo - descriptor. :) (spotted by Thunderbird)

> + * the doorbell, and one cacheline which actually *is* the doorbell; a
> + * write to this will "ring" the doorbell (i.e. send an interrupt to the
> + * GuC). The subsequent  pages of the client object constitute the work
> + * queue (a circular array of work items), again described in the process
> + * descriptor. Work queue pages are mapped momentarily as required.
> + *
> + * Finally, we also keep a few statistics here, including the number of
> + * submissions to each engine, and a record of the last submission failure
> + * (if any).
> + */
>   struct i915_guc_client {
>   	struct drm_i915_gem_object *client_obj;
>   	void *client_base;		/* first page (only) of above	*/
>

Looks fine, only question mark over desc.db_trigger_cpu is preventing r-b.

Regards,

Tvrtko


* Re: [PATCH v3 3/3] drm/i915/guc: local optimisations and updating comments
  2016-04-19 15:08   ` Tvrtko Ursulin
@ 2016-04-19 16:38     ` Dave Gordon
  2016-04-20  8:39       ` Tvrtko Ursulin
  0 siblings, 1 reply; 7+ messages in thread
From: Dave Gordon @ 2016-04-19 16:38 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

On 19/04/16 16:08, Tvrtko Ursulin wrote:
>
> On 19/04/16 12:45, Dave Gordon wrote:
>> Tidying up guc_init_proc_desc() and adding commentary to the client
>> structure after the recent change in GuC page mapping strategy.
>>
>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_guc_submission.c | 38
>> ++++++++++++++----------------
>>   drivers/gpu/drm/i915/intel_guc.h           | 23 ++++++++++++++++++
>>   2 files changed, 41 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index 135f094..5bbb13b 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -358,12 +358,14 @@ static void guc_init_proc_desc(struct intel_guc
>> *guc,
>>   static void guc_init_ctx_desc(struct intel_guc *guc,
>>                     struct i915_guc_client *client)
>>   {
>> +    struct drm_i915_gem_object *client_obj = client->client_obj;
>>       struct drm_i915_private *dev_priv = guc_to_i915(guc);
>>       struct intel_engine_cs *engine;
>>       struct intel_context *ctx = client->owner;
>>       struct guc_context_desc desc;
>>       struct sg_table *sg;
>>       enum intel_engine_id id;
>> +    u32 gfx_addr;
>>
>>       memset(&desc, 0, sizeof(desc));
>>
>> @@ -392,16 +394,17 @@ static void guc_init_ctx_desc(struct intel_guc
>> *guc,
>>           lrc->context_desc = (u32)ctx_desc;
>>
>>           /* The state page is after PPHWSP */
>> -        lrc->ring_lcra = i915_gem_obj_ggtt_offset(obj) +
>> -                LRC_STATE_PN * PAGE_SIZE;
>> +        gfx_addr = i915_gem_obj_ggtt_offset(obj);
>> +        lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE;
>>           lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
>>                   (engine->guc_id << GUC_ELC_ENGINE_OFFSET);
>>
>>           obj = ctx->engine[id].ringbuf->obj;
>> +        gfx_addr = i915_gem_obj_ggtt_offset(obj);
>>
>> -        lrc->ring_begin = i915_gem_obj_ggtt_offset(obj);
>> -        lrc->ring_end = lrc->ring_begin + obj->base.size - 1;
>> -        lrc->ring_next_free_location = lrc->ring_begin;
>> +        lrc->ring_begin = gfx_addr;
>> +        lrc->ring_end = gfx_addr + obj->base.size - 1;
>> +        lrc->ring_next_free_location = gfx_addr;
>>           lrc->ring_current_tail_pointer_value = 0;
>>
>>           desc.engines_used |= (1 << engine->guc_id);
>> @@ -410,22 +413,17 @@ static void guc_init_ctx_desc(struct intel_guc
>> *guc,
>>       WARN_ON(desc.engines_used == 0);
>>
>>       /*
>> -     * The CPU address is only needed at certain points, so
>> kmap_atomic on
>> -     * demand instead of storing it in the ctx descriptor.
>> -     * XXX: May make debug easier to have it mapped
>> +     * The doorbell, process descriptor, and workqueue are all parts
>> +     * of the client object, which the GuC will reference via the GGTT
>>        */
>> -    desc.db_trigger_cpu = 0;
>> -    desc.db_trigger_uk = client->doorbell_offset +
>> -        i915_gem_obj_ggtt_offset(client->client_obj);
>> -    desc.db_trigger_phy = client->doorbell_offset +
>> -        sg_dma_address(client->client_obj->pages->sgl);
>> -
>> -    desc.process_desc = client->proc_desc_offset +
>> -        i915_gem_obj_ggtt_offset(client->client_obj);
>> -
>> -    desc.wq_addr = client->wq_offset +
>> -        i915_gem_obj_ggtt_offset(client->client_obj);
>> -
>> +    gfx_addr = i915_gem_obj_ggtt_offset(client_obj);
>> +    desc.db_trigger_phy = sg_dma_address(client_obj->pages->sgl) +
>> +                client->doorbell_offset;
>> +    desc.db_trigger_cpu = (uintptr_t)client->client_base +
>> +                client->doorbell_offset;
>
> This changed from zero to the address. Was it buggy before?

It's not actually used, AFAICT, but we might as well set it to the 
correct value anyway - this data is shared with the GuC, so I'd like the 
values to be as complete and correct as possible, in case the GuC 
firmware ever starts making use of them. And maybe we could use this for 
consistency checking if we ever found mysterious page faults in the 
doorbell code. But really it's just to show that these three fields all 
describe the same thing, namely where to ring the doorbell, in physical 
(DMA), kernel, and GGTT address spaces.

.Dave.

>> +    desc.db_trigger_uk = gfx_addr + client->doorbell_offset;
>> +    desc.process_desc = gfx_addr + client->proc_desc_offset;
>> +    desc.wq_addr = gfx_addr + client->wq_offset;
>>       desc.wq_size = client->wq_size;
>>
>>       /*
>> diff --git a/drivers/gpu/drm/i915/intel_guc.h
>> b/drivers/gpu/drm/i915/intel_guc.h
>> index 19ca593..c503ccd 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>> @@ -29,6 +29,29 @@
>>
>>   struct drm_i915_gem_request;
>>
>> +/*
>> + * This structure primarily describes the GEM object shared with the
>> GuC.
>> + * The GEM object is held for the entire lifetime of our interaction
>> with
>> + * the GuC, being allocated before the GuC is loaded with its firmware.
>> + * Because there's no way to update the address used by the GuC after
>> + * initialisation, the shared object must stay pinned into the GGTT as
>> + * long as the GuC is in use. We also keep the first page (only) mapped
>> + * into kernel address space, as it includes shared data that must be
>> + * updated on every request submission.
>> + *
>> + * The single GEM object described here is actually made up of several
>> + * separate areas, as far as the GuC is concerned. The first page (kept
>> + * kmap'd) includes the "process decriptor" which holds sequence data
>> for
>
> Typo - descriptor. :) (spotted by Thunderbird)
>
>> + * the doorbell, and one cacheline which actually *is* the doorbell; a
>> + * write to this will "ring" the doorbell (i.e. send an interrupt to the
>> + * GuC). The subsequent  pages of the client object constitute the work
>> + * queue (a circular array of work items), again described in the
>> process
>> + * descriptor. Work queue pages are mapped momentarily as required.
>> + *
>> + * Finally, we also keep a few statistics here, including the number of
>> + * submissions to each engine, and a record of the last submission
>> failure
>> + * (if any).
>> + */
>>   struct i915_guc_client {
>>       struct drm_i915_gem_object *client_obj;
>>       void *client_base;        /* first page (only) of above    */
>>
>
> Looks fine, only question mark over desc.db_trigger_cpu is preventing r-b.
>
> Regards,
>
> Tvrtko



* Re: [PATCH v3 3/3] drm/i915/guc: local optimisations and updating comments
  2016-04-19 16:38     ` Dave Gordon
@ 2016-04-20  8:39       ` Tvrtko Ursulin
  0 siblings, 0 replies; 7+ messages in thread
From: Tvrtko Ursulin @ 2016-04-20  8:39 UTC (permalink / raw)
  To: Dave Gordon, intel-gfx


On 19/04/16 17:38, Dave Gordon wrote:
> On 19/04/16 16:08, Tvrtko Ursulin wrote:
>>
>> On 19/04/16 12:45, Dave Gordon wrote:
>>> Tidying up guc_init_proc_desc() and adding commentary to the client
>>> structure after the recent change in GuC page mapping strategy.
>>>
>>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_guc_submission.c | 38
>>> ++++++++++++++----------------
>>>   drivers/gpu/drm/i915/intel_guc.h           | 23 ++++++++++++++++++
>>>   2 files changed, 41 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>>> index 135f094..5bbb13b 100644
>>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>>> @@ -358,12 +358,14 @@ static void guc_init_proc_desc(struct intel_guc
>>> *guc,
>>>   static void guc_init_ctx_desc(struct intel_guc *guc,
>>>                     struct i915_guc_client *client)
>>>   {
>>> +    struct drm_i915_gem_object *client_obj = client->client_obj;
>>>       struct drm_i915_private *dev_priv = guc_to_i915(guc);
>>>       struct intel_engine_cs *engine;
>>>       struct intel_context *ctx = client->owner;
>>>       struct guc_context_desc desc;
>>>       struct sg_table *sg;
>>>       enum intel_engine_id id;
>>> +    u32 gfx_addr;
>>>
>>>       memset(&desc, 0, sizeof(desc));
>>>
>>> @@ -392,16 +394,17 @@ static void guc_init_ctx_desc(struct intel_guc
>>> *guc,
>>>           lrc->context_desc = (u32)ctx_desc;
>>>
>>>           /* The state page is after PPHWSP */
>>> -        lrc->ring_lcra = i915_gem_obj_ggtt_offset(obj) +
>>> -                LRC_STATE_PN * PAGE_SIZE;
>>> +        gfx_addr = i915_gem_obj_ggtt_offset(obj);
>>> +        lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE;
>>>           lrc->context_id = (client->ctx_index <<
>>> GUC_ELC_CTXID_OFFSET) |
>>>                   (engine->guc_id << GUC_ELC_ENGINE_OFFSET);
>>>
>>>           obj = ctx->engine[id].ringbuf->obj;
>>> +        gfx_addr = i915_gem_obj_ggtt_offset(obj);
>>>
>>> -        lrc->ring_begin = i915_gem_obj_ggtt_offset(obj);
>>> -        lrc->ring_end = lrc->ring_begin + obj->base.size - 1;
>>> -        lrc->ring_next_free_location = lrc->ring_begin;
>>> +        lrc->ring_begin = gfx_addr;
>>> +        lrc->ring_end = gfx_addr + obj->base.size - 1;
>>> +        lrc->ring_next_free_location = gfx_addr;
>>>           lrc->ring_current_tail_pointer_value = 0;
>>>
>>>           desc.engines_used |= (1 << engine->guc_id);
>>> @@ -410,22 +413,17 @@ static void guc_init_ctx_desc(struct intel_guc
>>> *guc,
>>>       WARN_ON(desc.engines_used == 0);
>>>
>>>       /*
>>> -     * The CPU address is only needed at certain points, so
>>> kmap_atomic on
>>> -     * demand instead of storing it in the ctx descriptor.
>>> -     * XXX: May make debug easier to have it mapped
>>> +     * The doorbell, process descriptor, and workqueue are all parts
>>> +     * of the client object, which the GuC will reference via the GGTT
>>>        */
>>> -    desc.db_trigger_cpu = 0;
>>> -    desc.db_trigger_uk = client->doorbell_offset +
>>> -        i915_gem_obj_ggtt_offset(client->client_obj);
>>> -    desc.db_trigger_phy = client->doorbell_offset +
>>> -        sg_dma_address(client->client_obj->pages->sgl);
>>> -
>>> -    desc.process_desc = client->proc_desc_offset +
>>> -        i915_gem_obj_ggtt_offset(client->client_obj);
>>> -
>>> -    desc.wq_addr = client->wq_offset +
>>> -        i915_gem_obj_ggtt_offset(client->client_obj);
>>> -
>>> +    gfx_addr = i915_gem_obj_ggtt_offset(client_obj);
>>> +    desc.db_trigger_phy = sg_dma_address(client_obj->pages->sgl) +
>>> +                client->doorbell_offset;
>>> +    desc.db_trigger_cpu = (uintptr_t)client->client_base +
>>> +                client->doorbell_offset;
>>
>> This changed from zero to the address. Was it buggy before?
>
> It's not actually used, AFAICT, but we might as well set it to the
> correct value anyway - this data is shared with the GuC, so I'd like the
> values to be as complete and correct as possible, in case the GuC
> firmware ever starts making use of them. And maybe we could use this for
> consistency checking if we ever found mysterious page faults in the
> doorbell code. But really it's just to show that these three fields all
> describe the same thing, namely where to ring the doorbell, in physical
> (DMA), kernel, and GGTT address spaces.

Got it, in that case:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko



