All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/i915/guc: Simplify code by keeping kmap of guc_client object
@ 2016-02-11 23:09 yu.dai
  2016-02-12 13:03 ` Tvrtko Ursulin
  0 siblings, 1 reply; 9+ messages in thread
From: yu.dai @ 2016-02-11 23:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: daniel.vetter

From: Alex Dai <yu.dai@intel.com>

GuC client object is always pinned during its life cycle. We cache
the kmap of its first page, which includes guc_process_desc and
doorbell. By doing so, we can simplify the code where we read from
this page to find out how far GuC has progressed on the work queue,
and the code where the driver programs the doorbell to send a work
queue item to GuC.

As a result, this patch removes the kmap_atomic in wq_check_space,
where usleep_range could be called while the kmap_atomic is held. This
fixes the issue below.

[   34.098798] BUG: scheduling while atomic: gem_close_race/1941/0x00000002
[   34.098822] Modules linked in: hid_generic usbhid i915 asix usbnet libphy mii i2c_algo_bit drm_kms_helper cfbfillrect syscopyarea cfbimgblt sysfillrect sysimgblt fb_sys_fops cfbcopyarea drm coretemp i2c_hid hid video pinctrl_sunrisepoint pinctrl_intel acpi_pad nls_iso8859_1 e1000e ptp psmouse pps_core ahci libahci
[   34.098824] CPU: 0 PID: 1941 Comm: gem_close_race Tainted: G     U          4.4.0-160121+ #123
[   34.098824] Hardware name: Intel Corporation Skylake Client platform/Skylake AIO DDR3L RVP10, BIOS SKLSE2R1.R00.X100.B01.1509220551 09/22/2015
[   34.098825]  0000000000013e40 ffff880166c27a78 ffffffff81280d02 ffff880172c13e40
[   34.098826]  ffff880166c27a88 ffffffff810c203a ffff880166c27ac8 ffffffff814ec808
[   34.098827]  ffff88016b7c6000 ffff880166c28000 00000000000f4240 0000000000000001
[   34.098827] Call Trace:
[   34.098831]  [<ffffffff81280d02>] dump_stack+0x4b/0x79
[   34.098833]  [<ffffffff810c203a>] __schedule_bug+0x41/0x4f
[   34.098834]  [<ffffffff814ec808>] __schedule+0x5a8/0x690
[   34.098835]  [<ffffffff814ec927>] schedule+0x37/0x80
[   34.098836]  [<ffffffff814ef3fd>] schedule_hrtimeout_range_clock+0xad/0x130
[   34.098837]  [<ffffffff81090be0>] ? hrtimer_init+0x10/0x10
[   34.098838]  [<ffffffff814ef3f1>] ? schedule_hrtimeout_range_clock+0xa1/0x130
[   34.098839]  [<ffffffff814ef48e>] schedule_hrtimeout_range+0xe/0x10
[   34.098840]  [<ffffffff814eef9b>] usleep_range+0x3b/0x40
[   34.098853]  [<ffffffffa01ec109>] i915_guc_wq_check_space+0x119/0x210 [i915]
[   34.098861]  [<ffffffffa01da47c>] intel_logical_ring_alloc_request_extras+0x5c/0x70 [i915]
[   34.098869]  [<ffffffffa01cdbf1>] i915_gem_request_alloc+0x91/0x170 [i915]
[   34.098875]  [<ffffffffa01c1c07>] i915_gem_do_execbuffer.isra.25+0xbc7/0x12a0 [i915]
[   34.098882]  [<ffffffffa01cb785>] ? i915_gem_object_get_pages_gtt+0x225/0x3c0 [i915]
[   34.098889]  [<ffffffffa01d1fb6>] ? i915_gem_pwrite_ioctl+0xd6/0x9f0 [i915]
[   34.098895]  [<ffffffffa01c2e68>] i915_gem_execbuffer2+0xa8/0x250 [i915]
[   34.098900]  [<ffffffffa00f65d8>] drm_ioctl+0x258/0x4f0 [drm]
[   34.098906]  [<ffffffffa01c2dc0>] ? i915_gem_execbuffer+0x340/0x340 [i915]
[   34.098908]  [<ffffffff8111590d>] do_vfs_ioctl+0x2cd/0x4a0
[   34.098909]  [<ffffffff8111eac2>] ? __fget+0x72/0xb0
[   34.098910]  [<ffffffff81115b1c>] SyS_ioctl+0x3c/0x70
[   34.098911]  [<ffffffff814effd7>] entry_SYSCALL_64_fastpath+0x12/0x6a
[   34.100208] ------------[ cut here ]------------

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93847
Cc: <daniel.vetter@ffwll.ch>
Cc: <tvrtko.ursulin@intel.com>
Signed-off-by: Alex Dai <yu.dai@intel.com>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 39 +++++++++---------------------
 drivers/gpu/drm/i915/intel_guc.h           |  3 ++-
 2 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index d7543ef..d51015e 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -195,11 +195,9 @@ static int guc_ring_doorbell(struct i915_guc_client *gc)
 	struct guc_process_desc *desc;
 	union guc_doorbell_qw db_cmp, db_exc, db_ret;
 	union guc_doorbell_qw *db;
-	void *base;
 	int attempt = 2, ret = -EAGAIN;
 
-	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
-	desc = base + gc->proc_desc_offset;
+	desc = gc->client_base + gc->proc_desc_offset;
 
 	/* Update the tail so it is visible to GuC */
 	desc->tail = gc->wq_tail;
@@ -215,7 +213,7 @@ static int guc_ring_doorbell(struct i915_guc_client *gc)
 		db_exc.cookie = 1;
 
 	/* pointer of current doorbell cacheline */
-	db = base + gc->doorbell_offset;
+	db = gc->client_base + gc->doorbell_offset;
 
 	while (attempt--) {
 		/* lets ring the doorbell */
@@ -244,10 +242,6 @@ static int guc_ring_doorbell(struct i915_guc_client *gc)
 			db_exc.cookie = 1;
 	}
 
-	/* Finally, update the cached copy of the GuC's WQ head */
-	gc->wq_head = desc->head;
-
-	kunmap_atomic(base);
 	return ret;
 }
 
@@ -341,10 +335,8 @@ static void guc_init_proc_desc(struct intel_guc *guc,
 			       struct i915_guc_client *client)
 {
 	struct guc_process_desc *desc;
-	void *base;
 
-	base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0));
-	desc = base + client->proc_desc_offset;
+	desc = client->client_base + client->proc_desc_offset;
 
 	memset(desc, 0, sizeof(*desc));
 
@@ -361,8 +353,6 @@ static void guc_init_proc_desc(struct intel_guc *guc,
 	desc->wq_size_bytes = client->wq_size;
 	desc->wq_status = WQ_STATUS_ACTIVE;
 	desc->priority = client->priority;
-
-	kunmap_atomic(base);
 }
 
 /*
@@ -474,25 +464,16 @@ static void guc_fini_ctx_desc(struct intel_guc *guc,
 int i915_guc_wq_check_space(struct i915_guc_client *gc)
 {
 	struct guc_process_desc *desc;
-	void *base;
 	u32 size = sizeof(struct guc_wq_item);
 	int ret = -ETIMEDOUT, timeout_counter = 200;
 
 	if (!gc)
 		return 0;
 
-	/* Quickly return if wq space is available since last time we cache the
-	 * head position. */
-	if (CIRC_SPACE(gc->wq_tail, gc->wq_head, gc->wq_size) >= size)
-		return 0;
-
-	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
-	desc = base + gc->proc_desc_offset;
+	desc = gc->client_base + gc->proc_desc_offset;
 
 	while (timeout_counter-- > 0) {
-		gc->wq_head = desc->head;
-
-		if (CIRC_SPACE(gc->wq_tail, gc->wq_head, gc->wq_size) >= size) {
+		if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
 			ret = 0;
 			break;
 		}
@@ -501,19 +482,19 @@ int i915_guc_wq_check_space(struct i915_guc_client *gc)
 			usleep_range(1000, 2000);
 	};
 
-	kunmap_atomic(base);
-
 	return ret;
 }
 
 static int guc_add_workqueue_item(struct i915_guc_client *gc,
 				  struct drm_i915_gem_request *rq)
 {
+	struct guc_process_desc *desc;
 	struct guc_wq_item *wqi;
 	void *base;
 	u32 tail, wq_len, wq_off, space;
 
-	space = CIRC_SPACE(gc->wq_tail, gc->wq_head, gc->wq_size);
+	desc = gc->client_base + gc->proc_desc_offset;
+	space = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
 	if (WARN_ON(space < sizeof(struct guc_wq_item)))
 		return -ENOSPC; /* shouldn't happen */
 
@@ -675,6 +656,8 @@ static void guc_client_free(struct drm_device *dev,
 	 * Be sure to drop any locks
 	 */
 
+	kunmap(kmap_to_page(client->client_base));
+
 	gem_release_guc_obj(client->client_obj);
 
 	if (client->ctx_index != GUC_INVALID_CTX_ID) {
@@ -727,6 +710,8 @@ static struct i915_guc_client *guc_client_alloc(struct drm_device *dev,
 	if (!obj)
 		goto err;
 
+	client->client_base = kmap(i915_gem_object_get_dirty_page(obj, 0));
+
 	client->client_obj = obj;
 	client->wq_offset = GUC_DB_SIZE;
 	client->wq_size = GUC_WQ_SIZE;
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 73002e9..9f08bd7 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -34,6 +34,8 @@ struct i915_guc_client {
 	uint32_t priority;
 	uint32_t ctx_index;
 
+	void *client_base;
+
 	uint32_t proc_desc_offset;
 	uint32_t doorbell_offset;
 	uint32_t cookie;
@@ -43,7 +45,6 @@ struct i915_guc_client {
 	uint32_t wq_offset;
 	uint32_t wq_size;
 	uint32_t wq_tail;
-	uint32_t wq_head;
 
 	/* GuC submission statistics & status */
 	uint64_t submissions[GUC_MAX_ENGINES_NUM];
-- 
2.5.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/i915/guc: Simplify code by keeping kmap of guc_client object
  2016-02-11 23:09 [PATCH 1/2] drm/i915/guc: Simplify code by keeping kmap of guc_client object yu.dai
@ 2016-02-12 13:03 ` Tvrtko Ursulin
  2016-02-15 14:39   ` Dave Gordon
  2016-02-15 15:23   ` Dave Gordon
  0 siblings, 2 replies; 9+ messages in thread
From: Tvrtko Ursulin @ 2016-02-12 13:03 UTC (permalink / raw)
  To: yu.dai, intel-gfx; +Cc: daniel.vetter


On 11/02/16 23:09, yu.dai@intel.com wrote:
> From: Alex Dai <yu.dai@intel.com>
>
> GuC client object is always pinned during its life cycle. We cache
> the kmap of its first page, which includes guc_process_desc and
> doorbell. By doing so, we can simplify the code where we read from
> this page to get where GuC is progressing on work queue; and the
> code where driver program doorbell to send work queue item to GuC.
>
> As a result, this patch removes the kmap_atomic in wq_check_space,
> where usleep_range could be called while kmap_atomic is held. This
> fixes issue below.
>
> [   34.098798] BUG: scheduling while atomic: gem_close_race/1941/0x00000002
> [   34.098822] Modules linked in: hid_generic usbhid i915 asix usbnet libphy mii i2c_algo_bit drm_kms_helper cfbfillrect syscopyarea cfbimgblt sysfillrect sysimgblt fb_sys_fops cfbcopyarea drm coretemp i2c_hid hid video pinctrl_sunrisepoint pinctrl_intel acpi_pad nls_iso8859_1 e1000e ptp psmouse pps_core ahci libahci
> [   34.098824] CPU: 0 PID: 1941 Comm: gem_close_race Tainted: G     U          4.4.0-160121+ #123
> [   34.098824] Hardware name: Intel Corporation Skylake Client platform/Skylake AIO DDR3L RVP10, BIOS SKLSE2R1.R00.X100.B01.1509220551 09/22/2015
> [   34.098825]  0000000000013e40 ffff880166c27a78 ffffffff81280d02 ffff880172c13e40
> [   34.098826]  ffff880166c27a88 ffffffff810c203a ffff880166c27ac8 ffffffff814ec808
> [   34.098827]  ffff88016b7c6000 ffff880166c28000 00000000000f4240 0000000000000001
> [   34.098827] Call Trace:
> [   34.098831]  [<ffffffff81280d02>] dump_stack+0x4b/0x79
> [   34.098833]  [<ffffffff810c203a>] __schedule_bug+0x41/0x4f
> [   34.098834]  [<ffffffff814ec808>] __schedule+0x5a8/0x690
> [   34.098835]  [<ffffffff814ec927>] schedule+0x37/0x80
> [   34.098836]  [<ffffffff814ef3fd>] schedule_hrtimeout_range_clock+0xad/0x130
> [   34.098837]  [<ffffffff81090be0>] ? hrtimer_init+0x10/0x10
> [   34.098838]  [<ffffffff814ef3f1>] ? schedule_hrtimeout_range_clock+0xa1/0x130
> [   34.098839]  [<ffffffff814ef48e>] schedule_hrtimeout_range+0xe/0x10
> [   34.098840]  [<ffffffff814eef9b>] usleep_range+0x3b/0x40
> [   34.098853]  [<ffffffffa01ec109>] i915_guc_wq_check_space+0x119/0x210 [i915]
> [   34.098861]  [<ffffffffa01da47c>] intel_logical_ring_alloc_request_extras+0x5c/0x70 [i915]
> [   34.098869]  [<ffffffffa01cdbf1>] i915_gem_request_alloc+0x91/0x170 [i915]
> [   34.098875]  [<ffffffffa01c1c07>] i915_gem_do_execbuffer.isra.25+0xbc7/0x12a0 [i915]
> [   34.098882]  [<ffffffffa01cb785>] ? i915_gem_object_get_pages_gtt+0x225/0x3c0 [i915]
> [   34.098889]  [<ffffffffa01d1fb6>] ? i915_gem_pwrite_ioctl+0xd6/0x9f0 [i915]
> [   34.098895]  [<ffffffffa01c2e68>] i915_gem_execbuffer2+0xa8/0x250 [i915]
> [   34.098900]  [<ffffffffa00f65d8>] drm_ioctl+0x258/0x4f0 [drm]
> [   34.098906]  [<ffffffffa01c2dc0>] ? i915_gem_execbuffer+0x340/0x340 [i915]
> [   34.098908]  [<ffffffff8111590d>] do_vfs_ioctl+0x2cd/0x4a0
> [   34.098909]  [<ffffffff8111eac2>] ? __fget+0x72/0xb0
> [   34.098910]  [<ffffffff81115b1c>] SyS_ioctl+0x3c/0x70
> [   34.098911]  [<ffffffff814effd7>] entry_SYSCALL_64_fastpath+0x12/0x6a
> [   34.100208] ------------[ cut here ]------------
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93847
> Cc: <daniel.vetter@ffwll.ch>
> Cc: <tvrtko.ursulin@intel.com>
> Signed-off-by: Alex Dai <yu.dai@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_guc_submission.c | 39 +++++++++---------------------
>   drivers/gpu/drm/i915/intel_guc.h           |  3 ++-
>   2 files changed, 14 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index d7543ef..d51015e 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -195,11 +195,9 @@ static int guc_ring_doorbell(struct i915_guc_client *gc)
>   	struct guc_process_desc *desc;
>   	union guc_doorbell_qw db_cmp, db_exc, db_ret;
>   	union guc_doorbell_qw *db;
> -	void *base;
>   	int attempt = 2, ret = -EAGAIN;
>
> -	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
> -	desc = base + gc->proc_desc_offset;
> +	desc = gc->client_base + gc->proc_desc_offset;
>
>   	/* Update the tail so it is visible to GuC */
>   	desc->tail = gc->wq_tail;
> @@ -215,7 +213,7 @@ static int guc_ring_doorbell(struct i915_guc_client *gc)
>   		db_exc.cookie = 1;
>
>   	/* pointer of current doorbell cacheline */
> -	db = base + gc->doorbell_offset;
> +	db = gc->client_base + gc->doorbell_offset;
>
>   	while (attempt--) {
>   		/* lets ring the doorbell */
> @@ -244,10 +242,6 @@ static int guc_ring_doorbell(struct i915_guc_client *gc)
>   			db_exc.cookie = 1;
>   	}
>
> -	/* Finally, update the cached copy of the GuC's WQ head */
> -	gc->wq_head = desc->head;

Did you mean to remove the above?

> -
> -	kunmap_atomic(base);
>   	return ret;
>   }
>
> @@ -341,10 +335,8 @@ static void guc_init_proc_desc(struct intel_guc *guc,
>   			       struct i915_guc_client *client)
>   {
>   	struct guc_process_desc *desc;
> -	void *base;
>
> -	base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0));
> -	desc = base + client->proc_desc_offset;
> +	desc = client->client_base + client->proc_desc_offset;
>
>   	memset(desc, 0, sizeof(*desc));
>
> @@ -361,8 +353,6 @@ static void guc_init_proc_desc(struct intel_guc *guc,
>   	desc->wq_size_bytes = client->wq_size;
>   	desc->wq_status = WQ_STATUS_ACTIVE;
>   	desc->priority = client->priority;
> -
> -	kunmap_atomic(base);
>   }
>
>   /*
> @@ -474,25 +464,16 @@ static void guc_fini_ctx_desc(struct intel_guc *guc,
>   int i915_guc_wq_check_space(struct i915_guc_client *gc)
>   {
>   	struct guc_process_desc *desc;
> -	void *base;
>   	u32 size = sizeof(struct guc_wq_item);
>   	int ret = -ETIMEDOUT, timeout_counter = 200;
>
>   	if (!gc)
>   		return 0;
>
> -	/* Quickly return if wq space is available since last time we cache the
> -	 * head position. */
> -	if (CIRC_SPACE(gc->wq_tail, gc->wq_head, gc->wq_size) >= size)
> -		return 0;
> -
> -	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
> -	desc = base + gc->proc_desc_offset;
> +	desc = gc->client_base + gc->proc_desc_offset;
>
>   	while (timeout_counter-- > 0) {
> -		gc->wq_head = desc->head;
> -

Hm again what looks like unrelated change to caching the base?

> -		if (CIRC_SPACE(gc->wq_tail, gc->wq_head, gc->wq_size) >= size) {
> +		if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
>   			ret = 0;
>   			break;
>   		}
> @@ -501,19 +482,19 @@ int i915_guc_wq_check_space(struct i915_guc_client *gc)
>   			usleep_range(1000, 2000);
>   	};
>
> -	kunmap_atomic(base);
> -
>   	return ret;
>   }
>
>   static int guc_add_workqueue_item(struct i915_guc_client *gc,
>   				  struct drm_i915_gem_request *rq)
>   {
> +	struct guc_process_desc *desc;
>   	struct guc_wq_item *wqi;
>   	void *base;
>   	u32 tail, wq_len, wq_off, space;
>
> -	space = CIRC_SPACE(gc->wq_tail, gc->wq_head, gc->wq_size);
> +	desc = gc->client_base + gc->proc_desc_offset;
> +	space = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
>   	if (WARN_ON(space < sizeof(struct guc_wq_item)))
>   		return -ENOSPC; /* shouldn't happen */
>
> @@ -675,6 +656,8 @@ static void guc_client_free(struct drm_device *dev,
>   	 * Be sure to drop any locks
>   	 */
>
> +	kunmap(kmap_to_page(client->client_base));
> +
>   	gem_release_guc_obj(client->client_obj);
>
>   	if (client->ctx_index != GUC_INVALID_CTX_ID) {
> @@ -727,6 +710,8 @@ static struct i915_guc_client *guc_client_alloc(struct drm_device *dev,
>   	if (!obj)
>   		goto err;
>
> +	client->client_base = kmap(i915_gem_object_get_dirty_page(obj, 0));

Was this another bug, that the page/object wasn't dirtied before?

> +
>   	client->client_obj = obj;
>   	client->wq_offset = GUC_DB_SIZE;
>   	client->wq_size = GUC_WQ_SIZE;
> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
> index 73002e9..9f08bd7 100644
> --- a/drivers/gpu/drm/i915/intel_guc.h
> +++ b/drivers/gpu/drm/i915/intel_guc.h
> @@ -34,6 +34,8 @@ struct i915_guc_client {
>   	uint32_t priority;
>   	uint32_t ctx_index;
>
> +	void *client_base;
> +
>   	uint32_t proc_desc_offset;
>   	uint32_t doorbell_offset;
>   	uint32_t cookie;
> @@ -43,7 +45,6 @@ struct i915_guc_client {
>   	uint32_t wq_offset;
>   	uint32_t wq_size;
>   	uint32_t wq_tail;
> -	uint32_t wq_head;

Hm ok I don't get why kmap caching means removing this as well?

Btw I don't see patch 2/2 ?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/i915/guc: Simplify code by keeping kmap of guc_client object
  2016-02-12 13:03 ` Tvrtko Ursulin
@ 2016-02-15 14:39   ` Dave Gordon
  2016-02-16 16:27     ` Yu Dai
  2016-02-15 15:23   ` Dave Gordon
  1 sibling, 1 reply; 9+ messages in thread
From: Dave Gordon @ 2016-02-15 14:39 UTC (permalink / raw)
  To: Tvrtko Ursulin, yu.dai, intel-gfx; +Cc: daniel.vetter

On 12/02/16 13:03, Tvrtko Ursulin wrote:
>
> On 11/02/16 23:09, yu.dai@intel.com wrote:
>> From: Alex Dai <yu.dai@intel.com>
>>
>> GuC client object is always pinned during its life cycle. We cache
>> the kmap of its first page, which includes guc_process_desc and
>> doorbell. By doing so, we can simplify the code where we read from
>> this page to get where GuC is progressing on work queue; and the
>> code where driver program doorbell to send work queue item to GuC.

[snip]

>>
>> -    /* Finally, update the cached copy of the GuC's WQ head */
>> -    gc->wq_head = desc->head;
>
> Did you mean to remove the above?

I wondered that too at first, but the answer is "yes" -- see below.

>>
>> +    client->client_base = kmap(i915_gem_object_get_dirty_page(obj, 0));
>
> Was this another bug, that the page/object wasn't dirtied before?

It wouldn't have made any difference; the object is pinned in the GTT 
forever, so it can't be swapped out or reclaimed.

>> -    uint32_t wq_head;
>
> Hm ok I don't get why kmap caching means removing this as well?

'wq_head' was an optimisation so that we could check whether there was 
known to be space in the workqueue without kmapping and reading the 
process descriptor. Now that the client (which includes the process 
descriptor) is permanently mapped, there's no advantage to caching the 
head; we might just as well read the current value from 'desc->head' 
each time.

> Btw I don't see patch 2/2 ?
>
> Regards,
>
> Tvrtko
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/i915/guc: Simplify code by keeping kmap of guc_client object
  2016-02-12 13:03 ` Tvrtko Ursulin
  2016-02-15 14:39   ` Dave Gordon
@ 2016-02-15 15:23   ` Dave Gordon
  2016-02-16 16:47     ` Yu Dai
  1 sibling, 1 reply; 9+ messages in thread
From: Dave Gordon @ 2016-02-15 15:23 UTC (permalink / raw)
  To: Tvrtko Ursulin, yu.dai, intel-gfx; +Cc: daniel.vetter

On 12/02/16 13:03, Tvrtko Ursulin wrote:
>
> On 11/02/16 23:09, yu.dai@intel.com wrote:
>> From: Alex Dai <yu.dai@intel.com>
>>
>> GuC client object is always pinned during its life cycle. We cache
>> the kmap of its first page, which includes guc_process_desc and
>> doorbell. By doing so, we can simplify the code where we read from
>> this page to get where GuC is progressing on work queue; and the
>> code where driver program doorbell to send work queue item to GuC.

There's still one k(un)map_atomic() pair, in guc_add_workqueue_item(). 
Maybe we could get rid of that one too? So instead of kmapping only the 
first page of the client, we could vmap() all three pages and so not 
need to kmap_atomic() the WQ pages on the fly.

There's a handy vmap_obj() function we might use, except it's currently 
static ...

.Dave.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/i915/guc: Simplify code by keeping kmap of guc_client object
  2016-02-15 14:39   ` Dave Gordon
@ 2016-02-16 16:27     ` Yu Dai
  0 siblings, 0 replies; 9+ messages in thread
From: Yu Dai @ 2016-02-16 16:27 UTC (permalink / raw)
  To: Dave Gordon, Tvrtko Ursulin, intel-gfx; +Cc: daniel.vetter



On 02/15/2016 06:39 AM, Dave Gordon wrote:
> On 12/02/16 13:03, Tvrtko Ursulin wrote:
> >
> > On 11/02/16 23:09, yu.dai@intel.com wrote:
> >> From: Alex Dai <yu.dai@intel.com>
> >>
> >> GuC client object is always pinned during its life cycle. We cache
> >> the kmap of its first page, which includes guc_process_desc and
> >> doorbell. By doing so, we can simplify the code where we read from
> >> this page to get where GuC is progressing on work queue; and the
> >> code where driver program doorbell to send work queue item to GuC.
>
> [snip]
>
> >>
> >> -    /* Finally, update the cached copy of the GuC's WQ head */
> >> -    gc->wq_head = desc->head;
> >
> > Did you mean to remove the above?
>
> I wondered that too at first, but the answer is "yes" -- see below.
>
> >>
> >> +    client->client_base = kmap(i915_gem_object_get_dirty_page(obj, 0));
> >
> > Was this another bug, that the page/object wasn't dirtied before?
>
> It wouldn't have made any difference; the object is pinned in the GTT
> forever, so it can't be swapped out or reclaimed.
>
> >> -    uint32_t wq_head;
> >
> > Hm ok I don't get why kmap caching means removing this as well?
>
> 'wq_head' was an optimisation so that we could check whether there was
> known to be space in the workqueue without kmapping and reading the
> process descriptor. Now that the client (which includes the process
> descriptor) is permanently mapped, there's no advantage to caching the
> head; we might just as well read the current value from 'desc->head'
> each time.
>
> > Btw I don't see patch 2/2 ?
> >
>
My bad, there is no 2/2. Thanks Dave for answering the questions. I have 
no more comments. :-)

Thanks,
Alex
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/i915/guc: Simplify code by keeping kmap of guc_client object
  2016-02-15 15:23   ` Dave Gordon
@ 2016-02-16 16:47     ` Yu Dai
  2016-02-17 16:04       ` Daniel Vetter
  0 siblings, 1 reply; 9+ messages in thread
From: Yu Dai @ 2016-02-16 16:47 UTC (permalink / raw)
  To: Dave Gordon, Tvrtko Ursulin, intel-gfx; +Cc: daniel.vetter



On 02/15/2016 07:23 AM, Dave Gordon wrote:
> On 12/02/16 13:03, Tvrtko Ursulin wrote:
> >
> > On 11/02/16 23:09, yu.dai@intel.com wrote:
> >> From: Alex Dai <yu.dai@intel.com>
> >>
> >> GuC client object is always pinned during its life cycle. We cache
> >> the kmap of its first page, which includes guc_process_desc and
> >> doorbell. By doing so, we can simplify the code where we read from
> >> this page to get where GuC is progressing on work queue; and the
> >> code where driver program doorbell to send work queue item to GuC.
>
> There's still one k(un)map_atomic() pair, in guc_add_workqueue_item().
> Maybe we could get rid of that one too? So instead of kmapping only the
> first page of the client, we could vmap() all three pages and so not
> need to kmap_atomic() the WQ pages on the fly.
>
> There's a handy vmap_obj() function we might use, except it's currently
> static ...
>
>
Yes, there is a vmap_obj we can use but it is static. Actually two, 
vmap_batch() in i915_cmd_parser.c and vmap_obj() in intel_ringbuffer.c. 
Maybe it is a good idea to make it global, so GuC can use it too.

Thanks,
Alex
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/i915/guc: Simplify code by keeping kmap of guc_client object
  2016-02-16 16:47     ` Yu Dai
@ 2016-02-17 16:04       ` Daniel Vetter
  2016-02-17 17:48         ` Yu Dai
  0 siblings, 1 reply; 9+ messages in thread
From: Daniel Vetter @ 2016-02-17 16:04 UTC (permalink / raw)
  To: Yu Dai; +Cc: daniel.vetter, intel-gfx

On Tue, Feb 16, 2016 at 08:47:07AM -0800, Yu Dai wrote:
> 
> 
> On 02/15/2016 07:23 AM, Dave Gordon wrote:
> >On 12/02/16 13:03, Tvrtko Ursulin wrote:
> >>
> >> On 11/02/16 23:09, yu.dai@intel.com wrote:
> >>> From: Alex Dai <yu.dai@intel.com>
> >>>
> >>> GuC client object is always pinned during its life cycle. We cache
> >>> the kmap of its first page, which includes guc_process_desc and
> >>> doorbell. By doing so, we can simplify the code where we read from
> >>> this page to get where GuC is progressing on work queue; and the
> >>> code where driver program doorbell to send work queue item to GuC.
> >
> >There's still one k(un)map_atomic() pair, in guc_add_workqueue_item().
> >Maybe we could get rid of that one too? So instead of kmapping only the
> >first page of the client, we could vmap() all three pages and so not
> >need to kmap_atomic() the WQ pages on the fly.
> >
> >There's a handy vmap_obj() function we might use, except it's currently
> >static ...
> >
> >
> Yes, there is a vmap_obj we can use but it is static. Actually two,
> vmap_batch() in i915_cmd_parser.c and vmap_obj() in intel_ringbuffer.c.
> Maybe it is a good idea to make it global, so GuC can use it too.

There should be a vmap function somewhere in the dma-buf code too iirc.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/i915/guc: Simplify code by keeping kmap of guc_client object
  2016-02-17 16:04       ` Daniel Vetter
@ 2016-02-17 17:48         ` Yu Dai
  2016-02-18  9:51           ` Chris Wilson
  0 siblings, 1 reply; 9+ messages in thread
From: Yu Dai @ 2016-02-17 17:48 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: daniel.vetter, intel-gfx



On 02/17/2016 08:04 AM, Daniel Vetter wrote:
> On Tue, Feb 16, 2016 at 08:47:07AM -0800, Yu Dai wrote:
> >
> >
> > On 02/15/2016 07:23 AM, Dave Gordon wrote:
> > >On 12/02/16 13:03, Tvrtko Ursulin wrote:
> > >>
> > >> On 11/02/16 23:09, yu.dai@intel.com wrote:
> > >>> From: Alex Dai <yu.dai@intel.com>
> > >>>
> > >>> GuC client object is always pinned during its life cycle. We cache
> > >>> the kmap of its first page, which includes guc_process_desc and
> > >>> doorbell. By doing so, we can simplify the code where we read from
> > >>> this page to get where GuC is progressing on work queue; and the
> > >>> code where driver program doorbell to send work queue item to GuC.
> > >
> > >There's still one k(un)map_atomic() pair, in guc_add_workqueue_item().
> > >Maybe we could get rid of that one too? So instead of kmapping only the
> > >first page of the client, we could vmap() all three pages and so not
> > >need to kmap_atomic() the WQ pages on the fly.
> > >
> > >There's a handy vmap_obj() function we might use, except it's currently
> > >static ...
> > >
> > >
> > Yes, there is a vmap_obj we can use but it is static. Actually two,
> > vmap_batch() in i915_cmd_parser.c and vmap_obj() in intel_ringbuffer.c.
> > Maybe it is a good idea to make it global, so GuC can use it too.
>
> There should be a vmap function somewhere in the dma-buf code too iirc.

Yes, i915_gem_dmabuf_vmap. Let me try to make a common helper function that can be shared.

Alex

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/i915/guc: Simplify code by keeping kmap of guc_client object
  2016-02-17 17:48         ` Yu Dai
@ 2016-02-18  9:51           ` Chris Wilson
  0 siblings, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2016-02-18  9:51 UTC (permalink / raw)
  To: Yu Dai; +Cc: daniel.vetter, intel-gfx

On Wed, Feb 17, 2016 at 09:48:31AM -0800, Yu Dai wrote:
> 
> 
> On 02/17/2016 08:04 AM, Daniel Vetter wrote:
> >On Tue, Feb 16, 2016 at 08:47:07AM -0800, Yu Dai wrote:
> >>
> >>
> >> On 02/15/2016 07:23 AM, Dave Gordon wrote:
> >> >On 12/02/16 13:03, Tvrtko Ursulin wrote:
> >> >>
> >> >> On 11/02/16 23:09, yu.dai@intel.com wrote:
> >> >>> From: Alex Dai <yu.dai@intel.com>
> >> >>>
> >> >>> GuC client object is always pinned during its life cycle. We cache
> >> >>> the kmap of its first page, which includes guc_process_desc and
> >> >>> doorbell. By doing so, we can simplify the code where we read from
> >> >>> this page to get where GuC is progressing on work queue; and the
> >> >>> code where driver program doorbell to send work queue item to GuC.
> >> >
> >> >There's still one k(un)map_atomic() pair, in guc_add_workqueue_item().
> >> >Maybe we could get rid of that one too? So instead of kmapping only the
> >> >first page of the client, we could vmap() all three pages and so not
> >> >need to kmap_atomic() the WQ pages on the fly.
> >> >
> >> >There's a handy vmap_obj() function we might use, except it's currently
> >> >static ...
> >> >
> >> >
> >> Yes, there is a vmap_obj we can use but it is static. Actually two,
> >> vmap_batch() in i915_cmd_parser.c and vmap_obj() in intel_ringbuffer.c.
> >> Maybe it is a good idea to make it global, so GuC can use it too.
> >
> >There should be a vmap function somewhere in the dma-buf code too iirc.
> 
> Yes, i915_gem_dmabuf_vmap. Let me try to make a common helper function can be shared.

I have sent such a patch many times.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2016-02-18  9:51 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-11 23:09 [PATCH 1/2] drm/i915/guc: Simplify code by keeping kmap of guc_client object yu.dai
2016-02-12 13:03 ` Tvrtko Ursulin
2016-02-15 14:39   ` Dave Gordon
2016-02-16 16:27     ` Yu Dai
2016-02-15 15:23   ` Dave Gordon
2016-02-16 16:47     ` Yu Dai
2016-02-17 16:04       ` Daniel Vetter
2016-02-17 17:48         ` Yu Dai
2016-02-18  9:51           ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.