All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists.
@ 2018-03-27 15:17 Tomasz Lis
  2018-03-27 15:40 ` ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
                   ` (38 more replies)
  0 siblings, 39 replies; 70+ messages in thread
From: Tomasz Lis @ 2018-03-27 15:17 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

This patch adds support for requesting preempt-to-idle by setting the
appropriate bit in the Execlist Control Register, and for receiving the
preemption result from the Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

Bspec: 18922
Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |  2 ++
 drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
 drivers/gpu/drm/i915/intel_device_info.h |  1 +
 drivers/gpu/drm/i915/intel_lrc.c         | 45 +++++++++++++++++++++++++++-----
 drivers/gpu/drm/i915/intel_lrc.h         |  1 +
 5 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 800230b..c32580b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
 		((dev_priv)->info.has_logical_ring_elsq)
 #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
 		((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+		((dev_priv)->info.has_hw_preempt_to_idle)
 
 #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
 
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
 	GEN(11), \
 	.ddb_size = 2048, \
 	.has_csr = 0, \
-	.has_logical_ring_elsq = 1
+	.has_logical_ring_elsq = 1, \
+	.has_hw_preempt_to_idle = 1
 
 static const struct intel_device_info intel_icelake_11_info = {
 	GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
 	func(has_logical_ring_contexts); \
 	func(has_logical_ring_elsq); \
 	func(has_logical_ring_preemption); \
+	func(has_hw_preempt_to_idle); \
 	func(has_overlay); \
 	func(has_pooled_eu); \
 	func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ba7f783..1a22de4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -153,6 +153,7 @@
 #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE	(1 << 29)
 
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -183,7 +184,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 				const struct i915_request *last,
 				int prio)
 {
-	return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+	return (engine->i915->preempt_context ||
+		HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
+		 prio > max(rq_prio(last), 0);
 }
 
 /**
@@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
+static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists *execlists = &engine->execlists;
+
+	GEM_TRACE("%s\n", engine->name);
+
+	/*
+	 * Hardware which HAS_HW_PREEMPT_TO_IDLE() always also
+	 * HAS_LOGICAL_RING_ELSQ(), so ctrl_reg must be set; trap if it is not.
+	 */
+	GEM_BUG_ON(!execlists->ctrl_reg);
+
+	/* trigger preemption to idle */
+	writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+
+	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
+	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -594,7 +616,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			goto unlock;
 
 		if (need_preempt(engine, last, execlists->queue_priority)) {
-			inject_preempt_context(engine);
+			if (HAS_HW_PREEMPT_TO_IDLE(engine->i915))
+				gen11_preempt_to_idle(engine);
+			else
+				inject_preempt_context(engine);
 			goto unlock;
 		}
 
@@ -962,10 +987,13 @@ static void execlists_submission_tasklet(unsigned long data)
 				  status, buf[2*head + 1],
 				  execlists->active);
 
-			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-				      GEN8_CTX_STATUS_PREEMPTED))
+			/* Check if switched to active or preempted to active */
+			if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+					GEN8_CTX_STATUS_PREEMPTED)) &&
+			    !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
 				execlists_set_active(execlists,
 						     EXECLISTS_ACTIVE_HWACK);
+
 			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
 				execlists_clear_active(execlists,
 						       EXECLISTS_ACTIVE_HWACK);
@@ -976,8 +1004,13 @@ static void execlists_submission_tasklet(unsigned long data)
 			/* We should never get a COMPLETED | IDLE_ACTIVE! */
 			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
 
-			if (status & GEN8_CTX_STATUS_COMPLETE &&
-			    buf[2*head + 1] == execlists->preempt_complete_status) {
+			/*
+			 * Check if preempted to real idle, either directly or
+			 * the preemptive context already finished executing
+			 */
+			if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+			    (status & GEN8_CTX_STATUS_COMPLETE &&
+			    buf[2*head + 1] == execlists->preempt_complete_status)) {
 				GEM_TRACE("%s preempt-idle\n", engine->name);
 
 				execlists_cancel_port_requests(execlists);
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 59d7b86..958d1b3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@
 #define RING_EXECLIST_SQ_CONTENTS(engine)	_MMIO((engine)->mmio_base + 0x510)
 #define RING_EXECLIST_CONTROL(engine)		_MMIO((engine)->mmio_base + 0x550)
 #define	  EL_CTRL_LOAD				(1 << 0)
+#define	  EL_CTRL_PREEMPT_TO_IDLE		(1 << 1)
 
 /* The docs specify that the write pointer wraps around after 5h, "After status
  * is written out to the last available status QW at offset 5h, this pointer
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
@ 2018-03-27 15:40 ` Patchwork
  2018-03-27 15:56 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (37 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-03-27 15:40 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists.
URL   : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
96268839cd00 drm/i915/gen11: Preempt-to-idle support in execlists.
-:97: CHECK:COMPARISON_TO_NULL: Comparison to NULL could be written "execlists->ctrl_reg"
#97: FILE: drivers/gpu/drm/i915/intel_lrc.c:551:
+	GEM_BUG_ON(execlists->ctrl_reg != NULL);

-:149: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#149: FILE: drivers/gpu/drm/i915/intel_lrc.c:1013:
+			    buf[2*head + 1] == execlists->preempt_complete_status)) {
 			         ^

total: 0 errors, 0 warnings, 2 checks, 114 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
  2018-03-27 15:40 ` ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
@ 2018-03-27 15:56 ` Patchwork
  2018-03-27 20:50 ` ✓ Fi.CI.IGT: " Patchwork
                   ` (36 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-03-27 15:56 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists.
URL   : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

Series 40747v1 drm/i915/gen11: Preempt-to-idle support in execlists.
https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/1/mbox/

---- Known issues:

Test gem_mmap_gtt:
        Subgroup basic-small-bo-tiledx:
                pass       -> FAIL       (fi-gdg-551) fdo#102575
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                dmesg-warn -> PASS       (fi-cnl-y3) fdo#104951
Test prime_vgem:
        Subgroup basic-fence-flip:
                pass       -> FAIL       (fi-ilk-650) fdo#104008

fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
fdo#104951 https://bugs.freedesktop.org/show_bug.cgi?id=104951
fdo#104008 https://bugs.freedesktop.org/show_bug.cgi?id=104008

fi-bdw-5557u     total:285  pass:264  dwarn:0   dfail:0   fail:0   skip:21  time:434s
fi-bdw-gvtdvm    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:451s
fi-blb-e6850     total:285  pass:220  dwarn:1   dfail:0   fail:0   skip:64  time:383s
fi-bsw-n3050     total:285  pass:239  dwarn:0   dfail:0   fail:0   skip:46  time:547s
fi-bwr-2160      total:285  pass:180  dwarn:0   dfail:0   fail:0   skip:105 time:302s
fi-bxt-dsi       total:285  pass:255  dwarn:0   dfail:0   fail:0   skip:30  time:514s
fi-bxt-j4205     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:515s
fi-byt-j1900     total:285  pass:250  dwarn:0   dfail:0   fail:0   skip:35  time:529s
fi-byt-n2820     total:285  pass:246  dwarn:0   dfail:0   fail:0   skip:39  time:514s
fi-cfl-8700k     total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:411s
fi-cfl-s3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:573s
fi-cfl-u         total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:515s
fi-cnl-y3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:594s
fi-elk-e7500     total:285  pass:225  dwarn:1   dfail:0   fail:0   skip:59  time:430s
fi-gdg-551       total:285  pass:176  dwarn:0   dfail:0   fail:1   skip:108 time:326s
fi-glk-1         total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:537s
fi-hsw-4770      total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:404s
fi-ilk-650       total:285  pass:224  dwarn:0   dfail:0   fail:1   skip:60  time:421s
fi-ivb-3520m     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:476s
fi-ivb-3770      total:285  pass:252  dwarn:0   dfail:0   fail:0   skip:33  time:434s
fi-kbl-7500u     total:285  pass:260  dwarn:1   dfail:0   fail:0   skip:24  time:479s
fi-kbl-7567u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:471s
fi-kbl-r         total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:521s
fi-pnv-d510      total:285  pass:219  dwarn:1   dfail:0   fail:0   skip:65  time:660s
fi-skl-6260u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:456s
fi-skl-6600u     total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:531s
fi-skl-6700k2    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:508s
fi-skl-6770hq    total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:491s
fi-skl-guc       total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:430s
fi-skl-gvtdvm    total:285  pass:262  dwarn:0   dfail:0   fail:0   skip:23  time:449s
fi-snb-2520m     total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:584s
fi-snb-2600      total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:403s
Blacklisted hosts:
fi-cnl-psr       total:285  pass:255  dwarn:3   dfail:0   fail:1   skip:26  time:534s
fi-glk-j4005     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:495s

ff7820832182a0f4bebf9092a74ab17f8b3ae7ef drm-tip: 2018y-03m-27d-14h-31m-00s UTC integration manifest
96268839cd00 drm/i915/gen11: Preempt-to-idle support in execlists.

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8505/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✓ Fi.CI.IGT: success for drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
  2018-03-27 15:40 ` ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
  2018-03-27 15:56 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-03-27 20:50 ` Patchwork
  2018-03-27 23:27 ` [PATCH v1] " Chris Wilson
                   ` (35 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-03-27 20:50 UTC (permalink / raw)
  To: Lis, Tomasz; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists.
URL   : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

---- Known issues:

Test kms_cursor_legacy:
        Subgroup flip-vs-cursor-atomic:
                fail       -> PASS       (shard-hsw) fdo#102670
Test kms_flip:
        Subgroup 2x-dpms-vs-vblank-race-interruptible:
                pass       -> FAIL       (shard-hsw) fdo#103060
        Subgroup 2x-flip-vs-wf_vblank:
                fail       -> PASS       (shard-hsw) fdo#100368 +1
Test kms_sysfs_edid_timing:
                pass       -> WARN       (shard-apl) fdo#100047

fdo#102670 https://bugs.freedesktop.org/show_bug.cgi?id=102670
fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#100047 https://bugs.freedesktop.org/show_bug.cgi?id=100047

shard-apl        total:3495 pass:1831 dwarn:1   dfail:0   fail:7   skip:1655 time:12913s
shard-hsw        total:3495 pass:1781 dwarn:1   dfail:0   fail:3   skip:1709 time:11676s
shard-snb        total:3495 pass:1374 dwarn:1   dfail:0   fail:3   skip:2117 time:7015s
Blacklisted hosts:
shard-kbl        total:3493 pass:1950 dwarn:5   dfail:0   fail:9   skip:1528 time:9549s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8505/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (2 preceding siblings ...)
  2018-03-27 20:50 ` ✓ Fi.CI.IGT: " Patchwork
@ 2018-03-27 23:27 ` Chris Wilson
  2018-03-28 16:06   ` Lis, Tomasz
  2018-03-30 18:23   ` Daniele Ceraolo Spurio
  2018-04-19 11:44 ` [PATCH v2] " Tomasz Lis
                   ` (34 subsequent siblings)
  38 siblings, 2 replies; 70+ messages in thread
From: Chris Wilson @ 2018-03-27 23:27 UTC (permalink / raw)
  To: Tomasz Lis, intel-gfx; +Cc: mika.kuoppala

Quoting Tomasz Lis (2018-03-27 16:17:59)
> The patch adds support of preempt-to-idle requesting by setting a proper
> bit within Execlist Control Register, and receiving preemption result from
> Context Status Buffer.
> 
> Preemption in previous gens required a special batch buffer to be executed,
> so the Command Streamer never preempted to idle directly. In Icelake it is
> possible, as there is a hardware mechanism to inform the kernel about
> status of the preemption request.
> 
> This patch does not cover using the new preemption mechanism when GuC is
> active.
> 
> Bspec: 18922
> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h          |  2 ++
>  drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
>  drivers/gpu/drm/i915/intel_device_info.h |  1 +
>  drivers/gpu/drm/i915/intel_lrc.c         | 45 +++++++++++++++++++++++++++-----
>  drivers/gpu/drm/i915/intel_lrc.h         |  1 +
>  5 files changed, 45 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 800230b..c32580b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
>                 ((dev_priv)->info.has_logical_ring_elsq)
>  #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>                 ((dev_priv)->info.has_logical_ring_preemption)
> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
> +               ((dev_priv)->info.has_hw_preempt_to_idle)
>  
>  #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>  
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 4364922..66b6700 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
>         GEN(11), \
>         .ddb_size = 2048, \
>         .has_csr = 0, \
> -       .has_logical_ring_elsq = 1
> +       .has_logical_ring_elsq = 1, \
> +       .has_hw_preempt_to_idle = 1
>  
>  static const struct intel_device_info intel_icelake_11_info = {
>         GEN11_FEATURES,
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 933e316..4eb97b5 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -98,6 +98,7 @@ enum intel_platform {
>         func(has_logical_ring_contexts); \
>         func(has_logical_ring_elsq); \
>         func(has_logical_ring_preemption); \
> +       func(has_hw_preempt_to_idle); \
>         func(has_overlay); \
>         func(has_pooled_eu); \
>         func(has_psr); \
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index ba7f783..1a22de4 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -153,6 +153,7 @@
>  #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>  #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
>  #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
> +#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
>  
>  #define GEN8_CTX_STATUS_COMPLETED_MASK \
>          (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
> @@ -183,7 +184,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>                                 const struct i915_request *last,
>                                 int prio)
>  {
> -       return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
> +       return (engine->i915->preempt_context ||
> +               HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&

Well, you haven't actually disabled allocating the preempt_context so...

But at any rate, making this an engine->flag would eliminate one pointer
dance.

> +                prio > max(rq_prio(last), 0);
>  }
>  
>  /**
> @@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>         execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>  }
>  
> +static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
> +{
> +       struct intel_engine_execlists *execlists = &engine->execlists;
> +
> +       GEM_TRACE("%s\n", engine->name);
> +
> +       /*
> +        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
> +        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
> +        */
> +       GEM_BUG_ON(execlists->ctrl_reg != NULL);
> +
> +       /* trigger preemption to idle */
> +       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);

Future plans? Because just inserting the branch into the setter of
inject_preempt_context() resolves a lot of conflicts with other work.

> @@ -962,10 +987,13 @@ static void execlists_submission_tasklet(unsigned long data)
>                                   status, buf[2*head + 1],
>                                   execlists->active);
>  
> -                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> -                                     GEN8_CTX_STATUS_PREEMPTED))
> +                       /* Check if switched to active or preempted to active */
> +                       if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> +                                       GEN8_CTX_STATUS_PREEMPTED)) &&
> +                           !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))

Setting HWACK here is harmless as it gets cleared again. Unless, there
is some oddity in the code flow.

>                                 execlists_set_active(execlists,
>                                                      EXECLISTS_ACTIVE_HWACK);
> +
>                         if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>                                 execlists_clear_active(execlists,
>                                                        EXECLISTS_ACTIVE_HWACK);
> @@ -976,8 +1004,13 @@ static void execlists_submission_tasklet(unsigned long data)
>                         /* We should never get a COMPLETED | IDLE_ACTIVE! */
>                         GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>  
> -                       if (status & GEN8_CTX_STATUS_COMPLETE &&
> -                           buf[2*head + 1] == execlists->preempt_complete_status) {
> +                       /*
> +                        * Check if preempted to real idle, either directly or
> +                        * the preemptive context already finished executing
> +                        */
> +                       if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
> +                           (status & GEN8_CTX_STATUS_COMPLETE &&
> +                           buf[2*head + 1] == execlists->preempt_complete_status)) {
>                                 GEM_TRACE("%s preempt-idle\n", engine->name);

Hmm. I was hoping that we would be able to engineer a single check to
cover all sins. Might have been overly optimistic, but I can dream.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-03-27 23:27 ` [PATCH v1] " Chris Wilson
@ 2018-03-28 16:06   ` Lis, Tomasz
  2018-03-28 22:28     ` Chris Wilson
  2018-03-30 18:23   ` Daniele Ceraolo Spurio
  1 sibling, 1 reply; 70+ messages in thread
From: Lis, Tomasz @ 2018-03-28 16:06 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: mika.kuoppala



On 2018-03-28 01:27, Chris Wilson wrote:
> Quoting Tomasz Lis (2018-03-27 16:17:59)
>> The patch adds support of preempt-to-idle requesting by setting a proper
>> bit within Execlist Control Register, and receiving preemption result from
>> Context Status Buffer.
>>
>> Preemption in previous gens required a special batch buffer to be executed,
>> so the Command Streamer never preempted to idle directly. In Icelake it is
>> possible, as there is a hardware mechanism to inform the kernel about
>> status of the preemption request.
>>
>> This patch does not cover using the new preemption mechanism when GuC is
>> active.
>>
>> Bspec: 18922
>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h          |  2 ++
>>   drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
>>   drivers/gpu/drm/i915/intel_device_info.h |  1 +
>>   drivers/gpu/drm/i915/intel_lrc.c         | 45 +++++++++++++++++++++++++++-----
>>   drivers/gpu/drm/i915/intel_lrc.h         |  1 +
>>   5 files changed, 45 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 800230b..c32580b 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
>>                  ((dev_priv)->info.has_logical_ring_elsq)
>>   #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>>                  ((dev_priv)->info.has_logical_ring_preemption)
>> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
>> +               ((dev_priv)->info.has_hw_preempt_to_idle)
>>   
>>   #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>>   
>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
>> index 4364922..66b6700 100644
>> --- a/drivers/gpu/drm/i915/i915_pci.c
>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>> @@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
>>          GEN(11), \
>>          .ddb_size = 2048, \
>>          .has_csr = 0, \
>> -       .has_logical_ring_elsq = 1
>> +       .has_logical_ring_elsq = 1, \
>> +       .has_hw_preempt_to_idle = 1
>>   
>>   static const struct intel_device_info intel_icelake_11_info = {
>>          GEN11_FEATURES,
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
>> index 933e316..4eb97b5 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>> @@ -98,6 +98,7 @@ enum intel_platform {
>>          func(has_logical_ring_contexts); \
>>          func(has_logical_ring_elsq); \
>>          func(has_logical_ring_preemption); \
>> +       func(has_hw_preempt_to_idle); \
>>          func(has_overlay); \
>>          func(has_pooled_eu); \
>>          func(has_psr); \
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index ba7f783..1a22de4 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -153,6 +153,7 @@
>>   #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>>   #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
>>   #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
>> +#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
>>   
>>   #define GEN8_CTX_STATUS_COMPLETED_MASK \
>>           (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
>> @@ -183,7 +184,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>>                                  const struct i915_request *last,
>>                                  int prio)
>>   {
>> -       return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
>> +       return (engine->i915->preempt_context ||
>> +               HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
> Well, you haven't actually disabled allocating the preempt_context so...
Yes.. I had mixed feelings about changing needs_preempt_context() now, 
as that would mean adding a temporary condition on GuC until the GuC 
preemption is merged.
I will add the conditions and disable the allocation in v2 of the patch.
> But at any rate, making this an engine->flag would eliminate one pointer
> dance.
Could be an interesting idea for a separate patch.
>
>> +                prio > max(rq_prio(last), 0);
>>   }
>>   
>>   /**
>> @@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>>          execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>>   }
>>   
>> +static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
>> +{
>> +       struct intel_engine_execlists *execlists = &engine->execlists;
>> +
>> +       GEM_TRACE("%s\n", engine->name);
>> +
>> +       /*
>> +        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
>> +        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
>> +        */
>> +       GEM_BUG_ON(execlists->ctrl_reg != NULL);
>> +
>> +       /* trigger preemption to idle */
>> +       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
> Future plans? Because just inserting the branch into the setter of
> inject_preempt_context() resolves a lot of conflicts with other work.
My arguments for a separate function are:
- better code readability
- keeping the symmetry between execlist and GuC flow - GuC preemption 
patches will introduce separate function as well
- only 4 lines of the function would be common
- the name inject_preempt_context() wouldn't match the new purpose, so 
renaming would be needed
- merging would make the code less self-documenting, as the two separate 
preempt methods would no longer have distinct names

That's all, I don't have any future plans for it. If you want me to 
merge the two, let me know.

>
>> @@ -962,10 +987,13 @@ static void execlists_submission_tasklet(unsigned long data)
>>                                    status, buf[2*head + 1],
>>                                    execlists->active);
>>   
>> -                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>> -                                     GEN8_CTX_STATUS_PREEMPTED))
>> +                       /* Check if switched to active or preempted to active */
>> +                       if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>> +                                       GEN8_CTX_STATUS_PREEMPTED)) &&
>> +                           !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
> Setting HWACK here is harmless as it gets cleared again. Unless, there
> is some oddity in the code flow.
I will check if lack of the change affects test results.
Personally, I would keep this change, even if only for allowing simple 
definition of what HWACK flag means.
>
>>                                  execlists_set_active(execlists,
>>                                                       EXECLISTS_ACTIVE_HWACK);
>> +
>>                          if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>>                                  execlists_clear_active(execlists,
>>                                                         EXECLISTS_ACTIVE_HWACK);
>> @@ -976,8 +1004,13 @@ static void execlists_submission_tasklet(unsigned long data)
>>                          /* We should never get a COMPLETED | IDLE_ACTIVE! */
>>                          GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>>   
>> -                       if (status & GEN8_CTX_STATUS_COMPLETE &&
>> -                           buf[2*head + 1] == execlists->preempt_complete_status) {
>> +                       /*
>> +                        * Check if preempted to real idle, either directly or
>> +                        * the preemptive context already finished executing
>> +                        */
>> +                       if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
>> +                           (status & GEN8_CTX_STATUS_COMPLETE &&
>> +                           buf[2*head + 1] == execlists->preempt_complete_status)) {
>>                                  GEM_TRACE("%s preempt-idle\n", engine->name);
> Hmm. I was hoping that we would be able to engineer a single check to
> cover all sins. Might have been overly optimistic, but I can dream.
> -Chris
I don't see any way to do that, besides creating a separate function for 
gen11.

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-03-28 16:06   ` Lis, Tomasz
@ 2018-03-28 22:28     ` Chris Wilson
  2018-03-30 15:42       ` Lis, Tomasz
  0 siblings, 1 reply; 70+ messages in thread
From: Chris Wilson @ 2018-03-28 22:28 UTC (permalink / raw)
  To: Lis, Tomasz, intel-gfx; +Cc: mika.kuoppala

Quoting Lis, Tomasz (2018-03-28 17:06:58)
> 
> 
> On 2018-03-28 01:27, Chris Wilson wrote:
> > Quoting Tomasz Lis (2018-03-27 16:17:59)
> >> The patch adds support of preempt-to-idle requesting by setting a proper
> >> bit within Execlist Control Register, and receiving preemption result from
> >> Context Status Buffer.
> >>
> >> Preemption in previous gens required a special batch buffer to be executed,
> >> so the Command Streamer never preempted to idle directly. In Icelake it is
> >> possible, as there is a hardware mechanism to inform the kernel about
> >> status of the preemption request.
> >>
> >> This patch does not cover using the new preemption mechanism when GuC is
> >> active.
> >>
> >> Bspec: 18922
> >> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
> >> ---
> >>   drivers/gpu/drm/i915/i915_drv.h          |  2 ++
> >>   drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
> >>   drivers/gpu/drm/i915/intel_device_info.h |  1 +
> >>   drivers/gpu/drm/i915/intel_lrc.c         | 45 +++++++++++++++++++++++++++-----
> >>   drivers/gpu/drm/i915/intel_lrc.h         |  1 +
> >>   5 files changed, 45 insertions(+), 7 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >> index 800230b..c32580b 100644
> >> --- a/drivers/gpu/drm/i915/i915_drv.h
> >> +++ b/drivers/gpu/drm/i915/i915_drv.h
> >> @@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
> >>                  ((dev_priv)->info.has_logical_ring_elsq)
> >>   #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
> >>                  ((dev_priv)->info.has_logical_ring_preemption)
> >> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
> >> +               ((dev_priv)->info.has_hw_preempt_to_idle)
> >>   
> >>   #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
> >>   
> >> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> >> index 4364922..66b6700 100644
> >> --- a/drivers/gpu/drm/i915/i915_pci.c
> >> +++ b/drivers/gpu/drm/i915/i915_pci.c
> >> @@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
> >>          GEN(11), \
> >>          .ddb_size = 2048, \
> >>          .has_csr = 0, \
> >> -       .has_logical_ring_elsq = 1
> >> +       .has_logical_ring_elsq = 1, \
> >> +       .has_hw_preempt_to_idle = 1
> >>   
> >>   static const struct intel_device_info intel_icelake_11_info = {
> >>          GEN11_FEATURES,
> >> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> >> index 933e316..4eb97b5 100644
> >> --- a/drivers/gpu/drm/i915/intel_device_info.h
> >> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> >> @@ -98,6 +98,7 @@ enum intel_platform {
> >>          func(has_logical_ring_contexts); \
> >>          func(has_logical_ring_elsq); \
> >>          func(has_logical_ring_preemption); \
> >> +       func(has_hw_preempt_to_idle); \
> >>          func(has_overlay); \
> >>          func(has_pooled_eu); \
> >>          func(has_psr); \
> >> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >> index ba7f783..1a22de4 100644
> >> --- a/drivers/gpu/drm/i915/intel_lrc.c
> >> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> >> @@ -153,6 +153,7 @@
> >>   #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
> >>   #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
> >>   #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
> >> +#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
> >>   
> >>   #define GEN8_CTX_STATUS_COMPLETED_MASK \
> >>           (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
> >> @@ -183,7 +184,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
> >>                                  const struct i915_request *last,
> >>                                  int prio)
> >>   {
> >> -       return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
> >> +       return (engine->i915->preempt_context ||
> >> +               HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
> > Well, you haven't actually disabled allocating the preempt_context so...
> Yes.. I had mixed feelings about changing needs_preempt_context() now, 
> as that would mean adding a temporary condition on GuC until the GuC 
> preemption is merged.
> I will add the conditions and disable the allocation in v2 of the patch.
> > But at any rate, making this an engine->flag would eliminate one pointer
> > dance.
> Could be an interesting idea for a separate patch.

To land first ;)

> >> +                prio > max(rq_prio(last), 0);
> >>   }
> >>   
> >>   /**
> >> @@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
> >>          execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
> >>   }
> >>   
> >> +static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
> >> +{
> >> +       struct intel_engine_execlists *execlists = &engine->execlists;
> >> +
> >> +       GEM_TRACE("%s\n", engine->name);
> >> +
> >> +       /*
> >> +        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
> >> +        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
> >> +        */
> >> +       GEM_BUG_ON(execlists->ctrl_reg != NULL);
> >> +
> >> +       /* trigger preemption to idle */
> >> +       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
> > Future plans? Because just inserting the branch into the setter of
> > inject_preempt_context() resolves a lot of conflicts with other work.
> My arguments for separate function are:
> - better code readability
> - keeping the symmetry between execlist and GuC flow - GuC preemption 
> patches will introduce separate function as well
> - only 4 lines of the function would be common
> - the name inject_preempt_context() wouldn't match the new purpose, so 
> renaming would be needed
> - reduced self-documenting code due to two separate preempt methods not 
> having distinct names
> 
> That's all, I don't have any future plans for it. If you want me to 
> merge the two, let me know.

The problem that I am worrying about is that we will duplicate a bunch of
other code, the actual ELS[PQ] write is the smaller portion. Plus we
already have the branch on something much more pleasant.

> >> @@ -962,10 +987,13 @@ static void execlists_submission_tasklet(unsigned long data)
> >>                                    status, buf[2*head + 1],
> >>                                    execlists->active);
> >>   
> >> -                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> >> -                                     GEN8_CTX_STATUS_PREEMPTED))
> >> +                       /* Check if switched to active or preempted to active */
> >> +                       if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> >> +                                       GEN8_CTX_STATUS_PREEMPTED)) &&
> >> +                           !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
> > Setting HWACK here is harmless as it gets cleared again. Unless, there
> > is some oddity in the code flow.
> I will check if lack of the change affects test results.
> Personally, I would keep this change, even if only for allowing simple 
> definition of what HWACK flag means.

The simple definition is the opposite one, imo. We set the flag after we
get the corresponding response from HW; any preemption or activate event
must follow the most recent ELSP write. So that will include the
preemption event following the preempt-idle write.

Then on deciding that the HW is idle, we apply the complication such
that execlists->active == 0. (That rule is what breaks the pattern.)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-03-28 22:28     ` Chris Wilson
@ 2018-03-30 15:42       ` Lis, Tomasz
  2018-03-30 19:45         ` Daniele Ceraolo Spurio
  0 siblings, 1 reply; 70+ messages in thread
From: Lis, Tomasz @ 2018-03-30 15:42 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: mika.kuoppala



On 2018-03-29 00:28, Chris Wilson wrote:
> Quoting Lis, Tomasz (2018-03-28 17:06:58)
>>
>> On 2018-03-28 01:27, Chris Wilson wrote:
>>> Quoting Tomasz Lis (2018-03-27 16:17:59)
>>>> The patch adds support of preempt-to-idle requesting by setting a proper
>>>> bit within Execlist Control Register, and receiving preemption result from
>>>> Context Status Buffer.
>>>>
>>>> Preemption in previous gens required a special batch buffer to be executed,
>>>> so the Command Streamer never preempted to idle directly. In Icelake it is
>>>> possible, as there is a hardware mechanism to inform the kernel about
>>>> status of the preemption request.
>>>>
>>>> This patch does not cover using the new preemption mechanism when GuC is
>>>> active.
>>>>
>>>> Bspec: 18922
>>>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>>>> ---
>>>>    drivers/gpu/drm/i915/i915_drv.h          |  2 ++
>>>>    drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
>>>>    drivers/gpu/drm/i915/intel_device_info.h |  1 +
>>>>    drivers/gpu/drm/i915/intel_lrc.c         | 45 +++++++++++++++++++++++++++-----
>>>>    drivers/gpu/drm/i915/intel_lrc.h         |  1 +
>>>>    5 files changed, 45 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>>> index 800230b..c32580b 100644
>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>> @@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
>>>>                   ((dev_priv)->info.has_logical_ring_elsq)
>>>>    #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>>>>                   ((dev_priv)->info.has_logical_ring_preemption)
>>>> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
>>>> +               ((dev_priv)->info.has_hw_preempt_to_idle)
>>>>    
>>>>    #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>>>>    
>>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
>>>> index 4364922..66b6700 100644
>>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>>> @@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
>>>>           GEN(11), \
>>>>           .ddb_size = 2048, \
>>>>           .has_csr = 0, \
>>>> -       .has_logical_ring_elsq = 1
>>>> +       .has_logical_ring_elsq = 1, \
>>>> +       .has_hw_preempt_to_idle = 1
>>>>    
>>>>    static const struct intel_device_info intel_icelake_11_info = {
>>>>           GEN11_FEATURES,
>>>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
>>>> index 933e316..4eb97b5 100644
>>>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>>>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>>>> @@ -98,6 +98,7 @@ enum intel_platform {
>>>>           func(has_logical_ring_contexts); \
>>>>           func(has_logical_ring_elsq); \
>>>>           func(has_logical_ring_preemption); \
>>>> +       func(has_hw_preempt_to_idle); \
>>>>           func(has_overlay); \
>>>>           func(has_pooled_eu); \
>>>>           func(has_psr); \
>>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>>> index ba7f783..1a22de4 100644
>>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>>> @@ -153,6 +153,7 @@
>>>>    #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>>>>    #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
>>>>    #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
>>>> +#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
>>>>    
>>>>    #define GEN8_CTX_STATUS_COMPLETED_MASK \
>>>>            (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
>>>> @@ -183,7 +184,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>>>>                                   const struct i915_request *last,
>>>>                                   int prio)
>>>>    {
>>>> -       return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
>>>> +       return (engine->i915->preempt_context ||
>>>> +               HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
>>> Well, you haven't actually disabled allocating the preempt_context so...
>> Yes.. I had mixed feelings about changing needs_preempt_context() now,
>> as that would mean adding a temporary condition on GuC until the GuC
>> preemption is merged.
>> I will add the conditions and disable the allocation in v2 of the patch.
>>> But at any rate, making this an engine->flag would eliminate one pointer
>>> dance.
>> Could be an interesting idea for a separate patch.
> To land first ;)
:)
Sure, I can do that.
>>>> +                prio > max(rq_prio(last), 0);
>>>>    }
>>>>    
>>>>    /**
>>>> @@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>>>>           execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>>>>    }
>>>>    
>>>> +static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
>>>> +{
>>>> +       struct intel_engine_execlists *execlists = &engine->execlists;
>>>> +
>>>> +       GEM_TRACE("%s\n", engine->name);
>>>> +
>>>> +       /*
>>>> +        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
>>>> +        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
>>>> +        */
>>>> +       GEM_BUG_ON(execlists->ctrl_reg != NULL);
>>>> +
>>>> +       /* trigger preemption to idle */
>>>> +       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
>>> Future plans? Because just inserting the branch into the setter of
>>> inject_preempt_context() resolves a lot of conflicts with other work.
>> My arguments for separate function are:
>> - better code readability
>> - keeping the symmetry between execlist and GuC flow - GuC preemption
>> patches will introduce separate function as well
>> - only 4 lines of the function would be common
>> - the name inject_preempt_context() wouldn't match the new purpose, so
>> renaming would be needed
>> - reduced self-documenting code due to two separate preempt methods not
>> having distinct names
>>
>> That's all, I don't have any future plans for it. If you want me to
>> merge the two, let me know.
> The problem that I am worrying about is that we will duplicate bunch of
> other code, the actual ELS[PQ] write is the smaller portion. Plus we
> already have the branch on something much more pleasant.
I see. I don't know any details there, so I'm not able to weigh that.
Just let me know whether this possible duplication outweighs the 
arguments I provided, and I will merge these functions.
I'm not overly attached to my solution.
>
>>>> @@ -962,10 +987,13 @@ static void execlists_submission_tasklet(unsigned long data)
>>>>                                     status, buf[2*head + 1],
>>>>                                     execlists->active);
>>>>    
>>>> -                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>>> -                                     GEN8_CTX_STATUS_PREEMPTED))
>>>> +                       /* Check if switched to active or preempted to active */
>>>> +                       if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>>> +                                       GEN8_CTX_STATUS_PREEMPTED)) &&
>>>> +                           !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
>>> Setting HWACK here is harmless as it gets cleared again. Unless, there
>>> is some oddity in the code flow.
>> I will check if lack of the change affects test results.
>> Personally, I would keep this change, even if only for allowing simple
>> definition of what HWACK flag means.
> The simple definition is the opposite one, imo. We set the flag after we
> get the corresponding response from HW; any preemption or activate event
> must follow the most recent ELSP write. So that will include the
> preemption event following the preempt-idle write.
>
> Then on deciding that the HW is idle, we apply the complication such
> that execlists->active == 0. (That rule is what breaks the pattern.)
> -Chris
Ok, I will remove this unnecessary condition.
I tested this and lack of it doesn't seem to affect the results.
(I'll be out next week; expect v2 when I'm back)
-Tomasz

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-03-27 23:27 ` [PATCH v1] " Chris Wilson
  2018-03-28 16:06   ` Lis, Tomasz
@ 2018-03-30 18:23   ` Daniele Ceraolo Spurio
  2018-04-12 17:15     ` Lis, Tomasz
  1 sibling, 1 reply; 70+ messages in thread
From: Daniele Ceraolo Spurio @ 2018-03-30 18:23 UTC (permalink / raw)
  To: Chris Wilson, Tomasz Lis, intel-gfx; +Cc: mika.kuoppala



On 27/03/18 16:27, Chris Wilson wrote:
> Quoting Tomasz Lis (2018-03-27 16:17:59)
>> The patch adds support of preempt-to-idle requesting by setting a proper
>> bit within Execlist Control Register, and receiving preemption result from
>> Context Status Buffer.
>>
>> Preemption in previous gens required a special batch buffer to be executed,
>> so the Command Streamer never preempted to idle directly. In Icelake it is
>> possible, as there is a hardware mechanism to inform the kernel about
>> status of the preemption request.
>>
>> This patch does not cover using the new preemption mechanism when GuC is
>> active.
>>
>> Bspec: 18922
>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h          |  2 ++
>>   drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
>>   drivers/gpu/drm/i915/intel_device_info.h |  1 +
>>   drivers/gpu/drm/i915/intel_lrc.c         | 45 +++++++++++++++++++++++++++-----
>>   drivers/gpu/drm/i915/intel_lrc.h         |  1 +
>>   5 files changed, 45 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 800230b..c32580b 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
>>                  ((dev_priv)->info.has_logical_ring_elsq)
>>   #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>>                  ((dev_priv)->info.has_logical_ring_preemption)
>> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
>> +               ((dev_priv)->info.has_hw_preempt_to_idle)
>>   
>>   #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>>   
>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
>> index 4364922..66b6700 100644
>> --- a/drivers/gpu/drm/i915/i915_pci.c
>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>> @@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
>>          GEN(11), \
>>          .ddb_size = 2048, \
>>          .has_csr = 0, \
>> -       .has_logical_ring_elsq = 1
>> +       .has_logical_ring_elsq = 1, \
>> +       .has_hw_preempt_to_idle = 1
>>   
>>   static const struct intel_device_info intel_icelake_11_info = {
>>          GEN11_FEATURES,
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
>> index 933e316..4eb97b5 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>> @@ -98,6 +98,7 @@ enum intel_platform {
>>          func(has_logical_ring_contexts); \
>>          func(has_logical_ring_elsq); \
>>          func(has_logical_ring_preemption); \
>> +       func(has_hw_preempt_to_idle); \
>>          func(has_overlay); \
>>          func(has_pooled_eu); \
>>          func(has_psr); \
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index ba7f783..1a22de4 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -153,6 +153,7 @@
>>   #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>>   #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
>>   #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
>> +#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
>>   
>>   #define GEN8_CTX_STATUS_COMPLETED_MASK \
>>           (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
>> @@ -183,7 +184,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>>                                  const struct i915_request *last,
>>                                  int prio)
>>   {
>> -       return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
>> +       return (engine->i915->preempt_context ||
>> +               HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
> 
> Well, you haven't actually disabled allocating the preempt_context so...
> 
> But at any rate, making this an engine->flag would eliminate one pointer
> dance.
> 

Can't we re-use I915_SCHEDULER_CAP_PREEMPTION in 
engine->i915->caps.scheduler? That, btw, would need, like here, to be set if 
i915->preempt_context || HAS_HW_PREEMPT_TO_IDLE(i915)

>> +                prio > max(rq_prio(last), 0);
>>   }
>>   
>>   /**
>> @@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>>          execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>>   }
>>   
>> +static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
>> +{
>> +       struct intel_engine_execlists *execlists = &engine->execlists;
>> +
>> +       GEM_TRACE("%s\n", engine->name);
>> +
>> +       /*
>> +        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
>> +        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
>> +        */
>> +       GEM_BUG_ON(execlists->ctrl_reg != NULL);

Shouldn't this check be the other way around?

>> +
>> +       /* trigger preemption to idle */
>> +       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
> 
> Future plans? Because just inserting the branch into the setter of
> inject_preempt_context() resolves a lot of conflicts with other work.
> 
>> @@ -962,10 +987,13 @@ static void execlists_submission_tasklet(unsigned long data)
>>                                    status, buf[2*head + 1],
>>                                    execlists->active);
>>   
>> -                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>> -                                     GEN8_CTX_STATUS_PREEMPTED))
>> +                       /* Check if switched to active or preempted to active */
>> +                       if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>> +                                       GEN8_CTX_STATUS_PREEMPTED)) &&
>> +                           !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
> 
> Setting HWACK here is harmless as it gets cleared again. Unless, there
> is some oddity in the code flow.

There is actually some oddity, but it is more on the HW side. A preempt 
to idle can potentially land on an already idle HW, in which case 
GEN8_CTX_STATUS_ACTIVE_IDLE is not set and GEN8_CTX_STATUS_IDLE_ACTIVE 
is set instead. In this case without this check on 
GEN11_CTX_STATUS_PREEMPT_IDLE we would set the HWACK here but we 
wouldn't call the clear below. Not sure if we end up clearing the flag 
elsewhere, but that doesn't look too nice IMHO.

BTW, the relevant CSB bits coming out in the 2 preempt to idle cases are 
as follows:

preempt active HW:
GEN11_CTX_STATUS_PREEMPT_IDLE | GEN8_CTX_STATUS_ACTIVE_IDLE | 
GEN8_CTX_STATUS_PREEMPTED

Preempt idle HW:
GEN11_CTX_STATUS_PREEMPT_IDLE | GEN8_CTX_STATUS_IDLE_ACTIVE

Daniele

> 
>>                                  execlists_set_active(execlists,
>>                                                       EXECLISTS_ACTIVE_HWACK);
>> +
>>                          if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>>                                  execlists_clear_active(execlists,
>>                                                         EXECLISTS_ACTIVE_HWACK);
>> @@ -976,8 +1004,13 @@ static void execlists_submission_tasklet(unsigned long data)
>>                          /* We should never get a COMPLETED | IDLE_ACTIVE! */
>>                          GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>>   
>> -                       if (status & GEN8_CTX_STATUS_COMPLETE &&
>> -                           buf[2*head + 1] == execlists->preempt_complete_status) {
>> +                       /*
>> +                        * Check if preempted to real idle, either directly or
>> +                        * the preemptive context already finished executing
>> +                        */
>> +                       if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
>> +                           (status & GEN8_CTX_STATUS_COMPLETE &&
>> +                           buf[2*head + 1] == execlists->preempt_complete_status)) {
>>                                  GEM_TRACE("%s preempt-idle\n", engine->name);
> 
> Hmm. I was hoping that we would be able to engineer a single check to
> cover all sins. Might have been overly optimistic, but I can dream.
> -Chris
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-03-30 15:42       ` Lis, Tomasz
@ 2018-03-30 19:45         ` Daniele Ceraolo Spurio
  2018-04-26 14:02           ` Lis, Tomasz
  0 siblings, 1 reply; 70+ messages in thread
From: Daniele Ceraolo Spurio @ 2018-03-30 19:45 UTC (permalink / raw)
  To: Lis, Tomasz, Chris Wilson, intel-gfx; +Cc: mika.kuoppala



On 30/03/18 08:42, Lis, Tomasz wrote:
> 
> 
> On 2018-03-29 00:28, Chris Wilson wrote:
>> Quoting Lis, Tomasz (2018-03-28 17:06:58)
>>>
>>> On 2018-03-28 01:27, Chris Wilson wrote:
>>>> Quoting Tomasz Lis (2018-03-27 16:17:59)
>>>>> The patch adds support of preempt-to-idle requesting by setting a 
>>>>> proper
>>>>> bit within Execlist Control Register, and receiving preemption 
>>>>> result from
>>>>> Context Status Buffer.
>>>>>
>>>>> Preemption in previous gens required a special batch buffer to be 
>>>>> executed,
>>>>> so the Command Streamer never preempted to idle directly. In 
>>>>> Icelake it is
>>>>> possible, as there is a hardware mechanism to inform the kernel about
>>>>> status of the preemption request.
>>>>>
>>>>> This patch does not cover using the new preemption mechanism when 
>>>>> GuC is
>>>>> active.
>>>>>
>>>>> Bspec: 18922
>>>>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>>>>> ---
>>>>>    drivers/gpu/drm/i915/i915_drv.h          |  2 ++
>>>>>    drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
>>>>>    drivers/gpu/drm/i915/intel_device_info.h |  1 +
>>>>>    drivers/gpu/drm/i915/intel_lrc.c         | 45 
>>>>> +++++++++++++++++++++++++++-----
>>>>>    drivers/gpu/drm/i915/intel_lrc.h         |  1 +
>>>>>    5 files changed, 45 insertions(+), 7 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>>>>> b/drivers/gpu/drm/i915/i915_drv.h
>>>>> index 800230b..c32580b 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>>> @@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private 
>>>>> *dev_priv)
>>>>>                   ((dev_priv)->info.has_logical_ring_elsq)
>>>>>    #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>>>>>                   ((dev_priv)->info.has_logical_ring_preemption)
>>>>> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
>>>>> +               ((dev_priv)->info.has_hw_preempt_to_idle)
>>>>>    #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>>>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c 
>>>>> b/drivers/gpu/drm/i915/i915_pci.c
>>>>> index 4364922..66b6700 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>>>> @@ -595,7 +595,8 @@ static const struct intel_device_info 
>>>>> intel_cannonlake_info = {
>>>>>           GEN(11), \
>>>>>           .ddb_size = 2048, \
>>>>>           .has_csr = 0, \
>>>>> -       .has_logical_ring_elsq = 1
>>>>> +       .has_logical_ring_elsq = 1, \
>>>>> +       .has_hw_preempt_to_idle = 1
>>>>>    static const struct intel_device_info intel_icelake_11_info = {
>>>>>           GEN11_FEATURES,
>>>>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
>>>>> b/drivers/gpu/drm/i915/intel_device_info.h
>>>>> index 933e316..4eb97b5 100644
>>>>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>>>>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>>>>> @@ -98,6 +98,7 @@ enum intel_platform {
>>>>>           func(has_logical_ring_contexts); \
>>>>>           func(has_logical_ring_elsq); \
>>>>>           func(has_logical_ring_preemption); \
>>>>> +       func(has_hw_preempt_to_idle); \
>>>>>           func(has_overlay); \
>>>>>           func(has_pooled_eu); \
>>>>>           func(has_psr); \
>>>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>>>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>>>> index ba7f783..1a22de4 100644
>>>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>>>> @@ -153,6 +153,7 @@
>>>>>    #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>>>>>    #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
>>>>>    #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
>>>>> +#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
>>>>>    #define GEN8_CTX_STATUS_COMPLETED_MASK \
>>>>>            (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
>>>>> @@ -183,7 +184,9 @@ static inline bool need_preempt(const struct 
>>>>> intel_engine_cs *engine,
>>>>>                                   const struct i915_request *last,
>>>>>                                   int prio)
>>>>>    {
>>>>> -       return engine->i915->preempt_context && prio > 
>>>>> max(rq_prio(last), 0);
>>>>> +       return (engine->i915->preempt_context ||
>>>>> +               HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
>>>> Well, you haven't actually disabled allocating the preempt_context 
>>>> so...
>>> Yes.. I had mixed feelings about changing needs_preempt_context() now,
>>> as that would mean adding a temporary condition on GuC until the GuC
>>> preemption is merged.
>>> I will add the conditions and disable the allocation in v2 of the patch.
>>>> But at any rate, making this an engine->flag would eliminate one 
>>>> pointer
>>>> dance.
>>> Could be an interesting idea for a separate patch.
>> To land first ;)
> :)
> Sure, I can do that.
>>>>> +                prio > max(rq_prio(last), 0);
>>>>>    }
>>>>>    /**
>>>>> @@ -535,6 +538,25 @@ static void inject_preempt_context(struct 
>>>>> intel_engine_cs *engine)
>>>>>           execlists_set_active(&engine->execlists, 
>>>>> EXECLISTS_ACTIVE_PREEMPT);
>>>>>    }
>>>>> +static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
>>>>> +{
>>>>> +       struct intel_engine_execlists *execlists = &engine->execlists;
>>>>> +
>>>>> +       GEM_TRACE("%s\n", engine->name);
>>>>> +
>>>>> +       /*
>>>>> +        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
>>>>> +        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
>>>>> +        */
>>>>> +       GEM_BUG_ON(execlists->ctrl_reg != NULL);
>>>>> +
>>>>> +       /* trigger preemption to idle */
>>>>> +       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
>>>> Future plans? Because just inserting the branch into the setter of
>>>> inject_preempt_context() resolves a lot of conflicts with other work.
>>> My arguments for separate function are:
>>> - better code readability
>>> - keeping the symmetry between execlist and GuC flow - GuC preemption
>>> patches will introduce separate function as well
>>> - only 4 lines of the function would be common
>>> - the name inject_preempt_context() wouldn't match the new purpose, so
>>> renaming would be needed
>>> - reduced self-documenting code due to two separate preempt methods not
>>> having distinct names
>>>
>>> That's all, I don't have any future plans for it. If you want me to
>>> merge the two, let me know.
>> The problem that I am worrying about is that we will duplicate bunch of
>> other code, the actual ELS[PQ] write is the smaller portion. Plus we
>> already have the branch on something much more pleasant.
> I see. I don't know any details there, so I'm not able to weigh that.
> Just let me know whether this possible duplication outweights the 
> arguments I provided, and I will merge these functions.
> I'm not overly attached to my solution.
>>
>>>>> @@ -962,10 +987,13 @@ static void 
>>>>> execlists_submission_tasklet(unsigned long data)
>>>>>                                     status, buf[2*head + 1],
>>>>>                                     execlists->active);
>>>>> -                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>>>> -                                     GEN8_CTX_STATUS_PREEMPTED))
>>>>> +                       /* Check if switched to active or preempted 
>>>>> to active */
>>>>> +                       if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>>>> +                                       GEN8_CTX_STATUS_PREEMPTED)) &&
>>>>> +                           !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
>>>> Setting HWACK here is harmless as it gets cleared again. Unless, there
>>>> is some oddity in the code flow.
>>> I will check if lack of the change affects test results.
>>> Personally, I would keep this change, even if only for allowing simple
>>> definition of what HWACK flag means.
>> The simple definition is the opposite one, imo. We set the flag after we
>> get the corresponding response from HW; any preemption or activate event
>> must follow the most recent ELSP write. So that will include the
>> preemption event following the preempt-idle write.
>>
>> Then on deciding that the HW is idle, we apply the complication such
>> that execlists->active == 0. (That rule is what breaks the pattern.)
>> -Chris
> Ok, I will remove this unnecessary condition.
> I tested this and lack of it doesn't seem to affect the results.
> (I'll be out next week; expect v2 when I'm back)
> -Tomasz
> 

Do we have any test to cover a preempt to idle on already idle HW (which 
is the case we cover with this flag here)? If not, maybe we could add a 
selftest for that.

Daniele

> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-03-30 18:23   ` Daniele Ceraolo Spurio
@ 2018-04-12 17:15     ` Lis, Tomasz
  0 siblings, 0 replies; 70+ messages in thread
From: Lis, Tomasz @ 2018-04-12 17:15 UTC (permalink / raw)
  To: Daniele Ceraolo Spurio, Chris Wilson, intel-gfx; +Cc: mika.kuoppala



On 2018-03-30 20:23, Daniele Ceraolo Spurio wrote:
>
>
> On 27/03/18 16:27, Chris Wilson wrote:
>> Quoting Tomasz Lis (2018-03-27 16:17:59)
>>> The patch adds support of preempt-to-idle requesting by setting a 
>>> proper
>>> bit within Execlist Control Register, and receiving preemption 
>>> result from
>>> Context Status Buffer.
>>>
>>> Preemption in previous gens required a special batch buffer to be 
>>> executed,
>>> so the Command Streamer never preempted to idle directly. In Icelake 
>>> it is
>>> possible, as there is a hardware mechanism to inform the kernel about
>>> status of the preemption request.
>>>
>>> This patch does not cover using the new preemption mechanism when 
>>> GuC is
>>> active.
>>>
>>> Bspec: 18922
>>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_drv.h          |  2 ++
>>>   drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
>>>   drivers/gpu/drm/i915/intel_device_info.h |  1 +
>>>   drivers/gpu/drm/i915/intel_lrc.c         | 45 
>>> +++++++++++++++++++++++++++-----
>>>   drivers/gpu/drm/i915/intel_lrc.h         |  1 +
>>>   5 files changed, 45 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>>> b/drivers/gpu/drm/i915/i915_drv.h
>>> index 800230b..c32580b 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private 
>>> *dev_priv)
>>>                  ((dev_priv)->info.has_logical_ring_elsq)
>>>   #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>>> ((dev_priv)->info.has_logical_ring_preemption)
>>> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
>>> +               ((dev_priv)->info.has_hw_preempt_to_idle)
>>>     #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>>>   diff --git a/drivers/gpu/drm/i915/i915_pci.c 
>>> b/drivers/gpu/drm/i915/i915_pci.c
>>> index 4364922..66b6700 100644
>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>> @@ -595,7 +595,8 @@ static const struct intel_device_info 
>>> intel_cannonlake_info = {
>>>          GEN(11), \
>>>          .ddb_size = 2048, \
>>>          .has_csr = 0, \
>>> -       .has_logical_ring_elsq = 1
>>> +       .has_logical_ring_elsq = 1, \
>>> +       .has_hw_preempt_to_idle = 1
>>>     static const struct intel_device_info intel_icelake_11_info = {
>>>          GEN11_FEATURES,
>>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
>>> b/drivers/gpu/drm/i915/intel_device_info.h
>>> index 933e316..4eb97b5 100644
>>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>>> @@ -98,6 +98,7 @@ enum intel_platform {
>>>          func(has_logical_ring_contexts); \
>>>          func(has_logical_ring_elsq); \
>>>          func(has_logical_ring_preemption); \
>>> +       func(has_hw_preempt_to_idle); \
>>>          func(has_overlay); \
>>>          func(has_pooled_eu); \
>>>          func(has_psr); \
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>> index ba7f783..1a22de4 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -153,6 +153,7 @@
>>>   #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>>>   #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
>>>   #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
>>> +#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
>>>     #define GEN8_CTX_STATUS_COMPLETED_MASK \
>>>           (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
>>> @@ -183,7 +184,9 @@ static inline bool need_preempt(const struct 
>>> intel_engine_cs *engine,
>>>                                  const struct i915_request *last,
>>>                                  int prio)
>>>   {
>>> -       return engine->i915->preempt_context && prio > 
>>> max(rq_prio(last), 0);
>>> +       return (engine->i915->preempt_context ||
>>> +               HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
>>
>> Well, you haven't actually disabled allocating the preempt_context so...
>>
>> But at any rate, making this an engine->flag would eliminate one pointer
>> dance.
>>
>
> Can't we re-use I915_SCHEDULER_CAP_PREEMPTION in 
> engine->i915->caps.scheduler? That btw like here to be set if 
> i915->preempt_context || HAS_HW_PREEMPT_TO_IDLE(i915)
The engine->flag which Chris introduced is now used to set 
I915_SCHEDULER_CAP_PREEMPTION.
>
>>> +                prio > max(rq_prio(last), 0);
>>>   }
>>>     /**
>>> @@ -535,6 +538,25 @@ static void inject_preempt_context(struct 
>>> intel_engine_cs *engine)
>>>          execlists_set_active(&engine->execlists, 
>>> EXECLISTS_ACTIVE_PREEMPT);
>>>   }
>>>   +static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
>>> +{
>>> +       struct intel_engine_execlists *execlists = &engine->execlists;
>>> +
>>> +       GEM_TRACE("%s\n", engine->name);
>>> +
>>> +       /*
>>> +        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
>>> +        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
>>> +        */
>>> +       GEM_BUG_ON(execlists->ctrl_reg != NULL);
>
> Shouldn't this check be the other way around?
Wow. I have no idea how I was able to test this patch and not trigger 
this. You are right.
>
>>> +
>>> +       /* trigger preemption to idle */
>>> +       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
>>
>> Future plans? Because just inserting the branch into the setter of
>> inject_preempt_context() resolves a lot of conflicts with other work.
>>
>>> @@ -962,10 +987,13 @@ static void 
>>> execlists_submission_tasklet(unsigned long data)
>>>                                    status, buf[2*head + 1],
>>>                                    execlists->active);
>>>   -                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>> - GEN8_CTX_STATUS_PREEMPTED))
>>> +                       /* Check if switched to active or preempted 
>>> to active */
>>> +                       if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>> + GEN8_CTX_STATUS_PREEMPTED)) &&
>>> +                           !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
>>
>> Setting HWACK here is harmless as it gets cleared again. Unless, there
>> is some oddity in the code flow.
>
> There is actually some oddity, but it is more on the HW side. A 
> preempt to idle can potentially land on an already idle HW, in which 
> case GEN8_CTX_STATUS_ACTIVE_IDLE is not set and 
> GEN8_CTX_STATUS_IDLE_ACTIVE is set instead. In this case without this 
> check on GEN11_CTX_STATUS_PREEMPT_IDLE we would set the HWACK here but 
> we wouldn't call the clear below. Not sure if we end up clearing the 
> flag elsewhere, but that doesn't look too nice IMHO.
>
> BTW, the relevant CSB bits coming out in the 2 preempt to idle cases 
> are as follows:
>
> preempt active HW:
> GEN11_CTX_STATUS_PREEMPT_IDLE | GEN8_CTX_STATUS_ACTIVE_IDLE | 
> GEN8_CTX_STATUS_PREEMPTED
>
> Preempt idle HW:
> GEN11_CTX_STATUS_PREEMPT_IDLE | GEN8_CTX_STATUS_IDLE_ACTIVE
>
> Daniele
Thanks Daniele, this makes things a lot clearer.
Considering also HWACK description from Chris, I will add a condition to 
execlists_clear_active() below instead of  here.
-Tomasz
>
>>
>>> execlists_set_active(execlists,
>>> EXECLISTS_ACTIVE_HWACK);
>>> +
>>>                          if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>>> execlists_clear_active(execlists,
>>> EXECLISTS_ACTIVE_HWACK);
>>> @@ -976,8 +1004,13 @@ static void 
>>> execlists_submission_tasklet(unsigned long data)
>>>                          /* We should never get a COMPLETED | 
>>> IDLE_ACTIVE! */
>>>                          GEM_BUG_ON(status & 
>>> GEN8_CTX_STATUS_IDLE_ACTIVE);
>>>   -                       if (status & GEN8_CTX_STATUS_COMPLETE &&
>>> -                           buf[2*head + 1] == 
>>> execlists->preempt_complete_status) {
>>> +                       /*
>>> +                        * Check if preempted to real idle, either 
>>> directly or
>>> +                        * the preemptive context already finished 
>>> executing
>>> +                        */
>>> +                       if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
>>> +                           (status & GEN8_CTX_STATUS_COMPLETE &&
>>> +                           buf[2*head + 1] == 
>>> execlists->preempt_complete_status)) {
>>>                                  GEM_TRACE("%s preempt-idle\n", 
>>> engine->name);
>>
>> Hmm. I was hoping that we would be able to engineer a single check to
>> cover all sins. Might have been overly optimistic, but I can dream.
>> -Chris
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
>>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v2] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (3 preceding siblings ...)
  2018-03-27 23:27 ` [PATCH v1] " Chris Wilson
@ 2018-04-19 11:44 ` Tomasz Lis
  2018-04-19 12:00   ` Chris Wilson
  2018-04-19 11:58 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev2) Patchwork
                   ` (33 subsequent siblings)
  38 siblings, 1 reply; 70+ messages in thread
From: Tomasz Lis @ 2018-04-19 11:44 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

v2: Added needs_preempt_context() change so that it is not created when
    preempt-to-idle is supported. (Chris)
    Updated setting HWACK flag so that it is cleared after
    preempt-to-idle. (Chris, Daniele)
    Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

Bspec: 18922
Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |  2 ++
 drivers/gpu/drm/i915/i915_gem_context.c  |  4 ++-
 drivers/gpu/drm/i915/i915_pci.c          |  3 +-
 drivers/gpu/drm/i915/intel_device_info.h |  1 +
 drivers/gpu/drm/i915/intel_lrc.c         | 47 ++++++++++++++++++++++++++++----
 drivers/gpu/drm/i915/intel_lrc.h         |  1 +
 6 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0286911..f445340 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2518,6 +2518,8 @@ intel_info(const struct drm_i915_private *dev_priv)
 		((dev_priv)->info.has_logical_ring_elsq)
 #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
 		((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+		((dev_priv)->info.has_hw_preempt_to_idle)
 
 #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 74435af..d65f469 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -454,7 +454,9 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-	return HAS_LOGICAL_RING_PREEMPTION(i915);
+	return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+	       !HAS_HW_PREEMPT_TO_IDLE(i915) &&
+	       !USES_GUC_SUBMISSION(i915);
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
 	GEN(11), \
 	.ddb_size = 2048, \
 	.has_csr = 0, \
-	.has_logical_ring_elsq = 1
+	.has_logical_ring_elsq = 1, \
+	.has_hw_preempt_to_idle = 1
 
 static const struct intel_device_info intel_icelake_11_info = {
 	GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
 	func(has_logical_ring_contexts); \
 	func(has_logical_ring_elsq); \
 	func(has_logical_ring_preemption); \
+	func(has_hw_preempt_to_idle); \
 	func(has_overlay); \
 	func(has_pooled_eu); \
 	func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 029901a..4c94488 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,6 +154,7 @@
 #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE	(1 << 29)
 
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -552,6 +553,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
+static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists *execlists = &engine->execlists;
+
+	GEM_TRACE("%s\n", engine->name);
+
+	/*
+	 * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+	 * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+	 */
+	GEM_BUG_ON(execlists->ctrl_reg == NULL);
+
+	/* trigger preemption to idle */
+	writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+
+	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
+	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -611,7 +631,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			goto unlock;
 
 		if (need_preempt(engine, last, execlists->queue_priority)) {
-			inject_preempt_context(engine);
+			if (HAS_HW_PREEMPT_TO_IDLE(engine->i915))
+				gen11_preempt_to_idle(engine);
+			else
+				inject_preempt_context(engine);
 			goto unlock;
 		}
 
@@ -1010,7 +1033,15 @@ static void execlists_submission_tasklet(unsigned long data)
 				      GEN8_CTX_STATUS_PREEMPTED))
 				execlists_set_active(execlists,
 						     EXECLISTS_ACTIVE_HWACK);
-			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+
+			/*
+			 * Check if switched to idle or preempted to idle.
+			 * The STATUS_IDLE_ACTIVE flag is really used to mark
+			 * preemption from idle to idle, this is not a mistake.
+			 */
+			if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
+			    ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+			     (status & GEN11_CTX_STATUS_PREEMPT_IDLE)))
 				execlists_clear_active(execlists,
 						       EXECLISTS_ACTIVE_HWACK);
 
@@ -1020,8 +1051,13 @@ static void execlists_submission_tasklet(unsigned long data)
 			/* We should never get a COMPLETED | IDLE_ACTIVE! */
 			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
 
-			if (status & GEN8_CTX_STATUS_COMPLETE &&
-			    buf[2*head + 1] == execlists->preempt_complete_status) {
+			/*
+			 * Check if preempted to real idle, either directly or
+			 * the preemptive context already finished executing
+			 */
+			if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+			    (status & GEN8_CTX_STATUS_COMPLETE &&
+			    buf[2*head + 1] == execlists->preempt_complete_status)) {
 				GEM_TRACE("%s preempt-idle\n", engine->name);
 
 				execlists_cancel_port_requests(execlists);
@@ -2157,7 +2193,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->unpark = NULL;
 
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-	if (engine->i915->preempt_context)
+	if (engine->i915->preempt_context ||
+	    HAS_HW_PREEMPT_TO_IDLE(engine->i915))
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 
 	engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 59d7b86..958d1b3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@
 #define RING_EXECLIST_SQ_CONTENTS(engine)	_MMIO((engine)->mmio_base + 0x510)
 #define RING_EXECLIST_CONTROL(engine)		_MMIO((engine)->mmio_base + 0x550)
 #define	  EL_CTRL_LOAD				(1 << 0)
+#define	  EL_CTRL_PREEMPT_TO_IDLE		(1 << 1)
 
 /* The docs specify that the write pointer wraps around after 5h, "After status
  * is written out to the last available status QW at offset 5h, this pointer
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev2)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (4 preceding siblings ...)
  2018-04-19 11:44 ` [PATCH v2] " Tomasz Lis
@ 2018-04-19 11:58 ` Patchwork
  2018-04-19 11:59 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (32 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-04-19 11:58 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev2)
URL   : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
8e4bae99c558 drm/i915/gen11: Preempt-to-idle support in execlists.
-:107: CHECK:COMPARISON_TO_NULL: Comparison to NULL could be written "!execlists->ctrl_reg"
#107: FILE: drivers/gpu/drm/i915/intel_lrc.c:566:
+	GEM_BUG_ON(execlists->ctrl_reg == NULL);

-:160: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#160: FILE: drivers/gpu/drm/i915/intel_lrc.c:1060:
+			    buf[2*head + 1] == execlists->preempt_complete_status)) {
 			         ^

total: 0 errors, 0 warnings, 2 checks, 124 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✗ Fi.CI.SPARSE: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev2)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (5 preceding siblings ...)
  2018-04-19 11:58 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev2) Patchwork
@ 2018-04-19 11:59 ` Patchwork
  2018-04-19 12:13 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (31 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-04-19 11:59 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev2)
URL   : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915/gen11: Preempt-to-idle support in execlists.
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3656:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3658:16: warning: expression using sizeof(void)

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v2] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-04-19 11:44 ` [PATCH v2] " Tomasz Lis
@ 2018-04-19 12:00   ` Chris Wilson
  2018-04-19 22:23     ` Daniele Ceraolo Spurio
  0 siblings, 1 reply; 70+ messages in thread
From: Chris Wilson @ 2018-04-19 12:00 UTC (permalink / raw)
  To: Tomasz Lis, intel-gfx; +Cc: mika.kuoppala

Quoting Tomasz Lis (2018-04-19 12:44:48)
> The patch adds support of preempt-to-idle requesting by setting a proper
> bit within Execlist Control Register, and receiving preemption result from
> Context Status Buffer.
> 
> Preemption in previous gens required a special batch buffer to be executed,
> so the Command Streamer never preempted to idle directly. In Icelake it is
> possible, as there is a hardware mechanism to inform the kernel about
> status of the preemption request.
> 
> This patch does not cover using the new preemption mechanism when GuC is
> active.
> 
> v2: Added needs_preempt_context() change so that it is not created when
>     preempt-to-idle is supported. (Chris)
>     Updated setting HWACK flag so that it is cleared after
>     preempt-to-idle. (Chris, Daniele)
>     Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
> 
> Bspec: 18922
> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h          |  2 ++
>  drivers/gpu/drm/i915/i915_gem_context.c  |  4 ++-
>  drivers/gpu/drm/i915/i915_pci.c          |  3 +-
>  drivers/gpu/drm/i915/intel_device_info.h |  1 +
>  drivers/gpu/drm/i915/intel_lrc.c         | 47 ++++++++++++++++++++++++++++----
>  drivers/gpu/drm/i915/intel_lrc.h         |  1 +
>  6 files changed, 51 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0286911..f445340 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2518,6 +2518,8 @@ intel_info(const struct drm_i915_private *dev_priv)
>                 ((dev_priv)->info.has_logical_ring_elsq)
>  #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>                 ((dev_priv)->info.has_logical_ring_preemption)
> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
> +               ((dev_priv)->info.has_hw_preempt_to_idle)
>  
>  #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 74435af..d65f469 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -454,7 +454,9 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
>  
>  static bool needs_preempt_context(struct drm_i915_private *i915)
>  {
> -       return HAS_LOGICAL_RING_PREEMPTION(i915);
> +       return HAS_LOGICAL_RING_PREEMPTION(i915) &&
> +              !HAS_HW_PREEMPT_TO_IDLE(i915) &&
> +              !USES_GUC_SUBMISSION(i915);

Pardon? The guc uses the preempt_context for its preempt_client.

>  int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 4364922..66b6700 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
>         GEN(11), \
>         .ddb_size = 2048, \
>         .has_csr = 0, \
> -       .has_logical_ring_elsq = 1
> +       .has_logical_ring_elsq = 1, \
> +       .has_hw_preempt_to_idle = 1
>  
>  static const struct intel_device_info intel_icelake_11_info = {
>         GEN11_FEATURES,
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 933e316..4eb97b5 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -98,6 +98,7 @@ enum intel_platform {
>         func(has_logical_ring_contexts); \
>         func(has_logical_ring_elsq); \
>         func(has_logical_ring_preemption); \
> +       func(has_hw_preempt_to_idle); \
>         func(has_overlay); \
>         func(has_pooled_eu); \
>         func(has_psr); \
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 029901a..4c94488 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -154,6 +154,7 @@
>  #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>  #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
>  #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
> +#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
>  
>  #define GEN8_CTX_STATUS_COMPLETED_MASK \
>          (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
> @@ -552,6 +553,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>         execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>  }
>  
> +static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
> +{
> +       struct intel_engine_execlists *execlists = &engine->execlists;
> +
> +       GEM_TRACE("%s\n", engine->name);
> +
> +       /*
> +        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
> +        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
> +        */
> +       GEM_BUG_ON(execlists->ctrl_reg == NULL);
> +
> +       /* trigger preemption to idle */
> +       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
> +
> +       execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
> +       execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
> +}
> +
>  static void execlists_dequeue(struct intel_engine_cs *engine)
>  {
>         struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -611,7 +631,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>                         goto unlock;
>  
>                 if (need_preempt(engine, last, execlists->queue_priority)) {
> -                       inject_preempt_context(engine);
> +                       if (HAS_HW_PREEMPT_TO_IDLE(engine->i915))
> +                               gen11_preempt_to_idle(engine);
> +                       else
> +                               inject_preempt_context(engine);

Please do move this to inject_preempt_context. The conflict with other
work in flight is not worth the hassle, especially to reiterate such
points as we already have the equivalent machine check and so avoid
repeating it in even more pointer dancing.

>                         goto unlock;
>                 }
>  
> @@ -1010,7 +1033,15 @@ static void execlists_submission_tasklet(unsigned long data)
>                                       GEN8_CTX_STATUS_PREEMPTED))
>                                 execlists_set_active(execlists,
>                                                      EXECLISTS_ACTIVE_HWACK);
> -                       if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> +
> +                       /*
> +                        * Check if switched to idle or preempted to idle.
> +                        * The STATUS_IDLE_ACTIVE flag is really used to mark
> +                        * preemption from idle to idle, this is not a mistake.
> +                        */
> +                       if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
> +                           ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
> +                            (status & GEN11_CTX_STATUS_PREEMPT_IDLE)))
>                                 execlists_clear_active(execlists,
>                                                        EXECLISTS_ACTIVE_HWACK);

But still pointless, no?

> @@ -1020,8 +1051,13 @@ static void execlists_submission_tasklet(unsigned long data)
>                         /* We should never get a COMPLETED | IDLE_ACTIVE! */
>                         GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>  
> -                       if (status & GEN8_CTX_STATUS_COMPLETE &&
> -                           buf[2*head + 1] == execlists->preempt_complete_status) {
> +                       /*
> +                        * Check if preempted to real idle, either directly or
> +                        * the preemptive context already finished executing
> +                        */
> +                       if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
> +                           (status & GEN8_CTX_STATUS_COMPLETE &&
> +                           buf[2*head + 1] == execlists->preempt_complete_status)) {
>                                 GEM_TRACE("%s preempt-idle\n", engine->name);
>  
>                                 execlists_cancel_port_requests(execlists);
> @@ -2157,7 +2193,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
>         engine->unpark = NULL;
>  
>         engine->flags |= I915_ENGINE_SUPPORTS_STATS;
> -       if (engine->i915->preempt_context)
> +       if (engine->i915->preempt_context ||
> +           HAS_HW_PREEMPT_TO_IDLE(engine->i915))
>                 engine->flags |= I915_ENGINE_HAS_PREEMPTION;

-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915/gen11: Preempt-to-idle support in execlists. (rev2)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (6 preceding siblings ...)
  2018-04-19 11:59 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-04-19 12:13 ` Patchwork
  2018-04-19 16:08 ` ✓ Fi.CI.IGT: " Patchwork
                   ` (30 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-04-19 12:13 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev2)
URL   : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4068 -> Patchwork_8751 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/2/mbox/

== Known issues ==

  Here are the changes found in Patchwork_8751 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@gem_exec_suspend@basic-s4-devices:
      fi-kbl-7500u:       PASS -> DMESG-WARN (fdo#105128)

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c:
      fi-ivb-3520m:       PASS -> DMESG-WARN (fdo#106084)

    igt@prime_vgem@basic-gtt:
      fi-glk-1:           NOTRUN -> INCOMPLETE (k.org#198133, fdo#103359)

    
  fdo#103359 https://bugs.freedesktop.org/show_bug.cgi?id=103359
  fdo#105128 https://bugs.freedesktop.org/show_bug.cgi?id=105128
  fdo#106084 https://bugs.freedesktop.org/show_bug.cgi?id=106084
  k.org#198133 https://bugzilla.kernel.org/show_bug.cgi?id=198133


== Participating hosts (34 -> 32) ==

  Additional (1): fi-glk-1 
  Missing    (3): fi-ctg-p8600 fi-ilk-m540 fi-skl-6700hq 


== Build changes ==

    * Linux: CI_DRM_4068 -> Patchwork_8751

  CI_DRM_4068: 28fecc12e5c2b1beb9ab89e3616266d5d5e58e3d @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4441: 83ba5b7d3bde48b383df41792fc9c955a5a23bdb @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_8751: 8e4bae99c5587cb819b3ebb7a22dd8d75883be1b @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4441: e60d247eb359f044caf0c09904da14e39d7adca1 @ git://anongit.freedesktop.org/piglit


== Linux commits ==

8e4bae99c558 drm/i915/gen11: Preempt-to-idle support in execlists.

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8751/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✓ Fi.CI.IGT: success for drm/i915/gen11: Preempt-to-idle support in execlists. (rev2)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (7 preceding siblings ...)
  2018-04-19 12:13 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-04-19 16:08 ` Patchwork
  2018-05-11 15:45 ` [PATCH v3] drm/i915/gen11: Preempt-to-idle support in execlists - v3 notes Tomasz Lis
                   ` (29 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-04-19 16:08 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev2)
URL   : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4068_full -> Patchwork_8751_full =

== Summary - WARNING ==

  Minor unknown changes coming with Patchwork_8751_full need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_8751_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/2/mbox/

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_8751_full:

  === IGT changes ===

    ==== Warnings ====

    igt@gem_mmap_wc@set-cache-level:
      shard-glk:          PASS -> SKIP +71

    igt@gem_mocs_settings@mocs-rc6-bsd1:
      shard-kbl:          SKIP -> PASS

    igt@kms_mmap_write_crc:
      shard-glk:          SKIP -> PASS +93

    igt@perf_pmu@rc6:
      shard-kbl:          PASS -> SKIP

    
== Known issues ==

  Here are the changes found in Patchwork_8751_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@kms_cursor_legacy@flip-vs-cursor-toggle:
      shard-hsw:          PASS -> FAIL (fdo#102670) +1

    igt@kms_flip@2x-flip-vs-dpms-interruptible:
      shard-hsw:          PASS -> DMESG-WARN (fdo#102614)

    igt@kms_flip@flip-vs-wf_vblank-interruptible:
      shard-glk:          SKIP -> FAIL (fdo#100368)

    igt@kms_flip@modeset-vs-vblank-race-interruptible:
      shard-hsw:          PASS -> FAIL (fdo#103060)

    igt@kms_flip@wf_vblank-ts-check-interruptible:
      shard-apl:          PASS -> FAIL (fdo#100368)

    
    ==== Possible fixes ====

    igt@gem_ppgtt@blt-vs-render-ctx0:
      shard-kbl:          INCOMPLETE (fdo#106023, fdo#103665) -> PASS

    igt@kms_flip@2x-wf_vblank-ts-check:
      shard-hsw:          FAIL (fdo#100368) -> PASS

    igt@kms_flip@plain-flip-ts-check-interruptible:
      shard-glk:          FAIL (fdo#100368) -> PASS +2

    igt@kms_hdmi_inject@inject-audio:
      shard-glk:          FAIL (fdo#102370) -> PASS

    igt@kms_setmode@basic:
      shard-glk:          FAIL (fdo#99912) -> PASS

    
  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#102370 https://bugs.freedesktop.org/show_bug.cgi?id=102370
  fdo#102614 https://bugs.freedesktop.org/show_bug.cgi?id=102614
  fdo#102670 https://bugs.freedesktop.org/show_bug.cgi?id=102670
  fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
  fdo#103665 https://bugs.freedesktop.org/show_bug.cgi?id=103665
  fdo#106023 https://bugs.freedesktop.org/show_bug.cgi?id=106023
  fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912


== Participating hosts (6 -> 5) ==

  Missing    (1): shard-glkb 


== Build changes ==

    * Linux: CI_DRM_4068 -> Patchwork_8751

  CI_DRM_4068: 28fecc12e5c2b1beb9ab89e3616266d5d5e58e3d @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4441: 83ba5b7d3bde48b383df41792fc9c955a5a23bdb @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_8751: 8e4bae99c5587cb819b3ebb7a22dd8d75883be1b @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4441: e60d247eb359f044caf0c09904da14e39d7adca1 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8751/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v2] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-04-19 12:00   ` Chris Wilson
@ 2018-04-19 22:23     ` Daniele Ceraolo Spurio
  0 siblings, 0 replies; 70+ messages in thread
From: Daniele Ceraolo Spurio @ 2018-04-19 22:23 UTC (permalink / raw)
  To: Chris Wilson, Tomasz Lis, intel-gfx; +Cc: mika.kuoppala

<snip>

>>   
>> @@ -1010,7 +1033,15 @@ static void execlists_submission_tasklet(unsigned long data)
>>                                        GEN8_CTX_STATUS_PREEMPTED))
>>                                  execlists_set_active(execlists,
>>                                                       EXECLISTS_ACTIVE_HWACK);
>> -                       if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>> +
>> +                       /*
>> +                        * Check if switched to idle or preempted to idle.
>> +                        * The STATUS_IDLE_ACTIVE flag is really used to mark
>> +                        * preemption from idle to idle, this is not a mistake.
>> +                        */
>> +                       if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
>> +                           ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
>> +                            (status & GEN11_CTX_STATUS_PREEMPT_IDLE)))
>>                                  execlists_clear_active(execlists,
>>                                                         EXECLISTS_ACTIVE_HWACK);
> 
> But still pointless, no?
> 

Just to understand, is it pointless because we have a preemption in 
flight and we're thus going to call execlists_dequeue below, which will 
eventually clear the flag in execlists_submit_ports? Or do we just not 
care if this gets cleared here because we always clear it before a write 
to the elsp and we're only interested in it being clear between the 
write and the subsequent csb event?

Also, now that I think about it, with the current flow it doesn't look 
like we would clear EXECLISTS_ACTIVE_PREEMPT if a preempt-to-idle 
happens on idle HW, so we still need a condition for that even if we 
drop the one for EXECLISTS_ACTIVE_HWACK.

Daniele

>> @@ -1020,8 +1051,13 @@ static void execlists_submission_tasklet(unsigned long data)
>>                          /* We should never get a COMPLETED | IDLE_ACTIVE! */
>>                          GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>>   
>> -                       if (status & GEN8_CTX_STATUS_COMPLETE &&
>> -                           buf[2*head + 1] == execlists->preempt_complete_status) {
>> +                       /*
>> +                        * Check if preempted to real idle, either directly or
>> +                        * the preemptive context already finished executing
>> +                        */
>> +                       if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
>> +                           (status & GEN8_CTX_STATUS_COMPLETE &&
>> +                           buf[2*head + 1] == execlists->preempt_complete_status)) {
>>                                  GEM_TRACE("%s preempt-idle\n", engine->name);
>>   
>>                                  execlists_cancel_port_requests(execlists);
>> @@ -2157,7 +2193,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
>>          engine->unpark = NULL;
>>   
>>          engine->flags |= I915_ENGINE_SUPPORTS_STATS;
>> -       if (engine->i915->preempt_context)
>> +       if (engine->i915->preempt_context ||
>> +           HAS_HW_PREEMPT_TO_IDLE(engine->i915))
>>                  engine->flags |= I915_ENGINE_HAS_PREEMPTION;
> 
> -Chris
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-03-30 19:45         ` Daniele Ceraolo Spurio
@ 2018-04-26 14:02           ` Lis, Tomasz
  0 siblings, 0 replies; 70+ messages in thread
From: Lis, Tomasz @ 2018-04-26 14:02 UTC (permalink / raw)
  To: Daniele Ceraolo Spurio, Chris Wilson, intel-gfx; +Cc: mika.kuoppala



On 2018-03-30 21:45, Daniele Ceraolo Spurio wrote:
>
>
> On 30/03/18 08:42, Lis, Tomasz wrote:
>>
>>
>> On 2018-03-29 00:28, Chris Wilson wrote:
>>> Quoting Lis, Tomasz (2018-03-28 17:06:58)
>>>>
>>>> On 2018-03-28 01:27, Chris Wilson wrote:
>>>>> Quoting Tomasz Lis (2018-03-27 16:17:59)
>>>>>> The patch adds support of preempt-to-idle requesting by setting a 
>>>>>> proper
>>>>>> bit within Execlist Control Register, and receiving preemption 
>>>>>> result from
>>>>>> Context Status Buffer.
>>>>>>
>>>>>> Preemption in previous gens required a special batch buffer to be 
>>>>>> executed,
>>>>>> so the Command Streamer never preempted to idle directly. In 
>>>>>> Icelake it is
>>>>>> possible, as there is a hardware mechanism to inform the kernel 
>>>>>> about
>>>>>> status of the preemption request.
>>>>>>
>>>>>> This patch does not cover using the new preemption mechanism when 
>>>>>> GuC is
>>>>>> active.
>>>>>>
>>>>>> Bspec: 18922
>>>>>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>>>>>> ---
>>>>>>    drivers/gpu/drm/i915/i915_drv.h          |  2 ++
>>>>>>    drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
>>>>>>    drivers/gpu/drm/i915/intel_device_info.h |  1 +
>>>>>>    drivers/gpu/drm/i915/intel_lrc.c         | 45 
>>>>>> +++++++++++++++++++++++++++-----
>>>>>>    drivers/gpu/drm/i915/intel_lrc.h         |  1 +
>>>>>>    5 files changed, 45 insertions(+), 7 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>>>>>> b/drivers/gpu/drm/i915/i915_drv.h
>>>>>> index 800230b..c32580b 100644
>>>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>>>> @@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private 
>>>>>> *dev_priv)
>>>>>> ((dev_priv)->info.has_logical_ring_elsq)
>>>>>>    #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>>>>>> ((dev_priv)->info.has_logical_ring_preemption)
>>>>>> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
>>>>>> + ((dev_priv)->info.has_hw_preempt_to_idle)
>>>>>>    #define HAS_EXECLISTS(dev_priv) 
>>>>>> HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>>>>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c 
>>>>>> b/drivers/gpu/drm/i915/i915_pci.c
>>>>>> index 4364922..66b6700 100644
>>>>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>>>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>>>>> @@ -595,7 +595,8 @@ static const struct intel_device_info 
>>>>>> intel_cannonlake_info = {
>>>>>>           GEN(11), \
>>>>>>           .ddb_size = 2048, \
>>>>>>           .has_csr = 0, \
>>>>>> -       .has_logical_ring_elsq = 1
>>>>>> +       .has_logical_ring_elsq = 1, \
>>>>>> +       .has_hw_preempt_to_idle = 1
>>>>>>    static const struct intel_device_info intel_icelake_11_info = {
>>>>>>           GEN11_FEATURES,
>>>>>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
>>>>>> b/drivers/gpu/drm/i915/intel_device_info.h
>>>>>> index 933e316..4eb97b5 100644
>>>>>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>>>>>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>>>>>> @@ -98,6 +98,7 @@ enum intel_platform {
>>>>>>           func(has_logical_ring_contexts); \
>>>>>>           func(has_logical_ring_elsq); \
>>>>>>           func(has_logical_ring_preemption); \
>>>>>> +       func(has_hw_preempt_to_idle); \
>>>>>>           func(has_overlay); \
>>>>>>           func(has_pooled_eu); \
>>>>>>           func(has_psr); \
>>>>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>>>>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>>>>> index ba7f783..1a22de4 100644
>>>>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>>>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>>>>> @@ -153,6 +153,7 @@
>>>>>>    #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>>>>>>    #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
>>>>>>    #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
>>>>>> +#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
>>>>>>    #define GEN8_CTX_STATUS_COMPLETED_MASK \
>>>>>>            (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
>>>>>> @@ -183,7 +184,9 @@ static inline bool need_preempt(const struct 
>>>>>> intel_engine_cs *engine,
>>>>>>                                   const struct i915_request *last,
>>>>>>                                   int prio)
>>>>>>    {
>>>>>> -       return engine->i915->preempt_context && prio > 
>>>>>> max(rq_prio(last), 0);
>>>>>> +       return (engine->i915->preempt_context ||
>>>>>> +               HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
>>>>> Well, you haven't actually disabled allocating the preempt_context 
>>>>> so...
>>>> Yes.. I had mixed feelings about changing needs_preempt_context() now,
>>>> as that would mean adding a temporary condition on GuC until the GuC
>>>> preemption is merged.
>>>> I will add the conditions and disable the allocation in v2 of the 
>>>> patch.
>>>>> But at any rate, making this an engine->flag would eliminate one 
>>>>> pointer
>>>>> dance.
>>>> Could be an interesting idea for a separate patch.
>>> To land first ;)
>> :)
>> Sure, I can do that.
>>>>>> +                prio > max(rq_prio(last), 0);
>>>>>>    }
>>>>>>    /**
>>>>>> @@ -535,6 +538,25 @@ static void inject_preempt_context(struct 
>>>>>> intel_engine_cs *engine)
>>>>>> execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>>>>>>    }
>>>>>> +static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
>>>>>> +{
>>>>>> +       struct intel_engine_execlists *execlists = 
>>>>>> &engine->execlists;
>>>>>> +
>>>>>> +       GEM_TRACE("%s\n", engine->name);
>>>>>> +
>>>>>> +       /*
>>>>>> +        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
>>>>>> +        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
>>>>>> +        */
>>>>>> +       GEM_BUG_ON(execlists->ctrl_reg != NULL);
>>>>>> +
>>>>>> +       /* trigger preemption to idle */
>>>>>> +       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
>>>>> Future plans? Because just inserting the branch into the setter of
>>>>> inject_preempt_context() resolves a lot of conflicts with other work.
>>>> My arguments for separate function are:
>>>> - better code readability
>>>> - keeping the symmetry between execlist and GuC flow - GuC preemption
>>>> patches will introduce separate function as well
>>>> - only 4 lines of the function would be common
>>>> - the name inject_preempt_context() wouldn't match the new purpose, so
>>>> renaming would be needed
>>>> - reduced self-documenting code due to two separate preempt methods 
>>>> not
>>>> having distinct names
>>>>
>>>> That's all, I don't have any future plans for it. If you want me to
>>>> merge the two, let me know.
>>> The problem that I am worrying about is that we will duplicate bunch of
>>> other code, the actual ELS[PQ] write is the smaller portion. Plus we
>>> already have the branch on something much more pleasant.
>> I see. I don't know any details there, so I'm not able to weigh that.
>> Just let me know whether this possible duplication outweighs the 
>> arguments I provided, and I will merge these functions.
>> I'm not overly attached to my solution.
>>>
>>>>>> @@ -962,10 +987,13 @@ static void 
>>>>>> execlists_submission_tasklet(unsigned long data)
>>>>>>                                     status, buf[2*head + 1],
>>>>>> execlists->active);
>>>>>> -                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>>>>> - GEN8_CTX_STATUS_PREEMPTED))
>>>>>> +                       /* Check if switched to active or 
>>>>>> preempted to active */
>>>>>> +                       if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>>>>> + GEN8_CTX_STATUS_PREEMPTED)) &&
>>>>>> +                           !(status & 
>>>>>> GEN11_CTX_STATUS_PREEMPT_IDLE))
>>>>> Setting HWACK here is harmless as it gets cleared again. Unless, 
>>>>> there
>>>>> is some oddity in the code flow.
>>>> I will check if lack of the change affects test results.
>>>> Personally, I would keep this change, even if only for allowing simple
>>>> definition of what HWACK flag means.
>>> The simple definition is the opposite one, imo. We set the flag 
>>> after we
>>> get the corresponding response from HW; any preemption or activate 
>>> event
>>> must follow the most recent ELSP write. So that will include the
>>> preemption event following the preempt-idle write.
>>>
>>> Then on deciding that the HW is idle, we apply the complication such
>>> that execlists->active == 0. (That rule is what breaks the pattern.)
>>> -Chris
>> Ok, I will remove this unnecessary condition.
>> I tested this and lack of it doesn't seem to affect the results.
>> (I'll be out next week; expect v2 when I'm back)
>> -Tomasz
>>
>
> Do we have any test to cover a preempt to idle on already idle HW 
> (which is the case we cover with this flag here)? If not maybe we could 
> add a selftest for that.
>
> Daniele
Looks like this case is not tested.
Also, it looks like there is a bug of some kind. Preemption-specific 
tests are passing, but I'm getting fails (with occasional passes) in 
smoketest-* cases from gem_exec_schedule.
I am working on diagnosing that.
>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v3] drm/i915/gen11: Preempt-to-idle support in execlists - v3 notes.
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (8 preceding siblings ...)
  2018-04-19 16:08 ` ✓ Fi.CI.IGT: " Patchwork
@ 2018-05-11 15:45 ` Tomasz Lis
  2018-05-11 15:45   ` [PATCH v3] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
  2018-05-11 16:15 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev3) Patchwork
                   ` (28 subsequent siblings)
  38 siblings, 1 reply; 70+ messages in thread
From: Tomasz Lis @ 2018-05-11 15:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

The review of v2 changes touched issues which were addressed in a different way
than planned in that review:

1. Context status processing

While the review went towards finding common path to new preemption flag
combinations and existing cases, I decided to split the two ways, because
the !(status & GEN8_CTX_STATUS_COMPLETED_MASK) condition had to be skipped
from the preemption flow. While pre-gen11 preemption always executed the
preemptive context which guaranteed that flag set, nothing finishes execution
in the new to-real-idle preemption.

2. Testing of the idle-to-idle preemption case

It turns out the IGT test 'smoketest-render' from 'gem_exec_schedule' group
triggers the idle-to-idle case. The test submits a lot of very short execs
with changing priorities. Its run triggers preemption many times, and due to
the short exec buffer, triggers the idle-to-idle case as well, sooner or later.

Tomasz Lis (1):
  drm/i915/gen11: Preempt-to-idle support in execlists.

 drivers/gpu/drm/i915/i915_drv.h          |   2 +
 drivers/gpu/drm/i915/i915_gem_context.c  |   5 +-
 drivers/gpu/drm/i915/i915_pci.c          |   3 +-
 drivers/gpu/drm/i915/intel_device_info.h |   1 +
 drivers/gpu/drm/i915/intel_lrc.c         | 115 ++++++++++++++++++++++---------
 drivers/gpu/drm/i915/intel_lrc.h         |   1 +
 6 files changed, 92 insertions(+), 35 deletions(-)

-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v3] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-05-11 15:45 ` [PATCH v3] drm/i915/gen11: Preempt-to-idle support in execlists - v3 notes Tomasz Lis
@ 2018-05-11 15:45   ` Tomasz Lis
  2018-05-18 21:08     ` Daniele Ceraolo Spurio
  0 siblings, 1 reply; 70+ messages in thread
From: Tomasz Lis @ 2018-05-11 15:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

v2: Added needs_preempt_context() change so that it is not created when
    preempt-to-idle is supported. (Chris)
    Updated setting HWACK flag so that it is cleared after
    preempt-to-idle. (Chris, Daniele)
    Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

v3: Fixed needs_preempt_context() change. (Chris)
    Merged preemption trigger functions to one. (Chris)
    Fixed context state to not assume COMPLETED_MASK after preemption,
    since idle-to-idle case will not have it set.

Bspec: 18922
Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |   2 +
 drivers/gpu/drm/i915/i915_gem_context.c  |   5 +-
 drivers/gpu/drm/i915/i915_pci.c          |   3 +-
 drivers/gpu/drm/i915/intel_device_info.h |   1 +
 drivers/gpu/drm/i915/intel_lrc.c         | 115 ++++++++++++++++++++++---------
 drivers/gpu/drm/i915/intel_lrc.h         |   1 +
 6 files changed, 92 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 57fb3aa..6e9647b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2535,6 +2535,8 @@ intel_info(const struct drm_i915_private *dev_priv)
 		((dev_priv)->info.has_logical_ring_elsq)
 #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
 		((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+		((dev_priv)->info.has_hw_preempt_to_idle)
 
 #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 33f8a4b..bdac129 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -454,7 +454,10 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-	return HAS_LOGICAL_RING_PREEMPTION(i915);
+	return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+	       (!HAS_HW_PREEMPT_TO_IDLE(i915) ||
+		(HAS_HW_PREEMPT_TO_IDLE(i915) &&
+		!USES_GUC_SUBMISSION(i915)));
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
 	GEN(11), \
 	.ddb_size = 2048, \
 	.has_csr = 0, \
-	.has_logical_ring_elsq = 1
+	.has_logical_ring_elsq = 1, \
+	.has_hw_preempt_to_idle = 1
 
 static const struct intel_device_info intel_icelake_11_info = {
 	GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
 	func(has_logical_ring_contexts); \
 	func(has_logical_ring_elsq); \
 	func(has_logical_ring_preemption); \
+	func(has_hw_preempt_to_idle); \
 	func(has_overlay); \
 	func(has_pooled_eu); \
 	func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 29dcf34..8fe6795 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,6 +154,7 @@
 #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE	(1 << 29)
 
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -526,31 +527,49 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
 static void inject_preempt_context(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
-	struct intel_context *ce =
-		to_intel_context(engine->i915->preempt_context, engine);
-	unsigned int n;
 
-	GEM_BUG_ON(execlists->preempt_complete_status !=
-		   upper_32_bits(ce->lrc_desc));
-	GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
-		    _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-				       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
-		   _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-				      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+	if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+		/*
+		 * If we have hardware preempt-to-idle, we do not need to
+		 * inject any job to the hardware. We only set a flag.
+		 */
+		GEM_TRACE("%s\n", engine->name);
 
-	/*
-	 * Switch to our empty preempt context so
-	 * the state of the GPU is known (idle).
-	 */
-	GEM_TRACE("%s\n", engine->name);
-	for (n = execlists_num_ports(execlists); --n; )
-		write_desc(execlists, 0, n);
+		/*
+		 * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+		 * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+		 */
+		GEM_BUG_ON(execlists->ctrl_reg == NULL);
 
-	write_desc(execlists, ce->lrc_desc, n);
+		/* trigger preemption to idle */
+		writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+	} else {
+		struct intel_context *ce =
+			to_intel_context(engine->i915->preempt_context, engine);
+		unsigned int n;
 
-	/* we need to manually load the submit queue */
-	if (execlists->ctrl_reg)
-		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+		GEM_BUG_ON(execlists->preempt_complete_status !=
+			   upper_32_bits(ce->lrc_desc));
+		GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+		       _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+					  CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+		       _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+					  CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+
+		/*
+		 * Switch to our empty preempt context so
+		 * the state of the GPU is known (idle).
+		 */
+		GEM_TRACE("%s\n", engine->name);
+		for (n = execlists_num_ports(execlists); --n; )
+			write_desc(execlists, 0, n);
+
+		write_desc(execlists, ce->lrc_desc, n);
+
+		/* we need to manually load the submit queue */
+		if (execlists->ctrl_reg)
+			writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+	}
 
 	execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
 	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
@@ -1045,22 +1064,51 @@ static void execlists_submission_tasklet(unsigned long data)
 				  status, buf[2*head + 1],
 				  execlists->active);
 
-			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-				      GEN8_CTX_STATUS_PREEMPTED))
-				execlists_set_active(execlists,
-						     EXECLISTS_ACTIVE_HWACK);
-			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+			/*
+			 * Check if preempted from idle to idle directly.
+			 * The STATUS_IDLE_ACTIVE flag is used to mark
+			 * such transition.
+			 */
+			if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+			     (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+
 				execlists_clear_active(execlists,
 						       EXECLISTS_ACTIVE_HWACK);
 
-			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-				continue;
+				/*
+				 * We could not have COMPLETED anything
+				 * if we were idle before preemption.
+				 */
+				GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+			}
+
+			else {
+				if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+					      GEN8_CTX_STATUS_PREEMPTED))
+					execlists_set_active(execlists,
+						       EXECLISTS_ACTIVE_HWACK);
+
+				if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+					execlists_clear_active(execlists,
+						       EXECLISTS_ACTIVE_HWACK);
 
-			/* We should never get a COMPLETED | IDLE_ACTIVE! */
-			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+				if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+					continue;
 
-			if (status & GEN8_CTX_STATUS_COMPLETE &&
-			    buf[2*head + 1] == execlists->preempt_complete_status) {
+				/*
+				 * We should never get a
+				 * COMPLETED | IDLE_ACTIVE!
+				 */
+				GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+			}
+
+			/*
+			 * Check if preempted to real idle, either directly or
+			 * the preemptive context already finished executing
+			 */
+			if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+			    (status & GEN8_CTX_STATUS_COMPLETE &&
+			    buf[2*head + 1] == execlists->preempt_complete_status)) {
 				GEM_TRACE("%s preempt-idle\n", engine->name);
 
 				execlists_cancel_port_requests(execlists);
@@ -2217,7 +2265,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->unpark = NULL;
 
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-	if (engine->i915->preempt_context)
+	if (engine->i915->preempt_context ||
+	    HAS_HW_PREEMPT_TO_IDLE(engine->i915))
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 
 	engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 4ec7d8d..b1083ac 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@
 #define RING_EXECLIST_SQ_CONTENTS(engine)	_MMIO((engine)->mmio_base + 0x510)
 #define RING_EXECLIST_CONTROL(engine)		_MMIO((engine)->mmio_base + 0x550)
 #define	  EL_CTRL_LOAD				(1 << 0)
+#define	  EL_CTRL_PREEMPT_TO_IDLE		(1 << 1)
 
 /* The docs specify that the write pointer wraps around after 5h, "After status
  * is written out to the last available status QW at offset 5h, this pointer
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev3)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (9 preceding siblings ...)
  2018-05-11 15:45 ` [PATCH v3] drm/i915/gen11: Preempt-to-idle support in execlists - v3 notes Tomasz Lis
@ 2018-05-11 16:15 ` Patchwork
  2018-05-11 16:16 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (27 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-05-11 16:15 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev3)
URL   : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
3492dcf9f1e4 drm/i915/gen11: Preempt-to-idle support in execlists.
-:132: CHECK:COMPARISON_TO_NULL: Comparison to NULL could be written "!execlists->ctrl_reg"
#132: FILE: drivers/gpu/drm/i915/intel_lrc.c:542:
+		GEM_BUG_ON(execlists->ctrl_reg == NULL);

-:185: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#185: FILE: drivers/gpu/drm/i915/intel_lrc.c:1074:
+			if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+			     (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {

-:186: CHECK:BRACES: Blank lines aren't necessary after an open brace '{'
#186: FILE: drivers/gpu/drm/i915/intel_lrc.c:1075:
+			     (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+

-:199: CHECK:BRACES: Unbalanced braces around else statement
#199: FILE: drivers/gpu/drm/i915/intel_lrc.c:1086:
+			else {

-:203: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#203: FILE: drivers/gpu/drm/i915/intel_lrc.c:1090:
+					execlists_set_active(execlists,
+						       EXECLISTS_ACTIVE_HWACK);

-:207: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#207: FILE: drivers/gpu/drm/i915/intel_lrc.c:1094:
+					execlists_clear_active(execlists,
+						       EXECLISTS_ACTIVE_HWACK);

-:229: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#229: FILE: drivers/gpu/drm/i915/intel_lrc.c:1112:
+			    buf[2*head + 1] == execlists->preempt_complete_status)) {
 			         ^

total: 0 errors, 0 warnings, 7 checks, 190 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✗ Fi.CI.SPARSE: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev3)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (10 preceding siblings ...)
  2018-05-11 16:15 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev3) Patchwork
@ 2018-05-11 16:16 ` Patchwork
  2018-05-11 16:33 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (26 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-05-11 16:16 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev3)
URL   : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915/gen11: Preempt-to-idle support in execlists.
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3663:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3665:16: warning: expression using sizeof(void)

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915/gen11: Preempt-to-idle support in execlists. (rev3)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (11 preceding siblings ...)
  2018-05-11 16:16 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-05-11 16:33 ` Patchwork
  2018-05-11 17:46 ` ✗ Fi.CI.IGT: failure " Patchwork
                   ` (25 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-05-11 16:33 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev3)
URL   : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4169 -> Patchwork_8983 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/3/mbox/


== Changes ==

  No changes found


== Participating hosts (41 -> 37) ==

  Missing    (4): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-skl-6700hq 


== Build changes ==

    * Linux: CI_DRM_4169 -> Patchwork_8983

  CI_DRM_4169: 05bfe2ceaa9df8f56313507ae01344971fa4f8f4 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4475: 35f08c12aa216d5b62a5b9984b575cee6905098f @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_8983: 3492dcf9f1e429e4bd7fe2b95c5f5a912f5a4ade @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4475: 3ba0657bff4216d1ec7179935590261855f1651e @ git://anongit.freedesktop.org/piglit


== Linux commits ==

3492dcf9f1e4 drm/i915/gen11: Preempt-to-idle support in execlists.

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8983/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✗ Fi.CI.IGT: failure for drm/i915/gen11: Preempt-to-idle support in execlists. (rev3)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (12 preceding siblings ...)
  2018-05-11 16:33 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-05-11 17:46 ` Patchwork
  2018-05-25 18:26 ` [PATCH v4] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (24 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-05-11 17:46 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev3)
URL   : https://patchwork.freedesktop.org/series/40747/
State : failure

== Summary ==

= CI Bug Log - changes from CI_DRM_4169_full -> Patchwork_8983_full =

== Summary - FAILURE ==

  Serious unknown changes coming with Patchwork_8983_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_8983_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/3/mbox/

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_8983_full:

  === IGT changes ===

    ==== Possible regressions ====

    igt@perf_pmu@interrupts-sync:
      shard-kbl:          PASS -> FAIL

    
    ==== Warnings ====

    igt@gem_pwrite@big-cpu-random:
      shard-kbl:          SKIP -> PASS

    
== Known issues ==

  Here are the changes found in Patchwork_8983_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@kms_concurrent@pipe-c:
      shard-hsw:          PASS -> DMESG-WARN (fdo#102614)

    igt@kms_flip@absolute-wf_vblank-interruptible:
      shard-glk:          PASS -> FAIL (fdo#106087)

    igt@kms_flip@flip-vs-wf_vblank-interruptible:
      shard-glk:          PASS -> FAIL (fdo#105312)

    igt@kms_flip@plain-flip-ts-check-interruptible:
      shard-glk:          PASS -> FAIL (fdo#100368) +1

    igt@kms_pipe_crc_basic@nonblocking-crc-pipe-c-frame-sequence:
      shard-hsw:          PASS -> FAIL (fdo#103481)

    
    ==== Possible fixes ====

    igt@gem_eio@in-flight-10ms:
      shard-glk:          FAIL (fdo#105957) -> PASS

    igt@gem_ppgtt@blt-vs-render-ctx0:
      shard-kbl:          INCOMPLETE (fdo#106023, fdo#103665) -> PASS

    igt@kms_chv_cursor_fail@pipe-b-64x64-top-edge:
      shard-apl:          FAIL (fdo#104671, fdo#104724) -> PASS

    igt@kms_flip@flip-vs-expired-vblank:
      shard-glk:          FAIL (fdo#102887) -> PASS

    igt@kms_flip@flip-vs-panning-vs-hang:
      shard-snb:          DMESG-WARN (fdo#103821) -> PASS

    igt@kms_flip@modeset-vs-vblank-race-interruptible:
      shard-hsw:          FAIL (fdo#103060) -> PASS

    igt@kms_flip@plain-flip-fb-recreate:
      shard-glk:          FAIL (fdo#100368) -> PASS +1

    igt@kms_sysfs_edid_timing:
      shard-apl:          WARN (fdo#100047) -> PASS

    
  fdo#100047 https://bugs.freedesktop.org/show_bug.cgi?id=100047
  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#102614 https://bugs.freedesktop.org/show_bug.cgi?id=102614
  fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
  fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
  fdo#103481 https://bugs.freedesktop.org/show_bug.cgi?id=103481
  fdo#103665 https://bugs.freedesktop.org/show_bug.cgi?id=103665
  fdo#103821 https://bugs.freedesktop.org/show_bug.cgi?id=103821
  fdo#104671 https://bugs.freedesktop.org/show_bug.cgi?id=104671
  fdo#104724 https://bugs.freedesktop.org/show_bug.cgi?id=104724
  fdo#105312 https://bugs.freedesktop.org/show_bug.cgi?id=105312
  fdo#105957 https://bugs.freedesktop.org/show_bug.cgi?id=105957
  fdo#106023 https://bugs.freedesktop.org/show_bug.cgi?id=106023
  fdo#106087 https://bugs.freedesktop.org/show_bug.cgi?id=106087


== Participating hosts (5 -> 5) ==

  No changes in participating hosts


== Build changes ==

    * Linux: CI_DRM_4169 -> Patchwork_8983

  CI_DRM_4169: 05bfe2ceaa9df8f56313507ae01344971fa4f8f4 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4475: 35f08c12aa216d5b62a5b9984b575cee6905098f @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_8983: 3492dcf9f1e429e4bd7fe2b95c5f5a912f5a4ade @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4475: 3ba0657bff4216d1ec7179935590261855f1651e @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8983/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v3] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-05-11 15:45   ` [PATCH v3] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
@ 2018-05-18 21:08     ` Daniele Ceraolo Spurio
  2018-05-21 10:16       ` Lis, Tomasz
  0 siblings, 1 reply; 70+ messages in thread
From: Daniele Ceraolo Spurio @ 2018-05-18 21:08 UTC (permalink / raw)
  To: Tomasz Lis, intel-gfx; +Cc: mika.kuoppala



On 11/05/18 08:45, Tomasz Lis wrote:
> The patch adds support of preempt-to-idle requesting by setting a proper
> bit within Execlist Control Register, and receiving preemption result from
> Context Status Buffer.
> 
> Preemption in previous gens required a special batch buffer to be executed,
> so the Command Streamer never preempted to idle directly. In Icelake it is
> possible, as there is a hardware mechanism to inform the kernel about
> status of the preemption request.
> 
> This patch does not cover using the new preemption mechanism when GuC is
> active.
> 
> v2: Added needs_preempt_context() change so that it is not created when
>      preempt-to-idle is supported. (Chris)
>      Updated setting HWACK flag so that it is cleared after
>      preempt-to-dle. (Chris, Daniele)
>      Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
> 
> v3: Fixed needs_preempt_context() change. (Chris)
>      Merged preemption trigger functions to one. (Chris)
>      Fixed context state to not assume COMPLETED_MASK after preemption,
>      since idle-to-idle case will not have it set.
> 
> Bspec: 18922
> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h          |   2 +
>   drivers/gpu/drm/i915/i915_gem_context.c  |   5 +-
>   drivers/gpu/drm/i915/i915_pci.c          |   3 +-
>   drivers/gpu/drm/i915/intel_device_info.h |   1 +
>   drivers/gpu/drm/i915/intel_lrc.c         | 115 ++++++++++++++++++++++---------
>   drivers/gpu/drm/i915/intel_lrc.h         |   1 +
>   6 files changed, 92 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 57fb3aa..6e9647b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2535,6 +2535,8 @@ intel_info(const struct drm_i915_private *dev_priv)
>   		((dev_priv)->info.has_logical_ring_elsq)
>   #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>   		((dev_priv)->info.has_logical_ring_preemption)
> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
> +		((dev_priv)->info.has_hw_preempt_to_idle)
>   
>   #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 33f8a4b..bdac129 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -454,7 +454,10 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
>   
>   static bool needs_preempt_context(struct drm_i915_private *i915)
>   {
> -	return HAS_LOGICAL_RING_PREEMPTION(i915);
> +	return HAS_LOGICAL_RING_PREEMPTION(i915) &&
> +	       (!HAS_HW_PREEMPT_TO_IDLE(i915) ||
> +		(HAS_HW_PREEMPT_TO_IDLE(i915) &&
> +		!USES_GUC_SUBMISSION(i915)));

Why do we keep the preempt context for !USES_GUC_SUBMISSION(i915) even 
if HAS_HW_PREEMPT_TO_IDLE(i915)? After this patch we shouldn't need it 
anymore, right?

>   }
>   
>   int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 4364922..66b6700 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
>   	GEN(11), \
>   	.ddb_size = 2048, \
>   	.has_csr = 0, \
> -	.has_logical_ring_elsq = 1
> +	.has_logical_ring_elsq = 1, \
> +	.has_hw_preempt_to_idle = 1
>   
>   static const struct intel_device_info intel_icelake_11_info = {
>   	GEN11_FEATURES,
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 933e316..4eb97b5 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -98,6 +98,7 @@ enum intel_platform {
>   	func(has_logical_ring_contexts); \
>   	func(has_logical_ring_elsq); \
>   	func(has_logical_ring_preemption); \
> +	func(has_hw_preempt_to_idle); \
>   	func(has_overlay); \
>   	func(has_pooled_eu); \
>   	func(has_psr); \
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 29dcf34..8fe6795 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -154,6 +154,7 @@
>   #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
>   #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
>   #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
> +#define GEN11_CTX_STATUS_PREEMPT_IDLE	(1 << 29)
>   
>   #define GEN8_CTX_STATUS_COMPLETED_MASK \
>   	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
> @@ -526,31 +527,49 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
>   static void inject_preempt_context(struct intel_engine_cs *engine)

For gen11+ we don't inject a preempt context anymore, maybe we can 
rename this function to something like "inject_preempt()".

>   {
>   	struct intel_engine_execlists *execlists = &engine->execlists;
> -	struct intel_context *ce =
> -		to_intel_context(engine->i915->preempt_context, engine);
> -	unsigned int n;
>   
> -	GEM_BUG_ON(execlists->preempt_complete_status !=
> -		   upper_32_bits(ce->lrc_desc));
> -	GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
> -		    _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> -				       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
> -		   _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> -				      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
> +	if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
> +		/*
> +		 * If we have hardware preempt-to-idle, we do not need to
> +		 * inject any job to the hardware. We only set a flag.
> +		 */
> +		GEM_TRACE("%s\n", engine->name);

This trace is in both conditional branches, might be cleaner to just put 
it before the if statement.

>   
> -	/*
> -	 * Switch to our empty preempt context so
> -	 * the state of the GPU is known (idle).
> -	 */
> -	GEM_TRACE("%s\n", engine->name);
> -	for (n = execlists_num_ports(execlists); --n; )
> -		write_desc(execlists, 0, n);
> +		/*
> +		 * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
> +		 * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
> +		 */
> +		GEM_BUG_ON(execlists->ctrl_reg == NULL);
>   
> -	write_desc(execlists, ce->lrc_desc, n);
> +		/* trigger preemption to idle */
> +		writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
> +	} else {
> +		struct intel_context *ce =
> +			to_intel_context(engine->i915->preempt_context, engine);
> +		unsigned int n;
>   
> -	/* we need to manually load the submit queue */
> -	if (execlists->ctrl_reg)
> -		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
> +		GEM_BUG_ON(execlists->preempt_complete_status !=
> +			   upper_32_bits(ce->lrc_desc));
> +		GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
> +		       _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> +					  CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
> +		       _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> +					  CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
> +
> +		/*
> +		 * Switch to our empty preempt context so
> +		 * the state of the GPU is known (idle).
> +		 */
> +		GEM_TRACE("%s\n", engine->name);
> +		for (n = execlists_num_ports(execlists); --n; )
> +			write_desc(execlists, 0, n);
> +
> +		write_desc(execlists, ce->lrc_desc, n);
> +
> +		/* we need to manually load the submit queue */
> +		if (execlists->ctrl_reg)
> +			writel(EL_CTRL_LOAD, execlists->ctrl_reg);
> +	}
>   
>   	execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
>   	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
> @@ -1045,22 +1064,51 @@ static void execlists_submission_tasklet(unsigned long data)
>   				  status, buf[2*head + 1],
>   				  execlists->active);
>   
> -			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> -				      GEN8_CTX_STATUS_PREEMPTED))
> -				execlists_set_active(execlists,
> -						     EXECLISTS_ACTIVE_HWACK);
> -			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> +			/*
> +			 * Check if preempted from idle to idle directly.
> +			 * The STATUS_IDLE_ACTIVE flag is used to mark
> +			 * such transition.
> +			 */
> +			if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
> +			     (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
> +
>   				execlists_clear_active(execlists,
>   						       EXECLISTS_ACTIVE_HWACK);

EXECLISTS_ACTIVE_HWACK should be already clear here (we clear it both 
when we inject the pre-emption and on the previous A->I CSB event), so 
there should be no need to clear it.

>   
> -			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> -				continue;
> +				/*
> +				 * We could not have COMPLETED anything
> +				 * if we were idle before preemption.
> +				 */
> +				GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
> +			}
> +
> +			else {

nitpick: formatting is wrong here.

Daniele

> +				if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> +					      GEN8_CTX_STATUS_PREEMPTED))
> +					execlists_set_active(execlists,
> +						       EXECLISTS_ACTIVE_HWACK);
> +
> +				if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> +					execlists_clear_active(execlists,
> +						       EXECLISTS_ACTIVE_HWACK);
>   
> -			/* We should never get a COMPLETED | IDLE_ACTIVE! */
> -			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
> +				if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> +					continue;
>   
> -			if (status & GEN8_CTX_STATUS_COMPLETE &&
> -			    buf[2*head + 1] == execlists->preempt_complete_status) {
> +				/*
> +				 * We should never get a
> +				 * COMPLETED | IDLE_ACTIVE!
> +				 */
> +				GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
> +			}
> +
> +			/*
> +			 * Check if preempted to real idle, either directly or
> +			 * the preemptive context already finished executing
> +			 */
> +			if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
> +			    (status & GEN8_CTX_STATUS_COMPLETE &&
> +			    buf[2*head + 1] == execlists->preempt_complete_status)) {
>   				GEM_TRACE("%s preempt-idle\n", engine->name);
>   
>   				execlists_cancel_port_requests(execlists);
> @@ -2217,7 +2265,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
>   	engine->unpark = NULL;
>   
>   	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
> -	if (engine->i915->preempt_context)
> +	if (engine->i915->preempt_context ||
> +	    HAS_HW_PREEMPT_TO_IDLE(engine->i915))
>   		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
>   
>   	engine->i915->caps.scheduler =
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 4ec7d8d..b1083ac 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -45,6 +45,7 @@
>   #define RING_EXECLIST_SQ_CONTENTS(engine)	_MMIO((engine)->mmio_base + 0x510)
>   #define RING_EXECLIST_CONTROL(engine)		_MMIO((engine)->mmio_base + 0x550)
>   #define	  EL_CTRL_LOAD				(1 << 0)
> +#define	  EL_CTRL_PREEMPT_TO_IDLE		(1 << 1)
>   
>   /* The docs specify that the write pointer wraps around after 5h, "After status
>    * is written out to the last available status QW at offset 5h, this pointer
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v3] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-05-18 21:08     ` Daniele Ceraolo Spurio
@ 2018-05-21 10:16       ` Lis, Tomasz
  2018-05-22 14:39         ` Ceraolo Spurio, Daniele
  0 siblings, 1 reply; 70+ messages in thread
From: Lis, Tomasz @ 2018-05-21 10:16 UTC (permalink / raw)
  To: Daniele Ceraolo Spurio, intel-gfx; +Cc: mika.kuoppala



On 2018-05-18 23:08, Daniele Ceraolo Spurio wrote:
>
>
> On 11/05/18 08:45, Tomasz Lis wrote:
>> The patch adds support of preempt-to-idle requesting by setting a proper
>> bit within Execlist Control Register, and receiving preemption result 
>> from
>> Context Status Buffer.
>>
>> Preemption in previous gens required a special batch buffer to be 
>> executed,
>> so the Command Streamer never preempted to idle directly. In Icelake 
>> it is
>> possible, as there is a hardware mechanism to inform the kernel about
>> status of the preemption request.
>>
>> This patch does not cover using the new preemption mechanism when GuC is
>> active.
>>
>> v2: Added needs_preempt_context() change so that it is not created when
>>      preempt-to-idle is supported. (Chris)
>>      Updated setting HWACK flag so that it is cleared after
>>      preempt-to-dle. (Chris, Daniele)
>>      Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
>>
>> v3: Fixed needs_preempt_context() change. (Chris)
>>      Merged preemption trigger functions to one. (Chris)
>>      Fixed context state to not assume COMPLETED_MASK after preemption,
>>      since idle-to-idle case will not have it set.
>>
>> Bspec: 18922
>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h          |   2 +
>>   drivers/gpu/drm/i915/i915_gem_context.c  |   5 +-
>>   drivers/gpu/drm/i915/i915_pci.c          |   3 +-
>>   drivers/gpu/drm/i915/intel_device_info.h |   1 +
>>   drivers/gpu/drm/i915/intel_lrc.c         | 115 
>> ++++++++++++++++++++++---------
>>   drivers/gpu/drm/i915/intel_lrc.h         |   1 +
>>   6 files changed, 92 insertions(+), 35 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 57fb3aa..6e9647b 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2535,6 +2535,8 @@ intel_info(const struct drm_i915_private 
>> *dev_priv)
>>           ((dev_priv)->info.has_logical_ring_elsq)
>>   #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>>           ((dev_priv)->info.has_logical_ring_preemption)
>> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
>> +        ((dev_priv)->info.has_hw_preempt_to_idle)
>>     #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>>   diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
>> b/drivers/gpu/drm/i915/i915_gem_context.c
>> index 33f8a4b..bdac129 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>> @@ -454,7 +454,10 @@ destroy_kernel_context(struct i915_gem_context 
>> **ctxp)
>>     static bool needs_preempt_context(struct drm_i915_private *i915)
>>   {
>> -    return HAS_LOGICAL_RING_PREEMPTION(i915);
>> +    return HAS_LOGICAL_RING_PREEMPTION(i915) &&
>> +           (!HAS_HW_PREEMPT_TO_IDLE(i915) ||
>> +        (HAS_HW_PREEMPT_TO_IDLE(i915) &&
>> +        !USES_GUC_SUBMISSION(i915)));
>
> Why do we keep the preempt context for !USES_GUC_SUBMISSION(i915) even 
> if HAS_HW_PREEMPT_TO_IDLE(i915)? After this patch we shouldn't need it 
> anymore, right?
The patch only provides the gen11 way for non-GuC submission. This is
why the condition is so convoluted - preempt_context is still needed if
we use GuC.
This will be simplified after the GuC patches are added.
>
>>   }
>>     int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
>> diff --git a/drivers/gpu/drm/i915/i915_pci.c 
>> b/drivers/gpu/drm/i915/i915_pci.c
>> index 4364922..66b6700 100644
>> --- a/drivers/gpu/drm/i915/i915_pci.c
>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>> @@ -595,7 +595,8 @@ static const struct intel_device_info 
>> intel_cannonlake_info = {
>>       GEN(11), \
>>       .ddb_size = 2048, \
>>       .has_csr = 0, \
>> -    .has_logical_ring_elsq = 1
>> +    .has_logical_ring_elsq = 1, \
>> +    .has_hw_preempt_to_idle = 1
>>     static const struct intel_device_info intel_icelake_11_info = {
>>       GEN11_FEATURES,
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
>> b/drivers/gpu/drm/i915/intel_device_info.h
>> index 933e316..4eb97b5 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>> @@ -98,6 +98,7 @@ enum intel_platform {
>>       func(has_logical_ring_contexts); \
>>       func(has_logical_ring_elsq); \
>>       func(has_logical_ring_preemption); \
>> +    func(has_hw_preempt_to_idle); \
>>       func(has_overlay); \
>>       func(has_pooled_eu); \
>>       func(has_psr); \
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index 29dcf34..8fe6795 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -154,6 +154,7 @@
>>   #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>>   #define GEN8_CTX_STATUS_COMPLETE    (1 << 4)
>>   #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
>> +#define GEN11_CTX_STATUS_PREEMPT_IDLE    (1 << 29)
>>     #define GEN8_CTX_STATUS_COMPLETED_MASK \
>>        (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
>> @@ -526,31 +527,49 @@ static void port_assign(struct execlist_port 
>> *port, struct i915_request *rq)
>>   static void inject_preempt_context(struct intel_engine_cs *engine)
>
> For gen11+ we don't inject a preempt context anymore, maybe we can 
> rename this function to something like "inject_preempt()".
My initial approach was to just add a second function. Merging the
changes into inject_preempt_context() was requested by Chris; as I
understand it, this is to minimize refactoring in other work in progress.
>
>>   {
>>       struct intel_engine_execlists *execlists = &engine->execlists;
>> -    struct intel_context *ce =
>> -        to_intel_context(engine->i915->preempt_context, engine);
>> -    unsigned int n;
>>   -    GEM_BUG_ON(execlists->preempt_complete_status !=
>> -           upper_32_bits(ce->lrc_desc));
>> -    GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
>> - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>> -                       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
>> - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>> -                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
>> +    if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
>> +        /*
>> +         * If we have hardware preempt-to-idle, we do not need to
>> +         * inject any job to the hardware. We only set a flag.
>> +         */
>> +        GEM_TRACE("%s\n", engine->name);
>
> This trace is in both conditional branches, might be cleaner to just 
> put it before the if statement.
True, I did not differentiate the messages. Will put it before the if statement.
>
>>   -    /*
>> -     * Switch to our empty preempt context so
>> -     * the state of the GPU is known (idle).
>> -     */
>> -    GEM_TRACE("%s\n", engine->name);
>> -    for (n = execlists_num_ports(execlists); --n; )
>> -        write_desc(execlists, 0, n);
>> +        /*
>> +         * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
>> +         * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
>> +         */
>> +        GEM_BUG_ON(execlists->ctrl_reg == NULL);
>>   -    write_desc(execlists, ce->lrc_desc, n);
>> +        /* trigger preemption to idle */
>> +        writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
>> +    } else {
>> +        struct intel_context *ce =
>> + to_intel_context(engine->i915->preempt_context, engine);
>> +        unsigned int n;
>>   -    /* we need to manually load the submit queue */
>> -    if (execlists->ctrl_reg)
>> -        writel(EL_CTRL_LOAD, execlists->ctrl_reg);
>> +        GEM_BUG_ON(execlists->preempt_complete_status !=
>> +               upper_32_bits(ce->lrc_desc));
>> +        GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
>> + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>> +                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
>> + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>> +                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
>> +
>> +        /*
>> +         * Switch to our empty preempt context so
>> +         * the state of the GPU is known (idle).
>> +         */
>> +        GEM_TRACE("%s\n", engine->name);
>> +        for (n = execlists_num_ports(execlists); --n; )
>> +            write_desc(execlists, 0, n);
>> +
>> +        write_desc(execlists, ce->lrc_desc, n);
>> +
>> +        /* we need to manually load the submit queue */
>> +        if (execlists->ctrl_reg)
>> +            writel(EL_CTRL_LOAD, execlists->ctrl_reg);
>> +    }
>>         execlists_clear_active(&engine->execlists, 
>> EXECLISTS_ACTIVE_HWACK);
>>       execlists_set_active(&engine->execlists, 
>> EXECLISTS_ACTIVE_PREEMPT);
>> @@ -1045,22 +1064,51 @@ static void 
>> execlists_submission_tasklet(unsigned long data)
>>                     status, buf[2*head + 1],
>>                     execlists->active);
>>   -            if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>> -                      GEN8_CTX_STATUS_PREEMPTED))
>> -                execlists_set_active(execlists,
>> -                             EXECLISTS_ACTIVE_HWACK);
>> -            if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>> +            /*
>> +             * Check if preempted from idle to idle directly.
>> +             * The STATUS_IDLE_ACTIVE flag is used to mark
>> +             * such transition.
>> +             */
>> +            if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
>> +                 (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
>> +
>>                   execlists_clear_active(execlists,
>>                                  EXECLISTS_ACTIVE_HWACK);
>
> EXECLISTS_ACTIVE_HWACK should be already clear here (we clear it both 
> when we inject the pre-emption and on the previous A->I CSB event), so 
> there should be no need to clear it.
This is a complex case; optimizations here may lead to errors later.
But I agree - since this block is only entered on idle-to-idle 
preemption, and setting the flag can only happen when hardware is not 
idle, we should never see the ACTIVE_HWACK flag set here.
I will change it to GEM_BUG_ON(), unless I get any errors when
testing that.

>
>>   -            if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
>> -                continue;
>> +                /*
>> +                 * We could not have COMPLETED anything
>> +                 * if we were idle before preemption.
>> +                 */
>> +                GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
>> +            }
>> +
>> +            else {
>
> nitpick: formatting is wrong here.
ack.
>
> Daniele
>
>> +                if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>> +                          GEN8_CTX_STATUS_PREEMPTED))
>> +                    execlists_set_active(execlists,
>> +                               EXECLISTS_ACTIVE_HWACK);
>> +
>> +                if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>> +                    execlists_clear_active(execlists,
>> +                               EXECLISTS_ACTIVE_HWACK);
>>   -            /* We should never get a COMPLETED | IDLE_ACTIVE! */
>> -            GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>> +                if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
>> +                    continue;
>>   -            if (status & GEN8_CTX_STATUS_COMPLETE &&
>> -                buf[2*head + 1] == 
>> execlists->preempt_complete_status) {
>> +                /*
>> +                 * We should never get a
>> +                 * COMPLETED | IDLE_ACTIVE!
>> +                 */
>> +                GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>> +            }
>> +
>> +            /*
>> +             * Check if preempted to real idle, either directly or
>> +             * the preemptive context already finished executing
>> +             */
>> +            if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
>> +                (status & GEN8_CTX_STATUS_COMPLETE &&
>> +                buf[2*head + 1] == 
>> execlists->preempt_complete_status)) {
>>                   GEM_TRACE("%s preempt-idle\n", engine->name);
>>                     execlists_cancel_port_requests(execlists);
>> @@ -2217,7 +2265,8 @@ static void 
>> execlists_set_default_submission(struct intel_engine_cs *engine)
>>       engine->unpark = NULL;
>>         engine->flags |= I915_ENGINE_SUPPORTS_STATS;
>> -    if (engine->i915->preempt_context)
>> +    if (engine->i915->preempt_context ||
>> +        HAS_HW_PREEMPT_TO_IDLE(engine->i915))
>>           engine->flags |= I915_ENGINE_HAS_PREEMPTION;
>>         engine->i915->caps.scheduler =
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.h 
>> b/drivers/gpu/drm/i915/intel_lrc.h
>> index 4ec7d8d..b1083ac 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.h
>> +++ b/drivers/gpu/drm/i915/intel_lrc.h
>> @@ -45,6 +45,7 @@
>>   #define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base 
>> + 0x510)
>>   #define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 
>> 0x550)
>>   #define      EL_CTRL_LOAD                (1 << 0)
>> +#define      EL_CTRL_PREEMPT_TO_IDLE        (1 << 1)
>>     /* The docs specify that the write pointer wraps around after 5h, 
>> "After status
>>    * is written out to the last available status QW at offset 5h, 
>> this pointer
>>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v3] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-05-21 10:16       ` Lis, Tomasz
@ 2018-05-22 14:39         ` Ceraolo Spurio, Daniele
  2018-05-22 14:54           ` Lis, Tomasz
  0 siblings, 1 reply; 70+ messages in thread
From: Ceraolo Spurio, Daniele @ 2018-05-22 14:39 UTC (permalink / raw)
  To: Lis, Tomasz, intel-gfx; +Cc: mika.kuoppala



On 5/21/2018 3:16 AM, Lis, Tomasz wrote:
> 
> 
> On 2018-05-18 23:08, Daniele Ceraolo Spurio wrote:
>>
>>
>> On 11/05/18 08:45, Tomasz Lis wrote:
>>> The patch adds support of preempt-to-idle requesting by setting a proper
>>> bit within Execlist Control Register, and receiving preemption result 
>>> from
>>> Context Status Buffer.
>>>
>>> Preemption in previous gens required a special batch buffer to be 
>>> executed,
>>> so the Command Streamer never preempted to idle directly. In Icelake 
>>> it is
>>> possible, as there is a hardware mechanism to inform the kernel about
>>> status of the preemption request.
>>>
>>> This patch does not cover using the new preemption mechanism when GuC is
>>> active.
>>>
>>> v2: Added needs_preempt_context() change so that it is not created when
>>>      preempt-to-idle is supported. (Chris)
>>>      Updated setting HWACK flag so that it is cleared after
>>>      preempt-to-dle. (Chris, Daniele)
>>>      Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
>>>
>>> v3: Fixed needs_preempt_context() change. (Chris)
>>>      Merged preemption trigger functions to one. (Chris)
>>>      Fixed context state to not assume COMPLETED_MASK after preemption,
>>>      since idle-to-idle case will not have it set.
>>>
>>> Bspec: 18922
>>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_drv.h          |   2 +
>>>   drivers/gpu/drm/i915/i915_gem_context.c  |   5 +-
>>>   drivers/gpu/drm/i915/i915_pci.c          |   3 +-
>>>   drivers/gpu/drm/i915/intel_device_info.h |   1 +
>>>   drivers/gpu/drm/i915/intel_lrc.c         | 115 
>>> ++++++++++++++++++++++---------
>>>   drivers/gpu/drm/i915/intel_lrc.h         |   1 +
>>>   6 files changed, 92 insertions(+), 35 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>>> b/drivers/gpu/drm/i915/i915_drv.h
>>> index 57fb3aa..6e9647b 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -2535,6 +2535,8 @@ intel_info(const struct drm_i915_private 
>>> *dev_priv)
>>>           ((dev_priv)->info.has_logical_ring_elsq)
>>>   #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>>>           ((dev_priv)->info.has_logical_ring_preemption)
>>> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
>>> +        ((dev_priv)->info.has_hw_preempt_to_idle)
>>>     #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>>>   diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
>>> b/drivers/gpu/drm/i915/i915_gem_context.c
>>> index 33f8a4b..bdac129 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>>> @@ -454,7 +454,10 @@ destroy_kernel_context(struct i915_gem_context 
>>> **ctxp)
>>>     static bool needs_preempt_context(struct drm_i915_private *i915)
>>>   {
>>> -    return HAS_LOGICAL_RING_PREEMPTION(i915);
>>> +    return HAS_LOGICAL_RING_PREEMPTION(i915) &&
>>> +           (!HAS_HW_PREEMPT_TO_IDLE(i915) ||
>>> +        (HAS_HW_PREEMPT_TO_IDLE(i915) &&
>>> +        !USES_GUC_SUBMISSION(i915)));
>>
>> Why do we keep the preempt context for !USES_GUC_SUBMISSION(i915) even 
>> if HAS_HW_PREEMPT_TO_IDLE(i915)? After this patch we shouldn't need it 
>> anymore, right?
> The patch only provides gen11 way for the non-GuC submission. This is 
> why the condition is so convoluted - preempt_context is still needed if 
> we use GuC.
> This will be simplified after GuC paches are added.

mmm I think this check is the other way around because it returns true 
when HAS_HW_PREEMPT_TO_IDLE for !USES_GUC_SUBMISSION, so when GuC is not 
in use. BTW, GuC does not support using the preempt context on platforms 
that have HW supported preempt-to-idle, so there is no need to keep the 
preempt context around for GuC.

>>
>>>   }
>>>     int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c 
>>> b/drivers/gpu/drm/i915/i915_pci.c
>>> index 4364922..66b6700 100644
>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>> @@ -595,7 +595,8 @@ static const struct intel_device_info 
>>> intel_cannonlake_info = {
>>>       GEN(11), \
>>>       .ddb_size = 2048, \
>>>       .has_csr = 0, \
>>> -    .has_logical_ring_elsq = 1
>>> +    .has_logical_ring_elsq = 1, \
>>> +    .has_hw_preempt_to_idle = 1
>>>     static const struct intel_device_info intel_icelake_11_info = {
>>>       GEN11_FEATURES,
>>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
>>> b/drivers/gpu/drm/i915/intel_device_info.h
>>> index 933e316..4eb97b5 100644
>>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>>> @@ -98,6 +98,7 @@ enum intel_platform {
>>>       func(has_logical_ring_contexts); \
>>>       func(has_logical_ring_elsq); \
>>>       func(has_logical_ring_preemption); \
>>> +    func(has_hw_preempt_to_idle); \
>>>       func(has_overlay); \
>>>       func(has_pooled_eu); \
>>>       func(has_psr); \
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>> index 29dcf34..8fe6795 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -154,6 +154,7 @@
>>>   #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>>>   #define GEN8_CTX_STATUS_COMPLETE    (1 << 4)
>>>   #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
>>> +#define GEN11_CTX_STATUS_PREEMPT_IDLE    (1 << 29)
>>>     #define GEN8_CTX_STATUS_COMPLETED_MASK \
>>>        (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
>>> @@ -526,31 +527,49 @@ static void port_assign(struct execlist_port 
>>> *port, struct i915_request *rq)
>>>   static void inject_preempt_context(struct intel_engine_cs *engine)
>>
>> For gen11+ we don't inject a preempt context anymore, maybe we can 
>> rename this function to something like "inject_preempt()".
> My initial approach was to just add a second function. Merging the 
> changes to inject_preempt_context() was requested by Chris; as I 
> understand it is to minimize refactoring in other work in progress.
>>
>>>   {
>>>       struct intel_engine_execlists *execlists = &engine->execlists;
>>> -    struct intel_context *ce =
>>> -        to_intel_context(engine->i915->preempt_context, engine);
>>> -    unsigned int n;
>>>   -    GEM_BUG_ON(execlists->preempt_complete_status !=
>>> -           upper_32_bits(ce->lrc_desc));
>>> -    GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
>>> - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>>> -                       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
>>> - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>>> -                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
>>> +    if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
>>> +        /*
>>> +         * If we have hardware preempt-to-idle, we do not need to
>>> +         * inject any job to the hardware. We only set a flag.
>>> +         */
>>> +        GEM_TRACE("%s\n", engine->name);
>>
>> This trace is in both conditional branches, might be cleaner to just 
>> put it before the if statement.
> True, I did not differentiated the messages. Will put before.
>>
>>>   -    /*
>>> -     * Switch to our empty preempt context so
>>> -     * the state of the GPU is known (idle).
>>> -     */
>>> -    GEM_TRACE("%s\n", engine->name);
>>> -    for (n = execlists_num_ports(execlists); --n; )
>>> -        write_desc(execlists, 0, n);
>>> +        /*
>>> +         * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
>>> +         * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
>>> +         */
>>> +        GEM_BUG_ON(execlists->ctrl_reg == NULL);
>>>   -    write_desc(execlists, ce->lrc_desc, n);
>>> +        /* trigger preemption to idle */
>>> +        writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
>>> +    } else {
>>> +        struct intel_context *ce =
>>> + to_intel_context(engine->i915->preempt_context, engine);
>>> +        unsigned int n;
>>>   -    /* we need to manually load the submit queue */
>>> -    if (execlists->ctrl_reg)
>>> -        writel(EL_CTRL_LOAD, execlists->ctrl_reg);
>>> +        GEM_BUG_ON(execlists->preempt_complete_status !=
>>> +               upper_32_bits(ce->lrc_desc));
>>> +        GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
>>> + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>>> +                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
>>> + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>>> +                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
>>> +
>>> +        /*
>>> +         * Switch to our empty preempt context so
>>> +         * the state of the GPU is known (idle).
>>> +         */
>>> +        GEM_TRACE("%s\n", engine->name);
>>> +        for (n = execlists_num_ports(execlists); --n; )
>>> +            write_desc(execlists, 0, n);
>>> +
>>> +        write_desc(execlists, ce->lrc_desc, n);
>>> +
>>> +        /* we need to manually load the submit queue */
>>> +        if (execlists->ctrl_reg)
>>> +            writel(EL_CTRL_LOAD, execlists->ctrl_reg);
>>> +    }
>>>         execlists_clear_active(&engine->execlists, 
>>> EXECLISTS_ACTIVE_HWACK);
>>>       execlists_set_active(&engine->execlists, 
>>> EXECLISTS_ACTIVE_PREEMPT);
>>> @@ -1045,22 +1064,51 @@ static void 
>>> execlists_submission_tasklet(unsigned long data)
>>>                     status, buf[2*head + 1],
>>>                     execlists->active);
>>>   -            if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>> -                      GEN8_CTX_STATUS_PREEMPTED))
>>> -                execlists_set_active(execlists,
>>> -                             EXECLISTS_ACTIVE_HWACK);
>>> -            if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>>> +            /*
>>> +             * Check if preempted from idle to idle directly.
>>> +             * The STATUS_IDLE_ACTIVE flag is used to mark
>>> +             * such transition.
>>> +             */
>>> +            if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
>>> +                 (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
>>> +
>>>                   execlists_clear_active(execlists,
>>>                                  EXECLISTS_ACTIVE_HWACK);
>>
>> EXECLISTS_ACTIVE_HWACK should be already clear here (we clear it both 
>> when we inject the pre-emption and on the previous A->I CSB event), so 
>> there should be no need to clear it.
> This is a complex case; optimizations here may lead to errors later.
> But I agree - since this block is only entered on idle-to-idle 
> preemption, and setting the flag can only happen when hardware is not 
> idle, we should never see the ACTIVE_HWACK flag set here.
> I will change it to GEM_BUG_ON(), unless I will get any errors in 
> testing that.
> 

I'm not sure we actually need to care at all about 
EXECLISTS_ACTIVE_HWACK here. From what I can see that is only used to 
make sure we don't submit while the execlists HW is loading the current 
submission. In this case however we're sure no submissions are occurring 
because EXECLISTS_ACTIVE_PREEMPT is set, so we're already guarded.

Daniele

>>
>>>   -            if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
>>> -                continue;
>>> +                /*
>>> +                 * We could not have COMPLETED anything
>>> +                 * if we were idle before preemption.
>>> +                 */
>>> +                GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
>>> +            }
>>> +
>>> +            else {
>>
>> nitpick: formatting is wrong here.
> ack.
>>
>> Daniele
>>
>>> +                if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>> +                          GEN8_CTX_STATUS_PREEMPTED))
>>> +                    execlists_set_active(execlists,
>>> +                               EXECLISTS_ACTIVE_HWACK);
>>> +
>>> +                if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>>> +                    execlists_clear_active(execlists,
>>> +                               EXECLISTS_ACTIVE_HWACK);
>>>   -            /* We should never get a COMPLETED | IDLE_ACTIVE! */
>>> -            GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>>> +                if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
>>> +                    continue;
>>>   -            if (status & GEN8_CTX_STATUS_COMPLETE &&
>>> -                buf[2*head + 1] == 
>>> execlists->preempt_complete_status) {
>>> +                /*
>>> +                 * We should never get a
>>> +                 * COMPLETED | IDLE_ACTIVE!
>>> +                 */
>>> +                GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>>> +            }
>>> +
>>> +            /*
>>> +             * Check if preempted to real idle, either directly or
>>> +             * the preemptive context already finished executing
>>> +             */
>>> +            if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
>>> +                (status & GEN8_CTX_STATUS_COMPLETE &&
>>> +                buf[2*head + 1] == 
>>> execlists->preempt_complete_status)) {
>>>                   GEM_TRACE("%s preempt-idle\n", engine->name);
>>>                     execlists_cancel_port_requests(execlists);
>>> @@ -2217,7 +2265,8 @@ static void 
>>> execlists_set_default_submission(struct intel_engine_cs *engine)
>>>       engine->unpark = NULL;
>>>         engine->flags |= I915_ENGINE_SUPPORTS_STATS;
>>> -    if (engine->i915->preempt_context)
>>> +    if (engine->i915->preempt_context ||
>>> +        HAS_HW_PREEMPT_TO_IDLE(engine->i915))
>>>           engine->flags |= I915_ENGINE_HAS_PREEMPTION;
>>>         engine->i915->caps.scheduler =
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.h 
>>> b/drivers/gpu/drm/i915/intel_lrc.h
>>> index 4ec7d8d..b1083ac 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.h
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.h
>>> @@ -45,6 +45,7 @@
>>>   #define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base 
>>> + 0x510)
>>>   #define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 
>>> 0x550)
>>>   #define      EL_CTRL_LOAD                (1 << 0)
>>> +#define      EL_CTRL_PREEMPT_TO_IDLE        (1 << 1)
>>>     /* The docs specify that the write pointer wraps around after 5h, 
>>> "After status
>>>    * is written out to the last available status QW at offset 5h, 
>>> this pointer
>>>
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v3] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-05-22 14:39         ` Ceraolo Spurio, Daniele
@ 2018-05-22 14:54           ` Lis, Tomasz
  0 siblings, 0 replies; 70+ messages in thread
From: Lis, Tomasz @ 2018-05-22 14:54 UTC (permalink / raw)
  To: Ceraolo Spurio, Daniele, intel-gfx; +Cc: mika.kuoppala



On 2018-05-22 16:39, Ceraolo Spurio, Daniele wrote:
>
>
> On 5/21/2018 3:16 AM, Lis, Tomasz wrote:
>>
>>
>> On 2018-05-18 23:08, Daniele Ceraolo Spurio wrote:
>>>
>>>
>>> On 11/05/18 08:45, Tomasz Lis wrote:
>>>> The patch adds support of preempt-to-idle requesting by setting a 
>>>> proper
>>>> bit within Execlist Control Register, and receiving preemption 
>>>> result from
>>>> Context Status Buffer.
>>>>
>>>> Preemption in previous gens required a special batch buffer to be 
>>>> executed,
>>>> so the Command Streamer never preempted to idle directly. In 
>>>> Icelake it is
>>>> possible, as there is a hardware mechanism to inform the kernel about
>>>> status of the preemption request.
>>>>
>>>> This patch does not cover using the new preemption mechanism when 
>>>> GuC is
>>>> active.
>>>>
>>>> v2: Added needs_preempt_context() change so that it is not created 
>>>> when
>>>>      preempt-to-idle is supported. (Chris)
>>>>      Updated setting HWACK flag so that it is cleared after
>>>>      preempt-to-dle. (Chris, Daniele)
>>>>      Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
>>>>
>>>> v3: Fixed needs_preempt_context() change. (Chris)
>>>>      Merged preemption trigger functions to one. (Chris)
>>>>      Fixed context state to not assume COMPLETED_MASK after 
>>>> preemption,
>>>>      since idle-to-idle case will not have it set.
>>>>
>>>> Bspec: 18922
>>>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>>>> ---
>>>>   drivers/gpu/drm/i915/i915_drv.h          |   2 +
>>>>   drivers/gpu/drm/i915/i915_gem_context.c  |   5 +-
>>>>   drivers/gpu/drm/i915/i915_pci.c          |   3 +-
>>>>   drivers/gpu/drm/i915/intel_device_info.h |   1 +
>>>>   drivers/gpu/drm/i915/intel_lrc.c         | 115 
>>>> ++++++++++++++++++++++---------
>>>>   drivers/gpu/drm/i915/intel_lrc.h         |   1 +
>>>>   6 files changed, 92 insertions(+), 35 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>>>> b/drivers/gpu/drm/i915/i915_drv.h
>>>> index 57fb3aa..6e9647b 100644
>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>> @@ -2535,6 +2535,8 @@ intel_info(const struct drm_i915_private 
>>>> *dev_priv)
>>>>           ((dev_priv)->info.has_logical_ring_elsq)
>>>>   #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>>>>           ((dev_priv)->info.has_logical_ring_preemption)
>>>> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
>>>> +        ((dev_priv)->info.has_hw_preempt_to_idle)
>>>>     #define HAS_EXECLISTS(dev_priv) 
>>>> HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>>>>   diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
>>>> b/drivers/gpu/drm/i915/i915_gem_context.c
>>>> index 33f8a4b..bdac129 100644
>>>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>>>> @@ -454,7 +454,10 @@ destroy_kernel_context(struct i915_gem_context 
>>>> **ctxp)
>>>>     static bool needs_preempt_context(struct drm_i915_private *i915)
>>>>   {
>>>> -    return HAS_LOGICAL_RING_PREEMPTION(i915);
>>>> +    return HAS_LOGICAL_RING_PREEMPTION(i915) &&
>>>> +           (!HAS_HW_PREEMPT_TO_IDLE(i915) ||
>>>> +        (HAS_HW_PREEMPT_TO_IDLE(i915) &&
>>>> +        !USES_GUC_SUBMISSION(i915)));
>>>
>>> Why do we keep the preempt context for !USES_GUC_SUBMISSION(i915) 
>>> even if HAS_HW_PREEMPT_TO_IDLE(i915)? After this patch we shouldn't 
>>> need it anymore, right?
>> The patch only provides gen11 way for the non-GuC submission. This is 
>> why the condition is so convoluted - preempt_context is still needed 
>> if we use GuC.
>> This will be simplified after GuC paches are added.
>
> mmm I think this check is the other way around because it returns true 
> when HAS_HW_PREEMPT_TO_IDLE for !USES_GUC_SUBMISSION, so when GuC is 
> not in use.
Yes, agreed. USES_GUC_SUBMISSION should not be negated.
> BTW, GuC does not support using the preempt context on platforms that 
> have HW supported preempt-to-idle, so there is no need to keep the 
> preempt context around for GuC.
Oh, I did not know that. So the preemption is completely disabled on 
gen11 with GuC then (because patches for gen11 preempt-to-idle are not 
upstreamed)?
>>>
>>>>   }
>>>>     int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
>>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c 
>>>> b/drivers/gpu/drm/i915/i915_pci.c
>>>> index 4364922..66b6700 100644
>>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>>> @@ -595,7 +595,8 @@ static const struct intel_device_info 
>>>> intel_cannonlake_info = {
>>>>       GEN(11), \
>>>>       .ddb_size = 2048, \
>>>>       .has_csr = 0, \
>>>> -    .has_logical_ring_elsq = 1
>>>> +    .has_logical_ring_elsq = 1, \
>>>> +    .has_hw_preempt_to_idle = 1
>>>>     static const struct intel_device_info intel_icelake_11_info = {
>>>>       GEN11_FEATURES,
>>>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
>>>> b/drivers/gpu/drm/i915/intel_device_info.h
>>>> index 933e316..4eb97b5 100644
>>>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>>>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>>>> @@ -98,6 +98,7 @@ enum intel_platform {
>>>>       func(has_logical_ring_contexts); \
>>>>       func(has_logical_ring_elsq); \
>>>>       func(has_logical_ring_preemption); \
>>>> +    func(has_hw_preempt_to_idle); \
>>>>       func(has_overlay); \
>>>>       func(has_pooled_eu); \
>>>>       func(has_psr); \
>>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>>> index 29dcf34..8fe6795 100644
>>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>>> @@ -154,6 +154,7 @@
>>>>   #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>>>>   #define GEN8_CTX_STATUS_COMPLETE    (1 << 4)
>>>>   #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
>>>> +#define GEN11_CTX_STATUS_PREEMPT_IDLE    (1 << 29)
>>>>     #define GEN8_CTX_STATUS_COMPLETED_MASK \
>>>>        (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
>>>> @@ -526,31 +527,49 @@ static void port_assign(struct execlist_port 
>>>> *port, struct i915_request *rq)
>>>>   static void inject_preempt_context(struct intel_engine_cs *engine)
>>>
>>> For gen11+ we don't inject a preempt context anymore, maybe we can 
>>> rename this function to something like "inject_preempt()".
>> My initial approach was to just add a second function. Merging the 
>> changes to inject_preempt_context() was requested by Chris; as I 
>> understand it is to minimize refactoring in other work in progress.
>>>
>>>>   {
>>>>       struct intel_engine_execlists *execlists = &engine->execlists;
>>>> -    struct intel_context *ce =
>>>> - to_intel_context(engine->i915->preempt_context, engine);
>>>> -    unsigned int n;
>>>>   -    GEM_BUG_ON(execlists->preempt_complete_status !=
>>>> -           upper_32_bits(ce->lrc_desc));
>>>> -    GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
>>>> - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>>>> -                       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
>>>> - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>>>> -                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
>>>> +    if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
>>>> +        /*
>>>> +         * If we have hardware preempt-to-idle, we do not need to
>>>> +         * inject any job to the hardware. We only set a flag.
>>>> +         */
>>>> +        GEM_TRACE("%s\n", engine->name);
>>>
>>> This trace is in both conditional branches, might be cleaner to just 
>>> put it before the if statement.
>> True, I did not differentiated the messages. Will put before.
>>>
>>>>   -    /*
>>>> -     * Switch to our empty preempt context so
>>>> -     * the state of the GPU is known (idle).
>>>> -     */
>>>> -    GEM_TRACE("%s\n", engine->name);
>>>> -    for (n = execlists_num_ports(execlists); --n; )
>>>> -        write_desc(execlists, 0, n);
>>>> +        /*
>>>> +         * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
>>>> +         * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
>>>> +         */
>>>> +        GEM_BUG_ON(execlists->ctrl_reg == NULL);
>>>>   -    write_desc(execlists, ce->lrc_desc, n);
>>>> +        /* trigger preemption to idle */
>>>> +        writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
>>>> +    } else {
>>>> +        struct intel_context *ce =
>>>> + to_intel_context(engine->i915->preempt_context, engine);
>>>> +        unsigned int n;
>>>>   -    /* we need to manually load the submit queue */
>>>> -    if (execlists->ctrl_reg)
>>>> -        writel(EL_CTRL_LOAD, execlists->ctrl_reg);
>>>> +        GEM_BUG_ON(execlists->preempt_complete_status !=
>>>> +               upper_32_bits(ce->lrc_desc));
>>>> + GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
>>>> + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>>>> +                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
>>>> + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>>>> +                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
>>>> +
>>>> +        /*
>>>> +         * Switch to our empty preempt context so
>>>> +         * the state of the GPU is known (idle).
>>>> +         */
>>>> +        GEM_TRACE("%s\n", engine->name);
>>>> +        for (n = execlists_num_ports(execlists); --n; )
>>>> +            write_desc(execlists, 0, n);
>>>> +
>>>> +        write_desc(execlists, ce->lrc_desc, n);
>>>> +
>>>> +        /* we need to manually load the submit queue */
>>>> +        if (execlists->ctrl_reg)
>>>> +            writel(EL_CTRL_LOAD, execlists->ctrl_reg);
>>>> +    }
>>>>         execlists_clear_active(&engine->execlists, 
>>>> EXECLISTS_ACTIVE_HWACK);
>>>>       execlists_set_active(&engine->execlists, 
>>>> EXECLISTS_ACTIVE_PREEMPT);
>>>> @@ -1045,22 +1064,51 @@ static void 
>>>> execlists_submission_tasklet(unsigned long data)
>>>>                     status, buf[2*head + 1],
>>>>                     execlists->active);
>>>>   -            if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>>> -                      GEN8_CTX_STATUS_PREEMPTED))
>>>> -                execlists_set_active(execlists,
>>>> -                             EXECLISTS_ACTIVE_HWACK);
>>>> -            if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>>>> +            /*
>>>> +             * Check if preempted from idle to idle directly.
>>>> +             * The STATUS_IDLE_ACTIVE flag is used to mark
>>>> +             * such transition.
>>>> +             */
>>>> +            if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
>>>> +                 (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
>>>> +
>>>>                   execlists_clear_active(execlists,
>>>>                                  EXECLISTS_ACTIVE_HWACK);
>>>
>>> EXECLISTS_ACTIVE_HWACK should be already clear here (we clear it 
>>> both when we inject the pre-emption and on the previous A->I CSB 
>>> event), so there should be no need to clear it.
>> This is a complex case; optimizations here may lead to errors later.
>> But I agree - since this block is only entered on idle-to-idle 
>> preemption, and setting the flag can only happen when hardware is not 
>> idle, we should never see the ACTIVE_HWACK flag set here.
>> I will change it to GEM_BUG_ON(), unless I will get any errors in 
>> testing that.
>>
>
> I'm not sure we actually need to care at all about 
> EXECLISTS_ACTIVE_HWACK here. From what I can see that is only used to 
> make sure we don't submit while the execlists HW is loading the 
> current submission. In this case however we're sure no submissions are 
> occurring because EXECLISTS_ACTIVE_PREEMPT is set, so we're already 
> guarded.
>
> Daniele
>
>>>
>>>>   -            if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
>>>> -                continue;
>>>> +                /*
>>>> +                 * We could not have COMPLETED anything
>>>> +                 * if we were idle before preemption.
>>>> +                 */
>>>> +                GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
>>>> +            }
>>>> +
>>>> +            else {
>>>
>>> nitpick: formatting is wrong here.
>> ack.
>>>
>>> Daniele
>>>
>>>> +                if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>>> +                          GEN8_CTX_STATUS_PREEMPTED))
>>>> +                    execlists_set_active(execlists,
>>>> +                               EXECLISTS_ACTIVE_HWACK);
>>>> +
>>>> +                if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>>>> +                    execlists_clear_active(execlists,
>>>> +                               EXECLISTS_ACTIVE_HWACK);
>>>>   -            /* We should never get a COMPLETED | IDLE_ACTIVE! */
>>>> -            GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>>>> +                if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
>>>> +                    continue;
>>>>   -            if (status & GEN8_CTX_STATUS_COMPLETE &&
>>>> -                buf[2*head + 1] == 
>>>> execlists->preempt_complete_status) {
>>>> +                /*
>>>> +                 * We should never get a
>>>> +                 * COMPLETED | IDLE_ACTIVE!
>>>> +                 */
>>>> +                GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>>>> +            }
>>>> +
>>>> +            /*
>>>> +             * Check if preempted to real idle, either directly or
>>>> +             * the preemptive context already finished executing
>>>> +             */
>>>> +            if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
>>>> +                (status & GEN8_CTX_STATUS_COMPLETE &&
>>>> +                buf[2*head + 1] == 
>>>> execlists->preempt_complete_status)) {
>>>>                   GEM_TRACE("%s preempt-idle\n", engine->name);
>>>> execlists_cancel_port_requests(execlists);
>>>> @@ -2217,7 +2265,8 @@ static void 
>>>> execlists_set_default_submission(struct intel_engine_cs *engine)
>>>>       engine->unpark = NULL;
>>>>         engine->flags |= I915_ENGINE_SUPPORTS_STATS;
>>>> -    if (engine->i915->preempt_context)
>>>> +    if (engine->i915->preempt_context ||
>>>> +        HAS_HW_PREEMPT_TO_IDLE(engine->i915))
>>>>           engine->flags |= I915_ENGINE_HAS_PREEMPTION;
>>>>         engine->i915->caps.scheduler =
>>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.h 
>>>> b/drivers/gpu/drm/i915/intel_lrc.h
>>>> index 4ec7d8d..b1083ac 100644
>>>> --- a/drivers/gpu/drm/i915/intel_lrc.h
>>>> +++ b/drivers/gpu/drm/i915/intel_lrc.h
>>>> @@ -45,6 +45,7 @@
>>>>   #define RING_EXECLIST_SQ_CONTENTS(engine) 
>>>> _MMIO((engine)->mmio_base + 0x510)
>>>>   #define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 
>>>> 0x550)
>>>>   #define      EL_CTRL_LOAD                (1 << 0)
>>>> +#define      EL_CTRL_PREEMPT_TO_IDLE        (1 << 1)
>>>>     /* The docs specify that the write pointer wraps around after 
>>>> 5h, "After status
>>>>    * is written out to the last available status QW at offset 5h, 
>>>> this pointer
>>>>
>>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v4] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (13 preceding siblings ...)
  2018-05-11 17:46 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2018-05-25 18:26 ` Tomasz Lis
  2018-06-11 16:37   ` Daniele Ceraolo Spurio
  2018-05-25 18:51 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev4) Patchwork
                   ` (23 subsequent siblings)
  38 siblings, 1 reply; 70+ messages in thread
From: Tomasz Lis @ 2018-05-25 18:26 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

v2: Added needs_preempt_context() change so that it is not created when
    preempt-to-idle is supported. (Chris)
    Updated setting HWACK flag so that it is cleared after
    preempt-to-idle. (Chris, Daniele)
    Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

v3: Fixed needs_preempt_context() change. (Chris)
    Merged preemption trigger functions to one. (Chris)
    Fixed context state to not assume COMPLETED_MASK after preemption,
    since idle-to-idle case will not have it set.

v4: Simplified needs_preempt_context() change. (Daniele)
    Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)

Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Bspec: 18922
Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |   2 +
 drivers/gpu/drm/i915/i915_gem_context.c  |   3 +-
 drivers/gpu/drm/i915/i915_pci.c          |   3 +-
 drivers/gpu/drm/i915/intel_device_info.h |   1 +
 drivers/gpu/drm/i915/intel_lrc.c         | 113 +++++++++++++++++++++----------
 drivers/gpu/drm/i915/intel_lrc.h         |   1 +
 6 files changed, 86 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 487922f..35eddf7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2534,6 +2534,8 @@ intel_info(const struct drm_i915_private *dev_priv)
 		((dev_priv)->info.has_logical_ring_elsq)
 #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
 		((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+		((dev_priv)->info.has_hw_preempt_to_idle)
 
 #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 45393f6..341a5ff 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -455,7 +455,8 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-	return HAS_LOGICAL_RING_PREEMPTION(i915);
+	return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+	       !HAS_HW_PREEMPT_TO_IDLE(i915);
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 97a91e6a..ee09926 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -593,7 +593,8 @@ static const struct intel_device_info intel_cannonlake_info = {
 	GEN(11), \
 	.ddb_size = 2048, \
 	.has_csr = 0, \
-	.has_logical_ring_elsq = 1
+	.has_logical_ring_elsq = 1, \
+	.has_hw_preempt_to_idle = 1
 
 static const struct intel_device_info intel_icelake_11_info = {
 	GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
 	func(has_logical_ring_contexts); \
 	func(has_logical_ring_elsq); \
 	func(has_logical_ring_preemption); \
+	func(has_hw_preempt_to_idle); \
 	func(has_overlay); \
 	func(has_pooled_eu); \
 	func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8a6058b..f95cb37 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,6 +154,7 @@
 #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE	(1 << 29)
 
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -522,31 +523,46 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
 static void inject_preempt_context(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
-	struct intel_context *ce =
-		to_intel_context(engine->i915->preempt_context, engine);
-	unsigned int n;
-
-	GEM_BUG_ON(execlists->preempt_complete_status !=
-		   upper_32_bits(ce->lrc_desc));
-	GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
-		    _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-				       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
-		   _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-				      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
 
-	/*
-	 * Switch to our empty preempt context so
-	 * the state of the GPU is known (idle).
-	 */
 	GEM_TRACE("%s\n", engine->name);
-	for (n = execlists_num_ports(execlists); --n; )
-		write_desc(execlists, 0, n);
+	if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+		/*
+		 * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+		 * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+		 */
+		GEM_BUG_ON(execlists->ctrl_reg == NULL);
 
-	write_desc(execlists, ce->lrc_desc, n);
+		/*
+		 * If we have hardware preempt-to-idle, we do not need to
+		 * inject any job to the hardware. We only set a flag.
+		 */
+		writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+	} else {
+		struct intel_context *ce =
+			to_intel_context(engine->i915->preempt_context, engine);
+		unsigned int n;
 
-	/* we need to manually load the submit queue */
-	if (execlists->ctrl_reg)
-		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+		GEM_BUG_ON(execlists->preempt_complete_status !=
+			   upper_32_bits(ce->lrc_desc));
+		GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+			    _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+					       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+			   _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+					      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+
+		/*
+		 * Switch to our empty preempt context so
+		 * the state of the GPU is known (idle).
+		 */
+		for (n = execlists_num_ports(execlists); --n; )
+			write_desc(execlists, 0, n);
+
+		write_desc(execlists, ce->lrc_desc, n);
+
+		/* we need to manually load the submit queue */
+		if (execlists->ctrl_reg)
+			writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+	}
 
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
 	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
@@ -1031,22 +1047,48 @@ static void process_csb(struct intel_engine_cs *engine)
 				  status, buf[2*head + 1],
 				  execlists->active);
 
-			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-				      GEN8_CTX_STATUS_PREEMPTED))
-				execlists_set_active(execlists,
-						     EXECLISTS_ACTIVE_HWACK);
-			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
-				execlists_clear_active(execlists,
-						       EXECLISTS_ACTIVE_HWACK);
+			/*
+			 * Check if preempted from idle to idle directly.
+			 * The STATUS_IDLE_ACTIVE flag is used to mark
+			 * such transition.
+			 */
+			if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+			     (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+
+				/* Cannot be waiting for HWACK while HW is idle */
+				GEM_BUG_ON(execlists_is_active(execlists,
+						      EXECLISTS_ACTIVE_HWACK));
 
-			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-				continue;
+				/*
+				 * We could not have COMPLETED anything
+				 * if we were idle before preemption.
+				 */
+				GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+			} else {
+				if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+					      GEN8_CTX_STATUS_PREEMPTED))
+					execlists_set_active(execlists,
+							     EXECLISTS_ACTIVE_HWACK);
+
+				if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+					execlists_clear_active(execlists,
+							       EXECLISTS_ACTIVE_HWACK);
+
+				if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+					continue;
+
+				/* We should never get a COMPLETED | IDLE_ACTIVE! */
+				GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+			}
 
-			/* We should never get a COMPLETED | IDLE_ACTIVE! */
-			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
 
-			if (status & GEN8_CTX_STATUS_COMPLETE &&
-			    buf[2*head + 1] == execlists->preempt_complete_status) {
+			/*
+			 * Check if preempted to real idle, either directly or
+			 * the preemptive context already finished executing
+			 */
+			if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+			    (status & GEN8_CTX_STATUS_COMPLETE &&
+			    buf[2*head + 1] == execlists->preempt_complete_status)) {
 				GEM_TRACE("%s preempt-idle\n", engine->name);
 				complete_preempt_context(execlists);
 				continue;
@@ -2337,7 +2379,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->unpark = NULL;
 
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-	if (engine->i915->preempt_context)
+	if (engine->i915->preempt_context ||
+	    HAS_HW_PREEMPT_TO_IDLE(engine->i915))
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 
 	engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 1593194..3249e9b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@
 #define RING_EXECLIST_SQ_CONTENTS(engine)	_MMIO((engine)->mmio_base + 0x510)
 #define RING_EXECLIST_CONTROL(engine)		_MMIO((engine)->mmio_base + 0x550)
 #define	  EL_CTRL_LOAD				(1 << 0)
+#define	  EL_CTRL_PREEMPT_TO_IDLE		(1 << 1)
 
 /* The docs specify that the write pointer wraps around after 5h, "After status
  * is written out to the last available status QW at offset 5h, this pointer
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev4)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (14 preceding siblings ...)
  2018-05-25 18:26 ` [PATCH v4] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
@ 2018-05-25 18:51 ` Patchwork
  2018-05-25 18:52 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (22 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-05-25 18:51 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev4)
URL   : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
e56337e6a35d drm/i915/gen11: Preempt-to-idle support in execlists.
-:133: CHECK:COMPARISON_TO_NULL: Comparison to NULL could be written "!execlists->ctrl_reg"
#133: FILE: drivers/gpu/drm/i915/intel_lrc.c:533:
+		GEM_BUG_ON(execlists->ctrl_reg == NULL);

-:190: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#190: FILE: drivers/gpu/drm/i915/intel_lrc.c:1056:
+			if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+			     (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {

-:191: CHECK:BRACES: Blank lines aren't necessary after an open brace '{'
#191: FILE: drivers/gpu/drm/i915/intel_lrc.c:1057:
+			     (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+

-:194: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#194: FILE: drivers/gpu/drm/i915/intel_lrc.c:1060:
+				GEM_BUG_ON(execlists_is_active(execlists,
+						      EXECLISTS_ACTIVE_HWACK));

-:231: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#231: FILE: drivers/gpu/drm/i915/intel_lrc.c:1091:
+			    buf[2*head + 1] == execlists->preempt_complete_status)) {
 			         ^

total: 0 errors, 0 warnings, 5 checks, 184 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✗ Fi.CI.SPARSE: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev4)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (15 preceding siblings ...)
  2018-05-25 18:51 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev4) Patchwork
@ 2018-05-25 18:52 ` Patchwork
  2018-05-25 19:08 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (21 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-05-25 18:52 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev4)
URL   : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915/gen11: Preempt-to-idle support in execlists.
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3664:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3666:16: warning: expression using sizeof(void)

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915/gen11: Preempt-to-idle support in execlists. (rev4)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (16 preceding siblings ...)
  2018-05-25 18:52 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-05-25 19:08 ` Patchwork
  2018-05-26  5:18 ` ✓ Fi.CI.IGT: " Patchwork
                   ` (20 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-05-25 19:08 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev4)
URL   : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4244 -> Patchwork_9126 =

== Summary - WARNING ==

  Minor unknown changes coming with Patchwork_9126 need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_9126, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/4/mbox/

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_9126:

  === IGT changes ===

    ==== Warnings ====

    igt@gem_exec_gttfill@basic:
      fi-pnv-d510:        PASS -> SKIP

    
== Known issues ==

  Here are the changes found in Patchwork_9126 that come from known issues:

  === IGT changes ===

    ==== Possible fixes ====

    igt@gem_mmap_gtt@basic-small-bo-tiledx:
      fi-gdg-551:         FAIL (fdo#102575) -> PASS

    
  fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575


== Participating hosts (43 -> 39) ==

  Missing    (4): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-skl-6700hq 


== Build changes ==

    * Linux: CI_DRM_4244 -> Patchwork_9126

  CI_DRM_4244: 475c2ec7b8c6e01cce9a360b9839dc0dd0fa9629 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4499: f560ae5a464331f03f0a669ed46b8c9e56526187 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9126: e56337e6a35dc880c13010e17245256799793498 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

e56337e6a35d drm/i915/gen11: Preempt-to-idle support in execlists.

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9126/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✓ Fi.CI.IGT: success for drm/i915/gen11: Preempt-to-idle support in execlists. (rev4)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (17 preceding siblings ...)
  2018-05-25 19:08 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-05-26  5:18 ` Patchwork
  2018-07-06 15:52 ` [PATCH v5] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (19 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-05-26  5:18 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev4)
URL   : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4244_full -> Patchwork_9126_full =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/4/mbox/

== Known issues ==

  Here are the changes found in Patchwork_9126_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@kms_flip@2x-plain-flip-ts-check-interruptible:
      shard-glk:          PASS -> FAIL (fdo#100368) +1

    igt@kms_flip_tiling@flip-to-x-tiled:
      shard-glk:          PASS -> FAIL (fdo#104724)

    igt@kms_setmode@basic:
      shard-apl:          PASS -> FAIL (fdo#99912)
      shard-kbl:          PASS -> FAIL (fdo#99912)

    
    ==== Possible fixes ====

    igt@drv_selftest@live_hangcheck:
      shard-apl:          DMESG-FAIL (fdo#106560) -> PASS
      shard-glk:          DMESG-FAIL (fdo#106560) -> PASS

    igt@kms_cursor_legacy@cursor-vs-flip-toggle:
      shard-hsw:          FAIL (fdo#103355) -> PASS

    igt@kms_flip@2x-dpms-vs-vblank-race-interruptible:
      shard-hsw:          DMESG-FAIL (fdo#103060) -> PASS

    igt@kms_flip@2x-modeset-vs-vblank-race:
      shard-glk:          FAIL (fdo#103060) -> PASS

    igt@kms_flip@2x-plain-flip-ts-check-interruptible:
      shard-hsw:          FAIL (fdo#100368) -> PASS

    igt@kms_flip@dpms-vs-vblank-race:
      shard-hsw:          FAIL (fdo#103060) -> PASS

    igt@kms_flip_tiling@flip-x-tiled:
      shard-glk:          FAIL (fdo#104724, fdo#103822) -> PASS

    igt@kms_vblank@pipe-a-ts-continuation-modeset:
      shard-apl:          DMESG-WARN (fdo#106247) -> PASS

    
  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
  fdo#103355 https://bugs.freedesktop.org/show_bug.cgi?id=103355
  fdo#103822 https://bugs.freedesktop.org/show_bug.cgi?id=103822
  fdo#104724 https://bugs.freedesktop.org/show_bug.cgi?id=104724
  fdo#106247 https://bugs.freedesktop.org/show_bug.cgi?id=106247
  fdo#106560 https://bugs.freedesktop.org/show_bug.cgi?id=106560
  fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912


== Participating hosts (5 -> 5) ==

  No changes in participating hosts


== Build changes ==

    * Linux: CI_DRM_4244 -> Patchwork_9126

  CI_DRM_4244: 475c2ec7b8c6e01cce9a360b9839dc0dd0fa9629 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4499: f560ae5a464331f03f0a669ed46b8c9e56526187 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9126: e56337e6a35dc880c13010e17245256799793498 @ git://anongit.freedesktop.org/gfx-ci/linux

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9126/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v4] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-05-25 18:26 ` [PATCH v4] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
@ 2018-06-11 16:37   ` Daniele Ceraolo Spurio
  2018-06-29 16:50     ` Lis, Tomasz
  0 siblings, 1 reply; 70+ messages in thread
From: Daniele Ceraolo Spurio @ 2018-06-11 16:37 UTC (permalink / raw)
  To: Tomasz Lis, intel-gfx; +Cc: Mika Kuoppala



On 25/05/18 11:26, Tomasz Lis wrote:
> The patch adds support of preempt-to-idle requesting by setting a proper
> bit within Execlist Control Register, and receiving preemption result from
> Context Status Buffer.
> 
> Preemption in previous gens required a special batch buffer to be executed,
> so the Command Streamer never preempted to idle directly. In Icelake it is
> possible, as there is a hardware mechanism to inform the kernel about
> status of the preemption request.
> 
> This patch does not cover using the new preemption mechanism when GuC is
> active.
> 
> v2: Added needs_preempt_context() change so that it is not created when
>      preempt-to-idle is supported. (Chris)
>      Updated setting HWACK flag so that it is cleared after
>      preempt-to-idle. (Chris, Daniele)
>      Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
> 
> v3: Fixed needs_preempt_context() change. (Chris)
>      Merged preemption trigger functions to one. (Chris)
>      Fixed context state to not assume COMPLETED_MASK after preemption,
>      since idle-to-idle case will not have it set.
> 
> v4: Simplified needs_preempt_context() change. (Daniele)
>      Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
> 
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Michal Winiarski <michal.winiarski@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Bspec: 18922
> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h          |   2 +
>   drivers/gpu/drm/i915/i915_gem_context.c  |   3 +-
>   drivers/gpu/drm/i915/i915_pci.c          |   3 +-
>   drivers/gpu/drm/i915/intel_device_info.h |   1 +
>   drivers/gpu/drm/i915/intel_lrc.c         | 113 +++++++++++++++++++++----------
>   drivers/gpu/drm/i915/intel_lrc.h         |   1 +
>   6 files changed, 86 insertions(+), 37 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 487922f..35eddf7 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2534,6 +2534,8 @@ intel_info(const struct drm_i915_private *dev_priv)
>   		((dev_priv)->info.has_logical_ring_elsq)
>   #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>   		((dev_priv)->info.has_logical_ring_preemption)
> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
> +		((dev_priv)->info.has_hw_preempt_to_idle)
>   
>   #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 45393f6..341a5ff 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -455,7 +455,8 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
>   
>   static bool needs_preempt_context(struct drm_i915_private *i915)
>   {
> -	return HAS_LOGICAL_RING_PREEMPTION(i915);
> +	return HAS_LOGICAL_RING_PREEMPTION(i915) &&
> +	       !HAS_HW_PREEMPT_TO_IDLE(i915);
>   }
>   
>   int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 97a91e6a..ee09926 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -593,7 +593,8 @@ static const struct intel_device_info intel_cannonlake_info = {
>   	GEN(11), \
>   	.ddb_size = 2048, \
>   	.has_csr = 0, \
> -	.has_logical_ring_elsq = 1
> +	.has_logical_ring_elsq = 1, \
> +	.has_hw_preempt_to_idle = 1
>   
>   static const struct intel_device_info intel_icelake_11_info = {
>   	GEN11_FEATURES,
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 933e316..4eb97b5 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -98,6 +98,7 @@ enum intel_platform {
>   	func(has_logical_ring_contexts); \
>   	func(has_logical_ring_elsq); \
>   	func(has_logical_ring_preemption); \
> +	func(has_hw_preempt_to_idle); \
>   	func(has_overlay); \
>   	func(has_pooled_eu); \
>   	func(has_psr); \
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 8a6058b..f95cb37 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -154,6 +154,7 @@
>   #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
>   #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
>   #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
> +#define GEN11_CTX_STATUS_PREEMPT_IDLE	(1 << 29)
>   
>   #define GEN8_CTX_STATUS_COMPLETED_MASK \
>   	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
> @@ -522,31 +523,46 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
>   static void inject_preempt_context(struct intel_engine_cs *engine)

Continuing the discussion from the previous patch, I still think that we 
should rename this function now that it doesn't inject a context on some 
gens. A new function name should be relatively trivial to handle from 
other patch series hitting the area (compared to having a second function).

>   {
>   	struct intel_engine_execlists *execlists = &engine->execlists;
> -	struct intel_context *ce =
> -		to_intel_context(engine->i915->preempt_context, engine);
> -	unsigned int n;
> -
> -	GEM_BUG_ON(execlists->preempt_complete_status !=
> -		   upper_32_bits(ce->lrc_desc));
> -	GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
> -		    _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> -				       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
> -		   _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> -				      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
>   
> -	/*
> -	 * Switch to our empty preempt context so
> -	 * the state of the GPU is known (idle).
> -	 */
>   	GEM_TRACE("%s\n", engine->name);
> -	for (n = execlists_num_ports(execlists); --n; )
> -		write_desc(execlists, 0, n);
> +	if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
> +		/*
> +		 * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
> +		 * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
> +		 */
> +		GEM_BUG_ON(execlists->ctrl_reg == NULL);
>   
> -	write_desc(execlists, ce->lrc_desc, n);
> +		/*
> +		 * If we have hardware preempt-to-idle, we do not need to
> +		 * inject any job to the hardware. We only set a flag.
> +		 */
> +		writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
> +	} else {
> +		struct intel_context *ce =
> +			to_intel_context(engine->i915->preempt_context, engine);
> +		unsigned int n;
>   
> -	/* we need to manually load the submit queue */
> -	if (execlists->ctrl_reg)
> -		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
> +		GEM_BUG_ON(execlists->preempt_complete_status !=
> +			   upper_32_bits(ce->lrc_desc));
> +		GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
> +			    _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> +					       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
> +			   _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> +					      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
> +
> +		/*
> +		 * Switch to our empty preempt context so
> +		 * the state of the GPU is known (idle).
> +		 */
> +		for (n = execlists_num_ports(execlists); --n; )
> +			write_desc(execlists, 0, n);
> +
> +		write_desc(execlists, ce->lrc_desc, n);
> +
> +		/* we need to manually load the submit queue */
> +		if (execlists->ctrl_reg)
> +			writel(EL_CTRL_LOAD, execlists->ctrl_reg);
> +	}
>   
>   	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
>   	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
> @@ -1031,22 +1047,48 @@ static void process_csb(struct intel_engine_cs *engine)
>   				  status, buf[2*head + 1],
>   				  execlists->active);
>   
> -			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> -				      GEN8_CTX_STATUS_PREEMPTED))
> -				execlists_set_active(execlists,
> -						     EXECLISTS_ACTIVE_HWACK);
> -			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> -				execlists_clear_active(execlists,
> -						       EXECLISTS_ACTIVE_HWACK);
> +			/*
> +			 * Check if preempted from idle to idle directly.
> +			 * The STATUS_IDLE_ACTIVE flag is used to mark
> +			 * such transition.
> +			 */
> +			if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
> +			     (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
> +
> +				/* Cannot be waiting for HWACK while HW is idle */

This comment does not match the check, since if the 
EXECLISTS_ACTIVE_HWACK is set it means we've received the hw ack, not 
that we're waiting for it. Personally I would just remove the BUG_ON 
since we don't really care about the value of HWACK as long as 
EXECLISTS_ACTIVE_PREEMPT is set, as the latter guarantees that we're not 
going to submit work until the whole preempt process is complete. A 
BUG_ON for EXECLISTS_ACTIVE_PREEMPT is already in 
complete_preempt_context so we're covered on that side.

With the 2 minor comments addressed:

Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

Daniele

> +				GEM_BUG_ON(execlists_is_active(execlists,
> +						      EXECLISTS_ACTIVE_HWACK));
>   
> -			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> -				continue;
> +				/*
> +				 * We could not have COMPLETED anything
> +				 * if we were idle before preemption.
> +				 */
> +				GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
> +			} else {
> +				if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> +					      GEN8_CTX_STATUS_PREEMPTED))
> +					execlists_set_active(execlists,
> +							     EXECLISTS_ACTIVE_HWACK);
> +
> +				if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> +					execlists_clear_active(execlists,
> +							       EXECLISTS_ACTIVE_HWACK);
> +
> +				if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> +					continue;
> +
> +				/* We should never get a COMPLETED | IDLE_ACTIVE! */
> +				GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
> +			}
>   
> -			/* We should never get a COMPLETED | IDLE_ACTIVE! */
> -			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>   
> -			if (status & GEN8_CTX_STATUS_COMPLETE &&
> -			    buf[2*head + 1] == execlists->preempt_complete_status) {
> +			/*
> +			 * Check if preempted to real idle, either directly or
> +			 * the preemptive context already finished executing
> +			 */
> +			if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
> +			    (status & GEN8_CTX_STATUS_COMPLETE &&
> +			    buf[2*head + 1] == execlists->preempt_complete_status)) {
>   				GEM_TRACE("%s preempt-idle\n", engine->name);
>   				complete_preempt_context(execlists);
>   				continue;
> @@ -2337,7 +2379,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
>   	engine->unpark = NULL;
>   
>   	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
> -	if (engine->i915->preempt_context)
> +	if (engine->i915->preempt_context ||
> +	    HAS_HW_PREEMPT_TO_IDLE(engine->i915))
>   		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
>   
>   	engine->i915->caps.scheduler =
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 1593194..3249e9b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -45,6 +45,7 @@
>   #define RING_EXECLIST_SQ_CONTENTS(engine)	_MMIO((engine)->mmio_base + 0x510)
>   #define RING_EXECLIST_CONTROL(engine)		_MMIO((engine)->mmio_base + 0x550)
>   #define	  EL_CTRL_LOAD				(1 << 0)
> +#define	  EL_CTRL_PREEMPT_TO_IDLE		(1 << 1)
>   
>   /* The docs specify that the write pointer wraps around after 5h, "After status
>    * is written out to the last available status QW at offset 5h, this pointer
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v4] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-06-11 16:37   ` Daniele Ceraolo Spurio
@ 2018-06-29 16:50     ` Lis, Tomasz
  2018-07-02 17:36       ` Daniele Ceraolo Spurio
  0 siblings, 1 reply; 70+ messages in thread
From: Lis, Tomasz @ 2018-06-29 16:50 UTC (permalink / raw)
  To: Daniele Ceraolo Spurio, intel-gfx; +Cc: Mika Kuoppala



On 2018-06-11 18:37, Daniele Ceraolo Spurio wrote:
>
>
> On 25/05/18 11:26, Tomasz Lis wrote:
>> The patch adds support of preempt-to-idle requesting by setting a proper
>> bit within Execlist Control Register, and receiving preemption result 
>> from
>> Context Status Buffer.
>>
>> Preemption in previous gens required a special batch buffer to be 
>> executed,
>> so the Command Streamer never preempted to idle directly. In Icelake 
>> it is
>> possible, as there is a hardware mechanism to inform the kernel about
>> status of the preemption request.
>>
>> This patch does not cover using the new preemption mechanism when GuC is
>> active.
>>
>> v2: Added needs_preempt_context() change so that it is not created when
>>      preempt-to-idle is supported. (Chris)
>>      Updated setting HWACK flag so that it is cleared after
>>      preempt-to-dle. (Chris, Daniele)
>>      Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
>>
>> v3: Fixed needs_preempt_context() change. (Chris)
>>      Merged preemption trigger functions to one. (Chris)
>>      Fixed conyext state tonot assume COMPLETED_MASK after preemption,
>>      since idle-to-idle case will not have it set.
>>
>> v4: Simplified needs_preempt_context() change. (Daniele)
>>      Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
>>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>> Cc: Michal Winiarski <michal.winiarski@intel.com>
>> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>> Bspec: 18922
>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h          |   2 +
>>   drivers/gpu/drm/i915/i915_gem_context.c  |   3 +-
>>   drivers/gpu/drm/i915/i915_pci.c          |   3 +-
>>   drivers/gpu/drm/i915/intel_device_info.h |   1 +
>>   drivers/gpu/drm/i915/intel_lrc.c         | 113 
>> +++++++++++++++++++++----------
>>   drivers/gpu/drm/i915/intel_lrc.h         |   1 +
>>   6 files changed, 86 insertions(+), 37 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 487922f..35eddf7 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2534,6 +2534,8 @@ intel_info(const struct drm_i915_private 
>> *dev_priv)
>>           ((dev_priv)->info.has_logical_ring_elsq)
>>   #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>>           ((dev_priv)->info.has_logical_ring_preemption)
>> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
>> +        ((dev_priv)->info.has_hw_preempt_to_idle)
>>     #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>>   diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
>> b/drivers/gpu/drm/i915/i915_gem_context.c
>> index 45393f6..341a5ff 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>> @@ -455,7 +455,8 @@ destroy_kernel_context(struct i915_gem_context 
>> **ctxp)
>>     static bool needs_preempt_context(struct drm_i915_private *i915)
>>   {
>> -    return HAS_LOGICAL_RING_PREEMPTION(i915);
>> +    return HAS_LOGICAL_RING_PREEMPTION(i915) &&
>> +           !HAS_HW_PREEMPT_TO_IDLE(i915);
>>   }
>>     int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
>> diff --git a/drivers/gpu/drm/i915/i915_pci.c 
>> b/drivers/gpu/drm/i915/i915_pci.c
>> index 97a91e6a..ee09926 100644
>> --- a/drivers/gpu/drm/i915/i915_pci.c
>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>> @@ -593,7 +593,8 @@ static const struct intel_device_info 
>> intel_cannonlake_info = {
>>       GEN(11), \
>>       .ddb_size = 2048, \
>>       .has_csr = 0, \
>> -    .has_logical_ring_elsq = 1
>> +    .has_logical_ring_elsq = 1, \
>> +    .has_hw_preempt_to_idle = 1
>>     static const struct intel_device_info intel_icelake_11_info = {
>>       GEN11_FEATURES,
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
>> b/drivers/gpu/drm/i915/intel_device_info.h
>> index 933e316..4eb97b5 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>> @@ -98,6 +98,7 @@ enum intel_platform {
>>       func(has_logical_ring_contexts); \
>>       func(has_logical_ring_elsq); \
>>       func(has_logical_ring_preemption); \
>> +    func(has_hw_preempt_to_idle); \
>>       func(has_overlay); \
>>       func(has_pooled_eu); \
>>       func(has_psr); \
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index 8a6058b..f95cb37 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -154,6 +154,7 @@
>>   #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>>   #define GEN8_CTX_STATUS_COMPLETE    (1 << 4)
>>   #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
>> +#define GEN11_CTX_STATUS_PREEMPT_IDLE    (1 << 29)
>>     #define GEN8_CTX_STATUS_COMPLETED_MASK \
>>        (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
>> @@ -522,31 +523,46 @@ static void port_assign(struct execlist_port 
>> *port, struct i915_request *rq)
>>   static void inject_preempt_context(struct intel_engine_cs *engine)
>
> continuing the discussion from the previous patch, I still think that 
> we should rename this function now that it doesn't inject a context on 
> some gens. A new function name should be relatively trivial to handle 
> from other patch series hitting the area (compared to having a second 
> function).
Ok, will rename it then.
What would be the most adequate name? execlist_send_preempt_to_idle()?
>
>>   {
>>       struct intel_engine_execlists *execlists = &engine->execlists;
>> -    struct intel_context *ce =
>> -        to_intel_context(engine->i915->preempt_context, engine);
>> -    unsigned int n;
>> -
>> -    GEM_BUG_ON(execlists->preempt_complete_status !=
>> -           upper_32_bits(ce->lrc_desc));
>> -    GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
>> - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>> -                       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
>> - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>> -                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
>>   -    /*
>> -     * Switch to our empty preempt context so
>> -     * the state of the GPU is known (idle).
>> -     */
>>       GEM_TRACE("%s\n", engine->name);
>> -    for (n = execlists_num_ports(execlists); --n; )
>> -        write_desc(execlists, 0, n);
>> +    if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
>> +        /*
>> +         * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
>> +         * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
>> +         */
>> +        GEM_BUG_ON(execlists->ctrl_reg == NULL);
>>   -    write_desc(execlists, ce->lrc_desc, n);
>> +        /*
>> +         * If we have hardware preempt-to-idle, we do not need to
>> +         * inject any job to the hardware. We only set a flag.
>> +         */
>> +        writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
>> +    } else {
>> +        struct intel_context *ce =
>> + to_intel_context(engine->i915->preempt_context, engine);
>> +        unsigned int n;
>>   -    /* we need to manually load the submit queue */
>> -    if (execlists->ctrl_reg)
>> -        writel(EL_CTRL_LOAD, execlists->ctrl_reg);
>> +        GEM_BUG_ON(execlists->preempt_complete_status !=
>> +               upper_32_bits(ce->lrc_desc));
>> +        GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
>> + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>> +                           CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
>> + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>> +                          CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
>> +
>> +        /*
>> +         * Switch to our empty preempt context so
>> +         * the state of the GPU is known (idle).
>> +         */
>> +        for (n = execlists_num_ports(execlists); --n; )
>> +            write_desc(execlists, 0, n);
>> +
>> +        write_desc(execlists, ce->lrc_desc, n);
>> +
>> +        /* we need to manually load the submit queue */
>> +        if (execlists->ctrl_reg)
>> +            writel(EL_CTRL_LOAD, execlists->ctrl_reg);
>> +    }
>>         execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
>>       execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
>> @@ -1031,22 +1047,48 @@ static void process_csb(struct 
>> intel_engine_cs *engine)
>>                     status, buf[2*head + 1],
>>                     execlists->active);
>>   -            if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>> -                      GEN8_CTX_STATUS_PREEMPTED))
>> -                execlists_set_active(execlists,
>> -                             EXECLISTS_ACTIVE_HWACK);
>> -            if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>> -                execlists_clear_active(execlists,
>> -                               EXECLISTS_ACTIVE_HWACK);
>> +            /*
>> +             * Check if preempted from idle to idle directly.
>> +             * The STATUS_IDLE_ACTIVE flag is used to mark
>> +             * such transition.
>> +             */
>> +            if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
>> +                 (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
>> +
>> +                /* Cannot be waiting for HWACK while HW is idle */
>
> This comment does not match the check, since if the 
> EXECLISTS_ACTIVE_HWACK is set it means we've received the hw ack, not 
> that we're waiting for it. Personally I would just remove the BUG_ON 
> since we don't really care about the value of HWACK as long as 
> EXECLISTS_ACTIVE_PREEMPT is set, as the latter ensures us we're not 
> going to submit work until the whole preempt process is complete. A 
> BUG_ON for EXECLISTS_ACTIVE_PREEMPT is already in 
> complete_preempt_context so we're covered on that side.
Will remove.
>
> With the 2 minor comments addressed:
>
> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>
> Daniele
>
>> + GEM_BUG_ON(execlists_is_active(execlists,
>> +                              EXECLISTS_ACTIVE_HWACK));
>>   -            if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
>> -                continue;
>> +                /*
>> +                 * We could not have COMPLETED anything
>> +                 * if we were idle before preemption.
>> +                 */
>> +                GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
>> +            } else {
>> +                if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>> +                          GEN8_CTX_STATUS_PREEMPTED))
>> +                    execlists_set_active(execlists,
>> +                                 EXECLISTS_ACTIVE_HWACK);
>> +
>> +                if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>> +                    execlists_clear_active(execlists,
>> +                                   EXECLISTS_ACTIVE_HWACK);
>> +
>> +                if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
>> +                    continue;
>> +
>> +                /* We should never get a COMPLETED | IDLE_ACTIVE! */
>> +                GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>> +            }
>>   -            /* We should never get a COMPLETED | IDLE_ACTIVE! */
>> -            GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>>   -            if (status & GEN8_CTX_STATUS_COMPLETE &&
>> -                buf[2*head + 1] == 
>> execlists->preempt_complete_status) {
>> +            /*
>> +             * Check if preempted to real idle, either directly or
>> +             * the preemptive context already finished executing
>> +             */
>> +            if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
>> +                (status & GEN8_CTX_STATUS_COMPLETE &&
>> +                buf[2*head + 1] == 
>> execlists->preempt_complete_status)) {
>>                   GEM_TRACE("%s preempt-idle\n", engine->name);
>>                   complete_preempt_context(execlists);
>>                   continue;
>> @@ -2337,7 +2379,8 @@ static void 
>> execlists_set_default_submission(struct intel_engine_cs *engine)
>>       engine->unpark = NULL;
>>         engine->flags |= I915_ENGINE_SUPPORTS_STATS;
>> -    if (engine->i915->preempt_context)
>> +    if (engine->i915->preempt_context ||
>> +        HAS_HW_PREEMPT_TO_IDLE(engine->i915))
>>           engine->flags |= I915_ENGINE_HAS_PREEMPTION;
>>         engine->i915->caps.scheduler =
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.h 
>> b/drivers/gpu/drm/i915/intel_lrc.h
>> index 1593194..3249e9b 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.h
>> +++ b/drivers/gpu/drm/i915/intel_lrc.h
>> @@ -45,6 +45,7 @@
>>   #define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base 
>> + 0x510)
>>   #define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 
>> 0x550)
>>   #define      EL_CTRL_LOAD                (1 << 0)
>> +#define      EL_CTRL_PREEMPT_TO_IDLE        (1 << 1)
>>     /* The docs specify that the write pointer wraps around after 5h, 
>> "After status
>>    * is written out to the last available status QW at offset 5h, 
>> this pointer
>>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v4] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-06-29 16:50     ` Lis, Tomasz
@ 2018-07-02 17:36       ` Daniele Ceraolo Spurio
  0 siblings, 0 replies; 70+ messages in thread
From: Daniele Ceraolo Spurio @ 2018-07-02 17:36 UTC (permalink / raw)
  To: Lis, Tomasz, intel-gfx; +Cc: Mika Kuoppala



On 29/06/18 09:50, Lis, Tomasz wrote:
> 
> 
> On 2018-06-11 18:37, Daniele Ceraolo Spurio wrote:
>>
>>
>> On 25/05/18 11:26, Tomasz Lis wrote:
>>> The patch adds support of preempt-to-idle requesting by setting a proper
>>> bit within Execlist Control Register, and receiving preemption result 
>>> from
>>> Context Status Buffer.
>>>
>>> Preemption in previous gens required a special batch buffer to be 
>>> executed,
>>> so the Command Streamer never preempted to idle directly. In Icelake 
>>> it is
>>> possible, as there is a hardware mechanism to inform the kernel about
>>> status of the preemption request.
>>>
>>> This patch does not cover using the new preemption mechanism when GuC is
>>> active.
>>>
>>> v2: Added needs_preempt_context() change so that it is not created when
>>>      preempt-to-idle is supported. (Chris)
>>>      Updated setting HWACK flag so that it is cleared after
>>>      preempt-to-dle. (Chris, Daniele)
>>>      Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
>>>
>>> v3: Fixed needs_preempt_context() change. (Chris)
>>>      Merged preemption trigger functions to one. (Chris)
>>>      Fixed conyext state tonot assume COMPLETED_MASK after preemption,
>>>      since idle-to-idle case will not have it set.
>>>
>>> v4: Simplified needs_preempt_context() change. (Daniele)
>>>      Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
>>>
>>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>> Cc: Michal Winiarski <michal.winiarski@intel.com>
>>> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>>> Bspec: 18922
>>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_drv.h          |   2 +
>>>   drivers/gpu/drm/i915/i915_gem_context.c  |   3 +-
>>>   drivers/gpu/drm/i915/i915_pci.c          |   3 +-
>>>   drivers/gpu/drm/i915/intel_device_info.h |   1 +
>>>   drivers/gpu/drm/i915/intel_lrc.c         | 113 
>>> +++++++++++++++++++++----------
>>>   drivers/gpu/drm/i915/intel_lrc.h         |   1 +
>>>   6 files changed, 86 insertions(+), 37 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>>> b/drivers/gpu/drm/i915/i915_drv.h
>>> index 487922f..35eddf7 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -2534,6 +2534,8 @@ intel_info(const struct drm_i915_private 
>>> *dev_priv)
>>>           ((dev_priv)->info.has_logical_ring_elsq)
>>>   #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>>>           ((dev_priv)->info.has_logical_ring_preemption)
>>> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
>>> +        ((dev_priv)->info.has_hw_preempt_to_idle)
>>>     #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>>>   diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
>>> b/drivers/gpu/drm/i915/i915_gem_context.c
>>> index 45393f6..341a5ff 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>>> @@ -455,7 +455,8 @@ destroy_kernel_context(struct i915_gem_context 
>>> **ctxp)
>>>     static bool needs_preempt_context(struct drm_i915_private *i915)
>>>   {
>>> -    return HAS_LOGICAL_RING_PREEMPTION(i915);
>>> +    return HAS_LOGICAL_RING_PREEMPTION(i915) &&
>>> +           !HAS_HW_PREEMPT_TO_IDLE(i915);
>>>   }
>>>     int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c 
>>> b/drivers/gpu/drm/i915/i915_pci.c
>>> index 97a91e6a..ee09926 100644
>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>> @@ -593,7 +593,8 @@ static const struct intel_device_info 
>>> intel_cannonlake_info = {
>>>       GEN(11), \
>>>       .ddb_size = 2048, \
>>>       .has_csr = 0, \
>>> -    .has_logical_ring_elsq = 1
>>> +    .has_logical_ring_elsq = 1, \
>>> +    .has_hw_preempt_to_idle = 1
>>>     static const struct intel_device_info intel_icelake_11_info = {
>>>       GEN11_FEATURES,
>>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
>>> b/drivers/gpu/drm/i915/intel_device_info.h
>>> index 933e316..4eb97b5 100644
>>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>>> @@ -98,6 +98,7 @@ enum intel_platform {
>>>       func(has_logical_ring_contexts); \
>>>       func(has_logical_ring_elsq); \
>>>       func(has_logical_ring_preemption); \
>>> +    func(has_hw_preempt_to_idle); \
>>>       func(has_overlay); \
>>>       func(has_pooled_eu); \
>>>       func(has_psr); \
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>> index 8a6058b..f95cb37 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -154,6 +154,7 @@
>>>   #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>>>   #define GEN8_CTX_STATUS_COMPLETE    (1 << 4)
>>>   #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
>>> +#define GEN11_CTX_STATUS_PREEMPT_IDLE    (1 << 29)
>>>     #define GEN8_CTX_STATUS_COMPLETED_MASK \
>>>        (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
>>> @@ -522,31 +523,46 @@ static void port_assign(struct execlist_port 
>>> *port, struct i915_request *rq)
>>>   static void inject_preempt_context(struct intel_engine_cs *engine)
>>
>> continuing the discussion from the previous patch, I still think that 
>> we should rename this function now that it doesn't inject a context on 
>> some gens. A new function name should be relatively trivial to handle 
>> from other patch series hitting the area (compared to having a second 
>> function).
> Ok, will rename it then.
> What would be the most adequate name? execlist_send_preempt_to_idle()?

even something simpler like "inject_preemption()" would work IMO. But 
I've always been bad with naming, so I'll leave it to your judgment :)

Daniele

>>
>>>   {
>>>       struct intel_engine_execlists *execlists = &engine->execlists;
>>> -    struct intel_context *ce =
>>> -        to_intel_context(engine->i915->preempt_context, engine);
>>> -    unsigned int n;
>>> -
>>> -    GEM_BUG_ON(execlists->preempt_complete_status !=
>>> -           upper_32_bits(ce->lrc_desc));
>>> -    GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
>>> - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>>> -                       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
>>> - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>>> -                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
>>>   -    /*
>>> -     * Switch to our empty preempt context so
>>> -     * the state of the GPU is known (idle).
>>> -     */
>>>       GEM_TRACE("%s\n", engine->name);
>>> -    for (n = execlists_num_ports(execlists); --n; )
>>> -        write_desc(execlists, 0, n);
>>> +    if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
>>> +        /*
>>> +         * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
>>> +         * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
>>> +         */
>>> +        GEM_BUG_ON(execlists->ctrl_reg == NULL);
>>>   -    write_desc(execlists, ce->lrc_desc, n);
>>> +        /*
>>> +         * If we have hardware preempt-to-idle, we do not need to
>>> +         * inject any job to the hardware. We only set a flag.
>>> +         */
>>> +        writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
>>> +    } else {
>>> +        struct intel_context *ce =
>>> + to_intel_context(engine->i915->preempt_context, engine);
>>> +        unsigned int n;
>>>   -    /* we need to manually load the submit queue */
>>> -    if (execlists->ctrl_reg)
>>> -        writel(EL_CTRL_LOAD, execlists->ctrl_reg);
>>> +        GEM_BUG_ON(execlists->preempt_complete_status !=
>>> +               upper_32_bits(ce->lrc_desc));
>>> +        GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
>>> + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>>> +                           CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
>>> + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>>> +                          CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
>>> +
>>> +        /*
>>> +         * Switch to our empty preempt context so
>>> +         * the state of the GPU is known (idle).
>>> +         */
>>> +        for (n = execlists_num_ports(execlists); --n; )
>>> +            write_desc(execlists, 0, n);
>>> +
>>> +        write_desc(execlists, ce->lrc_desc, n);
>>> +
>>> +        /* we need to manually load the submit queue */
>>> +        if (execlists->ctrl_reg)
>>> +            writel(EL_CTRL_LOAD, execlists->ctrl_reg);
>>> +    }
>>>         execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
>>>       execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
>>> @@ -1031,22 +1047,48 @@ static void process_csb(struct 
>>> intel_engine_cs *engine)
>>>                     status, buf[2*head + 1],
>>>                     execlists->active);
>>>   -            if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>> -                      GEN8_CTX_STATUS_PREEMPTED))
>>> -                execlists_set_active(execlists,
>>> -                             EXECLISTS_ACTIVE_HWACK);
>>> -            if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>>> -                execlists_clear_active(execlists,
>>> -                               EXECLISTS_ACTIVE_HWACK);
>>> +            /*
>>> +             * Check if preempted from idle to idle directly.
>>> +             * The STATUS_IDLE_ACTIVE flag is used to mark
>>> +             * such transition.
>>> +             */
>>> +            if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
>>> +                 (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
>>> +
>>> +                /* Cannot be waiting for HWACK while HW is idle */
>>
>> This comment does not match the check, since if the 
>> EXECLISTS_ACTIVE_HWACK is set it means we've received the hw ack, not 
>> that we're waiting for it. Personally I would just remove the BUG_ON 
>> since we don't really care about the value of HWACK as long as 
>> EXECLISTS_ACTIVE_PREEMPT is set, as the latter ensures us we're not 
>> going to submit work until the whole preempt process is complete. A 
>> BUG_ON for EXECLISTS_ACTIVE_PREEMPT is already in 
>> complete_preempt_context so we're covered on that side.
> Will remove.
>>
>> With the 2 minor comments addressed:
>>
>> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>
>> Daniele
>>
>>> + GEM_BUG_ON(execlists_is_active(execlists,
>>> +                              EXECLISTS_ACTIVE_HWACK));
>>>   -            if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
>>> -                continue;
>>> +                /*
>>> +                 * We could not have COMPLETED anything
>>> +                 * if we were idle before preemption.
>>> +                 */
>>> +                GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
>>> +            } else {
>>> +                if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>> +                          GEN8_CTX_STATUS_PREEMPTED))
>>> +                    execlists_set_active(execlists,
>>> +                                 EXECLISTS_ACTIVE_HWACK);
>>> +
>>> +                if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>>> +                    execlists_clear_active(execlists,
>>> +                                   EXECLISTS_ACTIVE_HWACK);
>>> +
>>> +                if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
>>> +                    continue;
>>> +
>>> +                /* We should never get a COMPLETED | IDLE_ACTIVE! */
>>> +                GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>>> +            }
>>>   -            /* We should never get a COMPLETED | IDLE_ACTIVE! */
>>> -            GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>>>   -            if (status & GEN8_CTX_STATUS_COMPLETE &&
>>> -                buf[2*head + 1] == 
>>> execlists->preempt_complete_status) {
>>> +            /*
>>> +             * Check if preempted to real idle, either directly or
>>> +             * the preemptive context already finished executing
>>> +             */
>>> +            if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
>>> +                (status & GEN8_CTX_STATUS_COMPLETE &&
>>> +                buf[2*head + 1] == 
>>> execlists->preempt_complete_status)) {
>>>                   GEM_TRACE("%s preempt-idle\n", engine->name);
>>>                   complete_preempt_context(execlists);
>>>                   continue;
>>> @@ -2337,7 +2379,8 @@ static void 
>>> execlists_set_default_submission(struct intel_engine_cs *engine)
>>>       engine->unpark = NULL;
>>>         engine->flags |= I915_ENGINE_SUPPORTS_STATS;
>>> -    if (engine->i915->preempt_context)
>>> +    if (engine->i915->preempt_context ||
>>> +        HAS_HW_PREEMPT_TO_IDLE(engine->i915))
>>>           engine->flags |= I915_ENGINE_HAS_PREEMPTION;
>>>         engine->i915->caps.scheduler =
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.h 
>>> b/drivers/gpu/drm/i915/intel_lrc.h
>>> index 1593194..3249e9b 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.h
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.h
>>> @@ -45,6 +45,7 @@
>>>   #define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base 
>>> + 0x510)
>>>   #define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 
>>> 0x550)
>>>   #define      EL_CTRL_LOAD                (1 << 0)
>>> +#define      EL_CTRL_PREEMPT_TO_IDLE        (1 << 1)
>>>     /* The docs specify that the write pointer wraps around after 5h, 
>>> "After status
>>>    * is written out to the last available status QW at offset 5h, 
>>> this pointer
>>>
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v5] drm/i915/gen11: Preempt-to-idle support in execlists.
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (18 preceding siblings ...)
  2018-05-26  5:18 ` ✓ Fi.CI.IGT: " Patchwork
@ 2018-07-06 15:52 ` Tomasz Lis
  2018-07-06 16:08 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev5) Patchwork
                   ` (18 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Tomasz Lis @ 2018-07-06 15:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

v2: Added needs_preempt_context() change so that it is not created when
    preempt-to-idle is supported. (Chris)
    Updated setting HWACK flag so that it is cleared after
    preempt-to-idle. (Chris, Daniele)
    Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

v3: Fixed needs_preempt_context() change. (Chris)
    Merged preemption trigger functions to one. (Chris)
    Fixed context state to not assume COMPLETED_MASK after preemption,
    since idle-to-idle case will not have it set.

v4: Simplified needs_preempt_context() change. (Daniele)
    Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)

v5: Renamed inject_preempt_context(). (Daniele)
    Removed duplicated GEM_BUG_ON() on HWACK (Daniele)

Bspec: 18922
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |   2 +
 drivers/gpu/drm/i915/i915_gem_context.c  |   3 +-
 drivers/gpu/drm/i915/i915_pci.c          |   3 +-
 drivers/gpu/drm/i915/intel_device_info.h |   1 +
 drivers/gpu/drm/i915/intel_lrc.c         | 114 ++++++++++++++++++++-----------
 drivers/gpu/drm/i915/intel_lrc.h         |   1 +
 6 files changed, 84 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 91a7e4f..c84a66a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2533,6 +2533,8 @@ intel_info(const struct drm_i915_private *dev_priv)
 		((dev_priv)->info.has_logical_ring_elsq)
 #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
 		((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+		((dev_priv)->info.has_hw_preempt_to_idle)
 
 #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index b10770c..bf7faa7 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -464,7 +464,8 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-	return HAS_LOGICAL_RING_PREEMPTION(i915);
+	return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+	       !HAS_HW_PREEMPT_TO_IDLE(i915);
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 55543f1..2da7e77 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -593,7 +593,8 @@ static const struct intel_device_info intel_cannonlake_info = {
 	GEN(11), \
 	.ddb_size = 2048, \
 	.has_csr = 0, \
-	.has_logical_ring_elsq = 1
+	.has_logical_ring_elsq = 1, \
+	.has_hw_preempt_to_idle = 1
 
 static const struct intel_device_info intel_icelake_11_info = {
 	GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 633f9fb..0be7e03 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
 	func(has_logical_ring_contexts); \
 	func(has_logical_ring_elsq); \
 	func(has_logical_ring_preemption); \
+	func(has_hw_preempt_to_idle); \
 	func(has_overlay); \
 	func(has_pooled_eu); \
 	func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ab89dab..aed4aeb 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -155,6 +155,7 @@
 #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE	(1 << 29)
 
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -525,34 +526,49 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
 	port_set(port, port_pack(i915_request_get(rq), port_count(port)));
 }
 
-static void inject_preempt_context(struct intel_engine_cs *engine)
+static void execlist_send_preempt_to_idle(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
-	struct intel_context *ce =
-		to_intel_context(engine->i915->preempt_context, engine);
-	unsigned int n;
-
-	GEM_BUG_ON(execlists->preempt_complete_status !=
-		   upper_32_bits(ce->lrc_desc));
-	GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
-		    _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-				       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
-		   _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-				      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
 
-	/*
-	 * Switch to our empty preempt context so
-	 * the state of the GPU is known (idle).
-	 */
 	GEM_TRACE("%s\n", engine->name);
-	for (n = execlists_num_ports(execlists); --n; )
-		write_desc(execlists, 0, n);
+	if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+		/*
+		 * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+		 * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+		 */
+		GEM_BUG_ON(execlists->ctrl_reg == NULL);
+
+		/*
+		 * If we have hardware preempt-to-idle, we do not need to
+		 * inject any job to the hardware. We only set a flag.
+		 */
+		writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+	} else {
+		struct intel_context *ce =
+			to_intel_context(engine->i915->preempt_context, engine);
+		unsigned int n;
 
-	write_desc(execlists, ce->lrc_desc, n);
+		GEM_BUG_ON(execlists->preempt_complete_status !=
+			   upper_32_bits(ce->lrc_desc));
+		GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+			    _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+					       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+			   _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+					      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
 
-	/* we need to manually load the submit queue */
-	if (execlists->ctrl_reg)
-		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+		/*
+		 * Switch to our empty preempt context so
+		 * the state of the GPU is known (idle).
+		 */
+		for (n = execlists_num_ports(execlists); --n; )
+			write_desc(execlists, 0, n);
+
+		write_desc(execlists, ce->lrc_desc, n);
+
+		/* we need to manually load the submit queue */
+		if (execlists->ctrl_reg)
+			writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+	}
 
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
 	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
@@ -627,7 +643,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			return;
 
 		if (need_preempt(engine, last, execlists->queue_priority)) {
-			inject_preempt_context(engine);
+			execlist_send_preempt_to_idle(engine);
 			return;
 		}
 
@@ -1020,22 +1036,43 @@ static void process_csb(struct intel_engine_cs *engine)
 			  execlists->active);
 
 		status = buf[2 * head];
-		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-			      GEN8_CTX_STATUS_PREEMPTED))
-			execlists_set_active(execlists,
-					     EXECLISTS_ACTIVE_HWACK);
-		if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
-			execlists_clear_active(execlists,
-					       EXECLISTS_ACTIVE_HWACK);
-
-		if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-			continue;
+		/*
+		 * Check if preempted from idle to idle directly.
+		 * The STATUS_IDLE_ACTIVE flag is used to mark
+		 * such transition.
+		 */
+		if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+		     (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+
+			/*
+			 * We could not have COMPLETED anything
+			 * if we were idle before preemption.
+			 */
+			GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+		} else {
+			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+				      GEN8_CTX_STATUS_PREEMPTED))
+				execlists_set_active(execlists,
+						     EXECLISTS_ACTIVE_HWACK);
+
+			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+				execlists_clear_active(execlists,
+						       EXECLISTS_ACTIVE_HWACK);
+
+			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+				continue;
 
-		/* We should never get a COMPLETED | IDLE_ACTIVE! */
-		GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+			/* We should never get a COMPLETED | IDLE_ACTIVE! */
+			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+		}
 
-		if (status & GEN8_CTX_STATUS_COMPLETE &&
-		    buf[2*head + 1] == execlists->preempt_complete_status) {
+		/*
+		 * Check if preempted to real idle, either directly or
+		 * the preemptive context already finished executing
+		 */
+		if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+		    (status & GEN8_CTX_STATUS_COMPLETE &&
+		    buf[2*head + 1] == execlists->preempt_complete_status)) {
 			GEM_TRACE("%s preempt-idle\n", engine->name);
 			complete_preempt_context(execlists);
 			continue;
@@ -2377,7 +2414,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->unpark = NULL;
 
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-	if (engine->i915->preempt_context)
+	if (engine->i915->preempt_context ||
+	    HAS_HW_PREEMPT_TO_IDLE(engine->i915))
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 
 	engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 1593194..3249e9b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@
 #define RING_EXECLIST_SQ_CONTENTS(engine)	_MMIO((engine)->mmio_base + 0x510)
 #define RING_EXECLIST_CONTROL(engine)		_MMIO((engine)->mmio_base + 0x550)
 #define	  EL_CTRL_LOAD				(1 << 0)
+#define	  EL_CTRL_PREEMPT_TO_IDLE		(1 << 1)
 
 /* The docs specify that the write pointer wraps around after 5h, "After status
  * is written out to the last available status QW at offset 5h, this pointer
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev5)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (19 preceding siblings ...)
  2018-07-06 15:52 ` [PATCH v5] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
@ 2018-07-06 16:08 ` Patchwork
  2018-07-06 16:08 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (17 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-07-06 16:08 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev5)
URL   : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
90f487fed124 drm/i915/gen11: Preempt-to-idle support in execlists.
-:141: CHECK:COMPARISON_TO_NULL: Comparison to NULL could be written "!execlists->ctrl_reg"
#141: FILE: drivers/gpu/drm/i915/intel_lrc.c:539:
+		GEM_BUG_ON(execlists->ctrl_reg == NULL);

-:210: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#210: FILE: drivers/gpu/drm/i915/intel_lrc.c:1045:
+		if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+		     (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {

-:211: CHECK:BRACES: Blank lines aren't necessary after an open brace '{'
#211: FILE: drivers/gpu/drm/i915/intel_lrc.c:1046:
+		     (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+

-:244: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#244: FILE: drivers/gpu/drm/i915/intel_lrc.c:1075:
+		    buf[2*head + 1] == execlists->preempt_complete_status)) {
 		         ^

total: 0 errors, 0 warnings, 4 checks, 192 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✗ Fi.CI.SPARSE: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev5)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (20 preceding siblings ...)
  2018-07-06 16:08 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev5) Patchwork
@ 2018-07-06 16:08 ` Patchwork
  2018-07-06 16:25 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (16 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-07-06 16:08 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev5)
URL   : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915/gen11: Preempt-to-idle support in execlists.
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3655:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3657:16: warning: expression using sizeof(void)

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915/gen11: Preempt-to-idle support in execlists. (rev5)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (21 preceding siblings ...)
  2018-07-06 16:08 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-07-06 16:25 ` Patchwork
  2018-07-07 14:09 ` ✓ Fi.CI.IGT: " Patchwork
                   ` (15 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-07-06 16:25 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev5)
URL   : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4446 -> Patchwork_9572 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/5/mbox/

== Known issues ==

  Here are the changes found in Patchwork_9572 that come from known issues:

  === IGT changes ===

    ==== Warnings ====

    igt@gem_exec_suspend@basic-s4-devices:
      {fi-kbl-8809g}:     DMESG-WARN (fdo#107139) -> INCOMPLETE (fdo#107139)

    
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  fdo#107139 https://bugs.freedesktop.org/show_bug.cgi?id=107139


== Participating hosts (47 -> 41) ==

  Missing    (6): fi-ilk-m540 fi-bxt-dsi fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 


== Build changes ==

    * Linux: CI_DRM_4446 -> Patchwork_9572

  CI_DRM_4446: 95944426a9ffda186843c78f2f925494e1bc53c5 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4543: 366eed37c7c71217e1cb1f3be5e26358a41f0001 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9572: 90f487fed124443c7a8d557fa0e54dcbbcbb046e @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

90f487fed124 drm/i915/gen11: Preempt-to-idle support in execlists.

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9572/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✓ Fi.CI.IGT: success for drm/i915/gen11: Preempt-to-idle support in execlists. (rev5)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (22 preceding siblings ...)
  2018-07-06 16:25 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-07-07 14:09 ` Patchwork
  2018-07-16 13:07 ` [PATCH v6] drm/i915: Add IOCTL Param to control data port coherency Tomasz Lis
                   ` (14 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-07-07 14:09 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev5)
URL   : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4446_full -> Patchwork_9572_full =

== Summary - WARNING ==

  Minor unknown changes coming with Patchwork_9572_full need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_9572_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_9572_full:

  === IGT changes ===

    ==== Warnings ====

    igt@gem_exec_schedule@deep-blt:
      shard-kbl:          PASS -> SKIP

    igt@gem_exec_schedule@deep-vebox:
      shard-kbl:          SKIP -> PASS

    
== Known issues ==

  Here are the changes found in Patchwork_9572_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@drv_suspend@shrink:
      shard-hsw:          PASS -> INCOMPLETE (fdo#103540, fdo#106886)

    igt@kms_atomic_transition@1x-modeset-transitions-nonblocking:
      shard-glk:          PASS -> FAIL (fdo#105703)

    igt@kms_draw_crc@draw-method-rgb565-mmap-wc-ytiled:
      shard-glk:          PASS -> FAIL (fdo#103184)

    igt@kms_flip@2x-flip-vs-blocking-wf-vblank:
      shard-glk:          PASS -> FAIL (fdo#100368) +1

    igt@kms_flip@modeset-vs-vblank-race-interruptible:
      shard-hsw:          PASS -> FAIL (fdo#103060)

    igt@kms_flip_tiling@flip-x-tiled:
      shard-glk:          PASS -> FAIL (fdo#103822)

    igt@kms_rotation_crc@sprite-rotation-270:
      shard-apl:          PASS -> FAIL (fdo#103925)

    igt@kms_setmode@basic:
      shard-apl:          PASS -> FAIL (fdo#99912)

    
    ==== Possible fixes ====

    igt@kms_cursor_legacy@2x-long-nonblocking-modeset-vs-cursor-atomic:
      shard-glk:          FAIL (fdo#106509) -> PASS

    igt@kms_flip@2x-plain-flip-fb-recreate:
      shard-glk:          FAIL (fdo#100368) -> PASS

    igt@kms_universal_plane@cursor-fb-leak-pipe-c:
      shard-apl:          FAIL -> PASS

    
  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
  fdo#103184 https://bugs.freedesktop.org/show_bug.cgi?id=103184
  fdo#103540 https://bugs.freedesktop.org/show_bug.cgi?id=103540
  fdo#103822 https://bugs.freedesktop.org/show_bug.cgi?id=103822
  fdo#103925 https://bugs.freedesktop.org/show_bug.cgi?id=103925
  fdo#105703 https://bugs.freedesktop.org/show_bug.cgi?id=105703
  fdo#106509 https://bugs.freedesktop.org/show_bug.cgi?id=106509
  fdo#106886 https://bugs.freedesktop.org/show_bug.cgi?id=106886
  fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912


== Participating hosts (5 -> 5) ==

  No changes in participating hosts


== Build changes ==

    * Linux: CI_DRM_4446 -> Patchwork_9572

  CI_DRM_4446: 95944426a9ffda186843c78f2f925494e1bc53c5 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4543: 366eed37c7c71217e1cb1f3be5e26358a41f0001 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9572: 90f487fed124443c7a8d557fa0e54dcbbcbb046e @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9572/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v6] drm/i915: Add IOCTL Param to control data port coherency.
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (23 preceding siblings ...)
  2018-07-07 14:09 ` ✓ Fi.CI.IGT: " Patchwork
@ 2018-07-16 13:07 ` Tomasz Lis
  2018-07-16 13:35   ` Tvrtko Ursulin
  2018-07-18 13:24   ` Joonas Lahtinen
  2018-07-16 14:36 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev6) Patchwork
                   ` (13 subsequent siblings)
  38 siblings, 2 replies; 70+ messages in thread
From: Tomasz Lis @ 2018-07-16 13:07 UTC (permalink / raw)
  To: intel-gfx

The patch adds a parameter to control the data port coherency functionality
on a per-context level. When the IOCTL is called, a command to switch data
port coherency state is added to the ordered list. All prior requests are
executed on old coherency settings, and all exec requests after the IOCTL
will use new settings.

Rationale:

The OpenCL driver developers requested a functionality to control cache
coherency at data port level. Keeping the coherency at that level is disabled
by default due to its performance costs. OpenCL driver is planning to
enable it for a small subset of submissions, when such functionality is
required. Below are answers to basic questions explaining the background
of the functionality and reasoning for the proposed implementation:

1. Why do we need a coherency enable/disable switch for memory that is shared
between CPU and GEN (GPU)?

Memory coherency between CPU and GEN, while being a great feature that enables
CL_MEM_SVM_FINE_GRAIN_BUFFER OCL capability on Intel GEN architecture, adds
overhead related to tracking (snooping) memory inside different cache units
(L1$, L2$, L3$, LLC$, etc.). At the same time, only a minority of modern OCL
applications actually use CL_MEM_SVM_FINE_GRAIN_BUFFER (and hence require
memory coherency between CPU and GPU). The goal of coherency enable/disable
switch is to remove overhead of memory coherency when memory coherency is not
needed.

2. Why do we need a global coherency switch?

In order to support I/O commands from within EUs (Execution Units), Intel GEN
ISA (GEN Instruction Set Assembly) contains dedicated "send" instructions.
These send instructions provide several addressing models. One of these
addressing models (named "stateless") provides most flexible I/O using plain
virtual addresses (as opposed to buffer_handle+offset models). This "stateless"
model is similar to regular memory load/store operations available on typical
CPUs. Since this model provides I/O using arbitrary virtual addresses, it
enables algorithmic designs that are based on pointer-to-pointer (e.g. buffer
of pointers) concepts. For instance, it allows creating tree-like data
structures such as:
                   ________________
                  |      NODE1     |
                  | uint64_t data  |
                  +----------------|
                  | NODE*  |  NODE*|
                  +--------+-------+
                    /              \
   ________________/                \________________
  |      NODE2     |                |      NODE3     |
  | uint64_t data  |                | uint64_t data  |
  +----------------|                +----------------|
  | NODE*  |  NODE*|                | NODE*  |  NODE*|
  +--------+-------+                +--------+-------+

Please note that pointers inside such structures can point to memory locations
in different OCL allocations  - e.g. NODE1 and NODE2 can reside in one OCL
allocation while NODE3 resides in a completely separate OCL allocation.
Additionally, such pointers can be shared with CPU (i.e. using SVM - Shared
Virtual Memory feature). Using pointers from different allocations doesn't
affect the stateless addressing model which even allows scattered reading from
different allocations at the same time (i.e. by utilizing SIMD-nature of send
instructions).

When it comes to coherency programming, send instructions in stateless model
can be encoded (at ISA level) to either use or disable coherency. However, for
generic OCL applications (such as example with tree-like data structure), OCL
compiler is not able to determine origin of memory pointed to by an arbitrary
pointer - i.e. is not able to track given pointer back to a specific
allocation. As such, it's not able to decide whether coherency is needed or not
for specific pointer (or for specific I/O instruction). As a result, compiler
encodes all stateless sends as coherent (doing otherwise would lead to
functional issues resulting from data corruption). Please note that it would be
possible to work around this (e.g. based on an allocation map and pointer bounds
checking prior to each I/O instruction) but the performance cost of such a
workaround would be many times greater than the cost of keeping coherency
always enabled. As such, enabling/disabling memory coherency at GEN ISA level
is not feasible and an alternative method is needed.

Such an alternative solution is to have a global coherency switch that allows
disabling coherency for a single (though entire) GPU submission. This is
beneficial because this way we:
* can enable (and pay for) coherency only in submissions that actually need
coherency (submissions that use CL_MEM_SVM_FINE_GRAIN_BUFFER resources)
* don't care about coherency at GEN ISA granularity (no performance impact)

3. Will coherency switch be used frequently?

There are scenarios that will require frequent toggling of the coherency
switch.
E.g. an application has two OCL compute kernels: kern_master and kern_worker.
kern_master uses, concurrently with CPU, some fine grain SVM resources
(CL_MEM_SVM_FINE_GRAIN_BUFFER). These resources contain descriptors of
computational work that needs to be executed. kern_master analyzes incoming
work descriptors and populates a plain OCL buffer (non-fine-grain) with payload
for kern_worker. Once kern_master is done, kern_worker kicks-in and processes
the payload that kern_master produced. These two kernels work in a loop, one
after another. Since only kern_master requires coherency, kern_worker should
not be forced to pay for it. This means that we need to have the ability to
toggle coherency switch on or off per each GPU submission:
(ENABLE COHERENCY) kern_master -> (DISABLE COHERENCY) kern_worker -> (ENABLE
COHERENCY) kern_master -> (DISABLE COHERENCY) kern_worker -> ...

v2: Fixed compilation warning.
v3: Refactored the patch to add IOCTL instead of exec flag.
v4: Renamed and documented the API flag. Used strict values.
    Removed redundant GEM_WARN_ON()s. Improved to coding standard.
    Introduced a macro for checking whether hardware supports the feature.
v5: Renamed some locals. Made the flag write to be lazy.
    Updated comments to remove misconceptions. Added gen11 support.
v6: Moved the flag write to gen8_emit_flush_render(). Renamed some functions.
    Moved all flags checking to one place. Added mutex check.

Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michal Winiarski <michal.winiarski@intel.com>

Bspec: 11419
Bspec: 19175
Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  1 +
 drivers/gpu/drm/i915/i915_gem_context.c | 29 +++++++++++++--
 drivers/gpu/drm/i915/i915_gem_context.h | 17 +++++++++
 drivers/gpu/drm/i915/intel_lrc.c        | 66 ++++++++++++++++++++++++++++++++-
 include/uapi/drm/i915_drm.h             |  7 ++++
 5 files changed, 115 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4fb9373..bae3999 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2524,6 +2524,7 @@ intel_info(const struct drm_i915_private *dev_priv)
 #define HAS_EDRAM(dev_priv)	(!!((dev_priv)->edram_cap & EDRAM_ENABLED))
 #define HAS_WT(dev_priv)	((IS_HASWELL(dev_priv) || \
 				 IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv))
+#define HAS_DATA_PORT_COHERENCY(dev_priv)	(INTEL_GEN(dev_priv) >= 9)
 
 #define HWS_NEEDS_PHYSICAL(dev_priv)	((dev_priv)->info.hws_needs_physical)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index b10770c..44ebc31 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -784,6 +784,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file)
 {
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct drm_i915_gem_context_param *args = data;
 	struct i915_gem_context *ctx;
@@ -804,10 +805,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_CONTEXT_PARAM_GTT_SIZE:
 		if (ctx->ppgtt)
 			args->value = ctx->ppgtt->vm.total;
-		else if (to_i915(dev)->mm.aliasing_ppgtt)
-			args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total;
+		else if (i915->mm.aliasing_ppgtt)
+			args->value = i915->mm.aliasing_ppgtt->vm.total;
 		else
-			args->value = to_i915(dev)->ggtt.vm.total;
+			args->value = i915->ggtt.vm.total;
 		break;
 	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
 		args->value = i915_gem_context_no_error_capture(ctx);
@@ -818,6 +819,12 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_CONTEXT_PARAM_PRIORITY:
 		args->value = ctx->sched.priority;
 		break;
+	case I915_CONTEXT_PARAM_DATA_PORT_COHERENCY:
+		if (!HAS_DATA_PORT_COHERENCY(i915))
+			ret = -ENODEV;
+		else
+			args->value = i915_gem_context_is_data_port_coherent(ctx);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -830,6 +837,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file)
 {
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct drm_i915_gem_context_param *args = data;
 	struct i915_gem_context *ctx;
@@ -880,7 +888,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 
 			if (args->size)
 				ret = -EINVAL;
-			else if (!(to_i915(dev)->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
+			else if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
 				ret = -ENODEV;
 			else if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
 				 priority < I915_CONTEXT_MIN_USER_PRIORITY)
@@ -893,6 +901,19 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		}
 		break;
 
+	case I915_CONTEXT_PARAM_DATA_PORT_COHERENCY:
+		if (args->size)
+			ret = -EINVAL;
+		else if (!HAS_DATA_PORT_COHERENCY(i915))
+			ret = -ENODEV;
+		else if (args->value == 1)
+			i915_gem_context_set_data_port_coherent(ctx);
+		else if (args->value == 0)
+			i915_gem_context_clear_data_port_coherent(ctx);
+		else
+			ret = -EINVAL;
+		break;
+
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index b116e49..9312343 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -126,6 +126,8 @@ struct i915_gem_context {
 #define CONTEXT_BANNABLE		3
 #define CONTEXT_BANNED			4
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	5
+#define CONTEXT_DATA_PORT_COHERENT_REQUESTED	6
+#define CONTEXT_DATA_PORT_COHERENT_ACTIVE	7
 
 	/**
 	 * @hw_id: - unique identifier for the context
@@ -257,6 +259,21 @@ static inline void i915_gem_context_set_force_single_submission(struct i915_gem_
 	__set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
 }
 
+static inline bool i915_gem_context_is_data_port_coherent(struct i915_gem_context *ctx)
+{
+	return test_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, &ctx->flags);
+}
+
+static inline void i915_gem_context_set_data_port_coherent(struct i915_gem_context *ctx)
+{
+	__set_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, &ctx->flags);
+}
+
+static inline void i915_gem_context_clear_data_port_coherent(struct i915_gem_context *ctx)
+{
+	__clear_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, &ctx->flags);
+}
+
 static inline bool i915_gem_context_is_default(const struct i915_gem_context *c)
 {
 	return c->user_handle == DEFAULT_CONTEXT_HANDLE;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6fef9d1..6a08e10 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -259,6 +259,63 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
 	ce->lrc_desc = desc;
 }
 
+static int emit_set_data_port_coherency(struct i915_request *rq, bool enable)
+{
+	u32 *cs;
+	i915_reg_t reg;
+
+	GEM_BUG_ON(rq->engine->class != RENDER_CLASS);
+	GEM_BUG_ON(INTEL_GEN(rq->i915) < 9);
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	if (INTEL_GEN(rq->i915) >= 11)
+		reg = ICL_HDC_MODE;
+	else if (INTEL_GEN(rq->i915) >= 10)
+		reg = CNL_HDC_CHICKEN0;
+	else
+		reg = HDC_CHICKEN0;
+
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(reg);
+	/* Enabling coherency means disabling the bit which forces it off */
+	if (enable)
+		*cs++ = _MASKED_BIT_DISABLE(HDC_FORCE_NON_COHERENT);
+	else
+		*cs++ = _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT);
+	*cs++ = MI_NOOP;
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int
+intel_lr_context_update_data_port_coherency(struct i915_request *rq)
+{
+	struct i915_gem_context *ctx = rq->gem_context;
+	bool enable = test_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, &ctx->flags);
+	int ret;
+
+	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+
+	if (test_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags) == enable)
+		return 0;
+
+	ret = emit_set_data_port_coherency(rq, enable);
+
+	if (!ret) {
+		if (enable)
+			__set_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags);
+		else
+			__clear_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags);
+	}
+
+	return ret;
+}
+
 static struct i915_priolist *
 lookup_priolist(struct intel_engine_cs *engine, int prio)
 {
@@ -2133,7 +2190,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
 		i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
 	bool vf_flush_wa = false, dc_flush_wa = false;
 	u32 *cs, flags = 0;
-	int len;
+	int err, len;
 
 	flags |= PIPE_CONTROL_CS_STALL;
 
@@ -2164,6 +2221,13 @@ static int gen8_emit_flush_render(struct i915_request *request,
 		/* WaForGAMHang:kbl */
 		if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
 			dc_flush_wa = true;
+
+		/* Emit the switch of data port coherency state if needed */
+		err = intel_lr_context_update_data_port_coherency(request);
+		if (GEM_WARN_ON(err)) {
+			DRM_DEBUG("Data Port Coherency toggle failed.\n");
+			return err;
+		}
 	}
 
 	len = 6;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7f5634c..6ece759 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1456,6 +1456,13 @@ struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+/*
+ * When data port level coherency is enabled, the GPU will update memory
+ * buffers shared with CPU, by forcing internal cache units to send memory
+ * writes to higher level caches faster. Enabling data port coherency has
+ * a performance cost.
+ */
+#define I915_CONTEXT_PARAM_DATA_PORT_COHERENCY	0x7
 	__u64 value;
 };
 
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* Re: [PATCH v6] drm/i915: Add IOCTL Param to control data port coherency.
  2018-07-16 13:07 ` [PATCH v6] drm/i915: Add IOCTL Param to control data port coherency Tomasz Lis
@ 2018-07-16 13:35   ` Tvrtko Ursulin
  2018-07-18 13:24   ` Joonas Lahtinen
  1 sibling, 0 replies; 70+ messages in thread
From: Tvrtko Ursulin @ 2018-07-16 13:35 UTC (permalink / raw)
  To: Tomasz Lis, intel-gfx


On 16/07/2018 14:07, Tomasz Lis wrote:
> The patch adds a parameter to control the data port coherency functionality
> on a per-context level. When the IOCTL is called, a command to switch data
> port coherency state is added to the ordered list. All prior requests are
> executed on old coherency settings, and all exec requests after the IOCTL
> will use new settings.
> 
> Rationale:
> 
> The OpenCL driver developers requested functionality to control cache
> coherency at data port level. Keeping the coherency at that level is disabled
> by default due to its performance costs. OpenCL driver is planning to
> enable it for a small subset of submissions, when such functionality is
> required. Below are answers to basic questions explaining the background
> of the functionality and reasoning for the proposed implementation:
> 
> 1. Why do we need a coherency enable/disable switch for memory that is shared
> between CPU and GEN (GPU)?
> 
> Memory coherency between CPU and GEN, while being a great feature that enables
> CL_MEM_SVM_FINE_GRAIN_BUFFER OCL capability on Intel GEN architecture, adds
> overhead related to tracking (snooping) memory inside different cache units
> (L1$, L2$, L3$, LLC$, etc.). At the same time, only a minority of modern OCL
> applications actually use CL_MEM_SVM_FINE_GRAIN_BUFFER (and hence require
> memory coherency between CPU and GPU). The goal of coherency enable/disable
> switch is to remove overhead of memory coherency when memory coherency is not
> needed.
> 
> 2. Why do we need a global coherency switch?
> 
> In order to support I/O commands from within EUs (Execution Units), Intel GEN
> ISA (GEN Instruction Set Assembly) contains dedicated "send" instructions.
> These send instructions provide several addressing models. One of these
> addressing models (named "stateless") provides most flexible I/O using plain
> virtual addresses (as opposed to buffer_handle+offset models). This "stateless"
> model is similar to regular memory load/store operations available on typical
> CPUs. Since this model provides I/O using arbitrary virtual addresses, it
> enables algorithmic designs that are based on pointer-to-pointer (e.g. buffer
> of pointers) concepts. For instance, it allows creating tree-like data
> structures such as:
>                     ________________
>                    |      NODE1     |
>                    | uint64_t data  |
>                    +----------------|
>                    | NODE*  |  NODE*|
>                    +--------+-------+
>                      /              \
>     ________________/                \________________
>    |      NODE2     |                |      NODE3     |
>    | uint64_t data  |                | uint64_t data  |
>    +----------------|                +----------------|
>    | NODE*  |  NODE*|                | NODE*  |  NODE*|
>    +--------+-------+                +--------+-------+
> 
> Please note that pointers inside such structures can point to memory locations
> in different OCL allocations  - e.g. NODE1 and NODE2 can reside in one OCL
> allocation while NODE3 resides in a completely separate OCL allocation.
> Additionally, such pointers can be shared with CPU (i.e. using SVM - Shared
> Virtual Memory feature). Using pointers from different allocations doesn't
> affect the stateless addressing model which even allows scattered reading from
> different allocations at the same time (i.e. by utilizing SIMD-nature of send
> instructions).
> 
> When it comes to coherency programming, send instructions in stateless model
> can be encoded (at ISA level) to either use or disable coherency. However, for
> generic OCL applications (such as example with tree-like data structure), OCL
> compiler is not able to determine origin of memory pointed to by an arbitrary
> pointer - i.e. is not able to track given pointer back to a specific
> allocation. As such, it's not able to decide whether coherency is needed or not
> for specific pointer (or for specific I/O instruction). As a result, compiler
> encodes all stateless sends as coherent (doing otherwise would lead to
> functional issues resulting from data corruption). Please note that it would be
> possible to workaround this (e.g. based on allocations map and pointer bounds
> checking prior to each I/O instruction) but the performance cost of such
> workaround would be many times greater than the cost of keeping coherency
> always enabled. As such, enabling/disabling memory coherency at GEN ISA level
> is not feasible and alternative method is needed.
> 
> Such alternative solution is to have a global coherency switch that allows
> disabling coherency for single (though entire) GPU submission. This is
> beneficial because this way we:
> * can enable (and pay for) coherency only in submissions that actually need
> coherency (submissions that use CL_MEM_SVM_FINE_GRAIN_BUFFER resources)
> * don't care about coherency at GEN ISA granularity (no performance impact)
> 
> 3. Will coherency switch be used frequently?
> 
> There are scenarios that will require frequent toggling of the coherency
> switch.
> E.g. an application has two OCL compute kernels: kern_master and kern_worker.
> kern_master uses, concurrently with CPU, some fine grain SVM resources
> (CL_MEM_SVM_FINE_GRAIN_BUFFER). These resources contain descriptors of
> computational work that needs to be executed. kern_master analyzes incoming
> work descriptors and populates a plain OCL buffer (non-fine-grain) with payload
> for kern_worker. Once kern_master is done, kern_worker kicks-in and processes
> the payload that kern_master produced. These two kernels work in a loop, one
> after another. Since only kern_master requires coherency, kern_worker should
> not be forced to pay for it. This means that we need to have the ability to
> toggle coherency switch on or off per each GPU submission:
> (ENABLE COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> (ENABLE
> COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> ...
> 
> v2: Fixed compilation warning.
> v3: Refactored the patch to add IOCTL instead of exec flag.
> v4: Renamed and documented the API flag. Used strict values.
>      Removed redundant GEM_WARN_ON()s. Improved to coding standard.
>      Introduced a macro for checking whether hardware supports the feature.
> v5: Renamed some locals. Made the flag write to be lazy.
>      Updated comments to remove misconceptions. Added gen11 support.
> v6: Moved the flag write to gen8_emit_flush_render(). Renamed some functions.
>      Moved all flags checking to one place. Added mutex check.
> 
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michal Winiarski <michal.winiarski@intel.com>
> 
> Bspec: 11419
> Bspec: 19175
> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h         |  1 +
>   drivers/gpu/drm/i915/i915_gem_context.c | 29 +++++++++++++--
>   drivers/gpu/drm/i915/i915_gem_context.h | 17 +++++++++
>   drivers/gpu/drm/i915/intel_lrc.c        | 66 ++++++++++++++++++++++++++++++++-
>   include/uapi/drm/i915_drm.h             |  7 ++++
>   5 files changed, 115 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4fb9373..bae3999 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2524,6 +2524,7 @@ intel_info(const struct drm_i915_private *dev_priv)
>   #define HAS_EDRAM(dev_priv)	(!!((dev_priv)->edram_cap & EDRAM_ENABLED))
>   #define HAS_WT(dev_priv)	((IS_HASWELL(dev_priv) || \
>   				 IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv))
> +#define HAS_DATA_PORT_COHERENCY(dev_priv)	(INTEL_GEN(dev_priv) >= 9)
>   
>   #define HWS_NEEDS_PHYSICAL(dev_priv)	((dev_priv)->info.hws_needs_physical)
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index b10770c..44ebc31 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -784,6 +784,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
>   int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   				    struct drm_file *file)
>   {
> +	struct drm_i915_private *i915 = to_i915(dev);
>   	struct drm_i915_file_private *file_priv = file->driver_priv;
>   	struct drm_i915_gem_context_param *args = data;
>   	struct i915_gem_context *ctx;
> @@ -804,10 +805,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   	case I915_CONTEXT_PARAM_GTT_SIZE:
>   		if (ctx->ppgtt)
>   			args->value = ctx->ppgtt->vm.total;
> -		else if (to_i915(dev)->mm.aliasing_ppgtt)
> -			args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total;
> +		else if (i915->mm.aliasing_ppgtt)
> +			args->value = i915->mm.aliasing_ppgtt->vm.total;
>   		else
> -			args->value = to_i915(dev)->ggtt.vm.total;
> +			args->value = i915->ggtt.vm.total;
>   		break;
>   	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
>   		args->value = i915_gem_context_no_error_capture(ctx);
> @@ -818,6 +819,12 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   	case I915_CONTEXT_PARAM_PRIORITY:
>   		args->value = ctx->sched.priority;
>   		break;
> +	case I915_CONTEXT_PARAM_DATA_PORT_COHERENCY:
> +		if (!HAS_DATA_PORT_COHERENCY(i915))
> +			ret = -ENODEV;
> +		else
> +			args->value = i915_gem_context_is_data_port_coherent(ctx);
> +		break;
>   	default:
>   		ret = -EINVAL;
>   		break;
> @@ -830,6 +837,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   				    struct drm_file *file)
>   {
> +	struct drm_i915_private *i915 = to_i915(dev);
>   	struct drm_i915_file_private *file_priv = file->driver_priv;
>   	struct drm_i915_gem_context_param *args = data;
>   	struct i915_gem_context *ctx;
> @@ -880,7 +888,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   
>   			if (args->size)
>   				ret = -EINVAL;
> -			else if (!(to_i915(dev)->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
> +			else if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
>   				ret = -ENODEV;
>   			else if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
>   				 priority < I915_CONTEXT_MIN_USER_PRIORITY)
> @@ -893,6 +901,19 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   		}
>   		break;
>   
> +	case I915_CONTEXT_PARAM_DATA_PORT_COHERENCY:
> +		if (args->size)
> +			ret = -EINVAL;
> +		else if (!HAS_DATA_PORT_COHERENCY(i915))
> +			ret = -ENODEV;
> +		else if (args->value == 1)
> +			i915_gem_context_set_data_port_coherent(ctx);
> +		else if (args->value == 0)
> +			i915_gem_context_clear_data_port_coherent(ctx);
> +		else
> +			ret = -EINVAL;
> +		break;
> +
>   	default:
>   		ret = -EINVAL;
>   		break;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index b116e49..9312343 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -126,6 +126,8 @@ struct i915_gem_context {
>   #define CONTEXT_BANNABLE		3
>   #define CONTEXT_BANNED			4
>   #define CONTEXT_FORCE_SINGLE_SUBMISSION	5
> +#define CONTEXT_DATA_PORT_COHERENT_REQUESTED	6
> +#define CONTEXT_DATA_PORT_COHERENT_ACTIVE	7
>   
>   	/**
>   	 * @hw_id: - unique identifier for the context
> @@ -257,6 +259,21 @@ static inline void i915_gem_context_set_force_single_submission(struct i915_gem_
>   	__set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
>   }
>   
> +static inline bool i915_gem_context_is_data_port_coherent(struct i915_gem_context *ctx)
> +{
> +	return test_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, &ctx->flags);
> +}
> +
> +static inline void i915_gem_context_set_data_port_coherent(struct i915_gem_context *ctx)
> +{
> +	__set_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, &ctx->flags);
> +}
> +
> +static inline void i915_gem_context_clear_data_port_coherent(struct i915_gem_context *ctx)
> +{
> +	__clear_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, &ctx->flags);
> +}
> +
>   static inline bool i915_gem_context_is_default(const struct i915_gem_context *c)
>   {
>   	return c->user_handle == DEFAULT_CONTEXT_HANDLE;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 6fef9d1..6a08e10 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -259,6 +259,63 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
>   	ce->lrc_desc = desc;
>   }
>   
> +static int emit_set_data_port_coherency(struct i915_request *rq, bool enable)
> +{
> +	u32 *cs;
> +	i915_reg_t reg;
> +
> +	GEM_BUG_ON(rq->engine->class != RENDER_CLASS);
> +	GEM_BUG_ON(INTEL_GEN(rq->i915) < 9);
> +
> +	cs = intel_ring_begin(rq, 4);
> +	if (IS_ERR(cs))
> +		return PTR_ERR(cs);
> +
> +	if (INTEL_GEN(rq->i915) >= 11)
> +		reg = ICL_HDC_MODE;
> +	else if (INTEL_GEN(rq->i915) >= 10)
> +		reg = CNL_HDC_CHICKEN0;
> +	else
> +		reg = HDC_CHICKEN0;
> +
> +	*cs++ = MI_LOAD_REGISTER_IMM(1);
> +	*cs++ = i915_mmio_reg_offset(reg);
> +	/* Enabling coherency means disabling the bit which forces it off */
> +	if (enable)
> +		*cs++ = _MASKED_BIT_DISABLE(HDC_FORCE_NON_COHERENT);
> +	else
> +		*cs++ = _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT);
> +	*cs++ = MI_NOOP;
> +
> +	intel_ring_advance(rq, cs);
> +
> +	return 0;
> +}
> +
> +static int
> +intel_lr_context_update_data_port_coherency(struct i915_request *rq)
> +{
> +	struct i915_gem_context *ctx = rq->gem_context;
> +	bool enable = test_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, &ctx->flags);
> +	int ret;
> +
> +	lockdep_assert_held(&rq->i915->drm.struct_mutex);
> +
> +	if (test_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags) == enable)
> +		return 0;
> +
> +	ret = emit_set_data_port_coherency(rq, enable);
> +
> +	if (!ret) {
> +		if (enable)
> +			__set_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags);
> +		else
> +			__clear_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags);
> +	}
> +
> +	return ret;
> +}
> +
>   static struct i915_priolist *
>   lookup_priolist(struct intel_engine_cs *engine, int prio)
>   {
> @@ -2133,7 +2190,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
>   		i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
>   	bool vf_flush_wa = false, dc_flush_wa = false;
>   	u32 *cs, flags = 0;
> -	int len;
> +	int err, len;
>   
>   	flags |= PIPE_CONTROL_CS_STALL;
>   
> @@ -2164,6 +2221,13 @@ static int gen8_emit_flush_render(struct i915_request *request,
>   		/* WaForGAMHang:kbl */
>   		if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
>   			dc_flush_wa = true;
> +
> +		/* Emit the switch of data port coherency state if needed */
> +		err = intel_lr_context_update_data_port_coherency(request);
> +		if (GEM_WARN_ON(err)) {
> +			DRM_DEBUG("Data Port Coherency toggle failed.\n");
> +			return err;
> +		}
>   	}
>   
>   	len = 6;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7f5634c..6ece759 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1456,6 +1456,13 @@ struct drm_i915_gem_context_param {
>   #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
>   #define   I915_CONTEXT_DEFAULT_PRIORITY		0
>   #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
> +/*
> + * When data port level coherency is enabled, the GPU will update memory
> + * buffers shared with CPU, by forcing internal cache units to send memory
> + * writes to higher level caches faster. Enabling data port coherency has
> + * a performance cost.
> + */
> +#define I915_CONTEXT_PARAM_DATA_PORT_COHERENCY	0x7
>   	__u64 value;
>   };
>   
> 

Looks good to me!

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev6)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (24 preceding siblings ...)
  2018-07-16 13:07 ` [PATCH v6] drm/i915: Add IOCTL Param to control data port coherency Tomasz Lis
@ 2018-07-16 14:36 ` Patchwork
  2018-07-16 14:37 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (12 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-07-16 14:36 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev6)
URL   : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
26cf0c5fcc46 drm/i915: Add IOCTL Param to control data port coherency.
-:15: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#15: 
coherency at data port level. Keeping the coherency at that level is disabled

total: 0 errors, 1 warnings, 0 checks, 199 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✗ Fi.CI.SPARSE: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev6)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (25 preceding siblings ...)
  2018-07-16 14:36 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev6) Patchwork
@ 2018-07-16 14:37 ` Patchwork
  2018-07-16 14:58 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (11 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-07-16 14:37 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev6)
URL   : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915: Add IOCTL Param to control data port coherency.
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3653:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3654:16: warning: expression using sizeof(void)

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915/gen11: Preempt-to-idle support in execlists. (rev6)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (26 preceding siblings ...)
  2018-07-16 14:37 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-07-16 14:58 ` Patchwork
  2018-07-16 19:26 ` ✗ Fi.CI.IGT: failure " Patchwork
                   ` (10 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-07-16 14:58 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev6)
URL   : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4493 -> Patchwork_9676 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/6/mbox/

== Known issues ==

  Here are the changes found in Patchwork_9676 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@drv_selftest@live_hangcheck:
      fi-skl-guc:         PASS -> DMESG-FAIL (fdo#107174)

    igt@kms_chamelium@dp-crc-fast:
      fi-kbl-7500u:       PASS -> DMESG-FAIL (fdo#103841)

    igt@kms_frontbuffer_tracking@basic:
      fi-hsw-peppy:       PASS -> DMESG-FAIL (fdo#102614, fdo#106103)

    
    ==== Possible fixes ====

    igt@drv_selftest@live_hangcheck:
      fi-bdw-5557u:       DMESG-FAIL (fdo#106560) -> PASS

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
      fi-snb-2520m:       INCOMPLETE (fdo#103713) -> PASS

    
  fdo#102614 https://bugs.freedesktop.org/show_bug.cgi?id=102614
  fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
  fdo#103841 https://bugs.freedesktop.org/show_bug.cgi?id=103841
  fdo#106103 https://bugs.freedesktop.org/show_bug.cgi?id=106103
  fdo#106560 https://bugs.freedesktop.org/show_bug.cgi?id=106560
  fdo#107174 https://bugs.freedesktop.org/show_bug.cgi?id=107174


== Participating hosts (44 -> 41) ==

  Additional (1): fi-skl-6700hq 
  Missing    (4): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-hsw-4200u 


== Build changes ==

    * Linux: CI_DRM_4493 -> Patchwork_9676

  CI_DRM_4493: c69b4c1274cccaa270c1e4daa68228724c80603a @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4558: d8e97e1710b27a3931a1c53d1dd88c0e709c085b @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9676: 26cf0c5fcc461bb5aca97673189ba1b329385a76 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

26cf0c5fcc46 drm/i915: Add IOCTL Param to control data port coherency.

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9676/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✗ Fi.CI.IGT: failure for drm/i915/gen11: Preempt-to-idle support in execlists. (rev6)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (27 preceding siblings ...)
  2018-07-16 14:58 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-07-16 19:26 ` Patchwork
  2018-10-15 17:29 ` [PATCH v5] drm/i915/icl: Preempt-to-idle support in execlists Tomasz Lis
                   ` (9 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-07-16 19:26 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev6)
URL   : https://patchwork.freedesktop.org/series/40747/
State : failure

== Summary ==

= CI Bug Log - changes from CI_DRM_4493_full -> Patchwork_9676_full =

== Summary - FAILURE ==

  Serious unknown changes coming with Patchwork_9676_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_9676_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_9676_full:

  === IGT changes ===

    ==== Possible regressions ====

    igt@gem_ctx_param@invalid-param-get:
      shard-apl:          PASS -> FAIL +1
      shard-glk:          PASS -> FAIL +1

    igt@gem_ctx_param@invalid-param-set:
      shard-kbl:          PASS -> FAIL +1
      shard-hsw:          PASS -> FAIL +1
      shard-snb:          PASS -> FAIL +1

    igt@gem_exec_schedule@preempt-hang-render:
      shard-glk:          NOTRUN -> DMESG-FAIL

    
    ==== Warnings ====

    igt@gem_exec_schedule@deep-render:
      shard-kbl:          SKIP -> PASS

    
== Known issues ==

  Here are the changes found in Patchwork_9676_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@drv_missed_irq:
      shard-glk:          NOTRUN -> INCOMPLETE (k.org#198133, fdo#103359)

    igt@kms_flip@2x-flip-vs-expired-vblank-interruptible:
      shard-glk:          PASS -> FAIL (fdo#105363)

    
    ==== Possible fixes ====

    igt@kms_cursor_legacy@cursor-vs-flip-toggle:
      shard-hsw:          FAIL (fdo#103355) -> PASS

    igt@kms_flip@2x-dpms-vs-vblank-race-interruptible:
      shard-hsw:          FAIL (fdo#103060) -> PASS +2

    igt@kms_flip@2x-plain-flip-fb-recreate:
      shard-glk:          FAIL (fdo#100368) -> PASS

    igt@kms_flip@dpms-vs-vblank-race:
      shard-kbl:          FAIL (fdo#103060) -> PASS

    igt@kms_flip@dpms-vs-vblank-race-interruptible:
      shard-glk:          FAIL (fdo#103060) -> PASS

    igt@kms_flip@flip-vs-expired-vblank:
      shard-glk:          FAIL (fdo#105363, fdo#102887) -> PASS

    
  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
  fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
  fdo#103355 https://bugs.freedesktop.org/show_bug.cgi?id=103355
  fdo#103359 https://bugs.freedesktop.org/show_bug.cgi?id=103359
  fdo#105363 https://bugs.freedesktop.org/show_bug.cgi?id=105363
  k.org#198133 https://bugzilla.kernel.org/show_bug.cgi?id=198133


== Participating hosts (5 -> 5) ==

  No changes in participating hosts


== Build changes ==

    * Linux: CI_DRM_4493 -> Patchwork_9676

  CI_DRM_4493: c69b4c1274cccaa270c1e4daa68228724c80603a @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4558: d8e97e1710b27a3931a1c53d1dd88c0e709c085b @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9676: 26cf0c5fcc461bb5aca97673189ba1b329385a76 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9676/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v6] drm/i915: Add IOCTL Param to control data port coherency.
  2018-07-16 13:07 ` [PATCH v6] drm/i915: Add IOCTL Param to control data port coherency Tomasz Lis
  2018-07-16 13:35   ` Tvrtko Ursulin
@ 2018-07-18 13:24   ` Joonas Lahtinen
  2018-07-18 14:42     ` Tvrtko Ursulin
  1 sibling, 1 reply; 70+ messages in thread
From: Joonas Lahtinen @ 2018-07-18 13:24 UTC (permalink / raw)
  To: Tomasz Lis, intel-gfx

Quoting Tomasz Lis (2018-07-16 16:07:16)
> +static int emit_set_data_port_coherency(struct i915_request *rq, bool enable)
> +{
> +       u32 *cs;
> +       i915_reg_t reg;
> +
> +       GEM_BUG_ON(rq->engine->class != RENDER_CLASS);
> +       GEM_BUG_ON(INTEL_GEN(rq->i915) < 9);
> +
> +       cs = intel_ring_begin(rq, 4);
> +       if (IS_ERR(cs))
> +               return PTR_ERR(cs);
> +
> +       if (INTEL_GEN(rq->i915) >= 11)
> +               reg = ICL_HDC_MODE;
> +       else if (INTEL_GEN(rq->i915) >= 10)
> +               reg = CNL_HDC_CHICKEN0;
> +       else
> +               reg = HDC_CHICKEN0;
> +
> +       *cs++ = MI_LOAD_REGISTER_IMM(1);
> +       *cs++ = i915_mmio_reg_offset(reg);
> +       /* Enabling coherency means disabling the bit which forces it off */

This comment is still spurious, please get rid of the habit of writing
comments about "what" the code is doing, useful comments should be
limited to "why", which is quite self explanatory here, that's the way
the register is.

> +static int
> +intel_lr_context_update_data_port_coherency(struct i915_request *rq)
> +{
> +       struct i915_gem_context *ctx = rq->gem_context;
> +       bool enable = test_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, &ctx->flags);
> +       int ret;
> +
> +       lockdep_assert_held(&rq->i915->drm.struct_mutex);
> +
> +       if (test_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags) == enable)
> +               return 0;
> +
> +       ret = emit_set_data_port_coherency(rq, enable);
> +
> +       if (!ret) {
> +               if (enable)
> +                       __set_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags);
> +               else
> +                       __clear_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags);
> +       }

Do we have indication that the hardware feature will be unreliable in
responding to the requests? I don't think you need the differentiation
of requested vs. active. If there is an error, we can just report back to
the user as a failed IOCTL. Now it adds unnecessary complication for no benefit.

> @@ -2164,6 +2221,13 @@ static int gen8_emit_flush_render(struct i915_request *request,
>                 /* WaForGAMHang:kbl */
>                 if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
>                         dc_flush_wa = true;
> +
> +               /* Emit the switch of data port coherency state if needed */

Ditto for spurious comment, just about what the code does.

> +++ b/include/uapi/drm/i915_drm.h
> @@ -1456,6 +1456,13 @@ struct drm_i915_gem_context_param {
>  #define   I915_CONTEXT_MAX_USER_PRIORITY       1023 /* inclusive */
>  #define   I915_CONTEXT_DEFAULT_PRIORITY                0
>  #define   I915_CONTEXT_MIN_USER_PRIORITY       -1023 /* inclusive */
> +/*
> + * When data port level coherency is enabled, the GPU will update memory
> + * buffers shared with CPU, by forcing internal cache units to send memory
> + * writes to higher level caches faster. Enabling data port coherency has
> + * a performance cost.
> + */

I was under the impression this is enabled by default and it can be disabled
for a performance optimization?

Regards, Joonas
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v6] drm/i915: Add IOCTL Param to control data port coherency.
  2018-07-18 13:24   ` Joonas Lahtinen
@ 2018-07-18 14:42     ` Tvrtko Ursulin
  2018-07-18 15:28       ` Lis, Tomasz
  0 siblings, 1 reply; 70+ messages in thread
From: Tvrtko Ursulin @ 2018-07-18 14:42 UTC (permalink / raw)
  To: Joonas Lahtinen, Tomasz Lis, intel-gfx


On 18/07/2018 14:24, Joonas Lahtinen wrote:
> Quoting Tomasz Lis (2018-07-16 16:07:16)
>> +static int emit_set_data_port_coherency(struct i915_request *rq, bool enable)
>> +{
>> +       u32 *cs;
>> +       i915_reg_t reg;
>> +
>> +       GEM_BUG_ON(rq->engine->class != RENDER_CLASS);
>> +       GEM_BUG_ON(INTEL_GEN(rq->i915) < 9);
>> +
>> +       cs = intel_ring_begin(rq, 4);
>> +       if (IS_ERR(cs))
>> +               return PTR_ERR(cs);
>> +
>> +       if (INTEL_GEN(rq->i915) >= 11)
>> +               reg = ICL_HDC_MODE;
>> +       else if (INTEL_GEN(rq->i915) >= 10)
>> +               reg = CNL_HDC_CHICKEN0;
>> +       else
>> +               reg = HDC_CHICKEN0;
>> +
>> +       *cs++ = MI_LOAD_REGISTER_IMM(1);
>> +       *cs++ = i915_mmio_reg_offset(reg);
>> +       /* Enabling coherency means disabling the bit which forces it off */
> 
> This comment is still spurious, please get rid of the habit of writing
> comments about "what" the code is doing, useful comments should be
> limited to "why", which is quite self explanatory here, that's the way
> the register is.
> 
>> +static int
>> +intel_lr_context_update_data_port_coherency(struct i915_request *rq)
>> +{
>> +       struct i915_gem_context *ctx = rq->gem_context;
>> +       bool enable = test_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, &ctx->flags);
>> +       int ret;
>> +
>> +       lockdep_assert_held(&rq->i915->drm.struct_mutex);
>> +
>> +       if (test_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags) == enable)
>> +               return 0;
>> +
>> +       ret = emit_set_data_port_coherency(rq, enable);
>> +
>> +       if (!ret) {
>> +               if (enable)
>> +                       __set_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags);
>> +               else
>> +                       __clear_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags);
>> +       }
> 
> Do we have indication that the hardware feature will be unreliable in
> responding to the requests? I don't think you need the differentiation
> of requested vs. active. If there is an error, we can just report back to
> the user as a failed IOCTL. Now it adds unnecessary complication for no benefit.

Requested vs active is for implementing the lazy emit.

AFAIR it does propagate the error out of execbuf (although we never ever 
expect it to happen), and this is just to keep the internal 
house-keeping in sync.

Regards,

Tvrtko

>> @@ -2164,6 +2221,13 @@ static int gen8_emit_flush_render(struct i915_request *request,
>>                  /* WaForGAMHang:kbl */
>>                  if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
>>                          dc_flush_wa = true;
>> +
>> +               /* Emit the switch of data port coherency state if needed */
> 
> Ditto for spurious comment, just about what the code does.
> 
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -1456,6 +1456,13 @@ struct drm_i915_gem_context_param {
>>   #define   I915_CONTEXT_MAX_USER_PRIORITY       1023 /* inclusive */
>>   #define   I915_CONTEXT_DEFAULT_PRIORITY                0
>>   #define   I915_CONTEXT_MIN_USER_PRIORITY       -1023 /* inclusive */
>> +/*
>> + * When data port level coherency is enabled, the GPU will update memory
>> + * buffers shared with CPU, by forcing internal cache units to send memory
>> + * writes to higher level caches faster. Enabling data port coherency has
>> + * a performance cost.
>> + */
> 
> I was under impression this is enabled by default and it can be disabled
> for a performance optimization?
> 
> Regards, Joonas
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v6] drm/i915: Add IOCTL Param to control data port coherency.
  2018-07-18 14:42     ` Tvrtko Ursulin
@ 2018-07-18 15:28       ` Lis, Tomasz
  2018-07-19  7:12         ` Joonas Lahtinen
  0 siblings, 1 reply; 70+ messages in thread
From: Lis, Tomasz @ 2018-07-18 15:28 UTC (permalink / raw)
  To: Tvrtko Ursulin, Joonas Lahtinen, intel-gfx



On 2018-07-18 16:42, Tvrtko Ursulin wrote:
>
> On 18/07/2018 14:24, Joonas Lahtinen wrote:
>> Quoting Tomasz Lis (2018-07-16 16:07:16)
>>> +static int emit_set_data_port_coherency(struct i915_request *rq, 
>>> bool enable)
>>> +{
>>> +       u32 *cs;
>>> +       i915_reg_t reg;
>>> +
>>> +       GEM_BUG_ON(rq->engine->class != RENDER_CLASS);
>>> +       GEM_BUG_ON(INTEL_GEN(rq->i915) < 9);
>>> +
>>> +       cs = intel_ring_begin(rq, 4);
>>> +       if (IS_ERR(cs))
>>> +               return PTR_ERR(cs);
>>> +
>>> +       if (INTEL_GEN(rq->i915) >= 11)
>>> +               reg = ICL_HDC_MODE;
>>> +       else if (INTEL_GEN(rq->i915) >= 10)
>>> +               reg = CNL_HDC_CHICKEN0;
>>> +       else
>>> +               reg = HDC_CHICKEN0;
>>> +
>>> +       *cs++ = MI_LOAD_REGISTER_IMM(1);
>>> +       *cs++ = i915_mmio_reg_offset(reg);
>>> +       /* Enabling coherency means disabling the bit which forces 
>>> it off */
>>
>> This comment is still spurious, please get rid of the habit of writing
>> comments about "what" the code is doing, useful comments should be
>> limited to "why", which is quite self explanatory here, that's the way
>> the register is.
Ok, I will read the related doc:
https://www.kernel.org/doc/html/v4.10/process/coding-style.html#commenting
>>
>>> +static int
>>> +intel_lr_context_update_data_port_coherency(struct i915_request *rq)
>>> +{
>>> +       struct i915_gem_context *ctx = rq->gem_context;
>>> +       bool enable = test_bit(CONTEXT_DATA_PORT_COHERENT_REQUESTED, 
>>> &ctx->flags);
>>> +       int ret;
>>> +
>>> + lockdep_assert_held(&rq->i915->drm.struct_mutex);
>>> +
>>> +       if (test_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags) 
>>> == enable)
>>> +               return 0;
>>> +
>>> +       ret = emit_set_data_port_coherency(rq, enable);
>>> +
>>> +       if (!ret) {
>>> +               if (enable)
>>> + __set_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags);
>>> +               else
>>> + __clear_bit(CONTEXT_DATA_PORT_COHERENT_ACTIVE, &ctx->flags);
>>> +       }
>>
>> Do we have indication that the hardware feature will be unreliable in
>> responding to the requests? I don't think you need the differentiation
>> of requested vs. active. If there is an error, we can just report 
>> back to
>> the user as a failed IOCTL. Now it adds unnecessary complication for 
>> no benefit.
>
> Requested vs active is for implementing the lazy emit.
>
> AFAIR it does propagate the error out of execbuf (although we never 
> ever expect it to happen), and this is just to keep the internal 
> house-keeping in sync.
>
> Regards,
>
> Tvrtko
>
>>> @@ -2164,6 +2221,13 @@ static int gen8_emit_flush_render(struct 
>>> i915_request *request,
>>>                  /* WaForGAMHang:kbl */
>>>                  if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
>>>                          dc_flush_wa = true;
>>> +
>>> +               /* Emit the switch of data port coherency state if 
>>> needed */
>>
>> Ditto for spurious comment, just about what the code does.
>>
>>> +++ b/include/uapi/drm/i915_drm.h
>>> @@ -1456,6 +1456,13 @@ struct drm_i915_gem_context_param {
>>>   #define   I915_CONTEXT_MAX_USER_PRIORITY       1023 /* inclusive */
>>>   #define   I915_CONTEXT_DEFAULT_PRIORITY                0
>>>   #define   I915_CONTEXT_MIN_USER_PRIORITY       -1023 /* inclusive */
>>> +/*
>>> + * When data port level coherency is enabled, the GPU will update 
>>> memory
>>> + * buffers shared with CPU, by forcing internal cache units to send 
>>> memory
>>> + * writes to higher level caches faster. Enabling data port 
>>> coherency has
>>> + * a performance cost.
>>> + */
>>
>> I was under impression this is enabled by default and it can be disabled
>> for a performance optimization?
This is true, coherency is kept by default. We disable it as a 
workaround: performance-related for gen11, and due to minor hardware 
issue on previous platforms. See WaForceEnableNonCoherent.
-Tomasz
>>
>> Regards, Joonas
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
>>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v6] drm/i915: Add IOCTL Param to control data port coherency.
  2018-07-18 15:28       ` Lis, Tomasz
@ 2018-07-19  7:12         ` Joonas Lahtinen
  2018-07-19 15:10           ` Lis, Tomasz
  0 siblings, 1 reply; 70+ messages in thread
From: Joonas Lahtinen @ 2018-07-19  7:12 UTC (permalink / raw)
  To: Lis, Tomasz, Tvrtko Ursulin, intel-gfx

Quoting Lis, Tomasz (2018-07-18 18:28:32)
> 
> On 2018-07-18 16:42, Tvrtko Ursulin wrote:
> >
> > On 18/07/2018 14:24, Joonas Lahtinen wrote:
> >> Quoting Tomasz Lis (2018-07-16 16:07:16)

<SNIP>

> >>> +++ b/include/uapi/drm/i915_drm.h
> >>> @@ -1456,6 +1456,13 @@ struct drm_i915_gem_context_param {
> >>>   #define   I915_CONTEXT_MAX_USER_PRIORITY       1023 /* inclusive */
> >>>   #define   I915_CONTEXT_DEFAULT_PRIORITY                0
> >>>   #define   I915_CONTEXT_MIN_USER_PRIORITY       -1023 /* inclusive */
> >>> +/*
> >>> + * When data port level coherency is enabled, the GPU will update 
> >>> memory
> >>> + * buffers shared with CPU, by forcing internal cache units to send 
> >>> memory
> >>> + * writes to higher level caches faster. Enabling data port 
> >>> coherency has
> >>> + * a performance cost.
> >>> + */
> >>
> >> I was under impression this is enabled by default and it can be disabled
> >> for a performance optimization?
> This is true, coherency is kept by default. We disable it as a 
> workaround: performance-related for gen11, and due to minor hardware 
> issue on previous platforms. See WaForceEnableNonCoherent.

Ok, then you definitely want to rephrase the comment to bake that
information in it. Now it sounds like it needs to be turned on to have
coherency.

Regards, Joonas
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v6] drm/i915: Add IOCTL Param to control data port coherency.
  2018-07-19  7:12         ` Joonas Lahtinen
@ 2018-07-19 15:10           ` Lis, Tomasz
  0 siblings, 0 replies; 70+ messages in thread
From: Lis, Tomasz @ 2018-07-19 15:10 UTC (permalink / raw)
  To: Joonas Lahtinen, Tvrtko Ursulin, intel-gfx



On 2018-07-19 09:12, Joonas Lahtinen wrote:
> Quoting Lis, Tomasz (2018-07-18 18:28:32)
>> On 2018-07-18 16:42, Tvrtko Ursulin wrote:
>>> On 18/07/2018 14:24, Joonas Lahtinen wrote:
>>>> Quoting Tomasz Lis (2018-07-16 16:07:16)
> <SNIP>
>
>>>>> +++ b/include/uapi/drm/i915_drm.h
>>>>> @@ -1456,6 +1456,13 @@ struct drm_i915_gem_context_param {
>>>>>    #define   I915_CONTEXT_MAX_USER_PRIORITY       1023 /* inclusive */
>>>>>    #define   I915_CONTEXT_DEFAULT_PRIORITY                0
>>>>>    #define   I915_CONTEXT_MIN_USER_PRIORITY       -1023 /* inclusive */
>>>>> +/*
>>>>> + * When data port level coherency is enabled, the GPU will update
>>>>> memory
>>>>> + * buffers shared with CPU, by forcing internal cache units to send
>>>>> memory
>>>>> + * writes to higher level caches faster. Enabling data port
>>>>> coherency has
>>>>> + * a performance cost.
>>>>> + */
>>>> I was under impression this is enabled by default and it can be disabled
>>>> for a performance optimization?
>> This is true, coherency is kept by default. We disable it as a
>> workaround: performance-related for gen11, and due to minor hardware
>> issue on previous platforms. See WaForceEnableNonCoherent.
> Ok, then you definitely want to rephrase the comment to bake that
> information in it. Now it sounds like it needs to be turned on to have
> coherency.
I'm not sure if I understand what you're asking for.
Should I emphasize that the feature is disabled unless the flag is set? 
This seems obvious...
Or should I provide the reason why it is disabled on specific platforms? 
This should probably be done within workaround setup, not in user api 
definition. Or maybe it's enough to have it in Bspec? Bspec links are 
provided in the patch.
Or should I just mention the workaround name?
-Tomasz
> Regards, Joonas

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v5] drm/i915/icl: Preempt-to-idle support in execlists.
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (28 preceding siblings ...)
  2018-07-16 19:26 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2018-10-15 17:29 ` Tomasz Lis
  2018-10-16 10:53   ` Joonas Lahtinen
  2018-10-15 17:44 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev7) Patchwork
                   ` (8 subsequent siblings)
  38 siblings, 1 reply; 70+ messages in thread
From: Tomasz Lis @ 2018-10-15 17:29 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

v2: Added needs_preempt_context() change so that it is not created when
    preempt-to-idle is supported. (Chris)
    Updated setting HWACK flag so that it is cleared after
    preempt-to-idle. (Chris, Daniele)
    Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

v3: Fixed needs_preempt_context() change. (Chris)
    Merged preemption trigger functions to one. (Chris)
    Fixed context state to not assume COMPLETED_MASK after preemption,
    since idle-to-idle case will not have it set.

v4: Simplified needs_preempt_context() change. (Daniele)
    Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)

v5: Renamed inject_preempt_context(). (Daniele)
    Removed duplicated GEM_BUG_ON() on HWACK (Daniele)

Bspec: 18922
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |   2 +
 drivers/gpu/drm/i915/i915_gem_context.c  |   3 +-
 drivers/gpu/drm/i915/i915_pci.c          |   3 +-
 drivers/gpu/drm/i915/intel_device_info.h |   1 +
 drivers/gpu/drm/i915/intel_lrc.c         | 109 +++++++++++++++++++++----------
 drivers/gpu/drm/i915/intel_lrc.h         |   1 +
 6 files changed, 84 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3017ef0..4817438 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2597,6 +2597,8 @@ intel_info(const struct drm_i915_private *dev_priv)
 		((dev_priv)->info.has_logical_ring_elsq)
 #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
 		((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+		((dev_priv)->info.has_hw_preempt_to_idle)
 
 #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 8cbe580..98ca20e 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -529,7 +529,8 @@ static void init_contexts(struct drm_i915_private *i915)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-	return HAS_LOGICAL_RING_PREEMPTION(i915);
+	return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+	       !HAS_HW_PREEMPT_TO_IDLE(i915);
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 0a05cc7..f708d97 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -597,7 +597,8 @@ static const struct intel_device_info intel_cannonlake_info = {
 	GEN10_FEATURES, \
 	GEN(11), \
 	.ddb_size = 2048, \
-	.has_logical_ring_elsq = 1
+	.has_logical_ring_elsq = 1, \
+	.has_hw_preempt_to_idle = 1
 
 static const struct intel_device_info intel_icelake_11_info = {
 	GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index af70026..7dcf0fd 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -104,6 +104,7 @@ enum intel_ppgtt {
 	func(has_logical_ring_contexts); \
 	func(has_logical_ring_elsq); \
 	func(has_logical_ring_preemption); \
+	func(has_hw_preempt_to_idle); \
 	func(has_overlay); \
 	func(has_pooled_eu); \
 	func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ff0e2b3..4c2bfed 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -155,6 +155,7 @@
 #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE	(1 << 29)
 
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -488,29 +489,49 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
 	port_set(port, port_pack(i915_request_get(rq), port_count(port)));
 }
 
-static void inject_preempt_context(struct intel_engine_cs *engine)
+static void execlist_send_preempt_to_idle(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
-	struct intel_context *ce =
-		to_intel_context(engine->i915->preempt_context, engine);
-	unsigned int n;
+	GEM_TRACE("%s\n", engine->name);
 
-	GEM_BUG_ON(execlists->preempt_complete_status !=
-		   upper_32_bits(ce->lrc_desc));
+	if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+		/*
+		 * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+		 * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+		 */
+		GEM_BUG_ON(execlists->ctrl_reg == NULL);
 
-	/*
-	 * Switch to our empty preempt context so
-	 * the state of the GPU is known (idle).
-	 */
-	GEM_TRACE("%s\n", engine->name);
-	for (n = execlists_num_ports(execlists); --n; )
-		write_desc(execlists, 0, n);
+		/*
+		 * If we have hardware preempt-to-idle, we do not need to
+		 * inject any job to the hardware. We only set a flag.
+		 */
+		writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+	} else {
+		struct intel_context *ce =
+			to_intel_context(engine->i915->preempt_context, engine);
+		unsigned int n;
 
-	write_desc(execlists, ce->lrc_desc, n);
+		GEM_BUG_ON(execlists->preempt_complete_status !=
+			   upper_32_bits(ce->lrc_desc));
+		GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+					   CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+					   CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
 
-	/* we need to manually load the submit queue */
-	if (execlists->ctrl_reg)
-		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+		/*
+		 * Switch to our empty preempt context so
+		 * the state of the GPU is known (idle).
+		 */
+		for (n = execlists_num_ports(execlists); --n; )
+			write_desc(execlists, 0, n);
+
+		write_desc(execlists, ce->lrc_desc, n);
+
+		/* we need to manually load the submit queue */
+		if (execlists->ctrl_reg)
+			writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+	}
 
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
 	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
@@ -583,7 +604,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			return;
 
 		if (need_preempt(engine, last, execlists->queue_priority)) {
-			inject_preempt_context(engine);
+			execlist_send_preempt_to_idle(engine);
 			return;
 		}
 
@@ -910,22 +931,43 @@ static void process_csb(struct intel_engine_cs *engine)
 			  execlists->active);
 
 		status = buf[2 * head];
-		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-			      GEN8_CTX_STATUS_PREEMPTED))
-			execlists_set_active(execlists,
-					     EXECLISTS_ACTIVE_HWACK);
-		if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
-			execlists_clear_active(execlists,
-					       EXECLISTS_ACTIVE_HWACK);
-
-		if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-			continue;
+		/*
+		 * Check if preempted from idle to idle directly.
+		 * The STATUS_IDLE_ACTIVE flag is used to mark
+		 * such transition.
+		 */
+		if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+		     (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
 
-		/* We should never get a COMPLETED | IDLE_ACTIVE! */
-		GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+			/*
+			 * We could not have COMPLETED anything
+			 * if we were idle before preemption.
+			 */
+			GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+		} else {
+			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+				      GEN8_CTX_STATUS_PREEMPTED))
+				execlists_set_active(execlists,
+						     EXECLISTS_ACTIVE_HWACK);
+
+			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+				execlists_clear_active(execlists,
+						       EXECLISTS_ACTIVE_HWACK);
+
+			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+				continue;
 
-		if (status & GEN8_CTX_STATUS_COMPLETE &&
-		    buf[2*head + 1] == execlists->preempt_complete_status) {
+			/* We should never get a COMPLETED | IDLE_ACTIVE! */
+			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+		}
+
+		/*
+		 * Check if preempted to real idle, either directly or
+		 * the preemptive context already finished executing
+		 */
+		if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+		    (status & GEN8_CTX_STATUS_COMPLETE &&
+		    buf[2*head + 1] == execlists->preempt_complete_status)) {
 			GEM_TRACE("%s preempt-idle\n", engine->name);
 			complete_preempt_context(execlists);
 			continue;
@@ -2138,7 +2180,8 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->unpark = NULL;
 
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-	if (engine->i915->preempt_context)
+	if (engine->i915->preempt_context ||
+	    HAS_HW_PREEMPT_TO_IDLE(engine->i915))
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 
 	engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index f5a5502..871901a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -43,6 +43,7 @@
 #define RING_EXECLIST_SQ_CONTENTS(engine)	_MMIO((engine)->mmio_base + 0x510)
 #define RING_EXECLIST_CONTROL(engine)		_MMIO((engine)->mmio_base + 0x550)
 #define	  EL_CTRL_LOAD				(1 << 0)
+#define	  EL_CTRL_PREEMPT_TO_IDLE		(1 << 1)
 
 /* The docs specify that the write pointer wraps around after 5h, "After status
  * is written out to the last available status QW at offset 5h, this pointer
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev7)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (29 preceding siblings ...)
  2018-10-15 17:29 ` [PATCH v5] drm/i915/icl: Preempt-to-idle support in execlists Tomasz Lis
@ 2018-10-15 17:44 ` Patchwork
  2018-10-15 17:45 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (7 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-10-15 17:44 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev7)
URL   : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
40f273c57c71 drm/i915/icl: Preempt-to-idle support in execlists.
-:129: CHECK:COMPARISON_TO_NULL: Comparison to NULL could be written "!execlists->ctrl_reg"
#129: FILE: drivers/gpu/drm/i915/intel_lrc.c:502:
+		GEM_BUG_ON(execlists->ctrl_reg == NULL);

-:205: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#205: FILE: drivers/gpu/drm/i915/intel_lrc.c:940:
+		if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+		     (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {

-:239: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#239: FILE: drivers/gpu/drm/i915/intel_lrc.c:970:
+		    buf[2*head + 1] == execlists->preempt_complete_status)) {
 		         ^

total: 0 errors, 0 warnings, 3 checks, 187 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✗ Fi.CI.SPARSE: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev7)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (30 preceding siblings ...)
  2018-10-15 17:44 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev7) Patchwork
@ 2018-10-15 17:45 ` Patchwork
  2018-10-15 18:07 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (6 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-10-15 17:45 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev7)
URL   : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915/icl: Preempt-to-idle support in execlists.
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3725:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3727:16: warning: expression using sizeof(void)

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915/gen11: Preempt-to-idle support in execlists. (rev7)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (31 preceding siblings ...)
  2018-10-15 17:45 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-10-15 18:07 ` Patchwork
  2018-10-15 23:55 ` ✗ Fi.CI.IGT: failure " Patchwork
                   ` (5 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-10-15 18:07 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev7)
URL   : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4982 -> Patchwork_10461 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/7/mbox/

== Known issues ==

  Here are the changes found in Patchwork_10461 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@amdgpu/amd_basic@cs-compute:
      fi-kbl-8809g:       NOTRUN -> FAIL (fdo#108094)

    igt@amdgpu/amd_prime@amd-to-i915:
      fi-kbl-8809g:       NOTRUN -> FAIL (fdo#107341)

    igt@gem_exec_suspend@basic-s3:
      fi-kbl-soraka:      NOTRUN -> INCOMPLETE (fdo#107556, fdo#107774, fdo#107859)

    igt@kms_frontbuffer_tracking@basic:
      fi-byt-clapper:     PASS -> FAIL (fdo#103167)

    
    ==== Possible fixes ====

    igt@drv_getparams_basic@basic-subslice-total:
      fi-snb-2520m:       DMESG-WARN (fdo#103713) -> PASS +10

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
      fi-byt-clapper:     FAIL (fdo#107362, fdo#103191) -> PASS
      fi-icl-u2:          INCOMPLETE (fdo#107713) -> PASS

    
  fdo#103167 https://bugs.freedesktop.org/show_bug.cgi?id=103167
  fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
  fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
  fdo#107341 https://bugs.freedesktop.org/show_bug.cgi?id=107341
  fdo#107362 https://bugs.freedesktop.org/show_bug.cgi?id=107362
  fdo#107556 https://bugs.freedesktop.org/show_bug.cgi?id=107556
  fdo#107713 https://bugs.freedesktop.org/show_bug.cgi?id=107713
  fdo#107774 https://bugs.freedesktop.org/show_bug.cgi?id=107774
  fdo#107859 https://bugs.freedesktop.org/show_bug.cgi?id=107859
  fdo#108094 https://bugs.freedesktop.org/show_bug.cgi?id=108094


== Participating hosts (52 -> 47) ==

  Additional (2): fi-kbl-soraka fi-skl-guc 
  Missing    (7): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-apl-guc fi-ctg-p8600 fi-kbl-7560u 


== Build changes ==

    * Linux: CI_DRM_4982 -> Patchwork_10461

  CI_DRM_4982: 6222b112cd485ea16d06c120531becf97ee57bc7 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4678: 9310a1265ceabeec736bdf0a76e1e0357c76c0b1 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_10461: 40f273c57c71aeb1957dd683859fdee8baffd13a @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

40f273c57c71 drm/i915/icl: Preempt-to-idle support in execlists.

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_10461/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✗ Fi.CI.IGT: failure for drm/i915/gen11: Preempt-to-idle support in execlists. (rev7)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (32 preceding siblings ...)
  2018-10-15 18:07 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-10-15 23:55 ` Patchwork
  2018-11-09 17:18 ` [PATCH v6] drm/i915/icl: Preempt-to-idle support in execlists Tomasz Lis
                   ` (4 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-10-15 23:55 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev7)
URL   : https://patchwork.freedesktop.org/series/40747/
State : failure

== Summary ==

= CI Bug Log - changes from CI_DRM_4982_full -> Patchwork_10461_full =

== Summary - FAILURE ==

  Serious unknown changes coming with Patchwork_10461_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_10461_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_10461_full:

  === IGT changes ===

    ==== Possible regressions ====

    igt@kms_atomic_transition@1x-modeset-transitions:
      shard-skl:          NOTRUN -> FAIL

    
    ==== Warnings ====

    igt@pm_rc6_residency@rc6-accuracy:
      shard-snb:          PASS -> SKIP

    
== Known issues ==

  Here are the changes found in Patchwork_10461_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@drv_suspend@sysfs-reader:
      shard-snb:          PASS -> DMESG-WARN (fdo#102365)

    igt@gem_exec_schedule@pi-ringfull-render:
      shard-skl:          NOTRUN -> FAIL (fdo#103158)

    igt@gem_ppgtt@blt-vs-render-ctxn:
      shard-skl:          NOTRUN -> TIMEOUT (fdo#108039)

    igt@gem_userptr_blits@readonly-unsync:
      shard-skl:          NOTRUN -> INCOMPLETE (fdo#108074)

    igt@kms_busy@extended-modeset-hang-newfb-render-a:
      shard-skl:          NOTRUN -> DMESG-WARN (fdo#107956) +4

    igt@kms_ccs@pipe-a-crc-sprite-planes-basic:
      shard-skl:          NOTRUN -> FAIL (fdo#105458)

    igt@kms_ccs@pipe-b-crc-sprite-planes-basic:
      shard-skl:          NOTRUN -> FAIL (fdo#107725, fdo#108145)

    igt@kms_color@pipe-a-legacy-gamma:
      shard-skl:          NOTRUN -> FAIL (fdo#104782, fdo#108145)

    igt@kms_cursor_crc@cursor-256x256-suspend:
      shard-skl:          NOTRUN -> FAIL (fdo#103191, fdo#103232)

    igt@kms_cursor_crc@cursor-256x85-offscreen:
      shard-skl:          NOTRUN -> FAIL (fdo#103232)

    igt@kms_draw_crc@fill-fb:
      shard-skl:          NOTRUN -> FAIL (fdo#103184)

    igt@kms_fbcon_fbt@psr:
      shard-skl:          NOTRUN -> FAIL (fdo#107882)

    igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-blt:
      shard-apl:          PASS -> FAIL (fdo#103167)

    igt@kms_frontbuffer_tracking@fbc-1p-rte:
      shard-apl:          PASS -> FAIL (fdo#103167, fdo#105682)

    igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-onoff:
      shard-glk:          PASS -> FAIL (fdo#103167) +4

    igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-spr-indfb-fullscreen:
      shard-skl:          NOTRUN -> FAIL (fdo#105682)

    igt@kms_frontbuffer_tracking@fbcpsr-stridechange:
      shard-skl:          NOTRUN -> FAIL (fdo#105683)

    igt@kms_frontbuffer_tracking@psr-1p-primscrn-cur-indfb-draw-mmap-wc:
      shard-skl:          NOTRUN -> FAIL (fdo#103167) +2

    igt@kms_pipe_crc_basic@hang-read-crc-pipe-a:
      shard-skl:          NOTRUN -> FAIL (fdo#103191, fdo#107362)

    igt@kms_plane@pixel-format-pipe-b-planes:
      shard-skl:          NOTRUN -> DMESG-FAIL (fdo#103166, fdo#106885) +1

    igt@kms_plane@plane-position-covered-pipe-a-planes:
      shard-apl:          PASS -> FAIL (fdo#103166) +1

    igt@kms_plane_alpha_blend@pipe-a-alpha-7efc:
      shard-skl:          NOTRUN -> FAIL (fdo#108145) +5

    igt@kms_plane_alpha_blend@pipe-a-alpha-opaque-fb:
      shard-apl:          PASS -> FAIL (fdo#108145)

    igt@kms_plane_alpha_blend@pipe-c-coverage-7efc:
      shard-skl:          NOTRUN -> FAIL (fdo#108146)

    igt@kms_plane_multiple@atomic-pipe-c-tiling-x:
      shard-glk:          PASS -> FAIL (fdo#103166)

    igt@kms_rotation_crc@exhaust-fences:
      shard-skl:          NOTRUN -> DMESG-WARN (fdo#105748)

    igt@pm_backlight@fade_with_suspend:
      shard-skl:          NOTRUN -> FAIL (fdo#107847)

    
    ==== Possible fixes ====

    igt@gem_wait@busy-default:
      shard-snb:          INCOMPLETE (fdo#105411) -> PASS

    igt@kms_busy@extended-modeset-hang-newfb-render-c:
      shard-kbl:          DMESG-WARN (fdo#107956) -> PASS

    igt@kms_busy@extended-pageflip-modeset-hang-oldfb-render-c:
      shard-glk:          DMESG-WARN (fdo#107956) -> PASS

    igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-mmap-gtt:
      shard-apl:          FAIL (fdo#103167) -> PASS +1

    igt@kms_frontbuffer_tracking@fbc-1p-rte:
      shard-glk:          FAIL (fdo#103167, fdo#105682) -> PASS

    igt@kms_plane@plane-position-covered-pipe-a-planes:
      shard-glk:          FAIL (fdo#103166) -> PASS

    igt@kms_plane_multiple@atomic-pipe-c-tiling-yf:
      shard-apl:          FAIL (fdo#103166) -> PASS +1

    igt@perf@polling:
      shard-hsw:          FAIL (fdo#102252) -> PASS

    
  fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252
  fdo#102365 https://bugs.freedesktop.org/show_bug.cgi?id=102365
  fdo#103158 https://bugs.freedesktop.org/show_bug.cgi?id=103158
  fdo#103166 https://bugs.freedesktop.org/show_bug.cgi?id=103166
  fdo#103167 https://bugs.freedesktop.org/show_bug.cgi?id=103167
  fdo#103184 https://bugs.freedesktop.org/show_bug.cgi?id=103184
  fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
  fdo#103232 https://bugs.freedesktop.org/show_bug.cgi?id=103232
  fdo#104782 https://bugs.freedesktop.org/show_bug.cgi?id=104782
  fdo#105411 https://bugs.freedesktop.org/show_bug.cgi?id=105411
  fdo#105458 https://bugs.freedesktop.org/show_bug.cgi?id=105458
  fdo#105682 https://bugs.freedesktop.org/show_bug.cgi?id=105682
  fdo#105683 https://bugs.freedesktop.org/show_bug.cgi?id=105683
  fdo#105748 https://bugs.freedesktop.org/show_bug.cgi?id=105748
  fdo#106885 https://bugs.freedesktop.org/show_bug.cgi?id=106885
  fdo#107362 https://bugs.freedesktop.org/show_bug.cgi?id=107362
  fdo#107725 https://bugs.freedesktop.org/show_bug.cgi?id=107725
  fdo#107847 https://bugs.freedesktop.org/show_bug.cgi?id=107847
  fdo#107882 https://bugs.freedesktop.org/show_bug.cgi?id=107882
  fdo#107956 https://bugs.freedesktop.org/show_bug.cgi?id=107956
  fdo#108039 https://bugs.freedesktop.org/show_bug.cgi?id=108039
  fdo#108074 https://bugs.freedesktop.org/show_bug.cgi?id=108074
  fdo#108145 https://bugs.freedesktop.org/show_bug.cgi?id=108145
  fdo#108146 https://bugs.freedesktop.org/show_bug.cgi?id=108146


== Participating hosts (6 -> 6) ==

  No changes in participating hosts


== Build changes ==

    * Linux: CI_DRM_4982 -> Patchwork_10461

  CI_DRM_4982: 6222b112cd485ea16d06c120531becf97ee57bc7 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4678: 9310a1265ceabeec736bdf0a76e1e0357c76c0b1 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_10461: 40f273c57c71aeb1957dd683859fdee8baffd13a @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_10461/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5] drm/i915/icl: Preempt-to-idle support in execlists.
  2018-10-15 17:29 ` [PATCH v5] drm/i915/icl: Preempt-to-idle support in execlists Tomasz Lis
@ 2018-10-16 10:53   ` Joonas Lahtinen
  2018-10-19 16:00     ` Lis, Tomasz
  0 siblings, 1 reply; 70+ messages in thread
From: Joonas Lahtinen @ 2018-10-16 10:53 UTC (permalink / raw)
  To: Tomasz Lis, intel-gfx; +Cc: Mika Kuoppala

Quoting Tomasz Lis (2018-10-15 20:29:18)
> The patch adds support of preempt-to-idle requesting by setting a proper
> bit within Execlist Control Register, and receiving preemption result from
> Context Status Buffer.
> 
> Preemption in previous gens required a special batch buffer to be executed,
> so the Command Streamer never preempted to idle directly. In Icelake it is
> possible, as there is a hardware mechanism to inform the kernel about
> status of the preemption request.
> 
> This patch does not cover using the new preemption mechanism when GuC is
> active.
> 
> v2: Added needs_preempt_context() change so that it is not created when
>     preempt-to-idle is supported. (Chris)
>     Updated setting HWACK flag so that it is cleared after
>     preempt-to-dle. (Chris, Daniele)
>     Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
> 
> v3: Fixed needs_preempt_context() change. (Chris)
>     Merged preemption trigger functions to one. (Chris)
>     Fixed conyext state tonot assume COMPLETED_MASK after preemption,
>     since idle-to-idle case will not have it set.
> 
> v4: Simplified needs_preempt_context() change. (Daniele)
>     Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
> 
> v5: Renamed inject_preempt_context(). (Daniele)
>     Removed duplicated GEM_BUG_ON() on HWACK (Daniele)
> 
> Bspec: 18922
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Michal Winiarski <michal.winiarski@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

This R-b was on v4, and should be indicated with # v4 comment.

The commit message doesn't say much about why preempting to idle is
beneficial? The pre-Gen11 codepath needs to be maintained anyway.

Regards, Joonas
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5] drm/i915/icl: Preempt-to-idle support in execlists.
  2018-10-16 10:53   ` Joonas Lahtinen
@ 2018-10-19 16:00     ` Lis, Tomasz
  2018-10-23  9:13       ` Joonas Lahtinen
  0 siblings, 1 reply; 70+ messages in thread
From: Lis, Tomasz @ 2018-10-19 16:00 UTC (permalink / raw)
  To: Joonas Lahtinen, intel-gfx; +Cc: Mika Kuoppala



On 2018-10-16 12:53, Joonas Lahtinen wrote:
> Quoting Tomasz Lis (2018-10-15 20:29:18)
>> The patch adds support of preempt-to-idle requesting by setting a proper
>> bit within Execlist Control Register, and receiving preemption result from
>> Context Status Buffer.
>>
>> Preemption in previous gens required a special batch buffer to be executed,
>> so the Command Streamer never preempted to idle directly. In Icelake it is
>> possible, as there is a hardware mechanism to inform the kernel about
>> status of the preemption request.
>>
>> This patch does not cover using the new preemption mechanism when GuC is
>> active.
>>
>> v2: Added needs_preempt_context() change so that it is not created when
>>      preempt-to-idle is supported. (Chris)
>>      Updated setting HWACK flag so that it is cleared after
>>      preempt-to-dle. (Chris, Daniele)
>>      Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
>>
>> v3: Fixed needs_preempt_context() change. (Chris)
>>      Merged preemption trigger functions to one. (Chris)
>>      Fixed conyext state tonot assume COMPLETED_MASK after preemption,
>>      since idle-to-idle case will not have it set.
>>
>> v4: Simplified needs_preempt_context() change. (Daniele)
>>      Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
>>
>> v5: Renamed inject_preempt_context(). (Daniele)
>>      Removed duplicated GEM_BUG_ON() on HWACK (Daniele)
>>
>> Bspec: 18922
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>> Cc: Michal Winiarski <michal.winiarski@intel.com>
>> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> This R-b was on v4, and should be indicated with # v4 comment.
>
> The commit message doesn't say much about why preempting to idle is
> beneficial? The pre-Gen11 codepath needs to be maintained anyway.
>
> Regards, Joonas
The benefit is one less context switch - there is no "preempt context".
-Tomasz


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5] drm/i915/icl: Preempt-to-idle support in execlists.
  2018-10-19 16:00     ` Lis, Tomasz
@ 2018-10-23  9:13       ` Joonas Lahtinen
  2018-10-23  9:24         ` Lis, Tomasz
  0 siblings, 1 reply; 70+ messages in thread
From: Joonas Lahtinen @ 2018-10-23  9:13 UTC (permalink / raw)
  To: Lis, Tomasz, intel-gfx; +Cc: Mika Kuoppala

Quoting Lis, Tomasz (2018-10-19 19:00:15)
> 
> 
> On 2018-10-16 12:53, Joonas Lahtinen wrote:
> > Quoting Tomasz Lis (2018-10-15 20:29:18)
> >> The patch adds support of preempt-to-idle requesting by setting a proper
> >> bit within Execlist Control Register, and receiving preemption result from
> >> Context Status Buffer.
> >>
> >> Preemption in previous gens required a special batch buffer to be executed,
> >> so the Command Streamer never preempted to idle directly. In Icelake it is
> >> possible, as there is a hardware mechanism to inform the kernel about
> >> status of the preemption request.
> >>
> >> This patch does not cover using the new preemption mechanism when GuC is
> >> active.
> >>
> >> v2: Added needs_preempt_context() change so that it is not created when
> >>      preempt-to-idle is supported. (Chris)
> >>      Updated setting HWACK flag so that it is cleared after
> >>      preempt-to-dle. (Chris, Daniele)
> >>      Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
> >>
> >> v3: Fixed needs_preempt_context() change. (Chris)
> >>      Merged preemption trigger functions to one. (Chris)
> >>      Fixed conyext state tonot assume COMPLETED_MASK after preemption,
> >>      since idle-to-idle case will not have it set.
> >>
> >> v4: Simplified needs_preempt_context() change. (Daniele)
> >>      Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
> >>
> >> v5: Renamed inject_preempt_context(). (Daniele)
> >>      Removed duplicated GEM_BUG_ON() on HWACK (Daniele)
> >>
> >> Bspec: 18922
> >> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> >> Cc: Michal Winiarski <michal.winiarski@intel.com>
> >> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> >> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> > This R-b was on v4, and should be indicated with # v4 comment.
> >
> > The commit message doesn't say much about why preempting to idle is
> > beneficial? The pre-Gen11 codepath needs to be maintained anyway.
> >
> > Regards, Joonas
> The benefit is one less context switch - there is no "preempt context".

Yes.

But that still doesn't quite explain what material benefits there are? :)

Is there some actual workloads/microbenchmarks that get an improvement?

This alters the behavior between different platforms for a very delicate
feature, probably resulting in slightly different bugs. So there should
be some more reasoning than just because we can.

Regards, Joonas
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5] drm/i915/icl: Preempt-to-idle support in execlists.
  2018-10-23  9:13       ` Joonas Lahtinen
@ 2018-10-23  9:24         ` Lis, Tomasz
  0 siblings, 0 replies; 70+ messages in thread
From: Lis, Tomasz @ 2018-10-23  9:24 UTC (permalink / raw)
  To: Joonas Lahtinen, intel-gfx; +Cc: Mika Kuoppala



On 2018-10-23 11:13, Joonas Lahtinen wrote:
> Quoting Lis, Tomasz (2018-10-19 19:00:15)
>>
>> On 2018-10-16 12:53, Joonas Lahtinen wrote:
>>> Quoting Tomasz Lis (2018-10-15 20:29:18)
>>>> The patch adds support of preempt-to-idle requesting by setting a proper
>>>> bit within Execlist Control Register, and receiving preemption result from
>>>> Context Status Buffer.
>>>>
>>>> Preemption in previous gens required a special batch buffer to be executed,
>>>> so the Command Streamer never preempted to idle directly. In Icelake it is
>>>> possible, as there is a hardware mechanism to inform the kernel about
>>>> status of the preemption request.
>>>>
>>>> This patch does not cover using the new preemption mechanism when GuC is
>>>> active.
>>>>
>>>> v2: Added needs_preempt_context() change so that it is not created when
>>>>       preempt-to-idle is supported. (Chris)
>>>>       Updated setting HWACK flag so that it is cleared after
>>>>       preempt-to-dle. (Chris, Daniele)
>>>>       Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
>>>>
>>>> v3: Fixed needs_preempt_context() change. (Chris)
>>>>       Merged preemption trigger functions to one. (Chris)
>>>>       Fixed conyext state tonot assume COMPLETED_MASK after preemption,
>>>>       since idle-to-idle case will not have it set.
>>>>
>>>> v4: Simplified needs_preempt_context() change. (Daniele)
>>>>       Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
>>>>
>>>> v5: Renamed inject_preempt_context(). (Daniele)
>>>>       Removed duplicated GEM_BUG_ON() on HWACK (Daniele)
>>>>
>>>> Bspec: 18922
>>>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>>> Cc: Michal Winiarski <michal.winiarski@intel.com>
>>>> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>>>> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>> This R-b was on v4, and should be indicated with # v4 comment.
>>>
>>> The commit message doesn't say much about why preempting to idle is
>>> beneficial? The pre-Gen11 codepath needs to be maintained anyway.
>>>
>>> Regards, Joonas
>> The benefit is one less context switch - there is no "preempt context".
> Yes.
>
> But that still doesn't quite explain what material benefits there are? :)
>
> Is there some actual workloads/microbenchmarks that get an improvement?
>
> This alters the behavior between different platforms for a very delicate
> feature, probably resulting in slightly different bugs. So there should
> be some more reasoning than just because we can.
>
> Regards, Joonas
Less context switching does imply perf improvement, though it would 
require measurement - it might be hardly detectable. We may even lose 
performance - without measurements, we don't know. So not a strong argument.

One more benefit I could think of is - GuC path will use 
preempt-to-idle, so this would make execlists use the same path as GuC. 
But that's not a strong argument either.

I must agree - there doesn't seem to be any strong enough reason to go 
with this change.
We might consider it after we have performance data.

-Tomasz

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v6] drm/i915/icl: Preempt-to-idle support in execlists.
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (33 preceding siblings ...)
  2018-10-15 23:55 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2018-11-09 17:18 ` Tomasz Lis
  2018-12-10 15:40   ` Tvrtko Ursulin
  2018-11-09 18:17 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev8) Patchwork
                   ` (3 subsequent siblings)
  38 siblings, 1 reply; 70+ messages in thread
From: Tomasz Lis @ 2018-11-09 17:18 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

The advantage of this new preemption path is that one less context switch is
needed, and information about preemption being complete is received
earlier. This leads to significant improvement in our IGT latency test.

Test performed: `gem_exec_latency --run-subtest render-preemption`, executed
100 times, on the same platform, same kernel, without and with this patch.
Then taken average of the execution latency times:

subcase		old preempt.	icl preempt.
render-render	853.2036	840.1176
render-bsd	2328.8708	2083.2576
render-blt	2080.1501	1852.0792
render-vebox	1553.5134	1428.762

Improvement observed:

subcase		improvement
render-render	 1.53%
render-bsd	10.55%
render-blt	10.96%
render-vebox	 8.03%

v2: Added needs_preempt_context() change so that it is not created when
    preempt-to-idle is supported. (Chris)
    Updated setting HWACK flag so that it is cleared after
    preempt-to-idle. (Chris, Daniele)
    Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

v3: Fixed needs_preempt_context() change. (Chris)
    Merged preemption trigger functions to one. (Chris)
    Fixed context state to not assume COMPLETED_MASK after preemption,
    since idle-to-idle case will not have it set.

v4: Simplified needs_preempt_context() change. (Daniele)
    Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)

v5: Renamed inject_preempt_context(). (Daniele)
    Removed duplicated GEM_BUG_ON() on HWACK (Daniele)

v6: Added performance test results.

Bspec: 18922
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |   2 +
 drivers/gpu/drm/i915/i915_gem_context.c  |   3 +-
 drivers/gpu/drm/i915/i915_pci.c          |   3 +-
 drivers/gpu/drm/i915/intel_device_info.h |   1 +
 drivers/gpu/drm/i915/intel_lrc.c         | 109 +++++++++++++++++++++----------
 drivers/gpu/drm/i915/intel_lrc.h         |   1 +
 6 files changed, 84 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 08d25aa..d2cc9f1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2579,6 +2579,8 @@ intel_info(const struct drm_i915_private *dev_priv)
 		((dev_priv)->info.has_logical_ring_elsq)
 #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
 		((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+		((dev_priv)->info.has_hw_preempt_to_idle)
 
 #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index b97963d..10b1d61 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -529,7 +529,8 @@ static void init_contexts(struct drm_i915_private *i915)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-	return HAS_LOGICAL_RING_PREEMPTION(i915);
+	return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+	       !HAS_HW_PREEMPT_TO_IDLE(i915);
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4ccab83..82125cf 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -600,7 +600,8 @@ static const struct intel_device_info intel_cannonlake_info = {
 			   TRANSCODER_DSI0_OFFSET, TRANSCODER_DSI1_OFFSET}, \
 	GEN(11), \
 	.ddb_size = 2048, \
-	.has_logical_ring_elsq = 1
+	.has_logical_ring_elsq = 1, \
+	.has_hw_preempt_to_idle = 1
 
 static const struct intel_device_info intel_icelake_11_info = {
 	GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 86ce1db..a2ee278 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -104,6 +104,7 @@ enum intel_ppgtt {
 	func(has_logical_ring_contexts); \
 	func(has_logical_ring_elsq); \
 	func(has_logical_ring_preemption); \
+	func(has_hw_preempt_to_idle); \
 	func(has_overlay); \
 	func(has_pooled_eu); \
 	func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 08fd9b1..26b7062 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -155,6 +155,7 @@
 #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE	(1 << 29)
 
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -500,29 +501,49 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
 	port_set(port, port_pack(i915_request_get(rq), port_count(port)));
 }
 
-static void inject_preempt_context(struct intel_engine_cs *engine)
+static void execlist_send_preempt_to_idle(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
-	struct intel_context *ce =
-		to_intel_context(engine->i915->preempt_context, engine);
-	unsigned int n;
+	GEM_TRACE("%s\n", engine->name);
 
-	GEM_BUG_ON(execlists->preempt_complete_status !=
-		   upper_32_bits(ce->lrc_desc));
+	if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+		/*
+		 * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+		 * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+		 */
+		GEM_BUG_ON(execlists->ctrl_reg == NULL);
 
-	/*
-	 * Switch to our empty preempt context so
-	 * the state of the GPU is known (idle).
-	 */
-	GEM_TRACE("%s\n", engine->name);
-	for (n = execlists_num_ports(execlists); --n; )
-		write_desc(execlists, 0, n);
+		/*
+		 * If we have hardware preempt-to-idle, we do not need to
+		 * inject any job to the hardware. We only set a flag.
+		 */
+		writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+	} else {
+		struct intel_context *ce =
+			to_intel_context(engine->i915->preempt_context, engine);
+		unsigned int n;
 
-	write_desc(execlists, ce->lrc_desc, n);
+		GEM_BUG_ON(execlists->preempt_complete_status !=
+			   upper_32_bits(ce->lrc_desc));
+		GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+					   CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+					   CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
 
-	/* we need to manually load the submit queue */
-	if (execlists->ctrl_reg)
-		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+		/*
+		 * Switch to our empty preempt context so
+		 * the state of the GPU is known (idle).
+		 */
+		for (n = execlists_num_ports(execlists); --n; )
+			write_desc(execlists, 0, n);
+
+		write_desc(execlists, ce->lrc_desc, n);
+
+		/* we need to manually load the submit queue */
+		if (execlists->ctrl_reg)
+			writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+	}
 
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
 	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
@@ -595,7 +616,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			return;
 
 		if (need_preempt(engine, last, execlists->queue_priority)) {
-			inject_preempt_context(engine);
+			execlist_send_preempt_to_idle(engine);
 			return;
 		}
 
@@ -922,22 +943,43 @@ static void process_csb(struct intel_engine_cs *engine)
 			  execlists->active);
 
 		status = buf[2 * head];
-		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-			      GEN8_CTX_STATUS_PREEMPTED))
-			execlists_set_active(execlists,
-					     EXECLISTS_ACTIVE_HWACK);
-		if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
-			execlists_clear_active(execlists,
-					       EXECLISTS_ACTIVE_HWACK);
-
-		if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-			continue;
+		/*
+		 * Check if preempted from idle to idle directly.
+		 * The STATUS_IDLE_ACTIVE flag is used to mark
+		 * such transition.
+		 */
+		if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+		     (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
 
-		/* We should never get a COMPLETED | IDLE_ACTIVE! */
-		GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+			/*
+			 * We could not have COMPLETED anything
+			 * if we were idle before preemption.
+			 */
+			GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+		} else {
+			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+				      GEN8_CTX_STATUS_PREEMPTED))
+				execlists_set_active(execlists,
+						     EXECLISTS_ACTIVE_HWACK);
+
+			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+				execlists_clear_active(execlists,
+						       EXECLISTS_ACTIVE_HWACK);
+
+			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+				continue;
 
-		if (status & GEN8_CTX_STATUS_COMPLETE &&
-		    buf[2*head + 1] == execlists->preempt_complete_status) {
+			/* We should never get a COMPLETED | IDLE_ACTIVE! */
+			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+		}
+
+		/*
+		 * Check if preempted to real idle, either directly or
+		 * the preemptive context already finished executing
+		 */
+		if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+		    (status & GEN8_CTX_STATUS_COMPLETE &&
+		    buf[2*head + 1] == execlists->preempt_complete_status)) {
 			GEM_TRACE("%s preempt-idle\n", engine->name);
 			complete_preempt_context(execlists);
 			continue;
@@ -2150,7 +2192,8 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->unpark = NULL;
 
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-	if (engine->i915->preempt_context)
+	if (engine->i915->preempt_context ||
+	    HAS_HW_PREEMPT_TO_IDLE(engine->i915))
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 
 	engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index f5a5502..871901a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -43,6 +43,7 @@
 #define RING_EXECLIST_SQ_CONTENTS(engine)	_MMIO((engine)->mmio_base + 0x510)
 #define RING_EXECLIST_CONTROL(engine)		_MMIO((engine)->mmio_base + 0x550)
 #define	  EL_CTRL_LOAD				(1 << 0)
+#define	  EL_CTRL_PREEMPT_TO_IDLE		(1 << 1)
 
 /* The docs specify that the write pointer wraps around after 5h, "After status
  * is written out to the last available status QW at offset 5h, this pointer
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev8)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (34 preceding siblings ...)
  2018-11-09 17:18 ` [PATCH v6] drm/i915/icl: Preempt-to-idle support in execlists Tomasz Lis
@ 2018-11-09 18:17 ` Patchwork
  2018-11-09 18:18 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (2 subsequent siblings)
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-11-09 18:17 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev8)
URL   : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
565cd7090a44 drm/i915/icl: Preempt-to-idle support in execlists.
-:18: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#18: 
The advantage of this new preemption path is that one less context switch is

-:153: CHECK:COMPARISON_TO_NULL: Comparison to NULL could be written "!execlists->ctrl_reg"
#153: FILE: drivers/gpu/drm/i915/intel_lrc.c:514:
+		GEM_BUG_ON(execlists->ctrl_reg == NULL);

-:229: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#229: FILE: drivers/gpu/drm/i915/intel_lrc.c:952:
+		if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+		     (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {

-:263: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#263: FILE: drivers/gpu/drm/i915/intel_lrc.c:982:
+		    buf[2*head + 1] == execlists->preempt_complete_status)) {
 		         ^

total: 0 errors, 1 warnings, 3 checks, 187 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✗ Fi.CI.SPARSE: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev8)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (35 preceding siblings ...)
  2018-11-09 18:17 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev8) Patchwork
@ 2018-11-09 18:18 ` Patchwork
  2018-11-09 18:33 ` ✓ Fi.CI.BAT: success " Patchwork
  2018-11-10  3:29 ` ✓ Fi.CI.IGT: " Patchwork
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-11-09 18:18 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev8)
URL   : https://patchwork.freedesktop.org/series/40747/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915/icl: Preempt-to-idle support in execlists.
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3714:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3716:16: warning: expression using sizeof(void)

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915/gen11: Preempt-to-idle support in execlists. (rev8)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (36 preceding siblings ...)
  2018-11-09 18:18 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-11-09 18:33 ` Patchwork
  2018-11-10  3:29 ` ✓ Fi.CI.IGT: " Patchwork
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-11-09 18:33 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev8)
URL   : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_5116 -> Patchwork_10797 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/40747/revisions/8/mbox/

== Known issues ==

  Here are the changes found in Patchwork_10797 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@gem_ctx_create@basic-files:
      fi-icl-u2:          PASS -> DMESG-WARN (fdo#107724)

    igt@kms_pipe_crc_basic@nonblocking-crc-pipe-a-frame-sequence:
      fi-byt-clapper:     PASS -> FAIL (fdo#107362, fdo#103191)

    igt@kms_pipe_crc_basic@read-crc-pipe-b:
      fi-byt-clapper:     PASS -> FAIL (fdo#107362)

    
    ==== Possible fixes ====

    igt@drv_selftest@live_hugepages:
      fi-skl-6700k2:      INCOMPLETE -> PASS

    igt@kms_chamelium@common-hpd-after-suspend:
      fi-skl-6700k2:      TIMEOUT -> PASS

    igt@kms_flip@basic-plain-flip:
      fi-ilk-650:         DMESG-WARN (fdo#106387) -> PASS

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
      fi-byt-clapper:     FAIL (fdo#107362, fdo#103191) -> PASS

    
  fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
  fdo#106387 https://bugs.freedesktop.org/show_bug.cgi?id=106387
  fdo#107362 https://bugs.freedesktop.org/show_bug.cgi?id=107362
  fdo#107724 https://bugs.freedesktop.org/show_bug.cgi?id=107724


== Participating hosts (51 -> 46) ==

  Additional (1): fi-glk-j4005 
  Missing    (6): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-icl-u 


== Build changes ==

    * Linux: CI_DRM_5116 -> Patchwork_10797

  CI_DRM_5116: ade66f7f60026c1c7e68a12ce07d5d4000afce13 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4714: cab148ca3ec904a94d0cd43476cf7e1f8663f906 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_10797: 565cd7090a442f92340477f4bfe8c55f7590344f @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

565cd7090a44 drm/i915/icl: Preempt-to-idle support in execlists.

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_10797/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* ✓ Fi.CI.IGT: success for drm/i915/gen11: Preempt-to-idle support in execlists. (rev8)
  2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
                   ` (37 preceding siblings ...)
  2018-11-09 18:33 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-11-10  3:29 ` Patchwork
  38 siblings, 0 replies; 70+ messages in thread
From: Patchwork @ 2018-11-10  3:29 UTC (permalink / raw)
  To: Tomasz Lis; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gen11: Preempt-to-idle support in execlists. (rev8)
URL   : https://patchwork.freedesktop.org/series/40747/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_5116_full -> Patchwork_10797_full =

== Summary - WARNING ==

  Minor unknown changes coming with Patchwork_10797_full need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_10797_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_10797_full:

  === IGT changes ===

    ==== Warnings ====

    igt@kms_atomic_interruptible@universal-setplane-cursor:
      shard-snb:          SKIP -> PASS +2

    igt@perf_pmu@rc6:
      shard-kbl:          SKIP -> PASS

    
== Known issues ==

  Here are the changes found in Patchwork_10797_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@drv_suspend@shrink:
      shard-skl:          PASS -> INCOMPLETE (fdo#106886)

    igt@gem_exec_schedule@pi-ringfull-blt:
      shard-skl:          NOTRUN -> FAIL (fdo#103158)

    igt@kms_available_modes_crc@available_mode_test_crc:
      shard-snb:          NOTRUN -> FAIL (fdo#106641)

    igt@kms_busy@extended-modeset-hang-newfb-render-a:
      shard-snb:          NOTRUN -> DMESG-WARN (fdo#107956)

    igt@kms_busy@extended-modeset-hang-newfb-render-c:
      shard-skl:          NOTRUN -> DMESG-WARN (fdo#107956) +2

    igt@kms_busy@extended-modeset-hang-newfb-with-reset-render-c:
      shard-kbl:          NOTRUN -> DMESG-WARN (fdo#107956) +1

    igt@kms_busy@extended-pageflip-modeset-hang-oldfb-render-c:
      shard-glk:          NOTRUN -> DMESG-WARN (fdo#107956)

    igt@kms_color@pipe-c-degamma:
      shard-apl:          PASS -> FAIL (fdo#104782)

    igt@kms_cursor_crc@cursor-128x128-sliding:
      shard-apl:          PASS -> FAIL (fdo#103232)

    igt@kms_cursor_crc@cursor-256x256-onscreen:
      shard-skl:          PASS -> FAIL (fdo#103232)

    igt@kms_cursor_crc@cursor-256x256-suspend:
      shard-skl:          PASS -> INCOMPLETE (fdo#104108)

    igt@kms_cursor_crc@cursor-64x21-sliding:
      shard-skl:          NOTRUN -> FAIL (fdo#103232) +2

    igt@kms_draw_crc@draw-method-xrgb2101010-mmap-gtt-xtiled:
      shard-skl:          PASS -> FAIL (fdo#103184)

    igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-draw-mmap-wc:
      shard-glk:          PASS -> FAIL (fdo#103167) +3

    igt@kms_frontbuffer_tracking@fbcpsr-1p-rte:
      shard-skl:          PASS -> FAIL (fdo#105682)

    igt@kms_frontbuffer_tracking@psr-1p-primscrn-indfb-msflip-blt:
      shard-skl:          PASS -> FAIL (fdo#103167) +1

    igt@kms_plane@pixel-format-pipe-b-planes:
      shard-skl:          NOTRUN -> DMESG-WARN (fdo#106885) +1

    igt@kms_plane_alpha_blend@pipe-a-coverage-7efc:
      shard-skl:          PASS -> FAIL (fdo#108145, fdo#107815)

    igt@kms_plane_alpha_blend@pipe-b-alpha-basic:
      shard-skl:          NOTRUN -> FAIL (fdo#108145, fdo#107815) +1

    igt@kms_plane_alpha_blend@pipe-b-alpha-opaque-fb:
      shard-glk:          NOTRUN -> FAIL (fdo#108145)

    igt@kms_plane_alpha_blend@pipe-c-coverage-7efc:
      shard-skl:          PASS -> FAIL (fdo#107815) +1

    igt@kms_plane_multiple@atomic-pipe-b-tiling-y:
      shard-glk:          PASS -> FAIL (fdo#103166) +1
      shard-apl:          PASS -> FAIL (fdo#103166)

    igt@kms_setmode@basic:
      shard-snb:          NOTRUN -> FAIL (fdo#99912)

    
    ==== Possible fixes ====

    igt@drv_suspend@forcewake:
      shard-kbl:          INCOMPLETE (fdo#103665) -> PASS

    igt@gem_cpu_reloc@full:
      shard-skl:          INCOMPLETE (fdo#108073) -> PASS

    igt@gem_ppgtt@blt-vs-render-ctx0:
      shard-skl:          TIMEOUT (fdo#108039) -> PASS
      shard-kbl:          INCOMPLETE (fdo#106023, fdo#103665, fdo#106887) -> PASS

    igt@kms_busy@extended-modeset-hang-newfb-render-b:
      shard-snb:          DMESG-WARN (fdo#107956) -> PASS

    igt@kms_cursor_crc@cursor-256x256-sliding:
      shard-apl:          FAIL (fdo#103232) -> PASS

    igt@kms_cursor_crc@cursor-256x256-suspend:
      shard-apl:          FAIL (fdo#103232, fdo#103191) -> PASS

    igt@kms_cursor_crc@cursor-64x64-suspend:
      shard-skl:          INCOMPLETE (fdo#104108) -> PASS +1

    igt@kms_flip@flip-vs-expired-vblank-interruptible:
      shard-glk:          FAIL (fdo#105363) -> PASS

    igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-mmap-gtt:
      shard-apl:          FAIL (fdo#103167) -> PASS +1

    igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-move:
      shard-glk:          FAIL (fdo#103167) -> PASS +2

    igt@kms_plane_alpha_blend@pipe-a-constant-alpha-max:
      shard-glk:          FAIL (fdo#108145) -> PASS

    igt@perf@blocking:
      shard-hsw:          FAIL (fdo#102252) -> PASS

    igt@pm_rpm@gem-execbuf:
      shard-skl:          INCOMPLETE (fdo#107807, fdo#107803) -> PASS

    igt@pm_rpm@modeset-lpsp:
      shard-skl:          INCOMPLETE (fdo#107807) -> PASS +1

    
  fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252
  fdo#103158 https://bugs.freedesktop.org/show_bug.cgi?id=103158
  fdo#103166 https://bugs.freedesktop.org/show_bug.cgi?id=103166
  fdo#103167 https://bugs.freedesktop.org/show_bug.cgi?id=103167
  fdo#103184 https://bugs.freedesktop.org/show_bug.cgi?id=103184
  fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
  fdo#103232 https://bugs.freedesktop.org/show_bug.cgi?id=103232
  fdo#103665 https://bugs.freedesktop.org/show_bug.cgi?id=103665
  fdo#104108 https://bugs.freedesktop.org/show_bug.cgi?id=104108
  fdo#104782 https://bugs.freedesktop.org/show_bug.cgi?id=104782
  fdo#105363 https://bugs.freedesktop.org/show_bug.cgi?id=105363
  fdo#105682 https://bugs.freedesktop.org/show_bug.cgi?id=105682
  fdo#106023 https://bugs.freedesktop.org/show_bug.cgi?id=106023
  fdo#106641 https://bugs.freedesktop.org/show_bug.cgi?id=106641
  fdo#106885 https://bugs.freedesktop.org/show_bug.cgi?id=106885
  fdo#106886 https://bugs.freedesktop.org/show_bug.cgi?id=106886
  fdo#106887 https://bugs.freedesktop.org/show_bug.cgi?id=106887
  fdo#107803 https://bugs.freedesktop.org/show_bug.cgi?id=107803
  fdo#107807 https://bugs.freedesktop.org/show_bug.cgi?id=107807
  fdo#107815 https://bugs.freedesktop.org/show_bug.cgi?id=107815
  fdo#107956 https://bugs.freedesktop.org/show_bug.cgi?id=107956
  fdo#108039 https://bugs.freedesktop.org/show_bug.cgi?id=108039
  fdo#108073 https://bugs.freedesktop.org/show_bug.cgi?id=108073
  fdo#108145 https://bugs.freedesktop.org/show_bug.cgi?id=108145
  fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912


== Participating hosts (6 -> 6) ==

  No changes in participating hosts


== Build changes ==

    * Linux: CI_DRM_5116 -> Patchwork_10797

  CI_DRM_5116: ade66f7f60026c1c7e68a12ce07d5d4000afce13 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4714: cab148ca3ec904a94d0cd43476cf7e1f8663f906 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_10797: 565cd7090a442f92340477f4bfe8c55f7590344f @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_10797/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v6] drm/i915/icl: Preempt-to-idle support in execlists.
  2018-11-09 17:18 ` [PATCH v6] drm/i915/icl: Preempt-to-idle support in execlists Tomasz Lis
@ 2018-12-10 15:40   ` Tvrtko Ursulin
  2018-12-14 11:10     ` Joonas Lahtinen
  0 siblings, 1 reply; 70+ messages in thread
From: Tvrtko Ursulin @ 2018-12-10 15:40 UTC (permalink / raw)
  To: Tomasz Lis, intel-gfx; +Cc: Mika Kuoppala


On 09/11/2018 17:18, Tomasz Lis wrote:
> The patch adds support of preempt-to-idle requesting by setting a proper
> bit within Execlist Control Register, and receiving preemption result from
> Context Status Buffer.
> 
> Preemption in previous gens required a special batch buffer to be executed,
> so the Command Streamer never preempted to idle directly. In Icelake it is
> possible, as there is a hardware mechanism to inform the kernel about
> status of the preemption request.
> 
> This patch does not cover using the new preemption mechanism when GuC is
> active.
> 
> The advantage of this new preemption path is that one less context switch is
> needed, and returning information about preemption being complete is received
> earlier. This leads to significant improvement in our IGT latency test.
> 
> Test performed: `gem_exec_latency --run-subtest render-preemption`, executed
> 100 times, on the same platform, same kernel, without and with this patch.
> Then taken average of the execution latency times:
> 
> subcase		old preempt.	icl preempt.
> render-render	853.2036	840.1176
> render-bsd	2328.8708	2083.2576
> render-blt	2080.1501	1852.0792
> render-vebox	1553.5134	1428.762
> 
> Improvement observed:
> 
> subcase		improvement
> render-render	 1.53%
> render-bsd	10.55%
> render-blt	10.96%
> render-vebox	 8.03%

Who can explain what the parts other than render-render mean? At 
least I can make sense of render-render - measure how long it takes for 
one context to preempt another, but render-$other draws a blank for me. 
How are engines pre-empting one another?

But anyway, even if only the 1.53% improvement is the real one, FWIW 
I think that's good enough to justify the patch. It is sufficiently 
small and contained that I don't see a problem. So:

Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

> 
> v2: Added needs_preempt_context() change so that it is not created when
>      preempt-to-idle is supported. (Chris)
>      Updated setting HWACK flag so that it is cleared after
>      preempt-to-idle. (Chris, Daniele)
>      Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
> 
> v3: Fixed needs_preempt_context() change. (Chris)
>      Merged preemption trigger functions to one. (Chris)
>      Fixed context state to not assume COMPLETED_MASK after preemption,
>      since idle-to-idle case will not have it set.
> 
> v4: Simplified needs_preempt_context() change. (Daniele)
>      Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
> 
> v5: Renamed inject_preempt_context(). (Daniele)
>      Removed duplicated GEM_BUG_ON() on HWACK (Daniele)
> 
> v6: Added performance test results.
> 
> Bspec: 18922
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Michal Winiarski <michal.winiarski@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h          |   2 +
>   drivers/gpu/drm/i915/i915_gem_context.c  |   3 +-
>   drivers/gpu/drm/i915/i915_pci.c          |   3 +-
>   drivers/gpu/drm/i915/intel_device_info.h |   1 +
>   drivers/gpu/drm/i915/intel_lrc.c         | 109 +++++++++++++++++++++----------
>   drivers/gpu/drm/i915/intel_lrc.h         |   1 +
>   6 files changed, 84 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 08d25aa..d2cc9f1 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2579,6 +2579,8 @@ intel_info(const struct drm_i915_private *dev_priv)
>   		((dev_priv)->info.has_logical_ring_elsq)
>   #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>   		((dev_priv)->info.has_logical_ring_preemption)
> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
> +		((dev_priv)->info.has_hw_preempt_to_idle)
>   
>   #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index b97963d..10b1d61 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -529,7 +529,8 @@ static void init_contexts(struct drm_i915_private *i915)
>   
>   static bool needs_preempt_context(struct drm_i915_private *i915)
>   {
> -	return HAS_LOGICAL_RING_PREEMPTION(i915);
> +	return HAS_LOGICAL_RING_PREEMPTION(i915) &&
> +	       !HAS_HW_PREEMPT_TO_IDLE(i915);
>   }
>   
>   int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 4ccab83..82125cf 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -600,7 +600,8 @@ static const struct intel_device_info intel_cannonlake_info = {
>   			   TRANSCODER_DSI0_OFFSET, TRANSCODER_DSI1_OFFSET}, \
>   	GEN(11), \
>   	.ddb_size = 2048, \
> -	.has_logical_ring_elsq = 1
> +	.has_logical_ring_elsq = 1, \
> +	.has_hw_preempt_to_idle = 1
>   
>   static const struct intel_device_info intel_icelake_11_info = {
>   	GEN11_FEATURES,
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 86ce1db..a2ee278 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -104,6 +104,7 @@ enum intel_ppgtt {
>   	func(has_logical_ring_contexts); \
>   	func(has_logical_ring_elsq); \
>   	func(has_logical_ring_preemption); \
> +	func(has_hw_preempt_to_idle); \
>   	func(has_overlay); \
>   	func(has_pooled_eu); \
>   	func(has_psr); \
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 08fd9b1..26b7062 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -155,6 +155,7 @@
>   #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
>   #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
>   #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
> +#define GEN11_CTX_STATUS_PREEMPT_IDLE	(1 << 29)
>   
>   #define GEN8_CTX_STATUS_COMPLETED_MASK \
>   	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
> @@ -500,29 +501,49 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
>   	port_set(port, port_pack(i915_request_get(rq), port_count(port)));
>   }
>   
> -static void inject_preempt_context(struct intel_engine_cs *engine)
> +static void execlist_send_preempt_to_idle(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists *execlists = &engine->execlists;
> -	struct intel_context *ce =
> -		to_intel_context(engine->i915->preempt_context, engine);
> -	unsigned int n;
> +	GEM_TRACE("%s\n", engine->name);
>   
> -	GEM_BUG_ON(execlists->preempt_complete_status !=
> -		   upper_32_bits(ce->lrc_desc));
> +	if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
> +		/*
> +		 * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
> +		 * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
> +		 */
> +		GEM_BUG_ON(execlists->ctrl_reg == NULL);
>   
> -	/*
> -	 * Switch to our empty preempt context so
> -	 * the state of the GPU is known (idle).
> -	 */
> -	GEM_TRACE("%s\n", engine->name);
> -	for (n = execlists_num_ports(execlists); --n; )
> -		write_desc(execlists, 0, n);
> +		/*
> +		 * If we have hardware preempt-to-idle, we do not need to
> +		 * inject any job to the hardware. We only set a flag.
> +		 */
> +		writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
> +	} else {
> +		struct intel_context *ce =
> +			to_intel_context(engine->i915->preempt_context, engine);
> +		unsigned int n;
>   
> -	write_desc(execlists, ce->lrc_desc, n);
> +		GEM_BUG_ON(execlists->preempt_complete_status !=
> +			   upper_32_bits(ce->lrc_desc));
> +		GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
> +			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> +					   CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
> +			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> +					   CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
>   
> -	/* we need to manually load the submit queue */
> -	if (execlists->ctrl_reg)
> -		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
> +		/*
> +		 * Switch to our empty preempt context so
> +		 * the state of the GPU is known (idle).
> +		 */
> +		for (n = execlists_num_ports(execlists); --n; )
> +			write_desc(execlists, 0, n);
> +
> +		write_desc(execlists, ce->lrc_desc, n);
> +
> +		/* we need to manually load the submit queue */
> +		if (execlists->ctrl_reg)
> +			writel(EL_CTRL_LOAD, execlists->ctrl_reg);
> +	}
>   
>   	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
>   	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
> @@ -595,7 +616,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   			return;
>   
>   		if (need_preempt(engine, last, execlists->queue_priority)) {
> -			inject_preempt_context(engine);
> +			execlist_send_preempt_to_idle(engine);
>   			return;
>   		}
>   
> @@ -922,22 +943,43 @@ static void process_csb(struct intel_engine_cs *engine)
>   			  execlists->active);
>   
>   		status = buf[2 * head];
> -		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> -			      GEN8_CTX_STATUS_PREEMPTED))
> -			execlists_set_active(execlists,
> -					     EXECLISTS_ACTIVE_HWACK);
> -		if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> -			execlists_clear_active(execlists,
> -					       EXECLISTS_ACTIVE_HWACK);
> -
> -		if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> -			continue;
> +		/*
> +		 * Check if preempted from idle to idle directly.
> +		 * The STATUS_IDLE_ACTIVE flag is used to mark
> +		 * such transition.
> +		 */
> +		if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
> +		     (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
>   
> -		/* We should never get a COMPLETED | IDLE_ACTIVE! */
> -		GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
> +			/*
> +			 * We could not have COMPLETED anything
> +			 * if we were idle before preemption.
> +			 */
> +			GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
> +		} else {
> +			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> +				      GEN8_CTX_STATUS_PREEMPTED))
> +				execlists_set_active(execlists,
> +						     EXECLISTS_ACTIVE_HWACK);
> +
> +			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> +				execlists_clear_active(execlists,
> +						       EXECLISTS_ACTIVE_HWACK);
> +
> +			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> +				continue;
>   
> -		if (status & GEN8_CTX_STATUS_COMPLETE &&
> -		    buf[2*head + 1] == execlists->preempt_complete_status) {
> +			/* We should never get a COMPLETED | IDLE_ACTIVE! */
> +			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
> +		}
> +
> +		/*
> +		 * Check if preempted to real idle, either directly or
> +		 * the preemptive context already finished executing
> +		 */
> +		if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
> +		    (status & GEN8_CTX_STATUS_COMPLETE &&
> +		    buf[2*head + 1] == execlists->preempt_complete_status)) {
>   			GEM_TRACE("%s preempt-idle\n", engine->name);
>   			complete_preempt_context(execlists);
>   			continue;
> @@ -2150,7 +2192,8 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
>   	engine->unpark = NULL;
>   
>   	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
> -	if (engine->i915->preempt_context)
> +	if (engine->i915->preempt_context ||
> +	    HAS_HW_PREEMPT_TO_IDLE(engine->i915))
>   		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
>   
>   	engine->i915->caps.scheduler =
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index f5a5502..871901a 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -43,6 +43,7 @@
>   #define RING_EXECLIST_SQ_CONTENTS(engine)	_MMIO((engine)->mmio_base + 0x510)
>   #define RING_EXECLIST_CONTROL(engine)		_MMIO((engine)->mmio_base + 0x550)
>   #define	  EL_CTRL_LOAD				(1 << 0)
> +#define	  EL_CTRL_PREEMPT_TO_IDLE		(1 << 1)
>   
>   /* The docs specify that the write pointer wraps around after 5h, "After status
>    * is written out to the last available status QW at offset 5h, this pointer
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v6] drm/i915/icl: Preempt-to-idle support in execlists.
  2018-12-10 15:40   ` Tvrtko Ursulin
@ 2018-12-14 11:10     ` Joonas Lahtinen
  2018-12-17 15:21       ` Lis, Tomasz
  0 siblings, 1 reply; 70+ messages in thread
From: Joonas Lahtinen @ 2018-12-14 11:10 UTC (permalink / raw)
  To: Tomasz Lis, Tvrtko Ursulin, intel-gfx; +Cc: Mika Kuoppala

Quoting Tvrtko Ursulin (2018-12-10 17:40:34)
> 
> On 09/11/2018 17:18, Tomasz Lis wrote:
> > The patch adds support of preempt-to-idle requesting by setting a proper
> > bit within Execlist Control Register, and receiving preemption result from
> > Context Status Buffer.
> > 
> > Preemption in previous gens required a special batch buffer to be executed,
> > so the Command Streamer never preempted to idle directly. In Icelake it is
> > possible, as there is a hardware mechanism to inform the kernel about
> > status of the preemption request.
> > 
> > This patch does not cover using the new preemption mechanism when GuC is
> > active.
> > 
> > The advantage of this new preemption path is that one less context switch is
> > needed, and returning information about preemption being complete is received
> > earlier. This leads to significant improvement in our IGT latency test.
> > 
> > Test performed: `gem_exec_latency --run-subtest render-preemption`, executed
> > 100 times, on the same platform, same kernel, without and with this patch.
> > Then taken average of the execution latency times:
> > 
> > subcase               old preempt.    icl preempt.
> > render-render 853.2036        840.1176
> > render-bsd    2328.8708       2083.2576
> > render-blt    2080.1501       1852.0792
> > render-vebox  1553.5134       1428.762
> > 
> > Improvement observed:
> > 
> > subcase               improvement
> > render-render  1.53%
> > render-bsd    10.55%
> > render-blt    10.96%
> > render-vebox   8.03%
> 
> Who can explain what do the parts other than render-render mean? At 
> least I can make sense of render-render - measure how long it takes for 
> one context to preempt another, but render-$other draws a blank for me. 
> How are engines pre-empting one another?
> 
> But anyway, even if only the 1.53% improvement is the real one, FWIW 
> that's I think good enough to justify the patch. It is sufficiently 
> small and contained that I don't see a problem. So:
> 
> Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

According to Chris, the baseline measurements are off by an order of
magnitude or so compared to where they should be. Might this be attributed
to execution on frequency-locked parts?

Would it be worthwhile to repeat the measurements on some unlocked parts?

Regards, Joonas

> 
> Regards,
> 
> Tvrtko
> 
> > 
> > v2: Added needs_preempt_context() change so that it is not created when
> >      preempt-to-idle is supported. (Chris)
> >      Updated setting HWACK flag so that it is cleared after
> >      preempt-to-idle. (Chris, Daniele)
> >      Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
> > 
> > v3: Fixed needs_preempt_context() change. (Chris)
> >      Merged preemption trigger functions to one. (Chris)
> >      Fixed context state to not assume COMPLETED_MASK after preemption,
> >      since idle-to-idle case will not have it set.
> > 
> > v4: Simplified needs_preempt_context() change. (Daniele)
> >      Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
> > 
> > v5: Renamed inject_preempt_context(). (Daniele)
> >      Removed duplicated GEM_BUG_ON() on HWACK (Daniele)
> > 
> > v6: Added performance test results.
> > 
> > Bspec: 18922
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> > Cc: Michal Winiarski <michal.winiarski@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> > Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> > Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_drv.h          |   2 +
> >   drivers/gpu/drm/i915/i915_gem_context.c  |   3 +-
> >   drivers/gpu/drm/i915/i915_pci.c          |   3 +-
> >   drivers/gpu/drm/i915/intel_device_info.h |   1 +
> >   drivers/gpu/drm/i915/intel_lrc.c         | 109 +++++++++++++++++++++----------
> >   drivers/gpu/drm/i915/intel_lrc.h         |   1 +
> >   6 files changed, 84 insertions(+), 35 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 08d25aa..d2cc9f1 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -2579,6 +2579,8 @@ intel_info(const struct drm_i915_private *dev_priv)
> >               ((dev_priv)->info.has_logical_ring_elsq)
> >   #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
> >               ((dev_priv)->info.has_logical_ring_preemption)
> > +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
> > +             ((dev_priv)->info.has_hw_preempt_to_idle)
> >   
> >   #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
> >   
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > index b97963d..10b1d61 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -529,7 +529,8 @@ static void init_contexts(struct drm_i915_private *i915)
> >   
> >   static bool needs_preempt_context(struct drm_i915_private *i915)
> >   {
> > -     return HAS_LOGICAL_RING_PREEMPTION(i915);
> > +     return HAS_LOGICAL_RING_PREEMPTION(i915) &&
> > +            !HAS_HW_PREEMPT_TO_IDLE(i915);
> >   }
> >   
> >   int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
> > diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> > index 4ccab83..82125cf 100644
> > --- a/drivers/gpu/drm/i915/i915_pci.c
> > +++ b/drivers/gpu/drm/i915/i915_pci.c
> > @@ -600,7 +600,8 @@ static const struct intel_device_info intel_cannonlake_info = {
> >                          TRANSCODER_DSI0_OFFSET, TRANSCODER_DSI1_OFFSET}, \
> >       GEN(11), \
> >       .ddb_size = 2048, \
> > -     .has_logical_ring_elsq = 1
> > +     .has_logical_ring_elsq = 1, \
> > +     .has_hw_preempt_to_idle = 1
> >   
> >   static const struct intel_device_info intel_icelake_11_info = {
> >       GEN11_FEATURES,
> > diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> > index 86ce1db..a2ee278 100644
> > --- a/drivers/gpu/drm/i915/intel_device_info.h
> > +++ b/drivers/gpu/drm/i915/intel_device_info.h
> > @@ -104,6 +104,7 @@ enum intel_ppgtt {
> >       func(has_logical_ring_contexts); \
> >       func(has_logical_ring_elsq); \
> >       func(has_logical_ring_preemption); \
> > +     func(has_hw_preempt_to_idle); \
> >       func(has_overlay); \
> >       func(has_pooled_eu); \
> >       func(has_psr); \
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index 08fd9b1..26b7062 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -155,6 +155,7 @@
> >   #define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
> >   #define GEN8_CTX_STATUS_COMPLETE    (1 << 4)
> >   #define GEN8_CTX_STATUS_LITE_RESTORE        (1 << 15)
> > +#define GEN11_CTX_STATUS_PREEMPT_IDLE        (1 << 29)
> >   
> >   #define GEN8_CTX_STATUS_COMPLETED_MASK \
> >        (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
> > @@ -500,29 +501,49 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
> >       port_set(port, port_pack(i915_request_get(rq), port_count(port)));
> >   }
> >   
> > -static void inject_preempt_context(struct intel_engine_cs *engine)
> > +static void execlist_send_preempt_to_idle(struct intel_engine_cs *engine)
> >   {
> >       struct intel_engine_execlists *execlists = &engine->execlists;
> > -     struct intel_context *ce =
> > -             to_intel_context(engine->i915->preempt_context, engine);
> > -     unsigned int n;
> > +     GEM_TRACE("%s\n", engine->name);
> >   
> > -     GEM_BUG_ON(execlists->preempt_complete_status !=
> > -                upper_32_bits(ce->lrc_desc));
> > +     if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
> > +             /*
> > +              * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
> > +              * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
> > +              */
> > +             GEM_BUG_ON(execlists->ctrl_reg == NULL);
> >   
> > -     /*
> > -      * Switch to our empty preempt context so
> > -      * the state of the GPU is known (idle).
> > -      */
> > -     GEM_TRACE("%s\n", engine->name);
> > -     for (n = execlists_num_ports(execlists); --n; )
> > -             write_desc(execlists, 0, n);
> > +             /*
> > +              * If we have hardware preempt-to-idle, we do not need to
> > +              * inject any job to the hardware. We only set a flag.
> > +              */
> > +             writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
> > +     } else {
> > +             struct intel_context *ce =
> > +                     to_intel_context(engine->i915->preempt_context, engine);
> > +             unsigned int n;
> >   
> > -     write_desc(execlists, ce->lrc_desc, n);
> > +             GEM_BUG_ON(execlists->preempt_complete_status !=
> > +                        upper_32_bits(ce->lrc_desc));
> > +             GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
> > +                     _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> > +                                        CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
> > +                     _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> > +                                        CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
> >   
> > -     /* we need to manually load the submit queue */
> > -     if (execlists->ctrl_reg)
> > -             writel(EL_CTRL_LOAD, execlists->ctrl_reg);
> > +             /*
> > +              * Switch to our empty preempt context so
> > +              * the state of the GPU is known (idle).
> > +              */
> > +             for (n = execlists_num_ports(execlists); --n; )
> > +                     write_desc(execlists, 0, n);
> > +
> > +             write_desc(execlists, ce->lrc_desc, n);
> > +
> > +             /* we need to manually load the submit queue */
> > +             if (execlists->ctrl_reg)
> > +                     writel(EL_CTRL_LOAD, execlists->ctrl_reg);
> > +     }
> >   
> >       execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
> >       execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
> > @@ -595,7 +616,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >                       return;
> >   
> >               if (need_preempt(engine, last, execlists->queue_priority)) {
> > -                     inject_preempt_context(engine);
> > +                     execlist_send_preempt_to_idle(engine);
> >                       return;
> >               }
> >   
> > @@ -922,22 +943,43 @@ static void process_csb(struct intel_engine_cs *engine)
> >                         execlists->active);
> >   
> >               status = buf[2 * head];
> > -             if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> > -                           GEN8_CTX_STATUS_PREEMPTED))
> > -                     execlists_set_active(execlists,
> > -                                          EXECLISTS_ACTIVE_HWACK);
> > -             if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> > -                     execlists_clear_active(execlists,
> > -                                            EXECLISTS_ACTIVE_HWACK);
> > -
> > -             if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> > -                     continue;
> > +             /*
> > +              * Check if preempted from idle to idle directly.
> > +              * The STATUS_IDLE_ACTIVE flag is used to mark
> > +              * such transition.
> > +              */
> > +             if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
> > +                  (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
> >   
> > -             /* We should never get a COMPLETED | IDLE_ACTIVE! */
> > -             GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
> > +                     /*
> > +                      * We could not have COMPLETED anything
> > +                      * if we were idle before preemption.
> > +                      */
> > +                     GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
> > +             } else {
> > +                     if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> > +                                   GEN8_CTX_STATUS_PREEMPTED))
> > +                             execlists_set_active(execlists,
> > +                                                  EXECLISTS_ACTIVE_HWACK);
> > +
> > +                     if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> > +                             execlists_clear_active(execlists,
> > +                                                    EXECLISTS_ACTIVE_HWACK);
> > +
> > +                     if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> > +                             continue;
> >   
> > -             if (status & GEN8_CTX_STATUS_COMPLETE &&
> > -                 buf[2*head + 1] == execlists->preempt_complete_status) {
> > +                     /* We should never get a COMPLETED | IDLE_ACTIVE! */
> > +                     GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
> > +             }
> > +
> > +             /*
> > +              * Check if preempted to real idle, either directly or
> > +              * the preemptive context already finished executing
> > +              */
> > +             if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
> > +                 (status & GEN8_CTX_STATUS_COMPLETE &&
> > +                 buf[2*head + 1] == execlists->preempt_complete_status)) {
> >                       GEM_TRACE("%s preempt-idle\n", engine->name);
> >                       complete_preempt_context(execlists);
> >                       continue;
> > @@ -2150,7 +2192,8 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
> >       engine->unpark = NULL;
> >   
> >       engine->flags |= I915_ENGINE_SUPPORTS_STATS;
> > -     if (engine->i915->preempt_context)
> > +     if (engine->i915->preempt_context ||
> > +         HAS_HW_PREEMPT_TO_IDLE(engine->i915))
> >               engine->flags |= I915_ENGINE_HAS_PREEMPTION;
> >   
> >       engine->i915->caps.scheduler =
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> > index f5a5502..871901a 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.h
> > +++ b/drivers/gpu/drm/i915/intel_lrc.h
> > @@ -43,6 +43,7 @@
> >   #define RING_EXECLIST_SQ_CONTENTS(engine)   _MMIO((engine)->mmio_base + 0x510)
> >   #define RING_EXECLIST_CONTROL(engine)               _MMIO((engine)->mmio_base + 0x550)
> >   #define       EL_CTRL_LOAD                          (1 << 0)
> > +#define        EL_CTRL_PREEMPT_TO_IDLE               (1 << 1)
> >   
> >   /* The docs specify that the write pointer wraps around after 5h, "After status
> >    * is written out to the last available status QW at offset 5h, this pointer
> > 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v6] drm/i915/icl: Preempt-to-idle support in execlists.
  2018-12-14 11:10     ` Joonas Lahtinen
@ 2018-12-17 15:21       ` Lis, Tomasz
  0 siblings, 0 replies; 70+ messages in thread
From: Lis, Tomasz @ 2018-12-17 15:21 UTC (permalink / raw)
  To: Joonas Lahtinen, Tvrtko Ursulin, intel-gfx; +Cc: Mika Kuoppala



On 2018-12-14 12:10, Joonas Lahtinen wrote:
> Quoting Tvrtko Ursulin (2018-12-10 17:40:34)
>> On 09/11/2018 17:18, Tomasz Lis wrote:
>>> The patch adds support of preempt-to-idle requesting by setting a proper
>>> bit within Execlist Control Register, and receiving preemption result from
>>> Context Status Buffer.
>>>
>>> Preemption in previous gens required a special batch buffer to be executed,
>>> so the Command Streamer never preempted to idle directly. In Icelake it is
>>> possible, as there is a hardware mechanism to inform the kernel about
>>> status of the preemption request.
>>>
>>> This patch does not cover using the new preemption mechanism when GuC is
>>> active.
>>>
>>> The advantage of this new preemption path is that one less context switch is
>>> needed, and returning information about preemption being complete is received
>>> earlier. This leads to significant improvement in our IGT latency test.
>>>
>>> Test performed: `gem_exec_latency --run-subtest render-preemption`, executed
>>> 100 times, on the same platform, same kernel, without and with this patch.
>>> Then taken average of the execution latency times:
>>>
>>> subcase               old preempt.    icl preempt.
>>> render-render 853.2036        840.1176
>>> render-bsd    2328.8708       2083.2576
>>> render-blt    2080.1501       1852.0792
>>> render-vebox  1553.5134       1428.762
>>>
>>> Improvement observed:
>>>
>>> subcase               improvement
>>> render-render  1.53%
>>> render-bsd    10.55%
>>> render-blt    10.96%
>>> render-vebox   8.03%
>> Who can explain what do the parts other than render-render mean? At
>> least I can make sense of render-render - measure how long it takes for
>> one context to preempt another, but render-$other draws a blank for me.
>> How are engines pre-empting one another?
These cases submit a low-priority spin buffer to the 'render' ring, and
then, on a high-priority context, they submit two buffers which only write
the current timestamp to a known location: the first one to 'render', and
the second to the other engine.
Submitting to the other engine with the same context makes sure that
'render' gets back to the spin buffer after each iteration. The two
high-priority tasks are not parallelized, because they declare that they
write to the same object.

The time taken to do all operations in one iteration is then displayed. 
So the time includes:
- preempting spin buffer
- executing timestamp write from within 'render'
- executing timestamp write from within other ring (while at the same 
time, spin buffer gets back to execution on render)

If I'm not mistaken about the above, it looks like the 'render-render'
case is the worst one for measuring the performance increase: if both
high-priority tasks are executed on the same engine, there's a considerable
chance that the hardware will go straight to another pair of them,
without going through the spin buffer and preempting it.

-Tomasz

>>
>> But anyway, even if only the 1.53% improvement is the real one, FWIW
>> that's I think good enough to justify the patch. It is sufficiently
>> small and contained that I don't see a problem. So:
>>
>> Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> According to Chris, the baseline measurements are off by a decade or so
> compared to where they should be. This might be attributed to execution
> on frequency locked parts?
>
> Would it be worthy to repeat the numbers with some unlocked parts?
>
> Regards, Joonas
>
>> Regards,
>>
>> Tvrtko
>>
>>> v2: Added needs_preempt_context() change so that it is not created when
>>>       preempt-to-idle is supported. (Chris)
>>>       Updated setting HWACK flag so that it is cleared after
>>>       preempt-to-idle. (Chris, Daniele)
>>>       Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
>>>
>>> v3: Fixed needs_preempt_context() change. (Chris)
>>>       Merged preemption trigger functions to one. (Chris)
>>>       Fixed context state to not assume COMPLETED_MASK after preemption,
>>>       since idle-to-idle case will not have it set.
>>>
>>> v4: Simplified needs_preempt_context() change. (Daniele)
>>>       Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
>>>
>>> v5: Renamed inject_preempt_context(). (Daniele)
>>>       Removed duplicated GEM_BUG_ON() on HWACK (Daniele)
>>>
>>> v6: Added performance test results.
>>>
>>> Bspec: 18922
>>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>> Cc: Michal Winiarski <michal.winiarski@intel.com>
>>> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>>> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_drv.h          |   2 +
>>>    drivers/gpu/drm/i915/i915_gem_context.c  |   3 +-
>>>    drivers/gpu/drm/i915/i915_pci.c          |   3 +-
>>>    drivers/gpu/drm/i915/intel_device_info.h |   1 +
>>>    drivers/gpu/drm/i915/intel_lrc.c         | 109 +++++++++++++++++++++----------
>>>    drivers/gpu/drm/i915/intel_lrc.h         |   1 +
>>>    6 files changed, 84 insertions(+), 35 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>> index 08d25aa..d2cc9f1 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -2579,6 +2579,8 @@ intel_info(const struct drm_i915_private *dev_priv)
>>>                ((dev_priv)->info.has_logical_ring_elsq)
>>>    #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>>>                ((dev_priv)->info.has_logical_ring_preemption)
>>> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
>>> +             ((dev_priv)->info.has_hw_preempt_to_idle)
>>>    
>>>    #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>>>    
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>>> index b97963d..10b1d61 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>>> @@ -529,7 +529,8 @@ static void init_contexts(struct drm_i915_private *i915)
>>>    
>>>    static bool needs_preempt_context(struct drm_i915_private *i915)
>>>    {
>>> -     return HAS_LOGICAL_RING_PREEMPTION(i915);
>>> +     return HAS_LOGICAL_RING_PREEMPTION(i915) &&
>>> +            !HAS_HW_PREEMPT_TO_IDLE(i915);
>>>    }
>>>    
>>>    int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
>>> index 4ccab83..82125cf 100644
>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>> @@ -600,7 +600,8 @@ static const struct intel_device_info intel_cannonlake_info = {
>>>                           TRANSCODER_DSI0_OFFSET, TRANSCODER_DSI1_OFFSET}, \
>>>        GEN(11), \
>>>        .ddb_size = 2048, \
>>> -     .has_logical_ring_elsq = 1
>>> +     .has_logical_ring_elsq = 1, \
>>> +     .has_hw_preempt_to_idle = 1
>>>    
>>>    static const struct intel_device_info intel_icelake_11_info = {
>>>        GEN11_FEATURES,
>>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
>>> index 86ce1db..a2ee278 100644
>>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>>> @@ -104,6 +104,7 @@ enum intel_ppgtt {
>>>        func(has_logical_ring_contexts); \
>>>        func(has_logical_ring_elsq); \
>>>        func(has_logical_ring_preemption); \
>>> +     func(has_hw_preempt_to_idle); \
>>>        func(has_overlay); \
>>>        func(has_pooled_eu); \
>>>        func(has_psr); \
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>> index 08fd9b1..26b7062 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -155,6 +155,7 @@
>>>    #define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
>>>    #define GEN8_CTX_STATUS_COMPLETE    (1 << 4)
>>>    #define GEN8_CTX_STATUS_LITE_RESTORE        (1 << 15)
>>> +#define GEN11_CTX_STATUS_PREEMPT_IDLE        (1 << 29)
>>>    
>>>    #define GEN8_CTX_STATUS_COMPLETED_MASK \
>>>         (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
>>> @@ -500,29 +501,49 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
>>>        port_set(port, port_pack(i915_request_get(rq), port_count(port)));
>>>    }
>>>    
>>> -static void inject_preempt_context(struct intel_engine_cs *engine)
>>> +static void execlist_send_preempt_to_idle(struct intel_engine_cs *engine)
>>>    {
>>>        struct intel_engine_execlists *execlists = &engine->execlists;
>>> -     struct intel_context *ce =
>>> -             to_intel_context(engine->i915->preempt_context, engine);
>>> -     unsigned int n;
>>> +     GEM_TRACE("%s\n", engine->name);
>>>    
>>> -     GEM_BUG_ON(execlists->preempt_complete_status !=
>>> -                upper_32_bits(ce->lrc_desc));
>>> +     if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
>>> +             /*
>>> +              * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
>>> +              * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
>>> +              */
>>> +             GEM_BUG_ON(execlists->ctrl_reg == NULL);
>>>    
>>> -     /*
>>> -      * Switch to our empty preempt context so
>>> -      * the state of the GPU is known (idle).
>>> -      */
>>> -     GEM_TRACE("%s\n", engine->name);
>>> -     for (n = execlists_num_ports(execlists); --n; )
>>> -             write_desc(execlists, 0, n);
>>> +             /*
>>> +              * If we have hardware preempt-to-idle, we do not need to
>>> +              * inject any job to the hardware. We only set a flag.
>>> +              */
>>> +             writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
>>> +     } else {
>>> +             struct intel_context *ce =
>>> +                     to_intel_context(engine->i915->preempt_context, engine);
>>> +             unsigned int n;
>>>    
>>> -     write_desc(execlists, ce->lrc_desc, n);
>>> +             GEM_BUG_ON(execlists->preempt_complete_status !=
>>> +                        upper_32_bits(ce->lrc_desc));
>>> +             GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
>>> +                     _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>>> +                                        CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
>>> +                     _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>>> +                                        CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
>>>    
>>> -     /* we need to manually load the submit queue */
>>> -     if (execlists->ctrl_reg)
>>> -             writel(EL_CTRL_LOAD, execlists->ctrl_reg);
>>> +             /*
>>> +              * Switch to our empty preempt context so
>>> +              * the state of the GPU is known (idle).
>>> +              */
>>> +             for (n = execlists_num_ports(execlists); --n; )
>>> +                     write_desc(execlists, 0, n);
>>> +
>>> +             write_desc(execlists, ce->lrc_desc, n);
>>> +
>>> +             /* we need to manually load the submit queue */
>>> +             if (execlists->ctrl_reg)
>>> +                     writel(EL_CTRL_LOAD, execlists->ctrl_reg);
>>> +     }
>>>    
>>>        execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
>>>        execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
>>> @@ -595,7 +616,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>>>                        return;
>>>    
>>>                if (need_preempt(engine, last, execlists->queue_priority)) {
>>> -                     inject_preempt_context(engine);
>>> +                     execlist_send_preempt_to_idle(engine);
>>>                        return;
>>>                }
>>>    
>>> @@ -922,22 +943,43 @@ static void process_csb(struct intel_engine_cs *engine)
>>>                          execlists->active);
>>>    
>>>                status = buf[2 * head];
>>> -             if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>> -                           GEN8_CTX_STATUS_PREEMPTED))
>>> -                     execlists_set_active(execlists,
>>> -                                          EXECLISTS_ACTIVE_HWACK);
>>> -             if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>>> -                     execlists_clear_active(execlists,
>>> -                                            EXECLISTS_ACTIVE_HWACK);
>>> -
>>> -             if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
>>> -                     continue;
>>> +             /*
>>> +              * Check if preempted from idle to idle directly.
>>> +              * The STATUS_IDLE_ACTIVE flag is used to mark
>>> +              * such transition.
>>> +              */
>>> +             if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
>>> +                  (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
>>>    
>>> -             /* We should never get a COMPLETED | IDLE_ACTIVE! */
>>> -             GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>>> +                     /*
>>> +                      * We could not have COMPLETED anything
>>> +                      * if we were idle before preemption.
>>> +                      */
>>> +                     GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
>>> +             } else {
>>> +                     if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>> +                                   GEN8_CTX_STATUS_PREEMPTED))
>>> +                             execlists_set_active(execlists,
>>> +                                                  EXECLISTS_ACTIVE_HWACK);
>>> +
>>> +                     if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>>> +                             execlists_clear_active(execlists,
>>> +                                                    EXECLISTS_ACTIVE_HWACK);
>>> +
>>> +                     if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
>>> +                             continue;
>>>    
>>> -             if (status & GEN8_CTX_STATUS_COMPLETE &&
>>> -                 buf[2*head + 1] == execlists->preempt_complete_status) {
>>> +                     /* We should never get a COMPLETED | IDLE_ACTIVE! */
>>> +                     GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>>> +             }
>>> +
>>> +             /*
>>> +              * Check if preempted to real idle, either directly or
>>> +              * the preemptive context already finished executing
>>> +              */
>>> +             if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
>>> +                 (status & GEN8_CTX_STATUS_COMPLETE &&
>>> +                 buf[2*head + 1] == execlists->preempt_complete_status)) {
>>>                        GEM_TRACE("%s preempt-idle\n", engine->name);
>>>                        complete_preempt_context(execlists);
>>>                        continue;
>>> @@ -2150,7 +2192,8 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
>>>        engine->unpark = NULL;
>>>    
>>>        engine->flags |= I915_ENGINE_SUPPORTS_STATS;
>>> -     if (engine->i915->preempt_context)
>>> +     if (engine->i915->preempt_context ||
>>> +         HAS_HW_PREEMPT_TO_IDLE(engine->i915))
>>>                engine->flags |= I915_ENGINE_HAS_PREEMPTION;
>>>    
>>>        engine->i915->caps.scheduler =
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
>>> index f5a5502..871901a 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.h
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.h
>>> @@ -43,6 +43,7 @@
>>>    #define RING_EXECLIST_SQ_CONTENTS(engine)   _MMIO((engine)->mmio_base + 0x510)
>>>    #define RING_EXECLIST_CONTROL(engine)               _MMIO((engine)->mmio_base + 0x550)
>>>    #define       EL_CTRL_LOAD                          (1 << 0)
>>> +#define        EL_CTRL_PREEMPT_TO_IDLE               (1 << 1)
>>>    
>>>    /* The docs specify that the write pointer wraps around after 5h, "After status
>>>     * is written out to the last available status QW at offset 5h, this pointer
>>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 70+ messages in thread

end of thread, other threads:[~2018-12-17 15:21 UTC | newest]

Thread overview: 70+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-27 15:17 [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
2018-03-27 15:40 ` ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
2018-03-27 15:56 ` ✓ Fi.CI.BAT: success " Patchwork
2018-03-27 20:50 ` ✓ Fi.CI.IGT: " Patchwork
2018-03-27 23:27 ` [PATCH v1] " Chris Wilson
2018-03-28 16:06   ` Lis, Tomasz
2018-03-28 22:28     ` Chris Wilson
2018-03-30 15:42       ` Lis, Tomasz
2018-03-30 19:45         ` Daniele Ceraolo Spurio
2018-04-26 14:02           ` Lis, Tomasz
2018-03-30 18:23   ` Daniele Ceraolo Spurio
2018-04-12 17:15     ` Lis, Tomasz
2018-04-19 11:44 ` [PATCH v2] " Tomasz Lis
2018-04-19 12:00   ` Chris Wilson
2018-04-19 22:23     ` Daniele Ceraolo Spurio
2018-04-19 11:58 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev2) Patchwork
2018-04-19 11:59 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-04-19 12:13 ` ✓ Fi.CI.BAT: success " Patchwork
2018-04-19 16:08 ` ✓ Fi.CI.IGT: " Patchwork
2018-05-11 15:45 ` [PATCH v3] drm/i915/gen11: Preempt-to-idle support in execlists - v3 notes Tomasz Lis
2018-05-11 15:45   ` [PATCH v3] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
2018-05-18 21:08     ` Daniele Ceraolo Spurio
2018-05-21 10:16       ` Lis, Tomasz
2018-05-22 14:39         ` Ceraolo Spurio, Daniele
2018-05-22 14:54           ` Lis, Tomasz
2018-05-11 16:15 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev3) Patchwork
2018-05-11 16:16 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-05-11 16:33 ` ✓ Fi.CI.BAT: success " Patchwork
2018-05-11 17:46 ` ✗ Fi.CI.IGT: failure " Patchwork
2018-05-25 18:26 ` [PATCH v4] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
2018-06-11 16:37   ` Daniele Ceraolo Spurio
2018-06-29 16:50     ` Lis, Tomasz
2018-07-02 17:36       ` Daniele Ceraolo Spurio
2018-05-25 18:51 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev4) Patchwork
2018-05-25 18:52 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-05-25 19:08 ` ✓ Fi.CI.BAT: success " Patchwork
2018-05-26  5:18 ` ✓ Fi.CI.IGT: " Patchwork
2018-07-06 15:52 ` [PATCH v5] drm/i915/gen11: Preempt-to-idle support in execlists Tomasz Lis
2018-07-06 16:08 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev5) Patchwork
2018-07-06 16:08 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-07-06 16:25 ` ✓ Fi.CI.BAT: success " Patchwork
2018-07-07 14:09 ` ✓ Fi.CI.IGT: " Patchwork
2018-07-16 13:07 ` [PATCH v6] drm/i915: Add IOCTL Param to control data port coherency Tomasz Lis
2018-07-16 13:35   ` Tvrtko Ursulin
2018-07-18 13:24   ` Joonas Lahtinen
2018-07-18 14:42     ` Tvrtko Ursulin
2018-07-18 15:28       ` Lis, Tomasz
2018-07-19  7:12         ` Joonas Lahtinen
2018-07-19 15:10           ` Lis, Tomasz
2018-07-16 14:36 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev6) Patchwork
2018-07-16 14:37 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-07-16 14:58 ` ✓ Fi.CI.BAT: success " Patchwork
2018-07-16 19:26 ` ✗ Fi.CI.IGT: failure " Patchwork
2018-10-15 17:29 ` [PATCH v5] drm/i915/icl: Preempt-to-idle support in execlists Tomasz Lis
2018-10-16 10:53   ` Joonas Lahtinen
2018-10-19 16:00     ` Lis, Tomasz
2018-10-23  9:13       ` Joonas Lahtinen
2018-10-23  9:24         ` Lis, Tomasz
2018-10-15 17:44 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev7) Patchwork
2018-10-15 17:45 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-10-15 18:07 ` ✓ Fi.CI.BAT: success " Patchwork
2018-10-15 23:55 ` ✗ Fi.CI.IGT: failure " Patchwork
2018-11-09 17:18 ` [PATCH v6] drm/i915/icl: Preempt-to-idle support in execlists Tomasz Lis
2018-12-10 15:40   ` Tvrtko Ursulin
2018-12-14 11:10     ` Joonas Lahtinen
2018-12-17 15:21       ` Lis, Tomasz
2018-11-09 18:17 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gen11: Preempt-to-idle support in execlists. (rev8) Patchwork
2018-11-09 18:18 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-11-09 18:33 ` ✓ Fi.CI.BAT: success " Patchwork
2018-11-10  3:29 ` ✓ Fi.CI.IGT: " Patchwork

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.