intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH] drm/i915/gt: Free stale request on destroying the virtual engine
@ 2020-08-06 11:28 Chris Wilson
  2020-08-06 12:35 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915/gt: Free stale request on destroying the virtual engine (rev2) Patchwork
  2020-08-06 12:58 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
  0 siblings, 2 replies; 5+ messages in thread
From: Chris Wilson @ 2020-08-06 11:28 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Since preempt-to-busy, we may unsubmit a request while it is still on
the HW and completes asynchronously. That means it may be retired and in
the process destroy the virtual engine (as the user has closed their
context), but that engine may still be holding onto the unsubmitted
compelted request. Therefore we need to potentially cleanup the old
request on destroying the virtual engine. We also have to keep the
virtual_engine alive until after the sibling's execlists_dequeue() have
finished peeking into the virtual engines, for which we serialise with
RCU.

v2: Be paranoid and flush the tasklet as well.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 50 ++++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 417f6b0c6c61..39c215e2b5dd 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -182,6 +182,7 @@
 struct virtual_engine {
 	struct intel_engine_cs base;
 	struct intel_context context;
+	struct rcu_work rcu;
 
 	/*
 	 * We allow only a single request through the virtual engine at a time
@@ -5387,33 +5388,47 @@ static struct list_head *virtual_queue(struct virtual_engine *ve)
 	return &ve->base.execlists.default_priolist.requests[0];
 }
 
-static void virtual_context_destroy(struct kref *kref)
+static void rcu_virtual_context_destroy(struct work_struct *wrk)
 {
 	struct virtual_engine *ve =
-		container_of(kref, typeof(*ve), context.ref);
+		container_of(wrk, typeof(*ve), rcu.work);
 	unsigned int n;
 
-	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
-	GEM_BUG_ON(ve->request);
 	GEM_BUG_ON(ve->context.inflight);
 
+	if (unlikely(ve->request)) {
+		struct i915_request *old;
+
+		spin_lock_irq(&ve->base.active.lock);
+
+		old = fetch_and_zero(&ve->request);
+		if (old) {
+			GEM_BUG_ON(!i915_request_completed(old));
+			__i915_request_submit(old);
+			i915_request_put(old);
+		}
+
+		spin_unlock_irq(&ve->base.active.lock);
+	}
+
 	for (n = 0; n < ve->num_siblings; n++) {
 		struct intel_engine_cs *sibling = ve->siblings[n];
 		struct rb_node *node = &ve->nodes[sibling->id].rb;
-		unsigned long flags;
 
 		if (RB_EMPTY_NODE(node))
 			continue;
 
-		spin_lock_irqsave(&sibling->active.lock, flags);
+		spin_lock_irq(&sibling->active.lock);
 
 		/* Detachment is lazily performed in the execlists tasklet */
 		if (!RB_EMPTY_NODE(node))
 			rb_erase_cached(node, &sibling->execlists.virtual);
 
-		spin_unlock_irqrestore(&sibling->active.lock, flags);
+		spin_unlock_irq(&sibling->active.lock);
 	}
-	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
+
+	tasklet_kill(&ve->base.execlists.tasklet);
+	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
 
 	if (ve->context.state)
 		__execlists_context_fini(&ve->context);
@@ -5425,6 +5440,25 @@ static void virtual_context_destroy(struct kref *kref)
 	kfree(ve);
 }
 
+static void virtual_context_destroy(struct kref *kref)
+{
+	struct virtual_engine *ve =
+		container_of(kref, typeof(*ve), context.ref);
+
+	/*
+	 * When destroying the virtual engine, we have to be aware that
+	 * it may still be in use from an hardirq/softirq context causing
+	 * the resubmission of a completed request (background completion
+	 * due to preempt-to-busy). Before we can free the engine, we need
+	 * to flush the submission code and tasklets that are still potentially
+	 * accessing the engine. Flushing the tasklets require process context,
+	 * and since we can guard the resubmit onto the engine with an RCU read
+	 * lock, we can delegate the free of the engine to an RCU worker.
+	 */
+	INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
+	queue_rcu_work(system_wq, &ve->rcu);
+}
+
 static void virtual_engine_initial_hint(struct virtual_engine *ve)
 {
 	int swp;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915/gt: Free stale request on destroying the virtual engine (rev2)
  2020-08-06 11:28 [Intel-gfx] [PATCH] drm/i915/gt: Free stale request on destroying the virtual engine Chris Wilson
@ 2020-08-06 12:35 ` Patchwork
  2020-08-06 12:58 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
  1 sibling, 0 replies; 5+ messages in thread
From: Patchwork @ 2020-08-06 12:35 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gt: Free stale request on destroying the virtual engine (rev2)
URL   : https://patchwork.freedesktop.org/series/79398/
State : warning

== Summary ==

$ dim sparse --fast origin/drm-tip
Sparse version: v0.6.0
Fast mode used, each commit won't be checked separately.


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915/gt: Free stale request on destroying the virtual engine (rev2)
  2020-08-06 11:28 [Intel-gfx] [PATCH] drm/i915/gt: Free stale request on destroying the virtual engine Chris Wilson
  2020-08-06 12:35 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915/gt: Free stale request on destroying the virtual engine (rev2) Patchwork
@ 2020-08-06 12:58 ` Patchwork
  1 sibling, 0 replies; 5+ messages in thread
From: Patchwork @ 2020-08-06 12:58 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 6028 bytes --]

== Series Details ==

Series: drm/i915/gt: Free stale request on destroying the virtual engine (rev2)
URL   : https://patchwork.freedesktop.org/series/79398/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_8848 -> Patchwork_18314
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_18314 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_18314, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18314/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_18314:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live@memory_region:
    - fi-skl-lmem:        [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8848/fi-skl-lmem/igt@i915_selftest@live@memory_region.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18314/fi-skl-lmem/igt@i915_selftest@live@memory_region.html

  * igt@i915_selftest@live@requests:
    - fi-cml-s:           [PASS][3] -> [INCOMPLETE][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8848/fi-cml-s/igt@i915_selftest@live@requests.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18314/fi-cml-s/igt@i915_selftest@live@requests.html

  
Known issues
------------

  Here are the changes found in Patchwork_18314 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_module_load@reload:
    - fi-apl-guc:         [PASS][5] -> [DMESG-WARN][6] ([i915#1635] / [i915#1982])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8848/fi-apl-guc/igt@i915_module_load@reload.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18314/fi-apl-guc/igt@i915_module_load@reload.html

  
#### Possible fixes ####

  * igt@i915_module_load@reload:
    - fi-tgl-u2:          [DMESG-WARN][7] ([i915#1982]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8848/fi-tgl-u2/igt@i915_module_load@reload.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18314/fi-tgl-u2/igt@i915_module_load@reload.html

  * igt@i915_pm_rpm@basic-pci-d3-state:
    - fi-bsw-kefka:       [DMESG-WARN][9] ([i915#1982]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8848/fi-bsw-kefka/igt@i915_pm_rpm@basic-pci-d3-state.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18314/fi-bsw-kefka/igt@i915_pm_rpm@basic-pci-d3-state.html

  * igt@i915_pm_rpm@module-reload:
    - fi-byt-j1900:       [DMESG-WARN][11] ([i915#1982]) -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8848/fi-byt-j1900/igt@i915_pm_rpm@module-reload.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18314/fi-byt-j1900/igt@i915_pm_rpm@module-reload.html

  * igt@kms_flip@basic-flip-vs-wf_vblank@c-edp1:
    - fi-icl-u2:          [DMESG-WARN][13] ([i915#1982]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8848/fi-icl-u2/igt@kms_flip@basic-flip-vs-wf_vblank@c-edp1.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18314/fi-icl-u2/igt@kms_flip@basic-flip-vs-wf_vblank@c-edp1.html

  
#### Warnings ####

  * igt@gem_exec_suspend@basic-s0:
    - fi-kbl-x1275:       [DMESG-WARN][15] ([i915#62] / [i915#92] / [i915#95]) -> [DMESG-WARN][16] ([i915#1982] / [i915#62] / [i915#92])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8848/fi-kbl-x1275/igt@gem_exec_suspend@basic-s0.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18314/fi-kbl-x1275/igt@gem_exec_suspend@basic-s0.html

  * igt@i915_pm_rpm@basic-pci-d3-state:
    - fi-kbl-x1275:       [DMESG-WARN][17] ([i915#62] / [i915#92] / [i915#95]) -> [DMESG-WARN][18] ([i915#62] / [i915#92])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8848/fi-kbl-x1275/igt@i915_pm_rpm@basic-pci-d3-state.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18314/fi-kbl-x1275/igt@i915_pm_rpm@basic-pci-d3-state.html

  * igt@prime_vgem@basic-fence-flip:
    - fi-kbl-x1275:       [DMESG-WARN][19] ([i915#62] / [i915#92]) -> [DMESG-WARN][20] ([i915#62] / [i915#92] / [i915#95]) +5 similar issues
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8848/fi-kbl-x1275/igt@prime_vgem@basic-fence-flip.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18314/fi-kbl-x1275/igt@prime_vgem@basic-fence-flip.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [i915#1635]: https://gitlab.freedesktop.org/drm/intel/issues/1635
  [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
  [i915#2100]: https://gitlab.freedesktop.org/drm/intel/issues/2100
  [i915#62]: https://gitlab.freedesktop.org/drm/intel/issues/62
  [i915#92]: https://gitlab.freedesktop.org/drm/intel/issues/92
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (44 -> 38)
------------------------------

  Additional (1): fi-tgl-y 
  Missing    (7): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_8848 -> Patchwork_18314

  CI-20190529: 20190529
  CI_DRM_8848: f39035176cb854c6d620af7614a60a485ee26818 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5760: 53d2ddd2f5f7beb98a996d7f2c3976c87d4da01f @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_18314: fcebc7632372ad87fa009b95da4c706ce390f678 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

fcebc7632372 drm/i915/gt: Free stale request on destroying the virtual engine

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18314/index.html

[-- Attachment #1.2: Type: text/html, Size: 7939 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/gt: Free stale request on destroying the virtual engine
  2020-07-13 12:04 [Intel-gfx] [PATCH] drm/i915/gt: Free stale request on destroying the virtual engine Chris Wilson
@ 2020-07-13 12:15 ` Mika Kuoppala
  0 siblings, 0 replies; 5+ messages in thread
From: Mika Kuoppala @ 2020-07-13 12:15 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: stable, Chris Wilson

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Since preempt-to-busy, we may unsubmit a request while it is still on
> the HW and completes asynchronously. That means it may be retired and in
> the process destroy the virtual engine (as the user has closed their
> context), but that engine may still be holding onto the unsubmitted
> compelted request. Therefore we need to potentially cleanup the old

completed.
-Mika
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Intel-gfx] [PATCH] drm/i915/gt: Free stale request on destroying the virtual engine
@ 2020-07-13 12:04 Chris Wilson
  2020-07-13 12:15 ` Mika Kuoppala
  0 siblings, 1 reply; 5+ messages in thread
From: Chris Wilson @ 2020-07-13 12:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: stable, Chris Wilson

Since preempt-to-busy, we may unsubmit a request while it is still on
the HW and completes asynchronously. That means it may be retired and in
the process destroy the virtual engine (as the user has closed their
context), but that engine may still be holding onto the unsubmitted
compelted request. Therefore we need to potentially cleanup the old
request on destroying the virtual engine. We also have to keep the
virtual_engine alive until after the sibling's execlists_dequeue() have
finished peeking into the virtual engines, for which we serialise with
RCU.

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2118
Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy")
References: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: <stable@vger.kernel.org> # v5.4+
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index cd4262cc96e2..8425fd917d75 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -179,6 +179,7 @@
 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
 
 struct virtual_engine {
+	struct rcu_head rcu;
 	struct intel_engine_cs base;
 	struct intel_context context;
 
@@ -5385,10 +5386,25 @@ static void virtual_context_destroy(struct kref *kref)
 		container_of(kref, typeof(*ve), context.ref);
 	unsigned int n;
 
-	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
-	GEM_BUG_ON(ve->request);
 	GEM_BUG_ON(ve->context.inflight);
 
+	if (unlikely(ve->request)) {
+		struct i915_request *old;
+		unsigned long flags;
+
+		spin_lock_irqsave(&ve->base.active.lock, flags);
+
+		old = fetch_and_zero(&ve->request);
+		if (old) {
+			GEM_BUG_ON(!i915_request_completed(old));
+			__i915_request_submit(old);
+			i915_request_put(old);
+		}
+
+		spin_unlock_irqrestore(&ve->base.active.lock, flags);
+	}
+	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
+
 	for (n = 0; n < ve->num_siblings; n++) {
 		struct intel_engine_cs *sibling = ve->siblings[n];
 		struct rb_node *node = &ve->nodes[sibling->id].rb;
@@ -5414,7 +5430,7 @@ static void virtual_context_destroy(struct kref *kref)
 	intel_engine_free_request_pool(&ve->base);
 
 	kfree(ve->bonds);
-	kfree(ve);
+	kfree_rcu(ve, rcu);
 }
 
 static void virtual_engine_initial_hint(struct virtual_engine *ve)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-08-06 12:58 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-06 11:28 [Intel-gfx] [PATCH] drm/i915/gt: Free stale request on destroying the virtual engine Chris Wilson
2020-08-06 12:35 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915/gt: Free stale request on destroying the virtual engine (rev2) Patchwork
2020-08-06 12:58 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2020-07-13 12:04 [Intel-gfx] [PATCH] drm/i915/gt: Free stale request on destroying the virtual engine Chris Wilson
2020-07-13 12:15 ` Mika Kuoppala

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).