* [Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex
@ 2020-01-29 19:54 Chris Wilson
2020-01-29 23:21 ` Chris Wilson
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: Chris Wilson @ 2020-01-29 19:54 UTC (permalink / raw)
To: intel-gfx
On Braswell and Broxton (also known as Valleyview and Apollolake), we
need to serialise updates of the GGTT using the big stop_machine()
hammer. This has the side effect of appearing to lockdep as a possible
reclaim (since it uses the cpuhp mutex and that is tainted by per-cpu
allocations). However, we want to use vm->mutex (including ggtt->mutex)
from wthin the shrinker and so must avoid such possible taints. For this
purpose, we introduced the asynchronous vma binding and we can apply it
to the PIN_GLOBAL so long as take care to add the necessary waits for
the worker afterwards.
Closes: https://gitlab.freedesktop.org/drm/intel/issues/211
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 +++---
drivers/gpu/drm/i915/gt/intel_ggtt.c | 1 +
drivers/gpu/drm/i915/gt/intel_gt.c | 2 +-
drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +-
drivers/gpu/drm/i915/gt/intel_ring.c | 6 ++---
drivers/gpu/drm/i915/gt/intel_timeline.c | 4 ++--
drivers/gpu/drm/i915/gt/uc/intel_guc.c | 4 ++--
drivers/gpu/drm/i915/i915_gem.c | 6 +++++
drivers/gpu/drm/i915/i915_vma.c | 27 ++++++++++++++++++++++-
drivers/gpu/drm/i915/i915_vma.h | 2 ++
10 files changed, 46 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 39fe9a5b4820..2504f4d05edf 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -527,7 +527,6 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
{
unsigned int flags;
- flags = PIN_GLOBAL;
if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
/*
* On g33, we cannot place HWS above 256MiB, so
@@ -540,11 +539,11 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
* above the mappable region (even though we never
* actually map it).
*/
- flags |= PIN_MAPPABLE;
+ flags = PIN_MAPPABLE;
else
- flags |= PIN_HIGH;
+ flags = PIN_HIGH;
- return i915_vma_pin(vma, 0, 0, flags);
+ return i915_ggtt_pin(vma, 0, flags);
}
static int init_status_page(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 79096722ce16..6af50d62d712 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -881,6 +881,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
if (ggtt->vm.clear_range != nop_clear_range)
ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
+ ggtt->vm.bind_async_flags = I915_VMA_GLOBAL_BIND;
}
ggtt->invalidate = gen8_ggtt_invalidate;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 143268083135..bf6c0f949e35 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -345,7 +345,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
goto err_unref;
}
- ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ ret = i915_ggtt_pin(vma, 0, PIN_HIGH);
if (ret)
goto err_unref;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 8f15ab7d8d88..e63ae4a17110 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -3259,7 +3259,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
goto err;
}
- err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ err = i915_ggtt_pin(vma, 0, PIN_HIGH);
if (err)
goto err;
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index 374b28f13ca0..366013367526 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -31,17 +31,15 @@ int intel_ring_pin(struct intel_ring *ring)
if (atomic_fetch_inc(&ring->pin_count))
return 0;
- flags = PIN_GLOBAL;
-
/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
- flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+ flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
if (vma->obj->stolen)
flags |= PIN_MAPPABLE;
else
flags |= PIN_HIGH;
- ret = i915_vma_pin(vma, 0, 0, flags);
+ ret = i915_ggtt_pin(vma, 0, flags);
if (unlikely(ret))
goto err_unpin;
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 87716529cd2f..465f87b65901 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -308,7 +308,7 @@ int intel_timeline_pin(struct intel_timeline *tl)
if (atomic_add_unless(&tl->pin_count, 1, 0))
return 0;
- err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH);
if (err)
return err;
@@ -431,7 +431,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
goto err_rollback;
}
- err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ err = i915_ggtt_pin(vma, 0, PIN_HIGH);
if (err) {
__idle_hwsp_free(vma->private, cacheline);
goto err_rollback;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 5d00a3b2d914..c4c1523da7a6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -678,8 +678,8 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
if (IS_ERR(vma))
goto err;
- flags = PIN_GLOBAL | PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
- ret = i915_vma_pin(vma, 0, 0, flags);
+ flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+ ret = i915_ggtt_pin(vma, 0, flags);
if (ret) {
vma = ERR_PTR(ret);
goto err;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ff79da5657f8..dda1a0365f39 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1009,6 +1009,12 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
if (ret)
return ERR_PTR(ret);
+ ret = i915_vma_wait_for_bind(vma);
+ if (ret) {
+ i915_vma_unpin(vma);
+ return ERR_PTR(ret);
+ }
+
return vma;
}
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 84e03da0d5f9..f11abd9553e8 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -339,6 +339,25 @@ struct i915_vma_work *i915_vma_work(void)
return vw;
}
+int i915_vma_wait_for_bind(struct i915_vma *vma)
+{
+ int err = 0;
+
+ if (!rcu_access_pointer(vma->active.excl.fence)) {
+ struct dma_fence *fence;
+
+ rcu_read_lock();
+ fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
+ rcu_read_unlock();
+ if (fence) {
+ err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
+ dma_fence_put(fence);
+ }
+ }
+
+ return err;
+}
+
/**
* i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
* @vma: VMA to map
@@ -977,8 +996,14 @@ int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
do {
err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
- if (err != -ENOSPC)
+ if (err != -ENOSPC) {
+ if (!err) {
+ err = i915_vma_wait_for_bind(vma);
+ if (err)
+ i915_vma_unpin(vma);
+ }
return err;
+ }
/* Unlike i915_vma_pin, we don't take no for an answer! */
flush_idle_contexts(vm->gt);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 02b31a62951e..e1ced1df13e1 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -375,6 +375,8 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
void i915_vma_make_shrinkable(struct i915_vma *vma);
void i915_vma_make_purgeable(struct i915_vma *vma);
+int i915_vma_wait_for_bind(struct i915_vma *vma);
+
static inline int i915_vma_sync(struct i915_vma *vma)
{
/* Wait for the asynchronous bindings and pending GPU reads */
--
2.25.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex
2020-01-29 19:54 [Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex Chris Wilson
@ 2020-01-29 23:21 ` Chris Wilson
2020-01-30 4:23 ` [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex (rev2) Patchwork
2020-02-07 16:10 ` [Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex Daniel Vetter
2 siblings, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2020-01-29 23:21 UTC (permalink / raw)
To: intel-gfx
On Braswell and Broxton (also known as Valleyview and Apollolake), we
need to serialise updates of the GGTT using the big stop_machine()
hammer. This has the side effect of appearing to lockdep as a possible
reclaim (since it uses the cpuhp mutex and that is tainted by per-cpu
allocations). However, we want to use vm->mutex (including ggtt->mutex)
from wthin the shrinker and so must avoid such possible taints. For this
purpose, we introduced the asynchronous vma binding and we can apply it
to the PIN_GLOBAL so long as take care to add the necessary waits for
the worker afterwards.
Closes: https://gitlab.freedesktop.org/drm/intel/issues/211
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 +++---
drivers/gpu/drm/i915/gt/intel_ggtt.c | 1 +
drivers/gpu/drm/i915/gt/intel_gt.c | 2 +-
drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +-
drivers/gpu/drm/i915/gt/intel_ring.c | 6 ++---
drivers/gpu/drm/i915/gt/intel_timeline.c | 4 ++--
drivers/gpu/drm/i915/gt/uc/intel_guc.c | 4 ++--
drivers/gpu/drm/i915/i915_gem.c | 6 +++++
drivers/gpu/drm/i915/i915_vma.c | 27 ++++++++++++++++++++++-
drivers/gpu/drm/i915/i915_vma.h | 2 ++
10 files changed, 46 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 39fe9a5b4820..2504f4d05edf 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -527,7 +527,6 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
{
unsigned int flags;
- flags = PIN_GLOBAL;
if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
/*
* On g33, we cannot place HWS above 256MiB, so
@@ -540,11 +539,11 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
* above the mappable region (even though we never
* actually map it).
*/
- flags |= PIN_MAPPABLE;
+ flags = PIN_MAPPABLE;
else
- flags |= PIN_HIGH;
+ flags = PIN_HIGH;
- return i915_vma_pin(vma, 0, 0, flags);
+ return i915_ggtt_pin(vma, 0, flags);
}
static int init_status_page(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 79096722ce16..6af50d62d712 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -881,6 +881,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
if (ggtt->vm.clear_range != nop_clear_range)
ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
+ ggtt->vm.bind_async_flags = I915_VMA_GLOBAL_BIND;
}
ggtt->invalidate = gen8_ggtt_invalidate;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 143268083135..bf6c0f949e35 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -345,7 +345,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
goto err_unref;
}
- ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ ret = i915_ggtt_pin(vma, 0, PIN_HIGH);
if (ret)
goto err_unref;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 9a0d0282f3ca..5906fc7df2a4 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -3263,7 +3263,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
goto err;
}
- err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ err = i915_ggtt_pin(vma, 0, PIN_HIGH);
if (err)
goto err;
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index 374b28f13ca0..366013367526 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -31,17 +31,15 @@ int intel_ring_pin(struct intel_ring *ring)
if (atomic_fetch_inc(&ring->pin_count))
return 0;
- flags = PIN_GLOBAL;
-
/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
- flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+ flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
if (vma->obj->stolen)
flags |= PIN_MAPPABLE;
else
flags |= PIN_HIGH;
- ret = i915_vma_pin(vma, 0, 0, flags);
+ ret = i915_ggtt_pin(vma, 0, flags);
if (unlikely(ret))
goto err_unpin;
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 87716529cd2f..465f87b65901 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -308,7 +308,7 @@ int intel_timeline_pin(struct intel_timeline *tl)
if (atomic_add_unless(&tl->pin_count, 1, 0))
return 0;
- err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH);
if (err)
return err;
@@ -431,7 +431,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
goto err_rollback;
}
- err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ err = i915_ggtt_pin(vma, 0, PIN_HIGH);
if (err) {
__idle_hwsp_free(vma->private, cacheline);
goto err_rollback;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 5d00a3b2d914..c4c1523da7a6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -678,8 +678,8 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
if (IS_ERR(vma))
goto err;
- flags = PIN_GLOBAL | PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
- ret = i915_vma_pin(vma, 0, 0, flags);
+ flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+ ret = i915_ggtt_pin(vma, 0, flags);
if (ret) {
vma = ERR_PTR(ret);
goto err;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ff79da5657f8..dda1a0365f39 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1009,6 +1009,12 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
if (ret)
return ERR_PTR(ret);
+ ret = i915_vma_wait_for_bind(vma);
+ if (ret) {
+ i915_vma_unpin(vma);
+ return ERR_PTR(ret);
+ }
+
return vma;
}
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 84e03da0d5f9..828dc037211d 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -339,6 +339,25 @@ struct i915_vma_work *i915_vma_work(void)
return vw;
}
+int i915_vma_wait_for_bind(struct i915_vma *vma)
+{
+ int err = 0;
+
+ if (rcu_access_pointer(vma->active.excl.fence)) {
+ struct dma_fence *fence;
+
+ rcu_read_lock();
+ fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
+ rcu_read_unlock();
+ if (fence) {
+ err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
+ dma_fence_put(fence);
+ }
+ }
+
+ return err;
+}
+
/**
* i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
* @vma: VMA to map
@@ -977,8 +996,14 @@ int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
do {
err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
- if (err != -ENOSPC)
+ if (err != -ENOSPC) {
+ if (!err) {
+ err = i915_vma_wait_for_bind(vma);
+ if (err)
+ i915_vma_unpin(vma);
+ }
return err;
+ }
/* Unlike i915_vma_pin, we don't take no for an answer! */
flush_idle_contexts(vm->gt);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 02b31a62951e..e1ced1df13e1 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -375,6 +375,8 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
void i915_vma_make_shrinkable(struct i915_vma *vma);
void i915_vma_make_purgeable(struct i915_vma *vma);
+int i915_vma_wait_for_bind(struct i915_vma *vma);
+
static inline int i915_vma_sync(struct i915_vma *vma)
{
/* Wait for the asynchronous bindings and pending GPU reads */
--
2.25.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex (rev2)
2020-01-29 19:54 [Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex Chris Wilson
2020-01-29 23:21 ` Chris Wilson
@ 2020-01-30 4:23 ` Patchwork
2020-02-07 16:10 ` [Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex Daniel Vetter
2 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2020-01-30 4:23 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex (rev2)
URL : https://patchwork.freedesktop.org/series/72740/
State : failure
== Summary ==
CI Bug Log - changes from CI_DRM_7839 -> Patchwork_16324
====================================================
Summary
-------
**FAILURE**
Serious unknown changes coming with Patchwork_16324 absolutely need to be
verified manually.
If you think the reported changes have nothing to do with the changes
introduced in Patchwork_16324, please notify your bug team to allow them
to document this new failure mode, which will reduce false positives in CI.
External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16324/index.html
Possible new issues
-------------------
Here are the unknown changes that may have been introduced in Patchwork_16324:
### IGT changes ###
#### Possible regressions ####
* igt@gem_busy@busy-all:
- fi-bsw-nick: NOTRUN -> [TIMEOUT][1]
[1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16324/fi-bsw-nick/igt@gem_busy@busy-all.html
* igt@gem_exec_fence@basic-await-default:
- fi-bsw-kefka: NOTRUN -> [TIMEOUT][2]
[2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16324/fi-bsw-kefka/igt@gem_exec_fence@basic-await-default.html
#### Warnings ####
* igt@gem_exec_suspend@basic-s3:
- fi-cml-s: [FAIL][3] ([fdo#103375]) -> [TIMEOUT][4]
[3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7839/fi-cml-s/igt@gem_exec_suspend@basic-s3.html
[4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16324/fi-cml-s/igt@gem_exec_suspend@basic-s3.html
Known issues
------------
Here are the changes found in Patchwork_16324 that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@gem_exec_parallel@fds:
- fi-byt-j1900: [PASS][5] -> [TIMEOUT][6] ([fdo#112271])
[5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7839/fi-byt-j1900/igt@gem_exec_parallel@fds.html
[6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16324/fi-byt-j1900/igt@gem_exec_parallel@fds.html
* igt@i915_pm_rpm@module-reload:
- fi-skl-6770hq: [PASS][7] -> [FAIL][8] ([i915#178])
[7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7839/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html
[8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16324/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html
* igt@i915_selftest@live_execlists:
- fi-icl-y: [PASS][9] -> [DMESG-FAIL][10] ([fdo#108569])
[9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7839/fi-icl-y/igt@i915_selftest@live_execlists.html
[10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16324/fi-icl-y/igt@i915_selftest@live_execlists.html
* igt@i915_selftest@live_gtt:
- fi-kbl-7500u: [PASS][11] -> [TIMEOUT][12] ([fdo#112271])
[11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7839/fi-kbl-7500u/igt@i915_selftest@live_gtt.html
[12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16324/fi-kbl-7500u/igt@i915_selftest@live_gtt.html
#### Possible fixes ####
* igt@gem_exec_parallel@contexts:
- fi-byt-j1900: [FAIL][13] ([i915#694]) -> [PASS][14]
[13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7839/fi-byt-j1900/igt@gem_exec_parallel@contexts.html
[14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16324/fi-byt-j1900/igt@gem_exec_parallel@contexts.html
* igt@gem_exec_suspend@basic-s0:
- fi-cml-s: [FAIL][15] ([fdo#103375]) -> [PASS][16]
[15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7839/fi-cml-s/igt@gem_exec_suspend@basic-s0.html
[16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16324/fi-cml-s/igt@gem_exec_suspend@basic-s0.html
#### Warnings ####
* igt@gem_exec_parallel@contexts:
- fi-byt-n2820: [TIMEOUT][17] ([fdo#112271]) -> [FAIL][18] ([i915#694])
[17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7839/fi-byt-n2820/igt@gem_exec_parallel@contexts.html
[18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16324/fi-byt-n2820/igt@gem_exec_parallel@contexts.html
* igt@gem_exec_parallel@fds:
- fi-byt-n2820: [FAIL][19] ([i915#694]) -> [TIMEOUT][20] ([fdo#112271])
[19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7839/fi-byt-n2820/igt@gem_exec_parallel@fds.html
[20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16324/fi-byt-n2820/igt@gem_exec_parallel@fds.html
* igt@kms_chamelium@hdmi-hpd-fast:
- fi-kbl-7500u: [FAIL][21] ([fdo#111407]) -> [FAIL][22] ([fdo#111096] / [i915#323])
[21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7839/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html
[22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16324/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html
[fdo#103375]: https://bugs.freedesktop.org/show_bug.cgi?id=103375
[fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
[fdo#111096]: https://bugs.freedesktop.org/show_bug.cgi?id=111096
[fdo#111407]: https://bugs.freedesktop.org/show_bug.cgi?id=111407
[fdo#112271]: https://bugs.freedesktop.org/show_bug.cgi?id=112271
[i915#178]: https://gitlab.freedesktop.org/drm/intel/issues/178
[i915#323]: https://gitlab.freedesktop.org/drm/intel/issues/323
[i915#694]: https://gitlab.freedesktop.org/drm/intel/issues/694
Participating hosts (47 -> 40)
------------------------------
Additional (2): fi-bsw-kefka fi-kbl-r
Missing (9): fi-bsw-n3050 fi-hsw-4200u fi-glk-dsi fi-byt-squawks fi-bsw-cyan fi-ilk-650 fi-ivb-3770 fi-bdw-samus fi-skl-6600u
Build changes
-------------
* CI: CI-20190529 -> None
* Linux: CI_DRM_7839 -> Patchwork_16324
CI-20190529: 20190529
CI_DRM_7839: 41a9319a45aaf77e220c8101d6ce76ec66036ffc @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_5406: 786c79af483a9f6e4688811f74116030c734ca1f @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_16324: d4ab9b3a35df26acdf0636c7b4bcd12b0c369436 @ git://anongit.freedesktop.org/gfx-ci/linux
== Linux commits ==
d4ab9b3a35df drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16324/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex
2020-01-29 19:54 [Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex Chris Wilson
2020-01-29 23:21 ` Chris Wilson
2020-01-30 4:23 ` [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex (rev2) Patchwork
@ 2020-02-07 16:10 ` Daniel Vetter
2020-02-07 16:12 ` Daniel Vetter
2 siblings, 1 reply; 8+ messages in thread
From: Daniel Vetter @ 2020-02-07 16:10 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Wed, Jan 29, 2020 at 07:54:52PM +0000, Chris Wilson wrote:
> On Braswell and Broxton (also known as Valleyview and Apollolake), we
> need to serialise updates of the GGTT using the big stop_machine()
> hammer. This has the side effect of appearing to lockdep as a possible
> reclaim (since it uses the cpuhp mutex and that is tainted by per-cpu
> allocations). However, we want to use vm->mutex (including ggtt->mutex)
> from wthin the shrinker and so must avoid such possible taints. For this
> purpose, we introduced the asynchronous vma binding and we can apply it
> to the PIN_GLOBAL so long as take care to add the necessary waits for
> the worker afterwards.
>
> Closes: https://gitlab.freedesktop.org/drm/intel/issues/211
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Hm, isn't that just the usual "lockdep cant see past a wait on another
thread" trick that doesn't actually fix anything? The locking context for
the pte writes and the wait seem exactly the same, at first glance at
least.
I dont' really have a different idea though :-/
-Daniel
> ---
> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 +++---
> drivers/gpu/drm/i915/gt/intel_ggtt.c | 1 +
> drivers/gpu/drm/i915/gt/intel_gt.c | 2 +-
> drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +-
> drivers/gpu/drm/i915/gt/intel_ring.c | 6 ++---
> drivers/gpu/drm/i915/gt/intel_timeline.c | 4 ++--
> drivers/gpu/drm/i915/gt/uc/intel_guc.c | 4 ++--
> drivers/gpu/drm/i915/i915_gem.c | 6 +++++
> drivers/gpu/drm/i915/i915_vma.c | 27 ++++++++++++++++++++++-
> drivers/gpu/drm/i915/i915_vma.h | 2 ++
> 10 files changed, 46 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 39fe9a5b4820..2504f4d05edf 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -527,7 +527,6 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
> {
> unsigned int flags;
>
> - flags = PIN_GLOBAL;
> if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
> /*
> * On g33, we cannot place HWS above 256MiB, so
> @@ -540,11 +539,11 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
> * above the mappable region (even though we never
> * actually map it).
> */
> - flags |= PIN_MAPPABLE;
> + flags = PIN_MAPPABLE;
> else
> - flags |= PIN_HIGH;
> + flags = PIN_HIGH;
>
> - return i915_vma_pin(vma, 0, 0, flags);
> + return i915_ggtt_pin(vma, 0, flags);
> }
>
> static int init_status_page(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 79096722ce16..6af50d62d712 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -881,6 +881,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
> ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
> if (ggtt->vm.clear_range != nop_clear_range)
> ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
> + ggtt->vm.bind_async_flags = I915_VMA_GLOBAL_BIND;
> }
>
> ggtt->invalidate = gen8_ggtt_invalidate;
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
> index 143268083135..bf6c0f949e35 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -345,7 +345,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
> goto err_unref;
> }
>
> - ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
> + ret = i915_ggtt_pin(vma, 0, PIN_HIGH);
> if (ret)
> goto err_unref;
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 8f15ab7d8d88..e63ae4a17110 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -3259,7 +3259,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
> goto err;
> }
>
> - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
> + err = i915_ggtt_pin(vma, 0, PIN_HIGH);
> if (err)
> goto err;
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
> index 374b28f13ca0..366013367526 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ring.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ring.c
> @@ -31,17 +31,15 @@ int intel_ring_pin(struct intel_ring *ring)
> if (atomic_fetch_inc(&ring->pin_count))
> return 0;
>
> - flags = PIN_GLOBAL;
> -
> /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
> - flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
> + flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
>
> if (vma->obj->stolen)
> flags |= PIN_MAPPABLE;
> else
> flags |= PIN_HIGH;
>
> - ret = i915_vma_pin(vma, 0, 0, flags);
> + ret = i915_ggtt_pin(vma, 0, flags);
> if (unlikely(ret))
> goto err_unpin;
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
> index 87716529cd2f..465f87b65901 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
> @@ -308,7 +308,7 @@ int intel_timeline_pin(struct intel_timeline *tl)
> if (atomic_add_unless(&tl->pin_count, 1, 0))
> return 0;
>
> - err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
> + err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH);
> if (err)
> return err;
>
> @@ -431,7 +431,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
> goto err_rollback;
> }
>
> - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
> + err = i915_ggtt_pin(vma, 0, PIN_HIGH);
> if (err) {
> __idle_hwsp_free(vma->private, cacheline);
> goto err_rollback;
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> index 5d00a3b2d914..c4c1523da7a6 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> @@ -678,8 +678,8 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
> if (IS_ERR(vma))
> goto err;
>
> - flags = PIN_GLOBAL | PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
> - ret = i915_vma_pin(vma, 0, 0, flags);
> + flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
> + ret = i915_ggtt_pin(vma, 0, flags);
> if (ret) {
> vma = ERR_PTR(ret);
> goto err;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index ff79da5657f8..dda1a0365f39 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1009,6 +1009,12 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
> if (ret)
> return ERR_PTR(ret);
>
> + ret = i915_vma_wait_for_bind(vma);
> + if (ret) {
> + i915_vma_unpin(vma);
> + return ERR_PTR(ret);
> + }
> +
> return vma;
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 84e03da0d5f9..f11abd9553e8 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -339,6 +339,25 @@ struct i915_vma_work *i915_vma_work(void)
> return vw;
> }
>
> +int i915_vma_wait_for_bind(struct i915_vma *vma)
> +{
> + int err = 0;
> +
> + if (!rcu_access_pointer(vma->active.excl.fence)) {
> + struct dma_fence *fence;
> +
> + rcu_read_lock();
> + fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
> + rcu_read_unlock();
> + if (fence) {
> + err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
> + dma_fence_put(fence);
> + }
> + }
> +
> + return err;
> +}
> +
> /**
> * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
> * @vma: VMA to map
> @@ -977,8 +996,14 @@ int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
>
> do {
> err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
> - if (err != -ENOSPC)
> + if (err != -ENOSPC) {
> + if (!err) {
> + err = i915_vma_wait_for_bind(vma);
> + if (err)
> + i915_vma_unpin(vma);
> + }
> return err;
> + }
>
> /* Unlike i915_vma_pin, we don't take no for an answer! */
> flush_idle_contexts(vm->gt);
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 02b31a62951e..e1ced1df13e1 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -375,6 +375,8 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
> void i915_vma_make_shrinkable(struct i915_vma *vma);
> void i915_vma_make_purgeable(struct i915_vma *vma);
>
> +int i915_vma_wait_for_bind(struct i915_vma *vma);
> +
> static inline int i915_vma_sync(struct i915_vma *vma)
> {
> /* Wait for the asynchronous bindings and pending GPU reads */
> --
> 2.25.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex
2020-02-07 16:10 ` [Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex Daniel Vetter
@ 2020-02-07 16:12 ` Daniel Vetter
0 siblings, 0 replies; 8+ messages in thread
From: Daniel Vetter @ 2020-02-07 16:12 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Fri, Feb 07, 2020 at 05:10:40PM +0100, Daniel Vetter wrote:
> On Wed, Jan 29, 2020 at 07:54:52PM +0000, Chris Wilson wrote:
> > On Braswell and Broxton (also known as Valleyview and Apollolake), we
> > need to serialise updates of the GGTT using the big stop_machine()
> > hammer. This has the side effect of appearing to lockdep as a possible
> > reclaim (since it uses the cpuhp mutex and that is tainted by per-cpu
> > allocations). However, we want to use vm->mutex (including ggtt->mutex)
> > from wthin the shrinker and so must avoid such possible taints. For this
> > purpose, we introduced the asynchronous vma binding and we can apply it
> > to the PIN_GLOBAL so long as take care to add the necessary waits for
> > the worker afterwards.
> >
> > Closes: https://gitlab.freedesktop.org/drm/intel/issues/211
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>
> Hm, isn't that just the usual "lockdep cant see past a wait on another
> thread" trick that doesn't actually fix anything? The locking context for
> the pte writes and the wait seem exactly the same, at first glance at
> least.
>
> I dont' really have a different idea though :-/
2nd one I just realized: dma_fence can't be blocked on anything that might
go into reclaim, so doing this with a dma_fence also doesn't really work.
-Daniel
> > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 +++---
> > drivers/gpu/drm/i915/gt/intel_ggtt.c | 1 +
> > drivers/gpu/drm/i915/gt/intel_gt.c | 2 +-
> > drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +-
> > drivers/gpu/drm/i915/gt/intel_ring.c | 6 ++---
> > drivers/gpu/drm/i915/gt/intel_timeline.c | 4 ++--
> > drivers/gpu/drm/i915/gt/uc/intel_guc.c | 4 ++--
> > drivers/gpu/drm/i915/i915_gem.c | 6 +++++
> > drivers/gpu/drm/i915/i915_vma.c | 27 ++++++++++++++++++++++-
> > drivers/gpu/drm/i915/i915_vma.h | 2 ++
> > 10 files changed, 46 insertions(+), 15 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > index 39fe9a5b4820..2504f4d05edf 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > @@ -527,7 +527,6 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
> > {
> > unsigned int flags;
> >
> > - flags = PIN_GLOBAL;
> > if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
> > /*
> > * On g33, we cannot place HWS above 256MiB, so
> > @@ -540,11 +539,11 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
> > * above the mappable region (even though we never
> > * actually map it).
> > */
> > - flags |= PIN_MAPPABLE;
> > + flags = PIN_MAPPABLE;
> > else
> > - flags |= PIN_HIGH;
> > + flags = PIN_HIGH;
> >
> > - return i915_vma_pin(vma, 0, 0, flags);
> > + return i915_ggtt_pin(vma, 0, flags);
> > }
> >
> > static int init_status_page(struct intel_engine_cs *engine)
> > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > index 79096722ce16..6af50d62d712 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > @@ -881,6 +881,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
> > ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
> > if (ggtt->vm.clear_range != nop_clear_range)
> > ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
> > + ggtt->vm.bind_async_flags = I915_VMA_GLOBAL_BIND;
> > }
> >
> > ggtt->invalidate = gen8_ggtt_invalidate;
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
> > index 143268083135..bf6c0f949e35 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> > @@ -345,7 +345,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
> > goto err_unref;
> > }
> >
> > - ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
> > + ret = i915_ggtt_pin(vma, 0, PIN_HIGH);
> > if (ret)
> > goto err_unref;
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index 8f15ab7d8d88..e63ae4a17110 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -3259,7 +3259,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
> > goto err;
> > }
> >
> > - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
> > + err = i915_ggtt_pin(vma, 0, PIN_HIGH);
> > if (err)
> > goto err;
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
> > index 374b28f13ca0..366013367526 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_ring.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_ring.c
> > @@ -31,17 +31,15 @@ int intel_ring_pin(struct intel_ring *ring)
> > if (atomic_fetch_inc(&ring->pin_count))
> > return 0;
> >
> > - flags = PIN_GLOBAL;
> > -
> > /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
> > - flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
> > + flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
> >
> > if (vma->obj->stolen)
> > flags |= PIN_MAPPABLE;
> > else
> > flags |= PIN_HIGH;
> >
> > - ret = i915_vma_pin(vma, 0, 0, flags);
> > + ret = i915_ggtt_pin(vma, 0, flags);
> > if (unlikely(ret))
> > goto err_unpin;
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
> > index 87716529cd2f..465f87b65901 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
> > @@ -308,7 +308,7 @@ int intel_timeline_pin(struct intel_timeline *tl)
> > if (atomic_add_unless(&tl->pin_count, 1, 0))
> > return 0;
> >
> > - err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
> > + err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH);
> > if (err)
> > return err;
> >
> > @@ -431,7 +431,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
> > goto err_rollback;
> > }
> >
> > - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
> > + err = i915_ggtt_pin(vma, 0, PIN_HIGH);
> > if (err) {
> > __idle_hwsp_free(vma->private, cacheline);
> > goto err_rollback;
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> > index 5d00a3b2d914..c4c1523da7a6 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> > @@ -678,8 +678,8 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
> > if (IS_ERR(vma))
> > goto err;
> >
> > - flags = PIN_GLOBAL | PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
> > - ret = i915_vma_pin(vma, 0, 0, flags);
> > + flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
> > + ret = i915_ggtt_pin(vma, 0, flags);
> > if (ret) {
> > vma = ERR_PTR(ret);
> > goto err;
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index ff79da5657f8..dda1a0365f39 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -1009,6 +1009,12 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
> > if (ret)
> > return ERR_PTR(ret);
> >
> > + ret = i915_vma_wait_for_bind(vma);
> > + if (ret) {
> > + i915_vma_unpin(vma);
> > + return ERR_PTR(ret);
> > + }
> > +
> > return vma;
> > }
> >
> > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> > index 84e03da0d5f9..f11abd9553e8 100644
> > --- a/drivers/gpu/drm/i915/i915_vma.c
> > +++ b/drivers/gpu/drm/i915/i915_vma.c
> > @@ -339,6 +339,25 @@ struct i915_vma_work *i915_vma_work(void)
> > return vw;
> > }
> >
> > +int i915_vma_wait_for_bind(struct i915_vma *vma)
> > +{
> > + int err = 0;
> > +
> > + if (!rcu_access_pointer(vma->active.excl.fence)) {
> > + struct dma_fence *fence;
> > +
> > + rcu_read_lock();
> > + fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
> > + rcu_read_unlock();
> > + if (fence) {
> > + err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
> > + dma_fence_put(fence);
> > + }
> > + }
> > +
> > + return err;
> > +}
> > +
> > /**
> > * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
> > * @vma: VMA to map
> > @@ -977,8 +996,14 @@ int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
> >
> > do {
> > err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
> > - if (err != -ENOSPC)
> > + if (err != -ENOSPC) {
> > + if (!err) {
> > + err = i915_vma_wait_for_bind(vma);
> > + if (err)
> > + i915_vma_unpin(vma);
> > + }
> > return err;
> > + }
> >
> > /* Unlike i915_vma_pin, we don't take no for an answer! */
> > flush_idle_contexts(vm->gt);
> > diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> > index 02b31a62951e..e1ced1df13e1 100644
> > --- a/drivers/gpu/drm/i915/i915_vma.h
> > +++ b/drivers/gpu/drm/i915/i915_vma.h
> > @@ -375,6 +375,8 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
> > void i915_vma_make_shrinkable(struct i915_vma *vma);
> > void i915_vma_make_purgeable(struct i915_vma *vma);
> >
> > +int i915_vma_wait_for_bind(struct i915_vma *vma);
> > +
> > static inline int i915_vma_sync(struct i915_vma *vma)
> > {
> > /* Wait for the asynchronous bindings and pending GPU reads */
> > --
> > 2.25.0
> >
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex
2020-01-30 15:21 ` Chris Wilson
@ 2020-01-30 16:15 ` Ruhl, Michael J
0 siblings, 0 replies; 8+ messages in thread
From: Ruhl, Michael J @ 2020-01-30 16:15 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
>-----Original Message-----
>From: Intel-gfx <intel-gfx-bounces@lists.freedesktop.org> On Behalf Of Chris
>Wilson
>Sent: Thursday, January 30, 2020 10:21 AM
>To: intel-gfx@lists.freedesktop.org
>Subject: [Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim
>tainting the ggtt->mutex
>
>On Braswell and Broxton (also known as Valleyview and Apollolake), we
>need to serialise updates of the GGTT using the big stop_machine()
>hammer. This has the side effect of appearing to lockdep as a possible
>reclaim (since it uses the cpuhp mutex and that is tainted by per-cpu
>allocations). However, we want to use vm->mutex (including ggtt->mutex)
>from wthin the shrinker and so must avoid such possible taints. For this
s/wthin/within
m
>purpose, we introduced the asynchronous vma binding and we can apply it
>to the PIN_GLOBAL so long as take care to add the necessary waits for
>the worker afterwards.
>
>Closes: https://gitlab.freedesktop.org/drm/intel/issues/211
>Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>---
> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 ++---
> drivers/gpu/drm/i915/gt/intel_ggtt.c | 3 +-
> drivers/gpu/drm/i915/gt/intel_gt.c | 2 +-
> drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +-
> drivers/gpu/drm/i915/gt/intel_ring.c | 6 ++--
> drivers/gpu/drm/i915/gt/intel_timeline.c | 4 +--
> drivers/gpu/drm/i915/gt/uc/intel_guc.c | 4 +--
> drivers/gpu/drm/i915/i915_active.c | 10 ++++--
> drivers/gpu/drm/i915/i915_active.h | 3 +-
> drivers/gpu/drm/i915/i915_gem.c | 6 ++++
> drivers/gpu/drm/i915/i915_vma.c | 38 +++++++++++++++++++++--
> drivers/gpu/drm/i915/i915_vma.h | 2 ++
> 12 files changed, 66 insertions(+), 21 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>index decb63462410..86af5edd6933 100644
>--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>@@ -527,7 +527,6 @@ static int pin_ggtt_status_page(struct intel_engine_cs
>*engine,
> {
> unsigned int flags;
>
>- flags = PIN_GLOBAL;
> if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt-
>>ggtt))
> /*
> * On g33, we cannot place HWS above 256MiB, so
>@@ -540,11 +539,11 @@ static int pin_ggtt_status_page(struct
>intel_engine_cs *engine,
> * above the mappable region (even though we never
> * actually map it).
> */
>- flags |= PIN_MAPPABLE;
>+ flags = PIN_MAPPABLE;
> else
>- flags |= PIN_HIGH;
>+ flags = PIN_HIGH;
>
>- return i915_vma_pin(vma, 0, 0, flags);
>+ return i915_ggtt_pin(vma, 0, flags);
> }
>
> static int init_status_page(struct intel_engine_cs *engine)
>diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>index f624fc5c19c3..d9fd25480a46 100644
>--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>@@ -109,7 +109,7 @@ static void ggtt_suspend_mappings(struct i915_ggtt
>*ggtt)
> struct i915_vma *vma;
>
> list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
>- i915_vma_sync(vma);
>+ i915_vma_wait_for_bind(vma);
>
> ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
> ggtt->invalidate(ggtt);
>@@ -851,6 +851,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
> IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
> ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
> ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
>+ ggtt->vm.bind_async_flags = I915_VMA_GLOBAL_BIND;
> }
>
> ggtt->invalidate = gen8_ggtt_invalidate;
>diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c
>b/drivers/gpu/drm/i915/gt/intel_gt.c
>index 51019611bc1e..f1f1b306e0af 100644
>--- a/drivers/gpu/drm/i915/gt/intel_gt.c
>+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
>@@ -344,7 +344,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt,
>unsigned int size)
> goto err_unref;
> }
>
>- ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
>+ ret = i915_ggtt_pin(vma, 0, PIN_HIGH);
> if (ret)
> goto err_unref;
>
>diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c
>b/drivers/gpu/drm/i915/gt/intel_lrc.c
>index eb83c87c8b4e..fc0a72cc54fd 100644
>--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>@@ -3268,7 +3268,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs
>*engine)
> goto err;
> }
>
>- err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
>+ err = i915_ggtt_pin(vma, 0, PIN_HIGH);
> if (err)
> goto err;
>
>diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c
>b/drivers/gpu/drm/i915/gt/intel_ring.c
>index 374b28f13ca0..366013367526 100644
>--- a/drivers/gpu/drm/i915/gt/intel_ring.c
>+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
>@@ -31,17 +31,15 @@ int intel_ring_pin(struct intel_ring *ring)
> if (atomic_fetch_inc(&ring->pin_count))
> return 0;
>
>- flags = PIN_GLOBAL;
>-
> /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
>- flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
>+ flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
>
> if (vma->obj->stolen)
> flags |= PIN_MAPPABLE;
> else
> flags |= PIN_HIGH;
>
>- ret = i915_vma_pin(vma, 0, 0, flags);
>+ ret = i915_ggtt_pin(vma, 0, flags);
> if (unlikely(ret))
> goto err_unpin;
>
>diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c
>b/drivers/gpu/drm/i915/gt/intel_timeline.c
>index 87716529cd2f..465f87b65901 100644
>--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
>+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
>@@ -308,7 +308,7 @@ int intel_timeline_pin(struct intel_timeline *tl)
> if (atomic_add_unless(&tl->pin_count, 1, 0))
> return 0;
>
>- err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
>+ err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH);
> if (err)
> return err;
>
>@@ -431,7 +431,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
> goto err_rollback;
> }
>
>- err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
>+ err = i915_ggtt_pin(vma, 0, PIN_HIGH);
> if (err) {
> __idle_hwsp_free(vma->private, cacheline);
> goto err_rollback;
>diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
>b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
>index 5d00a3b2d914..c4c1523da7a6 100644
>--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
>+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
>@@ -678,8 +678,8 @@ struct i915_vma *intel_guc_allocate_vma(struct
>intel_guc *guc, u32 size)
> if (IS_ERR(vma))
> goto err;
>
>- flags = PIN_GLOBAL | PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
>- ret = i915_vma_pin(vma, 0, 0, flags);
>+ flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
>+ ret = i915_ggtt_pin(vma, 0, flags);
> if (ret) {
> vma = ERR_PTR(ret);
> goto err;
>diff --git a/drivers/gpu/drm/i915/i915_active.c
>b/drivers/gpu/drm/i915/i915_active.c
>index 3d2e7cf55e52..da58e5d084f4 100644
>--- a/drivers/gpu/drm/i915/i915_active.c
>+++ b/drivers/gpu/drm/i915/i915_active.c
>@@ -390,13 +390,19 @@ int i915_active_ref(struct i915_active *ref,
> return err;
> }
>
>-void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
>+struct dma_fence *
>+i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
> {
>+ struct dma_fence *prev;
>+
> /* We expect the caller to manage the exclusive timeline ordering */
> GEM_BUG_ON(i915_active_is_idle(ref));
>
>- if (!__i915_active_fence_set(&ref->excl, f))
>+ prev = __i915_active_fence_set(&ref->excl, f);
>+ if (!prev)
> atomic_inc(&ref->count);
>+
>+ return prev;
> }
>
> bool i915_active_acquire_if_busy(struct i915_active *ref)
>diff --git a/drivers/gpu/drm/i915/i915_active.h
>b/drivers/gpu/drm/i915/i915_active.h
>index 51e1e854ca55..973ff0447c6c 100644
>--- a/drivers/gpu/drm/i915/i915_active.h
>+++ b/drivers/gpu/drm/i915/i915_active.h
>@@ -173,7 +173,8 @@ i915_active_add_request(struct i915_active *ref,
>struct i915_request *rq)
> return i915_active_ref(ref, i915_request_timeline(rq), &rq->fence);
> }
>
>-void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
>+struct dma_fence *
>+i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
>
> static inline bool i915_active_has_exclusive(struct i915_active *ref)
> {
>diff --git a/drivers/gpu/drm/i915/i915_gem.c
>b/drivers/gpu/drm/i915/i915_gem.c
>index ff79da5657f8..dda1a0365f39 100644
>--- a/drivers/gpu/drm/i915/i915_gem.c
>+++ b/drivers/gpu/drm/i915/i915_gem.c
>@@ -1009,6 +1009,12 @@ i915_gem_object_ggtt_pin(struct
>drm_i915_gem_object *obj,
> if (ret)
> return ERR_PTR(ret);
>
>+ ret = i915_vma_wait_for_bind(vma);
>+ if (ret) {
>+ i915_vma_unpin(vma);
>+ return ERR_PTR(ret);
>+ }
>+
> return vma;
> }
>
>diff --git a/drivers/gpu/drm/i915/i915_vma.c
>b/drivers/gpu/drm/i915/i915_vma.c
>index 84e03da0d5f9..f5dc84e3fef8 100644
>--- a/drivers/gpu/drm/i915/i915_vma.c
>+++ b/drivers/gpu/drm/i915/i915_vma.c
>@@ -294,6 +294,7 @@ struct i915_vma_work {
> struct dma_fence_work base;
> struct i915_vma *vma;
> struct drm_i915_gem_object *pinned;
>+ struct i915_sw_dma_fence_cb cb;
> enum i915_cache_level cache_level;
> unsigned int flags;
> };
>@@ -339,6 +340,25 @@ struct i915_vma_work *i915_vma_work(void)
> return vw;
> }
>
>+int i915_vma_wait_for_bind(struct i915_vma *vma)
>+{
>+ int err = 0;
>+
>+ if (rcu_access_pointer(vma->active.excl.fence)) {
>+ struct dma_fence *fence;
>+
>+ rcu_read_lock();
>+ fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
>+ rcu_read_unlock();
>+ if (fence) {
>+ err = dma_fence_wait(fence,
>MAX_SCHEDULE_TIMEOUT);
>+ dma_fence_put(fence);
>+ }
>+ }
>+
>+ return err;
>+}
>+
> /**
> * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address
>space.
> * @vma: VMA to map
>@@ -386,6 +406,8 @@ int i915_vma_bind(struct i915_vma *vma,
>
> trace_i915_vma_bind(vma, bind_flags);
> if (work && (bind_flags & ~vma_flags) & vma->vm-
>>bind_async_flags) {
>+ struct dma_fence *prev;
>+
> work->vma = vma;
> work->cache_level = cache_level;
> work->flags = bind_flags | I915_VMA_ALLOC;
>@@ -399,8 +421,12 @@ int i915_vma_bind(struct i915_vma *vma,
> * part of the obj->resv->excl_fence as it only affects
> * execution and not content or object's backing store
>lifetime.
> */
>- GEM_BUG_ON(i915_active_has_exclusive(&vma->active));
>- i915_active_set_exclusive(&vma->active, &work->base.dma);
>+ prev = i915_active_set_exclusive(&vma->active, &work-
>>base.dma);
>+ if (prev)
>+ __i915_sw_fence_await_dma_fence(&work-
>>base.chain,
>+ prev,
>+ &work->cb);
>+
> work->base.dma.error = 0; /* enable the queue_work() */
>
> if (vma->obj) {
>@@ -977,8 +1003,14 @@ int i915_ggtt_pin(struct i915_vma *vma, u32 align,
>unsigned int flags)
>
> do {
> err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
>- if (err != -ENOSPC)
>+ if (err != -ENOSPC) {
>+ if (!err) {
>+ err = i915_vma_wait_for_bind(vma);
>+ if (err)
>+ i915_vma_unpin(vma);
>+ }
> return err;
>+ }
>
> /* Unlike i915_vma_pin, we don't take no for an answer! */
> flush_idle_contexts(vm->gt);
>diff --git a/drivers/gpu/drm/i915/i915_vma.h
>b/drivers/gpu/drm/i915/i915_vma.h
>index 02b31a62951e..e1ced1df13e1 100644
>--- a/drivers/gpu/drm/i915/i915_vma.h
>+++ b/drivers/gpu/drm/i915/i915_vma.h
>@@ -375,6 +375,8 @@ struct i915_vma *i915_vma_make_unshrinkable(struct
>i915_vma *vma);
> void i915_vma_make_shrinkable(struct i915_vma *vma);
> void i915_vma_make_purgeable(struct i915_vma *vma);
>
>+int i915_vma_wait_for_bind(struct i915_vma *vma);
>+
> static inline int i915_vma_sync(struct i915_vma *vma)
> {
> /* Wait for the asynchronous bindings and pending GPU reads */
>--
>2.25.0
>
>_______________________________________________
>Intel-gfx mailing list
>Intel-gfx@lists.freedesktop.org
>https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex
2020-01-30 9:22 [Intel-gfx] [PATCH 2/2] " Chris Wilson
2020-01-30 11:06 ` [Intel-gfx] [PATCH] " Chris Wilson
@ 2020-01-30 15:21 ` Chris Wilson
2020-01-30 16:15 ` Ruhl, Michael J
1 sibling, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2020-01-30 15:21 UTC (permalink / raw)
To: intel-gfx
On Braswell and Broxton (also known as Valleyview and Apollolake), we
need to serialise updates of the GGTT using the big stop_machine()
hammer. This has the side effect of appearing to lockdep as a possible
reclaim (since it uses the cpuhp mutex and that is tainted by per-cpu
allocations). However, we want to use vm->mutex (including ggtt->mutex)
from wthin the shrinker and so must avoid such possible taints. For this
purpose, we introduced the asynchronous vma binding and we can apply it
to the PIN_GLOBAL so long as take care to add the necessary waits for
the worker afterwards.
Closes: https://gitlab.freedesktop.org/drm/intel/issues/211
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 ++---
drivers/gpu/drm/i915/gt/intel_ggtt.c | 3 +-
drivers/gpu/drm/i915/gt/intel_gt.c | 2 +-
drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +-
drivers/gpu/drm/i915/gt/intel_ring.c | 6 ++--
drivers/gpu/drm/i915/gt/intel_timeline.c | 4 +--
drivers/gpu/drm/i915/gt/uc/intel_guc.c | 4 +--
drivers/gpu/drm/i915/i915_active.c | 10 ++++--
drivers/gpu/drm/i915/i915_active.h | 3 +-
drivers/gpu/drm/i915/i915_gem.c | 6 ++++
drivers/gpu/drm/i915/i915_vma.c | 38 +++++++++++++++++++++--
drivers/gpu/drm/i915/i915_vma.h | 2 ++
12 files changed, 66 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index decb63462410..86af5edd6933 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -527,7 +527,6 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
{
unsigned int flags;
- flags = PIN_GLOBAL;
if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
/*
* On g33, we cannot place HWS above 256MiB, so
@@ -540,11 +539,11 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
* above the mappable region (even though we never
* actually map it).
*/
- flags |= PIN_MAPPABLE;
+ flags = PIN_MAPPABLE;
else
- flags |= PIN_HIGH;
+ flags = PIN_HIGH;
- return i915_vma_pin(vma, 0, 0, flags);
+ return i915_ggtt_pin(vma, 0, flags);
}
static int init_status_page(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index f624fc5c19c3..d9fd25480a46 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -109,7 +109,7 @@ static void ggtt_suspend_mappings(struct i915_ggtt *ggtt)
struct i915_vma *vma;
list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
- i915_vma_sync(vma);
+ i915_vma_wait_for_bind(vma);
ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
ggtt->invalidate(ggtt);
@@ -851,6 +851,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
+ ggtt->vm.bind_async_flags = I915_VMA_GLOBAL_BIND;
}
ggtt->invalidate = gen8_ggtt_invalidate;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 51019611bc1e..f1f1b306e0af 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -344,7 +344,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
goto err_unref;
}
- ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ ret = i915_ggtt_pin(vma, 0, PIN_HIGH);
if (ret)
goto err_unref;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index eb83c87c8b4e..fc0a72cc54fd 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -3268,7 +3268,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
goto err;
}
- err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ err = i915_ggtt_pin(vma, 0, PIN_HIGH);
if (err)
goto err;
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index 374b28f13ca0..366013367526 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -31,17 +31,15 @@ int intel_ring_pin(struct intel_ring *ring)
if (atomic_fetch_inc(&ring->pin_count))
return 0;
- flags = PIN_GLOBAL;
-
/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
- flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+ flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
if (vma->obj->stolen)
flags |= PIN_MAPPABLE;
else
flags |= PIN_HIGH;
- ret = i915_vma_pin(vma, 0, 0, flags);
+ ret = i915_ggtt_pin(vma, 0, flags);
if (unlikely(ret))
goto err_unpin;
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 87716529cd2f..465f87b65901 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -308,7 +308,7 @@ int intel_timeline_pin(struct intel_timeline *tl)
if (atomic_add_unless(&tl->pin_count, 1, 0))
return 0;
- err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH);
if (err)
return err;
@@ -431,7 +431,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
goto err_rollback;
}
- err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ err = i915_ggtt_pin(vma, 0, PIN_HIGH);
if (err) {
__idle_hwsp_free(vma->private, cacheline);
goto err_rollback;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 5d00a3b2d914..c4c1523da7a6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -678,8 +678,8 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
if (IS_ERR(vma))
goto err;
- flags = PIN_GLOBAL | PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
- ret = i915_vma_pin(vma, 0, 0, flags);
+ flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+ ret = i915_ggtt_pin(vma, 0, flags);
if (ret) {
vma = ERR_PTR(ret);
goto err;
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 3d2e7cf55e52..da58e5d084f4 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -390,13 +390,19 @@ int i915_active_ref(struct i915_active *ref,
return err;
}
-void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
+struct dma_fence *
+i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
{
+ struct dma_fence *prev;
+
/* We expect the caller to manage the exclusive timeline ordering */
GEM_BUG_ON(i915_active_is_idle(ref));
- if (!__i915_active_fence_set(&ref->excl, f))
+ prev = __i915_active_fence_set(&ref->excl, f);
+ if (!prev)
atomic_inc(&ref->count);
+
+ return prev;
}
bool i915_active_acquire_if_busy(struct i915_active *ref)
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 51e1e854ca55..973ff0447c6c 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -173,7 +173,8 @@ i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
return i915_active_ref(ref, i915_request_timeline(rq), &rq->fence);
}
-void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
+struct dma_fence *
+i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
static inline bool i915_active_has_exclusive(struct i915_active *ref)
{
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ff79da5657f8..dda1a0365f39 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1009,6 +1009,12 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
if (ret)
return ERR_PTR(ret);
+ ret = i915_vma_wait_for_bind(vma);
+ if (ret) {
+ i915_vma_unpin(vma);
+ return ERR_PTR(ret);
+ }
+
return vma;
}
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 84e03da0d5f9..f5dc84e3fef8 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -294,6 +294,7 @@ struct i915_vma_work {
struct dma_fence_work base;
struct i915_vma *vma;
struct drm_i915_gem_object *pinned;
+ struct i915_sw_dma_fence_cb cb;
enum i915_cache_level cache_level;
unsigned int flags;
};
@@ -339,6 +340,25 @@ struct i915_vma_work *i915_vma_work(void)
return vw;
}
+int i915_vma_wait_for_bind(struct i915_vma *vma)
+{
+ int err = 0;
+
+ if (rcu_access_pointer(vma->active.excl.fence)) {
+ struct dma_fence *fence;
+
+ rcu_read_lock();
+ fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
+ rcu_read_unlock();
+ if (fence) {
+ err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
+ dma_fence_put(fence);
+ }
+ }
+
+ return err;
+}
+
/**
* i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
* @vma: VMA to map
@@ -386,6 +406,8 @@ int i915_vma_bind(struct i915_vma *vma,
trace_i915_vma_bind(vma, bind_flags);
if (work && (bind_flags & ~vma_flags) & vma->vm->bind_async_flags) {
+ struct dma_fence *prev;
+
work->vma = vma;
work->cache_level = cache_level;
work->flags = bind_flags | I915_VMA_ALLOC;
@@ -399,8 +421,12 @@ int i915_vma_bind(struct i915_vma *vma,
* part of the obj->resv->excl_fence as it only affects
* execution and not content or object's backing store lifetime.
*/
- GEM_BUG_ON(i915_active_has_exclusive(&vma->active));
- i915_active_set_exclusive(&vma->active, &work->base.dma);
+ prev = i915_active_set_exclusive(&vma->active, &work->base.dma);
+ if (prev)
+ __i915_sw_fence_await_dma_fence(&work->base.chain,
+ prev,
+ &work->cb);
+
work->base.dma.error = 0; /* enable the queue_work() */
if (vma->obj) {
@@ -977,8 +1003,14 @@ int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
do {
err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
- if (err != -ENOSPC)
+ if (err != -ENOSPC) {
+ if (!err) {
+ err = i915_vma_wait_for_bind(vma);
+ if (err)
+ i915_vma_unpin(vma);
+ }
return err;
+ }
/* Unlike i915_vma_pin, we don't take no for an answer! */
flush_idle_contexts(vm->gt);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 02b31a62951e..e1ced1df13e1 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -375,6 +375,8 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
void i915_vma_make_shrinkable(struct i915_vma *vma);
void i915_vma_make_purgeable(struct i915_vma *vma);
+int i915_vma_wait_for_bind(struct i915_vma *vma);
+
static inline int i915_vma_sync(struct i915_vma *vma)
{
/* Wait for the asynchronous bindings and pending GPU reads */
--
2.25.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex
2020-01-30 9:22 [Intel-gfx] [PATCH 2/2] " Chris Wilson
@ 2020-01-30 11:06 ` Chris Wilson
2020-01-30 15:21 ` Chris Wilson
1 sibling, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2020-01-30 11:06 UTC (permalink / raw)
To: intel-gfx
On Braswell and Broxton (also known as Valleyview and Apollolake), we
need to serialise updates of the GGTT using the big stop_machine()
hammer. This has the side effect of appearing to lockdep as a possible
reclaim (since it uses the cpuhp mutex and that is tainted by per-cpu
allocations). However, we want to use vm->mutex (including ggtt->mutex)
from wthin the shrinker and so must avoid such possible taints. For this
purpose, we introduced the asynchronous vma binding and we can apply it
to the PIN_GLOBAL so long as take care to add the necessary waits for
the worker afterwards.
Closes: https://gitlab.freedesktop.org/drm/intel/issues/211
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 ++---
drivers/gpu/drm/i915/gt/intel_ggtt.c | 5 +--
drivers/gpu/drm/i915/gt/intel_gt.c | 2 +-
drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +-
drivers/gpu/drm/i915/gt/intel_ring.c | 6 ++--
drivers/gpu/drm/i915/gt/intel_timeline.c | 4 +--
drivers/gpu/drm/i915/gt/uc/intel_guc.c | 4 +--
drivers/gpu/drm/i915/i915_active.c | 10 ++++--
drivers/gpu/drm/i915/i915_active.h | 3 +-
drivers/gpu/drm/i915/i915_gem.c | 6 ++++
drivers/gpu/drm/i915/i915_vma.c | 38 +++++++++++++++++++++--
drivers/gpu/drm/i915/i915_vma.h | 2 ++
12 files changed, 65 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 8707d7264490..449bf7770300 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -527,7 +527,6 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
{
unsigned int flags;
- flags = PIN_GLOBAL;
if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
/*
* On g33, we cannot place HWS above 256MiB, so
@@ -540,11 +539,11 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
* above the mappable region (even though we never
* actually map it).
*/
- flags |= PIN_MAPPABLE;
+ flags = PIN_MAPPABLE;
else
- flags |= PIN_HIGH;
+ flags = PIN_HIGH;
- return i915_vma_pin(vma, 0, 0, flags);
+ return i915_ggtt_pin(vma, 0, flags);
}
static int init_status_page(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 81a63f7bc6c4..3106424d17d7 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -854,6 +854,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
+ ggtt->vm.bind_async_flags = I915_VMA_GLOBAL_BIND;
}
ggtt->invalidate = gen8_ggtt_invalidate;
@@ -1161,8 +1162,6 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
intel_gt_check_and_clear_faults(ggtt->vm.gt);
- mutex_lock(&ggtt->vm.mutex);
-
/* First fill our portion of the GTT with scratch pages */
ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
@@ -1189,8 +1188,6 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
atomic_set(&ggtt->vm.open, open);
ggtt->invalidate(ggtt);
- mutex_unlock(&ggtt->vm.mutex);
-
if (flush)
wbinvd_on_all_cpus();
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 143268083135..bf6c0f949e35 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -345,7 +345,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
goto err_unref;
}
- ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ ret = i915_ggtt_pin(vma, 0, PIN_HIGH);
if (ret)
goto err_unref;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index eb83c87c8b4e..fc0a72cc54fd 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -3268,7 +3268,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
goto err;
}
- err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ err = i915_ggtt_pin(vma, 0, PIN_HIGH);
if (err)
goto err;
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index 374b28f13ca0..366013367526 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -31,17 +31,15 @@ int intel_ring_pin(struct intel_ring *ring)
if (atomic_fetch_inc(&ring->pin_count))
return 0;
- flags = PIN_GLOBAL;
-
/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
- flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+ flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
if (vma->obj->stolen)
flags |= PIN_MAPPABLE;
else
flags |= PIN_HIGH;
- ret = i915_vma_pin(vma, 0, 0, flags);
+ ret = i915_ggtt_pin(vma, 0, flags);
if (unlikely(ret))
goto err_unpin;
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 87716529cd2f..465f87b65901 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -308,7 +308,7 @@ int intel_timeline_pin(struct intel_timeline *tl)
if (atomic_add_unless(&tl->pin_count, 1, 0))
return 0;
- err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH);
if (err)
return err;
@@ -431,7 +431,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
goto err_rollback;
}
- err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ err = i915_ggtt_pin(vma, 0, PIN_HIGH);
if (err) {
__idle_hwsp_free(vma->private, cacheline);
goto err_rollback;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 5d00a3b2d914..c4c1523da7a6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -678,8 +678,8 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
if (IS_ERR(vma))
goto err;
- flags = PIN_GLOBAL | PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
- ret = i915_vma_pin(vma, 0, 0, flags);
+ flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+ ret = i915_ggtt_pin(vma, 0, flags);
if (ret) {
vma = ERR_PTR(ret);
goto err;
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 3d2e7cf55e52..da58e5d084f4 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -390,13 +390,19 @@ int i915_active_ref(struct i915_active *ref,
return err;
}
-void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
+struct dma_fence *
+i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
{
+ struct dma_fence *prev;
+
/* We expect the caller to manage the exclusive timeline ordering */
GEM_BUG_ON(i915_active_is_idle(ref));
- if (!__i915_active_fence_set(&ref->excl, f))
+ prev = __i915_active_fence_set(&ref->excl, f);
+ if (!prev)
atomic_inc(&ref->count);
+
+ return prev;
}
bool i915_active_acquire_if_busy(struct i915_active *ref)
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 51e1e854ca55..973ff0447c6c 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -173,7 +173,8 @@ i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
return i915_active_ref(ref, i915_request_timeline(rq), &rq->fence);
}
-void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
+struct dma_fence *
+i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
static inline bool i915_active_has_exclusive(struct i915_active *ref)
{
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ff79da5657f8..dda1a0365f39 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1009,6 +1009,12 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
if (ret)
return ERR_PTR(ret);
+ ret = i915_vma_wait_for_bind(vma);
+ if (ret) {
+ i915_vma_unpin(vma);
+ return ERR_PTR(ret);
+ }
+
return vma;
}
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 84e03da0d5f9..f5dc84e3fef8 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -294,6 +294,7 @@ struct i915_vma_work {
struct dma_fence_work base;
struct i915_vma *vma;
struct drm_i915_gem_object *pinned;
+ struct i915_sw_dma_fence_cb cb;
enum i915_cache_level cache_level;
unsigned int flags;
};
@@ -339,6 +340,25 @@ struct i915_vma_work *i915_vma_work(void)
return vw;
}
+int i915_vma_wait_for_bind(struct i915_vma *vma)
+{
+ int err = 0;
+
+ if (rcu_access_pointer(vma->active.excl.fence)) {
+ struct dma_fence *fence;
+
+ rcu_read_lock();
+ fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
+ rcu_read_unlock();
+ if (fence) {
+ err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
+ dma_fence_put(fence);
+ }
+ }
+
+ return err;
+}
+
/**
* i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
* @vma: VMA to map
@@ -386,6 +406,8 @@ int i915_vma_bind(struct i915_vma *vma,
trace_i915_vma_bind(vma, bind_flags);
if (work && (bind_flags & ~vma_flags) & vma->vm->bind_async_flags) {
+ struct dma_fence *prev;
+
work->vma = vma;
work->cache_level = cache_level;
work->flags = bind_flags | I915_VMA_ALLOC;
@@ -399,8 +421,12 @@ int i915_vma_bind(struct i915_vma *vma,
* part of the obj->resv->excl_fence as it only affects
* execution and not content or object's backing store lifetime.
*/
- GEM_BUG_ON(i915_active_has_exclusive(&vma->active));
- i915_active_set_exclusive(&vma->active, &work->base.dma);
+ prev = i915_active_set_exclusive(&vma->active, &work->base.dma);
+ if (prev)
+ __i915_sw_fence_await_dma_fence(&work->base.chain,
+ prev,
+ &work->cb);
+
work->base.dma.error = 0; /* enable the queue_work() */
if (vma->obj) {
@@ -977,8 +1003,14 @@ int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
do {
err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
- if (err != -ENOSPC)
+ if (err != -ENOSPC) {
+ if (!err) {
+ err = i915_vma_wait_for_bind(vma);
+ if (err)
+ i915_vma_unpin(vma);
+ }
return err;
+ }
/* Unlike i915_vma_pin, we don't take no for an answer! */
flush_idle_contexts(vm->gt);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 02b31a62951e..e1ced1df13e1 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -375,6 +375,8 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
void i915_vma_make_shrinkable(struct i915_vma *vma);
void i915_vma_make_purgeable(struct i915_vma *vma);
+int i915_vma_wait_for_bind(struct i915_vma *vma);
+
static inline int i915_vma_sync(struct i915_vma *vma)
{
/* Wait for the asynchronous bindings and pending GPU reads */
--
2.25.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2020-02-07 16:12 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-29 19:54 [Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex Chris Wilson
2020-01-29 23:21 ` Chris Wilson
2020-01-30 4:23 ` [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex (rev2) Patchwork
2020-02-07 16:10 ` [Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex Daniel Vetter
2020-02-07 16:12 ` Daniel Vetter
2020-01-30 9:22 [Intel-gfx] [PATCH 2/2] " Chris Wilson
2020-01-30 11:06 ` [Intel-gfx] [PATCH] " Chris Wilson
2020-01-30 15:21 ` Chris Wilson
2020-01-30 16:15 ` Ruhl, Michael J
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.