From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com> To: intel-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org Cc: maarten.lankhorst@linux.intel.com, matthew.auld@intel.com, "Thomas Hellström" <thomas.hellstrom@linux.intel.com> Subject: [PATCH 5/6] drm/i915/ttm: Attach the migration fence to a region timeline on eviction Date: Fri, 8 Oct 2021 15:35:29 +0200 [thread overview] Message-ID: <20211008133530.664509-6-thomas.hellstrom@linux.intel.com> (raw) In-Reply-To: <20211008133530.664509-1-thomas.hellstrom@linux.intel.com> On eviction, TTM requires that migration fences from the same region are ordered using dma_fence_is_later(). For request-based fences we therefore need to use the same context for the migration, but now that we use a dma_fence_work for error recovery, and, in addition, might need to coalesce the migration fence with async unbind fences, Create a coalesce fence for this. Chain the coalesce fence on the migration fence and attach it to a region timeline. Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 84 ++++++++++++++++++---- drivers/gpu/drm/i915/intel_memory_region.c | 43 +++++++++++ drivers/gpu/drm/i915/intel_memory_region.h | 7 ++ 3 files changed, 119 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 79d4d50aa4e5..625ce52e8662 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -672,9 +672,10 @@ static void __i915_ttm_move_fallback(struct ttm_buffer_object *bo, bool clear, } } -static int __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, - struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm, - struct i915_refct_sgt *dst_rsgt, bool allow_accel) +static struct dma_fence * +__i915_ttm_move(struct ttm_buffer_object *bo, bool clear, + struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm, + struct i915_refct_sgt *dst_rsgt, bool allow_accel) { struct i915_ttm_memcpy_work *copy_work; struct dma_fence *fence; @@ -689,7 +690,7 @@ static int __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, /* Don't fail with -ENOMEM. Move sync instead. */ __i915_ttm_move_fallback(bo, clear, dst_mem, dst_ttm, dst_rsgt, allow_accel); - return 0; + return NULL; } dma_fence_work_init(©_work->base, &i915_ttm_memcpy_ops); @@ -714,14 +715,45 @@ static int __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, fence = dma_fence_get(©_work->base.dma); dma_fence_work_commit_imm(©_work->base); - /* - * We're synchronizing here for now. For async moves, return the - * fence. - */ - dma_fence_wait(fence, false); - dma_fence_put(fence); + return fence; +} - return ret; +/** + * struct i915_coalesce_fence - A dma-fence used to coalesce multiple fences + * similar to struct dm_fence_array, and at the same time being timeline- + * attached. + * @base: struct dma_fence_work base. + * @cb: Callback for timeline attachment. + */ +struct i915_coalesce_fence { + struct dma_fence_work base; + struct i915_sw_dma_fence_cb cb; +}; + +/* No .work or .release callback. Just coalescing. 
*/ +static const struct dma_fence_work_ops i915_coalesce_fence_ops = { + .name = "Coalesce fence", +}; + +static struct dma_fence * +i915_ttm_coalesce_fence(struct dma_fence *fence, struct intel_memory_region *mr) +{ + struct i915_coalesce_fence *coalesce = + kmalloc(sizeof(*coalesce), GFP_KERNEL); + + if (!coalesce) { + dma_fence_wait(fence, false); + dma_fence_put(fence); + return NULL; + } + + dma_fence_work_init(&coalesce->base, &i915_coalesce_fence_ops); + dma_fence_work_chain(&coalesce->base, fence); + dma_fence_work_timeline_attach(&mr->tl, &coalesce->base, &coalesce->cb); + dma_fence_get(&coalesce->base.dma); + dma_fence_work_commit_imm(&coalesce->base); + dma_fence_put(fence); + return &coalesce->base.dma; } static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, @@ -734,6 +766,7 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, ttm_manager_type(bo->bdev, dst_mem->mem_type); struct ttm_tt *ttm = bo->ttm; struct i915_refct_sgt *dst_rsgt; + struct dma_fence *fence = NULL; bool clear; int ret; @@ -765,7 +798,23 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, clear = !cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm)); if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) - __i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_rsgt, true); + fence = __i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_rsgt, true); + if (fence && evict) { + struct intel_memory_region *mr = + i915_ttm_region(bo->bdev, bo->resource->mem_type); + + /* + * Attach to the region timeline and for future async unbind, + * which requires a timeline. Also future async unbind fences + * can be attached here. + */ + fence = i915_ttm_coalesce_fence(fence, mr); + } + + if (fence) { + dma_fence_wait(fence, false); + dma_fence_put(fence); + } ttm_bo_move_sync_cleanup(bo, dst_mem); i915_ttm_adjust_domains_after_move(obj); @@ -1223,6 +1272,7 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, .interruptible = intr, }; struct i915_refct_sgt *dst_rsgt; + struct dma_fence *fence; int ret; assert_object_held(dst); @@ -1238,10 +1288,14 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, return ret; dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource); - __i915_ttm_move(src_bo, false, dst_bo->resource, dst_bo->ttm, - dst_rsgt, allow_accel); - + fence = __i915_ttm_move(src_bo, false, dst_bo->resource, dst_bo->ttm, + dst_rsgt, allow_accel); i915_refct_sgt_put(dst_rsgt); + if (fence) { + dma_fence_wait(fence, false); + dma_fence_put(fence); + } + return 0; } diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index e7f7e6627750..aa1733e840f7 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -7,6 +7,9 @@ #include "i915_drv.h" #include "i915_ttm_buddy_manager.h" +static const struct dma_fence_work_timeline_ops tl_ops; +static void intel_region_timeline_release_work(struct work_struct *work); + static const struct { u16 class; u16 instance; @@ -127,6 +130,10 @@ intel_memory_region_create(struct drm_i915_private *i915, } kref_init(&mem->kref); + + INIT_WORK(&mem->tl_put_work, intel_region_timeline_release_work); + dma_fence_work_timeline_init(&mem->tl, NULL, &tl_ops); + return mem; err_free: @@ -238,6 +245,42 @@ void intel_memory_regions_driver_release(struct drm_i915_private *i915) } } +static void intel_region_timeline_get(struct dma_fence_work_timeline *tl) +{ + struct intel_memory_region *mr = container_of(tl, typeof(*mr), 
tl); + + intel_memory_region_get(mr); +} + +static void intel_region_timeline_release_work(struct work_struct *work) +{ + struct intel_memory_region *mr = + container_of(work, typeof(*mr), tl_put_work); + + __intel_memory_region_destroy(&mr->kref); +} + +static void intel_region_timeline_release(struct kref *ref) +{ + struct intel_memory_region *mr = container_of(ref, typeof(*mr), kref); + + /* May be called from hardirq context, so queue the final release. */ + queue_work(system_unbound_wq, &mr->tl_put_work); +} + +static void intel_region_timeline_put(struct dma_fence_work_timeline *tl) +{ + struct intel_memory_region *mr = container_of(tl, typeof(*mr), tl); + + kref_put(&mr->kref, intel_region_timeline_release); +} + +static const struct dma_fence_work_timeline_ops tl_ops = { + .name = "Region timeline", + .get = intel_region_timeline_get, + .put = intel_region_timeline_put, +}; + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/intel_memory_region.c" #include "selftests/mock_region.c" diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 3feae3353d33..928819e2edba 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -13,6 +13,8 @@ #include <drm/drm_mm.h> #include <drm/i915_drm.h> +#include "i915_sw_fence_work.h" + struct drm_i915_private; struct drm_i915_gem_object; struct drm_printer; @@ -94,6 +96,11 @@ struct intel_memory_region { bool is_range_manager; void *region_private; + + /** Timeline for TTM eviction fences */ + struct dma_fence_work_timeline tl; + /** Work struct for _region_put() from atomic / irq context */ + struct work_struct tl_put_work; }; struct intel_memory_region * -- 2.31.1
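
For readers new to the ordering requirement in the commit message: dma_fence_is_later() only
defines an ordering between fences that live on the same fence context, by comparing their
seqnos. Below is a minimal, stand-alone C model of that rule and of the coalesce-on-a-timeline
idea — it is not kernel code, and the names fence, region_timeline and timeline_attach are
invented stand-ins for the driver's dma_fence / dma_fence_work_timeline machinery (seqno
wraparound handling is deliberately ignored).

/*
 * Stand-alone model of the eviction-fence ordering problem.
 * A "fence" is just (context, seqno); is_later() mirrors the
 * dma_fence_is_later() rule that ordering is only defined within
 * one context. Migration fences coming from unrelated contexts are
 * therefore wrapped and re-issued on a per-region timeline, i.e. a
 * single shared context with monotonically increasing seqnos.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fence {
	uint64_t context;	/* which timeline the fence belongs to */
	uint64_t seqno;		/* position on that timeline */
};

/* Simplified dma_fence_is_later(): only meaningful on one context. */
static bool is_later(const struct fence *a, const struct fence *b)
{
	return a->context == b->context && a->seqno > b->seqno;
}

/* Per-memory-region timeline: one context, monotonic seqnos. */
struct region_timeline {
	uint64_t context;
	uint64_t next_seqno;
};

/* "Coalesce" an arbitrary migration fence onto the region timeline. */
static struct fence timeline_attach(struct region_timeline *tl,
				    struct fence migration)
{
	struct fence f = {
		.context = tl->context,
		.seqno = tl->next_seqno++,
	};

	/* The real code chains on @migration so f signals no earlier. */
	(void)migration;
	return f;
}

int main(void)
{
	struct region_timeline lmem = { .context = 1, .next_seqno = 1 };

	/* Two evictions whose raw fences come from unrelated contexts. */
	struct fence blit = { .context = 100, .seqno = 7 };
	struct fence copy_job = { .context = 200, .seqno = 3 };

	/* Unordered as-is: TTM cannot tell which eviction came last. */
	printf("raw fences ordered? %d\n",
	       is_later(&copy_job, &blit) || is_later(&blit, &copy_job));

	/* Attached to the region timeline they become totally ordered. */
	struct fence e1 = timeline_attach(&lmem, blit);
	struct fence e2 = timeline_attach(&lmem, copy_job);
	printf("second eviction later? %d\n", is_later(&e2, &e1));

	return 0;
}

With that model in mind, i915_ttm_coalesce_fence() in the patch plays the role of
timeline_attach(): it wraps the migration fence in a dma_fence_work, chains it on the original
fence and attaches it to the region's dma_fence_work_timeline, so that all eviction fences for
one region end up on a single context and satisfy TTM's dma_fence_is_later() expectation.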