* [PATCH] drm/i915/execlists: Weak parallel submission support for execlists @ 2021-10-20 21:47 ` Matthew Brost 0 siblings, 0 replies; 22+ messages in thread From: Matthew Brost @ 2021-10-20 21:47 UTC (permalink / raw) To: intel-gfx, dri-devel Cc: tvrtko.ursulin, daniele.ceraolospurio, john.c.harrison A weak implementation of parallel submission (multi-bb execbuf IOCTL) for execlists. Doing as little as possible to support this interface for execlists - basically just passing submit fences between each request generated and virtual engines are not allowed. This is on par with what is there for the existing (hopefully soon deprecated) bonding interface. We perma-pin these execlists contexts to align with GuC implementation. v2: (John Harrison) - Drop siblings array as num_siblings must be 1 Signed-off-by: Matthew Brost <matthew.brost@intel.com> --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- drivers/gpu/drm/i915/gt/intel_context.c | 4 +- .../drm/i915/gt/intel_execlists_submission.c | 44 ++++++++++++++++++- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - 5 files changed, 52 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fb33d0322960..35e87a7d0ea9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, struct intel_engine_cs **siblings = NULL; intel_engine_mask_t prev_mask; - /* FIXME: This is NIY for execlists */ - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) - return -ENODEV; - if (get_user(slot, &ext->engine_index)) return -EFAULT; @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, if (get_user(num_siblings, &ext->num_siblings)) return -EFAULT; + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) 
{ + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", + num_siblings); + return -EINVAL; + } + if (slot >= set->num_engines) { drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", slot, set->num_engines); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 5634d14052bc..1bec92e1d8e6 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) __i915_active_acquire(&ce->active); - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || + intel_context_is_parallel(ce)) return 0; /* Preallocate tracking nodes */ @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, * Callers responsibility to validate that this function is used * correctly but we use GEM_BUG_ON here ensure that they do. 
*/ - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); GEM_BUG_ON(intel_context_is_pinned(parent)); GEM_BUG_ON(intel_context_is_child(parent)); GEM_BUG_ON(intel_context_is_pinned(child)); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index bedb80057046..2865b422300d 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) static bool ctx_single_port_submission(const struct intel_context *ce) { - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && - intel_context_force_single_submission(ce)); + return intel_context_force_single_submission(ce); } static bool can_merge_ctx(const struct intel_context *prev, @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, current->comm); } +static struct intel_context * +execlists_create_parallel(struct intel_engine_cs **engines, + unsigned int num_siblings, + unsigned int width) +{ + struct intel_context *parent = NULL, *ce, *err; + int i; + + GEM_BUG_ON(num_siblings != 1); + + for (i = 0; i < width; ++i) { + ce = intel_context_create(engines[i]); + if (!ce) { + err = ERR_PTR(-ENOMEM); + goto unwind; + } + + if (i == 0) + parent = ce; + else + intel_context_bind_parent_child(parent, ce); + } + + parent->parallel.fence_context = dma_fence_context_alloc(1); + + intel_context_set_nopreempt(parent); + intel_context_set_single_submission(parent); + for_each_child(parent, ce) { + intel_context_set_nopreempt(ce); + intel_context_set_single_submission(ce); + } + + return parent; + +unwind: + if (parent) + intel_context_put(parent); + return err; +} + static const struct intel_context_ops execlists_context_ops = { .flags = COPS_HAS_INFLIGHT, @@ -2616,6 +2655,7 @@ static const struct intel_context_ops execlists_context_ops = { .reset = lrc_reset, .destroy = lrc_destroy, + 
.create_parallel = execlists_create_parallel, .create_virtual = execlists_create_virtual, }; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 56156cf18c41..70f4b309522d 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1065,6 +1065,8 @@ lrc_pin(struct intel_context *ce, void lrc_unpin(struct intel_context *ce) { + if (unlikely(ce->parallel.last_rq)) + i915_request_put(ce->parallel.last_rq); check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, ce->engine); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 1341752dc70e..ddc9a97fcc8f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2961,8 +2961,6 @@ static void guc_parent_context_unpin(struct intel_context *ce) GEM_BUG_ON(!intel_context_is_parent(ce)); GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); - if (ce->parallel.last_rq) - i915_request_put(ce->parallel.last_rq); unpin_guc_id(guc, ce); lrc_unpin(ce); } -- 2.32.0 ^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists @ 2021-10-20 21:47 ` Matthew Brost 0 siblings, 0 replies; 22+ messages in thread From: Matthew Brost @ 2021-10-20 21:47 UTC (permalink / raw) To: intel-gfx, dri-devel Cc: tvrtko.ursulin, daniele.ceraolospurio, john.c.harrison A weak implementation of parallel submission (multi-bb execbuf IOCTL) for execlists. Doing as little as possible to support this interface for execlists - basically just passing submit fences between each request generated and virtual engines are not allowed. This is on par with what is there for the existing (hopefully soon deprecated) bonding interface. We perma-pin these execlists contexts to align with GuC implementation. v2: (John Harrison) - Drop siblings array as num_siblings must be 1 Signed-off-by: Matthew Brost <matthew.brost@intel.com> --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- drivers/gpu/drm/i915/gt/intel_context.c | 4 +- .../drm/i915/gt/intel_execlists_submission.c | 44 ++++++++++++++++++- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - 5 files changed, 52 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fb33d0322960..35e87a7d0ea9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, struct intel_engine_cs **siblings = NULL; intel_engine_mask_t prev_mask; - /* FIXME: This is NIY for execlists */ - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) - return -ENODEV; - if (get_user(slot, &ext->engine_index)) return -EFAULT; @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, if (get_user(num_siblings, &ext->num_siblings)) return -EFAULT; + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && 
num_siblings != 1) { + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", + num_siblings); + return -EINVAL; + } + if (slot >= set->num_engines) { drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", slot, set->num_engines); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 5634d14052bc..1bec92e1d8e6 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) __i915_active_acquire(&ce->active); - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || + intel_context_is_parallel(ce)) return 0; /* Preallocate tracking nodes */ @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, * Callers responsibility to validate that this function is used * correctly but we use GEM_BUG_ON here ensure that they do. 
*/ - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); GEM_BUG_ON(intel_context_is_pinned(parent)); GEM_BUG_ON(intel_context_is_child(parent)); GEM_BUG_ON(intel_context_is_pinned(child)); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index bedb80057046..2865b422300d 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) static bool ctx_single_port_submission(const struct intel_context *ce) { - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && - intel_context_force_single_submission(ce)); + return intel_context_force_single_submission(ce); } static bool can_merge_ctx(const struct intel_context *prev, @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, current->comm); } +static struct intel_context * +execlists_create_parallel(struct intel_engine_cs **engines, + unsigned int num_siblings, + unsigned int width) +{ + struct intel_context *parent = NULL, *ce, *err; + int i; + + GEM_BUG_ON(num_siblings != 1); + + for (i = 0; i < width; ++i) { + ce = intel_context_create(engines[i]); + if (!ce) { + err = ERR_PTR(-ENOMEM); + goto unwind; + } + + if (i == 0) + parent = ce; + else + intel_context_bind_parent_child(parent, ce); + } + + parent->parallel.fence_context = dma_fence_context_alloc(1); + + intel_context_set_nopreempt(parent); + intel_context_set_single_submission(parent); + for_each_child(parent, ce) { + intel_context_set_nopreempt(ce); + intel_context_set_single_submission(ce); + } + + return parent; + +unwind: + if (parent) + intel_context_put(parent); + return err; +} + static const struct intel_context_ops execlists_context_ops = { .flags = COPS_HAS_INFLIGHT, @@ -2616,6 +2655,7 @@ static const struct intel_context_ops execlists_context_ops = { .reset = lrc_reset, .destroy = lrc_destroy, + 
.create_parallel = execlists_create_parallel, .create_virtual = execlists_create_virtual, }; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 56156cf18c41..70f4b309522d 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1065,6 +1065,8 @@ lrc_pin(struct intel_context *ce, void lrc_unpin(struct intel_context *ce) { + if (unlikely(ce->parallel.last_rq)) + i915_request_put(ce->parallel.last_rq); check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, ce->engine); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 1341752dc70e..ddc9a97fcc8f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2961,8 +2961,6 @@ static void guc_parent_context_unpin(struct intel_context *ce) GEM_BUG_ON(!intel_context_is_parent(ce)); GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); - if (ce->parallel.last_rq) - i915_request_put(ce->parallel.last_rq); unpin_guc_id(guc, ce); lrc_unpin(ce); } -- 2.32.0 ^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915/execlists: Weak parallel submission support for execlists 2021-10-20 21:47 ` [Intel-gfx] " Matthew Brost (?) @ 2021-10-21 0:53 ` Patchwork -1 siblings, 0 replies; 22+ messages in thread From: Patchwork @ 2021-10-21 0:53 UTC (permalink / raw) To: Matthew Brost; +Cc: intel-gfx [-- Attachment #1: Type: text/plain, Size: 1747 bytes --] == Series Details == Series: drm/i915/execlists: Weak parallel submission support for execlists URL : https://patchwork.freedesktop.org/series/96088/ State : success == Summary == CI Bug Log - changes from CI_DRM_10767 -> Patchwork_21398 ==================================================== Summary ------- **SUCCESS** No regressions found. External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/index.html Known issues ------------ Here are the changes found in Patchwork_21398 that come from known issues: ### IGT changes ### #### Issues hit #### * igt@kms_frontbuffer_tracking@basic: - fi-cml-u2: [PASS][1] -> [DMESG-WARN][2] ([i915#4269]) [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/fi-cml-u2/igt@kms_frontbuffer_tracking@basic.html [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/fi-cml-u2/igt@kms_frontbuffer_tracking@basic.html [i915#4269]: https://gitlab.freedesktop.org/drm/intel/issues/4269 Participating hosts (41 -> 37) ------------------------------ Missing (4): fi-ctg-p8600 fi-bsw-cyan bat-dg1-6 fi-hsw-4200u Build changes ------------- * Linux: CI_DRM_10767 -> Patchwork_21398 CI-20190529: 20190529 CI_DRM_10767: 4d947bb057406e5c30081736db70da3f5726e0cd @ git://anongit.freedesktop.org/gfx-ci/linux IGT_6258: 4c80c71d7dec29b6376846ae96bd04dc0b6e34d9 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git Patchwork_21398: a7325f8bdd56fe264255b44f93be2ddda1b3929f @ git://anongit.freedesktop.org/gfx-ci/linux == Linux commits == a7325f8bdd56 drm/i915/execlists: Weak parallel submission support for execlists == Logs == For more details see: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/index.html [-- Attachment #2: Type: text/html, Size: 2339 bytes --] ^ permalink raw reply [flat|nested] 22+ messages in thread
* [Intel-gfx] ✓ Fi.CI.IGT: success for drm/i915/execlists: Weak parallel submission support for execlists 2021-10-20 21:47 ` [Intel-gfx] " Matthew Brost (?) (?) @ 2021-10-21 6:14 ` Patchwork -1 siblings, 0 replies; 22+ messages in thread From: Patchwork @ 2021-10-21 6:14 UTC (permalink / raw) To: Matthew Brost; +Cc: intel-gfx [-- Attachment #1: Type: text/plain, Size: 30289 bytes --] == Series Details == Series: drm/i915/execlists: Weak parallel submission support for execlists URL : https://patchwork.freedesktop.org/series/96088/ State : success == Summary == CI Bug Log - changes from CI_DRM_10767_full -> Patchwork_21398_full ==================================================== Summary ------- **SUCCESS** No regressions found. Known issues ------------ Here are the changes found in Patchwork_21398_full that come from known issues: ### IGT changes ### #### Issues hit #### * igt@gem_create@create-massive: - shard-kbl: NOTRUN -> [DMESG-WARN][1] ([i915#3002]) [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl4/igt@gem_create@create-massive.html - shard-apl: NOTRUN -> [DMESG-WARN][2] ([i915#3002]) [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl8/igt@gem_create@create-massive.html * igt@gem_ctx_isolation@preservation-s3@vecs0: - shard-apl: NOTRUN -> [DMESG-WARN][3] ([i915#180]) [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl8/igt@gem_ctx_isolation@preservation-s3@vecs0.html * igt@gem_ctx_persistence@legacy-engines-hostile-preempt: - shard-snb: NOTRUN -> [SKIP][4] ([fdo#109271] / [i915#1099]) +4 similar issues [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-snb2/igt@gem_ctx_persistence@legacy-engines-hostile-preempt.html * igt@gem_exec_fair@basic-deadline: - shard-glk: [PASS][5] -> [FAIL][6] ([i915#2846]) [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-glk9/igt@gem_exec_fair@basic-deadline.html [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-glk5/igt@gem_exec_fair@basic-deadline.html * igt@gem_exec_fair@basic-flow@rcs0: - shard-tglb: [PASS][7] -> [FAIL][8] ([i915#2842]) +3 similar issues [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-tglb1/igt@gem_exec_fair@basic-flow@rcs0.html [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-tglb3/igt@gem_exec_fair@basic-flow@rcs0.html * igt@gem_exec_fair@basic-none@vcs1: - shard-iclb: NOTRUN -> [FAIL][9] ([i915#2842]) [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-iclb4/igt@gem_exec_fair@basic-none@vcs1.html * igt@gem_exec_fair@basic-pace-share@rcs0: - shard-glk: [PASS][10] -> [FAIL][11] ([i915#2842]) [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-glk7/igt@gem_exec_fair@basic-pace-share@rcs0.html [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-glk4/igt@gem_exec_fair@basic-pace-share@rcs0.html * igt@gem_exec_fair@basic-throttle@rcs0: - shard-tglb: NOTRUN -> [FAIL][12] ([i915#2842]) [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-tglb3/igt@gem_exec_fair@basic-throttle@rcs0.html * igt@gem_exec_params@no-vebox: - shard-skl: NOTRUN -> [SKIP][13] ([fdo#109271]) +77 similar issues [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-skl5/igt@gem_exec_params@no-vebox.html * igt@gem_userptr_blits@readonly-unsync: - shard-tglb: NOTRUN -> [SKIP][14] ([i915#3297]) [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-tglb1/igt@gem_userptr_blits@readonly-unsync.html * igt@gen9_exec_parse@basic-rejected: - shard-iclb: NOTRUN -> [SKIP][15] ([i915#2856]) [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-iclb1/igt@gen9_exec_parse@basic-rejected.html - shard-tglb: NOTRUN -> [SKIP][16] ([i915#2856]) [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-tglb1/igt@gen9_exec_parse@basic-rejected.html * igt@i915_pm_rpm@dpms-non-lpsp: - shard-tglb: 
NOTRUN -> [SKIP][17] ([fdo#111644] / [i915#1397] / [i915#2411]) [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-tglb1/igt@i915_pm_rpm@dpms-non-lpsp.html * igt@i915_suspend@fence-restore-tiled2untiled: - shard-apl: [PASS][18] -> [DMESG-WARN][19] ([i915#180]) [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-apl8/igt@i915_suspend@fence-restore-tiled2untiled.html [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl3/igt@i915_suspend@fence-restore-tiled2untiled.html * igt@kms_big_fb@x-tiled-32bpp-rotate-0: - shard-glk: [PASS][20] -> [DMESG-WARN][21] ([i915#118]) +2 similar issues [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-glk4/igt@kms_big_fb@x-tiled-32bpp-rotate-0.html [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-glk3/igt@kms_big_fb@x-tiled-32bpp-rotate-0.html * igt@kms_big_fb@x-tiled-max-hw-stride-32bpp-rotate-180-hflip: - shard-apl: NOTRUN -> [SKIP][22] ([fdo#109271] / [i915#3777]) [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl3/igt@kms_big_fb@x-tiled-max-hw-stride-32bpp-rotate-180-hflip.html * igt@kms_big_fb@x-tiled-max-hw-stride-64bpp-rotate-0-hflip: - shard-kbl: NOTRUN -> [SKIP][23] ([fdo#109271] / [i915#3777]) [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl6/igt@kms_big_fb@x-tiled-max-hw-stride-64bpp-rotate-0-hflip.html * igt@kms_big_fb@yf-tiled-max-hw-stride-64bpp-rotate-0-hflip: - shard-tglb: NOTRUN -> [SKIP][24] ([fdo#111615]) [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-tglb1/igt@kms_big_fb@yf-tiled-max-hw-stride-64bpp-rotate-0-hflip.html - shard-iclb: NOTRUN -> [SKIP][25] ([fdo#110723]) [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-iclb1/igt@kms_big_fb@yf-tiled-max-hw-stride-64bpp-rotate-0-hflip.html * igt@kms_ccs@pipe-a-crc-sprite-planes-basic-y_tiled_ccs: - shard-snb: NOTRUN -> [SKIP][26] ([fdo#109271]) +451 similar issues [26]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-snb5/igt@kms_ccs@pipe-a-crc-sprite-planes-basic-y_tiled_ccs.html * igt@kms_ccs@pipe-a-crc-sprite-planes-basic-y_tiled_gen12_rc_ccs_cc: - shard-iclb: NOTRUN -> [SKIP][27] ([fdo#109278] / [i915#3886]) [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-iclb1/igt@kms_ccs@pipe-a-crc-sprite-planes-basic-y_tiled_gen12_rc_ccs_cc.html * igt@kms_ccs@pipe-c-ccs-on-another-bo-y_tiled_gen12_rc_ccs_cc: - shard-apl: NOTRUN -> [SKIP][28] ([fdo#109271] / [i915#3886]) +5 similar issues [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl8/igt@kms_ccs@pipe-c-ccs-on-another-bo-y_tiled_gen12_rc_ccs_cc.html * igt@kms_ccs@pipe-c-crc-primary-basic-y_tiled_gen12_mc_ccs: - shard-kbl: NOTRUN -> [SKIP][29] ([fdo#109271] / [i915#3886]) +4 similar issues [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl6/igt@kms_ccs@pipe-c-crc-primary-basic-y_tiled_gen12_mc_ccs.html * igt@kms_ccs@pipe-d-bad-rotation-90-y_tiled_gen12_mc_ccs: - shard-iclb: NOTRUN -> [SKIP][30] ([fdo#109278]) +1 similar issue [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-iclb1/igt@kms_ccs@pipe-d-bad-rotation-90-y_tiled_gen12_mc_ccs.html * igt@kms_ccs@pipe-d-crc-primary-rotation-180-yf_tiled_ccs: - shard-tglb: NOTRUN -> [SKIP][31] ([i915#3689]) [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-tglb3/igt@kms_ccs@pipe-d-crc-primary-rotation-180-yf_tiled_ccs.html * igt@kms_chamelium@hdmi-mode-timings: - shard-iclb: NOTRUN -> [SKIP][32] ([fdo#109284] / [fdo#111827]) [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-iclb1/igt@kms_chamelium@hdmi-mode-timings.html * igt@kms_color_chamelium@pipe-a-ctm-0-25: - shard-snb: NOTRUN -> [SKIP][33] ([fdo#109271] / [fdo#111827]) +25 similar issues [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-snb7/igt@kms_color_chamelium@pipe-a-ctm-0-25.html * igt@kms_color_chamelium@pipe-b-ctm-0-75: - 
shard-tglb: NOTRUN -> [SKIP][34] ([fdo#109284] / [fdo#111827]) [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-tglb3/igt@kms_color_chamelium@pipe-b-ctm-0-75.html * igt@kms_color_chamelium@pipe-b-ctm-limited-range: - shard-skl: NOTRUN -> [SKIP][35] ([fdo#109271] / [fdo#111827]) +2 similar issues [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-skl5/igt@kms_color_chamelium@pipe-b-ctm-limited-range.html * igt@kms_color_chamelium@pipe-c-ctm-negative: - shard-kbl: NOTRUN -> [SKIP][36] ([fdo#109271] / [fdo#111827]) +11 similar issues [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl1/igt@kms_color_chamelium@pipe-c-ctm-negative.html * igt@kms_color_chamelium@pipe-d-ctm-0-25: - shard-apl: NOTRUN -> [SKIP][37] ([fdo#109271] / [fdo#111827]) +10 similar issues [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl3/igt@kms_color_chamelium@pipe-d-ctm-0-25.html * igt@kms_content_protection@uevent: - shard-apl: NOTRUN -> [FAIL][38] ([i915#2105]) [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl2/igt@kms_content_protection@uevent.html * igt@kms_cursor_crc@pipe-b-cursor-512x170-random: - shard-tglb: NOTRUN -> [SKIP][39] ([fdo#109279] / [i915#3359]) [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-tglb1/igt@kms_cursor_crc@pipe-b-cursor-512x170-random.html - shard-iclb: NOTRUN -> [SKIP][40] ([fdo#109278] / [fdo#109279]) [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-iclb1/igt@kms_cursor_crc@pipe-b-cursor-512x170-random.html * igt@kms_cursor_crc@pipe-c-cursor-max-size-random: - shard-tglb: NOTRUN -> [SKIP][41] ([i915#3359]) [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-tglb1/igt@kms_cursor_crc@pipe-c-cursor-max-size-random.html * igt@kms_cursor_legacy@2x-long-cursor-vs-flip-legacy: - shard-tglb: NOTRUN -> [SKIP][42] ([fdo#111825]) +2 similar issues [42]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-tglb1/igt@kms_cursor_legacy@2x-long-cursor-vs-flip-legacy.html - shard-iclb: NOTRUN -> [SKIP][43] ([fdo#109274] / [fdo#109278]) [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-iclb1/igt@kms_cursor_legacy@2x-long-cursor-vs-flip-legacy.html * igt@kms_fbcon_fbt@fbc-suspend: - shard-apl: [PASS][44] -> [INCOMPLETE][45] ([i915#180] / [i915#1982]) [44]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-apl6/igt@kms_fbcon_fbt@fbc-suspend.html [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl3/igt@kms_fbcon_fbt@fbc-suspend.html * igt@kms_flip@flip-vs-expired-vblank-interruptible@a-dp1: - shard-apl: NOTRUN -> [FAIL][46] ([i915#79]) [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl8/igt@kms_flip@flip-vs-expired-vblank-interruptible@a-dp1.html * igt@kms_flip@flip-vs-expired-vblank@b-hdmi-a1: - shard-glk: [PASS][47] -> [FAIL][48] ([i915#79]) +1 similar issue [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-glk5/igt@kms_flip@flip-vs-expired-vblank@b-hdmi-a1.html [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-glk6/igt@kms_flip@flip-vs-expired-vblank@b-hdmi-a1.html * igt@kms_flip@flip-vs-suspend-interruptible@a-dp1: - shard-kbl: [PASS][49] -> [DMESG-WARN][50] ([i915#180]) +2 similar issues [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-kbl2/igt@kms_flip@flip-vs-suspend-interruptible@a-dp1.html [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl3/igt@kms_flip@flip-vs-suspend-interruptible@a-dp1.html * igt@kms_flip@flip-vs-suspend-interruptible@c-dp1: - shard-kbl: NOTRUN -> [DMESG-WARN][51] ([i915#180]) [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl3/igt@kms_flip@flip-vs-suspend-interruptible@c-dp1.html * igt@kms_flip@plain-flip-fb-recreate-interruptible@c-edp1: - shard-skl: [PASS][52] -> [FAIL][53] ([i915#2122]) +2 similar issues [52]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-skl2/igt@kms_flip@plain-flip-fb-recreate-interruptible@c-edp1.html [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-skl10/igt@kms_flip@plain-flip-fb-recreate-interruptible@c-edp1.html * igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytilegen12rcccs: - shard-apl: NOTRUN -> [SKIP][54] ([fdo#109271] / [i915#2672]) [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl3/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytilegen12rcccs.html * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-pwrite: - shard-glk: [PASS][55] -> [FAIL][56] ([i915#2546]) [55]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-glk4/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-pwrite.html [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-glk3/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-pwrite.html * igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-cur-indfb-draw-blt: - shard-kbl: NOTRUN -> [SKIP][57] ([fdo#109271]) +181 similar issues [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl6/igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-cur-indfb-draw-blt.html * igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-pri-shrfb-draw-blt: - shard-iclb: NOTRUN -> [SKIP][58] ([fdo#109280]) [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-iclb1/igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-pri-shrfb-draw-blt.html * igt@kms_pipe_b_c_ivb@disable-pipe-b-enable-pipe-c: - shard-apl: NOTRUN -> [SKIP][59] ([fdo#109271]) +148 similar issues [59]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl8/igt@kms_pipe_b_c_ivb@disable-pipe-b-enable-pipe-c.html * igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d: - shard-apl: NOTRUN -> [SKIP][60] ([fdo#109271] / [i915#533]) [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl8/igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d.html * 
igt@kms_pipe_crc_basic@hang-read-crc-pipe-d: - shard-kbl: NOTRUN -> [SKIP][61] ([fdo#109271] / [i915#533]) +2 similar issues [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl4/igt@kms_pipe_crc_basic@hang-read-crc-pipe-d.html * igt@kms_pipe_crc_basic@read-crc-pipe-d-frame-sequence: - shard-skl: NOTRUN -> [SKIP][62] ([fdo#109271] / [i915#533]) [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-skl5/igt@kms_pipe_crc_basic@read-crc-pipe-d-frame-sequence.html * igt@kms_plane_alpha_blend@pipe-b-alpha-basic: - shard-apl: NOTRUN -> [FAIL][63] ([fdo#108145] / [i915#265]) +1 similar issue [63]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl3/igt@kms_plane_alpha_blend@pipe-b-alpha-basic.html * igt@kms_plane_scaling@scaler-with-clipping-clamping@pipe-c-scaler-with-clipping-clamping: - shard-kbl: NOTRUN -> [SKIP][64] ([fdo#109271] / [i915#2733]) [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl1/igt@kms_plane_scaling@scaler-with-clipping-clamping@pipe-c-scaler-with-clipping-clamping.html * igt@kms_psr2_sf@overlay-primary-update-sf-dmg-area-1: - shard-apl: NOTRUN -> [SKIP][65] ([fdo#109271] / [i915#658]) +3 similar issues [65]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl3/igt@kms_psr2_sf@overlay-primary-update-sf-dmg-area-1.html * igt@kms_psr2_sf@plane-move-sf-dmg-area-3: - shard-kbl: NOTRUN -> [SKIP][66] ([fdo#109271] / [i915#658]) +4 similar issues [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl1/igt@kms_psr2_sf@plane-move-sf-dmg-area-3.html - shard-skl: NOTRUN -> [SKIP][67] ([fdo#109271] / [i915#658]) [67]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-skl5/igt@kms_psr2_sf@plane-move-sf-dmg-area-3.html * igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-5: - shard-tglb: NOTRUN -> [SKIP][68] ([i915#2920]) [68]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-tglb1/igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-5.html * igt@kms_setmode@basic: - shard-snb: NOTRUN -> [FAIL][69] ([i915#31]) [69]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-snb5/igt@kms_setmode@basic.html * igt@kms_vblank@pipe-a-ts-continuation-suspend: - shard-apl: [PASS][70] -> [DMESG-WARN][71] ([i915#180] / [i915#295]) [70]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-apl3/igt@kms_vblank@pipe-a-ts-continuation-suspend.html [71]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl2/igt@kms_vblank@pipe-a-ts-continuation-suspend.html * igt@kms_writeback@writeback-fb-id: - shard-apl: NOTRUN -> [SKIP][72] ([fdo#109271] / [i915#2437]) [72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl2/igt@kms_writeback@writeback-fb-id.html * igt@kms_writeback@writeback-pixel-formats: - shard-kbl: NOTRUN -> [SKIP][73] ([fdo#109271] / [i915#2437]) [73]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl6/igt@kms_writeback@writeback-pixel-formats.html * igt@nouveau_crc@pipe-d-ctx-flip-skip-current-frame: - shard-tglb: NOTRUN -> [SKIP][74] ([i915#2530]) [74]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-tglb1/igt@nouveau_crc@pipe-d-ctx-flip-skip-current-frame.html * igt@prime_vgem@fence-write-hang: - shard-tglb: NOTRUN -> [SKIP][75] ([fdo#109295]) [75]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-tglb1/igt@prime_vgem@fence-write-hang.html * igt@sysfs_clients@fair-0: - shard-kbl: NOTRUN -> [SKIP][76] ([fdo#109271] / [i915#2994]) [76]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl4/igt@sysfs_clients@fair-0.html * igt@sysfs_clients@recycle: - shard-apl: NOTRUN -> [SKIP][77] ([fdo#109271] / [i915#2994]) [77]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl3/igt@sysfs_clients@recycle.html #### Possible fixes #### * igt@drm_mm@all@evict: - shard-skl: 
[INCOMPLETE][78] ([i915#198]) -> [PASS][79] [78]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-skl7/igt@drm_mm@all@evict.html [79]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-skl6/igt@drm_mm@all@evict.html * igt@gem_eio@unwedge-stress: - shard-tglb: [TIMEOUT][80] ([i915#2369] / [i915#3063] / [i915#3648]) -> [PASS][81] [80]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-tglb8/igt@gem_eio@unwedge-stress.html [81]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-tglb1/igt@gem_eio@unwedge-stress.html * igt@gem_exec_fair@basic-pace@rcs0: - shard-kbl: [FAIL][82] ([i915#2842]) -> [PASS][83] +1 similar issue [82]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-kbl4/igt@gem_exec_fair@basic-pace@rcs0.html [83]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl6/igt@gem_exec_fair@basic-pace@rcs0.html * igt@i915_pm_dc@dc9-dpms: - shard-iclb: [FAIL][84] ([i915#4275]) -> [PASS][85] [84]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-iclb1/igt@i915_pm_dc@dc9-dpms.html [85]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-iclb6/igt@i915_pm_dc@dc9-dpms.html * igt@i915_selftest@live@hangcheck: - shard-snb: [INCOMPLETE][86] ([i915#3921]) -> [PASS][87] [86]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-snb5/igt@i915_selftest@live@hangcheck.html [87]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-snb7/igt@i915_selftest@live@hangcheck.html * igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions: - shard-skl: [FAIL][88] ([i915#2346]) -> [PASS][89] [88]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-skl4/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions.html [89]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-skl7/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions.html * igt@kms_flip@flip-vs-expired-vblank-interruptible@a-edp1: - shard-skl: [FAIL][90] ([i915#2122]) -> [PASS][91] [90]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-skl4/igt@kms_flip@flip-vs-expired-vblank-interruptible@a-edp1.html [91]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-skl7/igt@kms_flip@flip-vs-expired-vblank-interruptible@a-edp1.html * igt@kms_flip@flip-vs-expired-vblank@c-edp1: - shard-skl: [FAIL][92] ([i915#79]) -> [PASS][93] +1 similar issue [92]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-skl10/igt@kms_flip@flip-vs-expired-vblank@c-edp1.html [93]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-skl8/igt@kms_flip@flip-vs-expired-vblank@c-edp1.html * igt@kms_flip@flip-vs-suspend-interruptible@b-dp1: - shard-apl: [DMESG-WARN][94] ([i915#180]) -> [PASS][95] [94]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-apl6/igt@kms_flip@flip-vs-suspend-interruptible@b-dp1.html [95]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl6/igt@kms_flip@flip-vs-suspend-interruptible@b-dp1.html * igt@kms_flip@modeset-vs-vblank-race-interruptible@c-hdmi-a1: - shard-glk: [FAIL][96] ([i915#407]) -> [PASS][97] [96]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-glk6/igt@kms_flip@modeset-vs-vblank-race-interruptible@c-hdmi-a1.html [97]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-glk2/igt@kms_flip@modeset-vs-vblank-race-interruptible@c-hdmi-a1.html * igt@kms_frontbuffer_tracking@fbc-suspend: - shard-kbl: [DMESG-WARN][98] ([i915#180]) -> [PASS][99] +5 similar issues [98]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-kbl6/igt@kms_frontbuffer_tracking@fbc-suspend.html [99]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl4/igt@kms_frontbuffer_tracking@fbc-suspend.html * igt@kms_plane_alpha_blend@pipe-a-constant-alpha-min: - shard-skl: [FAIL][100] ([fdo#108145] / [i915#265]) -> [PASS][101] +1 similar issue [100]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-skl4/igt@kms_plane_alpha_blend@pipe-a-constant-alpha-min.html 
[101]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-skl7/igt@kms_plane_alpha_blend@pipe-a-constant-alpha-min.html * igt@kms_vblank@pipe-a-ts-continuation-suspend: - shard-kbl: [DMESG-WARN][102] ([i915#180] / [i915#295]) -> [PASS][103] [102]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-kbl3/igt@kms_vblank@pipe-a-ts-continuation-suspend.html [103]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl1/igt@kms_vblank@pipe-a-ts-continuation-suspend.html #### Warnings #### * igt@gem_exec_fair@basic-none-rrul@rcs0: - shard-iclb: [FAIL][104] ([i915#2852]) -> [FAIL][105] ([i915#2842]) [104]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-iclb7/igt@gem_exec_fair@basic-none-rrul@rcs0.html [105]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-iclb4/igt@gem_exec_fair@basic-none-rrul@rcs0.html * igt@gem_exec_fair@basic-pace@vecs0: - shard-kbl: [SKIP][106] ([fdo#109271]) -> [FAIL][107] ([i915#2842]) [106]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-kbl4/igt@gem_exec_fair@basic-pace@vecs0.html [107]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl6/igt@gem_exec_fair@basic-pace@vecs0.html * igt@i915_pm_rc6_residency@rc6-fence: - shard-iclb: [WARN][108] ([i915#1804] / [i915#2684]) -> [WARN][109] ([i915#2684]) +1 similar issue [108]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-iclb7/igt@i915_pm_rc6_residency@rc6-fence.html [109]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-iclb5/igt@i915_pm_rc6_residency@rc6-fence.html * igt@kms_flip@flip-vs-suspend-interruptible@b-dp1: - shard-kbl: [INCOMPLETE][110] -> [DMESG-WARN][111] ([i915#180]) [110]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-kbl2/igt@kms_flip@flip-vs-suspend-interruptible@b-dp1.html [111]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl3/igt@kms_flip@flip-vs-suspend-interruptible@b-dp1.html * igt@runner@aborted: - shard-kbl: ([FAIL][112], 
[FAIL][113], [FAIL][114], [FAIL][115], [FAIL][116], [FAIL][117], [FAIL][118], [FAIL][119], [FAIL][120]) ([fdo#109271] / [i915#180] / [i915#1814] / [i915#3363] / [i915#4312] / [i915#602] / [i915#92]) -> ([FAIL][121], [FAIL][122], [FAIL][123], [FAIL][124], [FAIL][125], [FAIL][126]) ([i915#1436] / [i915#180] / [i915#1814] / [i915#3002] / [i915#3363] / [i915#4312] / [i915#92]) [112]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-kbl6/igt@runner@aborted.html [113]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-kbl3/igt@runner@aborted.html [114]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-kbl6/igt@runner@aborted.html [115]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-kbl6/igt@runner@aborted.html [116]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-kbl6/igt@runner@aborted.html [117]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-kbl3/igt@runner@aborted.html [118]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-kbl6/igt@runner@aborted.html [119]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-kbl3/igt@runner@aborted.html [120]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-kbl3/igt@runner@aborted.html [121]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl3/igt@runner@aborted.html [122]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl3/igt@runner@aborted.html [123]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl3/igt@runner@aborted.html [124]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl6/igt@runner@aborted.html [125]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl4/igt@runner@aborted.html [126]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-kbl6/igt@runner@aborted.html - shard-apl: [FAIL][127] ([i915#180] / [i915#3363] / [i915#4312]) -> ([FAIL][128], [FAIL][129], [FAIL][130], [FAIL][131], [FAIL][132]) ([i915#180] / [i915#1814] / 
[i915#3002] / [i915#3363] / [i915#4312]) [127]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-apl6/igt@runner@aborted.html [128]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl8/igt@runner@aborted.html [129]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl8/igt@runner@aborted.html [130]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl3/igt@runner@aborted.html [131]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl3/igt@runner@aborted.html [132]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-apl2/igt@runner@aborted.html - shard-skl: ([FAIL][133], [FAIL][134]) ([i915#3002] / [i915#3363] / [i915#4312]) -> ([FAIL][135], [FAIL][136], [FAIL][137]) ([i915#2029] / [i915#3002] / [i915#3363] / [i915#4312]) [133]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-skl8/igt@runner@aborted.html [134]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10767/shard-skl10/igt@runner@aborted.html [135]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-skl9/igt@runner@aborted.html [136]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-skl3/igt@runner@aborted.html [137]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/shard-skl8/igt@runner@aborted.html [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145 [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271 [fdo#109274]: https://bugs.freedesktop.org/show_bug.cgi?id=109274 [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278 [fdo#109279]: https://bugs.freedesktop.org/show_bug.cgi?id=109279 [fdo#109280]: https://bugs.freedesktop.org/show_bug.cgi?id=109280 [fdo#109284]: https://bugs.freedesktop.org/show_bug.cgi?id=109284 [fdo#109295]: https://bugs.freedesktop.org/show_bug.cgi?id=109295 [fdo#110723]: https://bugs.freedesktop.org/show_bug.cgi?id=110723 [fdo#111615]: https://bugs.freedesktop.org/show_bug.cgi?id=111615 [fdo#111644]: 
https://bugs.freedesktop.org/show_bug.cgi?id=111644 [fdo#111825]: https://bugs.freedesktop.org/show_bug.cgi?id=111825 [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827 [i915#1099]: https://gitlab.freedesktop.org/drm/intel/issues/1099 [i915#118]: https://gitlab.freedesktop.org/drm/intel/issues/118 [i915#1397]: https://gitlab.freedesktop.org/drm/intel/issues/1397 [i915#1436]: https://gitlab.freedesktop.org/drm/intel/issues/1436 [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180 [i915#1804]: https://gitlab.freedesktop.org/drm/intel/issues/1804 [i915#1814]: https://gitlab.freedesktop.org/drm/intel/issues/1814 [i915#198]: https://gitlab.freedesktop.org/drm/intel/issues/198 [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982 [i915#2029]: https://gitlab.freedesktop.org/drm/intel/issues/2029 [i915#2105]: https://gitlab.freedesktop.org/drm/intel/issues/2105 [i915#2122]: https://gitlab.freedesktop.org/drm/intel/issues/2122 [i915#2346]: https://gitlab.freedesktop.org/drm/intel/issues/2346 [i915#2369]: https://gitlab.freedesktop.org/drm/intel/issues/2369 [i915 == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21398/index.html [-- Attachment #2: Type: text/html, Size: 37025 bytes --] ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH] drm/i915/execlists: Weak parallel submission support for execlists 2021-10-20 21:47 ` [Intel-gfx] " Matthew Brost @ 2021-10-26 21:58 ` John Harrison -1 siblings, 0 replies; 22+ messages in thread From: John Harrison @ 2021-10-26 21:58 UTC (permalink / raw) To: Matthew Brost, intel-gfx, dri-devel; +Cc: tvrtko.ursulin, daniele.ceraolospurio On 10/20/2021 14:47, Matthew Brost wrote: > A weak implementation of parallel submission (multi-bb execbuf IOCTL) for > execlists. Doing as little as possible to support this interface for > execlists - basically just passing submit fences between each request > generated and virtual engines are not allowed. This is on par with what > is there for the existing (hopefully soon deprecated) bonding interface. > > We perma-pin these execlists contexts to align with GuC implementation. > > v2: > (John Harrison) > - Drop siblings array as num_siblings must be 1 > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > --- > drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- > drivers/gpu/drm/i915/gt/intel_context.c | 4 +- > .../drm/i915/gt/intel_execlists_submission.c | 44 ++++++++++++++++++- > drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - > 5 files changed, 52 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c > index fb33d0322960..35e87a7d0ea9 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > struct intel_engine_cs **siblings = NULL; > intel_engine_mask_t prev_mask; > > - /* FIXME: This is NIY for execlists */ > - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) > - return -ENODEV; > - > if (get_user(slot, &ext->engine_index)) > return -EFAULT; > > @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct 
i915_user_extension __user *base, > if (get_user(num_siblings, &ext->num_siblings)) > return -EFAULT; > > + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { > + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", > + num_siblings); > + return -EINVAL; > + } > + > if (slot >= set->num_engines) { > drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", > slot, set->num_engines); > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c > index 5634d14052bc..1bec92e1d8e6 100644 > --- a/drivers/gpu/drm/i915/gt/intel_context.c > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) > > __i915_active_acquire(&ce->active); > > - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) > + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || > + intel_context_is_parallel(ce)) > return 0; > > /* Preallocate tracking nodes */ > @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, > * Callers responsibility to validate that this function is used > * correctly but we use GEM_BUG_ON here ensure that they do. 
> */ > - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); > GEM_BUG_ON(intel_context_is_pinned(parent)); > GEM_BUG_ON(intel_context_is_child(parent)); > GEM_BUG_ON(intel_context_is_pinned(child)); > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > index bedb80057046..2865b422300d 100644 > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) > > static bool ctx_single_port_submission(const struct intel_context *ce) > { > - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && > - intel_context_force_single_submission(ce)); > + return intel_context_force_single_submission(ce); I think this is actually going to break GVT. Not so much this change here but the whole use of single submission outside of GVT. It looks like the GVT driver overloads the single submission flag to tag requests that it owns. If we start using that flag elsewhere when GVT is active, I think that will cause much confusion within the GVT code. The correct fix would be to create a new flag just for GVT usage alongside the single submission one. GVT would then set both but only check for its own private flag. The parallel code would obviously only set the existing single submission flag. 
> } > > static bool can_merge_ctx(const struct intel_context *prev, > @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, > current->comm); > } > > +static struct intel_context * > +execlists_create_parallel(struct intel_engine_cs **engines, > + unsigned int num_siblings, > + unsigned int width) > +{ > + struct intel_context *parent = NULL, *ce, *err; > + int i; > + > + GEM_BUG_ON(num_siblings != 1); > + > + for (i = 0; i < width; ++i) { > + ce = intel_context_create(engines[i]); > + if (!ce) { > + err = ERR_PTR(-ENOMEM); > + goto unwind; > + } > + > + if (i == 0) > + parent = ce; > + else > + intel_context_bind_parent_child(parent, ce); > + } > + > + parent->parallel.fence_context = dma_fence_context_alloc(1); > + > + intel_context_set_nopreempt(parent); > + intel_context_set_single_submission(parent); Can you explain the need for setting single submission? John. > + for_each_child(parent, ce) { > + intel_context_set_nopreempt(ce); > + intel_context_set_single_submission(ce); > + } > + > + return parent; > + > +unwind: > + if (parent) > + intel_context_put(parent); > + return err; > +} > + > static const struct intel_context_ops execlists_context_ops = { > .flags = COPS_HAS_INFLIGHT, > > @@ -2616,6 +2655,7 @@ static const struct intel_context_ops execlists_context_ops = { > .reset = lrc_reset, > .destroy = lrc_destroy, > > + .create_parallel = execlists_create_parallel, > .create_virtual = execlists_create_virtual, > }; > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > index 56156cf18c41..70f4b309522d 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > @@ -1065,6 +1065,8 @@ lrc_pin(struct intel_context *ce, > > void lrc_unpin(struct intel_context *ce) > { > + if (unlikely(ce->parallel.last_rq)) > + i915_request_put(ce->parallel.last_rq); > check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, > ce->engine); > } > diff --git 
a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > index 1341752dc70e..ddc9a97fcc8f 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > @@ -2961,8 +2961,6 @@ static void guc_parent_context_unpin(struct intel_context *ce) > GEM_BUG_ON(!intel_context_is_parent(ce)); > GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); > > - if (ce->parallel.last_rq) > - i915_request_put(ce->parallel.last_rq); > unpin_guc_id(guc, ce); > lrc_unpin(ce); > } ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists @ 2021-10-26 21:58 ` John Harrison 0 siblings, 0 replies; 22+ messages in thread From: John Harrison @ 2021-10-26 21:58 UTC (permalink / raw) To: Matthew Brost, intel-gfx, dri-devel; +Cc: tvrtko.ursulin, daniele.ceraolospurio On 10/20/2021 14:47, Matthew Brost wrote: > A weak implementation of parallel submission (multi-bb execbuf IOCTL) for > execlists. Doing as little as possible to support this interface for > execlists - basically just passing submit fences between each request > generated and virtual engines are not allowed. This is on par with what > is there for the existing (hopefully soon deprecated) bonding interface. > > We perma-pin these execlists contexts to align with GuC implementation. > > v2: > (John Harrison) > - Drop siblings array as num_siblings must be 1 > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > --- > drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- > drivers/gpu/drm/i915/gt/intel_context.c | 4 +- > .../drm/i915/gt/intel_execlists_submission.c | 44 ++++++++++++++++++- > drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - > 5 files changed, 52 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c > index fb33d0322960..35e87a7d0ea9 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > struct intel_engine_cs **siblings = NULL; > intel_engine_mask_t prev_mask; > > - /* FIXME: This is NIY for execlists */ > - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) > - return -ENODEV; > - > if (get_user(slot, &ext->engine_index)) > return -EFAULT; > > @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > if 
(get_user(num_siblings, &ext->num_siblings)) > return -EFAULT; > > + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { > + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", > + num_siblings); > + return -EINVAL; > + } > + > if (slot >= set->num_engines) { > drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", > slot, set->num_engines); > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c > index 5634d14052bc..1bec92e1d8e6 100644 > --- a/drivers/gpu/drm/i915/gt/intel_context.c > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) > > __i915_active_acquire(&ce->active); > > - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) > + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || > + intel_context_is_parallel(ce)) > return 0; > > /* Preallocate tracking nodes */ > @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, > * Callers responsibility to validate that this function is used > * correctly but we use GEM_BUG_ON here ensure that they do. 
> */ > - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); > GEM_BUG_ON(intel_context_is_pinned(parent)); > GEM_BUG_ON(intel_context_is_child(parent)); > GEM_BUG_ON(intel_context_is_pinned(child)); > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > index bedb80057046..2865b422300d 100644 > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) > > static bool ctx_single_port_submission(const struct intel_context *ce) > { > - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && > - intel_context_force_single_submission(ce)); > + return intel_context_force_single_submission(ce); I think this is actually going to break GVT. Not so much this change here but the whole use of single submission outside of GVT. It looks like the GVT driver overloads the single submission flag to tag requests that it owns. If we start using that flag elsewhere when GVT is active, I think that will cause much confusion within the GVT code. The correct fix would be to create a new flag just for GVT usage alongside the single submission one. GVT would then set both but only check for its own private flag. The parallel code would obviously only set the existing single submission flag. 
> } > > static bool can_merge_ctx(const struct intel_context *prev, > @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, > current->comm); > } > > +static struct intel_context * > +execlists_create_parallel(struct intel_engine_cs **engines, > + unsigned int num_siblings, > + unsigned int width) > +{ > + struct intel_context *parent = NULL, *ce, *err; > + int i; > + > + GEM_BUG_ON(num_siblings != 1); > + > + for (i = 0; i < width; ++i) { > + ce = intel_context_create(engines[i]); > + if (!ce) { > + err = ERR_PTR(-ENOMEM); > + goto unwind; > + } > + > + if (i == 0) > + parent = ce; > + else > + intel_context_bind_parent_child(parent, ce); > + } > + > + parent->parallel.fence_context = dma_fence_context_alloc(1); > + > + intel_context_set_nopreempt(parent); > + intel_context_set_single_submission(parent); Can you explain the need for setting single submission? John. > + for_each_child(parent, ce) { > + intel_context_set_nopreempt(ce); > + intel_context_set_single_submission(ce); > + } > + > + return parent; > + > +unwind: > + if (parent) > + intel_context_put(parent); > + return err; > +} > + > static const struct intel_context_ops execlists_context_ops = { > .flags = COPS_HAS_INFLIGHT, > > @@ -2616,6 +2655,7 @@ static const struct intel_context_ops execlists_context_ops = { > .reset = lrc_reset, > .destroy = lrc_destroy, > > + .create_parallel = execlists_create_parallel, > .create_virtual = execlists_create_virtual, > }; > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > index 56156cf18c41..70f4b309522d 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > @@ -1065,6 +1065,8 @@ lrc_pin(struct intel_context *ce, > > void lrc_unpin(struct intel_context *ce) > { > + if (unlikely(ce->parallel.last_rq)) > + i915_request_put(ce->parallel.last_rq); > check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, > ce->engine); > } > diff --git 
a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > index 1341752dc70e..ddc9a97fcc8f 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > @@ -2961,8 +2961,6 @@ static void guc_parent_context_unpin(struct intel_context *ce) > GEM_BUG_ON(!intel_context_is_parent(ce)); > GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); > > - if (ce->parallel.last_rq) > - i915_request_put(ce->parallel.last_rq); > unpin_guc_id(guc, ce); > lrc_unpin(ce); > } ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH] drm/i915/execlists: Weak parallel submission support for execlists 2021-10-26 21:58 ` [Intel-gfx] " John Harrison @ 2021-10-27 19:17 ` Matthew Brost -1 siblings, 0 replies; 22+ messages in thread From: Matthew Brost @ 2021-10-27 19:17 UTC (permalink / raw) To: John Harrison; +Cc: intel-gfx, dri-devel, tvrtko.ursulin, daniele.ceraolospurio On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: > On 10/20/2021 14:47, Matthew Brost wrote: > > A weak implementation of parallel submission (multi-bb execbuf IOCTL) for > > execlists. Doing as little as possible to support this interface for > > execlists - basically just passing submit fences between each request > > generated and virtual engines are not allowed. This is on par with what > > is there for the existing (hopefully soon deprecated) bonding interface. > > > > We perma-pin these execlists contexts to align with GuC implementation. > > > > v2: > > (John Harrison) > > - Drop siblings array as num_siblings must be 1 > > > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > > --- > > drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- > > drivers/gpu/drm/i915/gt/intel_context.c | 4 +- > > .../drm/i915/gt/intel_execlists_submission.c | 44 ++++++++++++++++++- > > drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - > > 5 files changed, 52 insertions(+), 10 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > index fb33d0322960..35e87a7d0ea9 100644 > > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > > struct intel_engine_cs **siblings = NULL; > > intel_engine_mask_t prev_mask; > > - /* FIXME: This is NIY for execlists */ > > - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) > > - return -ENODEV; > > - > > 
if (get_user(slot, &ext->engine_index)) > > return -EFAULT; > > @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > > if (get_user(num_siblings, &ext->num_siblings)) > > return -EFAULT; > > + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { > > + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", > > + num_siblings); > > + return -EINVAL; > > + } > > + > > if (slot >= set->num_engines) { > > drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", > > slot, set->num_engines); > > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c > > index 5634d14052bc..1bec92e1d8e6 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_context.c > > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > > @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) > > __i915_active_acquire(&ce->active); > > - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) > > + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || > > + intel_context_is_parallel(ce)) > > return 0; > > /* Preallocate tracking nodes */ > > @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, > > * Callers responsibility to validate that this function is used > > * correctly but we use GEM_BUG_ON here ensure that they do. 
> > */ > > - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); > > GEM_BUG_ON(intel_context_is_pinned(parent)); > > GEM_BUG_ON(intel_context_is_child(parent)); > > GEM_BUG_ON(intel_context_is_pinned(child)); > > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > index bedb80057046..2865b422300d 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) > > static bool ctx_single_port_submission(const struct intel_context *ce) > > { > > - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && > > - intel_context_force_single_submission(ce)); > > + return intel_context_force_single_submission(ce); > I think this is actually going to break GVT. > > Not so much this change here but the whole use of single submission outside > of GVT. It looks like the GVT driver overloads the single submission flag to > tag requests that it owns. If we start using that flag elsewhere when GVT is > active, I think that will cause much confusion within the GVT code. > > The correct fix would be to create a new flag just for GVT usage alongside > the single submission one. GVT would then set both but only check for its > own private flag. The parallel code would obviously only set the existing > single submission flag. > Ok, see below. 
> > > } > > static bool can_merge_ctx(const struct intel_context *prev, > > @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, > > current->comm); > > } > > +static struct intel_context * > > +execlists_create_parallel(struct intel_engine_cs **engines, > > + unsigned int num_siblings, > > + unsigned int width) > > +{ > > + struct intel_context *parent = NULL, *ce, *err; > > + int i; > > + > > + GEM_BUG_ON(num_siblings != 1); > > + > > + for (i = 0; i < width; ++i) { > > + ce = intel_context_create(engines[i]); > > + if (!ce) { > > + err = ERR_PTR(-ENOMEM); > > + goto unwind; > > + } > > + > > + if (i == 0) > > + parent = ce; > > + else > > + intel_context_bind_parent_child(parent, ce); > > + } > > + > > + parent->parallel.fence_context = dma_fence_context_alloc(1); > > + > > + intel_context_set_nopreempt(parent); > > + intel_context_set_single_submission(parent); > Can you explain the need for setting single submission? > I think I can actually pull this out. This was needed when I tried to truly implement a guarantee that all the parallel requests would be running simultaneously. Couldn't ever get that working because of the mess that is the execlists scheduler - a simple wait at the head of queue until everyone joined just blew up for whatever reason. I don't believe this serves a purpose anymore, so I'll just drop it. Matt > John. 
> > > + for_each_child(parent, ce) { > > + intel_context_set_nopreempt(ce); > > + intel_context_set_single_submission(ce); > > + } > > + > > + return parent; > > + > > +unwind: > > + if (parent) > > + intel_context_put(parent); > > + return err; > > +} > > + > > static const struct intel_context_ops execlists_context_ops = { > > .flags = COPS_HAS_INFLIGHT, > > @@ -2616,6 +2655,7 @@ static const struct intel_context_ops execlists_context_ops = { > > .reset = lrc_reset, > > .destroy = lrc_destroy, > > + .create_parallel = execlists_create_parallel, > > .create_virtual = execlists_create_virtual, > > }; > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > > index 56156cf18c41..70f4b309522d 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > > @@ -1065,6 +1065,8 @@ lrc_pin(struct intel_context *ce, > > void lrc_unpin(struct intel_context *ce) > > { > > + if (unlikely(ce->parallel.last_rq)) > > + i915_request_put(ce->parallel.last_rq); > > check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, > > ce->engine); > > } > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > index 1341752dc70e..ddc9a97fcc8f 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > @@ -2961,8 +2961,6 @@ static void guc_parent_context_unpin(struct intel_context *ce) > > GEM_BUG_ON(!intel_context_is_parent(ce)); > > GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); > > - if (ce->parallel.last_rq) > > - i915_request_put(ce->parallel.last_rq); > > unpin_guc_id(guc, ce); > > lrc_unpin(ce); > > } > ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists @ 2021-10-27 19:17 ` Matthew Brost 0 siblings, 0 replies; 22+ messages in thread From: Matthew Brost @ 2021-10-27 19:17 UTC (permalink / raw) To: John Harrison; +Cc: intel-gfx, dri-devel, tvrtko.ursulin, daniele.ceraolospurio On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: > On 10/20/2021 14:47, Matthew Brost wrote: > > A weak implementation of parallel submission (multi-bb execbuf IOCTL) for > > execlists. Doing as little as possible to support this interface for > > execlists - basically just passing submit fences between each request > > generated and virtual engines are not allowed. This is on par with what > > is there for the existing (hopefully soon deprecated) bonding interface. > > > > We perma-pin these execlists contexts to align with GuC implementation. > > > > v2: > > (John Harrison) > > - Drop siblings array as num_siblings must be 1 > > > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > > --- > > drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- > > drivers/gpu/drm/i915/gt/intel_context.c | 4 +- > > .../drm/i915/gt/intel_execlists_submission.c | 44 ++++++++++++++++++- > > drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - > > 5 files changed, 52 insertions(+), 10 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > index fb33d0322960..35e87a7d0ea9 100644 > > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > > struct intel_engine_cs **siblings = NULL; > > intel_engine_mask_t prev_mask; > > - /* FIXME: This is NIY for execlists */ > > - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) > > - return -ENODEV; > > - > > if (get_user(slot, 
&ext->engine_index)) > > return -EFAULT; > > @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > > if (get_user(num_siblings, &ext->num_siblings)) > > return -EFAULT; > > + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { > > + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", > > + num_siblings); > > + return -EINVAL; > > + } > > + > > if (slot >= set->num_engines) { > > drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", > > slot, set->num_engines); > > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c > > index 5634d14052bc..1bec92e1d8e6 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_context.c > > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > > @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) > > __i915_active_acquire(&ce->active); > > - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) > > + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || > > + intel_context_is_parallel(ce)) > > return 0; > > /* Preallocate tracking nodes */ > > @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, > > * Callers responsibility to validate that this function is used > > * correctly but we use GEM_BUG_ON here ensure that they do. 
> > */ > > - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); > > GEM_BUG_ON(intel_context_is_pinned(parent)); > > GEM_BUG_ON(intel_context_is_child(parent)); > > GEM_BUG_ON(intel_context_is_pinned(child)); > > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > index bedb80057046..2865b422300d 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) > > static bool ctx_single_port_submission(const struct intel_context *ce) > > { > > - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && > > - intel_context_force_single_submission(ce)); > > + return intel_context_force_single_submission(ce); > I think this is actually going to break GVT. > > Not so much this change here but the whole use of single submission outside > of GVT. It looks like the GVT driver overloads the single submission flag to > tag requests that it owns. If we start using that flag elsewhere when GVT is > active, I think that will cause much confusion within the GVT code. > > The correct fix would be to create a new flag just for GVT usage alongside > the single submission one. GVT would then set both but only check for its > own private flag. The parallel code would obviously only set the existing > single submission flag. > Ok, see below. 
> > > } > > static bool can_merge_ctx(const struct intel_context *prev, > > @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, > > current->comm); > > } > > +static struct intel_context * > > +execlists_create_parallel(struct intel_engine_cs **engines, > > + unsigned int num_siblings, > > + unsigned int width) > > +{ > > + struct intel_context *parent = NULL, *ce, *err; > > + int i; > > + > > + GEM_BUG_ON(num_siblings != 1); > > + > > + for (i = 0; i < width; ++i) { > > + ce = intel_context_create(engines[i]); > > + if (!ce) { > > + err = ERR_PTR(-ENOMEM); > > + goto unwind; > > + } > > + > > + if (i == 0) > > + parent = ce; > > + else > > + intel_context_bind_parent_child(parent, ce); > > + } > > + > > + parent->parallel.fence_context = dma_fence_context_alloc(1); > > + > > + intel_context_set_nopreempt(parent); > > + intel_context_set_single_submission(parent); > Can you explain the need for setting single submission? > I think I can actually pull this out. This was needed when I tried to truely implement a guarante that all the parallel requests would be running simultaneously. Couldn't ever to get that working because of the mess that is the execlists scheduler - a simple wait at the head of queue until everyone joined just blew up for whatever reason. I don't believe this servers a purpose anymore, so I'll just drop it. Matt > John. 
> > > + for_each_child(parent, ce) { > > + intel_context_set_nopreempt(ce); > > + intel_context_set_single_submission(ce); > > + } > > + > > + return parent; > > + > > +unwind: > > + if (parent) > > + intel_context_put(parent); > > + return err; > > +} > > + > > static const struct intel_context_ops execlists_context_ops = { > > .flags = COPS_HAS_INFLIGHT, > > @@ -2616,6 +2655,7 @@ static const struct intel_context_ops execlists_context_ops = { > > .reset = lrc_reset, > > .destroy = lrc_destroy, > > + .create_parallel = execlists_create_parallel, > > .create_virtual = execlists_create_virtual, > > }; > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > > index 56156cf18c41..70f4b309522d 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > > @@ -1065,6 +1065,8 @@ lrc_pin(struct intel_context *ce, > > void lrc_unpin(struct intel_context *ce) > > { > > + if (unlikely(ce->parallel.last_rq)) > > + i915_request_put(ce->parallel.last_rq); > > check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, > > ce->engine); > > } > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > index 1341752dc70e..ddc9a97fcc8f 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > @@ -2961,8 +2961,6 @@ static void guc_parent_context_unpin(struct intel_context *ce) > > GEM_BUG_ON(!intel_context_is_parent(ce)); > > GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); > > - if (ce->parallel.last_rq) > > - i915_request_put(ce->parallel.last_rq); > > unpin_guc_id(guc, ce); > > lrc_unpin(ce); > > } > ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH] drm/i915/execlists: Weak parallel submission support for execlists 2021-10-27 19:17 ` [Intel-gfx] " Matthew Brost @ 2021-10-27 20:04 ` John Harrison -1 siblings, 0 replies; 22+ messages in thread From: John Harrison @ 2021-10-27 20:04 UTC (permalink / raw) To: Matthew Brost; +Cc: intel-gfx, dri-devel, tvrtko.ursulin, daniele.ceraolospurio On 10/27/2021 12:17, Matthew Brost wrote: > On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: >> On 10/20/2021 14:47, Matthew Brost wrote: >>> A weak implementation of parallel submission (multi-bb execbuf IOCTL) for >>> execlists. Doing as little as possible to support this interface for >>> execlists - basically just passing submit fences between each request >>> generated and virtual engines are not allowed. This is on par with what >>> is there for the existing (hopefully soon deprecated) bonding interface. >>> >>> We perma-pin these execlists contexts to align with GuC implementation. >>> >>> v2: >>> (John Harrison) >>> - Drop siblings array as num_siblings must be 1 >>> >>> Signed-off-by: Matthew Brost <matthew.brost@intel.com> >>> --- >>> drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- >>> drivers/gpu/drm/i915/gt/intel_context.c | 4 +- >>> .../drm/i915/gt/intel_execlists_submission.c | 44 ++++++++++++++++++- >>> drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + >>> .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - >>> 5 files changed, 52 insertions(+), 10 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c >>> index fb33d0322960..35e87a7d0ea9 100644 >>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c >>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c >>> @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, >>> struct intel_engine_cs **siblings = NULL; >>> intel_engine_mask_t prev_mask; >>> - /* FIXME: This is NIY for execlists */ >>> - if 
(!(intel_uc_uses_guc_submission(&i915->gt.uc))) >>> - return -ENODEV; >>> - >>> if (get_user(slot, &ext->engine_index)) >>> return -EFAULT; >>> @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, >>> if (get_user(num_siblings, &ext->num_siblings)) >>> return -EFAULT; >>> + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { >>> + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", >>> + num_siblings); >>> + return -EINVAL; >>> + } >>> + >>> if (slot >= set->num_engines) { >>> drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", >>> slot, set->num_engines); >>> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c >>> index 5634d14052bc..1bec92e1d8e6 100644 >>> --- a/drivers/gpu/drm/i915/gt/intel_context.c >>> +++ b/drivers/gpu/drm/i915/gt/intel_context.c >>> @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) >>> __i915_active_acquire(&ce->active); >>> - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) >>> + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || >>> + intel_context_is_parallel(ce)) >>> return 0; >>> /* Preallocate tracking nodes */ >>> @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, >>> * Callers responsibility to validate that this function is used >>> * correctly but we use GEM_BUG_ON here ensure that they do. 
>>> */ >>> - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); >>> GEM_BUG_ON(intel_context_is_pinned(parent)); >>> GEM_BUG_ON(intel_context_is_child(parent)); >>> GEM_BUG_ON(intel_context_is_pinned(child)); >>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>> index bedb80057046..2865b422300d 100644 >>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>> @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) >>> static bool ctx_single_port_submission(const struct intel_context *ce) >>> { >>> - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && >>> - intel_context_force_single_submission(ce)); >>> + return intel_context_force_single_submission(ce); >> I think this is actually going to break GVT. >> >> Not so much this change here but the whole use of single submission outside >> of GVT. It looks like the GVT driver overloads the single submission flag to >> tag requests that it owns. If we start using that flag elsewhere when GVT is >> active, I think that will cause much confusion within the GVT code. >> >> The correct fix would be to create a new flag just for GVT usage alongside >> the single submission one. GVT would then set both but only check for its >> own private flag. The parallel code would obviously only set the existing >> single submission flag. >> > Ok, see below. 
> >>> } >>> static bool can_merge_ctx(const struct intel_context *prev, >>> @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, >>> current->comm); >>> } >>> +static struct intel_context * >>> +execlists_create_parallel(struct intel_engine_cs **engines, >>> + unsigned int num_siblings, >>> + unsigned int width) >>> +{ >>> + struct intel_context *parent = NULL, *ce, *err; >>> + int i; >>> + >>> + GEM_BUG_ON(num_siblings != 1); >>> + >>> + for (i = 0; i < width; ++i) { >>> + ce = intel_context_create(engines[i]); >>> + if (!ce) { >>> + err = ERR_PTR(-ENOMEM); >>> + goto unwind; >>> + } >>> + >>> + if (i == 0) >>> + parent = ce; >>> + else >>> + intel_context_bind_parent_child(parent, ce); >>> + } >>> + >>> + parent->parallel.fence_context = dma_fence_context_alloc(1); >>> + >>> + intel_context_set_nopreempt(parent); >>> + intel_context_set_single_submission(parent); >> Can you explain the need for setting single submission? >> > I think I can actually pull this out. This was needed when I tried to > truely implement a guarante that all the parallel requests would be > running simultaneously. Couldn't ever to get that working because of the > mess that is the execlists scheduler - a simple wait at the head of > queue until everyone joined just blew up for whatever reason. I don't > believe this servers a purpose anymore, so I'll just drop it. > > Matt Is that not going to be a problem? I thought concurrent execution was a fundamental requirement? John. > >> John. 
>> >>> + for_each_child(parent, ce) { >>> + intel_context_set_nopreempt(ce); >>> + intel_context_set_single_submission(ce); >>> + } >>> + >>> + return parent; >>> + >>> +unwind: >>> + if (parent) >>> + intel_context_put(parent); >>> + return err; >>> +} >>> + >>> static const struct intel_context_ops execlists_context_ops = { >>> .flags = COPS_HAS_INFLIGHT, >>> @@ -2616,6 +2655,7 @@ static const struct intel_context_ops execlists_context_ops = { >>> .reset = lrc_reset, >>> .destroy = lrc_destroy, >>> + .create_parallel = execlists_create_parallel, >>> .create_virtual = execlists_create_virtual, >>> }; >>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c >>> index 56156cf18c41..70f4b309522d 100644 >>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c >>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c >>> @@ -1065,6 +1065,8 @@ lrc_pin(struct intel_context *ce, >>> void lrc_unpin(struct intel_context *ce) >>> { >>> + if (unlikely(ce->parallel.last_rq)) >>> + i915_request_put(ce->parallel.last_rq); >>> check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, >>> ce->engine); >>> } >>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c >>> index 1341752dc70e..ddc9a97fcc8f 100644 >>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c >>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c >>> @@ -2961,8 +2961,6 @@ static void guc_parent_context_unpin(struct intel_context *ce) >>> GEM_BUG_ON(!intel_context_is_parent(ce)); >>> GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); >>> - if (ce->parallel.last_rq) >>> - i915_request_put(ce->parallel.last_rq); >>> unpin_guc_id(guc, ce); >>> lrc_unpin(ce); >>> } ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists @ 2021-10-27 20:04 ` John Harrison 0 siblings, 0 replies; 22+ messages in thread From: John Harrison @ 2021-10-27 20:04 UTC (permalink / raw) To: Matthew Brost; +Cc: intel-gfx, dri-devel, tvrtko.ursulin, daniele.ceraolospurio On 10/27/2021 12:17, Matthew Brost wrote: > On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: >> On 10/20/2021 14:47, Matthew Brost wrote: >>> A weak implementation of parallel submission (multi-bb execbuf IOCTL) for >>> execlists. Doing as little as possible to support this interface for >>> execlists - basically just passing submit fences between each request >>> generated and virtual engines are not allowed. This is on par with what >>> is there for the existing (hopefully soon deprecated) bonding interface. >>> >>> We perma-pin these execlists contexts to align with GuC implementation. >>> >>> v2: >>> (John Harrison) >>> - Drop siblings array as num_siblings must be 1 >>> >>> Signed-off-by: Matthew Brost <matthew.brost@intel.com> >>> --- >>> drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- >>> drivers/gpu/drm/i915/gt/intel_context.c | 4 +- >>> .../drm/i915/gt/intel_execlists_submission.c | 44 ++++++++++++++++++- >>> drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + >>> .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - >>> 5 files changed, 52 insertions(+), 10 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c >>> index fb33d0322960..35e87a7d0ea9 100644 >>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c >>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c >>> @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, >>> struct intel_engine_cs **siblings = NULL; >>> intel_engine_mask_t prev_mask; >>> - /* FIXME: This is NIY for execlists */ >>> - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) >>> - return -ENODEV; 
>>> - >>> if (get_user(slot, &ext->engine_index)) >>> return -EFAULT; >>> @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, >>> if (get_user(num_siblings, &ext->num_siblings)) >>> return -EFAULT; >>> + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { >>> + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", >>> + num_siblings); >>> + return -EINVAL; >>> + } >>> + >>> if (slot >= set->num_engines) { >>> drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", >>> slot, set->num_engines); >>> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c >>> index 5634d14052bc..1bec92e1d8e6 100644 >>> --- a/drivers/gpu/drm/i915/gt/intel_context.c >>> +++ b/drivers/gpu/drm/i915/gt/intel_context.c >>> @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) >>> __i915_active_acquire(&ce->active); >>> - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) >>> + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || >>> + intel_context_is_parallel(ce)) >>> return 0; >>> /* Preallocate tracking nodes */ >>> @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, >>> * Callers responsibility to validate that this function is used >>> * correctly but we use GEM_BUG_ON here ensure that they do. 
>>> */ >>> - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); >>> GEM_BUG_ON(intel_context_is_pinned(parent)); >>> GEM_BUG_ON(intel_context_is_child(parent)); >>> GEM_BUG_ON(intel_context_is_pinned(child)); >>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>> index bedb80057046..2865b422300d 100644 >>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>> @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) >>> static bool ctx_single_port_submission(const struct intel_context *ce) >>> { >>> - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && >>> - intel_context_force_single_submission(ce)); >>> + return intel_context_force_single_submission(ce); >> I think this is actually going to break GVT. >> >> Not so much this change here but the whole use of single submission outside >> of GVT. It looks like the GVT driver overloads the single submission flag to >> tag requests that it owns. If we start using that flag elsewhere when GVT is >> active, I think that will cause much confusion within the GVT code. >> >> The correct fix would be to create a new flag just for GVT usage alongside >> the single submission one. GVT would then set both but only check for its >> own private flag. The parallel code would obviously only set the existing >> single submission flag. >> > Ok, see below. 
> >>> } >>> static bool can_merge_ctx(const struct intel_context *prev, >>> @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, >>> current->comm); >>> } >>> +static struct intel_context * >>> +execlists_create_parallel(struct intel_engine_cs **engines, >>> + unsigned int num_siblings, >>> + unsigned int width) >>> +{ >>> + struct intel_context *parent = NULL, *ce, *err; >>> + int i; >>> + >>> + GEM_BUG_ON(num_siblings != 1); >>> + >>> + for (i = 0; i < width; ++i) { >>> + ce = intel_context_create(engines[i]); >>> + if (!ce) { >>> + err = ERR_PTR(-ENOMEM); >>> + goto unwind; >>> + } >>> + >>> + if (i == 0) >>> + parent = ce; >>> + else >>> + intel_context_bind_parent_child(parent, ce); >>> + } >>> + >>> + parent->parallel.fence_context = dma_fence_context_alloc(1); >>> + >>> + intel_context_set_nopreempt(parent); >>> + intel_context_set_single_submission(parent); >> Can you explain the need for setting single submission? >> > I think I can actually pull this out. This was needed when I tried to > truely implement a guarante that all the parallel requests would be > running simultaneously. Couldn't ever to get that working because of the > mess that is the execlists scheduler - a simple wait at the head of > queue until everyone joined just blew up for whatever reason. I don't > believe this servers a purpose anymore, so I'll just drop it. > > Matt Is that not going to be a problem? I thought concurrent execution was a fundamental requirement? John. > >> John. 
>> >>> + for_each_child(parent, ce) { >>> + intel_context_set_nopreempt(ce); >>> + intel_context_set_single_submission(ce); >>> + } >>> + >>> + return parent; >>> + >>> +unwind: >>> + if (parent) >>> + intel_context_put(parent); >>> + return err; >>> +} >>> + >>> static const struct intel_context_ops execlists_context_ops = { >>> .flags = COPS_HAS_INFLIGHT, >>> @@ -2616,6 +2655,7 @@ static const struct intel_context_ops execlists_context_ops = { >>> .reset = lrc_reset, >>> .destroy = lrc_destroy, >>> + .create_parallel = execlists_create_parallel, >>> .create_virtual = execlists_create_virtual, >>> }; >>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c >>> index 56156cf18c41..70f4b309522d 100644 >>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c >>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c >>> @@ -1065,6 +1065,8 @@ lrc_pin(struct intel_context *ce, >>> void lrc_unpin(struct intel_context *ce) >>> { >>> + if (unlikely(ce->parallel.last_rq)) >>> + i915_request_put(ce->parallel.last_rq); >>> check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, >>> ce->engine); >>> } >>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c >>> index 1341752dc70e..ddc9a97fcc8f 100644 >>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c >>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c >>> @@ -2961,8 +2961,6 @@ static void guc_parent_context_unpin(struct intel_context *ce) >>> GEM_BUG_ON(!intel_context_is_parent(ce)); >>> GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); >>> - if (ce->parallel.last_rq) >>> - i915_request_put(ce->parallel.last_rq); >>> unpin_guc_id(guc, ce); >>> lrc_unpin(ce); >>> } ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH] drm/i915/execlists: Weak parallel submission support for execlists 2021-10-27 20:04 ` [Intel-gfx] " John Harrison @ 2021-10-27 20:10 ` Matthew Brost -1 siblings, 0 replies; 22+ messages in thread From: Matthew Brost @ 2021-10-27 20:10 UTC (permalink / raw) To: John Harrison; +Cc: intel-gfx, dri-devel, tvrtko.ursulin, daniele.ceraolospurio On Wed, Oct 27, 2021 at 01:04:49PM -0700, John Harrison wrote: > On 10/27/2021 12:17, Matthew Brost wrote: > > On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: > > > On 10/20/2021 14:47, Matthew Brost wrote: > > > > A weak implementation of parallel submission (multi-bb execbuf IOCTL) for > > > > execlists. Doing as little as possible to support this interface for > > > > execlists - basically just passing submit fences between each request > > > > generated and virtual engines are not allowed. This is on par with what > > > > is there for the existing (hopefully soon deprecated) bonding interface. > > > > > > > > We perma-pin these execlists contexts to align with GuC implementation. 
> > > > > > > > v2: > > > > (John Harrison) > > > > - Drop siblings array as num_siblings must be 1 > > > > > > > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > > > > --- > > > > drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- > > > > drivers/gpu/drm/i915/gt/intel_context.c | 4 +- > > > > .../drm/i915/gt/intel_execlists_submission.c | 44 ++++++++++++++++++- > > > > drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + > > > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - > > > > 5 files changed, 52 insertions(+), 10 deletions(-) > > > > > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > index fb33d0322960..35e87a7d0ea9 100644 > > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > > > > struct intel_engine_cs **siblings = NULL; > > > > intel_engine_mask_t prev_mask; > > > > - /* FIXME: This is NIY for execlists */ > > > > - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) > > > > - return -ENODEV; > > > > - > > > > if (get_user(slot, &ext->engine_index)) > > > > return -EFAULT; > > > > @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > > > > if (get_user(num_siblings, &ext->num_siblings)) > > > > return -EFAULT; > > > > + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { > > > > + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", > > > > + num_siblings); > > > > + return -EINVAL; > > > > + } > > > > + > > > > if (slot >= set->num_engines) { > > > > drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", > > > > slot, set->num_engines); > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c > > > > index 5634d14052bc..1bec92e1d8e6 100644 > > > > --- 
a/drivers/gpu/drm/i915/gt/intel_context.c > > > > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > > > > @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) > > > > __i915_active_acquire(&ce->active); > > > > - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) > > > > + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || > > > > + intel_context_is_parallel(ce)) > > > > return 0; > > > > /* Preallocate tracking nodes */ > > > > @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, > > > > * Callers responsibility to validate that this function is used > > > > * correctly but we use GEM_BUG_ON here ensure that they do. > > > > */ > > > > - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); > > > > GEM_BUG_ON(intel_context_is_pinned(parent)); > > > > GEM_BUG_ON(intel_context_is_child(parent)); > > > > GEM_BUG_ON(intel_context_is_pinned(child)); > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > index bedb80057046..2865b422300d 100644 > > > > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) > > > > static bool ctx_single_port_submission(const struct intel_context *ce) > > > > { > > > > - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && > > > > - intel_context_force_single_submission(ce)); > > > > + return intel_context_force_single_submission(ce); > > > I think this is actually going to break GVT. > > > > > > Not so much this change here but the whole use of single submission outside > > > of GVT. It looks like the GVT driver overloads the single submission flag to > > > tag requests that it owns. 
If we start using that flag elsewhere when GVT is > > > active, I think that will cause much confusion within the GVT code. > > > > > > The correct fix would be to create a new flag just for GVT usage alongside > > > the single submission one. GVT would then set both but only check for its > > > own private flag. The parallel code would obviously only set the existing > > > single submission flag. > > > > > Ok, see below. > > > > > > } > > > > static bool can_merge_ctx(const struct intel_context *prev, > > > > @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, > > > > current->comm); > > > > } > > > > +static struct intel_context * > > > > +execlists_create_parallel(struct intel_engine_cs **engines, > > > > + unsigned int num_siblings, > > > > + unsigned int width) > > > > +{ > > > > + struct intel_context *parent = NULL, *ce, *err; > > > > + int i; > > > > + > > > > + GEM_BUG_ON(num_siblings != 1); > > > > + > > > > + for (i = 0; i < width; ++i) { > > > > + ce = intel_context_create(engines[i]); > > > > + if (!ce) { > > > > + err = ERR_PTR(-ENOMEM); > > > > + goto unwind; > > > > + } > > > > + > > > > + if (i == 0) > > > > + parent = ce; > > > > + else > > > > + intel_context_bind_parent_child(parent, ce); > > > > + } > > > > + > > > > + parent->parallel.fence_context = dma_fence_context_alloc(1); > > > > + > > > > + intel_context_set_nopreempt(parent); > > > > + intel_context_set_single_submission(parent); > > > Can you explain the need for setting single submission? > > > > > I think I can actually pull this out. This was needed when I tried to > > truely implement a guarante that all the parallel requests would be > > running simultaneously. Couldn't ever to get that working because of the > > mess that is the execlists scheduler - a simple wait at the head of > > queue until everyone joined just blew up for whatever reason. I don't > > believe this servers a purpose anymore, so I'll just drop it. 
> > > > Matt > Is that not going to be a problem? I thought concurrent execution was a > fundamental requirement? > I don't think so. See the commit message. This implementation is on par with the bonding interface - there is no guarantee whatsoever that with the bonding interface bonded requests actually run at the same time. It says hopefully these submissions run together. That's what I do in this patch too for execlists, hence the 'weak' clause in the commit message. Matt > John. > > > > > > John. > > > > > > > + for_each_child(parent, ce) { > > > > + intel_context_set_nopreempt(ce); > > > > + intel_context_set_single_submission(ce); > > > > + } > > > > + > > > > + return parent; > > > > + > > > > +unwind: > > > > + if (parent) > > > > + intel_context_put(parent); > > > > + return err; > > > > +} > > > > + > > > > static const struct intel_context_ops execlists_context_ops = { > > > > .flags = COPS_HAS_INFLIGHT, > > > > @@ -2616,6 +2655,7 @@ static const struct intel_context_ops execlists_context_ops = { > > > > .reset = lrc_reset, > > > > .destroy = lrc_destroy, > > > > + .create_parallel = execlists_create_parallel, > > > > .create_virtual = execlists_create_virtual, > > > > }; > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > > > > index 56156cf18c41..70f4b309522d 100644 > > > > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > > > > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > > > > @@ -1065,6 +1065,8 @@ lrc_pin(struct intel_context *ce, > > > > void lrc_unpin(struct intel_context *ce) > > > > { > > > > + if (unlikely(ce->parallel.last_rq)) > > > > + i915_request_put(ce->parallel.last_rq); > > > > check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, > > > > ce->engine); > > > > } > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > > index 1341752dc70e..ddc9a97fcc8f 100644 > > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > > @@ -2961,8 +2961,6 @@ static void guc_parent_context_unpin(struct intel_context *ce) > > > > GEM_BUG_ON(!intel_context_is_parent(ce)); > > > > GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); > > > > - if (ce->parallel.last_rq) > > > > - i915_request_put(ce->parallel.last_rq); > > > > unpin_guc_id(guc, ce); > > > > lrc_unpin(ce); > > > > } > ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists @ 2021-10-27 20:10 ` Matthew Brost 0 siblings, 0 replies; 22+ messages in thread From: Matthew Brost @ 2021-10-27 20:10 UTC (permalink / raw) To: John Harrison; +Cc: intel-gfx, dri-devel, tvrtko.ursulin, daniele.ceraolospurio On Wed, Oct 27, 2021 at 01:04:49PM -0700, John Harrison wrote: > On 10/27/2021 12:17, Matthew Brost wrote: > > On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: > > > On 10/20/2021 14:47, Matthew Brost wrote: > > > > A weak implementation of parallel submission (multi-bb execbuf IOCTL) for > > > > execlists. Doing as little as possible to support this interface for > > > > execlists - basically just passing submit fences between each request > > > > generated and virtual engines are not allowed. This is on par with what > > > > is there for the existing (hopefully soon deprecated) bonding interface. > > > > > > > > We perma-pin these execlists contexts to align with GuC implementation. 
> > > > > > > > v2: > > > > (John Harrison) > > > > - Drop siblings array as num_siblings must be 1 > > > > > > > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > > > > --- > > > > drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- > > > > drivers/gpu/drm/i915/gt/intel_context.c | 4 +- > > > > .../drm/i915/gt/intel_execlists_submission.c | 44 ++++++++++++++++++- > > > > drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + > > > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - > > > > 5 files changed, 52 insertions(+), 10 deletions(-) > > > > > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > index fb33d0322960..35e87a7d0ea9 100644 > > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > > > > struct intel_engine_cs **siblings = NULL; > > > > intel_engine_mask_t prev_mask; > > > > - /* FIXME: This is NIY for execlists */ > > > > - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) > > > > - return -ENODEV; > > > > - > > > > if (get_user(slot, &ext->engine_index)) > > > > return -EFAULT; > > > > @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > > > > if (get_user(num_siblings, &ext->num_siblings)) > > > > return -EFAULT; > > > > + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { > > > > + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", > > > > + num_siblings); > > > > + return -EINVAL; > > > > + } > > > > + > > > > if (slot >= set->num_engines) { > > > > drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", > > > > slot, set->num_engines); > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c > > > > index 5634d14052bc..1bec92e1d8e6 100644 > > > > --- 
a/drivers/gpu/drm/i915/gt/intel_context.c > > > > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > > > > @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) > > > > __i915_active_acquire(&ce->active); > > > > - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) > > > > + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || > > > > + intel_context_is_parallel(ce)) > > > > return 0; > > > > /* Preallocate tracking nodes */ > > > > @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, > > > > * Callers responsibility to validate that this function is used > > > > * correctly but we use GEM_BUG_ON here ensure that they do. > > > > */ > > > > - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); > > > > GEM_BUG_ON(intel_context_is_pinned(parent)); > > > > GEM_BUG_ON(intel_context_is_child(parent)); > > > > GEM_BUG_ON(intel_context_is_pinned(child)); > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > index bedb80057046..2865b422300d 100644 > > > > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) > > > > static bool ctx_single_port_submission(const struct intel_context *ce) > > > > { > > > > - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && > > > > - intel_context_force_single_submission(ce)); > > > > + return intel_context_force_single_submission(ce); > > > I think this is actually going to break GVT. > > > > > > Not so much this change here but the whole use of single submission outside > > > of GVT. It looks like the GVT driver overloads the single submission flag to > > > tag requests that it owns. 
If we start using that flag elsewhere when GVT is > > > active, I think that will cause much confusion within the GVT code. > > > > > > The correct fix would be to create a new flag just for GVT usage alongside > > > the single submission one. GVT would then set both but only check for its > > > own private flag. The parallel code would obviously only set the existing > > > single submission flag. > > > > > Ok, see below. > > > > > > } > > > > static bool can_merge_ctx(const struct intel_context *prev, > > > > @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, > > > > current->comm); > > > > } > > > > +static struct intel_context * > > > > +execlists_create_parallel(struct intel_engine_cs **engines, > > > > + unsigned int num_siblings, > > > > + unsigned int width) > > > > +{ > > > > + struct intel_context *parent = NULL, *ce, *err; > > > > + int i; > > > > + > > > > + GEM_BUG_ON(num_siblings != 1); > > > > + > > > > + for (i = 0; i < width; ++i) { > > > > + ce = intel_context_create(engines[i]); > > > > + if (!ce) { > > > > + err = ERR_PTR(-ENOMEM); > > > > + goto unwind; > > > > + } > > > > + > > > > + if (i == 0) > > > > + parent = ce; > > > > + else > > > > + intel_context_bind_parent_child(parent, ce); > > > > + } > > > > + > > > > + parent->parallel.fence_context = dma_fence_context_alloc(1); > > > > + > > > > + intel_context_set_nopreempt(parent); > > > > + intel_context_set_single_submission(parent); > > > Can you explain the need for setting single submission? > > > > > I think I can actually pull this out. This was needed when I tried to > > truely implement a guarante that all the parallel requests would be > > running simultaneously. Couldn't ever to get that working because of the > > mess that is the execlists scheduler - a simple wait at the head of > > queue until everyone joined just blew up for whatever reason. I don't > > believe this servers a purpose anymore, so I'll just drop it. 
> > > > Matt > Is that not going to be a problem? I thought concurrent execution was a > fundamental requirement? > I don't think so. See the commit message. This implementation is on par with the bonding interface - there is no guarantee whatsoever that with the bonding interface bonded requests actually run at the same time. It says hopefully these submissions run together. That's what I do in this patch too for execlists, hence the 'weak' clause in the commit message. Matt > John. > > > > > > John. > > > > > > > + for_each_child(parent, ce) { > > > > + intel_context_set_nopreempt(ce); > > > > + intel_context_set_single_submission(ce); > > > > + } > > > > + > > > > + return parent; > > > > + > > > > +unwind: > > > > + if (parent) > > > > + intel_context_put(parent); > > > > + return err; > > > > +} > > > > + > > > > static const struct intel_context_ops execlists_context_ops = { > > > > .flags = COPS_HAS_INFLIGHT, > > > > @@ -2616,6 +2655,7 @@ static const struct intel_context_ops execlists_context_ops = { > > > > .reset = lrc_reset, > > > > .destroy = lrc_destroy, > > > > + .create_parallel = execlists_create_parallel, > > > > .create_virtual = execlists_create_virtual, > > > > }; > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > > > > index 56156cf18c41..70f4b309522d 100644 > > > > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > > > > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > > > > @@ -1065,6 +1065,8 @@ lrc_pin(struct intel_context *ce, > > > > void lrc_unpin(struct intel_context *ce) > > > > { > > > > + if (unlikely(ce->parallel.last_rq)) > > > > + i915_request_put(ce->parallel.last_rq); > > > > check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, > > > > ce->engine); > > > > } > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > > index 1341752dc70e..ddc9a97fcc8f 100644 > > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > > @@ -2961,8 +2961,6 @@ static void guc_parent_context_unpin(struct intel_context *ce) > > > > GEM_BUG_ON(!intel_context_is_parent(ce)); > > > > GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); > > > > - if (ce->parallel.last_rq) > > > > - i915_request_put(ce->parallel.last_rq); > > > > unpin_guc_id(guc, ce); > > > > lrc_unpin(ce); > > > > } > ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists 2021-10-27 20:10 ` [Intel-gfx] " Matthew Brost @ 2021-11-01 10:35 ` Tvrtko Ursulin -1 siblings, 0 replies; 22+ messages in thread From: Tvrtko Ursulin @ 2021-11-01 10:35 UTC (permalink / raw) To: Matthew Brost, John Harrison Cc: intel-gfx, daniele.ceraolospurio, dri-devel, tvrtko.ursulin On 27/10/2021 21:10, Matthew Brost wrote: > On Wed, Oct 27, 2021 at 01:04:49PM -0700, John Harrison wrote: >> On 10/27/2021 12:17, Matthew Brost wrote: >>> On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: >>>> On 10/20/2021 14:47, Matthew Brost wrote: >>>>> A weak implementation of parallel submission (multi-bb execbuf IOCTL) for >>>>> execlists. Doing as little as possible to support this interface for >>>>> execlists - basically just passing submit fences between each request >>>>> generated and virtual engines are not allowed. This is on par with what >>>>> is there for the existing (hopefully soon deprecated) bonding interface. >>>>> >>>>> We perma-pin these execlists contexts to align with GuC implementation. 
>>>>> >>>>> v2: >>>>> (John Harrison) >>>>> - Drop siblings array as num_siblings must be 1 >>>>> >>>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com> >>>>> --- >>>>> drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- >>>>> drivers/gpu/drm/i915/gt/intel_context.c | 4 +- >>>>> .../drm/i915/gt/intel_execlists_submission.c | 44 ++++++++++++++++++- >>>>> drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + >>>>> .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - >>>>> 5 files changed, 52 insertions(+), 10 deletions(-) >>>>> >>>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c >>>>> index fb33d0322960..35e87a7d0ea9 100644 >>>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c >>>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c >>>>> @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, >>>>> struct intel_engine_cs **siblings = NULL; >>>>> intel_engine_mask_t prev_mask; >>>>> - /* FIXME: This is NIY for execlists */ >>>>> - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) >>>>> - return -ENODEV; >>>>> - >>>>> if (get_user(slot, &ext->engine_index)) >>>>> return -EFAULT; >>>>> @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, >>>>> if (get_user(num_siblings, &ext->num_siblings)) >>>>> return -EFAULT; >>>>> + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { >>>>> + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", >>>>> + num_siblings); >>>>> + return -EINVAL; >>>>> + } >>>>> + >>>>> if (slot >= set->num_engines) { >>>>> drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", >>>>> slot, set->num_engines); >>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c >>>>> index 5634d14052bc..1bec92e1d8e6 100644 >>>>> --- a/drivers/gpu/drm/i915/gt/intel_context.c >>>>> +++ b/drivers/gpu/drm/i915/gt/intel_context.c >>>>> @@ -79,7 
+79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) >>>>> __i915_active_acquire(&ce->active); >>>>> - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) >>>>> + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || >>>>> + intel_context_is_parallel(ce)) >>>>> return 0; >>>>> /* Preallocate tracking nodes */ >>>>> @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, >>>>> * Callers responsibility to validate that this function is used >>>>> * correctly but we use GEM_BUG_ON here ensure that they do. >>>>> */ >>>>> - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); >>>>> GEM_BUG_ON(intel_context_is_pinned(parent)); >>>>> GEM_BUG_ON(intel_context_is_child(parent)); >>>>> GEM_BUG_ON(intel_context_is_pinned(child)); >>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>>>> index bedb80057046..2865b422300d 100644 >>>>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>>>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>>>> @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) >>>>> static bool ctx_single_port_submission(const struct intel_context *ce) >>>>> { >>>>> - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && >>>>> - intel_context_force_single_submission(ce)); >>>>> + return intel_context_force_single_submission(ce); >>>> I think this is actually going to break GVT. >>>> >>>> Not so much this change here but the whole use of single submission outside >>>> of GVT. It looks like the GVT driver overloads the single submission flag to >>>> tag requests that it owns. If we start using that flag elsewhere when GVT is >>>> active, I think that will cause much confusion within the GVT code. >>>> >>>> The correct fix would be to create a new flag just for GVT usage alongside >>>> the single submission one. 
GVT would then set both but only check for its >>>> own private flag. The parallel code would obviously only set the existing >>>> single submission flag. >>>> >>> Ok, see below. >>> >>>>> } >>>>> static bool can_merge_ctx(const struct intel_context *prev, >>>>> @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, >>>>> current->comm); >>>>> } >>>>> +static struct intel_context * >>>>> +execlists_create_parallel(struct intel_engine_cs **engines, >>>>> + unsigned int num_siblings, >>>>> + unsigned int width) >>>>> +{ >>>>> + struct intel_context *parent = NULL, *ce, *err; >>>>> + int i; >>>>> + >>>>> + GEM_BUG_ON(num_siblings != 1); >>>>> + >>>>> + for (i = 0; i < width; ++i) { >>>>> + ce = intel_context_create(engines[i]); >>>>> + if (!ce) { >>>>> + err = ERR_PTR(-ENOMEM); >>>>> + goto unwind; >>>>> + } >>>>> + >>>>> + if (i == 0) >>>>> + parent = ce; >>>>> + else >>>>> + intel_context_bind_parent_child(parent, ce); >>>>> + } >>>>> + >>>>> + parent->parallel.fence_context = dma_fence_context_alloc(1); >>>>> + >>>>> + intel_context_set_nopreempt(parent); >>>>> + intel_context_set_single_submission(parent); >>>> Can you explain the need for setting single submission? >>>> >>> I think I can actually pull this out. This was needed when I tried to >>> truely implement a guarante that all the parallel requests would be >>> running simultaneously. Couldn't ever to get that working because of the >>> mess that is the execlists scheduler - a simple wait at the head of >>> queue until everyone joined just blew up for whatever reason. I don't >>> believe this servers a purpose anymore, so I'll just drop it. >>> >>> Matt >> Is that not going to be a problem? I thought concurrent execution was a >> fundamental requirement? >> > > I don't think so. See the commit message. 
This implementation is on par > with the bonding interface - there is no guarantee whatsoever that with > the bonding interface bonded requests actually run at the same time. It > says hopefully these submissions run together. That's what I do in this > patch too for execlists, hence the 'weak' clause in the commit message. With the new uapi definition implying a stricter guarantee - why not have this patch use special bb semaphore pre/post-ambles so scheduling behaviour is closer between the two backends? Regards, Tvrtko > > Matt > >> John. >> >>> >>>> John. >>>> >>>>> + for_each_child(parent, ce) { >>>>> + intel_context_set_nopreempt(ce); >>>>> + intel_context_set_single_submission(ce); >>>>> + } >>>>> + >>>>> + return parent; >>>>> + >>>>> +unwind: >>>>> + if (parent) >>>>> + intel_context_put(parent); >>>>> + return err; >>>>> +} >>>>> + >>>>> static const struct intel_context_ops execlists_context_ops = { >>>>> .flags = COPS_HAS_INFLIGHT, >>>>> @@ -2616,6 +2655,7 @@ static const struct intel_context_ops execlists_context_ops = { >>>>> .reset = lrc_reset, >>>>> .destroy = lrc_destroy, >>>>> + .create_parallel = execlists_create_parallel, >>>>> .create_virtual = execlists_create_virtual, >>>>> }; >>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c >>>>> index 56156cf18c41..70f4b309522d 100644 >>>>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c >>>>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c >>>>> @@ -1065,6 +1065,8 @@ lrc_pin(struct intel_context *ce, >>>>> void lrc_unpin(struct intel_context *ce) >>>>> { >>>>> + if (unlikely(ce->parallel.last_rq)) >>>>> + i915_request_put(ce->parallel.last_rq); >>>>> check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, >>>>> ce->engine); >>>>> } >>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c >>>>> index 1341752dc70e..ddc9a97fcc8f 100644 >>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c >>>>> +++ 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c >>>>> @@ -2961,8 +2961,6 @@ static void guc_parent_context_unpin(struct intel_context *ce) >>>>> GEM_BUG_ON(!intel_context_is_parent(ce)); >>>>> GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); >>>>> - if (ce->parallel.last_rq) >>>>> - i915_request_put(ce->parallel.last_rq); >>>>> unpin_guc_id(guc, ce); >>>>> lrc_unpin(ce); >>>>> } >> ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists @ 2021-11-01 10:35 ` Tvrtko Ursulin 0 siblings, 0 replies; 22+ messages in thread From: Tvrtko Ursulin @ 2021-11-01 10:35 UTC (permalink / raw) To: Matthew Brost, John Harrison; +Cc: intel-gfx, dri-devel On 27/10/2021 21:10, Matthew Brost wrote: > On Wed, Oct 27, 2021 at 01:04:49PM -0700, John Harrison wrote: >> On 10/27/2021 12:17, Matthew Brost wrote: >>> On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: >>>> On 10/20/2021 14:47, Matthew Brost wrote: >>>>> A weak implementation of parallel submission (multi-bb execbuf IOCTL) for >>>>> execlists. Doing as little as possible to support this interface for >>>>> execlists - basically just passing submit fences between each request >>>>> generated and virtual engines are not allowed. This is on par with what >>>>> is there for the existing (hopefully soon deprecated) bonding interface. >>>>> >>>>> We perma-pin these execlists contexts to align with GuC implementation. 
>>>>> >>>>> v2: >>>>> (John Harrison) >>>>> - Drop siblings array as num_siblings must be 1 >>>>> >>>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com> >>>>> --- >>>>> drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- >>>>> drivers/gpu/drm/i915/gt/intel_context.c | 4 +- >>>>> .../drm/i915/gt/intel_execlists_submission.c | 44 ++++++++++++++++++- >>>>> drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + >>>>> .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - >>>>> 5 files changed, 52 insertions(+), 10 deletions(-) >>>>> >>>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c >>>>> index fb33d0322960..35e87a7d0ea9 100644 >>>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c >>>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c >>>>> @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, >>>>> struct intel_engine_cs **siblings = NULL; >>>>> intel_engine_mask_t prev_mask; >>>>> - /* FIXME: This is NIY for execlists */ >>>>> - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) >>>>> - return -ENODEV; >>>>> - >>>>> if (get_user(slot, &ext->engine_index)) >>>>> return -EFAULT; >>>>> @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, >>>>> if (get_user(num_siblings, &ext->num_siblings)) >>>>> return -EFAULT; >>>>> + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { >>>>> + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", >>>>> + num_siblings); >>>>> + return -EINVAL; >>>>> + } >>>>> + >>>>> if (slot >= set->num_engines) { >>>>> drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", >>>>> slot, set->num_engines); >>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c >>>>> index 5634d14052bc..1bec92e1d8e6 100644 >>>>> --- a/drivers/gpu/drm/i915/gt/intel_context.c >>>>> +++ b/drivers/gpu/drm/i915/gt/intel_context.c >>>>> @@ -79,7 
+79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) >>>>> __i915_active_acquire(&ce->active); >>>>> - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) >>>>> + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || >>>>> + intel_context_is_parallel(ce)) >>>>> return 0; >>>>> /* Preallocate tracking nodes */ >>>>> @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, >>>>> * Callers responsibility to validate that this function is used >>>>> * correctly but we use GEM_BUG_ON here ensure that they do. >>>>> */ >>>>> - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); >>>>> GEM_BUG_ON(intel_context_is_pinned(parent)); >>>>> GEM_BUG_ON(intel_context_is_child(parent)); >>>>> GEM_BUG_ON(intel_context_is_pinned(child)); >>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>>>> index bedb80057046..2865b422300d 100644 >>>>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>>>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>>>> @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) >>>>> static bool ctx_single_port_submission(const struct intel_context *ce) >>>>> { >>>>> - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && >>>>> - intel_context_force_single_submission(ce)); >>>>> + return intel_context_force_single_submission(ce); >>>> I think this is actually going to break GVT. >>>> >>>> Not so much this change here but the whole use of single submission outside >>>> of GVT. It looks like the GVT driver overloads the single submission flag to >>>> tag requests that it owns. If we start using that flag elsewhere when GVT is >>>> active, I think that will cause much confusion within the GVT code. >>>> >>>> The correct fix would be to create a new flag just for GVT usage alongside >>>> the single submission one. 
GVT would then set both but only check for its >>>> own private flag. The parallel code would obviously only set the existing >>>> single submission flag. >>>> >>> Ok, see below. >>> >>>>> } >>>>> static bool can_merge_ctx(const struct intel_context *prev, >>>>> @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, >>>>> current->comm); >>>>> } >>>>> +static struct intel_context * >>>>> +execlists_create_parallel(struct intel_engine_cs **engines, >>>>> + unsigned int num_siblings, >>>>> + unsigned int width) >>>>> +{ >>>>> + struct intel_context *parent = NULL, *ce, *err; >>>>> + int i; >>>>> + >>>>> + GEM_BUG_ON(num_siblings != 1); >>>>> + >>>>> + for (i = 0; i < width; ++i) { >>>>> + ce = intel_context_create(engines[i]); >>>>> + if (!ce) { >>>>> + err = ERR_PTR(-ENOMEM); >>>>> + goto unwind; >>>>> + } >>>>> + >>>>> + if (i == 0) >>>>> + parent = ce; >>>>> + else >>>>> + intel_context_bind_parent_child(parent, ce); >>>>> + } >>>>> + >>>>> + parent->parallel.fence_context = dma_fence_context_alloc(1); >>>>> + >>>>> + intel_context_set_nopreempt(parent); >>>>> + intel_context_set_single_submission(parent); >>>> Can you explain the need for setting single submission? >>>> >>> I think I can actually pull this out. This was needed when I tried to >>> truely implement a guarante that all the parallel requests would be >>> running simultaneously. Couldn't ever to get that working because of the >>> mess that is the execlists scheduler - a simple wait at the head of >>> queue until everyone joined just blew up for whatever reason. I don't >>> believe this servers a purpose anymore, so I'll just drop it. >>> >>> Matt >> Is that not going to be a problem? I thought concurrent execution was a >> fundamental requirement? >> > > I don't think so. See the commit message. 
This implementation is on par > with the bonding interface - there is no guarantee whatsoever that with > the bonding interface bonded requests actually run at the same time. It > says hopefully these submissions run together. That's what I do in this > patch too for execlists, hence the 'weak' clause in the commit message. With the new uapi definition implying a stricter guarantee - why not have this patch use special bb semaphore pre/post-ambles so scheduling behaviour is closer between the two backends? Regards, Tvrtko > > Matt > >> John. >> >>> >>>> John. >>>> >>>>> + for_each_child(parent, ce) { >>>>> + intel_context_set_nopreempt(ce); >>>>> + intel_context_set_single_submission(ce); >>>>> + } >>>>> + >>>>> + return parent; >>>>> + >>>>> +unwind: >>>>> + if (parent) >>>>> + intel_context_put(parent); >>>>> + return err; >>>>> +} >>>>> + >>>>> static const struct intel_context_ops execlists_context_ops = { >>>>> .flags = COPS_HAS_INFLIGHT, >>>>> @@ -2616,6 +2655,7 @@ static const struct intel_context_ops execlists_context_ops = { >>>>> .reset = lrc_reset, >>>>> .destroy = lrc_destroy, >>>>> + .create_parallel = execlists_create_parallel, >>>>> .create_virtual = execlists_create_virtual, >>>>> }; >>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c >>>>> index 56156cf18c41..70f4b309522d 100644 >>>>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c >>>>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c >>>>> @@ -1065,6 +1065,8 @@ lrc_pin(struct intel_context *ce, >>>>> void lrc_unpin(struct intel_context *ce) >>>>> { >>>>> + if (unlikely(ce->parallel.last_rq)) >>>>> + i915_request_put(ce->parallel.last_rq); >>>>> check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, >>>>> ce->engine); >>>>> } >>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c >>>>> index 1341752dc70e..ddc9a97fcc8f 100644 >>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c >>>>> +++ 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c >>>>> @@ -2961,8 +2961,6 @@ static void guc_parent_context_unpin(struct intel_context *ce) >>>>> GEM_BUG_ON(!intel_context_is_parent(ce)); >>>>> GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); >>>>> - if (ce->parallel.last_rq) >>>>> - i915_request_put(ce->parallel.last_rq); >>>>> unpin_guc_id(guc, ce); >>>>> lrc_unpin(ce); >>>>> } >> ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists 2021-11-01 10:35 ` Tvrtko Ursulin @ 2021-11-11 16:49 ` Matthew Brost -1 siblings, 0 replies; 22+ messages in thread From: Matthew Brost @ 2021-11-11 16:49 UTC (permalink / raw) To: Tvrtko Ursulin Cc: intel-gfx, daniele.ceraolospurio, dri-devel, John Harrison, tvrtko.ursulin On Mon, Nov 01, 2021 at 10:35:09AM +0000, Tvrtko Ursulin wrote: > > On 27/10/2021 21:10, Matthew Brost wrote: > > On Wed, Oct 27, 2021 at 01:04:49PM -0700, John Harrison wrote: > > > On 10/27/2021 12:17, Matthew Brost wrote: > > > > On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: > > > > > On 10/20/2021 14:47, Matthew Brost wrote: > > > > > > A weak implementation of parallel submission (multi-bb execbuf IOCTL) for > > > > > > execlists. Doing as little as possible to support this interface for > > > > > > execlists - basically just passing submit fences between each request > > > > > > generated and virtual engines are not allowed. This is on par with what > > > > > > is there for the existing (hopefully soon deprecated) bonding interface. > > > > > > > > > > > > We perma-pin these execlists contexts to align with GuC implementation. 
> > > > > > > > > > > > v2: > > > > > > (John Harrison) > > > > > > - Drop siblings array as num_siblings must be 1 > > > > > > > > > > > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > > > > > > --- > > > > > > drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- > > > > > > drivers/gpu/drm/i915/gt/intel_context.c | 4 +- > > > > > > .../drm/i915/gt/intel_execlists_submission.c | 44 ++++++++++++++++++- > > > > > > drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + > > > > > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - > > > > > > 5 files changed, 52 insertions(+), 10 deletions(-) > > > > > > > > > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > > > index fb33d0322960..35e87a7d0ea9 100644 > > > > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > > > @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > > > > > > struct intel_engine_cs **siblings = NULL; > > > > > > intel_engine_mask_t prev_mask; > > > > > > - /* FIXME: This is NIY for execlists */ > > > > > > - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) > > > > > > - return -ENODEV; > > > > > > - > > > > > > if (get_user(slot, &ext->engine_index)) > > > > > > return -EFAULT; > > > > > > @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > > > > > > if (get_user(num_siblings, &ext->num_siblings)) > > > > > > return -EFAULT; > > > > > > + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { > > > > > > + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", > > > > > > + num_siblings); > > > > > > + return -EINVAL; > > > > > > + } > > > > > > + > > > > > > if (slot >= set->num_engines) { > > > > > > drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", > > > > > > slot, set->num_engines); > > > > > > diff --git 
a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c > > > > > > index 5634d14052bc..1bec92e1d8e6 100644 > > > > > > --- a/drivers/gpu/drm/i915/gt/intel_context.c > > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > > > > > > @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) > > > > > > __i915_active_acquire(&ce->active); > > > > > > - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) > > > > > > + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || > > > > > > + intel_context_is_parallel(ce)) > > > > > > return 0; > > > > > > /* Preallocate tracking nodes */ > > > > > > @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, > > > > > > * Callers responsibility to validate that this function is used > > > > > > * correctly but we use GEM_BUG_ON here ensure that they do. > > > > > > */ > > > > > > - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); > > > > > > GEM_BUG_ON(intel_context_is_pinned(parent)); > > > > > > GEM_BUG_ON(intel_context_is_child(parent)); > > > > > > GEM_BUG_ON(intel_context_is_pinned(child)); > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > > > index bedb80057046..2865b422300d 100644 > > > > > > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > > > @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) > > > > > > static bool ctx_single_port_submission(const struct intel_context *ce) > > > > > > { > > > > > > - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && > > > > > > - intel_context_force_single_submission(ce)); > > > > > > + return intel_context_force_single_submission(ce); > > > > > I think this is actually going to break GVT. 
> > > > > > > > > > Not so much this change here but the whole use of single submission outside > > > > > of GVT. It looks like the GVT driver overloads the single submission flag to > > > > > tag requests that it owns. If we start using that flag elsewhere when GVT is > > > > > active, I think that will cause much confusion within the GVT code. > > > > > > > > > > The correct fix would be to create a new flag just for GVT usage alongside > > > > > the single submission one. GVT would then set both but only check for its > > > > > own private flag. The parallel code would obviously only set the existing > > > > > single submission flag. > > > > > > > > > Ok, see below. > > > > > > > > > > } > > > > > > static bool can_merge_ctx(const struct intel_context *prev, > > > > > > @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, > > > > > > current->comm); > > > > > > } > > > > > > +static struct intel_context * > > > > > > +execlists_create_parallel(struct intel_engine_cs **engines, > > > > > > + unsigned int num_siblings, > > > > > > + unsigned int width) > > > > > > +{ > > > > > > + struct intel_context *parent = NULL, *ce, *err; > > > > > > + int i; > > > > > > + > > > > > > + GEM_BUG_ON(num_siblings != 1); > > > > > > + > > > > > > + for (i = 0; i < width; ++i) { > > > > > > + ce = intel_context_create(engines[i]); > > > > > > + if (!ce) { > > > > > > + err = ERR_PTR(-ENOMEM); > > > > > > + goto unwind; > > > > > > + } > > > > > > + > > > > > > + if (i == 0) > > > > > > + parent = ce; > > > > > > + else > > > > > > + intel_context_bind_parent_child(parent, ce); > > > > > > + } > > > > > > + > > > > > > + parent->parallel.fence_context = dma_fence_context_alloc(1); > > > > > > + > > > > > > + intel_context_set_nopreempt(parent); > > > > > > + intel_context_set_single_submission(parent); > > > > > Can you explain the need for setting single submission? > > > > > > > > > I think I can actually pull this out. 
This was needed when I tried to > > > > truly implement a guarantee that all the parallel requests would be > > > > running simultaneously. Couldn't ever get that working because of the > > > > mess that is the execlists scheduler - a simple wait at the head of > > > > queue until everyone joined just blew up for whatever reason. I don't > > > > believe this serves a purpose anymore, so I'll just drop it. > > > > > > > > Matt > > > Is that not going to be a problem? I thought concurrent execution was a > > > fundamental requirement? > > > > > > > I don't think so. See the commit message. This implementation is on par > > with the bonding interface - there is no guarantee whatsoever that with > > the bonding interface bonded requests actually run at the same time. It > > says hopefully these submissions run together. That's what I do in this > > patch too for execlists, hence the 'weak' clause in the commit message. > > With the new uapi definition implying a stricter guarantee - why not have > this patch use special bb semaphore pre/post-ambles so scheduling behaviour > is closer between the two backends? > We could do that in a follow up if needed, as this is the bare minimum to get this uAPI working. The real fix would be to update the execlists scheduler to be able to do a join of parallel requests and then schedule them together. Should be fairly simple, tried to do this, but the execlists scheduler is such a mess doing something simple is near impossible. IMO there is little point wasting time on a legacy submission interface. This implementation works as well as the old uAPI, let's get this in and move on. Matt > Regards, > > Tvrtko > > > > > Matt > > > > > John. > > > > > > > > > > > > John. 
> > > > > > > > > > > + for_each_child(parent, ce) { > > > > > > + intel_context_set_nopreempt(ce); > > > > > > + intel_context_set_single_submission(ce); > > > > > > + } > > > > > > + > > > > > > + return parent; > > > > > > + > > > > > > +unwind: > > > > > > + if (parent) > > > > > > + intel_context_put(parent); > > > > > > + return err; > > > > > > +} > > > > > > + > > > > > > static const struct intel_context_ops execlists_context_ops = { > > > > > > .flags = COPS_HAS_INFLIGHT, > > > > > > @@ -2616,6 +2655,7 @@ static const struct intel_context_ops execlists_context_ops = { > > > > > > .reset = lrc_reset, > > > > > > .destroy = lrc_destroy, > > > > > > + .create_parallel = execlists_create_parallel, > > > > > > .create_virtual = execlists_create_virtual, > > > > > > }; > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > > > > > > index 56156cf18c41..70f4b309522d 100644 > > > > > > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > > > > > > @@ -1065,6 +1065,8 @@ lrc_pin(struct intel_context *ce, > > > > > > void lrc_unpin(struct intel_context *ce) > > > > > > { > > > > > > + if (unlikely(ce->parallel.last_rq)) > > > > > > + i915_request_put(ce->parallel.last_rq); > > > > > > check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, > > > > > > ce->engine); > > > > > > } > > > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > > > > index 1341752dc70e..ddc9a97fcc8f 100644 > > > > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > > > > @@ -2961,8 +2961,6 @@ static void guc_parent_context_unpin(struct intel_context *ce) > > > > > > GEM_BUG_ON(!intel_context_is_parent(ce)); > > > > > > GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); > > > > > > - if (ce->parallel.last_rq) > > > > > > - 
i915_request_put(ce->parallel.last_rq); > > > > > > unpin_guc_id(guc, ce); > > > > > > lrc_unpin(ce); > > > > > > } > > > ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists @ 2021-11-11 16:49 ` Matthew Brost 0 siblings, 0 replies; 22+ messages in thread From: Matthew Brost @ 2021-11-11 16:49 UTC (permalink / raw) To: Tvrtko Ursulin; +Cc: intel-gfx, dri-devel On Mon, Nov 01, 2021 at 10:35:09AM +0000, Tvrtko Ursulin wrote: > > On 27/10/2021 21:10, Matthew Brost wrote: > > On Wed, Oct 27, 2021 at 01:04:49PM -0700, John Harrison wrote: > > > On 10/27/2021 12:17, Matthew Brost wrote: > > > > On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: > > > > > On 10/20/2021 14:47, Matthew Brost wrote: > > > > > > A weak implementation of parallel submission (multi-bb execbuf IOCTL) for > > > > > > execlists. Doing as little as possible to support this interface for > > > > > > execlists - basically just passing submit fences between each request > > > > > > generated and virtual engines are not allowed. This is on par with what > > > > > > is there for the existing (hopefully soon deprecated) bonding interface. > > > > > > > > > > > > We perma-pin these execlists contexts to align with GuC implementation. 
> > > > > > > > > > > > v2: > > > > > > (John Harrison) > > > > > > - Drop siblings array as num_siblings must be 1 > > > > > > > > > > > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > > > > > > --- > > > > > > drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- > > > > > > drivers/gpu/drm/i915/gt/intel_context.c | 4 +- > > > > > > .../drm/i915/gt/intel_execlists_submission.c | 44 ++++++++++++++++++- > > > > > > drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + > > > > > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - > > > > > > 5 files changed, 52 insertions(+), 10 deletions(-) > > > > > > > > > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > > > index fb33d0322960..35e87a7d0ea9 100644 > > > > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > > > @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > > > > > > struct intel_engine_cs **siblings = NULL; > > > > > > intel_engine_mask_t prev_mask; > > > > > > - /* FIXME: This is NIY for execlists */ > > > > > > - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) > > > > > > - return -ENODEV; > > > > > > - > > > > > > if (get_user(slot, &ext->engine_index)) > > > > > > return -EFAULT; > > > > > > @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > > > > > > if (get_user(num_siblings, &ext->num_siblings)) > > > > > > return -EFAULT; > > > > > > + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { > > > > > > + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", > > > > > > + num_siblings); > > > > > > + return -EINVAL; > > > > > > + } > > > > > > + > > > > > > if (slot >= set->num_engines) { > > > > > > drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", > > > > > > slot, set->num_engines); > > > > > > diff --git 
a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c > > > > > > index 5634d14052bc..1bec92e1d8e6 100644 > > > > > > --- a/drivers/gpu/drm/i915/gt/intel_context.c > > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > > > > > > @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) > > > > > > __i915_active_acquire(&ce->active); > > > > > > - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) > > > > > > + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || > > > > > > + intel_context_is_parallel(ce)) > > > > > > return 0; > > > > > > /* Preallocate tracking nodes */ > > > > > > @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, > > > > > > * Callers responsibility to validate that this function is used > > > > > > * correctly but we use GEM_BUG_ON here ensure that they do. > > > > > > */ > > > > > > - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); > > > > > > GEM_BUG_ON(intel_context_is_pinned(parent)); > > > > > > GEM_BUG_ON(intel_context_is_child(parent)); > > > > > > GEM_BUG_ON(intel_context_is_pinned(child)); > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > > > index bedb80057046..2865b422300d 100644 > > > > > > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > > > @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) > > > > > > static bool ctx_single_port_submission(const struct intel_context *ce) > > > > > > { > > > > > > - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && > > > > > > - intel_context_force_single_submission(ce)); > > > > > > + return intel_context_force_single_submission(ce); > > > > > I think this is actually going to break GVT. 
> > > > > > > > > > Not so much this change here but the whole use of single submission outside > > > > > of GVT. It looks like the GVT driver overloads the single submission flag to > > > > > tag requests that it owns. If we start using that flag elsewhere when GVT is > > > > > active, I think that will cause much confusion within the GVT code. > > > > > > > > > > The correct fix would be to create a new flag just for GVT usage alongside > > > > > the single submission one. GVT would then set both but only check for its > > > > > own private flag. The parallel code would obviously only set the existing > > > > > single submission flag. > > > > > > > > > Ok, see below. > > > > > > > > > > } > > > > > > static bool can_merge_ctx(const struct intel_context *prev, > > > > > > @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, > > > > > > current->comm); > > > > > > } > > > > > > +static struct intel_context * > > > > > > +execlists_create_parallel(struct intel_engine_cs **engines, > > > > > > + unsigned int num_siblings, > > > > > > + unsigned int width) > > > > > > +{ > > > > > > + struct intel_context *parent = NULL, *ce, *err; > > > > > > + int i; > > > > > > + > > > > > > + GEM_BUG_ON(num_siblings != 1); > > > > > > + > > > > > > + for (i = 0; i < width; ++i) { > > > > > > + ce = intel_context_create(engines[i]); > > > > > > + if (!ce) { > > > > > > + err = ERR_PTR(-ENOMEM); > > > > > > + goto unwind; > > > > > > + } > > > > > > + > > > > > > + if (i == 0) > > > > > > + parent = ce; > > > > > > + else > > > > > > + intel_context_bind_parent_child(parent, ce); > > > > > > + } > > > > > > + > > > > > > + parent->parallel.fence_context = dma_fence_context_alloc(1); > > > > > > + > > > > > > + intel_context_set_nopreempt(parent); > > > > > > + intel_context_set_single_submission(parent); > > > > > Can you explain the need for setting single submission? > > > > > > > > > I think I can actually pull this out. 
This was needed when I tried to > > > > truly implement a guarantee that all the parallel requests would be > > > > running simultaneously. Couldn't ever get that working because of the > > > > mess that is the execlists scheduler - a simple wait at the head of > > > > queue until everyone joined just blew up for whatever reason. I don't > > > > believe this serves a purpose anymore, so I'll just drop it. > > > > > > > > Matt > > > Is that not going to be a problem? I thought concurrent execution was a > > > fundamental requirement? > > > > > > > I don't think so. See the commit message. This implementation is on par > > with the bonding interface - there is no guarantee whatsoever that with > > the bonding interface bonded requests actually run at the same time. It > > says hopefully these submissions run together. That's what I do in this > > patch too for execlists, hence the 'weak' clause in the commit message. > > With the new uapi definition implying a stricter guarantee - why not have > this patch use special bb semaphore pre/post-ambles so scheduling behaviour > is closer between the two backends? > We could do that in a follow up if needed, as this is the bare minimum to get this uAPI working. The real fix would be to update the execlists scheduler to be able to do a join of parallel requests and then schedule them together. Should be fairly simple, tried to do this, but the execlists scheduler is such a mess doing something simple is near impossible. IMO there is little point wasting time on a legacy submission interface. This implementation works as well as the old uAPI, let's get this in and move on. Matt > Regards, > > Tvrtko > > > > > Matt > > > > > John. > > > > > > > > > > > > John. 
> > > > > > > > > > > + for_each_child(parent, ce) { > > > > > > + intel_context_set_nopreempt(ce); > > > > > > + intel_context_set_single_submission(ce); > > > > > > + } > > > > > > + > > > > > > + return parent; > > > > > > + > > > > > > +unwind: > > > > > > + if (parent) > > > > > > + intel_context_put(parent); > > > > > > + return err; > > > > > > +} > > > > > > + > > > > > > static const struct intel_context_ops execlists_context_ops = { > > > > > > .flags = COPS_HAS_INFLIGHT, > > > > > > @@ -2616,6 +2655,7 @@ static const struct intel_context_ops execlists_context_ops = { > > > > > > .reset = lrc_reset, > > > > > > .destroy = lrc_destroy, > > > > > > + .create_parallel = execlists_create_parallel, > > > > > > .create_virtual = execlists_create_virtual, > > > > > > }; > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > > > > > > index 56156cf18c41..70f4b309522d 100644 > > > > > > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > > > > > > @@ -1065,6 +1065,8 @@ lrc_pin(struct intel_context *ce, > > > > > > void lrc_unpin(struct intel_context *ce) > > > > > > { > > > > > > + if (unlikely(ce->parallel.last_rq)) > > > > > > + i915_request_put(ce->parallel.last_rq); > > > > > > check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, > > > > > > ce->engine); > > > > > > } > > > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > > > > index 1341752dc70e..ddc9a97fcc8f 100644 > > > > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > > > > @@ -2961,8 +2961,6 @@ static void guc_parent_context_unpin(struct intel_context *ce) > > > > > > GEM_BUG_ON(!intel_context_is_parent(ce)); > > > > > > GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); > > > > > > - if (ce->parallel.last_rq) > > > > > > - 
i915_request_put(ce->parallel.last_rq); > > > > > > unpin_guc_id(guc, ce); > > > > > > lrc_unpin(ce); > > > > > > } > > > ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists 2021-11-11 16:49 ` Matthew Brost @ 2021-11-12 14:13 ` Tvrtko Ursulin -1 siblings, 0 replies; 22+ messages in thread From: Tvrtko Ursulin @ 2021-11-12 14:13 UTC (permalink / raw) To: Matthew Brost Cc: intel-gfx, daniele.ceraolospurio, dri-devel, John Harrison, tvrtko.ursulin On 11/11/2021 16:49, Matthew Brost wrote: > On Mon, Nov 01, 2021 at 10:35:09AM +0000, Tvrtko Ursulin wrote: >> >> On 27/10/2021 21:10, Matthew Brost wrote: >>> On Wed, Oct 27, 2021 at 01:04:49PM -0700, John Harrison wrote: >>>> On 10/27/2021 12:17, Matthew Brost wrote: >>>>> On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: >>>>>> On 10/20/2021 14:47, Matthew Brost wrote: >>>>>>> A weak implementation of parallel submission (multi-bb execbuf IOCTL) for >>>>>>> execlists. Doing as little as possible to support this interface for >>>>>>> execlists - basically just passing submit fences between each request >>>>>>> generated and virtual engines are not allowed. This is on par with what >>>>>>> is there for the existing (hopefully soon deprecated) bonding interface. >>>>>>> >>>>>>> We perma-pin these execlists contexts to align with GuC implementation. 
>>>>>>> >>>>>>> v2: >>>>>>> (John Harrison) >>>>>>> - Drop siblings array as num_siblings must be 1 >>>>>>> >>>>>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com> >>>>>>> --- >>>>>>> drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- >>>>>>> drivers/gpu/drm/i915/gt/intel_context.c | 4 +- >>>>>>> .../drm/i915/gt/intel_execlists_submission.c | 44 ++++++++++++++++++- >>>>>>> drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + >>>>>>> .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - >>>>>>> 5 files changed, 52 insertions(+), 10 deletions(-) >>>>>>> >>>>>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c >>>>>>> index fb33d0322960..35e87a7d0ea9 100644 >>>>>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c >>>>>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c >>>>>>> @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, >>>>>>> struct intel_engine_cs **siblings = NULL; >>>>>>> intel_engine_mask_t prev_mask; >>>>>>> - /* FIXME: This is NIY for execlists */ >>>>>>> - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) >>>>>>> - return -ENODEV; >>>>>>> - >>>>>>> if (get_user(slot, &ext->engine_index)) >>>>>>> return -EFAULT; >>>>>>> @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, >>>>>>> if (get_user(num_siblings, &ext->num_siblings)) >>>>>>> return -EFAULT; >>>>>>> + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { >>>>>>> + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", >>>>>>> + num_siblings); >>>>>>> + return -EINVAL; >>>>>>> + } >>>>>>> + >>>>>>> if (slot >= set->num_engines) { >>>>>>> drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", >>>>>>> slot, set->num_engines); >>>>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c >>>>>>> index 5634d14052bc..1bec92e1d8e6 100644 >>>>>>> --- 
a/drivers/gpu/drm/i915/gt/intel_context.c >>>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_context.c >>>>>>> @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) >>>>>>> __i915_active_acquire(&ce->active); >>>>>>> - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) >>>>>>> + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || >>>>>>> + intel_context_is_parallel(ce)) >>>>>>> return 0; >>>>>>> /* Preallocate tracking nodes */ >>>>>>> @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, >>>>>>> * Callers responsibility to validate that this function is used >>>>>>> * correctly but we use GEM_BUG_ON here ensure that they do. >>>>>>> */ >>>>>>> - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); >>>>>>> GEM_BUG_ON(intel_context_is_pinned(parent)); >>>>>>> GEM_BUG_ON(intel_context_is_child(parent)); >>>>>>> GEM_BUG_ON(intel_context_is_pinned(child)); >>>>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>>>>>> index bedb80057046..2865b422300d 100644 >>>>>>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>>>>>> @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) >>>>>>> static bool ctx_single_port_submission(const struct intel_context *ce) >>>>>>> { >>>>>>> - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && >>>>>>> - intel_context_force_single_submission(ce)); >>>>>>> + return intel_context_force_single_submission(ce); >>>>>> I think this is actually going to break GVT. >>>>>> >>>>>> Not so much this change here but the whole use of single submission outside >>>>>> of GVT. It looks like the GVT driver overloads the single submission flag to >>>>>> tag requests that it owns. 
If we start using that flag elsewhere when GVT is >>>>>> active, I think that will cause much confusion within the GVT code. >>>>>> >>>>>> The correct fix would be to create a new flag just for GVT usage alongside >>>>>> the single submission one. GVT would then set both but only check for its >>>>>> own private flag. The parallel code would obviously only set the existing >>>>>> single submission flag. >>>>>> >>>>> Ok, see below. >>>>> >>>>>>> } >>>>>>> static bool can_merge_ctx(const struct intel_context *prev, >>>>>>> @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, >>>>>>> current->comm); >>>>>>> } >>>>>>> +static struct intel_context * >>>>>>> +execlists_create_parallel(struct intel_engine_cs **engines, >>>>>>> + unsigned int num_siblings, >>>>>>> + unsigned int width) >>>>>>> +{ >>>>>>> + struct intel_context *parent = NULL, *ce, *err; >>>>>>> + int i; >>>>>>> + >>>>>>> + GEM_BUG_ON(num_siblings != 1); >>>>>>> + >>>>>>> + for (i = 0; i < width; ++i) { >>>>>>> + ce = intel_context_create(engines[i]); >>>>>>> + if (!ce) { >>>>>>> + err = ERR_PTR(-ENOMEM); >>>>>>> + goto unwind; >>>>>>> + } >>>>>>> + >>>>>>> + if (i == 0) >>>>>>> + parent = ce; >>>>>>> + else >>>>>>> + intel_context_bind_parent_child(parent, ce); >>>>>>> + } >>>>>>> + >>>>>>> + parent->parallel.fence_context = dma_fence_context_alloc(1); >>>>>>> + >>>>>>> + intel_context_set_nopreempt(parent); >>>>>>> + intel_context_set_single_submission(parent); >>>>>> Can you explain the need for setting single submission? >>>>>> >>>>> I think I can actually pull this out. This was needed when I tried to >>>>> truely implement a guarante that all the parallel requests would be >>>>> running simultaneously. Couldn't ever to get that working because of the >>>>> mess that is the execlists scheduler - a simple wait at the head of >>>>> queue until everyone joined just blew up for whatever reason. 
I don't >>>>> believe this serves a purpose anymore, so I'll just drop it. >>>>> >>>>> Matt >>>> Is that not going to be a problem? I thought concurrent execution was a >>>> fundamental requirement? >>>> >>> >>> I don't think so. See the commit message. This implementation is on par >>> with the bonding interface - there is no guarantee whatsoever that with >>> the bonding interface bonded requests actually run at the same time. It >>> says hopefully these submissions run together. That's what I do in this >>> patch too for execlists, hence the 'weak' clause in the commit message. >> >> With the new uapi definition implying a stricter guarantee - why not have >> this patch use special bb semaphore pre/post-ambles so scheduling behaviour >> is closer between the two backends? >> > > We could do that in a follow up if needed, as this is the bare minimum to get > this uAPI working. The real fix would be to update the execlists scheduler > to be able to do a join of parallel requests and then schedule them > together. Should be fairly simple, tried to do this, but the execlists > scheduler is such a mess doing something simple is near impossible. IMO > there is little point wasting time on a legacy submission interface. > This implementation works as well as the old uAPI, let's get this in and > move on. Bashing aside, what downside do you see in just doing what I suggested right now? Code is there and all so it is a simple matter of adding a conditional somewhere to use it. And it would make the behaviour between the two backends closer. So it sounds like a no brainer to me. Or I am missing something? Regards, Tvrtko ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists @ 2021-11-12 14:13 ` Tvrtko Ursulin 0 siblings, 0 replies; 22+ messages in thread From: Tvrtko Ursulin @ 2021-11-12 14:13 UTC (permalink / raw) To: Matthew Brost; +Cc: intel-gfx, dri-devel On 11/11/2021 16:49, Matthew Brost wrote: > On Mon, Nov 01, 2021 at 10:35:09AM +0000, Tvrtko Ursulin wrote: >> >> On 27/10/2021 21:10, Matthew Brost wrote: >>> On Wed, Oct 27, 2021 at 01:04:49PM -0700, John Harrison wrote: >>>> On 10/27/2021 12:17, Matthew Brost wrote: >>>>> On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: >>>>>> On 10/20/2021 14:47, Matthew Brost wrote: >>>>>>> A weak implementation of parallel submission (multi-bb execbuf IOCTL) for >>>>>>> execlists. Doing as little as possible to support this interface for >>>>>>> execlists - basically just passing submit fences between each request >>>>>>> generated and virtual engines are not allowed. This is on par with what >>>>>>> is there for the existing (hopefully soon deprecated) bonding interface. >>>>>>> >>>>>>> We perma-pin these execlists contexts to align with GuC implementation. 
>>>>>>> >>>>>>> v2: >>>>>>> (John Harrison) >>>>>>> - Drop siblings array as num_siblings must be 1 >>>>>>> >>>>>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com> >>>>>>> --- >>>>>>> drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- >>>>>>> drivers/gpu/drm/i915/gt/intel_context.c | 4 +- >>>>>>> .../drm/i915/gt/intel_execlists_submission.c | 44 ++++++++++++++++++- >>>>>>> drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + >>>>>>> .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - >>>>>>> 5 files changed, 52 insertions(+), 10 deletions(-) >>>>>>> >>>>>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c >>>>>>> index fb33d0322960..35e87a7d0ea9 100644 >>>>>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c >>>>>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c >>>>>>> @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, >>>>>>> struct intel_engine_cs **siblings = NULL; >>>>>>> intel_engine_mask_t prev_mask; >>>>>>> - /* FIXME: This is NIY for execlists */ >>>>>>> - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) >>>>>>> - return -ENODEV; >>>>>>> - >>>>>>> if (get_user(slot, &ext->engine_index)) >>>>>>> return -EFAULT; >>>>>>> @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, >>>>>>> if (get_user(num_siblings, &ext->num_siblings)) >>>>>>> return -EFAULT; >>>>>>> + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { >>>>>>> + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", >>>>>>> + num_siblings); >>>>>>> + return -EINVAL; >>>>>>> + } >>>>>>> + >>>>>>> if (slot >= set->num_engines) { >>>>>>> drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", >>>>>>> slot, set->num_engines); >>>>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c >>>>>>> index 5634d14052bc..1bec92e1d8e6 100644 >>>>>>> --- 
a/drivers/gpu/drm/i915/gt/intel_context.c >>>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_context.c >>>>>>> @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) >>>>>>> __i915_active_acquire(&ce->active); >>>>>>> - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) >>>>>>> + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || >>>>>>> + intel_context_is_parallel(ce)) >>>>>>> return 0; >>>>>>> /* Preallocate tracking nodes */ >>>>>>> @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, >>>>>>> * Callers responsibility to validate that this function is used >>>>>>> * correctly but we use GEM_BUG_ON here ensure that they do. >>>>>>> */ >>>>>>> - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); >>>>>>> GEM_BUG_ON(intel_context_is_pinned(parent)); >>>>>>> GEM_BUG_ON(intel_context_is_child(parent)); >>>>>>> GEM_BUG_ON(intel_context_is_pinned(child)); >>>>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>>>>>> index bedb80057046..2865b422300d 100644 >>>>>>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>>>>>> @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) >>>>>>> static bool ctx_single_port_submission(const struct intel_context *ce) >>>>>>> { >>>>>>> - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && >>>>>>> - intel_context_force_single_submission(ce)); >>>>>>> + return intel_context_force_single_submission(ce); >>>>>> I think this is actually going to break GVT. >>>>>> >>>>>> Not so much this change here but the whole use of single submission outside >>>>>> of GVT. It looks like the GVT driver overloads the single submission flag to >>>>>> tag requests that it owns. 
If we start using that flag elsewhere when GVT is >>>>>> active, I think that will cause much confusion within the GVT code. >>>>>> >>>>>> The correct fix would be to create a new flag just for GVT usage alongside >>>>>> the single submission one. GVT would then set both but only check for its >>>>>> own private flag. The parallel code would obviously only set the existing >>>>>> single submission flag. >>>>>> >>>>> Ok, see below. >>>>> >>>>>>> } >>>>>>> static bool can_merge_ctx(const struct intel_context *prev, >>>>>>> @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, >>>>>>> current->comm); >>>>>>> } >>>>>>> +static struct intel_context * >>>>>>> +execlists_create_parallel(struct intel_engine_cs **engines, >>>>>>> + unsigned int num_siblings, >>>>>>> + unsigned int width) >>>>>>> +{ >>>>>>> + struct intel_context *parent = NULL, *ce, *err; >>>>>>> + int i; >>>>>>> + >>>>>>> + GEM_BUG_ON(num_siblings != 1); >>>>>>> + >>>>>>> + for (i = 0; i < width; ++i) { >>>>>>> + ce = intel_context_create(engines[i]); >>>>>>> + if (!ce) { >>>>>>> + err = ERR_PTR(-ENOMEM); >>>>>>> + goto unwind; >>>>>>> + } >>>>>>> + >>>>>>> + if (i == 0) >>>>>>> + parent = ce; >>>>>>> + else >>>>>>> + intel_context_bind_parent_child(parent, ce); >>>>>>> + } >>>>>>> + >>>>>>> + parent->parallel.fence_context = dma_fence_context_alloc(1); >>>>>>> + >>>>>>> + intel_context_set_nopreempt(parent); >>>>>>> + intel_context_set_single_submission(parent); >>>>>> Can you explain the need for setting single submission? >>>>>> >>>>> I think I can actually pull this out. This was needed when I tried to >>>>> truely implement a guarante that all the parallel requests would be >>>>> running simultaneously. Couldn't ever to get that working because of the >>>>> mess that is the execlists scheduler - a simple wait at the head of >>>>> queue until everyone joined just blew up for whatever reason. 
I don't >>>>> believe this servers a purpose anymore, so I'll just drop it. >>>>> >>>>> Matt >>>> Is that not going to be a problem? I thought concurrent execution was a >>>> fundamental requirement? >>>> >>> >>> I don't think so. See the commit message. This implmementation is on par >>> with the bonding interface - there is no guarantee whatsoever that with >>> the bonding interface bonded requests actually run at the same time. It >>> says hopefully these submissions run together. That's what I do in this >>> patch too for execlists, hence the 'weak' clause in the commit message. >> >> With the new uapi definition implying a stricter guarantee - why not have >> this patch use special bb semaphore pre/post-ambles so scheduling behaviour >> is closer between the two backends? >> > > We could do that in a follow up if needed, as this bare minimum to get > this uAPI working. The real fix would be update the execlists scheduler > to be able to do a join of parallel requests and then schedule them > together. Should be fairly simple, tried to do this, but the execlists > scheduler is such a mess doing something simple is near impossible. IMO > there is little point wasting time on a legacy submission interface. > This implementation works as well as the old uAPI, let's get this in and > move on. Bashing aside, what downside do you see in just doing what I suggested right now? Code is there and all so it is a simple matter of adding a conditional somewhere to use it. And it would make the behaviour between the two backends closer. So it sounds like a no brainer to me. Or I am missing something? Regards, Tvrtko ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists 2021-11-12 14:13 ` Tvrtko Ursulin @ 2021-11-12 17:59 ` Matthew Brost -1 siblings, 0 replies; 22+ messages in thread From: Matthew Brost @ 2021-11-12 17:59 UTC (permalink / raw) To: Tvrtko Ursulin Cc: intel-gfx, daniele.ceraolospurio, dri-devel, John Harrison, tvrtko.ursulin On Fri, Nov 12, 2021 at 02:13:50PM +0000, Tvrtko Ursulin wrote: > > On 11/11/2021 16:49, Matthew Brost wrote: > > On Mon, Nov 01, 2021 at 10:35:09AM +0000, Tvrtko Ursulin wrote: > > > > > > On 27/10/2021 21:10, Matthew Brost wrote: > > > > On Wed, Oct 27, 2021 at 01:04:49PM -0700, John Harrison wrote: > > > > > On 10/27/2021 12:17, Matthew Brost wrote: > > > > > > On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: > > > > > > > On 10/20/2021 14:47, Matthew Brost wrote: > > > > > > > > A weak implementation of parallel submission (multi-bb execbuf IOCTL) for > > > > > > > > execlists. Doing as little as possible to support this interface for > > > > > > > > execlists - basically just passing submit fences between each request > > > > > > > > generated and virtual engines are not allowed. This is on par with what > > > > > > > > is there for the existing (hopefully soon deprecated) bonding interface. > > > > > > > > > > > > > > > > We perma-pin these execlists contexts to align with GuC implementation. 
> > > > > > > > > > > > > > > > v2: > > > > > > > > (John Harrison) > > > > > > > > - Drop siblings array as num_siblings must be 1 > > > > > > > > > > > > > > > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > > > > > > > > --- > > > > > > > > drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- > > > > > > > > drivers/gpu/drm/i915/gt/intel_context.c | 4 +- > > > > > > > > .../drm/i915/gt/intel_execlists_submission.c | 44 ++++++++++++++++++- > > > > > > > > drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + > > > > > > > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - > > > > > > > > 5 files changed, 52 insertions(+), 10 deletions(-) > > > > > > > > > > > > > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > > > > > index fb33d0322960..35e87a7d0ea9 100644 > > > > > > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > > > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > > > > > @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > > > > > > > > struct intel_engine_cs **siblings = NULL; > > > > > > > > intel_engine_mask_t prev_mask; > > > > > > > > - /* FIXME: This is NIY for execlists */ > > > > > > > > - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) > > > > > > > > - return -ENODEV; > > > > > > > > - > > > > > > > > if (get_user(slot, &ext->engine_index)) > > > > > > > > return -EFAULT; > > > > > > > > @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > > > > > > > > if (get_user(num_siblings, &ext->num_siblings)) > > > > > > > > return -EFAULT; > > > > > > > > + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { > > > > > > > > + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", > > > > > > > > + num_siblings); > > > > > > > > + return -EINVAL; > > > > > > > > + } > > > > > > > > + > > > > > > > > if (slot >= 
set->num_engines) { > > > > > > > > drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", > > > > > > > > slot, set->num_engines); > > > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c > > > > > > > > index 5634d14052bc..1bec92e1d8e6 100644 > > > > > > > > --- a/drivers/gpu/drm/i915/gt/intel_context.c > > > > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > > > > > > > > @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) > > > > > > > > __i915_active_acquire(&ce->active); > > > > > > > > - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) > > > > > > > > + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || > > > > > > > > + intel_context_is_parallel(ce)) > > > > > > > > return 0; > > > > > > > > /* Preallocate tracking nodes */ > > > > > > > > @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, > > > > > > > > * Callers responsibility to validate that this function is used > > > > > > > > * correctly but we use GEM_BUG_ON here ensure that they do. 
> > > > > > > > */ > > > > > > > > - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); > > > > > > > > GEM_BUG_ON(intel_context_is_pinned(parent)); > > > > > > > > GEM_BUG_ON(intel_context_is_child(parent)); > > > > > > > > GEM_BUG_ON(intel_context_is_pinned(child)); > > > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > > > > > index bedb80057046..2865b422300d 100644 > > > > > > > > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > > > > > @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) > > > > > > > > static bool ctx_single_port_submission(const struct intel_context *ce) > > > > > > > > { > > > > > > > > - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && > > > > > > > > - intel_context_force_single_submission(ce)); > > > > > > > > + return intel_context_force_single_submission(ce); > > > > > > > I think this is actually going to break GVT. > > > > > > > > > > > > > > Not so much this change here but the whole use of single submission outside > > > > > > > of GVT. It looks like the GVT driver overloads the single submission flag to > > > > > > > tag requests that it owns. If we start using that flag elsewhere when GVT is > > > > > > > active, I think that will cause much confusion within the GVT code. > > > > > > > > > > > > > > The correct fix would be to create a new flag just for GVT usage alongside > > > > > > > the single submission one. GVT would then set both but only check for its > > > > > > > own private flag. The parallel code would obviously only set the existing > > > > > > > single submission flag. > > > > > > > > > > > > > Ok, see below. 
> > > > > > > > > > > > > > } > > > > > > > > static bool can_merge_ctx(const struct intel_context *prev, > > > > > > > > @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, > > > > > > > > current->comm); > > > > > > > > } > > > > > > > > +static struct intel_context * > > > > > > > > +execlists_create_parallel(struct intel_engine_cs **engines, > > > > > > > > + unsigned int num_siblings, > > > > > > > > + unsigned int width) > > > > > > > > +{ > > > > > > > > + struct intel_context *parent = NULL, *ce, *err; > > > > > > > > + int i; > > > > > > > > + > > > > > > > > + GEM_BUG_ON(num_siblings != 1); > > > > > > > > + > > > > > > > > + for (i = 0; i < width; ++i) { > > > > > > > > + ce = intel_context_create(engines[i]); > > > > > > > > + if (!ce) { > > > > > > > > + err = ERR_PTR(-ENOMEM); > > > > > > > > + goto unwind; > > > > > > > > + } > > > > > > > > + > > > > > > > > + if (i == 0) > > > > > > > > + parent = ce; > > > > > > > > + else > > > > > > > > + intel_context_bind_parent_child(parent, ce); > > > > > > > > + } > > > > > > > > + > > > > > > > > + parent->parallel.fence_context = dma_fence_context_alloc(1); > > > > > > > > + > > > > > > > > + intel_context_set_nopreempt(parent); > > > > > > > > + intel_context_set_single_submission(parent); > > > > > > > Can you explain the need for setting single submission? > > > > > > > > > > > > > I think I can actually pull this out. This was needed when I tried to > > > > > > truely implement a guarante that all the parallel requests would be > > > > > > running simultaneously. Couldn't ever to get that working because of the > > > > > > mess that is the execlists scheduler - a simple wait at the head of > > > > > > queue until everyone joined just blew up for whatever reason. I don't > > > > > > believe this servers a purpose anymore, so I'll just drop it. > > > > > > > > > > > > Matt > > > > > Is that not going to be a problem? 
I thought concurrent execution was a > > > > > fundamental requirement? > > > > > > > > > > > > > I don't think so. See the commit message. This implmementation is on par > > > > with the bonding interface - there is no guarantee whatsoever that with > > > > the bonding interface bonded requests actually run at the same time. It > > > > says hopefully these submissions run together. That's what I do in this > > > > patch too for execlists, hence the 'weak' clause in the commit message. > > > > > > With the new uapi definition implying a stricter guarantee - why not have > > > this patch use special bb semaphore pre/post-ambles so scheduling behaviour > > > is closer between the two backends? > > > > > > > We could do that in a follow up if needed, as this bare minimum to get > > this uAPI working. The real fix would be update the execlists scheduler > > to be able to do a join of parallel requests and then schedule them > > together. Should be fairly simple, tried to do this, but the execlists > > scheduler is such a mess doing something simple is near impossible. IMO > > there is little point wasting time on a legacy submission interface. > > This implementation works as well as the old uAPI, let's get this in and > > move on. > > Bashing aside, what downside do you see in just doing what I suggested right > now? Code is there and all so it is a simple matter of adding a conditional > somewhere to use it. And it would make the behaviour between the two > backends closer. So it sounds like a no brainer to me. Or I am missing > something? For parallel submission, user batches should be inserting semaphore to ensure that they are running together - the kernel inserting them is redundant. The reason we do this for GuC submission is for safe preemption, in execlists we just don't allow preemption while the requests are running. As I said, the correct solution is update the execlists scheduler to actually run these requests in parallel. 
Tried that but proved difficult and landed on this patch. If someone wants to fix the execlists scheduler in a follow up they are welcome to but in the meantime what I have in place is on par with the bonded interface. I see no reason why this patch can't be merged. Matt > > Regards, > > Tvrtko ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists @ 2021-11-12 17:59 ` Matthew Brost 0 siblings, 0 replies; 22+ messages in thread From: Matthew Brost @ 2021-11-12 17:59 UTC (permalink / raw) To: Tvrtko Ursulin; +Cc: intel-gfx, dri-devel On Fri, Nov 12, 2021 at 02:13:50PM +0000, Tvrtko Ursulin wrote: > > On 11/11/2021 16:49, Matthew Brost wrote: > > On Mon, Nov 01, 2021 at 10:35:09AM +0000, Tvrtko Ursulin wrote: > > > > > > On 27/10/2021 21:10, Matthew Brost wrote: > > > > On Wed, Oct 27, 2021 at 01:04:49PM -0700, John Harrison wrote: > > > > > On 10/27/2021 12:17, Matthew Brost wrote: > > > > > > On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: > > > > > > > On 10/20/2021 14:47, Matthew Brost wrote: > > > > > > > > A weak implementation of parallel submission (multi-bb execbuf IOCTL) for > > > > > > > > execlists. Doing as little as possible to support this interface for > > > > > > > > execlists - basically just passing submit fences between each request > > > > > > > > generated and virtual engines are not allowed. This is on par with what > > > > > > > > is there for the existing (hopefully soon deprecated) bonding interface. > > > > > > > > > > > > > > > > We perma-pin these execlists contexts to align with GuC implementation. 
> > > > > > > > > > > > > > > > v2: > > > > > > > > (John Harrison) > > > > > > > > - Drop siblings array as num_siblings must be 1 > > > > > > > > > > > > > > > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > > > > > > > > --- > > > > > > > > drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- > > > > > > > > drivers/gpu/drm/i915/gt/intel_context.c | 4 +- > > > > > > > > .../drm/i915/gt/intel_execlists_submission.c | 44 ++++++++++++++++++- > > > > > > > > drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + > > > > > > > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - > > > > > > > > 5 files changed, 52 insertions(+), 10 deletions(-) > > > > > > > > > > > > > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > > > > > index fb33d0322960..35e87a7d0ea9 100644 > > > > > > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > > > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > > > > > @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > > > > > > > > struct intel_engine_cs **siblings = NULL; > > > > > > > > intel_engine_mask_t prev_mask; > > > > > > > > - /* FIXME: This is NIY for execlists */ > > > > > > > > - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) > > > > > > > > - return -ENODEV; > > > > > > > > - > > > > > > > > if (get_user(slot, &ext->engine_index)) > > > > > > > > return -EFAULT; > > > > > > > > @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > > > > > > > > if (get_user(num_siblings, &ext->num_siblings)) > > > > > > > > return -EFAULT; > > > > > > > > + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { > > > > > > > > + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", > > > > > > > > + num_siblings); > > > > > > > > + return -EINVAL; > > > > > > > > + } > > > > > > > > + > > > > > > > > if (slot >= 
set->num_engines) { > > > > > > > > drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", > > > > > > > > slot, set->num_engines); > > > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c > > > > > > > > index 5634d14052bc..1bec92e1d8e6 100644 > > > > > > > > --- a/drivers/gpu/drm/i915/gt/intel_context.c > > > > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > > > > > > > > @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) > > > > > > > > __i915_active_acquire(&ce->active); > > > > > > > > - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) > > > > > > > > + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || > > > > > > > > + intel_context_is_parallel(ce)) > > > > > > > > return 0; > > > > > > > > /* Preallocate tracking nodes */ > > > > > > > > @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, > > > > > > > > * Callers responsibility to validate that this function is used > > > > > > > > * correctly but we use GEM_BUG_ON here ensure that they do. 
> > > > > > > > */ > > > > > > > > - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); > > > > > > > > GEM_BUG_ON(intel_context_is_pinned(parent)); > > > > > > > > GEM_BUG_ON(intel_context_is_child(parent)); > > > > > > > > GEM_BUG_ON(intel_context_is_pinned(child)); > > > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > > > > > index bedb80057046..2865b422300d 100644 > > > > > > > > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > > > > > @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) > > > > > > > > static bool ctx_single_port_submission(const struct intel_context *ce) > > > > > > > > { > > > > > > > > - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && > > > > > > > > - intel_context_force_single_submission(ce)); > > > > > > > > + return intel_context_force_single_submission(ce); > > > > > > > I think this is actually going to break GVT. > > > > > > > > > > > > > > Not so much this change here but the whole use of single submission outside > > > > > > > of GVT. It looks like the GVT driver overloads the single submission flag to > > > > > > > tag requests that it owns. If we start using that flag elsewhere when GVT is > > > > > > > active, I think that will cause much confusion within the GVT code. > > > > > > > > > > > > > > The correct fix would be to create a new flag just for GVT usage alongside > > > > > > > the single submission one. GVT would then set both but only check for its > > > > > > > own private flag. The parallel code would obviously only set the existing > > > > > > > single submission flag. > > > > > > > > > > > > > Ok, see below. 
> > > > > > > > > > > > > > } > > > > > > > > static bool can_merge_ctx(const struct intel_context *prev, > > > > > > > > @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, > > > > > > > > current->comm); > > > > > > > > } > > > > > > > > +static struct intel_context * > > > > > > > > +execlists_create_parallel(struct intel_engine_cs **engines, > > > > > > > > + unsigned int num_siblings, > > > > > > > > + unsigned int width) > > > > > > > > +{ > > > > > > > > + struct intel_context *parent = NULL, *ce, *err; > > > > > > > > + int i; > > > > > > > > + > > > > > > > > + GEM_BUG_ON(num_siblings != 1); > > > > > > > > + > > > > > > > > + for (i = 0; i < width; ++i) { > > > > > > > > + ce = intel_context_create(engines[i]); > > > > > > > > + if (!ce) { > > > > > > > > + err = ERR_PTR(-ENOMEM); > > > > > > > > + goto unwind; > > > > > > > > + } > > > > > > > > + > > > > > > > > + if (i == 0) > > > > > > > > + parent = ce; > > > > > > > > + else > > > > > > > > + intel_context_bind_parent_child(parent, ce); > > > > > > > > + } > > > > > > > > + > > > > > > > > + parent->parallel.fence_context = dma_fence_context_alloc(1); > > > > > > > > + > > > > > > > > + intel_context_set_nopreempt(parent); > > > > > > > > + intel_context_set_single_submission(parent); > > > > > > > Can you explain the need for setting single submission? > > > > > > > > > > > > > I think I can actually pull this out. This was needed when I tried to > > > > > > truely implement a guarante that all the parallel requests would be > > > > > > running simultaneously. Couldn't ever to get that working because of the > > > > > > mess that is the execlists scheduler - a simple wait at the head of > > > > > > queue until everyone joined just blew up for whatever reason. I don't > > > > > > believe this servers a purpose anymore, so I'll just drop it. > > > > > > > > > > > > Matt > > > > > Is that not going to be a problem? 
I thought concurrent execution was a > > > > > fundamental requirement? > > > > > > > > > > > > > I don't think so. See the commit message. This implmementation is on par > > > > with the bonding interface - there is no guarantee whatsoever that with > > > > the bonding interface bonded requests actually run at the same time. It > > > > says hopefully these submissions run together. That's what I do in this > > > > patch too for execlists, hence the 'weak' clause in the commit message. > > > > > > With the new uapi definition implying a stricter guarantee - why not have > > > this patch use special bb semaphore pre/post-ambles so scheduling behaviour > > > is closer between the two backends? > > > > > > > We could do that in a follow up if needed, as this bare minimum to get > > this uAPI working. The real fix would be update the execlists scheduler > > to be able to do a join of parallel requests and then schedule them > > together. Should be fairly simple, tried to do this, but the execlists > > scheduler is such a mess doing something simple is near impossible. IMO > > there is little point wasting time on a legacy submission interface. > > This implementation works as well as the old uAPI, let's get this in and > > move on. > > Bashing aside, what downside do you see in just doing what I suggested right > now? Code is there and all so it is a simple matter of adding a conditional > somewhere to use it. And it would make the behaviour between the two > backends closer. So it sounds like a no brainer to me. Or I am missing > something? For parallel submission, user batches should be inserting semaphore to ensure that they are running together - the kernel inserting them is redundant. The reason we do this for GuC submission is for safe preemption, in execlists we just don't allow preemption while the requests are running. As I said, the correct solution is update the execlists scheduler to actually run these requests in parallel. 
Tried that but proved difficult and landed on this patch. If someone wants to fix the execlists scheduler in a follow up they are welcome to but in the meantime what I have in place is on par with the bonded interface. I see no reason why this patch can't be merged. Matt > > Regards, > > Tvrtko ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists 2021-11-12 17:59 ` Matthew Brost @ 2021-11-15 10:34 ` Tvrtko Ursulin -1 siblings, 0 replies; 22+ messages in thread From: Tvrtko Ursulin @ 2021-11-15 10:34 UTC (permalink / raw) To: Matthew Brost; +Cc: intel-gfx, dri-devel On 12/11/2021 17:59, Matthew Brost wrote: > On Fri, Nov 12, 2021 at 02:13:50PM +0000, Tvrtko Ursulin wrote: >> >> On 11/11/2021 16:49, Matthew Brost wrote: >>> On Mon, Nov 01, 2021 at 10:35:09AM +0000, Tvrtko Ursulin wrote: >>>> >>>> On 27/10/2021 21:10, Matthew Brost wrote: >>>>> On Wed, Oct 27, 2021 at 01:04:49PM -0700, John Harrison wrote: >>>>>> On 10/27/2021 12:17, Matthew Brost wrote: >>>>>>> On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: >>>>>>>> On 10/20/2021 14:47, Matthew Brost wrote: >>>>>>>>> A weak implementation of parallel submission (multi-bb execbuf IOCTL) for >>>>>>>>> execlists. Doing as little as possible to support this interface for >>>>>>>>> execlists - basically just passing submit fences between each request >>>>>>>>> generated and virtual engines are not allowed. This is on par with what >>>>>>>>> is there for the existing (hopefully soon deprecated) bonding interface. >>>>>>>>> >>>>>>>>> We perma-pin these execlists contexts to align with GuC implementation. 
>>>>>>>>> >>>>>>>>> v2: >>>>>>>>> (John Harrison) >>>>>>>>> - Drop siblings array as num_siblings must be 1 >>>>>>>>> >>>>>>>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com> >>>>>>>>> --- >>>>>>>>> drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- >>>>>>>>> drivers/gpu/drm/i915/gt/intel_context.c | 4 +- >>>>>>>>> .../drm/i915/gt/intel_execlists_submission.c | 44 ++++++++++++++++++- >>>>>>>>> drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + >>>>>>>>> .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - >>>>>>>>> 5 files changed, 52 insertions(+), 10 deletions(-) >>>>>>>>> >>>>>>>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c >>>>>>>>> index fb33d0322960..35e87a7d0ea9 100644 >>>>>>>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c >>>>>>>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c >>>>>>>>> @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, >>>>>>>>> struct intel_engine_cs **siblings = NULL; >>>>>>>>> intel_engine_mask_t prev_mask; >>>>>>>>> - /* FIXME: This is NIY for execlists */ >>>>>>>>> - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) >>>>>>>>> - return -ENODEV; >>>>>>>>> - >>>>>>>>> if (get_user(slot, &ext->engine_index)) >>>>>>>>> return -EFAULT; >>>>>>>>> @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, >>>>>>>>> if (get_user(num_siblings, &ext->num_siblings)) >>>>>>>>> return -EFAULT; >>>>>>>>> + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { >>>>>>>>> + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", >>>>>>>>> + num_siblings); >>>>>>>>> + return -EINVAL; >>>>>>>>> + } >>>>>>>>> + >>>>>>>>> if (slot >= set->num_engines) { >>>>>>>>> drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", >>>>>>>>> slot, set->num_engines); >>>>>>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c 
>>>>>>>>> index 5634d14052bc..1bec92e1d8e6 100644 >>>>>>>>> --- a/drivers/gpu/drm/i915/gt/intel_context.c >>>>>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_context.c >>>>>>>>> @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) >>>>>>>>> __i915_active_acquire(&ce->active); >>>>>>>>> - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) >>>>>>>>> + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || >>>>>>>>> + intel_context_is_parallel(ce)) >>>>>>>>> return 0; >>>>>>>>> /* Preallocate tracking nodes */ >>>>>>>>> @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, >>>>>>>>> * Callers responsibility to validate that this function is used >>>>>>>>> * correctly but we use GEM_BUG_ON here ensure that they do. >>>>>>>>> */ >>>>>>>>> - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); >>>>>>>>> GEM_BUG_ON(intel_context_is_pinned(parent)); >>>>>>>>> GEM_BUG_ON(intel_context_is_child(parent)); >>>>>>>>> GEM_BUG_ON(intel_context_is_pinned(child)); >>>>>>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>>>>>>>> index bedb80057046..2865b422300d 100644 >>>>>>>>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>>>>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>>>>>>>> @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) >>>>>>>>> static bool ctx_single_port_submission(const struct intel_context *ce) >>>>>>>>> { >>>>>>>>> - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && >>>>>>>>> - intel_context_force_single_submission(ce)); >>>>>>>>> + return intel_context_force_single_submission(ce); >>>>>>>> I think this is actually going to break GVT. >>>>>>>> >>>>>>>> Not so much this change here but the whole use of single submission outside >>>>>>>> of GVT. 
It looks like the GVT driver overloads the single submission flag to >>>>>>>> tag requests that it owns. If we start using that flag elsewhere when GVT is >>>>>>>> active, I think that will cause much confusion within the GVT code. >>>>>>>> >>>>>>>> The correct fix would be to create a new flag just for GVT usage alongside >>>>>>>> the single submission one. GVT would then set both but only check for its >>>>>>>> own private flag. The parallel code would obviously only set the existing >>>>>>>> single submission flag. >>>>>>>> >>>>>>> Ok, see below. >>>>>>> >>>>>>>>> } >>>>>>>>> static bool can_merge_ctx(const struct intel_context *prev, >>>>>>>>> @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, >>>>>>>>> current->comm); >>>>>>>>> } >>>>>>>>> +static struct intel_context * >>>>>>>>> +execlists_create_parallel(struct intel_engine_cs **engines, >>>>>>>>> + unsigned int num_siblings, >>>>>>>>> + unsigned int width) >>>>>>>>> +{ >>>>>>>>> + struct intel_context *parent = NULL, *ce, *err; >>>>>>>>> + int i; >>>>>>>>> + >>>>>>>>> + GEM_BUG_ON(num_siblings != 1); >>>>>>>>> + >>>>>>>>> + for (i = 0; i < width; ++i) { >>>>>>>>> + ce = intel_context_create(engines[i]); >>>>>>>>> + if (!ce) { >>>>>>>>> + err = ERR_PTR(-ENOMEM); >>>>>>>>> + goto unwind; >>>>>>>>> + } >>>>>>>>> + >>>>>>>>> + if (i == 0) >>>>>>>>> + parent = ce; >>>>>>>>> + else >>>>>>>>> + intel_context_bind_parent_child(parent, ce); >>>>>>>>> + } >>>>>>>>> + >>>>>>>>> + parent->parallel.fence_context = dma_fence_context_alloc(1); >>>>>>>>> + >>>>>>>>> + intel_context_set_nopreempt(parent); >>>>>>>>> + intel_context_set_single_submission(parent); >>>>>>>> Can you explain the need for setting single submission? >>>>>>>> >>>>>>> I think I can actually pull this out. This was needed when I tried to >>>>>>> truely implement a guarante that all the parallel requests would be >>>>>>> running simultaneously. 
Couldn't ever to get that working because of the >>>>>>> mess that is the execlists scheduler - a simple wait at the head of >>>>>>> queue until everyone joined just blew up for whatever reason. I don't >>>>>>> believe this servers a purpose anymore, so I'll just drop it. >>>>>>> >>>>>>> Matt >>>>>> Is that not going to be a problem? I thought concurrent execution was a >>>>>> fundamental requirement? >>>>>> >>>>> >>>>> I don't think so. See the commit message. This implmementation is on par >>>>> with the bonding interface - there is no guarantee whatsoever that with >>>>> the bonding interface bonded requests actually run at the same time. It >>>>> says hopefully these submissions run together. That's what I do in this >>>>> patch too for execlists, hence the 'weak' clause in the commit message. >>>> >>>> With the new uapi definition implying a stricter guarantee - why not have >>>> this patch use special bb semaphore pre/post-ambles so scheduling behaviour >>>> is closer between the two backends? >>>> >>> >>> We could do that in a follow up if needed, as this bare minimum to get >>> this uAPI working. The real fix would be update the execlists scheduler >>> to be able to do a join of parallel requests and then schedule them >>> together. Should be fairly simple, tried to do this, but the execlists >>> scheduler is such a mess doing something simple is near impossible. IMO >>> there is little point wasting time on a legacy submission interface. >>> This implementation works as well as the old uAPI, let's get this in and >>> move on. >> >> Bashing aside, what downside do you see in just doing what I suggested right >> now? Code is there and all so it is a simple matter of adding a conditional >> somewhere to use it. And it would make the behaviour between the two >> backends closer. So it sounds like a no brainer to me. Or I am missing >> something? 
> > For parallel submission, user batches should be inserting semaphore to > ensure that they are running together - the kernel inserting them is > redundant. The reason we do this for GuC submission is for safe > preemption, in execlists we just don't allow preemption while the > requests are running. As I said, the correct solution is to update the > execlists scheduler to actually run these requests in parallel. Tried > that but proved difficult and landed on this patch. If someone wants to > fix the execlists scheduler in a follow up they are welcome to but > in the meantime what I have in place is on par with the bonded > interface. I see no reason why this patch can't be merged. Nowhere did I write this patch cannot be merged. I was asking why you don't add another one on top. I'd recommend adding mention of no preempt behaviour to the commit message. And sentinel as well. As the commit already talks about limitations "doing as little" as possible, it makes sense to list all limitations and design choices. Regards, Tvrtko ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists @ 2021-11-15 10:34 ` Tvrtko Ursulin 0 siblings, 0 replies; 22+ messages in thread From: Tvrtko Ursulin @ 2021-11-15 10:34 UTC (permalink / raw) To: Matthew Brost Cc: intel-gfx, daniele.ceraolospurio, dri-devel, John Harrison, tvrtko.ursulin On 12/11/2021 17:59, Matthew Brost wrote: > On Fri, Nov 12, 2021 at 02:13:50PM +0000, Tvrtko Ursulin wrote: >> >> On 11/11/2021 16:49, Matthew Brost wrote: >>> On Mon, Nov 01, 2021 at 10:35:09AM +0000, Tvrtko Ursulin wrote: >>>> >>>> On 27/10/2021 21:10, Matthew Brost wrote: >>>>> On Wed, Oct 27, 2021 at 01:04:49PM -0700, John Harrison wrote: >>>>>> On 10/27/2021 12:17, Matthew Brost wrote: >>>>>>> On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: >>>>>>>> On 10/20/2021 14:47, Matthew Brost wrote: >>>>>>>>> A weak implementation of parallel submission (multi-bb execbuf IOCTL) for >>>>>>>>> execlists. Doing as little as possible to support this interface for >>>>>>>>> execlists - basically just passing submit fences between each request >>>>>>>>> generated and virtual engines are not allowed. This is on par with what >>>>>>>>> is there for the existing (hopefully soon deprecated) bonding interface. >>>>>>>>> >>>>>>>>> We perma-pin these execlists contexts to align with GuC implementation. 
>>>>>>>>> >>>>>>>>> v2: >>>>>>>>> (John Harrison) >>>>>>>>> - Drop siblings array as num_siblings must be 1 >>>>>>>>> >>>>>>>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com> >>>>>>>>> --- >>>>>>>>> drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- >>>>>>>>> drivers/gpu/drm/i915/gt/intel_context.c | 4 +- >>>>>>>>> .../drm/i915/gt/intel_execlists_submission.c | 44 ++++++++++++++++++- >>>>>>>>> drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + >>>>>>>>> .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - >>>>>>>>> 5 files changed, 52 insertions(+), 10 deletions(-) >>>>>>>>> >>>>>>>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c >>>>>>>>> index fb33d0322960..35e87a7d0ea9 100644 >>>>>>>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c >>>>>>>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c >>>>>>>>> @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, >>>>>>>>> struct intel_engine_cs **siblings = NULL; >>>>>>>>> intel_engine_mask_t prev_mask; >>>>>>>>> - /* FIXME: This is NIY for execlists */ >>>>>>>>> - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) >>>>>>>>> - return -ENODEV; >>>>>>>>> - >>>>>>>>> if (get_user(slot, &ext->engine_index)) >>>>>>>>> return -EFAULT; >>>>>>>>> @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, >>>>>>>>> if (get_user(num_siblings, &ext->num_siblings)) >>>>>>>>> return -EFAULT; >>>>>>>>> + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { >>>>>>>>> + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", >>>>>>>>> + num_siblings); >>>>>>>>> + return -EINVAL; >>>>>>>>> + } >>>>>>>>> + >>>>>>>>> if (slot >= set->num_engines) { >>>>>>>>> drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", >>>>>>>>> slot, set->num_engines); >>>>>>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c 
>>>>>>>>> index 5634d14052bc..1bec92e1d8e6 100644 >>>>>>>>> --- a/drivers/gpu/drm/i915/gt/intel_context.c >>>>>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_context.c >>>>>>>>> @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) >>>>>>>>> __i915_active_acquire(&ce->active); >>>>>>>>> - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) >>>>>>>>> + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || >>>>>>>>> + intel_context_is_parallel(ce)) >>>>>>>>> return 0; >>>>>>>>> /* Preallocate tracking nodes */ >>>>>>>>> @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, >>>>>>>>> * Callers responsibility to validate that this function is used >>>>>>>>> * correctly but we use GEM_BUG_ON here ensure that they do. >>>>>>>>> */ >>>>>>>>> - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); >>>>>>>>> GEM_BUG_ON(intel_context_is_pinned(parent)); >>>>>>>>> GEM_BUG_ON(intel_context_is_child(parent)); >>>>>>>>> GEM_BUG_ON(intel_context_is_pinned(child)); >>>>>>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>>>>>>>> index bedb80057046..2865b422300d 100644 >>>>>>>>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>>>>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >>>>>>>>> @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) >>>>>>>>> static bool ctx_single_port_submission(const struct intel_context *ce) >>>>>>>>> { >>>>>>>>> - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && >>>>>>>>> - intel_context_force_single_submission(ce)); >>>>>>>>> + return intel_context_force_single_submission(ce); >>>>>>>> I think this is actually going to break GVT. >>>>>>>> >>>>>>>> Not so much this change here but the whole use of single submission outside >>>>>>>> of GVT. 
It looks like the GVT driver overloads the single submission flag to >>>>>>>> tag requests that it owns. If we start using that flag elsewhere when GVT is >>>>>>>> active, I think that will cause much confusion within the GVT code. >>>>>>>> >>>>>>>> The correct fix would be to create a new flag just for GVT usage alongside >>>>>>>> the single submission one. GVT would then set both but only check for its >>>>>>>> own private flag. The parallel code would obviously only set the existing >>>>>>>> single submission flag. >>>>>>>> >>>>>>> Ok, see below. >>>>>>> >>>>>>>>> } >>>>>>>>> static bool can_merge_ctx(const struct intel_context *prev, >>>>>>>>> @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, >>>>>>>>> current->comm); >>>>>>>>> } >>>>>>>>> +static struct intel_context * >>>>>>>>> +execlists_create_parallel(struct intel_engine_cs **engines, >>>>>>>>> + unsigned int num_siblings, >>>>>>>>> + unsigned int width) >>>>>>>>> +{ >>>>>>>>> + struct intel_context *parent = NULL, *ce, *err; >>>>>>>>> + int i; >>>>>>>>> + >>>>>>>>> + GEM_BUG_ON(num_siblings != 1); >>>>>>>>> + >>>>>>>>> + for (i = 0; i < width; ++i) { >>>>>>>>> + ce = intel_context_create(engines[i]); >>>>>>>>> + if (!ce) { >>>>>>>>> + err = ERR_PTR(-ENOMEM); >>>>>>>>> + goto unwind; >>>>>>>>> + } >>>>>>>>> + >>>>>>>>> + if (i == 0) >>>>>>>>> + parent = ce; >>>>>>>>> + else >>>>>>>>> + intel_context_bind_parent_child(parent, ce); >>>>>>>>> + } >>>>>>>>> + >>>>>>>>> + parent->parallel.fence_context = dma_fence_context_alloc(1); >>>>>>>>> + >>>>>>>>> + intel_context_set_nopreempt(parent); >>>>>>>>> + intel_context_set_single_submission(parent); >>>>>>>> Can you explain the need for setting single submission? >>>>>>>> >>>>>>> I think I can actually pull this out. This was needed when I tried to >>>>>>> truly implement a guarantee that all the parallel requests would be >>>>>>> running simultaneously. 
Couldn't ever get that working because of the >>>>>>> mess that is the execlists scheduler - a simple wait at the head of >>>>>>> queue until everyone joined just blew up for whatever reason. I don't >>>>>>> believe this serves a purpose anymore, so I'll just drop it. >>>>>>> >>>>>>> Matt >>>>>> Is that not going to be a problem? I thought concurrent execution was a >>>>>> fundamental requirement? >>>>>> >>>>> >>>>> I don't think so. See the commit message. This implementation is on par >>>>> with the bonding interface - there is no guarantee whatsoever that with >>>>> the bonding interface bonded requests actually run at the same time. It >>>>> says hopefully these submissions run together. That's what I do in this >>>>> patch too for execlists, hence the 'weak' clause in the commit message. >>>> >>>> With the new uapi definition implying a stricter guarantee - why not have >>>> this patch use special bb semaphore pre/post-ambles so scheduling behaviour >>>> is closer between the two backends? >>>> >>> >>> We could do that in a follow up if needed, as this is the bare minimum to get >>> this uAPI working. The real fix would be to update the execlists scheduler >>> to be able to do a join of parallel requests and then schedule them >>> together. Should be fairly simple, tried to do this, but the execlists >>> scheduler is such a mess doing something simple is near impossible. IMO >>> there is little point wasting time on a legacy submission interface. >>> This implementation works as well as the old uAPI, let's get this in and >>> move on. >> >> Bashing aside, what downside do you see in just doing what I suggested right >> now? Code is there and all so it is a simple matter of adding a conditional >> somewhere to use it. And it would make the behaviour between the two >> backends closer. So it sounds like a no brainer to me. Or I am missing >> something? 
> > For parallel submission, user batches should be inserting semaphore to > ensure that they are running together - the kernel inserting them is > redundant. The reason we do this for GuC submission is for safe > preemption, in execlists we just don't allow preemption while the > requests are running. As I said, the correct solution is to update the > execlists scheduler to actually run these requests in parallel. Tried > that but proved difficult and landed on this patch. If someone wants to > fix the execlists scheduler in a follow up they are welcome to but > in the meantime what I have in place is on par with the bonded > interface. I see no reason why this patch can't be merged. Nowhere did I write this patch cannot be merged. I was asking why you don't add another one on top. I'd recommend adding mention of no preempt behaviour to the commit message. And sentinel as well. As the commit already talks about limitations "doing as little" as possible, it makes sense to list all limitations and design choices. Regards, Tvrtko ^ permalink raw reply [flat|nested] 22+ messages in thread
end of thread, other threads:[~2021-11-15 10:34 UTC | newest] Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2021-10-20 21:47 [PATCH] drm/i915/execlists: Weak parallel submission support for execlists Matthew Brost 2021-10-20 21:47 ` [Intel-gfx] " Matthew Brost 2021-10-21 0:53 ` [Intel-gfx] ✓ Fi.CI.BAT: success for " Patchwork 2021-10-21 6:14 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork 2021-10-26 21:58 ` [PATCH] " John Harrison 2021-10-26 21:58 ` [Intel-gfx] " John Harrison 2021-10-27 19:17 ` Matthew Brost 2021-10-27 19:17 ` [Intel-gfx] " Matthew Brost 2021-10-27 20:04 ` John Harrison 2021-10-27 20:04 ` [Intel-gfx] " John Harrison 2021-10-27 20:10 ` Matthew Brost 2021-10-27 20:10 ` [Intel-gfx] " Matthew Brost 2021-11-01 10:35 ` Tvrtko Ursulin 2021-11-01 10:35 ` Tvrtko Ursulin 2021-11-11 16:49 ` Matthew Brost 2021-11-11 16:49 ` Matthew Brost 2021-11-12 14:13 ` Tvrtko Ursulin 2021-11-12 14:13 ` Tvrtko Ursulin 2021-11-12 17:59 ` Matthew Brost 2021-11-12 17:59 ` Matthew Brost 2021-11-15 10:34 ` Tvrtko Ursulin 2021-11-15 10:34 ` Tvrtko Ursulin
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.