From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga12.intel.com (mga12.intel.com [192.55.52.136]) by gabe.freedesktop.org (Postfix) with ESMTPS id 81D1210E34C for ; Tue, 13 Dec 2022 15:39:29 +0000 (UTC) Message-ID: Date: Tue, 13 Dec 2022 16:39:14 +0100 Content-Language: en-US To: =?UTF-8?Q?Zbigniew_Kempczy=c5=84ski?= References: <20221212125035.51326-1-zbigniew.kempczynski@intel.com> <20221212125035.51326-3-zbigniew.kempczynski@intel.com> From: Karolina Stolarek In-Reply-To: <20221212125035.51326-3-zbigniew.kempczynski@intel.com> Content-Type: text/plain; charset="UTF-8"; format=flowed Content-Transfer-Encoding: 8bit MIME-Version: 1.0 Subject: Re: [igt-dev] [PATCH i-g-t 2/3] lib/i915_blt: Extract blit emit functions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: igt-dev@lists.freedesktop.org Errors-To: igt-dev-bounces@lists.freedesktop.org Sender: "igt-dev" List-ID: On 12.12.2022 13:50, Zbigniew Kempczyński wrote: > Add some flexibility in building user pipelines extracting blitter > emission code to dedicated functions. Previous blitter functions which > do one blit-and-execute are rewritten to use those functions. > Requires usage with stateful allocator (offset might be acquired more > than once, so it must not change). 
> > Signed-off-by: Zbigniew Kempczyński > --- > lib/i915/i915_blt.c | 263 ++++++++++++++++++++++++++++++++------------ > lib/i915/i915_blt.h | 19 ++++ > 2 files changed, 213 insertions(+), 69 deletions(-) > > diff --git a/lib/i915/i915_blt.c b/lib/i915/i915_blt.c > index 42c28623f9..32ad608775 100644 > --- a/lib/i915/i915_blt.c > +++ b/lib/i915/i915_blt.c > @@ -503,58 +503,61 @@ static void dump_bb_ext(struct gen12_block_copy_data_ext *data) > } > > /** > - * blt_block_copy: > + * emit_blt_block_copy: > * @i915: drm fd > - * @ctx: intel_ctx_t context > - * @e: blitter engine for @ctx > * @ahnd: allocator handle > * @blt: basic blitter data (for TGL/DG1 which doesn't support ext version) > * @ext: extended blitter data (for DG2+, supports flatccs compression) > + * @bb_pos: position at which insert block copy commands > + * @emit_bbe: emit MI_BATCH_BUFFER_END after block-copy or not > * > - * Function does blit between @src and @dst described in @blt object. > + * Function inserts block-copy blit into batch at @bb_pos. Allows concatenating > + * with other commands to achieve pipelining. > * > * Returns: > - * execbuffer status. > + * Next write position in batch. 
> */ > -int blt_block_copy(int i915, > - const intel_ctx_t *ctx, > - const struct intel_execution_engine2 *e, > - uint64_t ahnd, > - const struct blt_copy_data *blt, > - const struct blt_block_copy_data_ext *ext) > +uint64_t emit_blt_block_copy(int i915, > + uint64_t ahnd, > + const struct blt_copy_data *blt, > + const struct blt_block_copy_data_ext *ext, > + uint64_t bb_pos, > + bool emit_bbe) > { > - struct drm_i915_gem_execbuffer2 execbuf = {}; > - struct drm_i915_gem_exec_object2 obj[3] = {}; > struct gen12_block_copy_data data = {}; > struct gen12_block_copy_data_ext dext = {}; > - uint64_t dst_offset, src_offset, bb_offset, alignment; > - uint32_t *bb; > - int i, ret; > + uint64_t dst_offset, src_offset, bb_offset; > + uint32_t bbe = MI_BATCH_BUFFER_END; > + uint8_t *bb; > > igt_assert_f(ahnd, "block-copy supports softpin only\n"); > igt_assert_f(blt, "block-copy requires data to do blit\n"); > > - alignment = gem_detect_safe_alignment(i915); > - src_offset = get_offset(ahnd, blt->src.handle, blt->src.size, alignment); > - if (__special_mode(blt) == SM_FULL_RESOLVE) > - dst_offset = src_offset; > - else > - dst_offset = get_offset(ahnd, blt->dst.handle, blt->dst.size, alignment); > - bb_offset = get_offset(ahnd, blt->bb.handle, blt->bb.size, alignment); > + src_offset = get_offset(ahnd, blt->src.handle, blt->src.size, 0); > + dst_offset = get_offset(ahnd, blt->dst.handle, blt->dst.size, 0); > + bb_offset = get_offset(ahnd, blt->bb.handle, blt->bb.size, 0); Why do we pass 0 as object alignment here? In surf-copy and fast-copy we pass "alignment" in. 
> > fill_data(&data, blt, src_offset, dst_offset, ext); > > - i = sizeof(data) / sizeof(uint32_t); > bb = gem_mmap__device_coherent(i915, blt->bb.handle, 0, blt->bb.size, > PROT_READ | PROT_WRITE); > - memcpy(bb, &data, sizeof(data)); > + > + igt_assert(bb_pos + sizeof(data) < blt->bb.size); I'd say we need extra space for potential MI_BATCH_BUFFER_END here > + memcpy(bb + bb_pos, &data, sizeof(data)); > + bb_pos += sizeof(data); > > if (ext) { > fill_data_ext(&dext, ext); > - memcpy(bb + i, &dext, sizeof(dext)); > - i += sizeof(dext) / sizeof(uint32_t); > + igt_assert(bb_pos + sizeof(dext) < blt->bb.size); > + memcpy(bb + bb_pos, &dext, sizeof(dext)); > + bb_pos += sizeof(dext); > + } > + > + if (emit_bbe) { > + igt_assert(bb_pos + sizeof(uint32_t) < blt->bb.size); > + memcpy(bb + bb_pos, &bbe, sizeof(bbe)); > + bb_pos += sizeof(uint32_t); > } > - bb[i++] = MI_BATCH_BUFFER_END; > > if (blt->print_bb) { > igt_info("[BLOCK COPY]\n"); > @@ -569,6 +572,44 @@ int blt_block_copy(int i915, > > munmap(bb, blt->bb.size); > > + return bb_pos; > +} > + > +/** > + * blt_block_copy: > + * @i915: drm fd > + * @ctx: intel_ctx_t context > + * @e: blitter engine for @ctx > + * @ahnd: allocator handle > + * @blt: basic blitter data (for TGL/DG1 which doesn't support ext version) > + * @ext: extended blitter data (for DG2+, supports flatccs compression) > + * > + * Function does blit between @src and @dst described in @blt object. > + * > + * Returns: > + * execbuffer status. 
> + */ > +int blt_block_copy(int i915, > + const intel_ctx_t *ctx, > + const struct intel_execution_engine2 *e, > + uint64_t ahnd, > + const struct blt_copy_data *blt, > + const struct blt_block_copy_data_ext *ext) > +{ > + struct drm_i915_gem_execbuffer2 execbuf = {}; > + struct drm_i915_gem_exec_object2 obj[3] = {}; > + uint64_t dst_offset, src_offset, bb_offset; > + int ret; > + > + igt_assert_f(ahnd, "block-copy supports softpin only\n"); > + igt_assert_f(blt, "block-copy requires data to do blit\n"); > + > + src_offset = get_offset(ahnd, blt->src.handle, blt->src.size, 0); > + dst_offset = get_offset(ahnd, blt->dst.handle, blt->dst.size, 0); > + bb_offset = get_offset(ahnd, blt->bb.handle, blt->bb.size, 0); > + > + emit_blt_block_copy(i915, ahnd, blt, ext, 0, true); > + > obj[0].offset = CANONICAL(dst_offset); > obj[1].offset = CANONICAL(src_offset); > obj[2].offset = CANONICAL(bb_offset > @@ -655,31 +696,30 @@ static void dump_bb_surf_ctrl_cmd(const struct gen12_ctrl_surf_copy_data *data) > } > > /** > - * blt_ctrl_surf_copy: > + * emit_blt_ctrl_surf_copy: > * @i915: drm fd > - * @ctx: intel_ctx_t context > - * @e: blitter engine for @ctx > * @ahnd: allocator handle > * @surf: blitter data for ctrl-surf-copy > + * @bb_pos: position at which insert block copy commands > + * @emit_bbe: emit MI_BATCH_BUFFER_END after ctrl-surf-copy or not > * > - * Function does ctrl-surf-copy blit between @src and @dst described in > - * @blt object. > + * Function emits ctrl-surf-copy blit between @src and @dst described in > + * @blt object at @bb_pos. Allows concatenating with other commands to > + * achieve pipelining. > * > * Returns: > - * execbuffer status. > + * Next write position in batch. 
> */ > -int blt_ctrl_surf_copy(int i915, > - const intel_ctx_t *ctx, > - const struct intel_execution_engine2 *e, > - uint64_t ahnd, > - const struct blt_ctrl_surf_copy_data *surf) > +uint64_t emit_blt_ctrl_surf_copy(int i915, > + uint64_t ahnd, > + const struct blt_ctrl_surf_copy_data *surf, > + uint64_t bb_pos, > + bool emit_bbe) > { > - struct drm_i915_gem_execbuffer2 execbuf = {}; > - struct drm_i915_gem_exec_object2 obj[3] = {}; > struct gen12_ctrl_surf_copy_data data = {}; > uint64_t dst_offset, src_offset, bb_offset, alignment; > + uint32_t bbe = MI_BATCH_BUFFER_END; > uint32_t *bb; > - int i; > > igt_assert_f(ahnd, "ctrl-surf-copy supports softpin only\n"); > igt_assert_f(surf, "ctrl-surf-copy requires data to do ctrl-surf-copy blit\n"); > @@ -695,12 +735,9 @@ int blt_ctrl_surf_copy(int i915, > data.dw00.size_of_ctrl_copy = __ccs_size(surf) / CCS_RATIO - 1; > data.dw00.length = 0x3; > > - src_offset = get_offset(ahnd, surf->src.handle, surf->src.size, > - alignment); > - dst_offset = get_offset(ahnd, surf->dst.handle, surf->dst.size, > - alignment); > - bb_offset = get_offset(ahnd, surf->bb.handle, surf->bb.size, > - alignment); > + src_offset = get_offset(ahnd, surf->src.handle, surf->src.size, alignment); > + dst_offset = get_offset(ahnd, surf->dst.handle, surf->dst.size, alignment); > + bb_offset = get_offset(ahnd, surf->bb.handle, surf->bb.size, alignment); > > data.dw01.src_address_lo = src_offset; > data.dw02.src_address_hi = src_offset >> 32; > @@ -710,11 +747,18 @@ int blt_ctrl_surf_copy(int i915, > data.dw04.dst_address_hi = dst_offset >> 32; > data.dw04.dst_mocs = surf->dst.mocs; > > - i = sizeof(data) / sizeof(uint32_t); > bb = gem_mmap__device_coherent(i915, surf->bb.handle, 0, surf->bb.size, > PROT_READ | PROT_WRITE); > - memcpy(bb, &data, sizeof(data)); > - bb[i++] = MI_BATCH_BUFFER_END; > + > + igt_assert(bb_pos + sizeof(data) < surf->bb.size); > + memcpy(bb + bb_pos, &data, sizeof(data)); > + bb_pos += sizeof(data); > + > + if (emit_bbe) { > 
+ igt_assert(bb_pos + sizeof(uint32_t) < surf->bb.size); > + memcpy(bb + bb_pos, &bbe, sizeof(bbe)); > + bb_pos += sizeof(uint32_t); > + } > > if (surf->print_bb) { > igt_info("BB [CTRL SURF]:\n"); > @@ -724,8 +768,46 @@ int blt_ctrl_surf_copy(int i915, > > dump_bb_surf_ctrl_cmd(&data); > } > + > munmap(bb, surf->bb.size); > > + return bb_pos; > +} > + > +/** > + * blt_ctrl_surf_copy: > + * @i915: drm fd > + * @ctx: intel_ctx_t context > + * @e: blitter engine for @ctx > + * @ahnd: allocator handle > + * @surf: blitter data for ctrl-surf-copy > + * > + * Function does ctrl-surf-copy blit between @src and @dst described in > + * @blt object. > + * > + * Returns: > + * execbuffer status. > + */ > +int blt_ctrl_surf_copy(int i915, > + const intel_ctx_t *ctx, > + const struct intel_execution_engine2 *e, > + uint64_t ahnd, > + const struct blt_ctrl_surf_copy_data *surf) > +{ > + struct drm_i915_gem_execbuffer2 execbuf = {}; > + struct drm_i915_gem_exec_object2 obj[3] = {}; > + uint64_t dst_offset, src_offset, bb_offset, alignment; > + > + igt_assert_f(ahnd, "ctrl-surf-copy supports softpin only\n"); > + igt_assert_f(surf, "ctrl-surf-copy requires data to do ctrl-surf-copy blit\n"); > + > + alignment = max_t(uint64_t, gem_detect_safe_alignment(i915), 1ull << 16); > + src_offset = get_offset(ahnd, surf->src.handle, surf->src.size, alignment); > + dst_offset = get_offset(ahnd, surf->dst.handle, surf->dst.size, alignment); > + bb_offset = get_offset(ahnd, surf->bb.handle, surf->bb.size, alignment); > + > + emit_blt_ctrl_surf_copy(i915, ahnd, surf, 0, true); > + > obj[0].offset = CANONICAL(dst_offset); > obj[1].offset = CANONICAL(src_offset); > obj[2].offset = CANONICAL(bb_offset); > @@ -869,31 +951,31 @@ static void dump_bb_fast_cmd(struct gen12_fast_copy_data *data) > } > > /** > - * blt_fast_copy: > + * emit_blt_fast_copy: > * @i915: drm fd > - * @ctx: intel_ctx_t context > - * @e: blitter engine for @ctx > * @ahnd: allocator handle > * @blt: blitter data for fast-copy 
(same as for block-copy but doesn't use > * compression fields). > + * @bb_pos: position at which insert block copy commands > + * @emit_bbe: emit MI_BATCH_BUFFER_END after fast-copy or not > * > - * Function does fast blit between @src and @dst described in @blt object. > + * Function emits fast-copy blit between @src and @dst described in @blt object > + * at @bb_pos. Allows concatenating with other commands to > + * achieve pipelining. > * > * Returns: > - * execbuffer status. > + * Next write position in batch. > */ > -int blt_fast_copy(int i915, > - const intel_ctx_t *ctx, > - const struct intel_execution_engine2 *e, > - uint64_t ahnd, > - const struct blt_copy_data *blt) > +uint64_t emit_blt_fast_copy(int i915, > + uint64_t ahnd, > + const struct blt_copy_data *blt, > + uint64_t bb_pos, > + bool emit_bbe) > { > - struct drm_i915_gem_execbuffer2 execbuf = {}; > - struct drm_i915_gem_exec_object2 obj[3] = {}; > struct gen12_fast_copy_data data = {}; > uint64_t dst_offset, src_offset, bb_offset, alignment; > + uint32_t bbe = MI_BATCH_BUFFER_END; > uint32_t *bb; > - int i, ret; > > alignment = gem_detect_safe_alignment(i915); > > @@ -931,22 +1013,65 @@ int blt_fast_copy(int i915, > data.dw08.src_address_lo = src_offset; > data.dw09.src_address_hi = src_offset >> 32; > > - i = sizeof(data) / sizeof(uint32_t); > bb = gem_mmap__device_coherent(i915, blt->bb.handle, 0, blt->bb.size, > PROT_READ | PROT_WRITE); > > - memcpy(bb, &data, sizeof(data)); > - bb[i++] = MI_BATCH_BUFFER_END; > + igt_assert(bb_pos + sizeof(data) < blt->bb.size); > + memcpy(bb + bb_pos, &data, sizeof(data)); > + bb_pos += sizeof(data); > + > + if (emit_bbe) { > + igt_assert(bb_pos + sizeof(uint32_t) < blt->bb.size); > + memcpy(bb + bb_pos, &bbe, sizeof(bbe)); > + bb_pos += sizeof(uint32_t); > + } > > if (blt->print_bb) { > igt_info("BB [FAST COPY]\n"); > - igt_info("blit [src offset: %llx, dst offset: %llx\n", > - (long long) src_offset, (long long) dst_offset); > + igt_info("src offset: %llx, 
dst offset: %llx, bb offset: %llx\n", > + (long long) src_offset, (long long) dst_offset, > + (long long) bb_offset); Nitpick: as you're touching these lines anyway, could you delete the space after each cast? It's not needed. In general, I'm fine with the changes, but would like to clarify a couple of things above before giving r-b. All the best, Karolina > dump_bb_fast_cmd(&data); > } > > munmap(bb, blt->bb.size); > > + return bb_pos; > +} > + > +/** > + * blt_fast_copy: > + * @i915: drm fd > + * @ctx: intel_ctx_t context > + * @e: blitter engine for @ctx > + * @ahnd: allocator handle > + * @blt: blitter data for fast-copy (same as for block-copy but doesn't use > + * compression fields). > + * > + * Function does fast blit between @src and @dst described in @blt object. > + * > + * Returns: > + * execbuffer status. > + */ > +int blt_fast_copy(int i915, > + const intel_ctx_t *ctx, > + const struct intel_execution_engine2 *e, > + uint64_t ahnd, > + const struct blt_copy_data *blt) > +{ > + struct drm_i915_gem_execbuffer2 execbuf = {}; > + struct drm_i915_gem_exec_object2 obj[3] = {}; > + uint64_t dst_offset, src_offset, bb_offset, alignment; > + int ret; > + > + alignment = gem_detect_safe_alignment(i915); > + > + src_offset = get_offset(ahnd, blt->src.handle, blt->src.size, alignment); > + dst_offset = get_offset(ahnd, blt->dst.handle, blt->dst.size, alignment); > + bb_offset = get_offset(ahnd, blt->bb.handle, blt->bb.size, alignment); > + > + emit_blt_fast_copy(i915, ahnd, blt, 0, true); > + > obj[0].offset = CANONICAL(dst_offset); > obj[1].offset = CANONICAL(src_offset); > obj[2].offset = CANONICAL(bb_offset); > diff --git a/lib/i915/i915_blt.h b/lib/i915/i915_blt.h > index e0e8b52bc2..34db9bb962 100644 > --- a/lib/i915/i915_blt.h > +++ b/lib/i915/i915_blt.h > @@ -168,6 +168,13 @@ bool blt_supports_compression(int i915); > bool blt_supports_tiling(int i915, enum blt_tiling tiling); > const char *blt_tiling_name(enum blt_tiling tiling); > > +uint64_t 
emit_blt_block_copy(int i915, > + uint64_t ahnd, > + const struct blt_copy_data *blt, > + const struct blt_block_copy_data_ext *ext, > + uint64_t bb_pos, > + bool emit_bbe); > + > int blt_block_copy(int i915, > const intel_ctx_t *ctx, > const struct intel_execution_engine2 *e, > @@ -175,12 +182,24 @@ int blt_block_copy(int i915, > const struct blt_copy_data *blt, > const struct blt_block_copy_data_ext *ext); > > +uint64_t emit_blt_ctrl_surf_copy(int i915, > + uint64_t ahnd, > + const struct blt_ctrl_surf_copy_data *surf, > + uint64_t bb_pos, > + bool emit_bbe); > + > int blt_ctrl_surf_copy(int i915, > const intel_ctx_t *ctx, > const struct intel_execution_engine2 *e, > uint64_t ahnd, > const struct blt_ctrl_surf_copy_data *surf); > > +uint64_t emit_blt_fast_copy(int i915, > + uint64_t ahnd, > + const struct blt_copy_data *blt, > + uint64_t bb_pos, > + bool emit_bbe); > + > int blt_fast_copy(int i915, > const intel_ctx_t *ctx, > const struct intel_execution_engine2 *e,