All of lore.kernel.org
 help / color / mirror / Atom feed
From: Karolina Stolarek <karolina.stolarek@intel.com>
To: "Zbigniew Kempczyński" <zbigniew.kempczynski@intel.com>
Cc: igt-dev@lists.freedesktop.org
Subject: Re: [igt-dev] [PATCH i-g-t 2/3] lib/i915_blt: Extract blit emit functions
Date: Tue, 13 Dec 2022 16:39:14 +0100	[thread overview]
Message-ID: <d748443c-e770-10d6-42a5-9eb5545dc62d@intel.com> (raw)
In-Reply-To: <20221212125035.51326-3-zbigniew.kempczynski@intel.com>

On 12.12.2022 13:50, Zbigniew Kempczyński wrote:
> Add some flexibility in building user pipelines extracting blitter
> emission code to dedicated functions. Previous blitter functions which
> do one blit-and-execute are rewritten to use those functions.
> Requires usage with stateful allocator (offset might be acquired more
> than one, so it must not change).
> 
> Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
> ---
>   lib/i915/i915_blt.c | 263 ++++++++++++++++++++++++++++++++------------
>   lib/i915/i915_blt.h |  19 ++++
>   2 files changed, 213 insertions(+), 69 deletions(-)
> 
> diff --git a/lib/i915/i915_blt.c b/lib/i915/i915_blt.c
> index 42c28623f9..32ad608775 100644
> --- a/lib/i915/i915_blt.c
> +++ b/lib/i915/i915_blt.c
> @@ -503,58 +503,61 @@ static void dump_bb_ext(struct gen12_block_copy_data_ext *data)
>   }
>   
>   /**
> - * blt_block_copy:
> + * emit_blt_block_copy:
>    * @i915: drm fd
> - * @ctx: intel_ctx_t context
> - * @e: blitter engine for @ctx
>    * @ahnd: allocator handle
>    * @blt: basic blitter data (for TGL/DG1 which doesn't support ext version)
>    * @ext: extended blitter data (for DG2+, supports flatccs compression)
> + * @bb_pos: position at which insert block copy commands
> + * @emit_bbe: emit MI_BATCH_BUFFER_END after block-copy or not
>    *
> - * Function does blit between @src and @dst described in @blt object.
> + * Function inserts block-copy blit into batch at @bb_pos. Allows concatenating
> + * with other commands to achieve pipelining.
>    *
>    * Returns:
> - * execbuffer status.
> + * Next write position in batch.
>    */
> -int blt_block_copy(int i915,
> -		   const intel_ctx_t *ctx,
> -		   const struct intel_execution_engine2 *e,
> -		   uint64_t ahnd,
> -		   const struct blt_copy_data *blt,
> -		   const struct blt_block_copy_data_ext *ext)
> +uint64_t emit_blt_block_copy(int i915,
> +			     uint64_t ahnd,
> +			     const struct blt_copy_data *blt,
> +			     const struct blt_block_copy_data_ext *ext,
> +			     uint64_t bb_pos,
> +			     bool emit_bbe)
>   {
> -	struct drm_i915_gem_execbuffer2 execbuf = {};
> -	struct drm_i915_gem_exec_object2 obj[3] = {};
>   	struct gen12_block_copy_data data = {};
>   	struct gen12_block_copy_data_ext dext = {};
> -	uint64_t dst_offset, src_offset, bb_offset, alignment;
> -	uint32_t *bb;
> -	int i, ret;
> +	uint64_t dst_offset, src_offset, bb_offset;
> +	uint32_t bbe = MI_BATCH_BUFFER_END;
> +	uint8_t *bb;
>   
>   	igt_assert_f(ahnd, "block-copy supports softpin only\n");
>   	igt_assert_f(blt, "block-copy requires data to do blit\n");
>   
> -	alignment = gem_detect_safe_alignment(i915);
> -	src_offset = get_offset(ahnd, blt->src.handle, blt->src.size, alignment);
> -	if (__special_mode(blt) == SM_FULL_RESOLVE)
> -		dst_offset = src_offset;
> -	else
> -		dst_offset = get_offset(ahnd, blt->dst.handle, blt->dst.size, alignment);
> -	bb_offset = get_offset(ahnd, blt->bb.handle, blt->bb.size, alignment);
> +	src_offset = get_offset(ahnd, blt->src.handle, blt->src.size, 0);
> +	dst_offset = get_offset(ahnd, blt->dst.handle, blt->dst.size, 0);
> +	bb_offset = get_offset(ahnd, blt->bb.handle, blt->bb.size, 0);

Why do we pass 0 as object alignment here? In surf-copy and fast-copy we 
pass "alignment" in.

>   
>   	fill_data(&data, blt, src_offset, dst_offset, ext);
>   
> -	i = sizeof(data) / sizeof(uint32_t);
>   	bb = gem_mmap__device_coherent(i915, blt->bb.handle, 0, blt->bb.size,
>   				       PROT_READ | PROT_WRITE);
> -	memcpy(bb, &data, sizeof(data));
> +
> +	igt_assert(bb_pos + sizeof(data) < blt->bb.size);

I'd say we need extra space for potential MI_BATCH_BUFFER_END here

> +	memcpy(bb + bb_pos, &data, sizeof(data));
> +	bb_pos += sizeof(data);
>   
>   	if (ext) {
>   		fill_data_ext(&dext, ext);
> -		memcpy(bb + i, &dext, sizeof(dext));
> -		i += sizeof(dext) / sizeof(uint32_t);
> +		igt_assert(bb_pos + sizeof(dext) < blt->bb.size);
> +		memcpy(bb + bb_pos, &dext, sizeof(dext));
> +		bb_pos += sizeof(dext);
> +	}
> +
> +	if (emit_bbe) {
> +		igt_assert(bb_pos + sizeof(uint32_t) < blt->bb.size);
> +		memcpy(bb + bb_pos, &bbe, sizeof(bbe));
> +		bb_pos += sizeof(uint32_t);
>   	}
> -	bb[i++] = MI_BATCH_BUFFER_END;
>   
>   	if (blt->print_bb) {
>   		igt_info("[BLOCK COPY]\n");
> @@ -569,6 +572,44 @@ int blt_block_copy(int i915,
>   
>   	munmap(bb, blt->bb.size);
>   
> +	return bb_pos;
> +}
> +
> +/**
> + * blt_block_copy:
> + * @i915: drm fd
> + * @ctx: intel_ctx_t context
> + * @e: blitter engine for @ctx
> + * @ahnd: allocator handle
> + * @blt: basic blitter data (for TGL/DG1 which doesn't support ext version)
> + * @ext: extended blitter data (for DG2+, supports flatccs compression)
> + *
> + * Function does blit between @src and @dst described in @blt object.
> + *
> + * Returns:
> + * execbuffer status.
> + */
> +int blt_block_copy(int i915,
> +		   const intel_ctx_t *ctx,
> +		   const struct intel_execution_engine2 *e,
> +		   uint64_t ahnd,
> +		   const struct blt_copy_data *blt,
> +		   const struct blt_block_copy_data_ext *ext)
> +{
> +	struct drm_i915_gem_execbuffer2 execbuf = {};
> +	struct drm_i915_gem_exec_object2 obj[3] = {};
> +	uint64_t dst_offset, src_offset, bb_offset;
> +	int ret;
> +
> +	igt_assert_f(ahnd, "block-copy supports softpin only\n");
> +	igt_assert_f(blt, "block-copy requires data to do blit\n");
> +
> +	src_offset = get_offset(ahnd, blt->src.handle, blt->src.size, 0);
> +	dst_offset = get_offset(ahnd, blt->dst.handle, blt->dst.size, 0);
> +	bb_offset = get_offset(ahnd, blt->bb.handle, blt->bb.size, 0);
> +
> +	emit_blt_block_copy(i915, ahnd, blt, ext, 0, true);
> +
>   	obj[0].offset = CANONICAL(dst_offset);
>   	obj[1].offset = CANONICAL(src_offset);
>   	obj[2].offset = CANONICAL(bb_offset > @@ -655,31 +696,30 @@ static void dump_bb_surf_ctrl_cmd(const struct 
gen12_ctrl_surf_copy_data *data)
>   }
>   
>   /**
> - * blt_ctrl_surf_copy:
> + * emit_blt_ctrl_surf_copy:
>    * @i915: drm fd
> - * @ctx: intel_ctx_t context
> - * @e: blitter engine for @ctx
>    * @ahnd: allocator handle
>    * @surf: blitter data for ctrl-surf-copy
> + * @bb_pos: position at which insert block copy commands
> + * @emit_bbe: emit MI_BATCH_BUFFER_END after ctrl-surf-copy or not
>    *
> - * Function does ctrl-surf-copy blit between @src and @dst described in
> - * @blt object.
> + * Function emits ctrl-surf-copy blit between @src and @dst described in
> + * @blt object at @bb_pos. Allows concatenating with other commands to
> + * achieve pipelining.
>    *
>    * Returns:
> - * execbuffer status.
> + * Next write position in batch.
>    */
> -int blt_ctrl_surf_copy(int i915,
> -		       const intel_ctx_t *ctx,
> -		       const struct intel_execution_engine2 *e,
> -		       uint64_t ahnd,
> -		       const struct blt_ctrl_surf_copy_data *surf)
> +uint64_t emit_blt_ctrl_surf_copy(int i915,
> +				 uint64_t ahnd,
> +				 const struct blt_ctrl_surf_copy_data *surf,
> +				 uint64_t bb_pos,
> +				 bool emit_bbe)
>   {
> -	struct drm_i915_gem_execbuffer2 execbuf = {};
> -	struct drm_i915_gem_exec_object2 obj[3] = {};
>   	struct gen12_ctrl_surf_copy_data data = {};
>   	uint64_t dst_offset, src_offset, bb_offset, alignment;
> +	uint32_t bbe = MI_BATCH_BUFFER_END;
>   	uint32_t *bb;
> -	int i;
>   
>   	igt_assert_f(ahnd, "ctrl-surf-copy supports softpin only\n");
>   	igt_assert_f(surf, "ctrl-surf-copy requires data to do ctrl-surf-copy blit\n");
> @@ -695,12 +735,9 @@ int blt_ctrl_surf_copy(int i915,
>   	data.dw00.size_of_ctrl_copy = __ccs_size(surf) / CCS_RATIO - 1;
>   	data.dw00.length = 0x3;
>   
> -	src_offset = get_offset(ahnd, surf->src.handle, surf->src.size,
> -				alignment);
> -	dst_offset = get_offset(ahnd, surf->dst.handle, surf->dst.size,
> -				alignment);
> -	bb_offset = get_offset(ahnd, surf->bb.handle, surf->bb.size,
> -			       alignment);
> +	src_offset = get_offset(ahnd, surf->src.handle, surf->src.size, alignment);
> +	dst_offset = get_offset(ahnd, surf->dst.handle, surf->dst.size, alignment);
> +	bb_offset = get_offset(ahnd, surf->bb.handle, surf->bb.size, alignment);
>   
>   	data.dw01.src_address_lo = src_offset;
>   	data.dw02.src_address_hi = src_offset >> 32;
> @@ -710,11 +747,18 @@ int blt_ctrl_surf_copy(int i915,
>   	data.dw04.dst_address_hi = dst_offset >> 32;
>   	data.dw04.dst_mocs = surf->dst.mocs;
>   
> -	i = sizeof(data) / sizeof(uint32_t);
>   	bb = gem_mmap__device_coherent(i915, surf->bb.handle, 0, surf->bb.size,
>   				       PROT_READ | PROT_WRITE);
> -	memcpy(bb, &data, sizeof(data));
> -	bb[i++] = MI_BATCH_BUFFER_END;
> +
> +	igt_assert(bb_pos + sizeof(data) < surf->bb.size);
> +	memcpy(bb + bb_pos, &data, sizeof(data));
> +	bb_pos += sizeof(data);
> +
> +	if (emit_bbe) {
> +		igt_assert(bb_pos + sizeof(uint32_t) < surf->bb.size);
> +		memcpy(bb + bb_pos, &bbe, sizeof(bbe));
> +		bb_pos += sizeof(uint32_t);
> +	}
>   
>   	if (surf->print_bb) {
>   		igt_info("BB [CTRL SURF]:\n");
> @@ -724,8 +768,46 @@ int blt_ctrl_surf_copy(int i915,
>   
>   		dump_bb_surf_ctrl_cmd(&data);
>   	}
> +
>   	munmap(bb, surf->bb.size);
>   
> +	return bb_pos;
> +}
> +
> +/**
> + * blt_ctrl_surf_copy:
> + * @i915: drm fd
> + * @ctx: intel_ctx_t context
> + * @e: blitter engine for @ctx
> + * @ahnd: allocator handle
> + * @surf: blitter data for ctrl-surf-copy
> + *
> + * Function does ctrl-surf-copy blit between @src and @dst described in
> + * @blt object.
> + *
> + * Returns:
> + * execbuffer status.
> + */
> +int blt_ctrl_surf_copy(int i915,
> +		       const intel_ctx_t *ctx,
> +		       const struct intel_execution_engine2 *e,
> +		       uint64_t ahnd,
> +		       const struct blt_ctrl_surf_copy_data *surf)
> +{
> +	struct drm_i915_gem_execbuffer2 execbuf = {};
> +	struct drm_i915_gem_exec_object2 obj[3] = {};
> +	uint64_t dst_offset, src_offset, bb_offset, alignment;
> +
> +	igt_assert_f(ahnd, "ctrl-surf-copy supports softpin only\n");
> +	igt_assert_f(surf, "ctrl-surf-copy requires data to do ctrl-surf-copy blit\n");
> +
> +	alignment = max_t(uint64_t, gem_detect_safe_alignment(i915), 1ull << 16);
> +	src_offset = get_offset(ahnd, surf->src.handle, surf->src.size, alignment);
> +	dst_offset = get_offset(ahnd, surf->dst.handle, surf->dst.size, alignment);
> +	bb_offset = get_offset(ahnd, surf->bb.handle, surf->bb.size, alignment);
> +
> +	emit_blt_ctrl_surf_copy(i915, ahnd, surf, 0, true);
> +
>   	obj[0].offset = CANONICAL(dst_offset);
>   	obj[1].offset = CANONICAL(src_offset);
>   	obj[2].offset = CANONICAL(bb_offset);
> @@ -869,31 +951,31 @@ static void dump_bb_fast_cmd(struct gen12_fast_copy_data *data)
>   }
>   
>   /**
> - * blt_fast_copy:
> + * emit_blt_fast_copy:
>    * @i915: drm fd
> - * @ctx: intel_ctx_t context
> - * @e: blitter engine for @ctx
>    * @ahnd: allocator handle
>    * @blt: blitter data for fast-copy (same as for block-copy but doesn't use
>    * compression fields).
> + * @bb_pos: position at which insert block copy commands
> + * @emit_bbe: emit MI_BATCH_BUFFER_END after fast-copy or not
>    *
> - * Function does fast blit between @src and @dst described in @blt object.
> + * Function emits fast-copy blit between @src and @dst described in @blt object
> + * at @bb_pos. Allows concatenating with other commands to
> + * achieve pipelining.
>    *
>    * Returns:
> - * execbuffer status.
> + * Next write position in batch.
>    */
> -int blt_fast_copy(int i915,
> -		  const intel_ctx_t *ctx,
> -		  const struct intel_execution_engine2 *e,
> -		  uint64_t ahnd,
> -		  const struct blt_copy_data *blt)
> +uint64_t emit_blt_fast_copy(int i915,
> +			    uint64_t ahnd,
> +			    const struct blt_copy_data *blt,
> +			    uint64_t bb_pos,
> +			    bool emit_bbe)
>   {
> -	struct drm_i915_gem_execbuffer2 execbuf = {};
> -	struct drm_i915_gem_exec_object2 obj[3] = {};
>   	struct gen12_fast_copy_data data = {};
>   	uint64_t dst_offset, src_offset, bb_offset, alignment;
> +	uint32_t bbe = MI_BATCH_BUFFER_END;
>   	uint32_t *bb;
> -	int i, ret;
>   
>   	alignment = gem_detect_safe_alignment(i915);
>   
> @@ -931,22 +1013,65 @@ int blt_fast_copy(int i915,
>   	data.dw08.src_address_lo = src_offset;
>   	data.dw09.src_address_hi = src_offset >> 32;
>   
> -	i = sizeof(data) / sizeof(uint32_t);
>   	bb = gem_mmap__device_coherent(i915, blt->bb.handle, 0, blt->bb.size,
>   				       PROT_READ | PROT_WRITE);
>   
> -	memcpy(bb, &data, sizeof(data));
> -	bb[i++] = MI_BATCH_BUFFER_END;
> +	igt_assert(bb_pos + sizeof(data) < blt->bb.size);
> +	memcpy(bb + bb_pos, &data, sizeof(data));
> +	bb_pos += sizeof(data);
> +
> +	if (emit_bbe) {
> +		igt_assert(bb_pos + sizeof(uint32_t) < blt->bb.size);
> +		memcpy(bb + bb_pos, &bbe, sizeof(bbe));
> +		bb_pos += sizeof(uint32_t);
> +	}
>   
>   	if (blt->print_bb) {
>   		igt_info("BB [FAST COPY]\n");
> -		igt_info("blit [src offset: %llx, dst offset: %llx\n",
> -			 (long long) src_offset, (long long) dst_offset);
> +		igt_info("src offset: %llx, dst offset: %llx, bb offset: %llx\n",
> +			 (long long) src_offset, (long long) dst_offset,
> +			 (long long) bb_offset);

Nitpick: as you're touching these lines anyway, could you delete space 
after cast? They're not needed.

In general, I'm fine with the changes, but would like to clarify a 
couple of things above before giving r-b.

All the best,
Karolina

>   		dump_bb_fast_cmd(&data);
>   	}
>   
>   	munmap(bb, blt->bb.size);
>   
> +	return bb_pos;
> +}
> +
> +/**
> + * blt_fast_copy:
> + * @i915: drm fd
> + * @ctx: intel_ctx_t context
> + * @e: blitter engine for @ctx
> + * @ahnd: allocator handle
> + * @blt: blitter data for fast-copy (same as for block-copy but doesn't use
> + * compression fields).
> + *
> + * Function does fast blit between @src and @dst described in @blt object.
> + *
> + * Returns:
> + * execbuffer status.
> + */
> +int blt_fast_copy(int i915,
> +		  const intel_ctx_t *ctx,
> +		  const struct intel_execution_engine2 *e,
> +		  uint64_t ahnd,
> +		  const struct blt_copy_data *blt)
> +{
> +	struct drm_i915_gem_execbuffer2 execbuf = {};
> +	struct drm_i915_gem_exec_object2 obj[3] = {};
> +	uint64_t dst_offset, src_offset, bb_offset, alignment;
> +	int ret;
> +
> +	alignment = gem_detect_safe_alignment(i915);
> +
> +	src_offset = get_offset(ahnd, blt->src.handle, blt->src.size, alignment);
> +	dst_offset = get_offset(ahnd, blt->dst.handle, blt->dst.size, alignment);
> +	bb_offset = get_offset(ahnd, blt->bb.handle, blt->bb.size, alignment);
> +
> +	emit_blt_fast_copy(i915, ahnd, blt, 0, true);
> +
>   	obj[0].offset = CANONICAL(dst_offset);
>   	obj[1].offset = CANONICAL(src_offset);
>   	obj[2].offset = CANONICAL(bb_offset);
> diff --git a/lib/i915/i915_blt.h b/lib/i915/i915_blt.h
> index e0e8b52bc2..34db9bb962 100644
> --- a/lib/i915/i915_blt.h
> +++ b/lib/i915/i915_blt.h
> @@ -168,6 +168,13 @@ bool blt_supports_compression(int i915);
>   bool blt_supports_tiling(int i915, enum blt_tiling tiling);
>   const char *blt_tiling_name(enum blt_tiling tiling);
>   
> +uint64_t emit_blt_block_copy(int i915,
> +			     uint64_t ahnd,
> +			     const struct blt_copy_data *blt,
> +			     const struct blt_block_copy_data_ext *ext,
> +			     uint64_t bb_pos,
> +			     bool emit_bbe);
> +
>   int blt_block_copy(int i915,
>   		   const intel_ctx_t *ctx,
>   		   const struct intel_execution_engine2 *e,
> @@ -175,12 +182,24 @@ int blt_block_copy(int i915,
>   		   const struct blt_copy_data *blt,
>   		   const struct blt_block_copy_data_ext *ext);
>   
> +uint64_t emit_blt_ctrl_surf_copy(int i915,
> +				 uint64_t ahnd,
> +				 const struct blt_ctrl_surf_copy_data *surf,
> +				 uint64_t bb_pos,
> +				 bool emit_bbe);
> +
>   int blt_ctrl_surf_copy(int i915,
>   		       const intel_ctx_t *ctx,
>   		       const struct intel_execution_engine2 *e,
>   		       uint64_t ahnd,
>   		       const struct blt_ctrl_surf_copy_data *surf);
>   
> +uint64_t emit_blt_fast_copy(int i915,
> +			    uint64_t ahnd,
> +			    const struct blt_copy_data *blt,
> +			    uint64_t bb_pos,
> +			    bool emit_bbe);
> +
>   int blt_fast_copy(int i915,
>   		  const intel_ctx_t *ctx,
>   		  const struct intel_execution_engine2 *e,

  reply	other threads:[~2022-12-13 15:39 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-12-12 12:50 [igt-dev] [PATCH i-g-t 0/3] Add block-multicopy-* subtests Zbigniew Kempczyński
2022-12-12 12:50 ` [igt-dev] [PATCH i-g-t 1/3] lib/i915_blt: Remove src == dst pitch restriction Zbigniew Kempczyński
2022-12-13 15:37   ` Karolina Stolarek
2022-12-14 18:57     ` Zbigniew Kempczyński
2022-12-15  8:41       ` Karolina Stolarek
2022-12-15 10:43         ` Karolina Stolarek
2022-12-12 12:50 ` [igt-dev] [PATCH i-g-t 2/3] lib/i915_blt: Extract blit emit functions Zbigniew Kempczyński
2022-12-13 15:39   ` Karolina Stolarek [this message]
2022-12-14 19:40     ` Zbigniew Kempczyński
2022-12-15  8:42       ` Karolina Stolarek
2022-12-15 10:41         ` Zbigniew Kempczyński
2022-12-15 10:47           ` Karolina Stolarek
2022-12-12 12:50 ` [igt-dev] [PATCH i-g-t 3/3] tests/gem_ccs: Add block-multicopy subtest Zbigniew Kempczyński
2022-12-13 15:40   ` Karolina Stolarek
2022-12-14 19:57     ` Zbigniew Kempczyński
2022-12-15  8:42       ` Karolina Stolarek
2022-12-15 11:00         ` Zbigniew Kempczyński
2022-12-15 11:47           ` Karolina Stolarek
2022-12-12 13:23 ` [igt-dev] ✓ Fi.CI.BAT: success for Add block-multicopy-* subtests Patchwork
2022-12-12 21:05 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d748443c-e770-10d6-42a5-9eb5545dc62d@intel.com \
    --to=karolina.stolarek@intel.com \
    --cc=igt-dev@lists.freedesktop.org \
    --cc=zbigniew.kempczynski@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.