All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Hellstrom, Thomas" <thomas.hellstrom@intel.com>
To: "dri-devel@lists.freedesktop.org"
	<dri-devel@lists.freedesktop.org>,
	"C, Ramalingam" <ramalingam.c@intel.com>,
	"intel-gfx@lists.freedesktop.org"
	<intel-gfx@lists.freedesktop.org>
Cc: "Auld, Matthew" <matthew.auld@intel.com>
Subject: Re: [Intel-gfx] [PATCH v4 4/8] drm/i915/selftest_migrate: Check CCS meta data clear
Date: Mon, 21 Mar 2022 10:39:08 +0000	[thread overview]
Message-ID: <2ac354b3df30889cc52c10613900467df30f4a47.camel@intel.com> (raw)
In-Reply-To: <20220319204229.9846-5-ramalingam.c@intel.com>

On Sun, 2022-03-20 at 02:12 +0530, Ramalingam C wrote:
> While clearing the Flat-CCS capable lmem object, we need to clear the
> CCS
> meta data corresponding to the memory.
> 
> As part of live_migrate_clear add check for the ccs meta data clear
> for
> the Flat-CCS capable lmem object.
> 
> Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_migrate.c    |  32 +++
>  drivers/gpu/drm/i915/gt/selftest_migrate.c | 274 ++++++++++++++++++-
> --
>  2 files changed, 278 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c
> b/drivers/gpu/drm/i915/gt/intel_migrate.c
> index c1db8daf994a..bbfea570c239 100644
> --- a/drivers/gpu/drm/i915/gt/intel_migrate.c
> +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
> @@ -572,6 +572,38 @@ static u32 *_i915_ctrl_surf_copy_blt(u32 *cmd,
> u64 src_addr, u64 dst_addr,
>         return cmd;
>  }
>  
> +static int emit_copy_ccs(struct i915_request *rq,
> +                        u32 dst_offset, u8 dst_access,
> +                        u32 src_offset, u8 src_access, int size)
> +{
> +       struct drm_i915_private *i915 = rq->engine->i915;
> +       int mocs = rq->engine->gt->mocs.uc_index << 1;
> +       u32 num_ccs_blks, ccs_ring_size;
> +       u32 *cs;
> +
> +       ccs_ring_size = calc_ctrl_surf_instr_size(i915, size);
> +       WARN_ON(!ccs_ring_size);
> +
> +       cs = intel_ring_begin(rq, round_up(ccs_ring_size, 2));
> +       if (IS_ERR(cs))
> +               return PTR_ERR(cs);
> +
> +       num_ccs_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size),
> +                                   NUM_CCS_BYTES_PER_BLOCK);
> +
> +       cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS);
> +       cs = _i915_ctrl_surf_copy_blt(cs, src_offset, dst_offset,
> +                                     src_access, dst_access,
> +                                     mocs, mocs, num_ccs_blks);
> +       cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS);
> +       if (ccs_ring_size & 1)
> +               *cs++ = MI_NOOP;
> +
> +       intel_ring_advance(rq, cs);
> +
> +       return 0;
> +}


This would be an unused function if selftests are not configured,
right?


> +
>  static int emit_copy(struct i915_request *rq,
>                      u32 dst_offset, u32 src_offset, int size)
>  {
> diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c
> b/drivers/gpu/drm/i915/gt/selftest_migrate.c
> index b5da8b8cd039..e32cc994f4a2 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c
> @@ -132,6 +132,126 @@ static int copy(struct intel_migrate *migrate,
>         return err;
>  }
>  
> +static int intel_context_copy_ccs(struct intel_context *ce,
> +                                 const struct i915_deps *deps,
> +                                 struct scatterlist *sg,
> +                                 enum i915_cache_level cache_level,
> +                                 bool write_to_ccs,
> +                                 struct i915_request **out)
> +{
> +       u8 src_access = write_to_ccs ? DIRECT_ACCESS :
> INDIRECT_ACCESS;
> +       u8 dst_access = write_to_ccs ? INDIRECT_ACCESS :
> DIRECT_ACCESS;
> +       struct sgt_dma it = sg_sgt(sg);
> +       struct i915_request *rq;
> +       u32 offset;
> +       int err;
> +
> +       GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
> +       *out = NULL;
> +
> +       GEM_BUG_ON(ce->ring->size < SZ_64K);
> +
> +       offset = 0;
> +       if (HAS_64K_PAGES(ce->engine->i915))
> +               offset = CHUNK_SZ;
> +       offset += (u64)rq->engine->instance << 32;
> +
> +       do {
> +               int len;
> +
> +               rq = i915_request_create(ce);
> +               if (IS_ERR(rq)) {
> +                       err = PTR_ERR(rq);
> +                       goto out_ce;
> +               }
> +
> +               if (deps) {
> +                       err = i915_request_await_deps(rq, deps);
> +                       if (err)
> +                               goto out_rq;
> +
> +                       if (rq->engine->emit_init_breadcrumb) {
> +                               err = rq->engine-
> >emit_init_breadcrumb(rq);
> +                               if (err)
> +                                       goto out_rq;
> +                       }
> +
> +                       deps = NULL;
> +               }
> +
> +               /* The PTE updates + clear must not be interrupted.
> */
> +               err = emit_no_arbitration(rq);
> +               if (err)
> +                       goto out_rq;
> +
> +               len = emit_pte(rq, &it, cache_level, true, offset,
> CHUNK_SZ);
> +               if (len <= 0) {
> +                       err = len;
> +                       goto out_rq;
> +               }
> +
> +               err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
> +               if (err)
> +                       goto out_rq;
> +
> +               err = emit_copy_ccs(rq, offset, dst_access,
> +                                   offset, src_access, len);
> +               if (err)
> +                       goto out_rq;
> +
> +               err = rq->engine->emit_flush(rq, EMIT_INVALIDATE |
> +                                            MI_FLUSH_DW_CCS);
> +
> +               /* Arbitration is re-enabled between requests. */
> +out_rq:
> +               if (*out)
> +                       i915_request_put(*out);
> +               *out = i915_request_get(rq);
> +               i915_request_add(rq);
> +               if (err || !it.sg || !sg_dma_len(it.sg))
> +                       break;
> +
> +               cond_resched();
> +       } while (1);
> +
> +out_ce:
> +       return err;
> +}
> +
> +static int
> +intel_migrate_ccs_copy(struct intel_migrate *m,
> +                      struct i915_gem_ww_ctx *ww,
> +                      const struct i915_deps *deps,
> +                      struct scatterlist *sg,
> +                      enum i915_cache_level cache_level,
> +                      bool write_to_ccs,
> +                      struct i915_request **out)
> +{
> +       struct intel_context *ce;
> +       int err;
> +
> +       *out = NULL;
> +       if (!m->context)
> +               return -ENODEV;
> +
> +       ce = intel_migrate_create_context(m);
> +       if (IS_ERR(ce))
> +               ce = intel_context_get(m->context);
> +       GEM_BUG_ON(IS_ERR(ce));
> +
> +       err = intel_context_pin_ww(ce, ww);
> +       if (err)
> +               goto out;
> +
> +       err = intel_context_copy_ccs(ce, deps, sg, cache_level,
> +                                    write_to_ccs, out);
> +
> +       intel_context_unpin(ce);
> +out:
> +       intel_context_put(ce);
> +       return err;
> +}
> +
>  static int clear(struct intel_migrate *migrate,
>                  int (*fn)(struct intel_migrate *migrate,
>                            struct i915_gem_ww_ctx *ww,
> @@ -144,7 +264,8 @@ static int clear(struct intel_migrate *migrate,
>         struct drm_i915_gem_object *obj;
>         struct i915_request *rq;
>         struct i915_gem_ww_ctx ww;
> -       u32 *vaddr;
> +       u32 *vaddr, val = 0;
> +       bool ccs_cap = false;
>         int err = 0;
>         int i;
>  
> @@ -155,7 +276,12 @@ static int clear(struct intel_migrate *migrate,
>         /* Consider the rounded up memory too */
>         sz = obj->base.size;
>  
> +       if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj))
> +               ccs_cap = true;
> +
>         for_i915_gem_ww(&ww, err, true) {
> +               int ccs_bytes;
> +
>                 err = i915_gem_object_lock(obj, &ww);
>                 if (err)
>                         continue;
> @@ -170,44 +296,136 @@ static int clear(struct intel_migrate
> *migrate,
>                         vaddr[i] = ~i;
>                 i915_gem_object_flush_map(obj);
>  
> -               err = fn(migrate, &ww, obj, sz, &rq);
> -               if (!err)
> -                       continue;
> +               if (ccs_cap && !val) {
> +                       /* Write the obj data into ccs surface */
> +                       err = intel_migrate_ccs_copy(migrate, &ww,
> NULL,
> +                                                    obj->mm.pages-
> >sgl,
> +                                                    obj-
> >cache_level,
> +                                                    true, &rq);
> +                       if (rq && !err) {
> +                               if (i915_request_wait(rq, 0, HZ) < 0)
> {
> +                                       pr_err("%ps timed out, size:
> %u\n",
> +                                              fn, sz);
> +                                       err = -ETIME;
> +                               }
> +                               i915_request_put(rq);
> +                               rq = NULL;
> +                       }
> +                       if (err)
> +                               continue;
> +
> +                       for (i = 0; i < sz / sizeof(u32); i++)
> +                               vaddr[i] = 0x5a5a5a5a;
> +                       i915_gem_object_flush_map(obj);
> +
> +                       err = intel_migrate_ccs_copy(migrate, &ww,
> NULL, obj->mm.pages->sgl,
> +                                                    obj-
> >cache_level, false, &rq);

Why do we read back CCS content here?

> +                       if (rq && !err) {
> +                               if (i915_request_wait(rq, 0, HZ) < 0)
> {
> +                                       pr_err("%ps timed out, size:
> %u\n",
> +                                              fn, sz);
> +                                       err = -ETIME;
> +                               }
> +                               i915_request_put(rq);
> +                               rq = NULL;
> +                       }
> +                       if (err)
> +                               continue;
> +
> +                       i915_gem_object_flush_map(obj);
> +                       for (i = 0; !err && i < ccs_bytes; i += 4) {
> +                               if (vaddr[i] != ~i) {
> +                                       pr_err("%ps ccs write and
> read failed, offset: %d\n",
> +                                              fn, i);
> +                                       err = -EINVAL;
> +                               }
> +                       }
> +                       if (err)
> +                               continue;
> +
> +                       i915_gem_object_flush_map(obj);
> +               }
>  
> -               if (err != -EDEADLK && err != -EINTR && err != -
> ERESTARTSYS)
> -                       pr_err("%ps failed, size: %u\n", fn, sz);
> -               if (rq) {
> -                       i915_request_wait(rq, 0, HZ);
> +               err = fn(migrate, &ww, obj, val, &rq);
> +               if (rq && !err) {
> +                       if (i915_request_wait(rq, 0, HZ) < 0) {
> +                               pr_err("%ps timed out, size: %u\n",
> fn, sz);
> +                               err = -ETIME;
> +                       }
>                         i915_request_put(rq);
> +                       rq = NULL;
>                 }
> -               i915_gem_object_unpin_map(obj);
> -       }
> -       if (err)
> -               goto err_out;
> +               if (err)
> +                       continue;
>  
> -       if (rq) {
> -               if (i915_request_wait(rq, 0, HZ) < 0) {
> -                       pr_err("%ps timed out, size: %u\n", fn, sz);
> -                       err = -ETIME;
> +               i915_gem_object_flush_map(obj);
> +
> +               /* Verify the set/clear of the obj mem */
> +               for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
> +                       int x = i * 1024 +
> +                               i915_prandom_u32_max_state(1024,
> prng);
> +
> +                       if (vaddr[x] != val) {
> +                               pr_err("%ps failed, (%u != %u),
> offset: %zu\n",
> +                                      fn, vaddr[x], val,  x *
> sizeof(u32));
> +                               igt_hexdump(vaddr + i * 1024, 4096);
> +                               err = -EINVAL;
> +                       }
>                 }
> -               i915_request_put(rq);
> -       }
> +               if (err)
> +                       continue;
>  
> -       for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
> -               int x = i * 1024 + i915_prandom_u32_max_state(1024,
> prng);
> +               if (ccs_cap && !val) {
> +                       for (i = 0; i < sz / sizeof(u32); i++)
> +                               vaddr[i] = ~i;
> +                       i915_gem_object_flush_map(obj);
> +
> +                       err = intel_migrate_ccs_copy(migrate, &ww,
> NULL,
> +                                                    obj->mm.pages-
> >sgl,
> +                                                    obj-
> >cache_level,
> +                                                    false, &rq);
> +                       if (rq && !err) {
> +                               if (i915_request_wait(rq, 0, HZ) < 0)
> {
> +                                       pr_err("%ps timed out, size:
> %u\n",
> +                                              fn, sz);
> +                                       err = -ETIME;
> +                               }
> +                               i915_request_put(rq);
> +                               rq = NULL;
> +                       }
> +                       if (err)
> +                               continue;
> +
> +                       ccs_bytes = GET_CCS_BYTES(i915, sz);
> +                       i915_gem_object_flush_map(obj);
> +                       for (i = 0; !err && i < ccs_bytes /
> sizeof(u32); i++) {
> +                               if (vaddr[i]) {

I think this is incorrect. This assumes that the CCS data is read back
contiguously for the whole buffer, but isn't the CCS data instead read
back per 8MiB chunk and placed at the beginning of each chunk?

/Thomas



> +                                       pr_err("%ps ccs clearing
> failed, offset: %d/%lu\n",
> +                                              fn, i, (ccs_bytes /
> sizeof(u32)) -  1);
> +                                       igt_hexdump(vaddr + i,
> ccs_bytes - i * sizeof(u32));
> +                                       err = -EINVAL;
> +                               }
> +                       }
> +                       if (err)
> +                               continue;
> +               }
> +               i915_gem_object_unpin_map(obj);
> +       }
>  
> -               if (vaddr[x] != sz) {
> -                       pr_err("%ps failed, size: %u, offset: %zu\n",
> -                              fn, sz, x * sizeof(u32));
> -                       igt_hexdump(vaddr + i * 1024, 4096);
> -                       err = -EINVAL;
> +       if (err) {
> +               if (err != -EDEADLK && err != -EINTR && err != -
> ERESTARTSYS)
> +                       pr_err("%ps failed, size: %u\n", fn, sz);
> +               if (rq && err != -EINVAL) {
> +                       i915_request_wait(rq, 0, HZ);
> +                       i915_request_put(rq);
>                 }
> +
> +               i915_gem_object_unpin_map(obj);
> +       } else {
> +               pr_debug("%ps Passed. size: %u\n", fn, sz);
>         }
>  
> -       i915_gem_object_unpin_map(obj);
> -err_out:
>         i915_gem_object_put(obj);
> -
>         return err;
>  }
>  

----------------------------------------------------------------------
Intel Sweden AB
Registered Office: Isafjordsgatan 30B, 164 40 Kista, Stockholm, Sweden
Registration Number: 556189-6027

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.

WARNING: multiple messages have this Message-ID (diff)
From: "Hellstrom, Thomas" <thomas.hellstrom@intel.com>
To: "dri-devel@lists.freedesktop.org"
	<dri-devel@lists.freedesktop.org>,
	"C, Ramalingam" <ramalingam.c@intel.com>,
	"intel-gfx@lists.freedesktop.org"
	<intel-gfx@lists.freedesktop.org>
Cc: "Auld, Matthew" <matthew.auld@intel.com>
Subject: Re: [PATCH v4 4/8] drm/i915/selftest_migrate: Check CCS meta data clear
Date: Mon, 21 Mar 2022 10:39:08 +0000	[thread overview]
Message-ID: <2ac354b3df30889cc52c10613900467df30f4a47.camel@intel.com> (raw)
In-Reply-To: <20220319204229.9846-5-ramalingam.c@intel.com>

On Sun, 2022-03-20 at 02:12 +0530, Ramalingam C wrote:
> While clearing the Flat-CCS capable lmem object, we need to clear the
> CCS
> meta data corresponding to the memory.
> 
> As part of live_migrate_clear add check for the ccs meta data clear
> for
> the Flat-CCS capable lmem object.
> 
> Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_migrate.c    |  32 +++
>  drivers/gpu/drm/i915/gt/selftest_migrate.c | 274 ++++++++++++++++++-
> --
>  2 files changed, 278 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c
> b/drivers/gpu/drm/i915/gt/intel_migrate.c
> index c1db8daf994a..bbfea570c239 100644
> --- a/drivers/gpu/drm/i915/gt/intel_migrate.c
> +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
> @@ -572,6 +572,38 @@ static u32 *_i915_ctrl_surf_copy_blt(u32 *cmd,
> u64 src_addr, u64 dst_addr,
>         return cmd;
>  }
>  
> +static int emit_copy_ccs(struct i915_request *rq,
> +                        u32 dst_offset, u8 dst_access,
> +                        u32 src_offset, u8 src_access, int size)
> +{
> +       struct drm_i915_private *i915 = rq->engine->i915;
> +       int mocs = rq->engine->gt->mocs.uc_index << 1;
> +       u32 num_ccs_blks, ccs_ring_size;
> +       u32 *cs;
> +
> +       ccs_ring_size = calc_ctrl_surf_instr_size(i915, size);
> +       WARN_ON(!ccs_ring_size);
> +
> +       cs = intel_ring_begin(rq, round_up(ccs_ring_size, 2));
> +       if (IS_ERR(cs))
> +               return PTR_ERR(cs);
> +
> +       num_ccs_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size),
> +                                   NUM_CCS_BYTES_PER_BLOCK);
> +
> +       cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS);
> +       cs = _i915_ctrl_surf_copy_blt(cs, src_offset, dst_offset,
> +                                     src_access, dst_access,
> +                                     mocs, mocs, num_ccs_blks);
> +       cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS);
> +       if (ccs_ring_size & 1)
> +               *cs++ = MI_NOOP;
> +
> +       intel_ring_advance(rq, cs);
> +
> +       return 0;
> +}


This would be an unused function if selftests are not configured,
right?


> +
>  static int emit_copy(struct i915_request *rq,
>                      u32 dst_offset, u32 src_offset, int size)
>  {
> diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c
> b/drivers/gpu/drm/i915/gt/selftest_migrate.c
> index b5da8b8cd039..e32cc994f4a2 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c
> @@ -132,6 +132,126 @@ static int copy(struct intel_migrate *migrate,
>         return err;
>  }
>  
> +static int intel_context_copy_ccs(struct intel_context *ce,
> +                                 const struct i915_deps *deps,
> +                                 struct scatterlist *sg,
> +                                 enum i915_cache_level cache_level,
> +                                 bool write_to_ccs,
> +                                 struct i915_request **out)
> +{
> +       u8 src_access = write_to_ccs ? DIRECT_ACCESS :
> INDIRECT_ACCESS;
> +       u8 dst_access = write_to_ccs ? INDIRECT_ACCESS :
> DIRECT_ACCESS;
> +       struct sgt_dma it = sg_sgt(sg);
> +       struct i915_request *rq;
> +       u32 offset;
> +       int err;
> +
> +       GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
> +       *out = NULL;
> +
> +       GEM_BUG_ON(ce->ring->size < SZ_64K);
> +
> +       offset = 0;
> +       if (HAS_64K_PAGES(ce->engine->i915))
> +               offset = CHUNK_SZ;
> +       offset += (u64)rq->engine->instance << 32;
> +
> +       do {
> +               int len;
> +
> +               rq = i915_request_create(ce);
> +               if (IS_ERR(rq)) {
> +                       err = PTR_ERR(rq);
> +                       goto out_ce;
> +               }
> +
> +               if (deps) {
> +                       err = i915_request_await_deps(rq, deps);
> +                       if (err)
> +                               goto out_rq;
> +
> +                       if (rq->engine->emit_init_breadcrumb) {
> +                               err = rq->engine-
> >emit_init_breadcrumb(rq);
> +                               if (err)
> +                                       goto out_rq;
> +                       }
> +
> +                       deps = NULL;
> +               }
> +
> +               /* The PTE updates + clear must not be interrupted.
> */
> +               err = emit_no_arbitration(rq);
> +               if (err)
> +                       goto out_rq;
> +
> +               len = emit_pte(rq, &it, cache_level, true, offset,
> CHUNK_SZ);
> +               if (len <= 0) {
> +                       err = len;
> +                       goto out_rq;
> +               }
> +
> +               err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
> +               if (err)
> +                       goto out_rq;
> +
> +               err = emit_copy_ccs(rq, offset, dst_access,
> +                                   offset, src_access, len);
> +               if (err)
> +                       goto out_rq;
> +
> +               err = rq->engine->emit_flush(rq, EMIT_INVALIDATE |
> +                                            MI_FLUSH_DW_CCS);
> +
> +               /* Arbitration is re-enabled between requests. */
> +out_rq:
> +               if (*out)
> +                       i915_request_put(*out);
> +               *out = i915_request_get(rq);
> +               i915_request_add(rq);
> +               if (err || !it.sg || !sg_dma_len(it.sg))
> +                       break;
> +
> +               cond_resched();
> +       } while (1);
> +
> +out_ce:
> +       return err;
> +}
> +
> +static int
> +intel_migrate_ccs_copy(struct intel_migrate *m,
> +                      struct i915_gem_ww_ctx *ww,
> +                      const struct i915_deps *deps,
> +                      struct scatterlist *sg,
> +                      enum i915_cache_level cache_level,
> +                      bool write_to_ccs,
> +                      struct i915_request **out)
> +{
> +       struct intel_context *ce;
> +       int err;
> +
> +       *out = NULL;
> +       if (!m->context)
> +               return -ENODEV;
> +
> +       ce = intel_migrate_create_context(m);
> +       if (IS_ERR(ce))
> +               ce = intel_context_get(m->context);
> +       GEM_BUG_ON(IS_ERR(ce));
> +
> +       err = intel_context_pin_ww(ce, ww);
> +       if (err)
> +               goto out;
> +
> +       err = intel_context_copy_ccs(ce, deps, sg, cache_level,
> +                                    write_to_ccs, out);
> +
> +       intel_context_unpin(ce);
> +out:
> +       intel_context_put(ce);
> +       return err;
> +}
> +
>  static int clear(struct intel_migrate *migrate,
>                  int (*fn)(struct intel_migrate *migrate,
>                            struct i915_gem_ww_ctx *ww,
> @@ -144,7 +264,8 @@ static int clear(struct intel_migrate *migrate,
>         struct drm_i915_gem_object *obj;
>         struct i915_request *rq;
>         struct i915_gem_ww_ctx ww;
> -       u32 *vaddr;
> +       u32 *vaddr, val = 0;
> +       bool ccs_cap = false;
>         int err = 0;
>         int i;
>  
> @@ -155,7 +276,12 @@ static int clear(struct intel_migrate *migrate,
>         /* Consider the rounded up memory too */
>         sz = obj->base.size;
>  
> +       if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj))
> +               ccs_cap = true;
> +
>         for_i915_gem_ww(&ww, err, true) {
> +               int ccs_bytes;
> +
>                 err = i915_gem_object_lock(obj, &ww);
>                 if (err)
>                         continue;
> @@ -170,44 +296,136 @@ static int clear(struct intel_migrate
> *migrate,
>                         vaddr[i] = ~i;
>                 i915_gem_object_flush_map(obj);
>  
> -               err = fn(migrate, &ww, obj, sz, &rq);
> -               if (!err)
> -                       continue;
> +               if (ccs_cap && !val) {
> +                       /* Write the obj data into ccs surface */
> +                       err = intel_migrate_ccs_copy(migrate, &ww,
> NULL,
> +                                                    obj->mm.pages-
> >sgl,
> +                                                    obj-
> >cache_level,
> +                                                    true, &rq);
> +                       if (rq && !err) {
> +                               if (i915_request_wait(rq, 0, HZ) < 0)
> {
> +                                       pr_err("%ps timed out, size:
> %u\n",
> +                                              fn, sz);
> +                                       err = -ETIME;
> +                               }
> +                               i915_request_put(rq);
> +                               rq = NULL;
> +                       }
> +                       if (err)
> +                               continue;
> +
> +                       for (i = 0; i < sz / sizeof(u32); i++)
> +                               vaddr[i] = 0x5a5a5a5a;
> +                       i915_gem_object_flush_map(obj);
> +
> +                       err = intel_migrate_ccs_copy(migrate, &ww,
> NULL, obj->mm.pages->sgl,
> +                                                    obj-
> >cache_level, false, &rq);

Why do we read back CCS content here?

> +                       if (rq && !err) {
> +                               if (i915_request_wait(rq, 0, HZ) < 0)
> {
> +                                       pr_err("%ps timed out, size:
> %u\n",
> +                                              fn, sz);
> +                                       err = -ETIME;
> +                               }
> +                               i915_request_put(rq);
> +                               rq = NULL;
> +                       }
> +                       if (err)
> +                               continue;
> +
> +                       i915_gem_object_flush_map(obj);
> +                       for (i = 0; !err && i < ccs_bytes; i += 4) {
> +                               if (vaddr[i] != ~i) {
> +                                       pr_err("%ps ccs write and
> read failed, offset: %d\n",
> +                                              fn, i);
> +                                       err = -EINVAL;
> +                               }
> +                       }
> +                       if (err)
> +                               continue;
> +
> +                       i915_gem_object_flush_map(obj);
> +               }
>  
> -               if (err != -EDEADLK && err != -EINTR && err != -
> ERESTARTSYS)
> -                       pr_err("%ps failed, size: %u\n", fn, sz);
> -               if (rq) {
> -                       i915_request_wait(rq, 0, HZ);
> +               err = fn(migrate, &ww, obj, val, &rq);
> +               if (rq && !err) {
> +                       if (i915_request_wait(rq, 0, HZ) < 0) {
> +                               pr_err("%ps timed out, size: %u\n",
> fn, sz);
> +                               err = -ETIME;
> +                       }
>                         i915_request_put(rq);
> +                       rq = NULL;
>                 }
> -               i915_gem_object_unpin_map(obj);
> -       }
> -       if (err)
> -               goto err_out;
> +               if (err)
> +                       continue;
>  
> -       if (rq) {
> -               if (i915_request_wait(rq, 0, HZ) < 0) {
> -                       pr_err("%ps timed out, size: %u\n", fn, sz);
> -                       err = -ETIME;
> +               i915_gem_object_flush_map(obj);
> +
> +               /* Verify the set/clear of the obj mem */
> +               for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
> +                       int x = i * 1024 +
> +                               i915_prandom_u32_max_state(1024,
> prng);
> +
> +                       if (vaddr[x] != val) {
> +                               pr_err("%ps failed, (%u != %u),
> offset: %zu\n",
> +                                      fn, vaddr[x], val,  x *
> sizeof(u32));
> +                               igt_hexdump(vaddr + i * 1024, 4096);
> +                               err = -EINVAL;
> +                       }
>                 }
> -               i915_request_put(rq);
> -       }
> +               if (err)
> +                       continue;
>  
> -       for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
> -               int x = i * 1024 + i915_prandom_u32_max_state(1024,
> prng);
> +               if (ccs_cap && !val) {
> +                       for (i = 0; i < sz / sizeof(u32); i++)
> +                               vaddr[i] = ~i;
> +                       i915_gem_object_flush_map(obj);
> +
> +                       err = intel_migrate_ccs_copy(migrate, &ww,
> NULL,
> +                                                    obj->mm.pages-
> >sgl,
> +                                                    obj-
> >cache_level,
> +                                                    false, &rq);
> +                       if (rq && !err) {
> +                               if (i915_request_wait(rq, 0, HZ) < 0)
> {
> +                                       pr_err("%ps timed out, size:
> %u\n",
> +                                              fn, sz);
> +                                       err = -ETIME;
> +                               }
> +                               i915_request_put(rq);
> +                               rq = NULL;
> +                       }
> +                       if (err)
> +                               continue;
> +
> +                       ccs_bytes = GET_CCS_BYTES(i915, sz);
> +                       i915_gem_object_flush_map(obj);
> +                       for (i = 0; !err && i < ccs_bytes /
> sizeof(u32); i++) {
> +                               if (vaddr[i]) {

I think this is incorrect. This assumes that the CCS data is read back
contiguously for the whole buffer, but isn't the CCS data instead read
back per 8MiB chunk and placed at the beginning of each chunk?

/Thomas



> +                                       pr_err("%ps ccs clearing
> failed, offset: %d/%lu\n",
> +                                              fn, i, (ccs_bytes /
> sizeof(u32)) -  1);
> +                                       igt_hexdump(vaddr + i,
> ccs_bytes - i * sizeof(u32));
> +                                       err = -EINVAL;
> +                               }
> +                       }
> +                       if (err)
> +                               continue;
> +               }
> +               i915_gem_object_unpin_map(obj);
> +       }
>  
> -               if (vaddr[x] != sz) {
> -                       pr_err("%ps failed, size: %u, offset: %zu\n",
> -                              fn, sz, x * sizeof(u32));
> -                       igt_hexdump(vaddr + i * 1024, 4096);
> -                       err = -EINVAL;
> +       if (err) {
> +               if (err != -EDEADLK && err != -EINTR && err != -
> ERESTARTSYS)
> +                       pr_err("%ps failed, size: %u\n", fn, sz);
> +               if (rq && err != -EINVAL) {
> +                       i915_request_wait(rq, 0, HZ);
> +                       i915_request_put(rq);
>                 }
> +
> +               i915_gem_object_unpin_map(obj);
> +       } else {
> +               pr_debug("%ps Passed. size: %u\n", fn, sz);
>         }
>  
> -       i915_gem_object_unpin_map(obj);
> -err_out:
>         i915_gem_object_put(obj);
> -
>         return err;
>  }
>  

----------------------------------------------------------------------
Intel Sweden AB
Registered Office: Isafjordsgatan 30B, 164 40 Kista, Stockholm, Sweden
Registration Number: 556189-6027

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.

  parent reply	other threads:[~2022-03-21 10:39 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-19 20:42 [PATCH v4 0/8] drm/i915/ttm: Evict and restore of compressed object Ramalingam C
2022-03-19 20:42 ` [Intel-gfx] " Ramalingam C
2022-03-19 20:42 ` [PATCH v4 1/8] drm/i915/gt: Use XY_FASR_COLOR_BLT to clear obj on graphics ver 12+ Ramalingam C
2022-03-19 20:42   ` [Intel-gfx] " Ramalingam C
2022-03-21  8:49   ` Hellstrom, Thomas
2022-03-21  8:49     ` [Intel-gfx] " Hellstrom, Thomas
2022-03-21 23:07     ` Ramalingam C
2022-03-21 23:07       ` [Intel-gfx] " Ramalingam C
2022-03-19 20:42 ` [PATCH v4 2/8] drm/i915/gt: Clear compress metadata for Flat-ccs objects Ramalingam C
2022-03-19 20:42   ` [Intel-gfx] " Ramalingam C
2022-03-19 20:42 ` [PATCH v4 3/8] drm/i915/selftest_migrate: Consider the possible roundup of size Ramalingam C
2022-03-19 20:42   ` [Intel-gfx] " Ramalingam C
2022-03-19 20:42 ` [PATCH v4 4/8] drm/i915/selftest_migrate: Check CCS meta data clear Ramalingam C
2022-03-19 20:42   ` [Intel-gfx] " Ramalingam C
2022-03-20  1:39   ` kernel test robot
2022-03-21 10:39   ` Hellstrom, Thomas [this message]
2022-03-21 10:39     ` Hellstrom, Thomas
2022-03-21 23:05     ` Ramalingam C
2022-03-21 23:05       ` [Intel-gfx] " Ramalingam C
2022-03-19 20:42 ` [PATCH v4 5/8] drm/i915/gt: Optimize the migration loop Ramalingam C
2022-03-19 20:42   ` [Intel-gfx] " Ramalingam C
2022-03-19 20:42 ` [PATCH v4 6/8] drm/ttm: Add a parameter to add extra pages into ttm_tt Ramalingam C
2022-03-19 20:42   ` [Intel-gfx] " Ramalingam C
2022-03-21 10:11   ` Das, Nirmoy
2022-03-21 23:06     ` Ramalingam C
2022-03-19 20:42 ` [PATCH v4 7/8] drm/i915/gem: Add extra pages in ttm_tt for ccs data Ramalingam C
2022-03-19 20:42   ` [Intel-gfx] " Ramalingam C
2022-03-19 20:42 ` [PATCH v4 8/8] drm/i915/migrate: Evict and restore the flatccs capable lmem obj Ramalingam C
2022-03-19 20:42   ` [Intel-gfx] " Ramalingam C
2022-03-19 20:50 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915/ttm: Evict and restore of compressed object (rev2) Patchwork
2022-03-19 20:52 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2022-03-19 21:26 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
2022-03-19 21:26 ` [Intel-gfx] ✗ Fi.CI.BUILD: warning " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2ac354b3df30889cc52c10613900467df30f4a47.camel@intel.com \
    --to=thomas.hellstrom@intel.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=matthew.auld@intel.com \
    --cc=ramalingam.c@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.