From: John Harrison <john.c.harrison@intel.com>
To: Matthew Brost <matthew.brost@intel.com>,
<intel-gfx@lists.freedesktop.org>,
<dri-devel@lists.freedesktop.org>
Subject: Re: [Intel-gfx] [PATCH 2/4] drm/i915/guc: put all guc objects in lmem when available
Date: Fri, 6 Aug 2021 11:43:39 -0700 [thread overview]
Message-ID: <28aafcd4-19b7-05f4-5054-59a76043b342@intel.com> (raw)
In-Reply-To: <20210803051121.72017-3-matthew.brost@intel.com>
On 8/2/2021 22:11, Matthew Brost wrote:
> From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>
> The firmware binary has to be loaded from lmem and the recommendation is
> to put all other objects in there as well. Note that we don't fall back
> to system memory if the allocation in lmem fails because all objects are
> allocated during driver load and if we have issues with lmem at that point
> something is seriously wrong with the system, so no point in trying to
> handle it.
>
> Cc: Matthew Auld <matthew.auld@intel.com>
> Cc: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
> Cc: Radoslaw Szwichtenberg <radoslaw.szwichtenberg@intel.com>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
> drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 26 ++++++++
> drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 4 ++
> drivers/gpu/drm/i915/gt/uc/intel_guc.c | 9 ++-
> drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 11 +++-
> drivers/gpu/drm/i915/gt/uc/intel_huc.c | 14 ++++-
> drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 75 +++++++++++++++++++++--
> 6 files changed, 127 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
> index eb345305dc52..034226c5d4d0 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
> @@ -103,6 +103,32 @@ __i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915,
> size, page_size, flags);
> }
>
> +struct drm_i915_gem_object *
> +i915_gem_object_create_lmem_from_data(struct drm_i915_private *i915,
> + const void *data, size_t size)
> +{
> + struct drm_i915_gem_object *obj;
> + void *map;
> +
> + obj = i915_gem_object_create_lmem(i915,
> + round_up(size, PAGE_SIZE),
> + I915_BO_ALLOC_CONTIGUOUS);
> + if (IS_ERR(obj))
> + return obj;
> +
> + map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
> + if (IS_ERR(map)) {
> + i915_gem_object_put(obj);
> + return map;
> + }
> +
> + memcpy(map, data, size);
> +
> + i915_gem_object_unpin_map(obj);
> +
> + return obj;
> +}
> +
> struct drm_i915_gem_object *
> i915_gem_object_create_lmem(struct drm_i915_private *i915,
> resource_size_t size,
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
> index 4ee81fc66302..1b88ea13435c 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
> @@ -23,6 +23,10 @@ bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj);
>
> bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj);
>
> +struct drm_i915_gem_object *
> +i915_gem_object_create_lmem_from_data(struct drm_i915_private *i915,
> + const void *data, size_t size);
> +
> struct drm_i915_gem_object *
> __i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915,
> resource_size_t size,
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> index 979128e28372..55160d3e401a 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> @@ -3,6 +3,7 @@
> * Copyright © 2014-2019 Intel Corporation
> */
>
> +#include "gem/i915_gem_lmem.h"
> #include "gt/intel_gt.h"
> #include "gt/intel_gt_irq.h"
> #include "gt/intel_gt_pm_irq.h"
> @@ -630,7 +631,13 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
> u64 flags;
> int ret;
>
> - obj = i915_gem_object_create_shmem(gt->i915, size);
> + if (HAS_LMEM(gt->i915))
> + obj = i915_gem_object_create_lmem(gt->i915, size,
> + I915_BO_ALLOC_CPU_CLEAR |
> + I915_BO_ALLOC_CONTIGUOUS);
> + else
> + obj = i915_gem_object_create_shmem(gt->i915, size);
> +
> if (IS_ERR(obj))
> return ERR_CAST(obj);
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
> index 76fe766ad1bc..962be0c12208 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
> @@ -41,7 +41,7 @@ static void guc_prepare_xfer(struct intel_uncore *uncore)
> }
>
> /* Copy RSA signature from the fw image to HW for verification */
> -static void guc_xfer_rsa(struct intel_uc_fw *guc_fw,
> +static int guc_xfer_rsa(struct intel_uc_fw *guc_fw,
> struct intel_uncore *uncore)
> {
> u32 rsa[UOS_RSA_SCRATCH_COUNT];
> @@ -49,10 +49,13 @@ static void guc_xfer_rsa(struct intel_uc_fw *guc_fw,
> int i;
>
> copied = intel_uc_fw_copy_rsa(guc_fw, rsa, sizeof(rsa));
> - GEM_BUG_ON(copied < sizeof(rsa));
> + if (copied < sizeof(rsa))
> + return -ENOMEM;
>
> for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++)
> intel_uncore_write(uncore, UOS_RSA_SCRATCH(i), rsa[i]);
> +
> + return 0;
> }
>
> /*
> @@ -141,7 +144,9 @@ int intel_guc_fw_upload(struct intel_guc *guc)
> * by the DMA engine in one operation, whereas the RSA signature is
> * loaded via MMIO.
> */
> - guc_xfer_rsa(&guc->fw, uncore);
> + ret = guc_xfer_rsa(&guc->fw, uncore);
> + if (ret)
> + goto out;
>
> /*
> * Current uCode expects the code to be loaded at 8k; locations below
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> index fc5387b410a2..ff4b6869b80b 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> @@ -87,17 +87,25 @@ static int intel_huc_rsa_data_create(struct intel_huc *huc)
> vma->obj, true));
> if (IS_ERR(vaddr)) {
> i915_vma_unpin_and_release(&vma, 0);
> - return PTR_ERR(vaddr);
> + err = PTR_ERR(vaddr);
> + goto unpin_out;
> }
>
> copied = intel_uc_fw_copy_rsa(&huc->fw, vaddr, vma->size);
> - GEM_BUG_ON(copied < huc->fw.rsa_size);
> -
> i915_gem_object_unpin_map(vma->obj);
>
> + if (copied < huc->fw.rsa_size) {
> + err = -ENOMEM;
> + goto unpin_out;
> + }
> +
> huc->rsa_data = vma;
>
> return 0;
> +
> +unpin_out:
> + i915_vma_unpin_and_release(&vma, 0);
> + return err;
> }
>
> static void intel_huc_rsa_data_destroy(struct intel_huc *huc)
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> index f632dbd32b42..f8cb00ffb506 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> @@ -7,6 +7,7 @@
> #include <linux/firmware.h>
> #include <drm/drm_print.h>
>
> +#include "gem/i915_gem_lmem.h"
> #include "intel_uc_fw.h"
> #include "intel_uc_fw_abi.h"
> #include "i915_drv.h"
> @@ -370,7 +371,11 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
> if (uc_fw->type == INTEL_UC_FW_TYPE_GUC)
> uc_fw->private_data_size = css->private_data_size;
>
> - obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size);
> + if (HAS_LMEM(i915))
> + obj = i915_gem_object_create_lmem_from_data(i915, fw->data, fw->size);
> + else
> + obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size);
> +
> if (IS_ERR(obj)) {
> err = PTR_ERR(obj);
> goto fail;
> @@ -414,6 +419,7 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
> struct drm_i915_gem_object *obj = uc_fw->obj;
> struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
> struct i915_vma *dummy = &uc_fw->dummy;
> + u32 pte_flags = 0;
>
> dummy->node.start = uc_fw_ggtt_offset(uc_fw);
> dummy->node.size = obj->base.size;
> @@ -424,9 +430,13 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
> GEM_BUG_ON(dummy->node.size > ggtt->uc_fw.size);
>
> /* uc_fw->obj cache domains were not controlled across suspend */
> - drm_clflush_sg(dummy->pages);
> + if (i915_gem_object_has_struct_page(obj))
> + drm_clflush_sg(dummy->pages);
> +
> + if (i915_gem_object_is_lmem(obj))
> + pte_flags |= PTE_LM;
>
> - ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, 0);
> + ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, pte_flags);
> }
>
> static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw)
> @@ -585,13 +595,68 @@ void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw)
> */
> size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len)
> {
> - struct sg_table *pages = uc_fw->obj->mm.pages;
> + struct intel_memory_region *mr = uc_fw->obj->mm.region;
> u32 size = min_t(u32, uc_fw->rsa_size, max_len);
> u32 offset = sizeof(struct uc_css_header) + uc_fw->ucode_size;
> + struct sgt_iter iter;
> + size_t count = 0;
> + int idx;
>
> + /* Called during reset handling, must be atomic [no fs_reclaim] */
> GEM_BUG_ON(!intel_uc_fw_is_available(uc_fw));
>
> - return sg_pcopy_to_buffer(pages->sgl, pages->nents, dst, size, offset);
> + idx = offset >> PAGE_SHIFT;
> + offset = offset_in_page(offset);
> + if (i915_gem_object_has_struct_page(uc_fw->obj)) {
> + struct page *page;
> +
> + for_each_sgt_page(page, iter, uc_fw->obj->mm.pages) {
Why can't this just use 'sg_pcopy_to_buffer' as before?
John.
> + u32 len = min_t(u32, size, PAGE_SIZE - offset);
> + void *vaddr;
> +
> + if (idx > 0) {
> + idx--;
> + continue;
> + }
> +
> + vaddr = kmap_atomic(page);
> + memcpy(dst, vaddr + offset, len);
> + kunmap_atomic(vaddr);
> +
> + offset = 0;
> + dst += len;
> + size -= len;
> + count += len;
> + if (!size)
> + break;
> + }
> + } else {
> + dma_addr_t addr;
> +
> + for_each_sgt_daddr(addr, iter, uc_fw->obj->mm.pages) {
> + u32 len = min_t(u32, size, PAGE_SIZE - offset);
> + void __iomem *vaddr;
> +
> + if (idx > 0) {
> + idx--;
> + continue;
> + }
> +
> + vaddr = io_mapping_map_atomic_wc(&mr->iomap,
> + addr - mr->region.start);
> + memcpy_fromio(dst, vaddr + offset, len);
> + io_mapping_unmap_atomic(vaddr);
> +
> + offset = 0;
> + dst += len;
> + size -= len;
> + count += len;
> + if (!size)
> + break;
> + }
> + }
> +
> + return count;
> }
>
> /**
next prev parent reply other threads:[~2021-08-06 18:43 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-08-03 5:11 [Intel-gfx] [PATCH 0/4] Enable GuC submission by default on DG1 Matthew Brost
2021-08-03 5:04 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
2021-08-03 5:05 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2021-08-03 5:11 ` [Intel-gfx] [PATCH 1/4] drm/i915: Do not define vma on stack Matthew Brost
2021-08-04 19:37 ` Matthew Brost
2021-08-03 5:11 ` [Intel-gfx] [PATCH 2/4] drm/i915/guc: put all guc objects in lmem when available Matthew Brost
2021-08-06 18:43 ` John Harrison [this message]
2021-08-03 5:11 ` [Intel-gfx] [PATCH 3/4] drm/i915/guc: Add DG1 GuC / HuC firmware defs Matthew Brost
2021-08-06 18:43 ` John Harrison
2021-08-03 5:11 ` [Intel-gfx] [PATCH 4/4] drm/i915/guc: Enable GuC submission by default on DG1 Matthew Brost
2021-08-06 18:44 ` John Harrison
2021-08-03 5:27 ` [Intel-gfx] ✓ Fi.CI.BAT: success for " Patchwork
2021-08-03 12:15 ` [Intel-gfx] [PATCH 0/4] " Daniel Vetter
2021-08-03 17:26 ` Matthew Brost
2021-08-06 11:34 ` Thomas Hellström (Intel)
2021-08-06 12:07 ` Thomas Hellström (Intel)
2021-08-06 16:09 ` Thomas Hellström (Intel)
2021-08-07 17:20 ` Matthew Brost
2021-08-03 16:22 ` [Intel-gfx] ✓ Fi.CI.IGT: success for " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=28aafcd4-19b7-05f4-5054-59a76043b342@intel.com \
--to=john.c.harrison@intel.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=intel-gfx@lists.freedesktop.org \
--cc=matthew.brost@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).