All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Christian König" <christian.koenig@amd.com>
To: "Kim, Jonathan" <Jonathan.Kim@amd.com>,
	"amd-gfx@lists.freedesktop.org" <amd-gfx@lists.freedesktop.org>
Cc: "Kuehling, Felix" <Felix.Kuehling@amd.com>
Subject: Re: [PATCH] drm/amdgpu: improve debug VRAM access performance using sdma
Date: Wed, 12 Jan 2022 16:12:39 +0100	[thread overview]
Message-ID: <08f4a6e7-9084-def3-43a7-bfb19c5e649d@amd.com> (raw)
In-Reply-To: <DM6PR12MB31631B4FF1C6F80EC5B3F58685529@DM6PR12MB3163.namprd12.prod.outlook.com>

Yeah, that's basically my fault.

I haven't even worked my way through all the mails which piled up during 
the xmas break :(

Christian.

Am 12.01.22 um 15:21 schrieb Kim, Jonathan:
> [Public]
>
> Thanks Christian.  I've already merged based on Felix's review.
> I'll send your suggested cleanup for review out soon.
>
> Jon
>
>> -----Original Message-----
>> From: Koenig, Christian <Christian.Koenig@amd.com>
>> Sent: January 12, 2022 2:33 AM
>> To: Kim, Jonathan <Jonathan.Kim@amd.com>; amd-
>> gfx@lists.freedesktop.org
>> Cc: Kuehling, Felix <Felix.Kuehling@amd.com>
>> Subject: Re: [PATCH] drm/amdgpu: improve debug VRAM access
>> performance using sdma
>>
>> Am 04.01.22 um 20:12 schrieb Jonathan Kim:
>>> For better performance during VRAM access for debugged processes, do
>>> read/write copies over SDMA.
>>>
>>> In order to support post-mortem debugging on a broken device, fall back
>>> to stable MMIO access when GPU recovery is disabled or when job
>>> submission timeouts are set to max.  Failed SDMA access should
>>> automatically fall back to MMIO access.
>>>
>>> Use a pre-allocated GTT bounce buffer pre-mapped into GART to avoid
>>> page-table updates and TLB flushes on access.
>>>
>>> Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 78
>> +++++++++++++++++++++++++
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  5 +-
>>>    2 files changed, 82 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> index 367abed1d6e6..512df4c09772 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -48,6 +48,7 @@
>>>    #include <drm/ttm/ttm_range_manager.h>
>>>
>>>    #include <drm/amdgpu_drm.h>
>>> +#include <drm/drm_drv.h>
>>>
>>>    #include "amdgpu.h"
>>>    #include "amdgpu_object.h"
>>> @@ -1429,6 +1430,70 @@ static void
>> amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
>>>      }
>>>    }
>>>
>>> +static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object
>> *bo,
>>> +                                   unsigned long offset, void *buf, int
>> len, int write) {
>>> +   struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
>>> +   struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
>>> +   struct amdgpu_job *job;
>>> +   struct dma_fence *fence;
>>> +   uint64_t src_addr, dst_addr;
>>> +   unsigned int num_dw;
>>> +   int r, idx;
>>> +
>>> +   if (len != PAGE_SIZE)
>>> +           return -EINVAL;
>>> +
>>> +   if (!adev->mman.sdma_access_ptr)
>>> +           return -EACCES;
>>> +
>>> +   r = drm_dev_enter(adev_to_drm(adev), &idx);
>>> +   if (r)
>>> +           return r;
>>> +
>>> +   if (write)
>>> +           memcpy(adev->mman.sdma_access_ptr, buf, len);
>>> +
>>> +   num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
>>> +   r = amdgpu_job_alloc_with_ib(adev, num_dw * 4,
>> AMDGPU_IB_POOL_DELAYED, &job);
>>> +   if (r)
>>> +           goto out;
>>> +
>>> +   src_addr = write ? amdgpu_bo_gpu_offset(adev-
>>> mman.sdma_access_bo) :
>>> +                   amdgpu_bo_gpu_offset(abo);
>>> +   dst_addr = write ? amdgpu_bo_gpu_offset(abo) :
>>> +                   amdgpu_bo_gpu_offset(adev-
>>> mman.sdma_access_bo);
>> I suggest writing this as
>>
>> src_addr = a;
>> dst_addr = b;
>> if (write)
>>       swap(src_addr, dst_addr);
>>
>> This way we don't duplicate the calls that fetch the two offsets.
>>
>>> +   amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
>>> +PAGE_SIZE, false);
>>> +
>>> +   amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job-
>>> ibs[0]);
>>> +   WARN_ON(job->ibs[0].length_dw > num_dw);
>>> +
>>> +   r = amdgpu_job_submit(job, &adev->mman.entity,
>> AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
>>> +   if (r) {
>>> +           amdgpu_job_free(job);
>>> +           goto out;
>>> +   }
>>> +
>>> +   if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
>>> +           r = -ETIMEDOUT;
>>> +   dma_fence_put(fence);
>>> +
>>> +   if (!(r || write))
>>> +           memcpy(buf, adev->mman.sdma_access_ptr, len);
>>> +out:
>>> +   drm_dev_exit(idx);
>>> +   return r;
>>> +}
>>> +
>>> +static inline bool amdgpu_ttm_allow_post_mortem_debug(struct
>>> +amdgpu_device *adev) {
>>> +   return amdgpu_gpu_recovery == 0 ||
>>> +           adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
>>> +           adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
>>> +           adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
>>> +           adev->video_timeout == MAX_SCHEDULE_TIMEOUT; }
>> This should probably be inside amdgpu_device.c
>>
>>> +
>>>    /**
>>>     * amdgpu_ttm_access_memory - Read or Write memory that backs a
>> buffer object.
>>>     *
>>> @@ -1453,6 +1518,10 @@ static int amdgpu_ttm_access_memory(struct
>> ttm_buffer_object *bo,
>>>      if (bo->resource->mem_type != TTM_PL_VRAM)
>>>              return -EIO;
>>>
>>> +   if (!amdgpu_ttm_allow_post_mortem_debug(adev) &&
>>> +                   !amdgpu_ttm_access_memory_sdma(bo, offset,
>> buf, len, write))
>>> +           return len;
>>> +
>>>      amdgpu_res_first(bo->resource, offset, len, &cursor);
>>>      while (cursor.remaining) {
>>>              size_t count, size = cursor.size;
>>> @@ -1793,6 +1862,12 @@ int amdgpu_ttm_init(struct amdgpu_device
>> *adev)
>>>              return r;
>>>      }
>>>
>>> +   if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
>>> +                           AMDGPU_GEM_DOMAIN_GTT,
>>> +                           &adev->mman.sdma_access_bo, NULL,
>>> +                           adev->mman.sdma_access_ptr))
>>> +           DRM_WARN("Debug VRAM access will use slowpath MM
>> access\n");
>>> +
>>>      return 0;
>>>    }
>>>
>>> @@ -1823,6 +1898,9 @@ void amdgpu_ttm_fini(struct amdgpu_device
>> *adev)
>>>      ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
>>>      ttm_device_fini(&adev->mman.bdev);
>>>      adev->mman.initialized = false;
>>> +   if (adev->mman.sdma_access_ptr)
>> You can drop that `if` check. Free functions can usually take a NULL pointer.
>>
>> Apart from those nitpicks, this looks good to me as well.
>>
>> Regards,
>> Christian.
>>
>>> +           amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo,
>> NULL,
>>> +                                   &adev->mman.sdma_access_ptr);
>>>      DRM_INFO("amdgpu: ttm finalized\n");
>>>    }
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> index 91a087f9dc7c..b0116c4a768f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> @@ -98,6 +98,10 @@ struct amdgpu_mman {
>>>      u64             fw_vram_usage_size;
>>>      struct amdgpu_bo        *fw_vram_usage_reserved_bo;
>>>      void            *fw_vram_usage_va;
>>> +
>>> +   /* PAGE_SIZE'd BO for process memory r/w over SDMA. */
>>> +   struct amdgpu_bo        *sdma_access_bo;
>>> +   void                    *sdma_access_ptr;
>>>    };
>>>
>>>    struct amdgpu_copy_mem {
>>> @@ -193,5 +197,4 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct
>> amdgpu_device *adev, struct ttm_tt *ttm,
>>>    int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int
>>> mem_type);
>>>
>>>    void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
>>> -
>>>    #endif


  reply	other threads:[~2022-01-12 15:12 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-04 19:12 [PATCH] drm/amdgpu: improve debug VRAM access performance using sdma Jonathan Kim
2022-01-11 18:49 ` Felix Kuehling
2022-01-12  7:33 ` Christian König
2022-01-12 14:21   ` Kim, Jonathan
2022-01-12 15:12     ` Christian König [this message]
2023-03-20  7:05 ` Quan, Evan
2023-03-20 17:01   ` Christian König
2023-03-20 17:24     ` Kim, Jonathan
2023-03-20 17:31       ` Christian König
2023-03-21  0:39         ` Quan, Evan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=08f4a6e7-9084-def3-43a7-bfb19c5e649d@amd.com \
    --to=christian.koenig@amd.com \
    --cc=Felix.Kuehling@amd.com \
    --cc=Jonathan.Kim@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.