[AMD Official Use Only] ________________________________ From: Lazar, Lijo Sent: Friday, June 25, 2021 9:28 PM To: Wang, Kevin(Yang) ; amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Koenig, Christian Subject: Re: [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access() On 6/25/2021 8:54 AM, Kevin Wang wrote: > 1. add non-aligned address support in amdgpu_device_vram_access() > 2. reduce duplicate code in amdgpu_ttm_access_memory() > > because the MM_INDEX{HI}/DATA are protected register, it is not working > in sriov environment when mmio protect feature is enabled (in some asics), > and the old function amdgpu_ttm_access_memory() will force using MM_INDEX/DATA > to handle non-aligned address by default, it will cause the register(MM_DATA) > is always return 0. > > with the patch, the memory will be accessed in the aper way first. > (in visible memory or enable pcie large-bar feature), then using > MM_INDEX{HI}/MM_DATA to access rest vram memroy. > > Signed-off-by: Kevin Wang > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 69 ++++++++++++++++------ > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 42 ++----------- > 3 files changed, 58 insertions(+), 55 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index d14b4968a026..8095d9a9c857 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -1103,7 +1103,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev); > int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev); > > void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, > - uint32_t *buf, size_t size, bool write); > + void *buf, size_t size, bool write); > uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, > uint32_t reg, uint32_t acc_flags); > void amdgpu_device_wreg(struct amdgpu_device *adev, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index e6702d136a6d..2047e3c2b365 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -280,6 +280,54 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev) > amdgpu_acpi_is_power_shift_control_supported()); > } > > +static inline void amdgpu_device_vram_mmio_align_access_locked(struct amdgpu_device *adev, loff_t pos, > + uint32_t *value, bool write) > +{ > + BUG_ON(!IS_ALIGNED(pos, 4)); > + > + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000); > + WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31); > + if (write) > + WREG32_NO_KIQ(mmMM_DATA, *value); > + else > + *value = RREG32_NO_KIQ(mmMM_DATA); > +} > + > +static void amdgpu_device_vram_mmio_access_locked(struct amdgpu_device *adev, loff_t pos, > + void *buf, size_t size, bool write) > +{ > + while (size) { > + uint64_t aligned_pos = ALIGN_DOWN(pos, 4); > + uint64_t bytes = 4 - (pos & 0x3); > + uint32_t shift = (pos & 0x3) * 8; > + uint32_t mask = 0xffffffff << shift; > + uint32_t value = 0; > + > + if (size < bytes) { > + mask &= 0xffffffff >> (bytes - size) * 8; > + bytes = size; > + } > + > + if (mask != 0xffffffff) { > + amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, false); > + if (write) { > + value &= ~mask; > + value |= (*(uint32_t *)buf << shift) & mask; > + amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, true); > + } else { > + value = (value & mask) >> shift; > + memcpy(buf, &value, bytes); > + } > + } else { > + amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, buf, write); > + } > + > + pos += bytes; > + buf += bytes; > + size -= bytes; > + } > +} > + > /* > * VRAM access helper functions > */ > @@ -294,13 +342,11 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev) > * @write: true - write to vram, otherwise - read from vram > */ > void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, > - uint32_t *buf, size_t size, bool write) > + void *buf, size_t size, bool write) > { > unsigned long flags; > - uint32_t hi = ~0; > uint64_t last; > > - > #ifdef CONFIG_64BIT > last = min(pos + size, adev->gmc.visible_vram_size); > if (last > pos) { > @@ -321,25 +367,12 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, > return; > > pos += count; > - buf += count / 4; > + buf += count; > size -= count; > } > #endif > - > spin_lock_irqsave(&adev->mmio_idx_lock, flags); > - for (last = pos + size; pos < last; pos += 4) { > - uint32_t tmp = pos >> 31; > - > - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000); > - if (tmp != hi) { > - WREG32_NO_KIQ(mmMM_INDEX_HI, tmp); > - hi = tmp; > - } > - if (write) > - WREG32_NO_KIQ(mmMM_DATA, *buf++); > - else > - *buf++ = RREG32_NO_KIQ(mmMM_DATA); > - } > + amdgpu_device_vram_mmio_access_locked(adev, pos, buf, size, write); > spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index 6297363ab740..cf5b8bdc2dd3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -1430,8 +1430,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, > struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); > struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); > struct amdgpu_res_cursor cursor; > - unsigned long flags; > - uint32_t value = 0; > int ret = 0; > > if (bo->mem.mem_type != TTM_PL_VRAM) > @@ -1439,41 +1437,13 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, > > amdgpu_res_first(&bo->mem, offset, len, &cursor); > while (cursor.remaining) { > - uint64_t aligned_pos = cursor.start & ~(uint64_t)3; > - uint64_t bytes = 4 - (cursor.start & 3); > - uint32_t shift = (cursor.start & 3) * 8; > - uint32_t mask = 0xffffffff << shift; > - > - if (cursor.size < bytes) { > - mask &= 0xffffffff >> (bytes - cursor.size) * 8; > - bytes = cursor.size; > - } > + amdgpu_device_vram_access(adev, cursor.start, > + buf, cursor.size, > + write); > > - if (mask != 0xffffffff) { > - spin_lock_irqsave(&adev->mmio_idx_lock, flags); The new logic seems to skip this mmio_idx_lock for accessing index/data pair ragisters. BTW, where is the visible memory aperture check? Thanks Lijo [Keivn]: Hi Lijo, the mmio_idx_lock lock is in amdgpu_device_vram_access() function. this patch is extending the amdgpu_device_vram_access() function to support un-aligned address support. Best Regards, Kevin > - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000); > - WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); > - value = RREG32_NO_KIQ(mmMM_DATA); > - if (write) { > - value &= ~mask; > - value |= (*(uint32_t *)buf << shift) & mask; > - WREG32_NO_KIQ(mmMM_DATA, value); > - } > - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); > - if (!write) { > - value = (value & mask) >> shift; > - memcpy(buf, &value, bytes); > - } > - } else { > - bytes = cursor.size & ~0x3ULL; > - amdgpu_device_vram_access(adev, cursor.start, > - (uint32_t *)buf, bytes, > - write); > - } > - > - ret += bytes; > - buf = (uint8_t *)buf + bytes; > - amdgpu_res_next(&cursor, bytes); > + ret += cursor.size; > + buf += cursor.size; > + amdgpu_res_next(&cursor, cursor.size); > } > > return ret; >