Hi Kevin, the primary use of amdgpu_device_vram_access() is to gate direct kernel access to VRAM using the MM_INDEX/MM_DATA registers or aperture. It should by design never deal with data sizes != 4 bytes. The amdgpu_ttm_access_memory() is for debug access to the backing store of BOs and also needs to handle per byte access. What we can do is to also use amdgpu_device_vram_access() in amdgpu_device_vram_access() for the masked access, which would also solve a hot plug bug as far as I can see here. But moving the byte access logic into amdgpu_device_vram_access() makes no sense at all. Regards, Christian. Am 28.06.21 um 16:43 schrieb Wang, Kevin(Yang): > > [AMD Official Use Only] > > > Hi Chris, > > amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, unsigned long > offset, void *buf, int len,  int write) > > the above function will be called from kernel side (likes > 'get_user_pages' code path),  and the function should accept > non-aligned addresses. > without this patch, this function will be using MM_INDEX/DATA to > handle aligned address whether in visible memory or not. > for this kind of case , I think the driver should check whether VRAM > aperture works first, then using MM_INDEX/DATA as a backup option. > > for this patch, I only extend amdgpu_device_vram_access() function to > support un-aligned case, and the new function is fully compatible with > the old code logic. > I can't understand why you give a NAK about this patch, I think it's a > good optimization, how do you think? > thanks. > > Best Regards, > Kevin > ------------------------------------------------------------------------ > *From:* Christian König > *Sent:* Monday, June 28, 2021 10:10 PM > *To:* Wang, Kevin(Yang) ; > amd-gfx@lists.freedesktop.org > *Cc:* Deucher, Alexander ; Koenig, > Christian > *Subject:* Re: [PATCH] drm/amdgpu: add non-aligned address supported > in amdgpu_device_vram_access() > Am 25.06.21 um 05:24 schrieb Kevin Wang: > > 1. add non-aligned address support in amdgpu_device_vram_access() > > 2. reduce duplicate code in amdgpu_ttm_access_memory() > > > > because the MM_INDEX{HI}/DATA are protected register, it is not working > > in sriov environment when mmio protect feature is enabled (in some > asics), > > and the old function amdgpu_ttm_access_memory() will force using > MM_INDEX/DATA > > to handle non-aligned address by default, it will cause the > register(MM_DATA) > > is always return 0. > > > > with the patch, the memory will be accessed in the aper way first. > > (in visible memory or enable pcie large-bar feature), then using > > MM_INDEX{HI}/MM_DATA to access rest vram memroy. > > Well NAK to the whole approach. > > The amdgpu_device_vram_access() are intentionally *NOT* using the VRAM > aperture nor providing byte wise access. > > And yes that this doesn't work under SRIOV is completely intentional as > well. > > What we could do is to use the aperture in amdgpu_ttm_access_memory() > for unaligned access if that is indeed a problem. > > Regards, > Christian. > > > > > > Signed-off-by: Kevin Wang > > --- > >   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 +- > >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 69 ++++++++++++++++------ > >   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 42 ++----------- > >   3 files changed, 58 insertions(+), 55 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > index d14b4968a026..8095d9a9c857 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > @@ -1103,7 +1103,7 @@ void amdgpu_device_fini(struct amdgpu_device > *adev); > >   int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev); > > > >   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, > > -                            uint32_t *buf, size_t size, bool write); > > +                            void *buf, size_t size, bool write); > >   uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, > >                            uint32_t reg, uint32_t acc_flags); > >   void amdgpu_device_wreg(struct amdgpu_device *adev, > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > > index e6702d136a6d..2047e3c2b365 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > > @@ -280,6 +280,54 @@ bool amdgpu_device_supports_smart_shift(struct > drm_device *dev) > > amdgpu_acpi_is_power_shift_control_supported()); > >   } > > > > +static inline void > amdgpu_device_vram_mmio_align_access_locked(struct amdgpu_device > *adev, loff_t pos, > > + uint32_t *value, bool write) > > +{ > > +     BUG_ON(!IS_ALIGNED(pos, 4)); > > + > > +     WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000); > > +     WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31); > > +     if (write) > > +             WREG32_NO_KIQ(mmMM_DATA, *value); > > +     else > > +             *value = RREG32_NO_KIQ(mmMM_DATA); > > +} > > + > > +static void amdgpu_device_vram_mmio_access_locked(struct > amdgpu_device *adev, loff_t pos, > > + void *buf, size_t size, bool write) > > +{ > > +     while (size) { > > +             uint64_t aligned_pos = ALIGN_DOWN(pos, 4); > > +             uint64_t bytes = 4 - (pos & 0x3); > > +             uint32_t shift = (pos & 0x3) * 8; > > +             uint32_t mask = 0xffffffff << shift; > > +             uint32_t value = 0; > > + > > +             if (size < bytes) { > > +                     mask &= 0xffffffff >> (bytes - size) * 8; > > +                     bytes = size; > > +             } > > + > > +             if (mask != 0xffffffff) { > > + amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, > &value, false); > > +                     if (write) { > > +                             value &= ~mask; > > +                             value |= (*(uint32_t *)buf << shift) & > mask; > > + amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, > &value, true); > > +                     } else { > > +                             value = (value & mask) >> shift; > > +                             memcpy(buf, &value, bytes); > > +                     } > > +             } else { > > + amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, > buf, write); > > +             } > > + > > +             pos += bytes; > > +             buf += bytes; > > +             size -= bytes; > > +     } > > +} > > + > >   /* > >    * VRAM access helper functions > >    */ > > @@ -294,13 +342,11 @@ bool amdgpu_device_supports_smart_shift(struct > drm_device *dev) > >    * @write: true - write to vram, otherwise - read from vram > >    */ > >   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, > > -                            uint32_t *buf, size_t size, bool write) > > +                            void *buf, size_t size, bool write) > >   { > >        unsigned long flags; > > -     uint32_t hi = ~0; > >        uint64_t last; > > > > - > >   #ifdef CONFIG_64BIT > >        last = min(pos + size, adev->gmc.visible_vram_size); > >        if (last > pos) { > > @@ -321,25 +367,12 @@ void amdgpu_device_vram_access(struct > amdgpu_device *adev, loff_t pos, > >                        return; > > > >                pos += count; > > -             buf += count / 4; > > +             buf += count; > >                size -= count; > >        } > >   #endif > > - > > spin_lock_irqsave(&adev->mmio_idx_lock, flags); > > -     for (last = pos + size; pos < last; pos += 4) { > > -             uint32_t tmp = pos >> 31; > > - > > -             WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000); > > -             if (tmp != hi) { > > -                     WREG32_NO_KIQ(mmMM_INDEX_HI, tmp); > > -                     hi = tmp; > > -             } > > -             if (write) > > -                     WREG32_NO_KIQ(mmMM_DATA, *buf++); > > -             else > > -                     *buf++ = RREG32_NO_KIQ(mmMM_DATA); > > -     } > > +     amdgpu_device_vram_mmio_access_locked(adev, pos, buf, size, > write); > > spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); > >   } > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > index 6297363ab740..cf5b8bdc2dd3 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > @@ -1430,8 +1430,6 @@ static int amdgpu_ttm_access_memory(struct > ttm_buffer_object *bo, > >        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); > >        struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); > >        struct amdgpu_res_cursor cursor; > > -     unsigned long flags; > > -     uint32_t value = 0; > >        int ret = 0; > > > >        if (bo->mem.mem_type != TTM_PL_VRAM) > > @@ -1439,41 +1437,13 @@ static int amdgpu_ttm_access_memory(struct > ttm_buffer_object *bo, > > > >        amdgpu_res_first(&bo->mem, offset, len, &cursor); > >        while (cursor.remaining) { > > -             uint64_t aligned_pos = cursor.start & ~(uint64_t)3; > > -             uint64_t bytes = 4 - (cursor.start & 3); > > -             uint32_t shift = (cursor.start & 3) * 8; > > -             uint32_t mask = 0xffffffff << shift; > > - > > -             if (cursor.size < bytes) { > > -                     mask &= 0xffffffff >> (bytes - cursor.size) * 8; > > -                     bytes = cursor.size; > > -             } > > +             amdgpu_device_vram_access(adev, cursor.start, > > +                                       buf, cursor.size, > > +                                       write); > > > > -             if (mask != 0xffffffff) { > > - spin_lock_irqsave(&adev->mmio_idx_lock, flags); > > -                     WREG32_NO_KIQ(mmMM_INDEX, > ((uint32_t)aligned_pos) | 0x80000000); > > -                     WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); > > -                     value = RREG32_NO_KIQ(mmMM_DATA); > > -                     if (write) { > > -                             value &= ~mask; > > -                             value |= (*(uint32_t *)buf << shift) & > mask; > > - WREG32_NO_KIQ(mmMM_DATA, value); > > -                     } > > - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); > > -                     if (!write) { > > -                             value = (value & mask) >> shift; > > -                             memcpy(buf, &value, bytes); > > -                     } > > -             } else { > > -                     bytes = cursor.size & ~0x3ULL; > > - amdgpu_device_vram_access(adev, cursor.start, > > - (uint32_t *)buf, bytes, > > - write); > > -             } > > - > > -             ret += bytes; > > -             buf = (uint8_t *)buf + bytes; > > -             amdgpu_res_next(&cursor, bytes); > > +             ret += cursor.size; > > +             buf += cursor.size; > > +             amdgpu_res_next(&cursor, cursor.size); > >        } > > > >        return ret; >