* [RFC PATCH v2] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory()
@ 2021-07-13 3:23 Kevin Wang
2021-07-13 7:11 ` Christian König
0 siblings, 1 reply; 4+ messages in thread
From: Kevin Wang @ 2021-07-13 3:23 UTC (permalink / raw)
To: amd-gfx
Cc: Kevin Wang, lijo.lazar, alexander.deucher, frank.min,
christian.koenig, hawking.zhang
1. using vram aper to access vram if possible
2. avoid MM_INDEX/MM_DATA is not working when mmio protect feature is
enabled.
Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 126 +++++++++++++++++-------
1 file changed, 89 insertions(+), 37 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 2aa2eb5de37a..8f6f605bc459 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1407,6 +1407,91 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
return ttm_bo_eviction_valuable(bo, place);
}
+static void amdgpu_ttm_vram_mm_align_access(struct amdgpu_device *adev, loff_t pos,
+ uint32_t *value, bool write)
+{
+ unsigned long flags;
+
+ BUG_ON(!IS_ALIGNED(pos, 4));
+
+ spin_lock_irqsave(&adev->mmio_idx_lock, flags);
+ WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
+ WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
+ if (write)
+ WREG32_NO_KIQ(mmMM_DATA, *value);
+ else
+ *value = RREG32_NO_KIQ(mmMM_DATA);
+ spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+}
+
+static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write)
+{
+ while (size) {
+ uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
+ uint64_t bytes = 4 - (pos & 0x3);
+ uint32_t shift = (pos & 0x3) * 8;
+ uint32_t mask = 0xffffffff << shift;
+ uint32_t value = 0;
+
+ if (size < bytes) {
+ mask &= 0xffffffff >> (bytes - size) * 8;
+ bytes = size;
+ }
+
+ if (mask != 0xffffffff) {
+ amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, &value, false);
+ if (write) {
+ value &= ~mask;
+ value |= (*(uint32_t *)buf << shift) & mask;
+ amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, &value, true);
+ } else {
+ value = (value & mask) >> shift;
+ memcpy(buf, &value, bytes);
+ }
+ } else {
+ amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, buf, write);
+ }
+
+ pos += bytes;
+ buf += bytes;
+ size -= bytes;
+ }
+}
+
+static void amdgpu_ttm_vram_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write)
+{
+ uint64_t last;
+
+#ifdef CONFIG_64BIT
+ last = min(pos + size, adev->gmc.visible_vram_size);
+ if (last > pos) {
+ void __iomem *addr = adev->mman.aper_base_kaddr + pos;
+ size_t count = last - pos;
+
+ if (write) {
+ memcpy_toio(addr, buf, count);
+ mb();
+ amdgpu_device_flush_hdp(adev, NULL);
+ } else {
+ amdgpu_device_invalidate_hdp(adev, NULL);
+ mb();
+ memcpy_fromio(buf, addr, count);
+ }
+
+ if (count == size)
+ return;
+
+ pos += count;
+ buf += count;
+ size -= count;
+ }
+#endif
+
+ amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
+}
+
/**
* amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
*
@@ -1426,8 +1511,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
struct amdgpu_res_cursor cursor;
- unsigned long flags;
- uint32_t value = 0;
int ret = 0;
if (bo->mem.mem_type != TTM_PL_VRAM)
@@ -1435,41 +1518,10 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
amdgpu_res_first(&bo->mem, offset, len, &cursor);
while (cursor.remaining) {
- uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
- uint64_t bytes = 4 - (cursor.start & 3);
- uint32_t shift = (cursor.start & 3) * 8;
- uint32_t mask = 0xffffffff << shift;
-
- if (cursor.size < bytes) {
- mask &= 0xffffffff >> (bytes - cursor.size) * 8;
- bytes = cursor.size;
- }
-
- if (mask != 0xffffffff) {
- spin_lock_irqsave(&adev->mmio_idx_lock, flags);
- WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
- WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
- value = RREG32_NO_KIQ(mmMM_DATA);
- if (write) {
- value &= ~mask;
- value |= (*(uint32_t *)buf << shift) & mask;
- WREG32_NO_KIQ(mmMM_DATA, value);
- }
- spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
- if (!write) {
- value = (value & mask) >> shift;
- memcpy(buf, &value, bytes);
- }
- } else {
- bytes = cursor.size & ~0x3ULL;
- amdgpu_device_vram_access(adev, cursor.start,
- (uint32_t *)buf, bytes,
- write);
- }
-
- ret += bytes;
- buf = (uint8_t *)buf + bytes;
- amdgpu_res_next(&cursor, bytes);
+ amdgpu_ttm_vram_access(adev, cursor.start, buf, cursor.size, write);
+ ret += cursor.size;
+ buf += cursor.size;
+ amdgpu_res_next(&cursor, cursor.size);
}
return ret;
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [RFC PATCH v2] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory()
2021-07-13 3:23 [RFC PATCH v2] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory() Kevin Wang
@ 2021-07-13 7:11 ` Christian König
2021-07-13 8:32 ` Wang, Kevin(Yang)
0 siblings, 1 reply; 4+ messages in thread
From: Christian König @ 2021-07-13 7:11 UTC (permalink / raw)
To: Kevin Wang, amd-gfx
Cc: alexander.deucher, lijo.lazar, frank.min, christian.koenig,
hawking.zhang
Am 13.07.21 um 05:23 schrieb Kevin Wang:
> 1. using vram aper to access vram if possible
> 2. avoid MM_INDEX/MM_DATA is not working when mmio protect feature is
> enabled.
>
> Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 126 +++++++++++++++++-------
> 1 file changed, 89 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 2aa2eb5de37a..8f6f605bc459 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1407,6 +1407,91 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
> return ttm_bo_eviction_valuable(bo, place);
> }
>
> +static void amdgpu_ttm_vram_mm_align_access(struct amdgpu_device *adev, loff_t pos,
> + uint32_t *value, bool write)
Please drop the _vram_ and _align_ part from the name. Just
amdgpu_device_mm_access.
> +{
> + unsigned long flags;
> +
> + BUG_ON(!IS_ALIGNED(pos, 4));
> +
> + spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> + WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
> + if (write)
> + WREG32_NO_KIQ(mmMM_DATA, *value);
> + else
> + *value = RREG32_NO_KIQ(mmMM_DATA);
> + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> +}
That should still be in amdgpu_device.c and you completely messed up the
drm_dev_enter()/drm_dev_exit() annotation.
Looks like you are working on an old branch where that is not yet merged?
> +
> +static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
> + void *buf, size_t size, bool write)
> +{
> + while (size) {
> + uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
> + uint64_t bytes = 4 - (pos & 0x3);
> + uint32_t shift = (pos & 0x3) * 8;
> + uint32_t mask = 0xffffffff << shift;
> + uint32_t value = 0;
> +
> + if (size < bytes) {
> + mask &= 0xffffffff >> (bytes - size) * 8;
> + bytes = size;
> + }
> +
> + if (mask != 0xffffffff) {
> + amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, &value, false);
> + if (write) {
> + value &= ~mask;
> + value |= (*(uint32_t *)buf << shift) & mask;
> + amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, &value, true);
> + } else {
> + value = (value & mask) >> shift;
> + memcpy(buf, &value, bytes);
> + }
> + } else {
> + amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, buf, write);
> + }
> +
> + pos += bytes;
> + buf += bytes;
> + size -= bytes;
> + }
> +}
> +
> +static void amdgpu_ttm_vram_access(struct amdgpu_device *adev, loff_t pos,
> + void *buf, size_t size, bool write)
> +{
> + uint64_t last;
> +
> +#ifdef CONFIG_64BIT
> + last = min(pos + size, adev->gmc.visible_vram_size);
> + if (last > pos) {
> + void __iomem *addr = adev->mman.aper_base_kaddr + pos;
> + size_t count = last - pos;
> +
> + if (write) {
> + memcpy_toio(addr, buf, count);
> + mb();
> + amdgpu_device_flush_hdp(adev, NULL);
> + } else {
> + amdgpu_device_invalidate_hdp(adev, NULL);
> + mb();
> + memcpy_fromio(buf, addr, count);
> + }
> +
> + if (count == size)
> + return;
> +
> + pos += count;
> + buf += count;
> + size -= count;
> + }
> +#endif
I would put this as a separate function into amdgpu_device.c.
But all of this should only be the second step since we need a much
smaller patch for backporting first.
> +
> + amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
> +}
> +
> /**
> * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
> *
> @@ -1426,8 +1511,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
> struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
> struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
> struct amdgpu_res_cursor cursor;
> - unsigned long flags;
> - uint32_t value = 0;
> int ret = 0;
>
> if (bo->mem.mem_type != TTM_PL_VRAM)
> @@ -1435,41 +1518,10 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>
> amdgpu_res_first(&bo->mem, offset, len, &cursor);
> while (cursor.remaining) {
> - uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
> - uint64_t bytes = 4 - (cursor.start & 3);
> - uint32_t shift = (cursor.start & 3) * 8;
> - uint32_t mask = 0xffffffff << shift;
> -
> - if (cursor.size < bytes) {
> - mask &= 0xffffffff >> (bytes - cursor.size) * 8;
> - bytes = cursor.size;
> - }
> -
> - if (mask != 0xffffffff) {
> - spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
> - WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
> - value = RREG32_NO_KIQ(mmMM_DATA);
> - if (write) {
> - value &= ~mask;
> - value |= (*(uint32_t *)buf << shift) & mask;
> - WREG32_NO_KIQ(mmMM_DATA, value);
> - }
> - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> - if (!write) {
> - value = (value & mask) >> shift;
> - memcpy(buf, &value, bytes);
> - }
This here is the problematic part and should use
amdgpu_ttm_vram_access() instead.
That should be implemented first since we might need to backport that.
Regards,
Christian.
> - } else {
> - bytes = cursor.size & ~0x3ULL;
> - amdgpu_device_vram_access(adev, cursor.start,
> - (uint32_t *)buf, bytes,
> - write);
> - }
> -
> - ret += bytes;
> - buf = (uint8_t *)buf + bytes;
> - amdgpu_res_next(&cursor, bytes);
> + amdgpu_ttm_vram_access(adev, cursor.start, buf, cursor.size, write);
> + ret += cursor.size;
> + buf += cursor.size;
> + amdgpu_res_next(&cursor, cursor.size);
> }
>
> return ret;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [RFC PATCH v2] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory()
2021-07-13 7:11 ` Christian König
@ 2021-07-13 8:32 ` Wang, Kevin(Yang)
2021-07-13 9:06 ` Christian König
0 siblings, 1 reply; 4+ messages in thread
From: Wang, Kevin(Yang) @ 2021-07-13 8:32 UTC (permalink / raw)
To: Christian König, amd-gfx
Cc: Deucher, Alexander, Lazar, Lijo, Min, Frank, Koenig, Christian,
Zhang, Hawking
[-- Attachment #1.1: Type: text/plain, Size: 8613 bytes --]
[AMD Official Use Only]
<comments inline>
________________________________
From: Christian König <ckoenig.leichtzumerken@gmail.com>
Sent: Tuesday, July 13, 2021 3:11 PM
To: Wang, Kevin(Yang) <Kevin1.Wang@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Cc: Lazar, Lijo <Lijo.Lazar@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Min, Frank <Frank.Min@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>
Subject: Re: [RFC PATCH v2] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory()
Am 13.07.21 um 05:23 schrieb Kevin Wang:
> 1. using vram aper to access vram if possible
> 2. avoid MM_INDEX/MM_DATA is not working when mmio protect feature is
> enabled.
>
> Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 126 +++++++++++++++++-------
> 1 file changed, 89 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 2aa2eb5de37a..8f6f605bc459 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1407,6 +1407,91 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
> return ttm_bo_eviction_valuable(bo, place);
> }
>
> +static void amdgpu_ttm_vram_mm_align_access(struct amdgpu_device *adev, loff_t pos,
> + uint32_t *value, bool write)
Please drop the _vram_ and _align_ part from the name. Just
amdgpu_device_mm_access.
[kevin]: I will correct it in next patch.
> +{
> + unsigned long flags;
> +
> + BUG_ON(!IS_ALIGNED(pos, 4));
> +
> + spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> + WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
> + if (write)
> + WREG32_NO_KIQ(mmMM_DATA, *value);
> + else
> + *value = RREG32_NO_KIQ(mmMM_DATA);
> + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> +}
That should still be in amdgpu_device.c and you completely messed up the
drm_dev_enter()/drm_dev_exit() annotation.
Looks like you are working on an old branch where that is not yet merged?
[kevin]: yes, I'm working on amd-staging-drm-next branch, the following patch (from drm-next-misc) is not merged into this branch.
drm/amdgpu: Guard against write accesses after device removal
This should prevent writing to memory or IO ranges possibly
already allocated for other uses after our device is removed.
v5:
Protect more places wher memcopy_to/form_io takes place
Protect IB submissions
v6: Switch to !drm_dev_enter instead of scoping entire code
with brackets.
v7:
Drop guard of HW ring commands emission protection since they
are in GART and not in MMIO.
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
> +
> +static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
> + void *buf, size_t size, bool write)
> +{
> + while (size) {
> + uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
> + uint64_t bytes = 4 - (pos & 0x3);
> + uint32_t shift = (pos & 0x3) * 8;
> + uint32_t mask = 0xffffffff << shift;
> + uint32_t value = 0;
> +
> + if (size < bytes) {
> + mask &= 0xffffffff >> (bytes - size) * 8;
> + bytes = size;
> + }
> +
> + if (mask != 0xffffffff) {
> + amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, &value, false);
> + if (write) {
> + value &= ~mask;
> + value |= (*(uint32_t *)buf << shift) & mask;
> + amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, &value, true);
> + } else {
> + value = (value & mask) >> shift;
> + memcpy(buf, &value, bytes);
> + }
> + } else {
> + amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, buf, write);
> + }
> +
> + pos += bytes;
> + buf += bytes;
> + size -= bytes;
> + }
> +}
> +
> +static void amdgpu_ttm_vram_access(struct amdgpu_device *adev, loff_t pos,
> + void *buf, size_t size, bool write)
> +{
> + uint64_t last;
> +
> +#ifdef CONFIG_64BIT
> + last = min(pos + size, adev->gmc.visible_vram_size);
> + if (last > pos) {
> + void __iomem *addr = adev->mman.aper_base_kaddr + pos;
> + size_t count = last - pos;
> +
> + if (write) {
> + memcpy_toio(addr, buf, count);
> + mb();
> + amdgpu_device_flush_hdp(adev, NULL);
> + } else {
> + amdgpu_device_invalidate_hdp(adev, NULL);
> + mb();
> + memcpy_fromio(buf, addr, count);
> + }
> +
> + if (count == size)
> + return;
> +
> + pos += count;
> + buf += count;
> + size -= count;
> + }
> +#endif
I would put this as a separate function into amdgpu_device.c.
But all of this should only be the second step since we need a much
smaller patch for backporting first.
> +
> + amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
> +}
> +
> /**
> * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
> *
> @@ -1426,8 +1511,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
> struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
> struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
> struct amdgpu_res_cursor cursor;
> - unsigned long flags;
> - uint32_t value = 0;
> int ret = 0;
>
> if (bo->mem.mem_type != TTM_PL_VRAM)
> @@ -1435,41 +1518,10 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>
> amdgpu_res_first(&bo->mem, offset, len, &cursor);
> while (cursor.remaining) {
> - uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
> - uint64_t bytes = 4 - (cursor.start & 3);
> - uint32_t shift = (cursor.start & 3) * 8;
> - uint32_t mask = 0xffffffff << shift;
> -
> - if (cursor.size < bytes) {
> - mask &= 0xffffffff >> (bytes - cursor.size) * 8;
> - bytes = cursor.size;
> - }
> -
> - if (mask != 0xffffffff) {
> - spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
> - WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
> - value = RREG32_NO_KIQ(mmMM_DATA);
> - if (write) {
> - value &= ~mask;
> - value |= (*(uint32_t *)buf << shift) & mask;
> - WREG32_NO_KIQ(mmMM_DATA, value);
> - }
> - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> - if (!write) {
> - value = (value & mask) >> shift;
> - memcpy(buf, &value, bytes);
> - }
This here is the problematic part and should use
amdgpu_ttm_vram_access() instead.
That should be implemented first since we might need to backport that.
Regards,
Christian.
> - } else {
> - bytes = cursor.size & ~0x3ULL;
> - amdgpu_device_vram_access(adev, cursor.start,
> - (uint32_t *)buf, bytes,
> - write);
> - }
> -
> - ret += bytes;
> - buf = (uint8_t *)buf + bytes;
> - amdgpu_res_next(&cursor, bytes);
> + amdgpu_ttm_vram_access(adev, cursor.start, buf, cursor.size, write);
> + ret += cursor.size;
> + buf += cursor.size;
> + amdgpu_res_next(&cursor, cursor.size);
> }
>
> return ret;
[-- Attachment #1.2: Type: text/html, Size: 19949 bytes --]
[-- Attachment #2: Type: text/plain, Size: 154 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [RFC PATCH v2] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory()
2021-07-13 8:32 ` Wang, Kevin(Yang)
@ 2021-07-13 9:06 ` Christian König
0 siblings, 0 replies; 4+ messages in thread
From: Christian König @ 2021-07-13 9:06 UTC (permalink / raw)
To: Wang, Kevin(Yang), amd-gfx
Cc: Deucher, Alexander, Lazar, Lijo, Min, Frank, Koenig, Christian,
Zhang, Hawking
[-- Attachment #1.1: Type: text/plain, Size: 8887 bytes --]
See below.
Am 13.07.21 um 10:32 schrieb Wang, Kevin(Yang):
>
> [AMD Official Use Only]
>
>
> <comments inline>
>
> ------------------------------------------------------------------------
> *From:* Christian König <ckoenig.leichtzumerken@gmail.com>
> *Sent:* Tuesday, July 13, 2021 3:11 PM
> *To:* Wang, Kevin(Yang) <Kevin1.Wang@amd.com>;
> amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> *Cc:* Lazar, Lijo <Lijo.Lazar@amd.com>; Deucher, Alexander
> <Alexander.Deucher@amd.com>; Min, Frank <Frank.Min@amd.com>; Koenig,
> Christian <Christian.Koenig@amd.com>; Zhang, Hawking
> <Hawking.Zhang@amd.com>
> *Subject:* Re: [RFC PATCH v2] drm/amdgpu/ttm: optimize vram access in
> amdgpu_ttm_access_memory()
> Am 13.07.21 um 05:23 schrieb Kevin Wang:
> > 1. using vram aper to access vram if possible
> > 2. avoid MM_INDEX/MM_DATA is not working when mmio protect feature is
> > enabled.
> >
> > Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
> > ---
> > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 126 +++++++++++++++++-------
> > 1 file changed, 89 insertions(+), 37 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > index 2aa2eb5de37a..8f6f605bc459 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > @@ -1407,6 +1407,91 @@ static bool
> amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
> > return ttm_bo_eviction_valuable(bo, place);
> > }
> >
> > +static void amdgpu_ttm_vram_mm_align_access(struct amdgpu_device
> *adev, loff_t pos,
> > + uint32_t *value, bool write)
>
> Please drop the _vram_ and _align_ part from the name. Just
> amdgpu_device_mm_access.
>
> [kevin]: I will correct it in next patch.
>
> > +{
> > + unsigned long flags;
> > +
> > + BUG_ON(!IS_ALIGNED(pos, 4));
> > +
> > + spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> > + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> > + WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
> > + if (write)
> > + WREG32_NO_KIQ(mmMM_DATA, *value);
> > + else
> > + *value = RREG32_NO_KIQ(mmMM_DATA);
> > + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> > +}
>
> That should still be in amdgpu_device.c and you completely messed up the
> drm_dev_enter()/drm_dev_exit() annotation.
>
> Looks like you are working on an old branch where that is not yet merged?
>
> [kevin]: yes, I'm working on amd-staging-drm-next branch, the
> following patch (from drm-next-misc) is not merged into this branch.
Ok then just wait a bit. Alex wanted to update the amd-staging-drm-next
branch in the next few days.
There is an internal mail thread about that, maybe ping him on this.
Christian.
>
> drm/amdgpu: Guard against write accesses after device removal
>
> This should prevent writing to memory or IO ranges possibly
> already allocated for other uses after our device is removed.
>
> v5:
> Protect more places wher memcopy_to/form_io takes place
> Protect IB submissions
>
> v6: Switch to !drm_dev_enter instead of scoping entire code
> with brackets.
>
> v7:
> Drop guard of HW ring commands emission protection since they
> are in GART and not in MMIO.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
>
>
> > +
> > +static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev,
> loff_t pos,
> > + void *buf, size_t size, bool write)
> > +{
> > + while (size) {
> > + uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
> > + uint64_t bytes = 4 - (pos & 0x3);
> > + uint32_t shift = (pos & 0x3) * 8;
> > + uint32_t mask = 0xffffffff << shift;
> > + uint32_t value = 0;
> > +
> > + if (size < bytes) {
> > + mask &= 0xffffffff >> (bytes - size) * 8;
> > + bytes = size;
> > + }
> > +
> > + if (mask != 0xffffffff) {
> > + amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, &value, false);
> > + if (write) {
> > + value &= ~mask;
> > + value |= (*(uint32_t *)buf << shift) &
> mask;
> > + amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, &value, true);
> > + } else {
> > + value = (value & mask) >> shift;
> > + memcpy(buf, &value, bytes);
> > + }
> > + } else {
> > + amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, buf, write);
> > + }
> > +
> > + pos += bytes;
> > + buf += bytes;
> > + size -= bytes;
> > + }
> > +}
> > +
> > +static void amdgpu_ttm_vram_access(struct amdgpu_device *adev,
> loff_t pos,
> > + void *buf, size_t size, bool write)
> > +{
> > + uint64_t last;
> > +
> > +#ifdef CONFIG_64BIT
> > + last = min(pos + size, adev->gmc.visible_vram_size);
> > + if (last > pos) {
> > + void __iomem *addr = adev->mman.aper_base_kaddr + pos;
> > + size_t count = last - pos;
> > +
> > + if (write) {
> > + memcpy_toio(addr, buf, count);
> > + mb();
> > + amdgpu_device_flush_hdp(adev, NULL);
> > + } else {
> > + amdgpu_device_invalidate_hdp(adev, NULL);
> > + mb();
> > + memcpy_fromio(buf, addr, count);
> > + }
> > +
> > + if (count == size)
> > + return;
> > +
> > + pos += count;
> > + buf += count;
> > + size -= count;
> > + }
> > +#endif
>
> I would put this as a separate function into amdgpu_device.c.
>
> But all of this should only be the second step since we need a much
> smaller patch for backporting first.
>
> > +
> > + amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
> > +}
> > +
> > /**
> > * amdgpu_ttm_access_memory - Read or Write memory that backs a
> buffer object.
> > *
> > @@ -1426,8 +1511,6 @@ static int amdgpu_ttm_access_memory(struct
> ttm_buffer_object *bo,
> > struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
> > struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
> > struct amdgpu_res_cursor cursor;
> > - unsigned long flags;
> > - uint32_t value = 0;
> > int ret = 0;
> >
> > if (bo->mem.mem_type != TTM_PL_VRAM)
> > @@ -1435,41 +1518,10 @@ static int amdgpu_ttm_access_memory(struct
> ttm_buffer_object *bo,
> >
> > amdgpu_res_first(&bo->mem, offset, len, &cursor);
> > while (cursor.remaining) {
> > - uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
> > - uint64_t bytes = 4 - (cursor.start & 3);
> > - uint32_t shift = (cursor.start & 3) * 8;
> > - uint32_t mask = 0xffffffff << shift;
> > -
> > - if (cursor.size < bytes) {
> > - mask &= 0xffffffff >> (bytes - cursor.size) * 8;
> > - bytes = cursor.size;
> > - }
> > -
> > - if (mask != 0xffffffff) {
> > - spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> > - WREG32_NO_KIQ(mmMM_INDEX,
> ((uint32_t)aligned_pos) | 0x80000000);
> > - WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
> > - value = RREG32_NO_KIQ(mmMM_DATA);
> > - if (write) {
> > - value &= ~mask;
> > - value |= (*(uint32_t *)buf << shift) &
> mask;
> > - WREG32_NO_KIQ(mmMM_DATA, value);
> > - }
> > - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> > - if (!write) {
> > - value = (value & mask) >> shift;
> > - memcpy(buf, &value, bytes);
> > - }
>
> This here is the problematic part and should use
> amdgpu_ttm_vram_access() instead.
>
> That should be implemented first since we might need to backport that.
>
> Regards,
> Christian.
>
> > - } else {
> > - bytes = cursor.size & ~0x3ULL;
> > - amdgpu_device_vram_access(adev, cursor.start,
> > - (uint32_t *)buf, bytes,
> > - write);
> > - }
> > -
> > - ret += bytes;
> > - buf = (uint8_t *)buf + bytes;
> > - amdgpu_res_next(&cursor, bytes);
> > + amdgpu_ttm_vram_access(adev, cursor.start, buf,
> cursor.size, write);
> > + ret += cursor.size;
> > + buf += cursor.size;
> > + amdgpu_res_next(&cursor, cursor.size);
> > }
> >
> > return ret;
>
[-- Attachment #1.2: Type: text/html, Size: 18355 bytes --]
[-- Attachment #2: Type: text/plain, Size: 154 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-07-13 9:06 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-13 3:23 [RFC PATCH v2] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory() Kevin Wang
2021-07-13 7:11 ` Christian König
2021-07-13 8:32 ` Wang, Kevin(Yang)
2021-07-13 9:06 ` Christian König
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.