* [RFC PATCH v2 1/3] drm/amdgpu: split amdgpu_device_access_vram() into two small parts
@ 2021-07-16 3:10 Kevin Wang
2021-07-16 3:10 ` [RFC PATCH v2 2/3] drm/amdgpu/ttm: replace duplicate code with exiting function Kevin Wang
2021-07-16 3:10 ` [RFC PATCH v2 3/3] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory() Kevin Wang
0 siblings, 2 replies; 4+ messages in thread
From: Kevin Wang @ 2021-07-16 3:10 UTC (permalink / raw)
To: amd-gfx
Cc: Kevin Wang, lijo.lazar, alexander.deucher, frank.min,
christian.koenig, hawking.zhang
split amdgpu_device_access_vram()
1. amdgpu_device_mm_access(): using MM_INDEX/MM_DATA to access vram
2. amdgpu_device_aper_access(): using vram aperature to access vram (option)
Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 98 ++++++++++++++++------
2 files changed, 79 insertions(+), 26 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 61a0897e984a..54cf647bd018 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1109,8 +1109,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
void amdgpu_device_fini(struct amdgpu_device *adev);
int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
+void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write);
+size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write);
+
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
- uint32_t *buf, size_t size, bool write);
+ void *buf, size_t size, bool write);
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t acc_flags);
void amdgpu_device_wreg(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index eb1f3f42e00b..37fa199be8b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -285,7 +285,7 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
*/
/**
- * amdgpu_device_vram_access - read/write a buffer in vram
+ * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
*
* @adev: amdgpu_device pointer
* @pos: offset of the buffer in vram
@@ -293,19 +293,60 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
* @size: read/write size, sizeof(@buf) must > @size
* @write: true - write to vram, otherwise - read from vram
*/
-void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
- uint32_t *buf, size_t size, bool write)
+void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write)
{
unsigned long flags;
- uint32_t hi = ~0;
+ uint32_t hi = ~0, tmp = 0;
+ uint32_t *data = buf;
uint64_t last;
+ BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
+
+ spin_lock_irqsave(&adev->mmio_idx_lock, flags);
+ for (last = pos + size; pos < last; pos += 4) {
+ tmp = pos >> 31;
+ WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
+ if (tmp != hi) {
+ WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
+ hi = tmp;
+ }
+ if (write)
+ WREG32_NO_KIQ(mmMM_DATA, *data++);
+ else
+ *data++ = RREG32_NO_KIQ(mmMM_DATA);
+ }
+
+ spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+}
+
+/**
+ * amdgpu_device_vram_access - access vram by vram aperature
+ *
+ * @adev: amdgpu_device pointer
+ * @pos: offset of the buffer in vram
+ * @buf: virtual address of the buffer in system memory
+ * @size: read/write size, sizeof(@buf) must > @size
+ * @write: true - write to vram, otherwise - read from vram
+ *
+ * The return value means how many bytes have been transferred.
+ */
+size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write)
+{
#ifdef CONFIG_64BIT
+ void __iomem *addr;
+ size_t count = 0;
+ uint64_t last;
+
+ if (!adev->mman.aper_base_kaddr)
+ return 0;
+
last = min(pos + size, adev->gmc.visible_vram_size);
if (last > pos) {
- void __iomem *addr = adev->mman.aper_base_kaddr + pos;
- size_t count = last - pos;
+ addr = adev->mman.aper_base_kaddr + pos;
+ count = last - pos;
if (write) {
memcpy_toio(addr, buf, count);
@@ -317,30 +358,37 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
memcpy_fromio(buf, addr, count);
}
- if (count == size)
- return;
-
- pos += count;
- buf += count / 4;
- size -= count;
}
+
+ return count;
+#else
+ return 0;
#endif
+}
- spin_lock_irqsave(&adev->mmio_idx_lock, flags);
- for (last = pos + size; pos < last; pos += 4) {
- uint32_t tmp = pos >> 31;
+/**
+ * amdgpu_device_vram_access - read/write a buffer in vram
+ *
+ * @adev: amdgpu_device pointer
+ * @pos: offset of the buffer in vram
+ * @buf: virtual address of the buffer in system memory
+ * @size: read/write size, sizeof(@buf) must > @size
+ * @write: true - write to vram, otherwise - read from vram
+ */
+void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write)
+{
+ size_t count;
- WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
- if (tmp != hi) {
- WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
- hi = tmp;
- }
- if (write)
- WREG32_NO_KIQ(mmMM_DATA, *buf++);
- else
- *buf++ = RREG32_NO_KIQ(mmMM_DATA);
+ /* try to using vram apreature to access vram first */
+ count = amdgpu_device_aper_access(adev, pos, buf, size, write);
+ size -= count;
+ if (size) {
+ /* using MM to access rest vram */
+ pos += count;
+ buf += count;
+ amdgpu_device_mm_access(adev, pos, buf, size, write);
}
- spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
}
/*
--
2.25.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [RFC PATCH v2 2/3] drm/amdgpu/ttm: replace duplicate code with exiting function
2021-07-16 3:10 [RFC PATCH v2 1/3] drm/amdgpu: split amdgpu_device_access_vram() into two small parts Kevin Wang
@ 2021-07-16 3:10 ` Kevin Wang
2021-07-16 3:10 ` [RFC PATCH v2 3/3] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory() Kevin Wang
1 sibling, 0 replies; 4+ messages in thread
From: Kevin Wang @ 2021-07-16 3:10 UTC (permalink / raw)
To: amd-gfx
Cc: Kevin Wang, lijo.lazar, alexander.deucher, frank.min,
christian.koenig, hawking.zhang
using exiting function to replace duplicate code blocks in
amdgpu_ttm_vram_write().
Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 2aa2eb5de37a..f4ff3c9350b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -2207,7 +2207,6 @@ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
return -ENXIO;
while (size) {
- unsigned long flags;
uint32_t value;
if (*pos >= adev->gmc.mc_vram_size)
@@ -2217,11 +2216,7 @@ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
if (r)
return r;
- spin_lock_irqsave(&adev->mmio_idx_lock, flags);
- WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000);
- WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31);
- WREG32_NO_KIQ(mmMM_DATA, value);
- spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+ amdgpu_device_mm_access(adev, *pos, &value, 4, true);
result += 4;
buf += 4;
--
2.25.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [RFC PATCH v2 3/3] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory()
2021-07-16 3:10 [RFC PATCH v2 1/3] drm/amdgpu: split amdgpu_device_access_vram() into two small parts Kevin Wang
2021-07-16 3:10 ` [RFC PATCH v2 2/3] drm/amdgpu/ttm: replace duplicate code with exiting function Kevin Wang
@ 2021-07-16 3:10 ` Kevin Wang
2021-07-16 7:43 ` Christian König
1 sibling, 1 reply; 4+ messages in thread
From: Kevin Wang @ 2021-07-16 3:10 UTC (permalink / raw)
To: amd-gfx
Cc: Kevin Wang, lijo.lazar, alexander.deucher, frank.min,
christian.koenig, hawking.zhang
1. using vram aper to access vram if possible
2. avoid MM_INDEX/MM_DATA is not working when mmio protect feature is
enabled.
Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 91 +++++++++++++++----------
1 file changed, 54 insertions(+), 37 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index f4ff3c9350b3..62ea5089b4f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1407,6 +1407,56 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
return ttm_bo_eviction_valuable(bo, place);
}
+static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write)
+{
+ while (size) {
+ uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
+ uint64_t bytes = 4 - (pos & 0x3);
+ uint32_t shift = (pos & 0x3) * 8;
+ uint32_t mask = 0xffffffff << shift;
+ uint32_t value = 0;
+
+ if (size < bytes) {
+ mask &= 0xffffffff >> (bytes - size) * 8;
+ bytes = size;
+ }
+
+ if (mask != 0xffffffff) {
+ amdgpu_device_mm_access(adev, aligned_pos, &value, 4, false);
+ if (write) {
+ value &= ~mask;
+ value |= (*(uint32_t *)buf << shift) & mask;
+ amdgpu_device_mm_access(adev, aligned_pos, &value, 4, true);
+ } else {
+ value = (value & mask) >> shift;
+ memcpy(buf, &value, bytes);
+ }
+ } else {
+ amdgpu_device_mm_access(adev, aligned_pos, buf, 4, write);
+ }
+
+ pos += bytes;
+ buf += bytes;
+ size -= bytes;
+ }
+}
+
+static void amdgpu_ttm_vram_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write)
+{
+ size_t count;
+
+ count = amdgpu_device_aper_access(adev, pos, buf, size, write);
+ size -= count;
+ if (size) {
+ /* using MM to access rest vram and handle un-aligned address */
+ pos += count;
+ buf += count;
+ amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
+ }
+}
+
/**
* amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
*
@@ -1426,8 +1476,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
struct amdgpu_res_cursor cursor;
- unsigned long flags;
- uint32_t value = 0;
int ret = 0;
if (bo->mem.mem_type != TTM_PL_VRAM)
@@ -1435,41 +1483,10 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
amdgpu_res_first(&bo->mem, offset, len, &cursor);
while (cursor.remaining) {
- uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
- uint64_t bytes = 4 - (cursor.start & 3);
- uint32_t shift = (cursor.start & 3) * 8;
- uint32_t mask = 0xffffffff << shift;
-
- if (cursor.size < bytes) {
- mask &= 0xffffffff >> (bytes - cursor.size) * 8;
- bytes = cursor.size;
- }
-
- if (mask != 0xffffffff) {
- spin_lock_irqsave(&adev->mmio_idx_lock, flags);
- WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
- WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
- value = RREG32_NO_KIQ(mmMM_DATA);
- if (write) {
- value &= ~mask;
- value |= (*(uint32_t *)buf << shift) & mask;
- WREG32_NO_KIQ(mmMM_DATA, value);
- }
- spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
- if (!write) {
- value = (value & mask) >> shift;
- memcpy(buf, &value, bytes);
- }
- } else {
- bytes = cursor.size & ~0x3ULL;
- amdgpu_device_vram_access(adev, cursor.start,
- (uint32_t *)buf, bytes,
- write);
- }
-
- ret += bytes;
- buf = (uint8_t *)buf + bytes;
- amdgpu_res_next(&cursor, bytes);
+ amdgpu_ttm_vram_access(adev, cursor.start, buf, cursor.size, write);
+ ret += cursor.size;
+ buf += cursor.size;
+ amdgpu_res_next(&cursor, cursor.size);
}
return ret;
--
2.25.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [RFC PATCH v2 3/3] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory()
2021-07-16 3:10 ` [RFC PATCH v2 3/3] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory() Kevin Wang
@ 2021-07-16 7:43 ` Christian König
0 siblings, 0 replies; 4+ messages in thread
From: Christian König @ 2021-07-16 7:43 UTC (permalink / raw)
To: Kevin Wang, amd-gfx
Cc: alexander.deucher, lijo.lazar, frank.min, christian.koenig,
hawking.zhang
Am 16.07.21 um 05:10 schrieb Kevin Wang:
> 1. using vram aper to access vram if possible
> 2. avoid MM_INDEX/MM_DATA is not working when mmio protect feature is
> enabled.
>
> Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 91 +++++++++++++++----------
> 1 file changed, 54 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index f4ff3c9350b3..62ea5089b4f0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1407,6 +1407,56 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
> return ttm_bo_eviction_valuable(bo, place);
> }
>
> +static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
> + void *buf, size_t size, bool write)
> +{
> + while (size) {
> + uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
> + uint64_t bytes = 4 - (pos & 0x3);
> + uint32_t shift = (pos & 0x3) * 8;
> + uint32_t mask = 0xffffffff << shift;
> + uint32_t value = 0;
> +
> + if (size < bytes) {
> + mask &= 0xffffffff >> (bytes - size) * 8;
> + bytes = size;
> + }
> +
> + if (mask != 0xffffffff) {
> + amdgpu_device_mm_access(adev, aligned_pos, &value, 4, false);
> + if (write) {
> + value &= ~mask;
> + value |= (*(uint32_t *)buf << shift) & mask;
> + amdgpu_device_mm_access(adev, aligned_pos, &value, 4, true);
> + } else {
> + value = (value & mask) >> shift;
> + memcpy(buf, &value, bytes);
> + }
> + } else {
> + amdgpu_device_mm_access(adev, aligned_pos, buf, 4, write);
> + }
> +
> + pos += bytes;
> + buf += bytes;
> + size -= bytes;
> + }
> +}
> +
> +static void amdgpu_ttm_vram_access(struct amdgpu_device *adev, loff_t pos,
> + void *buf, size_t size, bool write)
> +{
> + size_t count;
> +
> + count = amdgpu_device_aper_access(adev, pos, buf, size, write);
> + size -= count;
> + if (size) {
> + /* using MM to access rest vram and handle un-aligned address */
> + pos += count;
> + buf += count;
> + amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
> + }
> +}
Just inline that function into the caller, don't really see the need to
have that separated.
Apart from that looks good to me.
Regards,
Christian.
> +
> /**
> * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
> *
> @@ -1426,8 +1476,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
> struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
> struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
> struct amdgpu_res_cursor cursor;
> - unsigned long flags;
> - uint32_t value = 0;
> int ret = 0;
>
> if (bo->mem.mem_type != TTM_PL_VRAM)
> @@ -1435,41 +1483,10 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>
> amdgpu_res_first(&bo->mem, offset, len, &cursor);
> while (cursor.remaining) {
> - uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
> - uint64_t bytes = 4 - (cursor.start & 3);
> - uint32_t shift = (cursor.start & 3) * 8;
> - uint32_t mask = 0xffffffff << shift;
> -
> - if (cursor.size < bytes) {
> - mask &= 0xffffffff >> (bytes - cursor.size) * 8;
> - bytes = cursor.size;
> - }
> -
> - if (mask != 0xffffffff) {
> - spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
> - WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
> - value = RREG32_NO_KIQ(mmMM_DATA);
> - if (write) {
> - value &= ~mask;
> - value |= (*(uint32_t *)buf << shift) & mask;
> - WREG32_NO_KIQ(mmMM_DATA, value);
> - }
> - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> - if (!write) {
> - value = (value & mask) >> shift;
> - memcpy(buf, &value, bytes);
> - }
> - } else {
> - bytes = cursor.size & ~0x3ULL;
> - amdgpu_device_vram_access(adev, cursor.start,
> - (uint32_t *)buf, bytes,
> - write);
> - }
> -
> - ret += bytes;
> - buf = (uint8_t *)buf + bytes;
> - amdgpu_res_next(&cursor, bytes);
> + amdgpu_ttm_vram_access(adev, cursor.start, buf, cursor.size, write);
> + ret += cursor.size;
> + buf += cursor.size;
> + amdgpu_res_next(&cursor, cursor.size);
> }
>
> return ret;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-07-16 7:43 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-16 3:10 [RFC PATCH v2 1/3] drm/amdgpu: split amdgpu_device_access_vram() into two small parts Kevin Wang
2021-07-16 3:10 ` [RFC PATCH v2 2/3] drm/amdgpu/ttm: replace duplicate code with exiting function Kevin Wang
2021-07-16 3:10 ` [RFC PATCH v2 3/3] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory() Kevin Wang
2021-07-16 7:43 ` Christian König
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.