All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access()
@ 2021-06-25  3:24 Kevin Wang
  2021-06-25 13:28 ` Lazar, Lijo
  2021-06-28 14:10 ` Christian König
  0 siblings, 2 replies; 9+ messages in thread
From: Kevin Wang @ 2021-06-25  3:24 UTC (permalink / raw)
  To: amd-gfx; +Cc: alexander.deucher, Kevin Wang, christian.koenig

1. add non-aligned address support in amdgpu_device_vram_access()
2. reduce duplicate code in amdgpu_ttm_access_memory()

because the MM_INDEX{HI}/DATA are protected register, it is not working
in sriov environment when mmio protect feature is enabled (in some asics),
and the old function amdgpu_ttm_access_memory() will force using MM_INDEX/DATA
to handle non-aligned address by default, it will cause the register(MM_DATA)
is always return 0.

with the patch, the memory will be accessed in the aper way first.
(in visible memory or enable pcie large-bar feature), then using
MM_INDEX{HI}/MM_DATA to access rest vram memroy.

Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 69 ++++++++++++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 42 ++-----------
 3 files changed, 58 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d14b4968a026..8095d9a9c857 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1103,7 +1103,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev);
 int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
 
 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
-			       uint32_t *buf, size_t size, bool write);
+			       void *buf, size_t size, bool write);
 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
 			    uint32_t reg, uint32_t acc_flags);
 void amdgpu_device_wreg(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e6702d136a6d..2047e3c2b365 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -280,6 +280,54 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
 		amdgpu_acpi_is_power_shift_control_supported());
 }
 
+static inline void amdgpu_device_vram_mmio_align_access_locked(struct amdgpu_device *adev, loff_t pos,
+							       uint32_t *value, bool write)
+{
+	BUG_ON(!IS_ALIGNED(pos, 4));
+
+	WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
+	WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
+	if (write)
+		WREG32_NO_KIQ(mmMM_DATA, *value);
+	else
+		*value = RREG32_NO_KIQ(mmMM_DATA);
+}
+
+static void amdgpu_device_vram_mmio_access_locked(struct amdgpu_device *adev, loff_t pos,
+						  void *buf, size_t size, bool write)
+{
+	while (size) {
+		uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
+		uint64_t bytes = 4 - (pos & 0x3);
+		uint32_t shift = (pos & 0x3) * 8;
+		uint32_t mask = 0xffffffff << shift;
+		uint32_t value = 0;
+
+		if (size < bytes) {
+			mask &= 0xffffffff >> (bytes - size) * 8;
+			bytes = size;
+		}
+
+		if (mask != 0xffffffff) {
+			amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, false);
+			if (write) {
+				value &= ~mask;
+				value |= (*(uint32_t *)buf << shift) & mask;
+				amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, true);
+			} else {
+				value = (value & mask) >> shift;
+				memcpy(buf, &value, bytes);
+			}
+		} else {
+			amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, buf, write);
+		}
+
+		pos += bytes;
+		buf += bytes;
+		size -= bytes;
+	}
+}
+
 /*
  * VRAM access helper functions
  */
@@ -294,13 +342,11 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
  * @write: true - write to vram, otherwise - read from vram
  */
 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
-			       uint32_t *buf, size_t size, bool write)
+			       void *buf, size_t size, bool write)
 {
 	unsigned long flags;
-	uint32_t hi = ~0;
 	uint64_t last;
 
-
 #ifdef CONFIG_64BIT
 	last = min(pos + size, adev->gmc.visible_vram_size);
 	if (last > pos) {
@@ -321,25 +367,12 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
 			return;
 
 		pos += count;
-		buf += count / 4;
+		buf += count;
 		size -= count;
 	}
 #endif
-
 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
-	for (last = pos + size; pos < last; pos += 4) {
-		uint32_t tmp = pos >> 31;
-
-		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
-		if (tmp != hi) {
-			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
-			hi = tmp;
-		}
-		if (write)
-			WREG32_NO_KIQ(mmMM_DATA, *buf++);
-		else
-			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
-	}
+	amdgpu_device_vram_mmio_access_locked(adev, pos, buf, size, write);
 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 6297363ab740..cf5b8bdc2dd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1430,8 +1430,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
 	struct amdgpu_res_cursor cursor;
-	unsigned long flags;
-	uint32_t value = 0;
 	int ret = 0;
 
 	if (bo->mem.mem_type != TTM_PL_VRAM)
@@ -1439,41 +1437,13 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 
 	amdgpu_res_first(&bo->mem, offset, len, &cursor);
 	while (cursor.remaining) {
-		uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
-		uint64_t bytes = 4 - (cursor.start & 3);
-		uint32_t shift = (cursor.start & 3) * 8;
-		uint32_t mask = 0xffffffff << shift;
-
-		if (cursor.size < bytes) {
-			mask &= 0xffffffff >> (bytes - cursor.size) * 8;
-			bytes = cursor.size;
-		}
+		amdgpu_device_vram_access(adev, cursor.start,
+					  buf, cursor.size,
+					  write);
 
-		if (mask != 0xffffffff) {
-			spin_lock_irqsave(&adev->mmio_idx_lock, flags);
-			WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
-			WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
-			value = RREG32_NO_KIQ(mmMM_DATA);
-			if (write) {
-				value &= ~mask;
-				value |= (*(uint32_t *)buf << shift) & mask;
-				WREG32_NO_KIQ(mmMM_DATA, value);
-			}
-			spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
-			if (!write) {
-				value = (value & mask) >> shift;
-				memcpy(buf, &value, bytes);
-			}
-		} else {
-			bytes = cursor.size & ~0x3ULL;
-			amdgpu_device_vram_access(adev, cursor.start,
-						  (uint32_t *)buf, bytes,
-						  write);
-		}
-
-		ret += bytes;
-		buf = (uint8_t *)buf + bytes;
-		amdgpu_res_next(&cursor, bytes);
+		ret += cursor.size;
+		buf += cursor.size;
+		amdgpu_res_next(&cursor, cursor.size);
 	}
 
 	return ret;
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access()
  2021-06-25  3:24 [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access() Kevin Wang
@ 2021-06-25 13:28 ` Lazar, Lijo
  2021-06-28  2:44   ` Wang, Kevin(Yang)
  2021-06-28 14:10 ` Christian König
  1 sibling, 1 reply; 9+ messages in thread
From: Lazar, Lijo @ 2021-06-25 13:28 UTC (permalink / raw)
  To: Kevin Wang, amd-gfx; +Cc: alexander.deucher, christian.koenig



On 6/25/2021 8:54 AM, Kevin Wang wrote:
> 1. add non-aligned address support in amdgpu_device_vram_access()
> 2. reduce duplicate code in amdgpu_ttm_access_memory()
> 
> because the MM_INDEX{HI}/DATA are protected register, it is not working
> in sriov environment when mmio protect feature is enabled (in some asics),
> and the old function amdgpu_ttm_access_memory() will force using MM_INDEX/DATA
> to handle non-aligned address by default, it will cause the register(MM_DATA)
> is always return 0.
> 
> with the patch, the memory will be accessed in the aper way first.
> (in visible memory or enable pcie large-bar feature), then using
> MM_INDEX{HI}/MM_DATA to access rest vram memroy.
> 
> Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 69 ++++++++++++++++------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 42 ++-----------
>   3 files changed, 58 insertions(+), 55 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index d14b4968a026..8095d9a9c857 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1103,7 +1103,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev);
>   int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
>   
>   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
> -			       uint32_t *buf, size_t size, bool write);
> +			       void *buf, size_t size, bool write);
>   uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
>   			    uint32_t reg, uint32_t acc_flags);
>   void amdgpu_device_wreg(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index e6702d136a6d..2047e3c2b365 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -280,6 +280,54 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
>   		amdgpu_acpi_is_power_shift_control_supported());
>   }
>   
> +static inline void amdgpu_device_vram_mmio_align_access_locked(struct amdgpu_device *adev, loff_t pos,
> +							       uint32_t *value, bool write)
> +{
> +	BUG_ON(!IS_ALIGNED(pos, 4));
> +
> +	WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> +	WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
> +	if (write)
> +		WREG32_NO_KIQ(mmMM_DATA, *value);
> +	else
> +		*value = RREG32_NO_KIQ(mmMM_DATA);
> +}
> +
> +static void amdgpu_device_vram_mmio_access_locked(struct amdgpu_device *adev, loff_t pos,
> +						  void *buf, size_t size, bool write)
> +{
> +	while (size) {
> +		uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
> +		uint64_t bytes = 4 - (pos & 0x3);
> +		uint32_t shift = (pos & 0x3) * 8;
> +		uint32_t mask = 0xffffffff << shift;
> +		uint32_t value = 0;
> +
> +		if (size < bytes) {
> +			mask &= 0xffffffff >> (bytes - size) * 8;
> +			bytes = size;
> +		}
> +
> +		if (mask != 0xffffffff) {
> +			amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, false);
> +			if (write) {
> +				value &= ~mask;
> +				value |= (*(uint32_t *)buf << shift) & mask;
> +				amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, true);
> +			} else {
> +				value = (value & mask) >> shift;
> +				memcpy(buf, &value, bytes);
> +			}
> +		} else {
> +			amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, buf, write);
> +		}
> +
> +		pos += bytes;
> +		buf += bytes;
> +		size -= bytes;
> +	}
> +}
> +
>   /*
>    * VRAM access helper functions
>    */
> @@ -294,13 +342,11 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
>    * @write: true - write to vram, otherwise - read from vram
>    */
>   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
> -			       uint32_t *buf, size_t size, bool write)
> +			       void *buf, size_t size, bool write)
>   {
>   	unsigned long flags;
> -	uint32_t hi = ~0;
>   	uint64_t last;
>   
> -
>   #ifdef CONFIG_64BIT
>   	last = min(pos + size, adev->gmc.visible_vram_size);
>   	if (last > pos) {
> @@ -321,25 +367,12 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
>   			return;
>   
>   		pos += count;
> -		buf += count / 4;
> +		buf += count;
>   		size -= count;
>   	}
>   #endif
> -
>   	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> -	for (last = pos + size; pos < last; pos += 4) {
> -		uint32_t tmp = pos >> 31;
> -
> -		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> -		if (tmp != hi) {
> -			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
> -			hi = tmp;
> -		}
> -		if (write)
> -			WREG32_NO_KIQ(mmMM_DATA, *buf++);
> -		else
> -			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
> -	}
> +	amdgpu_device_vram_mmio_access_locked(adev, pos, buf, size, write);
>   	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 6297363ab740..cf5b8bdc2dd3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1430,8 +1430,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>   	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
>   	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
>   	struct amdgpu_res_cursor cursor;
> -	unsigned long flags;
> -	uint32_t value = 0;
>   	int ret = 0;
>   
>   	if (bo->mem.mem_type != TTM_PL_VRAM)
> @@ -1439,41 +1437,13 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>   
>   	amdgpu_res_first(&bo->mem, offset, len, &cursor);
>   	while (cursor.remaining) {
> -		uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
> -		uint64_t bytes = 4 - (cursor.start & 3);
> -		uint32_t shift = (cursor.start & 3) * 8;
> -		uint32_t mask = 0xffffffff << shift;
> -
> -		if (cursor.size < bytes) {
> -			mask &= 0xffffffff >> (bytes - cursor.size) * 8;
> -			bytes = cursor.size;
> -		}
> +		amdgpu_device_vram_access(adev, cursor.start,
> +					  buf, cursor.size,
> +					  write);
>   
> -		if (mask != 0xffffffff) {
> -			spin_lock_irqsave(&adev->mmio_idx_lock, flags);

The new logic seems to skip this mmio_idx_lock for accessing index/data 
pair ragisters. BTW, where is the visible memory aperture check?

Thanks
Lijo

> -			WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
> -			WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
> -			value = RREG32_NO_KIQ(mmMM_DATA);
> -			if (write) {
> -				value &= ~mask;
> -				value |= (*(uint32_t *)buf << shift) & mask;
> -				WREG32_NO_KIQ(mmMM_DATA, value);
> -			}
> -			spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> -			if (!write) {
> -				value = (value & mask) >> shift;
> -				memcpy(buf, &value, bytes);
> -			}
> -		} else {
> -			bytes = cursor.size & ~0x3ULL;
> -			amdgpu_device_vram_access(adev, cursor.start,
> -						  (uint32_t *)buf, bytes,
> -						  write);
> -		}
> -
> -		ret += bytes;
> -		buf = (uint8_t *)buf + bytes;
> -		amdgpu_res_next(&cursor, bytes);
> +		ret += cursor.size;
> +		buf += cursor.size;
> +		amdgpu_res_next(&cursor, cursor.size);
>   	}
>   
>   	return ret;
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access()
  2021-06-25 13:28 ` Lazar, Lijo
@ 2021-06-28  2:44   ` Wang, Kevin(Yang)
  2021-06-28  4:38     ` Zhang, Hawking
  2021-06-28  4:57     ` Lazar, Lijo
  0 siblings, 2 replies; 9+ messages in thread
From: Wang, Kevin(Yang) @ 2021-06-28  2:44 UTC (permalink / raw)
  To: Lazar, Lijo, amd-gfx; +Cc: Deucher, Alexander, Koenig, Christian


[-- Attachment #1.1: Type: text/plain, Size: 9626 bytes --]

[AMD Official Use Only]



________________________________
From: Lazar, Lijo <Lijo.Lazar@amd.com>
Sent: Friday, June 25, 2021 9:28 PM
To: Wang, Kevin(Yang) <Kevin1.Wang@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>
Subject: Re: [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access()



On 6/25/2021 8:54 AM, Kevin Wang wrote:
> 1. add non-aligned address support in amdgpu_device_vram_access()
> 2. reduce duplicate code in amdgpu_ttm_access_memory()
>
> because the MM_INDEX{HI}/DATA are protected register, it is not working
> in sriov environment when mmio protect feature is enabled (in some asics),
> and the old function amdgpu_ttm_access_memory() will force using MM_INDEX/DATA
> to handle non-aligned address by default, it will cause the register(MM_DATA)
> is always return 0.
>
> with the patch, the memory will be accessed in the aper way first.
> (in visible memory or enable pcie large-bar feature), then using
> MM_INDEX{HI}/MM_DATA to access rest vram memroy.
>
> Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 69 ++++++++++++++++------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 42 ++-----------
>   3 files changed, 58 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index d14b4968a026..8095d9a9c857 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1103,7 +1103,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev);
>   int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
>
>   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
> -                            uint32_t *buf, size_t size, bool write);
> +                            void *buf, size_t size, bool write);
>   uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
>                            uint32_t reg, uint32_t acc_flags);
>   void amdgpu_device_wreg(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index e6702d136a6d..2047e3c2b365 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -280,6 +280,54 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
>                amdgpu_acpi_is_power_shift_control_supported());
>   }
>
> +static inline void amdgpu_device_vram_mmio_align_access_locked(struct amdgpu_device *adev, loff_t pos,
> +                                                            uint32_t *value, bool write)
> +{
> +     BUG_ON(!IS_ALIGNED(pos, 4));
> +
> +     WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> +     WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
> +     if (write)
> +             WREG32_NO_KIQ(mmMM_DATA, *value);
> +     else
> +             *value = RREG32_NO_KIQ(mmMM_DATA);
> +}
> +
> +static void amdgpu_device_vram_mmio_access_locked(struct amdgpu_device *adev, loff_t pos,
> +                                               void *buf, size_t size, bool write)
> +{
> +     while (size) {
> +             uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
> +             uint64_t bytes = 4 - (pos & 0x3);
> +             uint32_t shift = (pos & 0x3) * 8;
> +             uint32_t mask = 0xffffffff << shift;
> +             uint32_t value = 0;
> +
> +             if (size < bytes) {
> +                     mask &= 0xffffffff >> (bytes - size) * 8;
> +                     bytes = size;
> +             }
> +
> +             if (mask != 0xffffffff) {
> +                     amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, false);
> +                     if (write) {
> +                             value &= ~mask;
> +                             value |= (*(uint32_t *)buf << shift) & mask;
> +                             amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, true);
> +                     } else {
> +                             value = (value & mask) >> shift;
> +                             memcpy(buf, &value, bytes);
> +                     }
> +             } else {
> +                     amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, buf, write);
> +             }
> +
> +             pos += bytes;
> +             buf += bytes;
> +             size -= bytes;
> +     }
> +}
> +
>   /*
>    * VRAM access helper functions
>    */
> @@ -294,13 +342,11 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
>    * @write: true - write to vram, otherwise - read from vram
>    */
>   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
> -                            uint32_t *buf, size_t size, bool write)
> +                            void *buf, size_t size, bool write)
>   {
>        unsigned long flags;
> -     uint32_t hi = ~0;
>        uint64_t last;
>
> -
>   #ifdef CONFIG_64BIT
>        last = min(pos + size, adev->gmc.visible_vram_size);
>        if (last > pos) {
> @@ -321,25 +367,12 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
>                        return;
>
>                pos += count;
> -             buf += count / 4;
> +             buf += count;
>                size -= count;
>        }
>   #endif
> -
>        spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> -     for (last = pos + size; pos < last; pos += 4) {
> -             uint32_t tmp = pos >> 31;
> -
> -             WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> -             if (tmp != hi) {
> -                     WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
> -                     hi = tmp;
> -             }
> -             if (write)
> -                     WREG32_NO_KIQ(mmMM_DATA, *buf++);
> -             else
> -                     *buf++ = RREG32_NO_KIQ(mmMM_DATA);
> -     }
> +     amdgpu_device_vram_mmio_access_locked(adev, pos, buf, size, write);
>        spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
>   }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 6297363ab740..cf5b8bdc2dd3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1430,8 +1430,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
>        struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
>        struct amdgpu_res_cursor cursor;
> -     unsigned long flags;
> -     uint32_t value = 0;
>        int ret = 0;
>
>        if (bo->mem.mem_type != TTM_PL_VRAM)
> @@ -1439,41 +1437,13 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>
>        amdgpu_res_first(&bo->mem, offset, len, &cursor);
>        while (cursor.remaining) {
> -             uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
> -             uint64_t bytes = 4 - (cursor.start & 3);
> -             uint32_t shift = (cursor.start & 3) * 8;
> -             uint32_t mask = 0xffffffff << shift;
> -
> -             if (cursor.size < bytes) {
> -                     mask &= 0xffffffff >> (bytes - cursor.size) * 8;
> -                     bytes = cursor.size;
> -             }
> +             amdgpu_device_vram_access(adev, cursor.start,
> +                                       buf, cursor.size,
> +                                       write);
>
> -             if (mask != 0xffffffff) {
> -                     spin_lock_irqsave(&adev->mmio_idx_lock, flags);

The new logic seems to skip this mmio_idx_lock for accessing index/data
pair ragisters. BTW, where is the visible memory aperture check?

Thanks
Lijo

[Keivn]:

Hi Lijo,
the mmio_idx_lock lock is in amdgpu_device_vram_access() function.
this patch is extending the amdgpu_device_vram_access() function to support un-aligned address support.

Best Regards,
Kevin
> -                     WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
> -                     WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
> -                     value = RREG32_NO_KIQ(mmMM_DATA);
> -                     if (write) {
> -                             value &= ~mask;
> -                             value |= (*(uint32_t *)buf << shift) & mask;
> -                             WREG32_NO_KIQ(mmMM_DATA, value);
> -                     }
> -                     spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> -                     if (!write) {
> -                             value = (value & mask) >> shift;
> -                             memcpy(buf, &value, bytes);
> -                     }
> -             } else {
> -                     bytes = cursor.size & ~0x3ULL;
> -                     amdgpu_device_vram_access(adev, cursor.start,
> -                                               (uint32_t *)buf, bytes,
> -                                               write);
> -             }
> -
> -             ret += bytes;
> -             buf = (uint8_t *)buf + bytes;
> -             amdgpu_res_next(&cursor, bytes);
> +             ret += cursor.size;
> +             buf += cursor.size;
> +             amdgpu_res_next(&cursor, cursor.size);
>        }
>
>        return ret;
>

[-- Attachment #1.2: Type: text/html, Size: 24604 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access()
  2021-06-28  2:44   ` Wang, Kevin(Yang)
@ 2021-06-28  4:38     ` Zhang, Hawking
  2021-06-28  4:57     ` Lazar, Lijo
  1 sibling, 0 replies; 9+ messages in thread
From: Zhang, Hawking @ 2021-06-28  4:38 UTC (permalink / raw)
  To: Wang, Kevin(Yang), Lazar, Lijo, amd-gfx, Min, Frank
  Cc: Deucher, Alexander, Koenig, Christian


[-- Attachment #1.1: Type: text/plain, Size: 10381 bytes --]

[AMD Official Use Only]

Add @Min, Frank<mailto:Frank.Min@amd.com> for the review.

Regards,
Hawking

From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Wang, Kevin(Yang)
Sent: Monday, June 28, 2021 10:44
To: Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>
Subject: Re: [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access()


[AMD Official Use Only]


[AMD Official Use Only]



________________________________
From: Lazar, Lijo <Lijo.Lazar@amd.com<mailto:Lijo.Lazar@amd.com>>
Sent: Friday, June 25, 2021 9:28 PM
To: Wang, Kevin(Yang) <Kevin1.Wang@amd.com<mailto:Kevin1.Wang@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com<mailto:Alexander.Deucher@amd.com>>; Koenig, Christian <Christian.Koenig@amd.com<mailto:Christian.Koenig@amd.com>>
Subject: Re: [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access()



On 6/25/2021 8:54 AM, Kevin Wang wrote:
> 1. add non-aligned address support in amdgpu_device_vram_access()
> 2. reduce duplicate code in amdgpu_ttm_access_memory()
>
> because the MM_INDEX{HI}/DATA are protected register, it is not working
> in sriov environment when mmio protect feature is enabled (in some asics),
> and the old function amdgpu_ttm_access_memory() will force using MM_INDEX/DATA
> to handle non-aligned address by default, it will cause the register(MM_DATA)
> is always return 0.
>
> with the patch, the memory will be accessed in the aper way first.
> (in visible memory or enable pcie large-bar feature), then using
> MM_INDEX{HI}/MM_DATA to access rest vram memroy.
>
> Signed-off-by: Kevin Wang <kevin1.wang@amd.com<mailto:kevin1.wang@amd.com>>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 69 ++++++++++++++++------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 42 ++-----------
>   3 files changed, 58 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index d14b4968a026..8095d9a9c857 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1103,7 +1103,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev);
>   int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
>
>   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
> -                            uint32_t *buf, size_t size, bool write);
> +                            void *buf, size_t size, bool write);
>   uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
>                            uint32_t reg, uint32_t acc_flags);
>   void amdgpu_device_wreg(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index e6702d136a6d..2047e3c2b365 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -280,6 +280,54 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
>                amdgpu_acpi_is_power_shift_control_supported());
>   }
>
> +static inline void amdgpu_device_vram_mmio_align_access_locked(struct amdgpu_device *adev, loff_t pos,
> +                                                            uint32_t *value, bool write)
> +{
> +     BUG_ON(!IS_ALIGNED(pos, 4));
> +
> +     WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> +     WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
> +     if (write)
> +             WREG32_NO_KIQ(mmMM_DATA, *value);
> +     else
> +             *value = RREG32_NO_KIQ(mmMM_DATA);
> +}
> +
> +static void amdgpu_device_vram_mmio_access_locked(struct amdgpu_device *adev, loff_t pos,
> +                                               void *buf, size_t size, bool write)
> +{
> +     while (size) {
> +             uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
> +             uint64_t bytes = 4 - (pos & 0x3);
> +             uint32_t shift = (pos & 0x3) * 8;
> +             uint32_t mask = 0xffffffff << shift;
> +             uint32_t value = 0;
> +
> +             if (size < bytes) {
> +                     mask &= 0xffffffff >> (bytes - size) * 8;
> +                     bytes = size;
> +             }
> +
> +             if (mask != 0xffffffff) {
> +                     amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, false);
> +                     if (write) {
> +                             value &= ~mask;
> +                             value |= (*(uint32_t *)buf << shift) & mask;
> +                             amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, true);
> +                     } else {
> +                             value = (value & mask) >> shift;
> +                             memcpy(buf, &value, bytes);
> +                     }
> +             } else {
> +                     amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, buf, write);
> +             }
> +
> +             pos += bytes;
> +             buf += bytes;
> +             size -= bytes;
> +     }
> +}
> +
>   /*
>    * VRAM access helper functions
>    */
> @@ -294,13 +342,11 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
>    * @write: true - write to vram, otherwise - read from vram
>    */
>   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
> -                            uint32_t *buf, size_t size, bool write)
> +                            void *buf, size_t size, bool write)
>   {
>        unsigned long flags;
> -     uint32_t hi = ~0;
>        uint64_t last;
>
> -
>   #ifdef CONFIG_64BIT
>        last = min(pos + size, adev->gmc.visible_vram_size);
>        if (last > pos) {
> @@ -321,25 +367,12 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
>                        return;
>
>                pos += count;
> -             buf += count / 4;
> +             buf += count;
>                size -= count;
>        }
>   #endif
> -
>        spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> -     for (last = pos + size; pos < last; pos += 4) {
> -             uint32_t tmp = pos >> 31;
> -
> -             WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> -             if (tmp != hi) {
> -                     WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
> -                     hi = tmp;
> -             }
> -             if (write)
> -                     WREG32_NO_KIQ(mmMM_DATA, *buf++);
> -             else
> -                     *buf++ = RREG32_NO_KIQ(mmMM_DATA);
> -     }
> +     amdgpu_device_vram_mmio_access_locked(adev, pos, buf, size, write);
>        spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
>   }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 6297363ab740..cf5b8bdc2dd3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1430,8 +1430,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
>        struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
>        struct amdgpu_res_cursor cursor;
> -     unsigned long flags;
> -     uint32_t value = 0;
>        int ret = 0;
>
>        if (bo->mem.mem_type != TTM_PL_VRAM)
> @@ -1439,41 +1437,13 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>
>        amdgpu_res_first(&bo->mem, offset, len, &cursor);
>        while (cursor.remaining) {
> -             uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
> -             uint64_t bytes = 4 - (cursor.start & 3);
> -             uint32_t shift = (cursor.start & 3) * 8;
> -             uint32_t mask = 0xffffffff << shift;
> -
> -             if (cursor.size < bytes) {
> -                     mask &= 0xffffffff >> (bytes - cursor.size) * 8;
> -                     bytes = cursor.size;
> -             }
> +             amdgpu_device_vram_access(adev, cursor.start,
> +                                       buf, cursor.size,
> +                                       write);
>
> -             if (mask != 0xffffffff) {
> -                     spin_lock_irqsave(&adev->mmio_idx_lock, flags);

The new logic seems to skip this mmio_idx_lock for accessing index/data
pair ragisters. BTW, where is the visible memory aperture check?

Thanks
Lijo

[Keivn]:

Hi Lijo,
the mmio_idx_lock lock is in amdgpu_device_vram_access() function.
this patch is extending the amdgpu_device_vram_access() function to support un-aligned address support.

Best Regards,
Kevin
> -                     WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
> -                     WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
> -                     value = RREG32_NO_KIQ(mmMM_DATA);
> -                     if (write) {
> -                             value &= ~mask;
> -                             value |= (*(uint32_t *)buf << shift) & mask;
> -                             WREG32_NO_KIQ(mmMM_DATA, value);
> -                     }
> -                     spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> -                     if (!write) {
> -                             value = (value & mask) >> shift;
> -                             memcpy(buf, &value, bytes);
> -                     }
> -             } else {
> -                     bytes = cursor.size & ~0x3ULL;
> -                     amdgpu_device_vram_access(adev, cursor.start,
> -                                               (uint32_t *)buf, bytes,
> -                                               write);
> -             }
> -
> -             ret += bytes;
> -             buf = (uint8_t *)buf + bytes;
> -             amdgpu_res_next(&cursor, bytes);
> +             ret += cursor.size;
> +             buf += cursor.size;
> +             amdgpu_res_next(&cursor, cursor.size);
>        }
>
>        return ret;
>

[-- Attachment #1.2: Type: text/html, Size: 26623 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access()
  2021-06-28  2:44   ` Wang, Kevin(Yang)
  2021-06-28  4:38     ` Zhang, Hawking
@ 2021-06-28  4:57     ` Lazar, Lijo
  1 sibling, 0 replies; 9+ messages in thread
From: Lazar, Lijo @ 2021-06-28  4:57 UTC (permalink / raw)
  To: Wang, Kevin(Yang), amd-gfx; +Cc: Deucher, Alexander, Koenig, Christian



On 6/28/2021 8:14 AM, Wang, Kevin(Yang) wrote:
> [AMD Official Use Only]
> 
> 
> 
> 
> ------------------------------------------------------------------------
> *From:* Lazar, Lijo <Lijo.Lazar@amd.com>
> *Sent:* Friday, June 25, 2021 9:28 PM
> *To:* Wang, Kevin(Yang) <Kevin1.Wang@amd.com>; 
> amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> *Cc:* Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian 
> <Christian.Koenig@amd.com>
> *Subject:* Re: [PATCH] drm/amdgpu: add non-aligned address supported in 
> amdgpu_device_vram_access()
> 
> 
> On 6/25/2021 8:54 AM, Kevin Wang wrote:
>> 1. add non-aligned address support in amdgpu_device_vram_access()
>> 2. reduce duplicate code in amdgpu_ttm_access_memory()
>> 
>> because the MM_INDEX{HI}/DATA are protected register, it is not working
>> in sriov environment when mmio protect feature is enabled (in some asics),
>> and the old function amdgpu_ttm_access_memory() will force using MM_INDEX/DATA
>> to handle non-aligned address by default, it will cause the register(MM_DATA)
>> is always return 0.
>> 
>> with the patch, the memory will be accessed in the aper way first.
>> (in visible memory or enable pcie large-bar feature), then using
>> MM_INDEX{HI}/MM_DATA to access rest vram memroy.
>> 
>> Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 69 ++++++++++++++++------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 42 ++-----------
>>   3 files changed, 58 insertions(+), 55 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index d14b4968a026..8095d9a9c857 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -1103,7 +1103,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev);
>>   int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
>>   
>>   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
>> -                            uint32_t *buf, size_t size, bool write);
>> +                            void *buf, size_t size, bool write);
>>   uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
>>                            uint32_t reg, uint32_t acc_flags);
>>   void amdgpu_device_wreg(struct amdgpu_device *adev,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index e6702d136a6d..2047e3c2b365 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -280,6 +280,54 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
>>                amdgpu_acpi_is_power_shift_control_supported());
>>   }
>>   
>> +static inline void amdgpu_device_vram_mmio_align_access_locked(struct amdgpu_device *adev, loff_t pos,
>> +                                                            uint32_t *value, bool write)
>> +{
>> +     BUG_ON(!IS_ALIGNED(pos, 4));
>> +
>> +     WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
>> +     WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
>> +     if (write)
>> +             WREG32_NO_KIQ(mmMM_DATA, *value);
>> +     else
>> +             *value = RREG32_NO_KIQ(mmMM_DATA);
>> +}
>> +
>> +static void amdgpu_device_vram_mmio_access_locked(struct amdgpu_device *adev, loff_t pos,
>> +                                               void *buf, size_t size, bool write)
>> +{
>> +     while (size) {
>> +             uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
>> +             uint64_t bytes = 4 - (pos & 0x3);
>> +             uint32_t shift = (pos & 0x3) * 8;
>> +             uint32_t mask = 0xffffffff << shift;
>> +             uint32_t value = 0;
>> +
>> +             if (size < bytes) {
>> +                     mask &= 0xffffffff >> (bytes - size) * 8;
>> +                     bytes = size;
>> +             }
>> +
>> +             if (mask != 0xffffffff) {
>> +                     amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, false);
>> +                     if (write) {
>> +                             value &= ~mask;
>> +                             value |= (*(uint32_t *)buf << shift) & mask;
>> +                             amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, true);
>> +                     } else {
>> +                             value = (value & mask) >> shift;
>> +                             memcpy(buf, &value, bytes);
>> +                     }
>> +             } else {
>> +                     amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, buf, write);
>> +             }
>> +
>> +             pos += bytes;
>> +             buf += bytes;
>> +             size -= bytes;
>> +     }
>> +}
>> +
>>   /*
>>    * VRAM access helper functions
>>    */
>> @@ -294,13 +342,11 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
>>    * @write: true - write to vram, otherwise - read from vram
>>    */
>>   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
>> -                            uint32_t *buf, size_t size, bool write)
>> +                            void *buf, size_t size, bool write)
>>   {
>>        unsigned long flags;
>> -     uint32_t hi = ~0;
>>        uint64_t last;
>>   
>> -
>>   #ifdef CONFIG_64BIT
>>        last = min(pos + size, adev->gmc.visible_vram_size);
>>        if (last > pos) {
>> @@ -321,25 +367,12 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
>>                        return;
>>   
>>                pos += count;
>> -             buf += count / 4;
>> +             buf += count;
>>                size -= count;
>>        }
>>   #endif
>> -
>>        spin_lock_irqsave(&adev->mmio_idx_lock, flags);
>> -     for (last = pos + size; pos < last; pos += 4) {
>> -             uint32_t tmp = pos >> 31;
>> -
>> -             WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
>> -             if (tmp != hi) {
>> -                     WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
>> -                     hi = tmp;
>> -             }
>> -             if (write)
>> -                     WREG32_NO_KIQ(mmMM_DATA, *buf++);
>> -             else
>> -                     *buf++ = RREG32_NO_KIQ(mmMM_DATA);
>> -     }
>> +     amdgpu_device_vram_mmio_access_locked(adev, pos, buf, size, write);
>>        spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
>>   }
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 6297363ab740..cf5b8bdc2dd3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -1430,8 +1430,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>>        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
>>        struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
>>        struct amdgpu_res_cursor cursor;
>> -     unsigned long flags;
>> -     uint32_t value = 0;
>>        int ret = 0;
>>   
>>        if (bo->mem.mem_type != TTM_PL_VRAM)
>> @@ -1439,41 +1437,13 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>>   
>>        amdgpu_res_first(&bo->mem, offset, len, &cursor);
>>        while (cursor.remaining) {
>> -             uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
>> -             uint64_t bytes = 4 - (cursor.start & 3);
>> -             uint32_t shift = (cursor.start & 3) * 8;
>> -             uint32_t mask = 0xffffffff << shift;
>> -
>> -             if (cursor.size < bytes) {
>> -                     mask &= 0xffffffff >> (bytes - cursor.size) * 8;
>> -                     bytes = cursor.size;
>> -             }
>> +             amdgpu_device_vram_access(adev, cursor.start,
>> +                                       buf, cursor.size,
>> +                                       write);
>>   
>> -             if (mask != 0xffffffff) {
>> -                     spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> 
> The new logic seems to skip this mmio_idx_lock for accessing index/data
> pair ragisters. BTW, where is the visible memory aperture check?
> 
> Thanks
> Lijo
> 
> [Keivn]:
> 
> Hi Lijo,
> the mmio_idx_lock lock is in amdgpu_device_vram_access() function.
> this patch is extending the amdgpu_device_vram_access() function to 
> support un-aligned address support.
> 
> Best Regards,
> Kevin

Ah, now I see..Thanks.

For consecutive mem accesses, this is a good optimization (as in earlier 
code) as crossing this boundary will be rare. I suggest to keep this.

-		uint32_t tmp = pos >> 31;
-
-		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
-		if (tmp != hi) {
-			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
-			hi = tmp;
-		}

Thanks,
Lijo

>> -                     WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
>> -                     WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
>> -                     value = RREG32_NO_KIQ(mmMM_DATA);
>> -                     if (write) {
>> -                             value &= ~mask;
>> -                             value |= (*(uint32_t *)buf << shift) & mask;
>> -                             WREG32_NO_KIQ(mmMM_DATA, value);
>> -                     }
>> -                     spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
>> -                     if (!write) {
>> -                             value = (value & mask) >> shift;
>> -                             memcpy(buf, &value, bytes);
>> -                     }
>> -             } else {
>> -                     bytes = cursor.size & ~0x3ULL;
>> -                     amdgpu_device_vram_access(adev, cursor.start,
>> -                                               (uint32_t *)buf, bytes,
>> -                                               write);
>> -             }
>> -
>> -             ret += bytes;
>> -             buf = (uint8_t *)buf + bytes;
>> -             amdgpu_res_next(&cursor, bytes);
>> +             ret += cursor.size;
>> +             buf += cursor.size;
>> +             amdgpu_res_next(&cursor, cursor.size);
>>        }
>>   
>>        return ret;
>> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access()
  2021-06-25  3:24 [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access() Kevin Wang
  2021-06-25 13:28 ` Lazar, Lijo
@ 2021-06-28 14:10 ` Christian König
  2021-06-28 14:43   ` Wang, Kevin(Yang)
  1 sibling, 1 reply; 9+ messages in thread
From: Christian König @ 2021-06-28 14:10 UTC (permalink / raw)
  To: Kevin Wang, amd-gfx; +Cc: alexander.deucher, christian.koenig

Am 25.06.21 um 05:24 schrieb Kevin Wang:
> 1. add non-aligned address support in amdgpu_device_vram_access()
> 2. reduce duplicate code in amdgpu_ttm_access_memory()
>
> because the MM_INDEX{HI}/DATA are protected register, it is not working
> in sriov environment when mmio protect feature is enabled (in some asics),
> and the old function amdgpu_ttm_access_memory() will force using MM_INDEX/DATA
> to handle non-aligned address by default, it will cause the register(MM_DATA)
> is always return 0.
>
> with the patch, the memory will be accessed in the aper way first.
> (in visible memory or enable pcie large-bar feature), then using
> MM_INDEX{HI}/MM_DATA to access rest vram memroy.

Well NAK to the whole approach.

The amdgpu_device_vram_access() are intentionally *NOT* using the VRAM 
aperture nor providing byte wise access.

And yes that this doesn't work under SRIOV is completely intentional as 
well.

What we could do is to use the aperture in amdgpu_ttm_access_memory() 
for unaligned access if that is indeed a problem.

Regards,
Christian.


>
> Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 69 ++++++++++++++++------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 42 ++-----------
>   3 files changed, 58 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index d14b4968a026..8095d9a9c857 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1103,7 +1103,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev);
>   int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
>   
>   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
> -			       uint32_t *buf, size_t size, bool write);
> +			       void *buf, size_t size, bool write);
>   uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
>   			    uint32_t reg, uint32_t acc_flags);
>   void amdgpu_device_wreg(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index e6702d136a6d..2047e3c2b365 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -280,6 +280,54 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
>   		amdgpu_acpi_is_power_shift_control_supported());
>   }
>   
> +static inline void amdgpu_device_vram_mmio_align_access_locked(struct amdgpu_device *adev, loff_t pos,
> +							       uint32_t *value, bool write)
> +{
> +	BUG_ON(!IS_ALIGNED(pos, 4));
> +
> +	WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> +	WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
> +	if (write)
> +		WREG32_NO_KIQ(mmMM_DATA, *value);
> +	else
> +		*value = RREG32_NO_KIQ(mmMM_DATA);
> +}
> +
> +static void amdgpu_device_vram_mmio_access_locked(struct amdgpu_device *adev, loff_t pos,
> +						  void *buf, size_t size, bool write)
> +{
> +	while (size) {
> +		uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
> +		uint64_t bytes = 4 - (pos & 0x3);
> +		uint32_t shift = (pos & 0x3) * 8;
> +		uint32_t mask = 0xffffffff << shift;
> +		uint32_t value = 0;
> +
> +		if (size < bytes) {
> +			mask &= 0xffffffff >> (bytes - size) * 8;
> +			bytes = size;
> +		}
> +
> +		if (mask != 0xffffffff) {
> +			amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, false);
> +			if (write) {
> +				value &= ~mask;
> +				value |= (*(uint32_t *)buf << shift) & mask;
> +				amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, true);
> +			} else {
> +				value = (value & mask) >> shift;
> +				memcpy(buf, &value, bytes);
> +			}
> +		} else {
> +			amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, buf, write);
> +		}
> +
> +		pos += bytes;
> +		buf += bytes;
> +		size -= bytes;
> +	}
> +}
> +
>   /*
>    * VRAM access helper functions
>    */
> @@ -294,13 +342,11 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
>    * @write: true - write to vram, otherwise - read from vram
>    */
>   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
> -			       uint32_t *buf, size_t size, bool write)
> +			       void *buf, size_t size, bool write)
>   {
>   	unsigned long flags;
> -	uint32_t hi = ~0;
>   	uint64_t last;
>   
> -
>   #ifdef CONFIG_64BIT
>   	last = min(pos + size, adev->gmc.visible_vram_size);
>   	if (last > pos) {
> @@ -321,25 +367,12 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
>   			return;
>   
>   		pos += count;
> -		buf += count / 4;
> +		buf += count;
>   		size -= count;
>   	}
>   #endif
> -
>   	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> -	for (last = pos + size; pos < last; pos += 4) {
> -		uint32_t tmp = pos >> 31;
> -
> -		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> -		if (tmp != hi) {
> -			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
> -			hi = tmp;
> -		}
> -		if (write)
> -			WREG32_NO_KIQ(mmMM_DATA, *buf++);
> -		else
> -			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
> -	}
> +	amdgpu_device_vram_mmio_access_locked(adev, pos, buf, size, write);
>   	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 6297363ab740..cf5b8bdc2dd3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1430,8 +1430,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>   	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
>   	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
>   	struct amdgpu_res_cursor cursor;
> -	unsigned long flags;
> -	uint32_t value = 0;
>   	int ret = 0;
>   
>   	if (bo->mem.mem_type != TTM_PL_VRAM)
> @@ -1439,41 +1437,13 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>   
>   	amdgpu_res_first(&bo->mem, offset, len, &cursor);
>   	while (cursor.remaining) {
> -		uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
> -		uint64_t bytes = 4 - (cursor.start & 3);
> -		uint32_t shift = (cursor.start & 3) * 8;
> -		uint32_t mask = 0xffffffff << shift;
> -
> -		if (cursor.size < bytes) {
> -			mask &= 0xffffffff >> (bytes - cursor.size) * 8;
> -			bytes = cursor.size;
> -		}
> +		amdgpu_device_vram_access(adev, cursor.start,
> +					  buf, cursor.size,
> +					  write);
>   
> -		if (mask != 0xffffffff) {
> -			spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> -			WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
> -			WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
> -			value = RREG32_NO_KIQ(mmMM_DATA);
> -			if (write) {
> -				value &= ~mask;
> -				value |= (*(uint32_t *)buf << shift) & mask;
> -				WREG32_NO_KIQ(mmMM_DATA, value);
> -			}
> -			spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> -			if (!write) {
> -				value = (value & mask) >> shift;
> -				memcpy(buf, &value, bytes);
> -			}
> -		} else {
> -			bytes = cursor.size & ~0x3ULL;
> -			amdgpu_device_vram_access(adev, cursor.start,
> -						  (uint32_t *)buf, bytes,
> -						  write);
> -		}
> -
> -		ret += bytes;
> -		buf = (uint8_t *)buf + bytes;
> -		amdgpu_res_next(&cursor, bytes);
> +		ret += cursor.size;
> +		buf += cursor.size;
> +		amdgpu_res_next(&cursor, cursor.size);
>   	}
>   
>   	return ret;

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access()
  2021-06-28 14:10 ` Christian König
@ 2021-06-28 14:43   ` Wang, Kevin(Yang)
  2021-06-28 15:34     ` Christian König
  0 siblings, 1 reply; 9+ messages in thread
From: Wang, Kevin(Yang) @ 2021-06-28 14:43 UTC (permalink / raw)
  To: Christian König, amd-gfx
  Cc: Deucher, Alexander, Lazar, Lijo, Min,  Frank, Koenig, Christian,
	Zhang, Hawking


[-- Attachment #1.1: Type: text/plain, Size: 10478 bytes --]

[AMD Official Use Only]

Hi Chris,

amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, unsigned long offset, void *buf, int len,  int write)

the above function will be called from kernel side (likes 'get_user_pages' code path),  and the function should accept non-aligned addresses.
without this patch, this function will be using MM_INDEX/DATA to handle aligned address whether in visible memory or not.
for this kind of case , I think the driver should check whether VRAM aperture works first, then using MM_INDEX/DATA as a backup option.

for this patch, I only extend amdgpu_device_vram_access() function to support un-aligned case, and the new function is fully compatible with the old code logic.
I can't understand why you give a NAK about this patch, I think it's a good optimization, how do you think?
thanks.

Best Regards,
Kevin
________________________________
From: Christian König <ckoenig.leichtzumerken@gmail.com>
Sent: Monday, June 28, 2021 10:10 PM
To: Wang, Kevin(Yang) <Kevin1.Wang@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>
Subject: Re: [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access()

Am 25.06.21 um 05:24 schrieb Kevin Wang:
> 1. add non-aligned address support in amdgpu_device_vram_access()
> 2. reduce duplicate code in amdgpu_ttm_access_memory()
>
> because the MM_INDEX{HI}/DATA are protected register, it is not working
> in sriov environment when mmio protect feature is enabled (in some asics),
> and the old function amdgpu_ttm_access_memory() will force using MM_INDEX/DATA
> to handle non-aligned address by default, it will cause the register(MM_DATA)
> is always return 0.
>
> with the patch, the memory will be accessed in the aper way first.
> (in visible memory or enable pcie large-bar feature), then using
> MM_INDEX{HI}/MM_DATA to access rest vram memroy.

Well NAK to the whole approach.

The amdgpu_device_vram_access() are intentionally *NOT* using the VRAM
aperture nor providing byte wise access.

And yes that this doesn't work under SRIOV is completely intentional as
well.

What we could do is to use the aperture in amdgpu_ttm_access_memory()
for unaligned access if that is indeed a problem.

Regards,
Christian.


>
> Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 69 ++++++++++++++++------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 42 ++-----------
>   3 files changed, 58 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index d14b4968a026..8095d9a9c857 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1103,7 +1103,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev);
>   int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
>
>   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
> -                            uint32_t *buf, size_t size, bool write);
> +                            void *buf, size_t size, bool write);
>   uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
>                            uint32_t reg, uint32_t acc_flags);
>   void amdgpu_device_wreg(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index e6702d136a6d..2047e3c2b365 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -280,6 +280,54 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
>                amdgpu_acpi_is_power_shift_control_supported());
>   }
>
> +static inline void amdgpu_device_vram_mmio_align_access_locked(struct amdgpu_device *adev, loff_t pos,
> +                                                            uint32_t *value, bool write)
> +{
> +     BUG_ON(!IS_ALIGNED(pos, 4));
> +
> +     WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> +     WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
> +     if (write)
> +             WREG32_NO_KIQ(mmMM_DATA, *value);
> +     else
> +             *value = RREG32_NO_KIQ(mmMM_DATA);
> +}
> +
> +static void amdgpu_device_vram_mmio_access_locked(struct amdgpu_device *adev, loff_t pos,
> +                                               void *buf, size_t size, bool write)
> +{
> +     while (size) {
> +             uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
> +             uint64_t bytes = 4 - (pos & 0x3);
> +             uint32_t shift = (pos & 0x3) * 8;
> +             uint32_t mask = 0xffffffff << shift;
> +             uint32_t value = 0;
> +
> +             if (size < bytes) {
> +                     mask &= 0xffffffff >> (bytes - size) * 8;
> +                     bytes = size;
> +             }
> +
> +             if (mask != 0xffffffff) {
> +                     amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, false);
> +                     if (write) {
> +                             value &= ~mask;
> +                             value |= (*(uint32_t *)buf << shift) & mask;
> +                             amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, true);
> +                     } else {
> +                             value = (value & mask) >> shift;
> +                             memcpy(buf, &value, bytes);
> +                     }
> +             } else {
> +                     amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, buf, write);
> +             }
> +
> +             pos += bytes;
> +             buf += bytes;
> +             size -= bytes;
> +     }
> +}
> +
>   /*
>    * VRAM access helper functions
>    */
> @@ -294,13 +342,11 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
>    * @write: true - write to vram, otherwise - read from vram
>    */
>   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
> -                            uint32_t *buf, size_t size, bool write)
> +                            void *buf, size_t size, bool write)
>   {
>        unsigned long flags;
> -     uint32_t hi = ~0;
>        uint64_t last;
>
> -
>   #ifdef CONFIG_64BIT
>        last = min(pos + size, adev->gmc.visible_vram_size);
>        if (last > pos) {
> @@ -321,25 +367,12 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
>                        return;
>
>                pos += count;
> -             buf += count / 4;
> +             buf += count;
>                size -= count;
>        }
>   #endif
> -
>        spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> -     for (last = pos + size; pos < last; pos += 4) {
> -             uint32_t tmp = pos >> 31;
> -
> -             WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> -             if (tmp != hi) {
> -                     WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
> -                     hi = tmp;
> -             }
> -             if (write)
> -                     WREG32_NO_KIQ(mmMM_DATA, *buf++);
> -             else
> -                     *buf++ = RREG32_NO_KIQ(mmMM_DATA);
> -     }
> +     amdgpu_device_vram_mmio_access_locked(adev, pos, buf, size, write);
>        spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
>   }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 6297363ab740..cf5b8bdc2dd3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1430,8 +1430,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
>        struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
>        struct amdgpu_res_cursor cursor;
> -     unsigned long flags;
> -     uint32_t value = 0;
>        int ret = 0;
>
>        if (bo->mem.mem_type != TTM_PL_VRAM)
> @@ -1439,41 +1437,13 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>
>        amdgpu_res_first(&bo->mem, offset, len, &cursor);
>        while (cursor.remaining) {
> -             uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
> -             uint64_t bytes = 4 - (cursor.start & 3);
> -             uint32_t shift = (cursor.start & 3) * 8;
> -             uint32_t mask = 0xffffffff << shift;
> -
> -             if (cursor.size < bytes) {
> -                     mask &= 0xffffffff >> (bytes - cursor.size) * 8;
> -                     bytes = cursor.size;
> -             }
> +             amdgpu_device_vram_access(adev, cursor.start,
> +                                       buf, cursor.size,
> +                                       write);
>
> -             if (mask != 0xffffffff) {
> -                     spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> -                     WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
> -                     WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
> -                     value = RREG32_NO_KIQ(mmMM_DATA);
> -                     if (write) {
> -                             value &= ~mask;
> -                             value |= (*(uint32_t *)buf << shift) & mask;
> -                             WREG32_NO_KIQ(mmMM_DATA, value);
> -                     }
> -                     spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> -                     if (!write) {
> -                             value = (value & mask) >> shift;
> -                             memcpy(buf, &value, bytes);
> -                     }
> -             } else {
> -                     bytes = cursor.size & ~0x3ULL;
> -                     amdgpu_device_vram_access(adev, cursor.start,
> -                                               (uint32_t *)buf, bytes,
> -                                               write);
> -             }
> -
> -             ret += bytes;
> -             buf = (uint8_t *)buf + bytes;
> -             amdgpu_res_next(&cursor, bytes);
> +             ret += cursor.size;
> +             buf += cursor.size;
> +             amdgpu_res_next(&cursor, cursor.size);
>        }
>
>        return ret;


[-- Attachment #1.2: Type: text/html, Size: 28008 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access()
  2021-06-28 14:43   ` Wang, Kevin(Yang)
@ 2021-06-28 15:34     ` Christian König
  2021-06-29  2:08       ` Wang, Kevin(Yang)
  0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2021-06-28 15:34 UTC (permalink / raw)
  To: Wang, Kevin(Yang), amd-gfx
  Cc: Deucher, Alexander, Lazar, Lijo, Min, Frank, Koenig, Christian,
	Zhang, Hawking


[-- Attachment #1.1: Type: text/plain, Size: 11143 bytes --]

Hi Kevin,

the primary use of amdgpu_device_vram_access() is to gate direct kernel 
access to VRAM using the MM_INDEX/MM_DATA registers or aperture. It 
should by design never deal with data sizes != 4 bytes.

The amdgpu_ttm_access_memory() is for debug access to the backing store 
of BOs and also needs to handle per byte access.

What we can do is to also use amdgpu_device_vram_access() in 
amdgpu_device_vram_access() for the masked access, which would also 
solve a hot plug bug as far as I can see here.

But moving the byte access logic into amdgpu_device_vram_access() makes 
no sense at all.

Regards,
Christian.

Am 28.06.21 um 16:43 schrieb Wang, Kevin(Yang):
>
> [AMD Official Use Only]
>
>
> Hi Chris,
>
> amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, unsigned long 
> offset, void *buf, int len,  int write)
>
> the above function will be called from kernel side (likes 
> 'get_user_pages' code path),  and the function should accept 
> non-aligned addresses.
> without this patch, this function will be using MM_INDEX/DATA to 
> handle aligned address whether in visible memory or not.
> for this kind of case , I think the driver should check whether VRAM 
> aperture works first, then using MM_INDEX/DATA as a backup option.
>
> for this patch, I only extend amdgpu_device_vram_access() function to 
> support un-aligned case, and the new function is fully compatible with 
> the old code logic.
> I can't understand why you give a NAK about this patch, I think it's a 
> good optimization, how do you think?
> thanks.
>
> Best Regards,
> Kevin
> ------------------------------------------------------------------------
> *From:* Christian König <ckoenig.leichtzumerken@gmail.com>
> *Sent:* Monday, June 28, 2021 10:10 PM
> *To:* Wang, Kevin(Yang) <Kevin1.Wang@amd.com>; 
> amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> *Cc:* Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, 
> Christian <Christian.Koenig@amd.com>
> *Subject:* Re: [PATCH] drm/amdgpu: add non-aligned address supported 
> in amdgpu_device_vram_access()
> Am 25.06.21 um 05:24 schrieb Kevin Wang:
> > 1. add non-aligned address support in amdgpu_device_vram_access()
> > 2. reduce duplicate code in amdgpu_ttm_access_memory()
> >
> > because the MM_INDEX{HI}/DATA are protected register, it is not working
> > in sriov environment when mmio protect feature is enabled (in some 
> asics),
> > and the old function amdgpu_ttm_access_memory() will force using 
> MM_INDEX/DATA
> > to handle non-aligned address by default, it will cause the 
> register(MM_DATA)
> > is always return 0.
> >
> > with the patch, the memory will be accessed in the aper way first.
> > (in visible memory or enable pcie large-bar feature), then using
> > MM_INDEX{HI}/MM_DATA to access rest vram memroy.
>
> Well NAK to the whole approach.
>
> The amdgpu_device_vram_access() are intentionally *NOT* using the VRAM
> aperture nor providing byte wise access.
>
> And yes that this doesn't work under SRIOV is completely intentional as
> well.
>
> What we could do is to use the aperture in amdgpu_ttm_access_memory()
> for unaligned access if that is indeed a problem.
>
> Regards,
> Christian.
>
>
> >
> > Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 69 ++++++++++++++++------
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 42 ++-----------
> >   3 files changed, 58 insertions(+), 55 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index d14b4968a026..8095d9a9c857 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -1103,7 +1103,7 @@ void amdgpu_device_fini(struct amdgpu_device 
> *adev);
> >   int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
> >
> >   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
> > -                            uint32_t *buf, size_t size, bool write);
> > +                            void *buf, size_t size, bool write);
> >   uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
> >                            uint32_t reg, uint32_t acc_flags);
> >   void amdgpu_device_wreg(struct amdgpu_device *adev,
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > index e6702d136a6d..2047e3c2b365 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > @@ -280,6 +280,54 @@ bool amdgpu_device_supports_smart_shift(struct 
> drm_device *dev)
> > amdgpu_acpi_is_power_shift_control_supported());
> >   }
> >
> > +static inline void 
> amdgpu_device_vram_mmio_align_access_locked(struct amdgpu_device 
> *adev, loff_t pos,
> > + uint32_t *value, bool write)
> > +{
> > +     BUG_ON(!IS_ALIGNED(pos, 4));
> > +
> > +     WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> > +     WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
> > +     if (write)
> > +             WREG32_NO_KIQ(mmMM_DATA, *value);
> > +     else
> > +             *value = RREG32_NO_KIQ(mmMM_DATA);
> > +}
> > +
> > +static void amdgpu_device_vram_mmio_access_locked(struct 
> amdgpu_device *adev, loff_t pos,
> > + void *buf, size_t size, bool write)
> > +{
> > +     while (size) {
> > +             uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
> > +             uint64_t bytes = 4 - (pos & 0x3);
> > +             uint32_t shift = (pos & 0x3) * 8;
> > +             uint32_t mask = 0xffffffff << shift;
> > +             uint32_t value = 0;
> > +
> > +             if (size < bytes) {
> > +                     mask &= 0xffffffff >> (bytes - size) * 8;
> > +                     bytes = size;
> > +             }
> > +
> > +             if (mask != 0xffffffff) {
> > + amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, 
> &value, false);
> > +                     if (write) {
> > +                             value &= ~mask;
> > +                             value |= (*(uint32_t *)buf << shift) & 
> mask;
> > + amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, 
> &value, true);
> > +                     } else {
> > +                             value = (value & mask) >> shift;
> > +                             memcpy(buf, &value, bytes);
> > +                     }
> > +             } else {
> > + amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, 
> buf, write);
> > +             }
> > +
> > +             pos += bytes;
> > +             buf += bytes;
> > +             size -= bytes;
> > +     }
> > +}
> > +
> >   /*
> >    * VRAM access helper functions
> >    */
> > @@ -294,13 +342,11 @@ bool amdgpu_device_supports_smart_shift(struct 
> drm_device *dev)
> >    * @write: true - write to vram, otherwise - read from vram
> >    */
> >   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
> > -                            uint32_t *buf, size_t size, bool write)
> > +                            void *buf, size_t size, bool write)
> >   {
> >        unsigned long flags;
> > -     uint32_t hi = ~0;
> >        uint64_t last;
> >
> > -
> >   #ifdef CONFIG_64BIT
> >        last = min(pos + size, adev->gmc.visible_vram_size);
> >        if (last > pos) {
> > @@ -321,25 +367,12 @@ void amdgpu_device_vram_access(struct 
> amdgpu_device *adev, loff_t pos,
> >                        return;
> >
> >                pos += count;
> > -             buf += count / 4;
> > +             buf += count;
> >                size -= count;
> >        }
> >   #endif
> > -
> > spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> > -     for (last = pos + size; pos < last; pos += 4) {
> > -             uint32_t tmp = pos >> 31;
> > -
> > -             WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> > -             if (tmp != hi) {
> > -                     WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
> > -                     hi = tmp;
> > -             }
> > -             if (write)
> > -                     WREG32_NO_KIQ(mmMM_DATA, *buf++);
> > -             else
> > -                     *buf++ = RREG32_NO_KIQ(mmMM_DATA);
> > -     }
> > +     amdgpu_device_vram_mmio_access_locked(adev, pos, buf, size, 
> write);
> > spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> >   }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > index 6297363ab740..cf5b8bdc2dd3 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > @@ -1430,8 +1430,6 @@ static int amdgpu_ttm_access_memory(struct 
> ttm_buffer_object *bo,
> >        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
> >        struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
> >        struct amdgpu_res_cursor cursor;
> > -     unsigned long flags;
> > -     uint32_t value = 0;
> >        int ret = 0;
> >
> >        if (bo->mem.mem_type != TTM_PL_VRAM)
> > @@ -1439,41 +1437,13 @@ static int amdgpu_ttm_access_memory(struct 
> ttm_buffer_object *bo,
> >
> >        amdgpu_res_first(&bo->mem, offset, len, &cursor);
> >        while (cursor.remaining) {
> > -             uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
> > -             uint64_t bytes = 4 - (cursor.start & 3);
> > -             uint32_t shift = (cursor.start & 3) * 8;
> > -             uint32_t mask = 0xffffffff << shift;
> > -
> > -             if (cursor.size < bytes) {
> > -                     mask &= 0xffffffff >> (bytes - cursor.size) * 8;
> > -                     bytes = cursor.size;
> > -             }
> > +             amdgpu_device_vram_access(adev, cursor.start,
> > +                                       buf, cursor.size,
> > +                                       write);
> >
> > -             if (mask != 0xffffffff) {
> > - spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> > -                     WREG32_NO_KIQ(mmMM_INDEX, 
> ((uint32_t)aligned_pos) | 0x80000000);
> > -                     WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
> > -                     value = RREG32_NO_KIQ(mmMM_DATA);
> > -                     if (write) {
> > -                             value &= ~mask;
> > -                             value |= (*(uint32_t *)buf << shift) & 
> mask;
> > - WREG32_NO_KIQ(mmMM_DATA, value);
> > -                     }
> > - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> > -                     if (!write) {
> > -                             value = (value & mask) >> shift;
> > -                             memcpy(buf, &value, bytes);
> > -                     }
> > -             } else {
> > -                     bytes = cursor.size & ~0x3ULL;
> > - amdgpu_device_vram_access(adev, cursor.start,
> > - (uint32_t *)buf, bytes,
> > - write);
> > -             }
> > -
> > -             ret += bytes;
> > -             buf = (uint8_t *)buf + bytes;
> > -             amdgpu_res_next(&cursor, bytes);
> > +             ret += cursor.size;
> > +             buf += cursor.size;
> > +             amdgpu_res_next(&cursor, cursor.size);
> >        }
> >
> >        return ret;
>


[-- Attachment #1.2: Type: text/html, Size: 26228 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access()
  2021-06-28 15:34     ` Christian König
@ 2021-06-29  2:08       ` Wang, Kevin(Yang)
  0 siblings, 0 replies; 9+ messages in thread
From: Wang, Kevin(Yang) @ 2021-06-29  2:08 UTC (permalink / raw)
  To: Christian König, amd-gfx
  Cc: Deucher, Alexander, Lazar, Lijo, Min,  Frank, Koenig, Christian,
	Zhang, Hawking


[-- Attachment #1.1: Type: text/plain, Size: 12797 bytes --]

[AMD Official Use Only]

Hi Chris,

<comments inline>

the primary use of amdgpu_device_vram_access() is to gate direct kernel access to VRAM using the MM_INDEX/MM_DATA registers or aperture. It should by design never deal with data sizes != 4 bytes.
[kevin]:
NOT​​ size aligned, it is offset (address) aligned, e.g.: RW 0x0023 5bytes. ( it is not supported without this patch)

The amdgpu_ttm_access_memory() is for debug access to the backing store of BOs and needs to handle per byte access.
[kevin]:
yes, this function support per byte access, but it will use MM_INDEX/DATA to handle un-aligned address whether it is in visible memory or not. with mmio protect feature is enabled, the MM_INDEX/DATA is not working well, but in fact, the driver can access this vram memory when offset is in visible range.

Best Regards,
Kevin
________________________________
From: Christian König <ckoenig.leichtzumerken@gmail.com>
Sent: Monday, June 28, 2021 11:34 PM
To: Wang, Kevin(Yang) <Kevin1.Wang@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>; Min, Frank <Frank.Min@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>
Subject: Re: [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access()

Hi Kevin,

the primary use of amdgpu_device_vram_access() is to gate direct kernel access to VRAM using the MM_INDEX/MM_DATA registers or aperture. It should by design never deal with data sizes != 4 bytes.

The amdgpu_ttm_access_memory() is for debug access to the backing store of BOs and also needs to handle per byte access.

What we can do is to also use amdgpu_device_vram_access() in amdgpu_device_vram_access() for the masked access, which would also solve a hot plug bug as far as I can see here.

But moving the byte access logic into amdgpu_device_vram_access() makes no sense at all.

Regards,
Christian.

Am 28.06.21 um 16:43 schrieb Wang, Kevin(Yang):

[AMD Official Use Only]

Hi Chris,

amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, unsigned long offset, void *buf, int len,  int write)

the above function will be called from kernel side (likes 'get_user_pages' code path),  and the function should accept non-aligned addresses.
without this patch, this function will be using MM_INDEX/DATA to handle aligned address whether in visible memory or not.
for this kind of case , I think the driver should check whether VRAM aperture works first, then using MM_INDEX/DATA as a backup option.

for this patch, I only extend amdgpu_device_vram_access() function to support un-aligned case, and the new function is fully compatible with the old code logic.
I can't understand why you give a NAK about this patch, I think it's a good optimization, how do you think?
thanks.

Best Regards,
Kevin
________________________________
From: Christian König <ckoenig.leichtzumerken@gmail.com><mailto:ckoenig.leichtzumerken@gmail.com>
Sent: Monday, June 28, 2021 10:10 PM
To: Wang, Kevin(Yang) <Kevin1.Wang@amd.com><mailto:Kevin1.Wang@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org><mailto:amd-gfx@lists.freedesktop.org>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com><mailto:Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com><mailto:Christian.Koenig@amd.com>
Subject: Re: [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access()

Am 25.06.21 um 05:24 schrieb Kevin Wang:
> 1. add non-aligned address support in amdgpu_device_vram_access()
> 2. reduce duplicate code in amdgpu_ttm_access_memory()
>
> because the MM_INDEX{HI}/DATA are protected register, it is not working
> in sriov environment when mmio protect feature is enabled (in some asics),
> and the old function amdgpu_ttm_access_memory() will force using MM_INDEX/DATA
> to handle non-aligned address by default, it will cause the register(MM_DATA)
> is always return 0.
>
> with the patch, the memory will be accessed in the aper way first.
> (in visible memory or enable pcie large-bar feature), then using
> MM_INDEX{HI}/MM_DATA to access rest vram memroy.

Well NAK to the whole approach.

The amdgpu_device_vram_access() are intentionally *NOT* using the VRAM
aperture nor providing byte wise access.

And yes that this doesn't work under SRIOV is completely intentional as
well.

What we could do is to use the aperture in amdgpu_ttm_access_memory()
for unaligned access if that is indeed a problem.

Regards,
Christian.


>
> Signed-off-by: Kevin Wang <kevin1.wang@amd.com><mailto:kevin1.wang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 69 ++++++++++++++++------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 42 ++-----------
>   3 files changed, 58 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index d14b4968a026..8095d9a9c857 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1103,7 +1103,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev);
>   int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
>
>   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
> -                            uint32_t *buf, size_t size, bool write);
> +                            void *buf, size_t size, bool write);
>   uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
>                            uint32_t reg, uint32_t acc_flags);
>   void amdgpu_device_wreg(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index e6702d136a6d..2047e3c2b365 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -280,6 +280,54 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
>                amdgpu_acpi_is_power_shift_control_supported());
>   }
>
> +static inline void amdgpu_device_vram_mmio_align_access_locked(struct amdgpu_device *adev, loff_t pos,
> +                                                            uint32_t *value, bool write)
> +{
> +     BUG_ON(!IS_ALIGNED(pos, 4));
> +
> +     WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> +     WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
> +     if (write)
> +             WREG32_NO_KIQ(mmMM_DATA, *value);
> +     else
> +             *value = RREG32_NO_KIQ(mmMM_DATA);
> +}
> +
> +static void amdgpu_device_vram_mmio_access_locked(struct amdgpu_device *adev, loff_t pos,
> +                                               void *buf, size_t size, bool write)
> +{
> +     while (size) {
> +             uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
> +             uint64_t bytes = 4 - (pos & 0x3);
> +             uint32_t shift = (pos & 0x3) * 8;
> +             uint32_t mask = 0xffffffff << shift;
> +             uint32_t value = 0;
> +
> +             if (size < bytes) {
> +                     mask &= 0xffffffff >> (bytes - size) * 8;
> +                     bytes = size;
> +             }
> +
> +             if (mask != 0xffffffff) {
> +                     amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, false);
> +                     if (write) {
> +                             value &= ~mask;
> +                             value |= (*(uint32_t *)buf << shift) & mask;
> +                             amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, true);
> +                     } else {
> +                             value = (value & mask) >> shift;
> +                             memcpy(buf, &value, bytes);
> +                     }
> +             } else {
> +                     amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, buf, write);
> +             }
> +
> +             pos += bytes;
> +             buf += bytes;
> +             size -= bytes;
> +     }
> +}
> +
>   /*
>    * VRAM access helper functions
>    */
> @@ -294,13 +342,11 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
>    * @write: true - write to vram, otherwise - read from vram
>    */
>   void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
> -                            uint32_t *buf, size_t size, bool write)
> +                            void *buf, size_t size, bool write)
>   {
>        unsigned long flags;
> -     uint32_t hi = ~0;
>        uint64_t last;
>
> -
>   #ifdef CONFIG_64BIT
>        last = min(pos + size, adev->gmc.visible_vram_size);
>        if (last > pos) {
> @@ -321,25 +367,12 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
>                        return;
>
>                pos += count;
> -             buf += count / 4;
> +             buf += count;
>                size -= count;
>        }
>   #endif
> -
>        spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> -     for (last = pos + size; pos < last; pos += 4) {
> -             uint32_t tmp = pos >> 31;
> -
> -             WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> -             if (tmp != hi) {
> -                     WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
> -                     hi = tmp;
> -             }
> -             if (write)
> -                     WREG32_NO_KIQ(mmMM_DATA, *buf++);
> -             else
> -                     *buf++ = RREG32_NO_KIQ(mmMM_DATA);
> -     }
> +     amdgpu_device_vram_mmio_access_locked(adev, pos, buf, size, write);
>        spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
>   }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 6297363ab740..cf5b8bdc2dd3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1430,8 +1430,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
>        struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
>        struct amdgpu_res_cursor cursor;
> -     unsigned long flags;
> -     uint32_t value = 0;
>        int ret = 0;
>
>        if (bo->mem.mem_type != TTM_PL_VRAM)
> @@ -1439,41 +1437,13 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>
>        amdgpu_res_first(&bo->mem, offset, len, &cursor);
>        while (cursor.remaining) {
> -             uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
> -             uint64_t bytes = 4 - (cursor.start & 3);
> -             uint32_t shift = (cursor.start & 3) * 8;
> -             uint32_t mask = 0xffffffff << shift;
> -
> -             if (cursor.size < bytes) {
> -                     mask &= 0xffffffff >> (bytes - cursor.size) * 8;
> -                     bytes = cursor.size;
> -             }
> +             amdgpu_device_vram_access(adev, cursor.start,
> +                                       buf, cursor.size,
> +                                       write);
>
> -             if (mask != 0xffffffff) {
> -                     spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> -                     WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
> -                     WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
> -                     value = RREG32_NO_KIQ(mmMM_DATA);
> -                     if (write) {
> -                             value &= ~mask;
> -                             value |= (*(uint32_t *)buf << shift) & mask;
> -                             WREG32_NO_KIQ(mmMM_DATA, value);
> -                     }
> -                     spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> -                     if (!write) {
> -                             value = (value & mask) >> shift;
> -                             memcpy(buf, &value, bytes);
> -                     }
> -             } else {
> -                     bytes = cursor.size & ~0x3ULL;
> -                     amdgpu_device_vram_access(adev, cursor.start,
> -                                               (uint32_t *)buf, bytes,
> -                                               write);
> -             }
> -
> -             ret += bytes;
> -             buf = (uint8_t *)buf + bytes;
> -             amdgpu_res_next(&cursor, bytes);
> +             ret += cursor.size;
> +             buf += cursor.size;
> +             amdgpu_res_next(&cursor, cursor.size);
>        }
>
>        return ret;



[-- Attachment #1.2: Type: text/html, Size: 32845 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2021-06-29  2:08 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-25  3:24 [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access() Kevin Wang
2021-06-25 13:28 ` Lazar, Lijo
2021-06-28  2:44   ` Wang, Kevin(Yang)
2021-06-28  4:38     ` Zhang, Hawking
2021-06-28  4:57     ` Lazar, Lijo
2021-06-28 14:10 ` Christian König
2021-06-28 14:43   ` Wang, Kevin(Yang)
2021-06-28 15:34     ` Christian König
2021-06-29  2:08       ` Wang, Kevin(Yang)

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.