All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH v2] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory()
@ 2021-07-13  3:23 Kevin Wang
  2021-07-13  7:11 ` Christian König
  0 siblings, 1 reply; 4+ messages in thread
From: Kevin Wang @ 2021-07-13  3:23 UTC (permalink / raw)
  To: amd-gfx
  Cc: Kevin Wang, lijo.lazar, alexander.deucher, frank.min,
	christian.koenig, hawking.zhang

1. using vram aper to access vram if possible
2. avoid MM_INDEX/MM_DATA is not working when mmio protect feature is
enabled.

Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 126 +++++++++++++++++-------
 1 file changed, 89 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 2aa2eb5de37a..8f6f605bc459 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1407,6 +1407,91 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 	return ttm_bo_eviction_valuable(bo, place);
 }
 
+static void amdgpu_ttm_vram_mm_align_access(struct amdgpu_device *adev, loff_t pos,
+					    uint32_t *value, bool write)
+{
+	unsigned long flags;
+
+	BUG_ON(!IS_ALIGNED(pos, 4));
+
+	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
+	WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
+	WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
+	if (write)
+		WREG32_NO_KIQ(mmMM_DATA, *value);
+	else
+		*value = RREG32_NO_KIQ(mmMM_DATA);
+	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+}
+
+static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
+				      void *buf, size_t size, bool write)
+{
+	while (size) {
+		uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
+		uint64_t bytes = 4 - (pos & 0x3);
+		uint32_t shift = (pos & 0x3) * 8;
+		uint32_t mask = 0xffffffff << shift;
+		uint32_t value = 0;
+
+		if (size < bytes) {
+			mask &= 0xffffffff >> (bytes - size) * 8;
+			bytes = size;
+		}
+
+		if (mask != 0xffffffff) {
+			amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, &value, false);
+			if (write) {
+				value &= ~mask;
+				value |= (*(uint32_t *)buf << shift) & mask;
+				amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, &value, true);
+			} else {
+				value = (value & mask) >> shift;
+				memcpy(buf, &value, bytes);
+			}
+		} else {
+			amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, buf, write);
+		}
+
+		pos += bytes;
+		buf += bytes;
+		size -= bytes;
+	}
+}
+
+static void amdgpu_ttm_vram_access(struct amdgpu_device *adev, loff_t pos,
+				   void *buf, size_t size, bool write)
+{
+	uint64_t last;
+
+#ifdef CONFIG_64BIT
+	last = min(pos + size, adev->gmc.visible_vram_size);
+	if (last > pos) {
+		void __iomem *addr = adev->mman.aper_base_kaddr + pos;
+		size_t count = last - pos;
+
+		if (write) {
+			memcpy_toio(addr, buf, count);
+			mb();
+			amdgpu_device_flush_hdp(adev, NULL);
+		} else {
+			amdgpu_device_invalidate_hdp(adev, NULL);
+			mb();
+			memcpy_fromio(buf, addr, count);
+		}
+
+		if (count == size)
+			return;
+
+		pos += count;
+		buf += count;
+		size -= count;
+	}
+#endif
+
+	amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
+}
+
 /**
  * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
  *
@@ -1426,8 +1511,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
 	struct amdgpu_res_cursor cursor;
-	unsigned long flags;
-	uint32_t value = 0;
 	int ret = 0;
 
 	if (bo->mem.mem_type != TTM_PL_VRAM)
@@ -1435,41 +1518,10 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 
 	amdgpu_res_first(&bo->mem, offset, len, &cursor);
 	while (cursor.remaining) {
-		uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
-		uint64_t bytes = 4 - (cursor.start & 3);
-		uint32_t shift = (cursor.start & 3) * 8;
-		uint32_t mask = 0xffffffff << shift;
-
-		if (cursor.size < bytes) {
-			mask &= 0xffffffff >> (bytes - cursor.size) * 8;
-			bytes = cursor.size;
-		}
-
-		if (mask != 0xffffffff) {
-			spin_lock_irqsave(&adev->mmio_idx_lock, flags);
-			WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
-			WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
-			value = RREG32_NO_KIQ(mmMM_DATA);
-			if (write) {
-				value &= ~mask;
-				value |= (*(uint32_t *)buf << shift) & mask;
-				WREG32_NO_KIQ(mmMM_DATA, value);
-			}
-			spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
-			if (!write) {
-				value = (value & mask) >> shift;
-				memcpy(buf, &value, bytes);
-			}
-		} else {
-			bytes = cursor.size & ~0x3ULL;
-			amdgpu_device_vram_access(adev, cursor.start,
-						  (uint32_t *)buf, bytes,
-						  write);
-		}
-
-		ret += bytes;
-		buf = (uint8_t *)buf + bytes;
-		amdgpu_res_next(&cursor, bytes);
+		amdgpu_ttm_vram_access(adev, cursor.start, buf, cursor.size, write);
+		ret += cursor.size;
+		buf += cursor.size;
+		amdgpu_res_next(&cursor, cursor.size);
 	}
 
 	return ret;
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [RFC PATCH v2] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory()
  2021-07-13  3:23 [RFC PATCH v2] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory() Kevin Wang
@ 2021-07-13  7:11 ` Christian König
  2021-07-13  8:32   ` Wang, Kevin(Yang)
  0 siblings, 1 reply; 4+ messages in thread
From: Christian König @ 2021-07-13  7:11 UTC (permalink / raw)
  To: Kevin Wang, amd-gfx
  Cc: alexander.deucher, lijo.lazar, frank.min, christian.koenig,
	hawking.zhang

Am 13.07.21 um 05:23 schrieb Kevin Wang:
> 1. using vram aper to access vram if possible
> 2. avoid MM_INDEX/MM_DATA is not working when mmio protect feature is
> enabled.
>
> Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 126 +++++++++++++++++-------
>   1 file changed, 89 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 2aa2eb5de37a..8f6f605bc459 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1407,6 +1407,91 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
>   	return ttm_bo_eviction_valuable(bo, place);
>   }
>   
> +static void amdgpu_ttm_vram_mm_align_access(struct amdgpu_device *adev, loff_t pos,
> +					    uint32_t *value, bool write)

Please drop the _vram_ and _align_ part from the name. Just 
amdgpu_device_mm_access.

> +{
> +	unsigned long flags;
> +
> +	BUG_ON(!IS_ALIGNED(pos, 4));
> +
> +	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> +	WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> +	WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
> +	if (write)
> +		WREG32_NO_KIQ(mmMM_DATA, *value);
> +	else
> +		*value = RREG32_NO_KIQ(mmMM_DATA);
> +	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> +}

That should still be in amdgpu_device.c and you completely messed up the 
drm_dev_enter()/drm_dev_exit() annotation.

Looks like you are working on an old branch where that is not yet merged?

> +
> +static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
> +				      void *buf, size_t size, bool write)
> +{
> +	while (size) {
> +		uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
> +		uint64_t bytes = 4 - (pos & 0x3);
> +		uint32_t shift = (pos & 0x3) * 8;
> +		uint32_t mask = 0xffffffff << shift;
> +		uint32_t value = 0;
> +
> +		if (size < bytes) {
> +			mask &= 0xffffffff >> (bytes - size) * 8;
> +			bytes = size;
> +		}
> +
> +		if (mask != 0xffffffff) {
> +			amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, &value, false);
> +			if (write) {
> +				value &= ~mask;
> +				value |= (*(uint32_t *)buf << shift) & mask;
> +				amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, &value, true);
> +			} else {
> +				value = (value & mask) >> shift;
> +				memcpy(buf, &value, bytes);
> +			}
> +		} else {
> +			amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, buf, write);
> +		}
> +
> +		pos += bytes;
> +		buf += bytes;
> +		size -= bytes;
> +	}
> +}
> +
> +static void amdgpu_ttm_vram_access(struct amdgpu_device *adev, loff_t pos,
> +				   void *buf, size_t size, bool write)
> +{
> +	uint64_t last;
> +
> +#ifdef CONFIG_64BIT
> +	last = min(pos + size, adev->gmc.visible_vram_size);
> +	if (last > pos) {
> +		void __iomem *addr = adev->mman.aper_base_kaddr + pos;
> +		size_t count = last - pos;
> +
> +		if (write) {
> +			memcpy_toio(addr, buf, count);
> +			mb();
> +			amdgpu_device_flush_hdp(adev, NULL);
> +		} else {
> +			amdgpu_device_invalidate_hdp(adev, NULL);
> +			mb();
> +			memcpy_fromio(buf, addr, count);
> +		}
> +
> +		if (count == size)
> +			return;
> +
> +		pos += count;
> +		buf += count;
> +		size -= count;
> +	}
> +#endif

I would put this as a separate function into amdgpu_device.c.

But all of this should only be the second step since we need a much 
smaller patch for backporting first.

> +
> +	amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
> +}
> +
>   /**
>    * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
>    *
> @@ -1426,8 +1511,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>   	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
>   	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
>   	struct amdgpu_res_cursor cursor;
> -	unsigned long flags;
> -	uint32_t value = 0;
>   	int ret = 0;
>   
>   	if (bo->mem.mem_type != TTM_PL_VRAM)
> @@ -1435,41 +1518,10 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>   
>   	amdgpu_res_first(&bo->mem, offset, len, &cursor);
>   	while (cursor.remaining) {
> -		uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
> -		uint64_t bytes = 4 - (cursor.start & 3);
> -		uint32_t shift = (cursor.start & 3) * 8;
> -		uint32_t mask = 0xffffffff << shift;
> -
> -		if (cursor.size < bytes) {
> -			mask &= 0xffffffff >> (bytes - cursor.size) * 8;
> -			bytes = cursor.size;
> -		}
> -
> -		if (mask != 0xffffffff) {
> -			spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> -			WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
> -			WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
> -			value = RREG32_NO_KIQ(mmMM_DATA);
> -			if (write) {
> -				value &= ~mask;
> -				value |= (*(uint32_t *)buf << shift) & mask;
> -				WREG32_NO_KIQ(mmMM_DATA, value);
> -			}
> -			spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> -			if (!write) {
> -				value = (value & mask) >> shift;
> -				memcpy(buf, &value, bytes);
> -			}

This here is the problematic part and should use 
amdgpu_ttm_vram_access() instead.

That should be implemented first since we might need to backport that.

Regards,
Christian.

> -		} else {
> -			bytes = cursor.size & ~0x3ULL;
> -			amdgpu_device_vram_access(adev, cursor.start,
> -						  (uint32_t *)buf, bytes,
> -						  write);
> -		}
> -
> -		ret += bytes;
> -		buf = (uint8_t *)buf + bytes;
> -		amdgpu_res_next(&cursor, bytes);
> +		amdgpu_ttm_vram_access(adev, cursor.start, buf, cursor.size, write);
> +		ret += cursor.size;
> +		buf += cursor.size;
> +		amdgpu_res_next(&cursor, cursor.size);
>   	}
>   
>   	return ret;

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC PATCH v2] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory()
  2021-07-13  7:11 ` Christian König
@ 2021-07-13  8:32   ` Wang, Kevin(Yang)
  2021-07-13  9:06     ` Christian König
  0 siblings, 1 reply; 4+ messages in thread
From: Wang, Kevin(Yang) @ 2021-07-13  8:32 UTC (permalink / raw)
  To: Christian König, amd-gfx
  Cc: Deucher, Alexander, Lazar, Lijo, Min, Frank, Koenig,  Christian,
	Zhang, Hawking


[-- Attachment #1.1: Type: text/plain, Size: 8613 bytes --]

[AMD Official Use Only]

<comments inline>

________________________________
From: Christian König <ckoenig.leichtzumerken@gmail.com>
Sent: Tuesday, July 13, 2021 3:11 PM
To: Wang, Kevin(Yang) <Kevin1.Wang@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Cc: Lazar, Lijo <Lijo.Lazar@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Min, Frank <Frank.Min@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>
Subject: Re: [RFC PATCH v2] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory()

Am 13.07.21 um 05:23 schrieb Kevin Wang:
> 1. using vram aper to access vram if possible
> 2. avoid MM_INDEX/MM_DATA is not working when mmio protect feature is
> enabled.
>
> Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 126 +++++++++++++++++-------
>   1 file changed, 89 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 2aa2eb5de37a..8f6f605bc459 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1407,6 +1407,91 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
>        return ttm_bo_eviction_valuable(bo, place);
>   }
>
> +static void amdgpu_ttm_vram_mm_align_access(struct amdgpu_device *adev, loff_t pos,
> +                                         uint32_t *value, bool write)

Please drop the _vram_ and _align_ part from the name. Just
amdgpu_device_mm_access.

[kevin]: I will correct it in next patch.

> +{
> +     unsigned long flags;
> +
> +     BUG_ON(!IS_ALIGNED(pos, 4));
> +
> +     spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> +     WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> +     WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
> +     if (write)
> +             WREG32_NO_KIQ(mmMM_DATA, *value);
> +     else
> +             *value = RREG32_NO_KIQ(mmMM_DATA);
> +     spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> +}

That should still be in amdgpu_device.c and you completely messed up the
drm_dev_enter()/drm_dev_exit() annotation.

Looks like you are working on an old branch where that is not yet merged?

[kevin]: yes, I'm working on amd-staging-drm-next branch, the following patch (from drm-next-misc) is not merged into this branch.

drm/amdgpu: Guard against write accesses after device removal

This should prevent writing to memory or IO ranges possibly
already allocated for other uses after our device is removed.

v5:
Protect more places wher memcopy_to/form_io takes place
Protect IB submissions

v6: Switch to !drm_dev_enter instead of scoping entire code
with brackets.

v7:
Drop guard of HW ring commands emission protection since they
are in GART and not in MMIO.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>


> +
> +static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
> +                                   void *buf, size_t size, bool write)
> +{
> +     while (size) {
> +             uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
> +             uint64_t bytes = 4 - (pos & 0x3);
> +             uint32_t shift = (pos & 0x3) * 8;
> +             uint32_t mask = 0xffffffff << shift;
> +             uint32_t value = 0;
> +
> +             if (size < bytes) {
> +                     mask &= 0xffffffff >> (bytes - size) * 8;
> +                     bytes = size;
> +             }
> +
> +             if (mask != 0xffffffff) {
> +                     amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, &value, false);
> +                     if (write) {
> +                             value &= ~mask;
> +                             value |= (*(uint32_t *)buf << shift) & mask;
> +                             amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, &value, true);
> +                     } else {
> +                             value = (value & mask) >> shift;
> +                             memcpy(buf, &value, bytes);
> +                     }
> +             } else {
> +                     amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, buf, write);
> +             }
> +
> +             pos += bytes;
> +             buf += bytes;
> +             size -= bytes;
> +     }
> +}
> +
> +static void amdgpu_ttm_vram_access(struct amdgpu_device *adev, loff_t pos,
> +                                void *buf, size_t size, bool write)
> +{
> +     uint64_t last;
> +
> +#ifdef CONFIG_64BIT
> +     last = min(pos + size, adev->gmc.visible_vram_size);
> +     if (last > pos) {
> +             void __iomem *addr = adev->mman.aper_base_kaddr + pos;
> +             size_t count = last - pos;
> +
> +             if (write) {
> +                     memcpy_toio(addr, buf, count);
> +                     mb();
> +                     amdgpu_device_flush_hdp(adev, NULL);
> +             } else {
> +                     amdgpu_device_invalidate_hdp(adev, NULL);
> +                     mb();
> +                     memcpy_fromio(buf, addr, count);
> +             }
> +
> +             if (count == size)
> +                     return;
> +
> +             pos += count;
> +             buf += count;
> +             size -= count;
> +     }
> +#endif

I would put this as a separate function into amdgpu_device.c.

But all of this should only be the second step since we need a much
smaller patch for backporting first.

> +
> +     amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
> +}
> +
>   /**
>    * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
>    *
> @@ -1426,8 +1511,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
>        struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
>        struct amdgpu_res_cursor cursor;
> -     unsigned long flags;
> -     uint32_t value = 0;
>        int ret = 0;
>
>        if (bo->mem.mem_type != TTM_PL_VRAM)
> @@ -1435,41 +1518,10 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>
>        amdgpu_res_first(&bo->mem, offset, len, &cursor);
>        while (cursor.remaining) {
> -             uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
> -             uint64_t bytes = 4 - (cursor.start & 3);
> -             uint32_t shift = (cursor.start & 3) * 8;
> -             uint32_t mask = 0xffffffff << shift;
> -
> -             if (cursor.size < bytes) {
> -                     mask &= 0xffffffff >> (bytes - cursor.size) * 8;
> -                     bytes = cursor.size;
> -             }
> -
> -             if (mask != 0xffffffff) {
> -                     spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> -                     WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
> -                     WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
> -                     value = RREG32_NO_KIQ(mmMM_DATA);
> -                     if (write) {
> -                             value &= ~mask;
> -                             value |= (*(uint32_t *)buf << shift) & mask;
> -                             WREG32_NO_KIQ(mmMM_DATA, value);
> -                     }
> -                     spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> -                     if (!write) {
> -                             value = (value & mask) >> shift;
> -                             memcpy(buf, &value, bytes);
> -                     }

This here is the problematic part and should use
amdgpu_ttm_vram_access() instead.

That should be implemented first since we might need to backport that.

Regards,
Christian.

> -             } else {
> -                     bytes = cursor.size & ~0x3ULL;
> -                     amdgpu_device_vram_access(adev, cursor.start,
> -                                               (uint32_t *)buf, bytes,
> -                                               write);
> -             }
> -
> -             ret += bytes;
> -             buf = (uint8_t *)buf + bytes;
> -             amdgpu_res_next(&cursor, bytes);
> +             amdgpu_ttm_vram_access(adev, cursor.start, buf, cursor.size, write);
> +             ret += cursor.size;
> +             buf += cursor.size;
> +             amdgpu_res_next(&cursor, cursor.size);
>        }
>
>        return ret;


[-- Attachment #1.2: Type: text/html, Size: 19949 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC PATCH v2] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory()
  2021-07-13  8:32   ` Wang, Kevin(Yang)
@ 2021-07-13  9:06     ` Christian König
  0 siblings, 0 replies; 4+ messages in thread
From: Christian König @ 2021-07-13  9:06 UTC (permalink / raw)
  To: Wang, Kevin(Yang), amd-gfx
  Cc: Deucher, Alexander, Lazar, Lijo, Min, Frank, Koenig, Christian,
	Zhang, Hawking


[-- Attachment #1.1: Type: text/plain, Size: 8887 bytes --]

See below.

Am 13.07.21 um 10:32 schrieb Wang, Kevin(Yang):
>
> [AMD Official Use Only]
>
>
> <comments inline>
>
> ------------------------------------------------------------------------
> *From:* Christian König <ckoenig.leichtzumerken@gmail.com>
> *Sent:* Tuesday, July 13, 2021 3:11 PM
> *To:* Wang, Kevin(Yang) <Kevin1.Wang@amd.com>; 
> amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> *Cc:* Lazar, Lijo <Lijo.Lazar@amd.com>; Deucher, Alexander 
> <Alexander.Deucher@amd.com>; Min, Frank <Frank.Min@amd.com>; Koenig, 
> Christian <Christian.Koenig@amd.com>; Zhang, Hawking 
> <Hawking.Zhang@amd.com>
> *Subject:* Re: [RFC PATCH v2] drm/amdgpu/ttm: optimize vram access in 
> amdgpu_ttm_access_memory()
> Am 13.07.21 um 05:23 schrieb Kevin Wang:
> > 1. using vram aper to access vram if possible
> > 2. avoid MM_INDEX/MM_DATA is not working when mmio protect feature is
> > enabled.
> >
> > Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 126 +++++++++++++++++-------
> >   1 file changed, 89 insertions(+), 37 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > index 2aa2eb5de37a..8f6f605bc459 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > @@ -1407,6 +1407,91 @@ static bool 
> amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
> >        return ttm_bo_eviction_valuable(bo, place);
> >   }
> >
> > +static void amdgpu_ttm_vram_mm_align_access(struct amdgpu_device 
> *adev, loff_t pos,
> > +                                         uint32_t *value, bool write)
>
> Please drop the _vram_ and _align_ part from the name. Just
> amdgpu_device_mm_access.
>
> [kevin]: I will correct it in next patch.
>
> > +{
> > +     unsigned long flags;
> > +
> > +     BUG_ON(!IS_ALIGNED(pos, 4));
> > +
> > + spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> > +     WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
> > +     WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
> > +     if (write)
> > +             WREG32_NO_KIQ(mmMM_DATA, *value);
> > +     else
> > +             *value = RREG32_NO_KIQ(mmMM_DATA);
> > + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> > +}
>
> That should still be in amdgpu_device.c and you completely messed up the
> drm_dev_enter()/drm_dev_exit() annotation.
>
> Looks like you are working on an old branch where that is not yet merged?
>
> [kevin]: yes, I'm working on amd-staging-drm-next branch, the 
> following patch (from drm-next-misc) is not merged into this branch.

Ok then just wait a bit. Alex wanted to update the amd-staging-drm-next 
branch in the next few days.

There is an internal mail thread about that, maybe ping him on this.

Christian.

>
> drm/amdgpu: Guard against write accesses after device removal
>
> This should prevent writing to memory or IO ranges possibly
> already allocated for other uses after our device is removed.
>
> v5:
> Protect more places wher memcopy_to/form_io takes place
> Protect IB submissions
>
> v6: Switch to !drm_dev_enter instead of scoping entire code
> with brackets.
>
> v7:
> Drop guard of HW ring commands emission protection since they
> are in GART and not in MMIO.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
>
>
> > +
> > +static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, 
> loff_t pos,
> > +                                   void *buf, size_t size, bool write)
> > +{
> > +     while (size) {
> > +             uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
> > +             uint64_t bytes = 4 - (pos & 0x3);
> > +             uint32_t shift = (pos & 0x3) * 8;
> > +             uint32_t mask = 0xffffffff << shift;
> > +             uint32_t value = 0;
> > +
> > +             if (size < bytes) {
> > +                     mask &= 0xffffffff >> (bytes - size) * 8;
> > +                     bytes = size;
> > +             }
> > +
> > +             if (mask != 0xffffffff) {
> > + amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, &value, false);
> > +                     if (write) {
> > +                             value &= ~mask;
> > +                             value |= (*(uint32_t *)buf << shift) & 
> mask;
> > + amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, &value, true);
> > +                     } else {
> > +                             value = (value & mask) >> shift;
> > +                             memcpy(buf, &value, bytes);
> > +                     }
> > +             } else {
> > + amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, buf, write);
> > +             }
> > +
> > +             pos += bytes;
> > +             buf += bytes;
> > +             size -= bytes;
> > +     }
> > +}
> > +
> > +static void amdgpu_ttm_vram_access(struct amdgpu_device *adev, 
> loff_t pos,
> > +                                void *buf, size_t size, bool write)
> > +{
> > +     uint64_t last;
> > +
> > +#ifdef CONFIG_64BIT
> > +     last = min(pos + size, adev->gmc.visible_vram_size);
> > +     if (last > pos) {
> > +             void __iomem *addr = adev->mman.aper_base_kaddr + pos;
> > +             size_t count = last - pos;
> > +
> > +             if (write) {
> > +                     memcpy_toio(addr, buf, count);
> > +                     mb();
> > +                     amdgpu_device_flush_hdp(adev, NULL);
> > +             } else {
> > + amdgpu_device_invalidate_hdp(adev, NULL);
> > +                     mb();
> > +                     memcpy_fromio(buf, addr, count);
> > +             }
> > +
> > +             if (count == size)
> > +                     return;
> > +
> > +             pos += count;
> > +             buf += count;
> > +             size -= count;
> > +     }
> > +#endif
>
> I would put this as a separate function into amdgpu_device.c.
>
> But all of this should only be the second step since we need a much
> smaller patch for backporting first.
>
> > +
> > +     amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
> > +}
> > +
> >   /**
> >    * amdgpu_ttm_access_memory - Read or Write memory that backs a 
> buffer object.
> >    *
> > @@ -1426,8 +1511,6 @@ static int amdgpu_ttm_access_memory(struct 
> ttm_buffer_object *bo,
> >        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
> >        struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
> >        struct amdgpu_res_cursor cursor;
> > -     unsigned long flags;
> > -     uint32_t value = 0;
> >        int ret = 0;
> >
> >        if (bo->mem.mem_type != TTM_PL_VRAM)
> > @@ -1435,41 +1518,10 @@ static int amdgpu_ttm_access_memory(struct 
> ttm_buffer_object *bo,
> >
> >        amdgpu_res_first(&bo->mem, offset, len, &cursor);
> >        while (cursor.remaining) {
> > -             uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
> > -             uint64_t bytes = 4 - (cursor.start & 3);
> > -             uint32_t shift = (cursor.start & 3) * 8;
> > -             uint32_t mask = 0xffffffff << shift;
> > -
> > -             if (cursor.size < bytes) {
> > -                     mask &= 0xffffffff >> (bytes - cursor.size) * 8;
> > -                     bytes = cursor.size;
> > -             }
> > -
> > -             if (mask != 0xffffffff) {
> > - spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> > -                     WREG32_NO_KIQ(mmMM_INDEX, 
> ((uint32_t)aligned_pos) | 0x80000000);
> > -                     WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
> > -                     value = RREG32_NO_KIQ(mmMM_DATA);
> > -                     if (write) {
> > -                             value &= ~mask;
> > -                             value |= (*(uint32_t *)buf << shift) & 
> mask;
> > - WREG32_NO_KIQ(mmMM_DATA, value);
> > -                     }
> > - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> > -                     if (!write) {
> > -                             value = (value & mask) >> shift;
> > -                             memcpy(buf, &value, bytes);
> > -                     }
>
> This here is the problematic part and should use
> amdgpu_ttm_vram_access() instead.
>
> That should be implemented first since we might need to backport that.
>
> Regards,
> Christian.
>
> > -             } else {
> > -                     bytes = cursor.size & ~0x3ULL;
> > - amdgpu_device_vram_access(adev, cursor.start,
> > - (uint32_t *)buf, bytes,
> > - write);
> > -             }
> > -
> > -             ret += bytes;
> > -             buf = (uint8_t *)buf + bytes;
> > -             amdgpu_res_next(&cursor, bytes);
> > +             amdgpu_ttm_vram_access(adev, cursor.start, buf, 
> cursor.size, write);
> > +             ret += cursor.size;
> > +             buf += cursor.size;
> > +             amdgpu_res_next(&cursor, cursor.size);
> >        }
> >
> >        return ret;
>


[-- Attachment #1.2: Type: text/html, Size: 18355 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-07-13  9:06 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-13  3:23 [RFC PATCH v2] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory() Kevin Wang
2021-07-13  7:11 ` Christian König
2021-07-13  8:32   ` Wang, Kevin(Yang)
2021-07-13  9:06     ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.