All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/amdgpu: fix amdgpu.vramlimit handling
@ 2022-06-22 10:07 Pierre-Eric Pelloux-Prayer
  2022-06-22 10:07 ` [PATCH 2/2] drm/amdgpu: use real_vram_size in ttm_vram_fops Pierre-Eric Pelloux-Prayer
  2022-06-22 12:34 ` [PATCH 1/2] drm/amdgpu: fix amdgpu.vramlimit handling Christian König
  0 siblings, 2 replies; 4+ messages in thread
From: Pierre-Eric Pelloux-Prayer @ 2022-06-22 10:07 UTC (permalink / raw)
  To: amd-gfx; +Cc: Pierre-Eric Pelloux-Prayer

Without this change amdgpu_ttm_training_data_block_init tries
to allocate at the end of the real amount of VRAM, which
then fails like this if amdgpu.vramlimit=XXXX is used:

   [drm:amdgpu_ttm_init [amdgpu]] *ERROR* alloc c2p_bo failed(-12)!
   [drm:amdgpu_device_init.cold [amdgpu]] *ERROR* sw_init of IP block <gmc_v10_0> failed -12
   amdgpu: amdgpu_device_ip_init failed
   amdgpu: Fatal error during GPU init

Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index be0efaae79a9..952e99e6d07e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1621,9 +1621,9 @@ static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
 	memset(ctx, 0, sizeof(*ctx));
 
 	ctx->c2p_train_data_offset =
-		ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M);
+		ALIGN((adev->gmc.real_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M);
 	ctx->p2c_train_data_offset =
-		(adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
+		(adev->gmc.real_vram_size - GDDR6_MEM_TRAINING_OFFSET);
 	ctx->train_data_size =
 		GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
 

base-commit: a81bcfc756bcaa9e8bb46262f910504fa5290aab
-- 
2.36.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2] drm/amdgpu: use real_vram_size in ttm_vram_fops
  2022-06-22 10:07 [PATCH 1/2] drm/amdgpu: fix amdgpu.vramlimit handling Pierre-Eric Pelloux-Prayer
@ 2022-06-22 10:07 ` Pierre-Eric Pelloux-Prayer
  2022-06-22 12:38   ` Christian König
  2022-06-22 12:34 ` [PATCH 1/2] drm/amdgpu: fix amdgpu.vramlimit handling Christian König
  1 sibling, 1 reply; 4+ messages in thread
From: Pierre-Eric Pelloux-Prayer @ 2022-06-22 10:07 UTC (permalink / raw)
  To: amd-gfx; +Cc: Pierre-Eric Pelloux-Prayer

If amdgpu.vramlimit=XXXX is used, amdgpu_gmc_vram_location will update
real_vram_size based on this value.
mc_vram_size is the real amount of VRAM, initialized in gmc_..._mc_init.

Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 952e99e6d07e..8f245e9f8f7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -2252,10 +2252,10 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
-	if (*pos >= adev->gmc.mc_vram_size)
+	if (*pos >= adev->gmc.real_vram_size)
 		return -ENXIO;
 
-	size = min(size, (size_t)(adev->gmc.mc_vram_size - *pos));
+	size = min(size, (size_t)(adev->gmc.real_vram_size - *pos));
 	while (size) {
 		size_t bytes = min(size, AMDGPU_TTM_VRAM_MAX_DW_READ * 4);
 		uint32_t value[AMDGPU_TTM_VRAM_MAX_DW_READ];
@@ -2288,13 +2288,13 @@ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
-	if (*pos >= adev->gmc.mc_vram_size)
+	if (*pos >= adev->gmc.real_vram_size)
 		return -ENXIO;
 
 	while (size) {
 		uint32_t value;
 
-		if (*pos >= adev->gmc.mc_vram_size)
+		if (*pos >= adev->gmc.real_vram_size)
 			return result;
 
 		r = get_user(value, (uint32_t *)buf);
@@ -2442,7 +2442,7 @@ void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
 	struct dentry *root = minor->debugfs_root;
 
 	debugfs_create_file_size("amdgpu_vram", 0444, root, adev,
-				 &amdgpu_ttm_vram_fops, adev->gmc.mc_vram_size);
+				 &amdgpu_ttm_vram_fops, adev->gmc.real_vram_size);
 	debugfs_create_file("amdgpu_iomem", 0444, root, adev,
 			    &amdgpu_ttm_iomem_fops);
 	debugfs_create_file("amdgpu_vram_mm", 0444, root, adev,
-- 
2.36.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: fix amdgpu.vramlimit handling
  2022-06-22 10:07 [PATCH 1/2] drm/amdgpu: fix amdgpu.vramlimit handling Pierre-Eric Pelloux-Prayer
  2022-06-22 10:07 ` [PATCH 2/2] drm/amdgpu: use real_vram_size in ttm_vram_fops Pierre-Eric Pelloux-Prayer
@ 2022-06-22 12:34 ` Christian König
  1 sibling, 0 replies; 4+ messages in thread
From: Christian König @ 2022-06-22 12:34 UTC (permalink / raw)
  To: Pierre-Eric Pelloux-Prayer, amd-gfx

Am 22.06.22 um 12:07 schrieb Pierre-Eric Pelloux-Prayer:
> Without this change amdgpu_ttm_training_data_block_init tries
> to allocate at the end of the real amount of VRAM, which
> then fails like this if amdgpu.vramlimit=XXXX is used:
>
>     [drm:amdgpu_ttm_init [amdgpu]] *ERROR* alloc c2p_bo failed(-12)!
>     [drm:amdgpu_device_init.cold [amdgpu]] *ERROR* sw_init of IP block <gmc_v10_0> failed -12
>     amdgpu: amdgpu_device_ip_init failed
>     amdgpu: Fatal error during GPU init
>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>

That looks like a real bug fix to me, so I think we should add a
"CC: stable..." tag.

Apart from that Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 ++--
>   1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index be0efaae79a9..952e99e6d07e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1621,9 +1621,9 @@ static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
>   	memset(ctx, 0, sizeof(*ctx));
>   
>   	ctx->c2p_train_data_offset =
> -		ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M);
> +		ALIGN((adev->gmc.real_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M);
>   	ctx->p2c_train_data_offset =
> -		(adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
> +		(adev->gmc.real_vram_size - GDDR6_MEM_TRAINING_OFFSET);
>   	ctx->train_data_size =
>   		GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
>   
>
> base-commit: a81bcfc756bcaa9e8bb46262f910504fa5290aab


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: use real_vram_size in ttm_vram_fops
  2022-06-22 10:07 ` [PATCH 2/2] drm/amdgpu: use real_vram_size in ttm_vram_fops Pierre-Eric Pelloux-Prayer
@ 2022-06-22 12:38   ` Christian König
  0 siblings, 0 replies; 4+ messages in thread
From: Christian König @ 2022-06-22 12:38 UTC (permalink / raw)
  To: Pierre-Eric Pelloux-Prayer, amd-gfx

Am 22.06.22 um 12:07 schrieb Pierre-Eric Pelloux-Prayer:
> If amdgpu.vramlimit=XXXX is used, amdgpu_gmc_vram_location will update
> real_vram_size based on this value.
> mc_vram_size is the real amount of VRAM, initialized in gmc_..._mc_init.

Thinking more about it I came to the conclusion that this patch here is 
not correct.

Even when we have restricted the driver to use only a fraction of the VRAM,
we can still read/write all of it through the debugging interfaces.

That could be useful if we try to track down things like random memory
corruption, etc.

Regards,
Christian.

>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 10 +++++-----
>   1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 952e99e6d07e..8f245e9f8f7c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -2252,10 +2252,10 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
>   	if (size & 0x3 || *pos & 0x3)
>   		return -EINVAL;
>   
> -	if (*pos >= adev->gmc.mc_vram_size)
> +	if (*pos >= adev->gmc.real_vram_size)
>   		return -ENXIO;
>   
> -	size = min(size, (size_t)(adev->gmc.mc_vram_size - *pos));
> +	size = min(size, (size_t)(adev->gmc.real_vram_size - *pos));
>   	while (size) {
>   		size_t bytes = min(size, AMDGPU_TTM_VRAM_MAX_DW_READ * 4);
>   		uint32_t value[AMDGPU_TTM_VRAM_MAX_DW_READ];
> @@ -2288,13 +2288,13 @@ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
>   	if (size & 0x3 || *pos & 0x3)
>   		return -EINVAL;
>   
> -	if (*pos >= adev->gmc.mc_vram_size)
> +	if (*pos >= adev->gmc.real_vram_size)
>   		return -ENXIO;
>   
>   	while (size) {
>   		uint32_t value;
>   
> -		if (*pos >= adev->gmc.mc_vram_size)
> +		if (*pos >= adev->gmc.real_vram_size)
>   			return result;
>   
>   		r = get_user(value, (uint32_t *)buf);
> @@ -2442,7 +2442,7 @@ void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
>   	struct dentry *root = minor->debugfs_root;
>   
>   	debugfs_create_file_size("amdgpu_vram", 0444, root, adev,
> -				 &amdgpu_ttm_vram_fops, adev->gmc.mc_vram_size);
> +				 &amdgpu_ttm_vram_fops, adev->gmc.real_vram_size);
>   	debugfs_create_file("amdgpu_iomem", 0444, root, adev,
>   			    &amdgpu_ttm_iomem_fops);
>   	debugfs_create_file("amdgpu_vram_mm", 0444, root, adev,


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-06-22 12:38 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-22 10:07 [PATCH 1/2] drm/amdgpu: fix amdgpu.vramlimit handling Pierre-Eric Pelloux-Prayer
2022-06-22 10:07 ` [PATCH 2/2] drm/amdgpu: use real_vram_size in ttm_vram_fops Pierre-Eric Pelloux-Prayer
2022-06-22 12:38   ` Christian König
2022-06-22 12:34 ` [PATCH 1/2] drm/amdgpu: fix amdgpu.vramlimit handling Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.