All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/1] drm/amdgpu: convert gtt_window_lock to a spinlock
@ 2021-05-27 14:54 Nirmoy Das
  2021-05-27 15:30 ` Christian König
  0 siblings, 1 reply; 2+ messages in thread
From: Nirmoy Das @ 2021-05-27 14:54 UTC (permalink / raw)
  To: amd-gfx; +Cc: alexander.deucher, Nirmoy Das, christian.koenig

amdgpu_device_gpu_recover() will eventually call
gmc_v10_0_flush_gpu_tlb() while holding a spinlock, which
puts the process in atomic context. Sleeping in atomic context
is not allowed, so convert gtt_window_lock into a spinlock.

BUG: sleeping function called from invalid context at kernel/locking/mutex.c:935
in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2662, name: cat
Call Trace:
 dump_stack+0x8b/0xb0
 ___might_sleep.cold+0xb6/0xc6
 ? gmc_v10_0_flush_gpu_tlb+0x72/0x2c0 [amdgpu]
 __mutex_lock+0x45/0x820
 ? amdgpu_device_skip_hw_access+0x3e/0x70 [amdgpu]
 gmc_v10_0_flush_gpu_tlb+0x72/0x2c0 [amdgpu]
 ? amdgpu_device_skip_hw_access+0x3e/0x70 [amdgpu]
 amdgpu_gart_bind+0x7a/0xc0 [amdgpu]
 amdgpu_ttm_gart_bind+0x7d/0xd0 [amdgpu]
 ? amdgpu_ttm_recover_gart+0x2e/0x70 [amdgpu]
 amdgpu_gtt_mgr_recover+0x4e/0x70 [amdgpu]
 amdgpu_do_asic_reset.cold+0x90/0x1c4 [amdgpu]
 ? amdgpu_device_pre_asic_reset+0xa8/0x250 [amdgpu]
 amdgpu_device_gpu_recover.cold+0x7bd/0xa23 [amdgpu]
 ? lock_acquired+0x210/0x390
 gpu_recover_get+0x29/0x60 [amdgpu]
 simple_attr_read+0x69/0xf0
 debugfs_attr_read+0x40/0x60
 full_proxy_read+0x56/0x80
 vfs_read+0xd1/0x1d0
 ksys_read+0x68/0xe0
 do_syscall_64+0x33/0x80
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  |  6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h  |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 10 +++++-----
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  4 ++--
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c0aef327292a..4cb8fd193b6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -333,7 +333,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 	amdgpu_res_first(src->mem, src->offset, size, &src_mm);
 	amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);
 
-	mutex_lock(&adev->mman.gtt_window_lock);
+	spin_lock(&adev->mman.gtt_window_lock);
 	while (src_mm.remaining) {
 		uint32_t src_page_offset = src_mm.start & ~PAGE_MASK;
 		uint32_t dst_page_offset = dst_mm.start & ~PAGE_MASK;
@@ -373,7 +373,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 		amdgpu_res_next(&dst_mm, cur_size);
 	}
 error:
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	spin_unlock(&adev->mman.gtt_window_lock);
 	if (f)
 		*f = dma_fence_get(fence);
 	dma_fence_put(fence);
@@ -1661,7 +1661,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	int r;
 	u64 vis_vram_limit;
 
-	mutex_init(&adev->mman.gtt_window_lock);
+	spin_lock_init(&adev->mman.gtt_window_lock);
 
 	/* No others user of address space so set it to 0 */
 	r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 2877a924086f..afd905dc337b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -70,7 +70,7 @@ struct amdgpu_mman {
 	struct amdgpu_ring			*buffer_funcs_ring;
 	bool					buffer_funcs_enabled;
 
-	struct mutex				gtt_window_lock;
+	spinlock_t				gtt_window_lock;
 	/* Scheduler entity for buffer moves */
 	struct drm_sched_entity			entity;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index ceab5ef50790..f4ce3eb4d7e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -341,11 +341,11 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 		return;
 	}
 
-	mutex_lock(&adev->mman.gtt_window_lock);
+	spin_lock(&adev->mman.gtt_window_lock);
 
 	if (vmhub == AMDGPU_MMHUB_0) {
 		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0);
-		mutex_unlock(&adev->mman.gtt_window_lock);
+		spin_unlock(&adev->mman.gtt_window_lock);
 		return;
 	}
 
@@ -356,7 +356,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	    amdgpu_in_reset(adev) ||
 	    ring->sched.ready == false) {
 		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
-		mutex_unlock(&adev->mman.gtt_window_lock);
+		spin_unlock(&adev->mman.gtt_window_lock);
 		return;
 	}
 
@@ -379,7 +379,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	if (r)
 		goto error_submit;
 
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	spin_unlock(&adev->mman.gtt_window_lock);
 
 	dma_fence_wait(fence, false);
 	dma_fence_put(fence);
@@ -390,7 +390,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	amdgpu_job_free(job);
 
 error_alloc:
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	spin_unlock(&adev->mman.gtt_window_lock);
 	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index fd8f544f0de2..e1dad02d400b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -135,7 +135,7 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
 	uint64_t size;
 	int r;
 
-	mutex_lock(&adev->mman.gtt_window_lock);
+	spin_lock(&adev->mman.gtt_window_lock);
 
 	while (npages) {
 		size = min(GTT_MAX_PAGES, npages);
@@ -171,7 +171,7 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
 	}
 
 out_unlock:
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	spin_unlock(&adev->mman.gtt_window_lock);
 
 	return r;
 }
-- 
2.31.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH 1/1] drm/amdgpu: convert gtt_window_lock to a spinlock
  2021-05-27 14:54 [PATCH 1/1] drm/amdgpu: convert gtt_window_lock to a spinlock Nirmoy Das
@ 2021-05-27 15:30 ` Christian König
  0 siblings, 0 replies; 2+ messages in thread
From: Christian König @ 2021-05-27 15:30 UTC (permalink / raw)
  To: Nirmoy Das, amd-gfx; +Cc: alexander.deucher

That won't work; we need to be able to sleep while waiting for the TLB
flush anyway.

On the other hand flushing the TLB after recovering each BO is nonsense 
to begin with.

So we should probably move that from amdgpu_gart_bind() into 
amdgpu_ttm_alloc_gart().

Regards,
Christian.

Am 27.05.21 um 16:54 schrieb Nirmoy Das:
> amdgpu_device_gpu_recover() will eventually call
> gmc_v10_0_flush_gpu_tlb() while holding a spinlock, which
> puts the process in atomic context. Sleeping in atomic context
> is not allowed, so convert gtt_window_lock into a spinlock.
>
> BUG: sleeping function called from invalid context at kernel/locking/mutex.c:935
> in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2662, name: cat
> Call Trace:
>   dump_stack+0x8b/0xb0
>   ___might_sleep.cold+0xb6/0xc6
>   ? gmc_v10_0_flush_gpu_tlb+0x72/0x2c0 [amdgpu]
>   __mutex_lock+0x45/0x820
>   ? amdgpu_device_skip_hw_access+0x3e/0x70 [amdgpu]
>   gmc_v10_0_flush_gpu_tlb+0x72/0x2c0 [amdgpu]
>   ? amdgpu_device_skip_hw_access+0x3e/0x70 [amdgpu]
>   amdgpu_gart_bind+0x7a/0xc0 [amdgpu]
>   amdgpu_ttm_gart_bind+0x7d/0xd0 [amdgpu]
>   ? amdgpu_ttm_recover_gart+0x2e/0x70 [amdgpu]
>   amdgpu_gtt_mgr_recover+0x4e/0x70 [amdgpu]
>   amdgpu_do_asic_reset.cold+0x90/0x1c4 [amdgpu]
>   ? amdgpu_device_pre_asic_reset+0xa8/0x250 [amdgpu]
>   amdgpu_device_gpu_recover.cold+0x7bd/0xa23 [amdgpu]
>   ? lock_acquired+0x210/0x390
>   gpu_recover_get+0x29/0x60 [amdgpu]
>   simple_attr_read+0x69/0xf0
>   debugfs_attr_read+0x40/0x60
>   full_proxy_read+0x56/0x80
>   vfs_read+0xd1/0x1d0
>   ksys_read+0x68/0xe0
>   do_syscall_64+0x33/0x80
>   entry_SYSCALL_64_after_hwframe+0x44/0xa9
>
> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  |  6 +++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h  |  2 +-
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 10 +++++-----
>   drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  4 ++--
>   4 files changed, 11 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index c0aef327292a..4cb8fd193b6c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -333,7 +333,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>   	amdgpu_res_first(src->mem, src->offset, size, &src_mm);
>   	amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);
>   
> -	mutex_lock(&adev->mman.gtt_window_lock);
> +	spin_lock(&adev->mman.gtt_window_lock);
>   	while (src_mm.remaining) {
>   		uint32_t src_page_offset = src_mm.start & ~PAGE_MASK;
>   		uint32_t dst_page_offset = dst_mm.start & ~PAGE_MASK;
> @@ -373,7 +373,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>   		amdgpu_res_next(&dst_mm, cur_size);
>   	}
>   error:
> -	mutex_unlock(&adev->mman.gtt_window_lock);
> +	spin_unlock(&adev->mman.gtt_window_lock);
>   	if (f)
>   		*f = dma_fence_get(fence);
>   	dma_fence_put(fence);
> @@ -1661,7 +1661,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>   	int r;
>   	u64 vis_vram_limit;
>   
> -	mutex_init(&adev->mman.gtt_window_lock);
> +	spin_lock_init(&adev->mman.gtt_window_lock);
>   
>   	/* No others user of address space so set it to 0 */
>   	r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 2877a924086f..afd905dc337b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -70,7 +70,7 @@ struct amdgpu_mman {
>   	struct amdgpu_ring			*buffer_funcs_ring;
>   	bool					buffer_funcs_enabled;
>   
> -	struct mutex				gtt_window_lock;
> +	spinlock_t				gtt_window_lock;
>   	/* Scheduler entity for buffer moves */
>   	struct drm_sched_entity			entity;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index ceab5ef50790..f4ce3eb4d7e4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -341,11 +341,11 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   		return;
>   	}
>   
> -	mutex_lock(&adev->mman.gtt_window_lock);
> +	spin_lock(&adev->mman.gtt_window_lock);
>   
>   	if (vmhub == AMDGPU_MMHUB_0) {
>   		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0);
> -		mutex_unlock(&adev->mman.gtt_window_lock);
> +		spin_unlock(&adev->mman.gtt_window_lock);
>   		return;
>   	}
>   
> @@ -356,7 +356,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	    amdgpu_in_reset(adev) ||
>   	    ring->sched.ready == false) {
>   		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
> -		mutex_unlock(&adev->mman.gtt_window_lock);
> +		spin_unlock(&adev->mman.gtt_window_lock);
>   		return;
>   	}
>   
> @@ -379,7 +379,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	if (r)
>   		goto error_submit;
>   
> -	mutex_unlock(&adev->mman.gtt_window_lock);
> +	spin_unlock(&adev->mman.gtt_window_lock);
>   
>   	dma_fence_wait(fence, false);
>   	dma_fence_put(fence);
> @@ -390,7 +390,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	amdgpu_job_free(job);
>   
>   error_alloc:
> -	mutex_unlock(&adev->mman.gtt_window_lock);
> +	spin_unlock(&adev->mman.gtt_window_lock);
>   	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index fd8f544f0de2..e1dad02d400b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -135,7 +135,7 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
>   	uint64_t size;
>   	int r;
>   
> -	mutex_lock(&adev->mman.gtt_window_lock);
> +	spin_lock(&adev->mman.gtt_window_lock);
>   
>   	while (npages) {
>   		size = min(GTT_MAX_PAGES, npages);
> @@ -171,7 +171,7 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
>   	}
>   
>   out_unlock:
> -	mutex_unlock(&adev->mman.gtt_window_lock);
> +	spin_unlock(&adev->mman.gtt_window_lock);
>   
>   	return r;
>   }

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-05-27 15:30 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-27 14:54 [PATCH 1/1] drm/amdgpu: convert gtt_window_lock to a spinlock Nirmoy Das
2021-05-27 15:30 ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.