All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code""
@ 2021-03-22 12:40 Christian König
  2021-03-22 13:11 ` Chen, Guchun
  0 siblings, 1 reply; 4+ messages in thread
From: Christian König @ 2021-03-22 12:40 UTC (permalink / raw)
  To: amd-gfx; +Cc: nirmoy.das, guchun.chen

Now that we found the underlying problem we can re-apply this patch.

This reverts commit 867fee7f8821ff42e7308088cf0c3450ac49c17c.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55 +++++++++-----------------
 1 file changed, 18 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9268db1172bd..bc3951b71079 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -37,6 +37,7 @@
 #include "amdgpu_gmc.h"
 #include "amdgpu_xgmi.h"
 #include "amdgpu_dma_buf.h"
+#include "amdgpu_res_cursor.h"
 
 /**
  * DOC: GPUVM
@@ -1583,7 +1584,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
  * @last: last mapped entry
  * @flags: flags for the entries
  * @offset: offset into nodes and pages_addr
- * @nodes: array of drm_mm_nodes with the MC addresses
+ * @res: ttm_resource to map
  * @pages_addr: DMA addresses to use for mapping
  * @fence: optional resulting fence
  *
@@ -1598,13 +1599,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 				       bool unlocked, struct dma_resv *resv,
 				       uint64_t start, uint64_t last,
 				       uint64_t flags, uint64_t offset,
-				       struct drm_mm_node *nodes,
+				       struct ttm_resource *res,
 				       dma_addr_t *pages_addr,
 				       struct dma_fence **fence)
 {
 	struct amdgpu_vm_update_params params;
+	struct amdgpu_res_cursor cursor;
 	enum amdgpu_sync_mode sync_mode;
-	uint64_t pfn;
 	int r;
 
 	memset(&params, 0, sizeof(params));
@@ -1622,14 +1623,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	else
 		sync_mode = AMDGPU_SYNC_EXPLICIT;
 
-	pfn = offset >> PAGE_SHIFT;
-	if (nodes) {
-		while (pfn >= nodes->size) {
-			pfn -= nodes->size;
-			++nodes;
-		}
-	}
-
 	amdgpu_vm_eviction_lock(vm);
 	if (vm->evicting) {
 		r = -EBUSY;
@@ -1648,23 +1641,17 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		goto error_unlock;
 
-	do {
+	amdgpu_res_first(res, offset, (last - start + 1) * AMDGPU_GPU_PAGE_SIZE,
+			 &cursor);
+	while (cursor.remaining) {
 		uint64_t tmp, num_entries, addr;
 
-
-		num_entries = last - start + 1;
-		if (nodes) {
-			addr = nodes->start << PAGE_SHIFT;
-			num_entries = min((nodes->size - pfn) *
-				AMDGPU_GPU_PAGES_IN_CPU_PAGE, num_entries);
-		} else {
-			addr = 0;
-		}
-
+		num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT;
 		if (pages_addr) {
 			bool contiguous = true;
 
 			if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) {
+				uint64_t pfn = cursor.start >> PAGE_SHIFT;
 				uint64_t count;
 
 				contiguous = pages_addr[pfn + 1] ==
@@ -1684,16 +1671,18 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 			}
 
 			if (!contiguous) {
-				addr = pfn << PAGE_SHIFT;
+				addr = cursor.start;
 				params.pages_addr = pages_addr;
 			} else {
-				addr = pages_addr[pfn];
+				addr = pages_addr[cursor.start >> PAGE_SHIFT];
 				params.pages_addr = NULL;
 			}
 
 		} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
-			addr += bo_adev->vm_manager.vram_base_offset;
-			addr += pfn << PAGE_SHIFT;
+			addr = bo_adev->vm_manager.vram_base_offset +
+				cursor.start;
+		} else {
+			addr = 0;
 		}
 
 		tmp = start + num_entries;
@@ -1701,14 +1690,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		if (r)
 			goto error_unlock;
 
-		pfn += num_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
-		if (nodes && nodes->size == pfn) {
-			pfn = 0;
-			++nodes;
-		}
+		amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE);
 		start = tmp;
-
-	} while (unlikely(start != last + 1));
+	};
 
 	r = vm->update_funcs->commit(&params, fence);
 
@@ -1737,7 +1721,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 	struct amdgpu_bo_va_mapping *mapping;
 	dma_addr_t *pages_addr = NULL;
 	struct ttm_resource *mem;
-	struct drm_mm_node *nodes;
 	struct dma_fence **last_update;
 	struct dma_resv *resv;
 	uint64_t flags;
@@ -1746,7 +1729,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 
 	if (clear || !bo) {
 		mem = NULL;
-		nodes = NULL;
 		resv = vm->root.base.bo->tbo.base.resv;
 	} else {
 		struct drm_gem_object *obj = &bo->tbo.base;
@@ -1761,7 +1743,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 				bo = gem_to_amdgpu_bo(gobj);
 		}
 		mem = &bo->tbo.mem;
-		nodes = mem->mm_node;
 		if (mem->mem_type == TTM_PL_TT)
 			pages_addr = bo->tbo.ttm->dma_address;
 	}
@@ -1810,7 +1791,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
 						resv, mapping->start,
 						mapping->last, update_flags,
-						mapping->offset, nodes,
+						mapping->offset, mem,
 						pages_addr, last_update);
 		if (r)
 			return r;
-- 
2.25.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* RE: [PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code""
  2021-03-22 12:40 [PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code"" Christian König
@ 2021-03-22 13:11 ` Chen, Guchun
  2021-03-23  9:09   ` Nirmoy
  0 siblings, 1 reply; 4+ messages in thread
From: Chen, Guchun @ 2021-03-22 13:11 UTC (permalink / raw)
  To: Christian König, amd-gfx; +Cc: Das, Nirmoy

[AMD Public Use]

Hi Christian,

I will conduct one stress test for this tomorrow. Would you mind waiting for my ack before submitting?

Regards,
Guchun

-----Original Message-----
From: Christian König <ckoenig.leichtzumerken@gmail.com> 
Sent: Monday, March 22, 2021 8:41 PM
To: amd-gfx@lists.freedesktop.org
Cc: Chen, Guchun <Guchun.Chen@amd.com>; Das, Nirmoy <Nirmoy.Das@amd.com>
Subject: [PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code""

Now that we found the underlying problem we can re-apply this patch.

This reverts commit 867fee7f8821ff42e7308088cf0c3450ac49c17c.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55 +++++++++-----------------
 1 file changed, 18 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9268db1172bd..bc3951b71079 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -37,6 +37,7 @@
 #include "amdgpu_gmc.h"
 #include "amdgpu_xgmi.h"
 #include "amdgpu_dma_buf.h"
+#include "amdgpu_res_cursor.h"
 
 /**
  * DOC: GPUVM
@@ -1583,7 +1584,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
  * @last: last mapped entry
  * @flags: flags for the entries
  * @offset: offset into nodes and pages_addr
- * @nodes: array of drm_mm_nodes with the MC addresses
+ * @res: ttm_resource to map
  * @pages_addr: DMA addresses to use for mapping
  * @fence: optional resulting fence
  *
@@ -1598,13 +1599,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 				       bool unlocked, struct dma_resv *resv,
 				       uint64_t start, uint64_t last,
 				       uint64_t flags, uint64_t offset,
-				       struct drm_mm_node *nodes,
+				       struct ttm_resource *res,
 				       dma_addr_t *pages_addr,
 				       struct dma_fence **fence)
 {
 	struct amdgpu_vm_update_params params;
+	struct amdgpu_res_cursor cursor;
 	enum amdgpu_sync_mode sync_mode;
-	uint64_t pfn;
 	int r;
 
 	memset(&params, 0, sizeof(params));
@@ -1622,14 +1623,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	else
 		sync_mode = AMDGPU_SYNC_EXPLICIT;
 
-	pfn = offset >> PAGE_SHIFT;
-	if (nodes) {
-		while (pfn >= nodes->size) {
-			pfn -= nodes->size;
-			++nodes;
-		}
-	}
-
 	amdgpu_vm_eviction_lock(vm);
 	if (vm->evicting) {
 		r = -EBUSY;
@@ -1648,23 +1641,17 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		goto error_unlock;
 
-	do {
+	amdgpu_res_first(res, offset, (last - start + 1) * AMDGPU_GPU_PAGE_SIZE,
+			 &cursor);
+	while (cursor.remaining) {
 		uint64_t tmp, num_entries, addr;
 
-
-		num_entries = last - start + 1;
-		if (nodes) {
-			addr = nodes->start << PAGE_SHIFT;
-			num_entries = min((nodes->size - pfn) *
-				AMDGPU_GPU_PAGES_IN_CPU_PAGE, num_entries);
-		} else {
-			addr = 0;
-		}
-
+		num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT;
 		if (pages_addr) {
 			bool contiguous = true;
 
 			if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) {
+				uint64_t pfn = cursor.start >> PAGE_SHIFT;
 				uint64_t count;
 
 				contiguous = pages_addr[pfn + 1] ==
@@ -1684,16 +1671,18 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 			}
 
 			if (!contiguous) {
-				addr = pfn << PAGE_SHIFT;
+				addr = cursor.start;
 				params.pages_addr = pages_addr;
 			} else {
-				addr = pages_addr[pfn];
+				addr = pages_addr[cursor.start >> PAGE_SHIFT];
 				params.pages_addr = NULL;
 			}
 
 		} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
-			addr += bo_adev->vm_manager.vram_base_offset;
-			addr += pfn << PAGE_SHIFT;
+			addr = bo_adev->vm_manager.vram_base_offset +
+				cursor.start;
+		} else {
+			addr = 0;
 		}
 
 		tmp = start + num_entries;
@@ -1701,14 +1690,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		if (r)
 			goto error_unlock;
 
-		pfn += num_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
-		if (nodes && nodes->size == pfn) {
-			pfn = 0;
-			++nodes;
-		}
+		amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE);
 		start = tmp;
-
-	} while (unlikely(start != last + 1));
+	};
 
 	r = vm->update_funcs->commit(&params, fence);
 
@@ -1737,7 +1721,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 	struct amdgpu_bo_va_mapping *mapping;
 	dma_addr_t *pages_addr = NULL;
 	struct ttm_resource *mem;
-	struct drm_mm_node *nodes;
 	struct dma_fence **last_update;
 	struct dma_resv *resv;
 	uint64_t flags;
@@ -1746,7 +1729,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 
 	if (clear || !bo) {
 		mem = NULL;
-		nodes = NULL;
 		resv = vm->root.base.bo->tbo.base.resv;
 	} else {
 		struct drm_gem_object *obj = &bo->tbo.base;
@@ -1761,7 +1743,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 				bo = gem_to_amdgpu_bo(gobj);
 		}
 		mem = &bo->tbo.mem;
-		nodes = mem->mm_node;
 		if (mem->mem_type == TTM_PL_TT)
 			pages_addr = bo->tbo.ttm->dma_address;
 	}
@@ -1810,7 +1791,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
 						resv, mapping->start,
 						mapping->last, update_flags,
-						mapping->offset, nodes,
+						mapping->offset, mem,
 						pages_addr, last_update);
 		if (r)
 			return r;
--
2.25.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code""
  2021-03-22 13:11 ` Chen, Guchun
@ 2021-03-23  9:09   ` Nirmoy
  2021-03-23  9:52     ` Chen, Guchun
  0 siblings, 1 reply; 4+ messages in thread
From: Nirmoy @ 2021-03-23  9:09 UTC (permalink / raw)
  To: Chen, Guchun, Christian König, amd-gfx; +Cc: Das, Nirmoy

I tested ./piglit run opengl results/test multiple times. Once I got a gfx
timeout error, but without a kernel freeze. I can't reproduce it any more.


Regards,

Nirmoy

On 3/22/21 2:11 PM, Chen, Guchun wrote:
> [AMD Public Use]
>
> Hi Christian,
>
> I will conduct one stress test for this tomorrow. Would you mind waiting for my ack before submitting?
>
> Regards,
> Guchun
>
> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Monday, March 22, 2021 8:41 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Chen, Guchun <Guchun.Chen@amd.com>; Das, Nirmoy <Nirmoy.Das@amd.com>
> Subject: [PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code""
>
> Now that we found the underlying problem we can re-apply this patch.
>
> This reverts commit 867fee7f8821ff42e7308088cf0c3450ac49c17c.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55 +++++++++-----------------
>   1 file changed, 18 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9268db1172bd..bc3951b71079 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -37,6 +37,7 @@
>   #include "amdgpu_gmc.h"
>   #include "amdgpu_xgmi.h"
>   #include "amdgpu_dma_buf.h"
> +#include "amdgpu_res_cursor.h"
>   
>   /**
>    * DOC: GPUVM
> @@ -1583,7 +1584,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>    * @last: last mapped entry
>    * @flags: flags for the entries
>    * @offset: offset into nodes and pages_addr
> - * @nodes: array of drm_mm_nodes with the MC addresses
> + * @res: ttm_resource to map
>    * @pages_addr: DMA addresses to use for mapping
>    * @fence: optional resulting fence
>    *
> @@ -1598,13 +1599,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   				       bool unlocked, struct dma_resv *resv,
>   				       uint64_t start, uint64_t last,
>   				       uint64_t flags, uint64_t offset,
> -				       struct drm_mm_node *nodes,
> +				       struct ttm_resource *res,
>   				       dma_addr_t *pages_addr,
>   				       struct dma_fence **fence)
>   {
>   	struct amdgpu_vm_update_params params;
> +	struct amdgpu_res_cursor cursor;
>   	enum amdgpu_sync_mode sync_mode;
> -	uint64_t pfn;
>   	int r;
>   
>   	memset(&params, 0, sizeof(params));
> @@ -1622,14 +1623,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	else
>   		sync_mode = AMDGPU_SYNC_EXPLICIT;
>   
> -	pfn = offset >> PAGE_SHIFT;
> -	if (nodes) {
> -		while (pfn >= nodes->size) {
> -			pfn -= nodes->size;
> -			++nodes;
> -		}
> -	}
> -
>   	amdgpu_vm_eviction_lock(vm);
>   	if (vm->evicting) {
>   		r = -EBUSY;
> @@ -1648,23 +1641,17 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	if (r)
>   		goto error_unlock;
>   
> -	do {
> +	amdgpu_res_first(res, offset, (last - start + 1) * AMDGPU_GPU_PAGE_SIZE,
> +			 &cursor);
> +	while (cursor.remaining) {
>   		uint64_t tmp, num_entries, addr;
>   
> -
> -		num_entries = last - start + 1;
> -		if (nodes) {
> -			addr = nodes->start << PAGE_SHIFT;
> -			num_entries = min((nodes->size - pfn) *
> -				AMDGPU_GPU_PAGES_IN_CPU_PAGE, num_entries);
> -		} else {
> -			addr = 0;
> -		}
> -
> +		num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT;
>   		if (pages_addr) {
>   			bool contiguous = true;
>   
>   			if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) {
> +				uint64_t pfn = cursor.start >> PAGE_SHIFT;
>   				uint64_t count;
>   
>   				contiguous = pages_addr[pfn + 1] == @@ -1684,16 +1671,18 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   			}
>   
>   			if (!contiguous) {
> -				addr = pfn << PAGE_SHIFT;
> +				addr = cursor.start;
>   				params.pages_addr = pages_addr;
>   			} else {
> -				addr = pages_addr[pfn];
> +				addr = pages_addr[cursor.start >> PAGE_SHIFT];
>   				params.pages_addr = NULL;
>   			}
>   
>   		} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
> -			addr += bo_adev->vm_manager.vram_base_offset;
> -			addr += pfn << PAGE_SHIFT;
> +			addr = bo_adev->vm_manager.vram_base_offset +
> +				cursor.start;
> +		} else {
> +			addr = 0;
>   		}
>   
>   		tmp = start + num_entries;
> @@ -1701,14 +1690,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   		if (r)
>   			goto error_unlock;
>   
> -		pfn += num_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
> -		if (nodes && nodes->size == pfn) {
> -			pfn = 0;
> -			++nodes;
> -		}
> +		amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE);
>   		start = tmp;
> -
> -	} while (unlikely(start != last + 1));
> +	};
>   
>   	r = vm->update_funcs->commit(&params, fence);
>   
> @@ -1737,7 +1721,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>   	struct amdgpu_bo_va_mapping *mapping;
>   	dma_addr_t *pages_addr = NULL;
>   	struct ttm_resource *mem;
> -	struct drm_mm_node *nodes;
>   	struct dma_fence **last_update;
>   	struct dma_resv *resv;
>   	uint64_t flags;
> @@ -1746,7 +1729,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>   
>   	if (clear || !bo) {
>   		mem = NULL;
> -		nodes = NULL;
>   		resv = vm->root.base.bo->tbo.base.resv;
>   	} else {
>   		struct drm_gem_object *obj = &bo->tbo.base; @@ -1761,7 +1743,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>   				bo = gem_to_amdgpu_bo(gobj);
>   		}
>   		mem = &bo->tbo.mem;
> -		nodes = mem->mm_node;
>   		if (mem->mem_type == TTM_PL_TT)
>   			pages_addr = bo->tbo.ttm->dma_address;
>   	}
> @@ -1810,7 +1791,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>   		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
>   						resv, mapping->start,
>   						mapping->last, update_flags,
> -						mapping->offset, nodes,
> +						mapping->offset, mem,
>   						pages_addr, last_update);
>   		if (r)
>   			return r;
> --
> 2.25.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code""
  2021-03-23  9:09   ` Nirmoy
@ 2021-03-23  9:52     ` Chen, Guchun
  0 siblings, 0 replies; 4+ messages in thread
From: Chen, Guchun @ 2021-03-23  9:52 UTC (permalink / raw)
  To: Das, Nirmoy, Christian König, amd-gfx, Koenig, Christian

[AMD Public Use]

Hi Christian,

Thanks for your patience.

Unfortunately, after applying the patch below, the Vulkan CTS test fails on my side. The same gfxhub page fault and kernel bug, along with the amdgpu_vm_update_ptes call trace, are observed. I will send the full log to you privately soon.

I suggest holding off on this patch until the root cause has been found.

Regards,
Guchun

-----Original Message-----
From: Das, Nirmoy <Nirmoy.Das@amd.com> 
Sent: Tuesday, March 23, 2021 5:09 PM
To: Chen, Guchun <Guchun.Chen@amd.com>; Christian König <ckoenig.leichtzumerken@gmail.com>; amd-gfx@lists.freedesktop.org
Cc: Das, Nirmoy <Nirmoy.Das@amd.com>
Subject: Re: [PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code""

I tested ./piglit run opengl results/test multiple times. Once I got a gfx timeout error, but without a kernel freeze. I can't reproduce it any more.


Regards,

Nirmoy

On 3/22/21 2:11 PM, Chen, Guchun wrote:
> [AMD Public Use]
>
> Hi Christian,
>
> I will conduct one stress test for this tomorrow. Would you mind waiting for my ack before submitting?
>
> Regards,
> Guchun
>
> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Monday, March 22, 2021 8:41 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Chen, Guchun <Guchun.Chen@amd.com>; Das, Nirmoy 
> <Nirmoy.Das@amd.com>
> Subject: [PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code""
>
> Now that we found the underlying problem we can re-apply this patch.
>
> This reverts commit 867fee7f8821ff42e7308088cf0c3450ac49c17c.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55 +++++++++-----------------
>   1 file changed, 18 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9268db1172bd..bc3951b71079 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -37,6 +37,7 @@
>   #include "amdgpu_gmc.h"
>   #include "amdgpu_xgmi.h"
>   #include "amdgpu_dma_buf.h"
> +#include "amdgpu_res_cursor.h"
>   
>   /**
>    * DOC: GPUVM
> @@ -1583,7 +1584,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>    * @last: last mapped entry
>    * @flags: flags for the entries
>    * @offset: offset into nodes and pages_addr
> - * @nodes: array of drm_mm_nodes with the MC addresses
> + * @res: ttm_resource to map
>    * @pages_addr: DMA addresses to use for mapping
>    * @fence: optional resulting fence
>    *
> @@ -1598,13 +1599,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   				       bool unlocked, struct dma_resv *resv,
>   				       uint64_t start, uint64_t last,
>   				       uint64_t flags, uint64_t offset,
> -				       struct drm_mm_node *nodes,
> +				       struct ttm_resource *res,
>   				       dma_addr_t *pages_addr,
>   				       struct dma_fence **fence)
>   {
>   	struct amdgpu_vm_update_params params;
> +	struct amdgpu_res_cursor cursor;
>   	enum amdgpu_sync_mode sync_mode;
> -	uint64_t pfn;
>   	int r;
>   
>   	memset(&params, 0, sizeof(params)); @@ -1622,14 +1623,6 @@ static 
> int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	else
>   		sync_mode = AMDGPU_SYNC_EXPLICIT;
>   
> -	pfn = offset >> PAGE_SHIFT;
> -	if (nodes) {
> -		while (pfn >= nodes->size) {
> -			pfn -= nodes->size;
> -			++nodes;
> -		}
> -	}
> -
>   	amdgpu_vm_eviction_lock(vm);
>   	if (vm->evicting) {
>   		r = -EBUSY;
> @@ -1648,23 +1641,17 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	if (r)
>   		goto error_unlock;
>   
> -	do {
> +	amdgpu_res_first(res, offset, (last - start + 1) * AMDGPU_GPU_PAGE_SIZE,
> +			 &cursor);
> +	while (cursor.remaining) {
>   		uint64_t tmp, num_entries, addr;
>   
> -
> -		num_entries = last - start + 1;
> -		if (nodes) {
> -			addr = nodes->start << PAGE_SHIFT;
> -			num_entries = min((nodes->size - pfn) *
> -				AMDGPU_GPU_PAGES_IN_CPU_PAGE, num_entries);
> -		} else {
> -			addr = 0;
> -		}
> -
> +		num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT;
>   		if (pages_addr) {
>   			bool contiguous = true;
>   
>   			if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) {
> +				uint64_t pfn = cursor.start >> PAGE_SHIFT;
>   				uint64_t count;
>   
>   				contiguous = pages_addr[pfn + 1] == @@ -1684,16 +1671,18 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   			}
>   
>   			if (!contiguous) {
> -				addr = pfn << PAGE_SHIFT;
> +				addr = cursor.start;
>   				params.pages_addr = pages_addr;
>   			} else {
> -				addr = pages_addr[pfn];
> +				addr = pages_addr[cursor.start >> PAGE_SHIFT];
>   				params.pages_addr = NULL;
>   			}
>   
>   		} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
> -			addr += bo_adev->vm_manager.vram_base_offset;
> -			addr += pfn << PAGE_SHIFT;
> +			addr = bo_adev->vm_manager.vram_base_offset +
> +				cursor.start;
> +		} else {
> +			addr = 0;
>   		}
>   
>   		tmp = start + num_entries;
> @@ -1701,14 +1690,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   		if (r)
>   			goto error_unlock;
>   
> -		pfn += num_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
> -		if (nodes && nodes->size == pfn) {
> -			pfn = 0;
> -			++nodes;
> -		}
> +		amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE);
>   		start = tmp;
> -
> -	} while (unlikely(start != last + 1));
> +	};
>   
>   	r = vm->update_funcs->commit(&params, fence);
>   
> @@ -1737,7 +1721,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>   	struct amdgpu_bo_va_mapping *mapping;
>   	dma_addr_t *pages_addr = NULL;
>   	struct ttm_resource *mem;
> -	struct drm_mm_node *nodes;
>   	struct dma_fence **last_update;
>   	struct dma_resv *resv;
>   	uint64_t flags;
> @@ -1746,7 +1729,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device 
> *adev, struct amdgpu_bo_va *bo_va,
>   
>   	if (clear || !bo) {
>   		mem = NULL;
> -		nodes = NULL;
>   		resv = vm->root.base.bo->tbo.base.resv;
>   	} else {
>   		struct drm_gem_object *obj = &bo->tbo.base; @@ -1761,7 +1743,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>   				bo = gem_to_amdgpu_bo(gobj);
>   		}
>   		mem = &bo->tbo.mem;
> -		nodes = mem->mm_node;
>   		if (mem->mem_type == TTM_PL_TT)
>   			pages_addr = bo->tbo.ttm->dma_address;
>   	}
> @@ -1810,7 +1791,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>   		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
>   						resv, mapping->start,
>   						mapping->last, update_flags,
> -						mapping->offset, nodes,
> +						mapping->offset, mem,
>   						pages_addr, last_update);
>   		if (r)
>   			return r;
> --
> 2.25.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-03-23  9:52 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-22 12:40 [PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code"" Christian König
2021-03-22 13:11 ` Chen, Guchun
2021-03-23  9:09   ` Nirmoy
2021-03-23  9:52     ` Chen, Guchun

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.