* [PATCH] drm/amdgpu: revert "XGMI pstate switch initial support"
@ 2019-03-19 12:07 Christian König
       [not found] ` <20190319120703.28786-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2019-03-19 12:07 UTC (permalink / raw)
  To: Shaoyun.Liu-5C7GfCeVMHo, Alexander.Deucher-5C7GfCeVMHo,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

This reverts commit c9115f8904eef0f880d3b4f8306f553b1bb1c532.

Adding this to the mapping is complete nonsense and the whole
implementation looks racy. This patch wasn't thoughtfully reviewed
and should be reverted for now.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  3 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 29 +---------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 15 -----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  2 --
 6 files changed, 1 insertion(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b5720c1610e1..1db192150532 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -931,9 +931,6 @@ struct amdgpu_device {
 	int asic_reset_res;
 	struct work_struct		xgmi_reset_work;
 
-	/* counter of mapped memory through xgmi */
-	atomic_t			xgmi_map_counter;
-
 	bool                            in_baco_reset;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 964a4d3f1f43..206583707124 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2018,9 +2018,6 @@ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
 	r = amdgpu_device_enable_mgpu_fan_boost();
 	if (r)
 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
-
-	/*set to low pstate by default */
-	amdgpu_xgmi_set_pstate(adev, 0);
 }
 
 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 6f176bbe4cf2..220a6a7b1bc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -54,7 +54,6 @@ struct amdgpu_bo_va_mapping {
 	uint64_t			__subtree_last;
 	uint64_t			offset;
 	uint64_t			flags;
-	bool				is_xgmi;
 };
 
 /* User space allocated BO in a VM */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c5230a9fb7f6..c8f0e4ca05fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -34,7 +34,6 @@
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_gmc.h"
-#include "amdgpu_xgmi.h"
 
 /**
  * DOC: GPUVM
@@ -2022,9 +2021,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	struct ttm_mem_reg *mem;
 	struct drm_mm_node *nodes;
 	struct dma_fence *exclusive, **last_update;
-	struct amdgpu_device *bo_adev = adev;
-	bool is_xgmi = false;
 	uint64_t flags;
+	struct amdgpu_device *bo_adev = adev;
 	int r;
 
 	if (clear || !bo) {
@@ -2046,10 +2044,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	if (bo) {
 		flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
 		bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
-		if (adev != bo_adev &&
-		    adev->gmc.xgmi.hive_id &&
-		    adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id)
-			is_xgmi = true;
 	} else {
 		flags = 0x0;
 	}
@@ -2068,19 +2062,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	}
 
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
-		if (mapping->is_xgmi != is_xgmi) {
-			if (is_xgmi) {
-				/* Adding an XGMI mapping to the PT */
-				if (atomic_inc_return(&adev->xgmi_map_counter) == 1)
-					amdgpu_xgmi_set_pstate(adev, 1);
-			} else {
-				/* Removing an XGMI mapping from the PT */
-				if (atomic_dec_return(&adev->xgmi_map_counter) == 0)
-					amdgpu_xgmi_set_pstate(adev, 0);
-			}
-			mapping->is_xgmi = is_xgmi;
-		}
-
 		r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
 					       mapping, flags, bo_adev, nodes,
 					       last_update);
@@ -2298,13 +2279,6 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 		r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
 						mapping->start, mapping->last,
 						init_pte_value, 0, &f);
-
-		if (mapping->is_xgmi) {
-			/* Removing an XGMI mapping from the PT */
-			if (atomic_dec_return(&adev->xgmi_map_counter) == 0)
-				amdgpu_xgmi_set_pstate(adev, 0);
-		}
-
 		amdgpu_vm_free_mapping(adev, vm, mapping, f);
 		if (r) {
 			dma_fence_put(f);
@@ -2501,7 +2475,6 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	mapping->last = eaddr;
 	mapping->offset = offset;
 	mapping->flags = flags;
-	mapping->is_xgmi = false;
 
 	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 807440d3edff..fcc4b05c745c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -200,7 +200,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo
 
 	if (lock)
 		mutex_lock(&tmp->hive_lock);
-	tmp->pstate = -1;
 
 	mutex_unlock(&xgmi_mutex);
 
@@ -322,17 +321,3 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
 		mutex_unlock(&hive->hive_lock);
 	}
 }
-
-int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
-{
-	int ret = 0;
-	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
-
-	if (!hive)
-		return 0;
-
-	if (hive->pstate == pstate)
-		return 0;
-	/* Todo : sent the message to SMU for pstate change */
-	return ret;
-}
\ No newline at end of file
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 7e1710fcbef2..24a3b0362f98 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -33,13 +33,11 @@ struct amdgpu_hive_info {
 	struct kobject *kobj;
 	struct device_attribute dev_attr;
 	struct amdgpu_device *adev;
-	int pstate; /*0 -- low , 1 -- high , -1 unknown*/
 };
 
 struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock);
 int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
 int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
 void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
-int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
 
 #endif
-- 
2.17.1


* Re: [PATCH] drm/amdgpu: revert "XGMI pstate switch initial support"
       [not found] ` <20190319120703.28786-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2019-03-19 12:42   ` Kuehling, Felix
       [not found]     ` <ddf61163-5c5c-2547-2e5d-16f205111a55-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Kuehling, Felix @ 2019-03-19 12:42 UTC (permalink / raw)
  To: Christian König, Liu, Shaoyun, Deucher, Alexander,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

We discussed a few different approaches before settling on this one.

Maybe it needs some more background. XGMI links are quite power hungry. 
Being able to power them down improves performance for power-limited 
workloads that don't need XGMI. In machine learning, pretty much all 
workloads are power limited on our GPUs, so this is not just a 
theoretical thing. The problem is, how do you know whether you need 
XGMI? You need to know whether there are P2P memory mappings involving 
XGMI. So the natural place to put that is in the memory mapping code.

If you do spot a race condition in the code, let's talk about how to fix it.

Regards,
   Felix

On 3/19/2019 8:07 AM, Christian König wrote:
> This reverts commit c9115f8904eef0f880d3b4f8306f553b1bb1c532.
>
> Adding this to the mapping is complete nonsense and the whole
> implementation looks racy. This patch wasn't thoughtfully reviewed
> and should be reverted for now.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

* Re: [PATCH] drm/amdgpu: revert "XGMI pstate switch initial support"
       [not found]     ` <ddf61163-5c5c-2547-2e5d-16f205111a55-5C7GfCeVMHo@public.gmane.org>
@ 2019-03-19 12:49       ` Christian König
       [not found]         ` <405c7ec8-f52e-3d08-3ca1-84933e54adcd-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2019-03-19 12:49 UTC (permalink / raw)
  To: Kuehling, Felix, Liu, Shaoyun, Deucher, Alexander,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Yeah, all that is perfectly fine.

The problem is Shaoyun didn't put this into the mapping code, but rather 
into the VM state machine. So this won't work at all (the counter 
increments/decrements are unbalanced and happen multiple times).

The correct place to add this is amdgpu_vm_bo_add/amdgpu_vm_bo_rmv.

Additional to that the approach with the counter doesn't work because 
you don't have a lock protecting the hw update itself. E.g. while 
powering down you can add a mapping which needs to power it up again and 
so powering down and powering up race with each other.
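
To make this concrete, here is a minimal sketch of the pattern the reverted 
patch effectively uses (the two helper names are made up for illustration; 
the counter and amdgpu_xgmi_set_pstate() are the ones from the patch). The 
counter is atomic, but nothing serializes the pstate switch itself:

/* Illustrative sketch only, not the actual amdgpu code. */
static void xgmi_mapping_added(struct amdgpu_device *adev)
{
	if (atomic_inc_return(&adev->xgmi_map_counter) == 1)
		amdgpu_xgmi_set_pstate(adev, 1);	/* power up, not serialized */
}

static void xgmi_mapping_removed(struct amdgpu_device *adev)
{
	if (atomic_dec_return(&adev->xgmi_map_counter) == 0)
		amdgpu_xgmi_set_pstate(adev, 0);	/* power down, not serialized */
}

If a removal drops the counter to zero while an addition concurrently bumps 
it back to one, the two amdgpu_xgmi_set_pstate() calls can complete in 
either order and the link can end up powered down although a mapping exists.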

Regards,
Christian.

On 19.03.19 at 13:42, Kuehling, Felix wrote:
> We discussed a few different approaches before settling on this one.
>
> Maybe it needs some more background. XGMI links are quite power hungry.
> Being able to power them down improves performance for power-limited
> workloads that don't need XGMI. In machine learning, pretty much all
> workloads are power limited on our GPUs, so this is not just a
> theoretical thing. The problem is, how do you know whether you need
> XGMI? You need to know whether there are P2P memory mappings involving
> XGMI. So the natural place to put that is in the memory mapping code.
>
> If you do spot a race condition in the code, let's talk about how to fix it.
>
> Regards,
>     Felix

* Re: [PATCH] drm/amdgpu: revert "XGMI pstate switch initial support"
       [not found]         ` <405c7ec8-f52e-3d08-3ca1-84933e54adcd-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-03-19 13:10           ` Kuehling, Felix
       [not found]             ` <2d451ed9-ed64-bcff-617a-45cdd41011c2-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Kuehling, Felix @ 2019-03-19 13:10 UTC (permalink / raw)
  To: Koenig, Christian, Liu, Shaoyun, Deucher, Alexander,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 3/19/2019 8:49 AM, Christian König wrote:
> Yeah, all that is perfectly fine.
>
> The problem is Shaoyun didn't put this into the mapping code, but 
> rather into the VM state machine. So this won't work at all (the counter
> increments/decrements are unbalanced and happen multiple times).

We tried to consider all the possible ways that this could go wrong. 
Basically, every time a mapping is updated, we update the is_xgmi state 
and update the counter if it changed. Have you seen the counter become 
unbalanced?


>
> The correct place to add this is amdgpu_vm_bo_add/amdgpu_vm_bo_rmv.

I think we considered that. The problem is that a BO can be migrated 
between bo_add and bo_rmv. I found that even bo->preferred_domain can 
change with AMDGPU_GEM_OP_SET_PLACEMENT. So you can't reliably know 
whether to increment your counter, and your counter can become 
unbalanced if a migration or AMDGPU_GEM_OP_SET_PLACEMENT happens between 
bo_add and bo_rmv.

Therefore we're trying to check for XGMI mappings every time the mapping 
changes and keep track of the state in amdgpu_bo_va_mapping.


>
> Additional to that the approach with the counter doesn't work because 
> you don't have a lock protecting the hw update itself. E.g. while 
> powering down you can add a mapping which needs to power it up again 
> and so powering down and powering up race with each other.

That's a good point.

Regards,
   Felix



* Re: [PATCH] drm/amdgpu: revert "XGMI pstate switch initial support"
       [not found]             ` <2d451ed9-ed64-bcff-617a-45cdd41011c2-5C7GfCeVMHo@public.gmane.org>
@ 2019-03-19 15:42               ` Liu, Shaoyun
       [not found]                 ` <e72f86a3-7eea-5d81-4a96-c6b7003f5864-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Liu, Shaoyun @ 2019-03-19 15:42 UTC (permalink / raw)
  To: Kuehling, Felix, Koenig, Christian, Deucher, Alexander,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Thanks Felix.

We did consider putting the logic into bo_add/bo_rmv, but Felix pointed 
out the object can be migrated from FB to system memory after allocation. 
I also thought of putting the logic inside amdgpu_vm_bo_update_mapping, 
but that function prefers to take the dma address already calculated 
(changing that would cause a huge code reorganization). That's the 
reason I put the logic before the call into amdgpu_vm_bo_update_mapping.

The race condition you described sounds reasonable; let me think about 
how to fix it.

Regards

shaoyun.liu

On 2019-03-19 9:10 a.m., Kuehling, Felix wrote:
> On 3/19/2019 8:49 AM, Christian König wrote:
>> Yeah, all that is perfectly fine.
>>
>> The problem is Shaoyun didn't put this into the mapping code, but
>> rather into the VM state machine. So this won't work at all (the counter
>> increments/decrements are unbalanced and happen multiple times).
> We tried to consider all the possible ways that this could go wrong.
> Basically, every time a mapping is updated, we update the is_xgmi state
> and update the counter if it changed. Have you seen the counter become
> unbalanced?
>
>
>> The correct place to add this is amdgpu_vm_bo_add/amdgpu_vm_bo_rmv.
> I think we considered that. The problem is that a BO can be migrated
> between bo_add and bo_rmv. I found that even bo->preferred_domain can
> change with AMDGPU_GEM_OP_SET_PLACEMENT. So you can't reliably know
> whether to increment your counter, and your counter can become
> unbalanced if a migration or AMDGPU_GEM_OP_SET_PLACEMENT happens between
> bo_add and bo_rmv.
>
> Therefore we're trying to check for XGMI mappings every time the mapping
> changes and keep track of the state in amdgpu_bo_va_mapping.
>
>
>> Additional to that the approach with the counter doesn't work because
>> you don't have a lock protecting the hw update itself. E.g. while
>> powering down you can add a mapping which needs to power it up again
>> and so powering down and powering up race with each other.
> That's a good point.
>
> Regards,
>     Felix

* Re: [PATCH] drm/amdgpu: revert "XGMI pstate switch initial support"
       [not found]                 ` <e72f86a3-7eea-5d81-4a96-c6b7003f5864-5C7GfCeVMHo@public.gmane.org>
@ 2019-03-19 16:09                   ` Koenig, Christian
       [not found]                     ` <63cd6620-9482-a96e-0d97-f44b70bff01e-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Koenig, Christian @ 2019-03-19 16:09 UTC (permalink / raw)
  To: Liu, Shaoyun, Kuehling, Felix, Deucher, Alexander,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 19.03.19 at 16:42, Liu, Shaoyun wrote:
> Thanks Felix.
>
> We did consider putting the logic into bo_add/bo_rmv, but Felix pointed
> out the object can be migrated from FB to system memory after
> allocation.

Yeah, I also considered that and that is actually a really good argument.

But even in this case you still only need the flag in the bo_va 
structure, not the mapping.
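
The XGMI check itself only depends on the BO and the two devices involved, 
so it could be evaluated once for the bo_va when the BO location is 
validated. Roughly (just a sketch; the helper name is made up, the check is 
the one from the reverted patch):

static bool bo_va_uses_xgmi(struct amdgpu_device *adev, struct amdgpu_bo *bo)
{
	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);

	/* remote BO in the same hive -> accessed through an XGMI link */
	return adev != bo_adev &&
	       adev->gmc.xgmi.hive_id &&
	       adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id;
}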

> I also thought of putting the logic inside amdgpu_vm_bo_update_mapping,
> but that function prefers to take the dma address already calculated
> (changing that would cause a huge code reorganization). That's the
> reason I put the logic before the call into amdgpu_vm_bo_update_mapping.

Both places won't work correctly. The problem is simply that you haven't 
considered what happens when the VM is destroyed.

In this case we should not unmap the XGMI mapping, but rather just throw 
away the page tables completely.

Additional to that the mapping code is often interrupted because the 
calling process receives a signal. So it can happen that all of that is 
called multiple times. The is_xgmi variable prevents that, but that's 
really not straightforward.

See the PRT handling for how complicated that gets when you attach 
something like this to the mapping. IIRC we have three different code 
paths by which a PRT mapping can end up being destroyed, including a 
delayed destroy handler and callback.

>
> The race condition you described sounds reasonable; let me think about
> how to fix it.

Two possible code patterns are usually used for this:

A) You have a lock protecting both the counter and the operation:

lock();
if (increment_and_test_counter())
     power_on();
unlock();

lock();
if (decrement_and_test_counter())
     power_off();
unlock();

B) The counter is an atomic and you have a lock protecting the operation:

if (atomic_inc_return() == 1) {
     lock();
     power_on();
     unlock();
}

if (atomic_dec_return() == 0) {
     lock();
     if (double_check_atomic_for_race())
         power_off();
     unlock();
}

The latter is more efficient, but also more work to implement.

Either way I suggest moving the actual increment/decrement into the 
xgmi code and only having the signalling in the VM code.
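
As a rough sketch of what pattern B could look like (placeholder names, not 
the actual amdgpu symbols; would need <linux/atomic.h> and <linux/mutex.h>), 
with the counter re-checked under the lock on both sides to close the 
power up/power down race:

static void set_pstate_high(void);	/* placeholder for the SMU pstate request */
static void set_pstate_low(void);	/* placeholder for the SMU pstate request */

static atomic_t xgmi_map_count = ATOMIC_INIT(0);
static DEFINE_MUTEX(xgmi_pstate_lock);	/* serializes the hw pstate switch */

static void xgmi_map_get(void)
{
	if (atomic_inc_return(&xgmi_map_count) == 1) {
		mutex_lock(&xgmi_pstate_lock);
		if (atomic_read(&xgmi_map_count) > 0)	/* re-check for a racing put */
			set_pstate_high();
		mutex_unlock(&xgmi_pstate_lock);
	}
}

static void xgmi_map_put(void)
{
	if (atomic_dec_return(&xgmi_map_count) == 0) {
		mutex_lock(&xgmi_pstate_lock);
		if (atomic_read(&xgmi_map_count) == 0)	/* double check for the race */
			set_pstate_low();
		mutex_unlock(&xgmi_pstate_lock);
	}
}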

Regards,
Christian.


* Re: [PATCH] drm/amdgpu: revert "XGMI pstate switch initial support"
       [not found]                     ` <63cd6620-9482-a96e-0d97-f44b70bff01e-5C7GfCeVMHo@public.gmane.org>
@ 2019-03-19 18:48                       ` Liu, Shaoyun
       [not found]                         ` <c616ad49-2bc2-d7a6-35e3-27c24c922b5f-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Liu, Shaoyun @ 2019-03-19 18:48 UTC (permalink / raw)
  To: Koenig, Christian, Kuehling, Felix, Deucher, Alexander,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

As I understand it, if we want to implement the logic in the bo_add/rmv
functions, the following conditions all need to be true for a valid
XGMI request:

1.  Check that adev and bo_adev are different.
     This is correct in the ROCm implementation
(amdgpu_amdkfd_gpuvm_map_memory_to_gpu passes in the adev it wants the
BO mapped to). For GEM BOs (amdgpu_gem_object_open/close), the adev is
taken directly from the BO, so it is always the same. Do we need extra
adev information here, and if so how, or do we not need to care about
XGMI on the graphics side at all?

2. Check that bo->preferred_domains is AMDGPU_GEM_DOMAIN_VRAM.
     But as Felix mentioned, this domain can be changed by
AMDGPU_GEM_OP_SET_PLACEMENT, so how do we handle that?

Can you explain a little bit more how to handle eviction with the flag
in the bo_va structure that you mentioned? Do you mean we disable
eviction for a BO with an XGMI mapping?
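
Something like the following is what I have in mind for the check at
bo_add time (just a sketch; the helper name is made up, and the
preferred_domains question from point 2 is still open):

static bool amdgpu_vm_bo_wants_xgmi(struct amdgpu_device *adev,
				    struct amdgpu_bo *bo)
{
	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);

	/* 1. BO lives on another device in the same XGMI hive */
	if (adev == bo_adev || !adev->gmc.xgmi.hive_id ||
	    adev->gmc.xgmi.hive_id != bo_adev->gmc.xgmi.hive_id)
		return false;

	/* 2. BO is expected to stay in VRAM; this is the part that can
	 * change later via AMDGPU_GEM_OP_SET_PLACEMENT
	 */
	return bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM;
}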

Regards
shaoyun.liu


On 2019-03-19 12:09 p.m., Koenig, Christian wrote:
> Am 19.03.19 um 16:42 schrieb Liu, Shaoyun:
>> Thanks Felix .
>>
>> We did consider to put the  logic into bo_add/bo_rmv, but Felix pointed
>> out the  object can be migrate from FB to system memory after allocation
>> .
> Yeah, I also considered that and that is actually a really good argument.
>
> But even in this case you still only need the flag in the bo_va
> structure, not the mapping.
>
>>     I also think of put the  logic inside amdgpu_vm_bo_update_mapping ,
>> but seems that function prefer to take the  dma address already been
>> calculated (change that will cause a huge code reorganize) . That's the
>> reason I put the logic before calling into  amdgpu_vm_bo_update_mapping .
> Both places won't work correctly. The problem is simply that you haven't
> considered what happens when the VM is destroyed.
>
> In this case we should not unmap the XGMI mapping, but rather just throw
> away the page tables completely.
>
> Additional to that the mapping code is often interrupted because the
> calling process receives a signal. So it can happen that all of that is
> called multiple times. The is_xgmi variable prevents that, but that's
> really not straight forward.
>
> Se the PRT handling for how complicated that gets when you attach
> something like this to the mapping. IIRC we have 3 different code paths
> how a PRT mapping can end up being destroyed including a delayed destroy
> handler and callback.
>
>> As the race condition  you described sounds reasonable.  let me think
>> how to fix it .
> Two possible code pattern usually used for this:
>
> A) You have a lock protecting both the counter as well as the operation:
>
> lock();
> if (increment_and_test_counter())
>       power_on()
> unlock();
>
> lock()
> if (decrement_and_test_counter())
>       power_off();
> unlock();
>
> B) The counter is an atomic and you have a lock protecting the operation:
>
> if (atomic_inc_return() == 1) {
>       lock();
>       power_on();
>       unlock();
> }
>
> if (atomic_dec_return() == 0) {
>       lock();
>       if (double_check_atomic_for_race())
>           power_off();
>       unlock();
> }
>
> The later is more efficient, but also more work to implement.
>
> Either way I suggest to move the actual increment/decrement into the
> xgmi code and only have the signalling in the VM code.
>
> Regards,
> Christian.
>
>> Regards
>>
>> shaoyun.liu
>>
>> On 2019-03-19 9:10 a.m., Kuehling, Felix wrote:
>>> On 3/19/2019 8:49 AM, Christian König wrote:
>>>> Yeah, all that is perfectly fine.
>>>>
>>>> The problem is Shaoyun didn't put this into the mapping code, but
>>>> rather into the VM state machine. So this won't work at all (the
>>>> counter and increment/decrement unbalanced and multiple times).
>>> We tried to consider all the possible ways that this could go wrong.
>>> Basically, every time a mapping is updated, we update the is_xgmi state
>>> and update the counter if it changed. Have you seen the counter become
>>> unbalanced?
>>>
>>>
>>>> The correct place to add this is amdgpu_vm_bo_add/amdgpu_vm_bo_rmv.
>>> I think we considered that. The problem is that a BO can be migrated
>>> between bo_add and bo_rmv. I found that even bo->preferred_domain can
>>> change with AMDGPU_GEM_OP_SET_PLACEMENT. So you can't reliably know
>>> whether to increment your counter, and your counter can become
>>> unbalanced if a migration or AMDGPU_GEM_OP_SET_PLACEMENT happens between
>>> bo_add and bo_rmv.
>>>
>>> Therefore we're trying to check for XGMI mappings every time the mapping
>>> changes and keep track of the state in amdgpu_bo_va_mapping.
>>>
>>>
>>>> Additional to that the approach with the counter doesn't work because
>>>> you don't have a lock protecting the hw update itself. E.g. while
>>>> powering down you can add a mapping which needs to power it up again
>>>> and so powering down and powering up race with each other.
>>> That's a good point.
>>>
>>> Regards,
>>>       Felix
>>>
>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>> Am 19.03.19 um 13:42 schrieb Kuehling, Felix:
>>>>> We discussed a few different approaches before settling on this one.
>>>>>
>>>>> Maybe it needs some more background. XGMI links are quite power hungry.
>>>>> Being able to power them down improves performance for power-limited
>>>>> workloads that don't need XGMI. In machine learning, pretty much all
>>>>> workloads are power limited on our GPUs, so this is not just a
>>>>> theoretical thing. The problem is, how do you know whether you need
>>>>> XGMI? You need to know whether there are P2P memory mappings involving
>>>>> XGMI. So the natural place to put that is in the memory mapping code.
>>>>>
>>>>> If you do spot a race condition in the code, let's talk about how to
>>>>> fix it.
>>>>>
>>>>> Regards,
>>>>>        Felix
>>>>>
>>>>> On 3/19/2019 8:07 AM, Christian König wrote:
>>>>>> This reverts commit c9115f8904eef0f880d3b4f8306f553b1bb1c532.
>>>>>>
>>>>>> Adding this to the mapping is complete nonsense and the whole
>>>>>> implementation looks racy. This patch wasn't thoughtfully reviewed
>>>>>> and should be reverted for now.
>>>>>>
>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>> ---
>>>>>>       drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  3 ---
>>>>>>       drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 ---
>>>>>>       drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  1 -
>>>>>>       drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 29
>>>>>> +---------------------
>>>>>>       drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 15 -----------
>>>>>>       drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  2 --
>>>>>>       6 files changed, 1 insertion(+), 52 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>> index b5720c1610e1..1db192150532 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>> @@ -931,9 +931,6 @@ struct amdgpu_device {
>>>>>>           int asic_reset_res;
>>>>>>           struct work_struct        xgmi_reset_work;
>>>>>>       -    /* counter of mapped memory through xgmi */
>>>>>> -    atomic_t            xgmi_map_counter;
>>>>>> -
>>>>>>           bool                            in_baco_reset;
>>>>>>       };
>>>>>>       diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>> index 964a4d3f1f43..206583707124 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>> @@ -2018,9 +2018,6 @@ static void
>>>>>> amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
>>>>>>           r = amdgpu_device_enable_mgpu_fan_boost();
>>>>>>           if (r)
>>>>>>               DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
>>>>>> -
>>>>>> -    /*set to low pstate by default */
>>>>>> -    amdgpu_xgmi_set_pstate(adev, 0);
>>>>>>       }
>>>>>>          static void amdgpu_device_delay_enable_gfx_off(struct
>>>>>> work_struct *work)
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>>>> index 6f176bbe4cf2..220a6a7b1bc1 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>>>> @@ -54,7 +54,6 @@ struct amdgpu_bo_va_mapping {
>>>>>>           uint64_t            __subtree_last;
>>>>>>           uint64_t            offset;
>>>>>>           uint64_t            flags;
>>>>>> -    bool                is_xgmi;
>>>>>>       };
>>>>>>          /* User space allocated BO in a VM */
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> index c5230a9fb7f6..c8f0e4ca05fb 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> @@ -34,7 +34,6 @@
>>>>>>       #include "amdgpu_trace.h"
>>>>>>       #include "amdgpu_amdkfd.h"
>>>>>>       #include "amdgpu_gmc.h"
>>>>>> -#include "amdgpu_xgmi.h"
>>>>>>          /**
>>>>>>        * DOC: GPUVM
>>>>>> @@ -2022,9 +2021,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>>>> *adev,
>>>>>>           struct ttm_mem_reg *mem;
>>>>>>           struct drm_mm_node *nodes;
>>>>>>           struct dma_fence *exclusive, **last_update;
>>>>>> -    struct amdgpu_device *bo_adev = adev;
>>>>>> -    bool is_xgmi = false;
>>>>>>           uint64_t flags;
>>>>>> +    struct amdgpu_device *bo_adev = adev;
>>>>>>           int r;
>>>>>>              if (clear || !bo) {
>>>>>> @@ -2046,10 +2044,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>>>> *adev,
>>>>>>           if (bo) {
>>>>>>               flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
>>>>>>               bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>>>>> -        if (adev != bo_adev &&
>>>>>> -            adev->gmc.xgmi.hive_id &&
>>>>>> -            adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id)
>>>>>> -            is_xgmi = true;
>>>>>>           } else {
>>>>>>               flags = 0x0;
>>>>>>           }
>>>>>> @@ -2068,19 +2062,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>>>> *adev,
>>>>>>           }
>>>>>>              list_for_each_entry(mapping, &bo_va->invalids, list) {
>>>>>> -        if (mapping->is_xgmi != is_xgmi) {
>>>>>> -            if (is_xgmi) {
>>>>>> -                /* Adding an XGMI mapping to the PT */
>>>>>> -                if (atomic_inc_return(&adev->xgmi_map_counter) == 1)
>>>>>> -                    amdgpu_xgmi_set_pstate(adev, 1);
>>>>>> -            } else {
>>>>>> -                /* Removing an XGMI mapping from the PT */
>>>>>> -                if (atomic_dec_return(&adev->xgmi_map_counter) == 0)
>>>>>> -                    amdgpu_xgmi_set_pstate(adev, 0);
>>>>>> -            }
>>>>>> -            mapping->is_xgmi = is_xgmi;
>>>>>> -        }
>>>>>> -
>>>>>>               r = amdgpu_vm_bo_split_mapping(adev, exclusive,
>>>>>> pages_addr, vm,
>>>>>>                                  mapping, flags, bo_adev, nodes,
>>>>>>                                  last_update);
>>>>>> @@ -2298,13 +2279,6 @@ int amdgpu_vm_clear_freed(struct
>>>>>> amdgpu_device *adev,
>>>>>>               r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
>>>>>>                               mapping->start, mapping->last,
>>>>>>                               init_pte_value, 0, &f);
>>>>>> -
>>>>>> -        if (mapping->is_xgmi) {
>>>>>> -            /* Removing an XGMI mapping from the PT */
>>>>>> -            if (atomic_dec_return(&adev->xgmi_map_counter) == 0)
>>>>>> -                amdgpu_xgmi_set_pstate(adev, 0);
>>>>>> -        }
>>>>>> -
>>>>>>               amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>>>>>               if (r) {
>>>>>>                   dma_fence_put(f);
>>>>>> @@ -2501,7 +2475,6 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
>>>>>>           mapping->last = eaddr;
>>>>>>           mapping->offset = offset;
>>>>>>           mapping->flags = flags;
>>>>>> -    mapping->is_xgmi = false;
>>>>>>              amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>>>>>>       diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>>>>>> index 807440d3edff..fcc4b05c745c 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>>>>>> @@ -200,7 +200,6 @@ struct amdgpu_hive_info
>>>>>> *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo
>>>>>>              if (lock)
>>>>>>               mutex_lock(&tmp->hive_lock);
>>>>>> -    tmp->pstate = -1;
>>>>>>              mutex_unlock(&xgmi_mutex);
>>>>>>       @@ -322,17 +321,3 @@ void amdgpu_xgmi_remove_device(struct
>>>>>> amdgpu_device *adev)
>>>>>>               mutex_unlock(&hive->hive_lock);
>>>>>>           }
>>>>>>       }
>>>>>> -
>>>>>> -int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
>>>>>> -{
>>>>>> -    int ret = 0;
>>>>>> -    struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
>>>>>> -
>>>>>> -    if (!hive)
>>>>>> -        return 0;
>>>>>> -
>>>>>> -    if (hive->pstate == pstate)
>>>>>> -        return 0;
>>>>>> -    /* Todo : sent the message to SMU for pstate change */
>>>>>> -    return ret;
>>>>>> -}
>>>>>> \ No newline at end of file
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
>>>>>> index 7e1710fcbef2..24a3b0362f98 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
>>>>>> @@ -33,13 +33,11 @@ struct amdgpu_hive_info {
>>>>>>           struct kobject *kobj;
>>>>>>           struct device_attribute dev_attr;
>>>>>>           struct amdgpu_device *adev;
>>>>>> -    int pstate; /*0 -- low , 1 -- high , -1 unknown*/
>>>>>>       };
>>>>>>          struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct
>>>>>> amdgpu_device *adev, int lock);
>>>>>>       int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive,
>>>>>> struct amdgpu_device *adev);
>>>>>>       int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
>>>>>>       void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
>>>>>> -int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
>>>>>>          #endif
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: revert "XGMI pstate switch initial support"
       [not found]                         ` <c616ad49-2bc2-d7a6-35e3-27c24c922b5f-5C7GfCeVMHo@public.gmane.org>
@ 2019-03-20 10:00                           ` Christian König
       [not found]                             ` <6794e80b-c935-12fa-05a2-a710e2d310a6-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2019-03-20 10:00 UTC (permalink / raw)
  To: Liu, Shaoyun, Koenig, Christian, Kuehling, Felix, Deucher,
	Alexander, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 19.03.19 um 19:48 schrieb Liu, Shaoyun:
> As I understand,  if we want to implement the  logic in bo_add/rmv
> function ,  I need following conditions are all to be true for a valid
> XGMI request.
>
> 1.  check the adev and the bo_adev are different .
>       This is correct on rocm implementation .(
> amdgpu_amdkfd_gpuvm_map_memory_to_gpu pass in the correct adev it want
> to mapped to).  For gem bo (amdgpu_gem_object_open/close), the  adev is
> get from bo directly so it's alway same.  Do we need extra adev info
> and  how or we don't need to care about the XGMI  for graphic side .

That the GEM code uses the wrong adev sounds like a bug to me, which
should be fixed anyway.

Apart from that, I would just add a function amdgpu_xgmi_same_hive(adev1,
adev2) which checks adev1 != adev2 and adev1->hive_id == adev2->hive_id.
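
Something along these lines (just a sketch, reusing the same hive_id
check the revert removes from amdgpu_vm_bo_update()):

static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
					 struct amdgpu_device *bo_adev)
{
	return adev != bo_adev &&
	       adev->gmc.xgmi.hive_id &&
	       adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id;
}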

> 2. check the  bo->preferred_domains to be  AMDGPU_GEM_DOMAIN_VRAM.
>       But as Felix mentioned , this domain can be changed by
> AMDGPU_GEM_OP_SET_PLACEMENT , how to handle this?

As Felix pointed out, BOs can move around for a couple of reasons, so
actually checking whether XGMI is used or not can be rather tricky.

I would just check the hive_id of the involved devices, so that
whenever XGMI can potentially be used we power it on.

The only alternative I can see is the same complicated handling as
with PRT.

> Can you explain a little bit more on how to handle the eviction with the
> flag in bo_va structure as you mentioned ?  Do you mean we disable the
> eviction for the  bo with XGMI request ?

Just put the is_xgmi flag into the bo_va structure instead of the 
mapping structure.

And when the bo_va structure is removed in amdgpu_vm_bo_rmv you also
decrement the counter.

Apart from that the handling stays the same, e.g. you increment the 
counter during the page table update.
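
Roughly like this -- only a sketch of pattern (B) from my earlier mail,
where xgmi_pstate_lock and the map_get/put helpers are made-up names,
while xgmi_map_counter and amdgpu_xgmi_set_pstate() are the ones from
the patch being reverted:

static void amdgpu_xgmi_map_get(struct amdgpu_device *adev)
{
	if (atomic_inc_return(&adev->xgmi_map_counter) == 1) {
		mutex_lock(&adev->xgmi_pstate_lock);
		/* first XGMI mapping -> raise the link pstate */
		amdgpu_xgmi_set_pstate(adev, 1);
		mutex_unlock(&adev->xgmi_pstate_lock);
	}
}

static void amdgpu_xgmi_map_put(struct amdgpu_device *adev)
{
	if (atomic_dec_return(&adev->xgmi_map_counter) == 0) {
		mutex_lock(&adev->xgmi_pstate_lock);
		/* double check: a new mapping may have raced with us */
		if (atomic_read(&adev->xgmi_map_counter) == 0)
			amdgpu_xgmi_set_pstate(adev, 0);
		mutex_unlock(&adev->xgmi_pstate_lock);
	}
}

map_get() would be called from the page table update when
bo_va->is_xgmi flips to true, and map_put() from amdgpu_vm_bo_rmv()
when the flag is set, so the counter can't become unbalanced by
repeated mapping updates.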

Regards,
Christian.

>
> Regards
> shaoyun.liu
>
>
> On 2019-03-19 12:09 p.m., Koenig, Christian wrote:
>> Am 19.03.19 um 16:42 schrieb Liu, Shaoyun:
>>> Thanks Felix .
>>>
>>> We did consider to put the  logic into bo_add/bo_rmv, but Felix pointed
>>> out the  object can be migrate from FB to system memory after allocation
>>> .
>> Yeah, I also considered that and that is actually a really good argument.
>>
>> But even in this case you still only need the flag in the bo_va
>> structure, not the mapping.
>>
>>>      I also think of put the  logic inside amdgpu_vm_bo_update_mapping ,
>>> but seems that function prefer to take the  dma address already been
>>> calculated (change that will cause a huge code reorganize) . That's the
>>> reason I put the logic before calling into  amdgpu_vm_bo_update_mapping .
>> Both places won't work correctly. The problem is simply that you haven't
>> considered what happens when the VM is destroyed.
>>
>> In this case we should not unmap the XGMI mapping, but rather just throw
>> away the page tables completely.
>>
>> Additional to that the mapping code is often interrupted because the
>> calling process receives a signal. So it can happen that all of that is
>> called multiple times. The is_xgmi variable prevents that, but that's
>> really not straight forward.
>>
>> Se the PRT handling for how complicated that gets when you attach
>> something like this to the mapping. IIRC we have 3 different code paths
>> how a PRT mapping can end up being destroyed including a delayed destroy
>> handler and callback.
>>
>>> As the race condition  you described sounds reasonable.  let me think
>>> how to fix it .
>> Two possible code pattern usually used for this:
>>
>> A) You have a lock protecting both the counter as well as the operation:
>>
>> lock();
>> if (increment_and_test_counter())
>>        power_on()
>> unlock();
>>
>> lock()
>> if (decrement_and_test_counter())
>>        power_off();
>> unlock();
>>
>> B) The counter is an atomic and you have a lock protecting the operation:
>>
>> if (atomic_inc_return() == 1) {
>>        lock();
>>        power_on();
>>        unlock();
>> }
>>
>> if (atomic_dec_return() == 0) {
>>        lock();
>>        if (double_check_atomic_for_race())
>>            power_off();
>>        unlock();
>> }
>>
>> The later is more efficient, but also more work to implement.
>>
>> Either way I suggest to move the actual increment/decrement into the
>> xgmi code and only have the signalling in the VM code.
>>
>> Regards,
>> Christian.
>>
>>> Regards
>>>
>>> shaoyun.liu
>>>
>>> On 2019-03-19 9:10 a.m., Kuehling, Felix wrote:
>>>> On 3/19/2019 8:49 AM, Christian König wrote:
>>>>> Yeah, all that is perfectly fine.
>>>>>
>>>>> The problem is Shaoyun didn't put this into the mapping code, but
>>>>> rather into the VM state machine. So this won't work at all (the
>>>>> counter and increment/decrement unbalanced and multiple times).
>>>> We tried to consider all the possible ways that this could go wrong.
>>>> Basically, every time a mapping is updated, we update the is_xgmi state
>>>> and update the counter if it changed. Have you seen the counter become
>>>> unbalanced?
>>>>
>>>>
>>>>> The correct place to add this is amdgpu_vm_bo_add/amdgpu_vm_bo_rmv.
>>>> I think we considered that. The problem is that a BO can be migrated
>>>> between bo_add and bo_rmv. I found that even bo->preferred_domain can
>>>> change with AMDGPU_GEM_OP_SET_PLACEMENT. So you can't reliably know
>>>> whether to increment your counter, and your counter can become
>>>> unbalanced if a migration or AMDGPU_GEM_OP_SET_PLACEMENT happens between
>>>> bo_add and bo_rmv.
>>>>
>>>> Therefore we're trying to check for XGMI mappings every time the mapping
>>>> changes and keep track of the state in amdgpu_bo_va_mapping.
>>>>
>>>>
>>>>> Additional to that the approach with the counter doesn't work because
>>>>> you don't have a lock protecting the hw update itself. E.g. while
>>>>> powering down you can add a mapping which needs to power it up again
>>>>> and so powering down and powering up race with each other.
>>>> That's a good point.
>>>>
>>>> Regards,
>>>>        Felix
>>>>
>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>> Am 19.03.19 um 13:42 schrieb Kuehling, Felix:
>>>>>> We discussed a few different approaches before settling on this one.
>>>>>>
>>>>>> Maybe it needs some more background. XGMI links are quite power hungry.
>>>>>> Being able to power them down improves performance for power-limited
>>>>>> workloads that don't need XGMI. In machine learning, pretty much all
>>>>>> workloads are power limited on our GPUs, so this is not just a
>>>>>> theoretical thing. The problem is, how do you know whether you need
>>>>>> XGMI? You need to know whether there are P2P memory mappings involving
>>>>>> XGMI. So the natural place to put that is in the memory mapping code.
>>>>>>
>>>>>> If you do spot a race condition in the code, let's talk about how to
>>>>>> fix it.
>>>>>>
>>>>>> Regards,
>>>>>>         Felix
>>>>>>
>>>>>> On 3/19/2019 8:07 AM, Christian König wrote:
>>>>>>> This reverts commit c9115f8904eef0f880d3b4f8306f553b1bb1c532.
>>>>>>>
>>>>>>> Adding this to the mapping is complete nonsense and the whole
>>>>>>> implementation looks racy. This patch wasn't thoughtfully reviewed
>>>>>>> and should be reverted for now.
>>>>>>>
>>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>>> ---
>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  3 ---
>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 ---
>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  1 -
>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 29
>>>>>>> +---------------------
>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 15 -----------
>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  2 --
>>>>>>>        6 files changed, 1 insertion(+), 52 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>> index b5720c1610e1..1db192150532 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>> @@ -931,9 +931,6 @@ struct amdgpu_device {
>>>>>>>            int asic_reset_res;
>>>>>>>            struct work_struct        xgmi_reset_work;
>>>>>>>        -    /* counter of mapped memory through xgmi */
>>>>>>> -    atomic_t            xgmi_map_counter;
>>>>>>> -
>>>>>>>            bool                            in_baco_reset;
>>>>>>>        };
>>>>>>>        diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>> index 964a4d3f1f43..206583707124 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>> @@ -2018,9 +2018,6 @@ static void
>>>>>>> amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
>>>>>>>            r = amdgpu_device_enable_mgpu_fan_boost();
>>>>>>>            if (r)
>>>>>>>                DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
>>>>>>> -
>>>>>>> -    /*set to low pstate by default */
>>>>>>> -    amdgpu_xgmi_set_pstate(adev, 0);
>>>>>>>        }
>>>>>>>           static void amdgpu_device_delay_enable_gfx_off(struct
>>>>>>> work_struct *work)
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>>>>> index 6f176bbe4cf2..220a6a7b1bc1 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>>>>> @@ -54,7 +54,6 @@ struct amdgpu_bo_va_mapping {
>>>>>>>            uint64_t            __subtree_last;
>>>>>>>            uint64_t            offset;
>>>>>>>            uint64_t            flags;
>>>>>>> -    bool                is_xgmi;
>>>>>>>        };
>>>>>>>           /* User space allocated BO in a VM */
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>> index c5230a9fb7f6..c8f0e4ca05fb 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>> @@ -34,7 +34,6 @@
>>>>>>>        #include "amdgpu_trace.h"
>>>>>>>        #include "amdgpu_amdkfd.h"
>>>>>>>        #include "amdgpu_gmc.h"
>>>>>>> -#include "amdgpu_xgmi.h"
>>>>>>>           /**
>>>>>>>         * DOC: GPUVM
>>>>>>> @@ -2022,9 +2021,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>>>>> *adev,
>>>>>>>            struct ttm_mem_reg *mem;
>>>>>>>            struct drm_mm_node *nodes;
>>>>>>>            struct dma_fence *exclusive, **last_update;
>>>>>>> -    struct amdgpu_device *bo_adev = adev;
>>>>>>> -    bool is_xgmi = false;
>>>>>>>            uint64_t flags;
>>>>>>> +    struct amdgpu_device *bo_adev = adev;
>>>>>>>            int r;
>>>>>>>               if (clear || !bo) {
>>>>>>> @@ -2046,10 +2044,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>>>>> *adev,
>>>>>>>            if (bo) {
>>>>>>>                flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
>>>>>>>                bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>>>>>> -        if (adev != bo_adev &&
>>>>>>> -            adev->gmc.xgmi.hive_id &&
>>>>>>> -            adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id)
>>>>>>> -            is_xgmi = true;
>>>>>>>            } else {
>>>>>>>                flags = 0x0;
>>>>>>>            }
>>>>>>> @@ -2068,19 +2062,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>>>>> *adev,
>>>>>>>            }
>>>>>>>               list_for_each_entry(mapping, &bo_va->invalids, list) {
>>>>>>> -        if (mapping->is_xgmi != is_xgmi) {
>>>>>>> -            if (is_xgmi) {
>>>>>>> -                /* Adding an XGMI mapping to the PT */
>>>>>>> -                if (atomic_inc_return(&adev->xgmi_map_counter) == 1)
>>>>>>> -                    amdgpu_xgmi_set_pstate(adev, 1);
>>>>>>> -            } else {
>>>>>>> -                /* Removing an XGMI mapping from the PT */
>>>>>>> -                if (atomic_dec_return(&adev->xgmi_map_counter) == 0)
>>>>>>> -                    amdgpu_xgmi_set_pstate(adev, 0);
>>>>>>> -            }
>>>>>>> -            mapping->is_xgmi = is_xgmi;
>>>>>>> -        }
>>>>>>> -
>>>>>>>                r = amdgpu_vm_bo_split_mapping(adev, exclusive,
>>>>>>> pages_addr, vm,
>>>>>>>                                   mapping, flags, bo_adev, nodes,
>>>>>>>                                   last_update);
>>>>>>> @@ -2298,13 +2279,6 @@ int amdgpu_vm_clear_freed(struct
>>>>>>> amdgpu_device *adev,
>>>>>>>                r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
>>>>>>>                                mapping->start, mapping->last,
>>>>>>>                                init_pte_value, 0, &f);
>>>>>>> -
>>>>>>> -        if (mapping->is_xgmi) {
>>>>>>> -            /* Removing an XGMI mapping from the PT */
>>>>>>> -            if (atomic_dec_return(&adev->xgmi_map_counter) == 0)
>>>>>>> -                amdgpu_xgmi_set_pstate(adev, 0);
>>>>>>> -        }
>>>>>>> -
>>>>>>>                amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>>>>>>                if (r) {
>>>>>>>                    dma_fence_put(f);
>>>>>>> @@ -2501,7 +2475,6 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
>>>>>>>            mapping->last = eaddr;
>>>>>>>            mapping->offset = offset;
>>>>>>>            mapping->flags = flags;
>>>>>>> -    mapping->is_xgmi = false;
>>>>>>>               amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>>>>>>>        diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>>>>>>> index 807440d3edff..fcc4b05c745c 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>>>>>>> @@ -200,7 +200,6 @@ struct amdgpu_hive_info
>>>>>>> *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo
>>>>>>>               if (lock)
>>>>>>>                mutex_lock(&tmp->hive_lock);
>>>>>>> -    tmp->pstate = -1;
>>>>>>>               mutex_unlock(&xgmi_mutex);
>>>>>>>        @@ -322,17 +321,3 @@ void amdgpu_xgmi_remove_device(struct
>>>>>>> amdgpu_device *adev)
>>>>>>>                mutex_unlock(&hive->hive_lock);
>>>>>>>            }
>>>>>>>        }
>>>>>>> -
>>>>>>> -int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
>>>>>>> -{
>>>>>>> -    int ret = 0;
>>>>>>> -    struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
>>>>>>> -
>>>>>>> -    if (!hive)
>>>>>>> -        return 0;
>>>>>>> -
>>>>>>> -    if (hive->pstate == pstate)
>>>>>>> -        return 0;
>>>>>>> -    /* Todo : sent the message to SMU for pstate change */
>>>>>>> -    return ret;
>>>>>>> -}
>>>>>>> \ No newline at end of file
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
>>>>>>> index 7e1710fcbef2..24a3b0362f98 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
>>>>>>> @@ -33,13 +33,11 @@ struct amdgpu_hive_info {
>>>>>>>            struct kobject *kobj;
>>>>>>>            struct device_attribute dev_attr;
>>>>>>>            struct amdgpu_device *adev;
>>>>>>> -    int pstate; /*0 -- low , 1 -- high , -1 unknown*/
>>>>>>>        };
>>>>>>>           struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct
>>>>>>> amdgpu_device *adev, int lock);
>>>>>>>        int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive,
>>>>>>> struct amdgpu_device *adev);
>>>>>>>        int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
>>>>>>>        void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
>>>>>>> -int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
>>>>>>>           #endif
>>>> _______________________________________________
>>>> amd-gfx mailing list
>>>> amd-gfx@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: revert "XGMI pstate switch initial support"
       [not found]                             ` <6794e80b-c935-12fa-05a2-a710e2d310a6-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-03-20 15:34                               ` Liu, Shaoyun
  0 siblings, 0 replies; 9+ messages in thread
From: Liu, Shaoyun @ 2019-03-20 15:34 UTC (permalink / raw)
  To: Koenig, Christian, Kuehling, Felix, Deucher, Alexander,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

OK, sounds good. Please go ahead and revert the change. I will send
out another one for review.

Regards

shaoyun.liu


On 2019-03-20 6:00 a.m., Christian König wrote:
> Am 19.03.19 um 19:48 schrieb Liu, Shaoyun:
>> As I understand,  if we want to implement the  logic in bo_add/rmv
>> function ,  I need following conditions are all to be true for a valid
>> XGMI request.
>>
>> 1.  check the adev and the bo_adev are different .
>>       This is correct on rocm implementation .(
>> amdgpu_amdkfd_gpuvm_map_memory_to_gpu pass in the correct adev it want
>> to mapped to).  For gem bo (amdgpu_gem_object_open/close), the adev is
>> get from bo directly so it's alway same.  Do we need extra adev info
>> and  how or we don't need to care about the XGMI  for graphic side .
>
> That the GEM code uses the wrong adev sounds like a bug to me which 
> should be fixed anyway.
>
> Apart from that I would just add a function 
> amdgpu_xgmi_same_hive(adev1, adev2) which checks adev1!=adev2 and 
> adev1->hive_id == adev2->hive_id.
>
>> 2. check the  bo->preferred_domains to be  AMDGPU_GEM_DOMAIN_VRAM.
>>       But as Felix mentioned , this domain can be changed by
>> AMDGPU_GEM_OP_SET_PLACEMENT , how to handle this?
>
> As Felix pointed out BOs can move around for a couple of reasons. So 
> actually checking if we use XGMI or not can be rather tricky.
>
> I would just check the hive_id of the involved devices, so that if we 
> can potentially use XGMI we power it on.
>
> Only alternative I can see is to have the same complicated handling as 
> with PRT.
>
>> Can you explain a little bit more on how to handle the eviction with the
>> flag in bo_va structure as you mentioned ?  Do you mean we disable the
>> eviction for the  bo with XGMI request ?
>
> Just put the is_xgmi flag into the bo_va structure instead of the 
> mapping structure.
>
> And when the bo_va structure is remove in amdgpu_vm_bo_rmv you also 
> decrement the counter.
>
> Apart from that the handling stays the same, e.g. you increment the 
> counter during the page table update.
>
> Regards,
> Christian.
>
>>
>> Regards
>> shaoyun.liu
>>
>>
>> On 2019-03-19 12:09 p.m., Koenig, Christian wrote:
>>> Am 19.03.19 um 16:42 schrieb Liu, Shaoyun:
>>>> Thanks Felix .
>>>>
>>>> We did consider to put the  logic into bo_add/bo_rmv, but Felix 
>>>> pointed
>>>> out the  object can be migrate from FB to system memory after 
>>>> allocation
>>>> .
>>> Yeah, I also considered that and that is actually a really good 
>>> argument.
>>>
>>> But even in this case you still only need the flag in the bo_va
>>> structure, not the mapping.
>>>
>>>>      I also think of put the  logic inside 
>>>> amdgpu_vm_bo_update_mapping ,
>>>> but seems that function prefer to take the  dma address already been
>>>> calculated (change that will cause a huge code reorganize) . That's 
>>>> the
>>>> reason I put the logic before calling into 
>>>> amdgpu_vm_bo_update_mapping .
>>> Both places won't work correctly. The problem is simply that you 
>>> haven't
>>> considered what happens when the VM is destroyed.
>>>
>>> In this case we should not unmap the XGMI mapping, but rather just 
>>> throw
>>> away the page tables completely.
>>>
>>> Additional to that the mapping code is often interrupted because the
>>> calling process receives a signal. So it can happen that all of that is
>>> called multiple times. The is_xgmi variable prevents that, but that's
>>> really not straight forward.
>>>
>>> Se the PRT handling for how complicated that gets when you attach
>>> something like this to the mapping. IIRC we have 3 different code paths
>>> how a PRT mapping can end up being destroyed including a delayed 
>>> destroy
>>> handler and callback.
>>>
>>>> As the race condition  you described sounds reasonable.  let me think
>>>> how to fix it .
>>> Two possible code pattern usually used for this:
>>>
>>> A) You have a lock protecting both the counter as well as the 
>>> operation:
>>>
>>> lock();
>>> if (increment_and_test_counter())
>>>        power_on()
>>> unlock();
>>>
>>> lock()
>>> if (decrement_and_test_counter())
>>>        power_off();
>>> unlock();
>>>
>>> B) The counter is an atomic and you have a lock protecting the 
>>> operation:
>>>
>>> if (atomic_inc_return() == 1) {
>>>        lock();
>>>        power_on();
>>>        unlock();
>>> }
>>>
>>> if (atomic_dec_return() == 0) {
>>>        lock();
>>>        if (double_check_atomic_for_race())
>>>            power_off();
>>>        unlock();
>>> }
>>>
>>> The later is more efficient, but also more work to implement.
>>>
>>> Either way I suggest to move the actual increment/decrement into the
>>> xgmi code and only have the signalling in the VM code.
>>>
>>> Regards,
>>> Christian.
>>>
>>>> Regards
>>>>
>>>> shaoyun.liu
>>>>
>>>> On 2019-03-19 9:10 a.m., Kuehling, Felix wrote:
>>>>> On 3/19/2019 8:49 AM, Christian König wrote:
>>>>>> Yeah, all that is perfectly fine.
>>>>>>
>>>>>> The problem is Shaoyun didn't put this into the mapping code, but
>>>>>> rather into the VM state machine. So this won't work at all (the
>>>>>> counter and increment/decrement unbalanced and multiple times).
>>>>> We tried to consider all the possible ways that this could go wrong.
>>>>> Basically, every time a mapping is updated, we update the is_xgmi 
>>>>> state
>>>>> and update the counter if it changed. Have you seen the counter 
>>>>> become
>>>>> unbalanced?
>>>>>
>>>>>
>>>>>> The correct place to add this is amdgpu_vm_bo_add/amdgpu_vm_bo_rmv.
>>>>> I think we considered that. The problem is that a BO can be migrated
>>>>> between bo_add and bo_rmv. I found that even bo->preferred_domain can
>>>>> change with AMDGPU_GEM_OP_SET_PLACEMENT. So you can't reliably know
>>>>> whether to increment your counter, and your counter can become
>>>>> unbalanced if a migration or AMDGPU_GEM_OP_SET_PLACEMENT happens 
>>>>> between
>>>>> bo_add and bo_rmv.
>>>>>
>>>>> Therefore we're trying to check for XGMI mappings every time the 
>>>>> mapping
>>>>> changes and keep track of the state in amdgpu_bo_va_mapping.
>>>>>
>>>>>
>>>>>> Additional to that the approach with the counter doesn't work 
>>>>>> because
>>>>>> you don't have a lock protecting the hw update itself. E.g. while
>>>>>> powering down you can add a mapping which needs to power it up again
>>>>>> and so powering down and powering up race with each other.
>>>>> That's a good point.
>>>>>
>>>>> Regards,
>>>>>        Felix
>>>>>
>>>>>
>>>>>> Regards,
>>>>>> Christian.
>>>>>>
>>>>>> Am 19.03.19 um 13:42 schrieb Kuehling, Felix:
>>>>>>> We discussed a few different approaches before settling on this 
>>>>>>> one.
>>>>>>>
>>>>>>> Maybe it needs some more background. XGMI links are quite power 
>>>>>>> hungry.
>>>>>>> Being able to power them down improves performance for 
>>>>>>> power-limited
>>>>>>> workloads that don't need XGMI. In machine learning, pretty much 
>>>>>>> all
>>>>>>> workloads are power limited on our GPUs, so this is not just a
>>>>>>> theoretical thing. The problem is, how do you know whether you need
>>>>>>> XGMI? You need to know whether there are P2P memory mappings 
>>>>>>> involving
>>>>>>> XGMI. So the natural place to put that is in the memory mapping 
>>>>>>> code.
>>>>>>>
>>>>>>> If you do spot a race condition in the code, let's talk about 
>>>>>>> how to
>>>>>>> fix it.
>>>>>>>
>>>>>>> Regards,
>>>>>>>         Felix
>>>>>>>
>>>>>>> On 3/19/2019 8:07 AM, Christian König wrote:
>>>>>>>> This reverts commit c9115f8904eef0f880d3b4f8306f553b1bb1c532.
>>>>>>>>
>>>>>>>> Adding this to the mapping is complete nonsense and the whole
>>>>>>>> implementation looks racy. This patch wasn't thoughtfully reviewed
>>>>>>>> and should be reverted for now.
>>>>>>>>
>>>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>>>> ---
>>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu.h        | 3 ---
>>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ---
>>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 -
>>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 29
>>>>>>>> +---------------------
>>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 15 -----------
>>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   | 2 --
>>>>>>>>        6 files changed, 1 insertion(+), 52 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>>> index b5720c1610e1..1db192150532 100644
>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>>> @@ -931,9 +931,6 @@ struct amdgpu_device {
>>>>>>>>            int asic_reset_res;
>>>>>>>>            struct work_struct xgmi_reset_work;
>>>>>>>>        -    /* counter of mapped memory through xgmi */
>>>>>>>> -    atomic_t            xgmi_map_counter;
>>>>>>>> -
>>>>>>>>            bool in_baco_reset;
>>>>>>>>        };
>>>>>>>>        diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>>> index 964a4d3f1f43..206583707124 100644
>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>>> @@ -2018,9 +2018,6 @@ static void
>>>>>>>> amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
>>>>>>>>            r = amdgpu_device_enable_mgpu_fan_boost();
>>>>>>>>            if (r)
>>>>>>>>                DRM_ERROR("enable mgpu fan boost failed 
>>>>>>>> (%d).\n", r);
>>>>>>>> -
>>>>>>>> -    /*set to low pstate by default */
>>>>>>>> -    amdgpu_xgmi_set_pstate(adev, 0);
>>>>>>>>        }
>>>>>>>>           static void amdgpu_device_delay_enable_gfx_off(struct
>>>>>>>> work_struct *work)
>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>>>>>> index 6f176bbe4cf2..220a6a7b1bc1 100644
>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>>>>>> @@ -54,7 +54,6 @@ struct amdgpu_bo_va_mapping {
>>>>>>>>            uint64_t            __subtree_last;
>>>>>>>>            uint64_t            offset;
>>>>>>>>            uint64_t            flags;
>>>>>>>> -    bool                is_xgmi;
>>>>>>>>        };
>>>>>>>>           /* User space allocated BO in a VM */
>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>>> index c5230a9fb7f6..c8f0e4ca05fb 100644
>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>>> @@ -34,7 +34,6 @@
>>>>>>>>        #include "amdgpu_trace.h"
>>>>>>>>        #include "amdgpu_amdkfd.h"
>>>>>>>>        #include "amdgpu_gmc.h"
>>>>>>>> -#include "amdgpu_xgmi.h"
>>>>>>>>           /**
>>>>>>>>         * DOC: GPUVM
>>>>>>>> @@ -2022,9 +2021,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>>>>>> *adev,
>>>>>>>>            struct ttm_mem_reg *mem;
>>>>>>>>            struct drm_mm_node *nodes;
>>>>>>>>            struct dma_fence *exclusive, **last_update;
>>>>>>>> -    struct amdgpu_device *bo_adev = adev;
>>>>>>>> -    bool is_xgmi = false;
>>>>>>>>            uint64_t flags;
>>>>>>>> +    struct amdgpu_device *bo_adev = adev;
>>>>>>>>            int r;
>>>>>>>>               if (clear || !bo) {
>>>>>>>> @@ -2046,10 +2044,6 @@ int amdgpu_vm_bo_update(struct 
>>>>>>>> amdgpu_device
>>>>>>>> *adev,
>>>>>>>>            if (bo) {
>>>>>>>>                flags = amdgpu_ttm_tt_pte_flags(adev, 
>>>>>>>> bo->tbo.ttm, mem);
>>>>>>>>                bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>>>>>>> -        if (adev != bo_adev &&
>>>>>>>> -            adev->gmc.xgmi.hive_id &&
>>>>>>>> -            adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id)
>>>>>>>> -            is_xgmi = true;
>>>>>>>>            } else {
>>>>>>>>                flags = 0x0;
>>>>>>>>            }
>>>>>>>> @@ -2068,19 +2062,6 @@ int amdgpu_vm_bo_update(struct 
>>>>>>>> amdgpu_device
>>>>>>>> *adev,
>>>>>>>>            }
>>>>>>>>               list_for_each_entry(mapping, &bo_va->invalids, 
>>>>>>>> list) {
>>>>>>>> -        if (mapping->is_xgmi != is_xgmi) {
>>>>>>>> -            if (is_xgmi) {
>>>>>>>> -                /* Adding an XGMI mapping to the PT */
>>>>>>>> -                if (atomic_inc_return(&adev->xgmi_map_counter) 
>>>>>>>> == 1)
>>>>>>>> -                    amdgpu_xgmi_set_pstate(adev, 1);
>>>>>>>> -            } else {
>>>>>>>> -                /* Removing an XGMI mapping from the PT */
>>>>>>>> -                if (atomic_dec_return(&adev->xgmi_map_counter) 
>>>>>>>> == 0)
>>>>>>>> -                    amdgpu_xgmi_set_pstate(adev, 0);
>>>>>>>> -            }
>>>>>>>> -            mapping->is_xgmi = is_xgmi;
>>>>>>>> -        }
>>>>>>>> -
>>>>>>>>                r = amdgpu_vm_bo_split_mapping(adev, exclusive,
>>>>>>>> pages_addr, vm,
>>>>>>>>                                   mapping, flags, bo_adev, nodes,
>>>>>>>>                                   last_update);
>>>>>>>> @@ -2298,13 +2279,6 @@ int amdgpu_vm_clear_freed(struct
>>>>>>>> amdgpu_device *adev,
>>>>>>>>                r = amdgpu_vm_bo_update_mapping(adev, NULL, 
>>>>>>>> NULL, vm,
>>>>>>>>                                mapping->start, mapping->last,
>>>>>>>>                                init_pte_value, 0, &f);
>>>>>>>> -
>>>>>>>> -        if (mapping->is_xgmi) {
>>>>>>>> -            /* Removing an XGMI mapping from the PT */
>>>>>>>> -            if (atomic_dec_return(&adev->xgmi_map_counter) == 0)
>>>>>>>> -                amdgpu_xgmi_set_pstate(adev, 0);
>>>>>>>> -        }
>>>>>>>> -
>>>>>>>>                amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>>>>>>>                if (r) {
>>>>>>>>                    dma_fence_put(f);
>>>>>>>> @@ -2501,7 +2475,6 @@ int amdgpu_vm_bo_map(struct amdgpu_device 
>>>>>>>> *adev,
>>>>>>>>            mapping->last = eaddr;
>>>>>>>>            mapping->offset = offset;
>>>>>>>>            mapping->flags = flags;
>>>>>>>> -    mapping->is_xgmi = false;
>>>>>>>>               amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>>>>>>>>        diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>>>>>>>> index 807440d3edff..fcc4b05c745c 100644
>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>>>>>>>> @@ -200,7 +200,6 @@ struct amdgpu_hive_info
>>>>>>>> *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo
>>>>>>>>               if (lock)
>>>>>>>>                mutex_lock(&tmp->hive_lock);
>>>>>>>> -    tmp->pstate = -1;
>>>>>>>>               mutex_unlock(&xgmi_mutex);
>>>>>>>>        @@ -322,17 +321,3 @@ void amdgpu_xgmi_remove_device(struct
>>>>>>>> amdgpu_device *adev)
>>>>>>>> mutex_unlock(&hive->hive_lock);
>>>>>>>>            }
>>>>>>>>        }
>>>>>>>> -
>>>>>>>> -int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int 
>>>>>>>> pstate)
>>>>>>>> -{
>>>>>>>> -    int ret = 0;
>>>>>>>> -    struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 
>>>>>>>> 0);
>>>>>>>> -
>>>>>>>> -    if (!hive)
>>>>>>>> -        return 0;
>>>>>>>> -
>>>>>>>> -    if (hive->pstate == pstate)
>>>>>>>> -        return 0;
>>>>>>>> -    /* Todo : sent the message to SMU for pstate change */
>>>>>>>> -    return ret;
>>>>>>>> -}
>>>>>>>> \ No newline at end of file
>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
>>>>>>>> index 7e1710fcbef2..24a3b0362f98 100644
>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
>>>>>>>> @@ -33,13 +33,11 @@ struct amdgpu_hive_info {
>>>>>>>>            struct kobject *kobj;
>>>>>>>>            struct device_attribute dev_attr;
>>>>>>>>            struct amdgpu_device *adev;
>>>>>>>> -    int pstate; /*0 -- low , 1 -- high , -1 unknown*/
>>>>>>>>        };
>>>>>>>>           struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct
>>>>>>>> amdgpu_device *adev, int lock);
>>>>>>>>        int amdgpu_xgmi_update_topology(struct amdgpu_hive_info 
>>>>>>>> *hive,
>>>>>>>> struct amdgpu_device *adev);
>>>>>>>>        int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
>>>>>>>>        void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
>>>>>>>> -int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int 
>>>>>>>> pstate);
>>>>>>>>           #endif
>>>>> _______________________________________________
>>>>> amd-gfx mailing list
>>>>> amd-gfx@lists.freedesktop.org
>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2019-03-20 15:34 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-19 12:07 [PATCH] drm/amdgpu: revert "XGMI pstate switch initial support" Christian König
     [not found] ` <20190319120703.28786-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2019-03-19 12:42   ` Kuehling, Felix
     [not found]     ` <ddf61163-5c5c-2547-2e5d-16f205111a55-5C7GfCeVMHo@public.gmane.org>
2019-03-19 12:49       ` Christian König
     [not found]         ` <405c7ec8-f52e-3d08-3ca1-84933e54adcd-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-03-19 13:10           ` Kuehling, Felix
     [not found]             ` <2d451ed9-ed64-bcff-617a-45cdd41011c2-5C7GfCeVMHo@public.gmane.org>
2019-03-19 15:42               ` Liu, Shaoyun
     [not found]                 ` <e72f86a3-7eea-5d81-4a96-c6b7003f5864-5C7GfCeVMHo@public.gmane.org>
2019-03-19 16:09                   ` Koenig, Christian
     [not found]                     ` <63cd6620-9482-a96e-0d97-f44b70bff01e-5C7GfCeVMHo@public.gmane.org>
2019-03-19 18:48                       ` Liu, Shaoyun
     [not found]                         ` <c616ad49-2bc2-d7a6-35e3-27c24c922b5f-5C7GfCeVMHo@public.gmane.org>
2019-03-20 10:00                           ` Christian König
     [not found]                             ` <6794e80b-c935-12fa-05a2-a710e2d310a6-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-03-20 15:34                               ` Liu, Shaoyun
