All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdkfd: Set pte_flags for actual BO location
@ 2022-08-26 23:16 Felix Kuehling
  2022-08-29 15:38 ` Christian König
  0 siblings, 1 reply; 7+ messages in thread
From: Felix Kuehling @ 2022-08-26 23:16 UTC (permalink / raw)
  To: amd-gfx

BOs can be in a different location than was intended at allocation time,
for example when restoring fails after an eviction or BOs get pinned in
system memory. On some GPUs the MTYPE for coherent mappings depends on
the actual memory location.

Use the actual location to determine the pte_flags every time the page
tables are updated.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  9 ++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 19 +++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |  1 +
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index cbd593f7d553..5dd89f5a032f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -405,6 +405,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
 static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
 {
 	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
+	bool is_vram = mem->bo->tbo.resource->mem_type == TTM_PL_VRAM;
 	bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
 	bool uncached = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED;
 	uint32_t mapping_flags;
@@ -420,7 +421,7 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
 	switch (adev->asic_type) {
 	case CHIP_ARCTURUS:
 	case CHIP_ALDEBARAN:
-		if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+		if (is_vram) {
 			if (bo_adev == adev) {
 				if (uncached)
 					mapping_flags |= AMDGPU_VM_MTYPE_UC;
@@ -1236,12 +1237,18 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
 {
 	struct amdgpu_bo_va *bo_va = entry->bo_va;
 	struct amdgpu_device *adev = entry->adev;
+	uint64_t pte_flags = get_pte_flags(adev, mem);
 	int ret;
 
 	ret = kfd_mem_dmamap_attachment(mem, entry);
 	if (ret)
 		return ret;
 
+	if (unlikely(entry->pte_flags != pte_flags)) {
+		amdgpu_vm_bo_update_flags(bo_va, pte_flags);
+		entry->pte_flags = pte_flags;
+	}
+
 	/* Update the page tables  */
 	ret = amdgpu_vm_bo_update(adev, bo_va, false);
 	if (ret) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 59cac347baa3..954a40d5d828 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1862,6 +1862,25 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 	}
 }
 
+/**
+ * amdgpu_vm_bo_update_flags - Update mapping flags of invalid mappings
+ *
+ * @bo_va: identifies the BO and VM
+ * @flags: new mapping flags
+ *
+ * The update is only applied to invalid mappings. This allows updating the
+ * mapping flags after a migration to maintain the desired coherence. The next
+ * call to amdgpu_vm_bo_update() will apply the new @flags to the page table.
+ */
+void amdgpu_vm_bo_update_flags(struct amdgpu_bo_va *bo_va,
+			       uint64_t flags)
+{
+	struct amdgpu_bo_va_mapping *mapping;
+
+	list_for_each_entry(mapping, &bo_va->invalids, list)
+		mapping->flags = flags;
+}
+
 /**
  * amdgpu_vm_get_block_size - calculate VM page table size as power of two
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 9ecb7f663e19..11793716cd8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -413,6 +413,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 			     struct amdgpu_bo *bo, bool evicted);
+void amdgpu_vm_bo_update_flags(struct amdgpu_bo_va *bo_va, uint64_t flags);
 uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
 struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
 				       struct amdgpu_bo *bo);
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdkfd: Set pte_flags for actual BO location
  2022-08-26 23:16 [PATCH] drm/amdkfd: Set pte_flags for actual BO location Felix Kuehling
@ 2022-08-29 15:38 ` Christian König
  2022-08-29 16:07   ` Felix Kuehling
  0 siblings, 1 reply; 7+ messages in thread
From: Christian König @ 2022-08-29 15:38 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx

Am 27.08.22 um 01:16 schrieb Felix Kuehling:
> BOs can be in a different location than was intended at allocation time,
> for example when restoring fails after an eviction or BOs get pinned in
> system memory. On some GPUs the MTYPE for coherent mappings depends on
> the actual memory location.
>
> Use the actual location to determine the pte_flags every time the page
> tables are updated.

For a workaround ok, but looks a bit awkward. Basically we need 
different MTYPE based on the location, right?

Christian.

>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  9 ++++++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 19 +++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |  1 +
>   3 files changed, 28 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index cbd593f7d553..5dd89f5a032f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -405,6 +405,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
>   static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
>   {
>   	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
> +	bool is_vram = mem->bo->tbo.resource->mem_type == TTM_PL_VRAM;
>   	bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
>   	bool uncached = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED;
>   	uint32_t mapping_flags;
> @@ -420,7 +421,7 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
>   	switch (adev->asic_type) {
>   	case CHIP_ARCTURUS:
>   	case CHIP_ALDEBARAN:
> -		if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
> +		if (is_vram) {
>   			if (bo_adev == adev) {
>   				if (uncached)
>   					mapping_flags |= AMDGPU_VM_MTYPE_UC;
> @@ -1236,12 +1237,18 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
>   {
>   	struct amdgpu_bo_va *bo_va = entry->bo_va;
>   	struct amdgpu_device *adev = entry->adev;
> +	uint64_t pte_flags = get_pte_flags(adev, mem);
>   	int ret;
>   
>   	ret = kfd_mem_dmamap_attachment(mem, entry);
>   	if (ret)
>   		return ret;
>   
> +	if (unlikely(entry->pte_flags != pte_flags)) {
> +		amdgpu_vm_bo_update_flags(bo_va, pte_flags);
> +		entry->pte_flags = pte_flags;
> +	}
> +
>   	/* Update the page tables  */
>   	ret = amdgpu_vm_bo_update(adev, bo_va, false);
>   	if (ret) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 59cac347baa3..954a40d5d828 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1862,6 +1862,25 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>   	}
>   }
>   
> +/**
> + * amdgpu_vm_bo_update_flags - Update mapping flags of invalid mappings
> + *
> + * @bo_va: identifies the BO and VM
> + * @flags: new mapping flags
> + *
> + * The update is only applied to invalid mappings. This allows updating the
> + * mapping flags after a migration to maintain the desired coherence. The next
> + * call to amdgpu_vm_bo_update() will apply the new @flags to the page table.
> + */
> +void amdgpu_vm_bo_update_flags(struct amdgpu_bo_va *bo_va,
> +			       uint64_t flags)
> +{
> +	struct amdgpu_bo_va_mapping *mapping;
> +
> +	list_for_each_entry(mapping, &bo_va->invalids, list)
> +		mapping->flags = flags;
> +}
> +
>   /**
>    * amdgpu_vm_get_block_size - calculate VM page table size as power of two
>    *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 9ecb7f663e19..11793716cd8b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -413,6 +413,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>   bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
>   void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>   			     struct amdgpu_bo *bo, bool evicted);
> +void amdgpu_vm_bo_update_flags(struct amdgpu_bo_va *bo_va, uint64_t flags);
>   uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
>   struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
>   				       struct amdgpu_bo *bo);


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdkfd: Set pte_flags for actual BO location
  2022-08-29 15:38 ` Christian König
@ 2022-08-29 16:07   ` Felix Kuehling
  2022-08-29 18:59     ` Christian König
  0 siblings, 1 reply; 7+ messages in thread
From: Felix Kuehling @ 2022-08-29 16:07 UTC (permalink / raw)
  To: Christian König, amd-gfx

Am 2022-08-29 um 11:38 schrieb Christian König:
> Am 27.08.22 um 01:16 schrieb Felix Kuehling:
>> BOs can be in a different location than was intended at allocation time,
>> for example when restoring fails after an eviction or BOs get pinned in
>> system memory. On some GPUs the MTYPE for coherent mappings depends on
>> the actual memory location.
>>
>> Use the actual location to determine the pte_flags every time the page
>> tables are updated.
>
> For a workaround ok, but looks a bit awkward. Basically we need 
> different MTYPE based on the location, right?

Yes. On Aldebaran and Arcturus we need different MTYPEs for fine-grained 
coherence depending on the location.

Regards,
   Felix


>
> Christian.
>
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> ---
>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  9 ++++++++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 19 +++++++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |  1 +
>>   3 files changed, 28 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index cbd593f7d553..5dd89f5a032f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -405,6 +405,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, 
>> struct amdgpu_sync *sync)
>>   static uint64_t get_pte_flags(struct amdgpu_device *adev, struct 
>> kgd_mem *mem)
>>   {
>>       struct amdgpu_device *bo_adev = 
>> amdgpu_ttm_adev(mem->bo->tbo.bdev);
>> +    bool is_vram = mem->bo->tbo.resource->mem_type == TTM_PL_VRAM;
>>       bool coherent = mem->alloc_flags & 
>> KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
>>       bool uncached = mem->alloc_flags & 
>> KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED;
>>       uint32_t mapping_flags;
>> @@ -420,7 +421,7 @@ static uint64_t get_pte_flags(struct 
>> amdgpu_device *adev, struct kgd_mem *mem)
>>       switch (adev->asic_type) {
>>       case CHIP_ARCTURUS:
>>       case CHIP_ALDEBARAN:
>> -        if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
>> +        if (is_vram) {
>>               if (bo_adev == adev) {
>>                   if (uncached)
>>                       mapping_flags |= AMDGPU_VM_MTYPE_UC;
>> @@ -1236,12 +1237,18 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
>>   {
>>       struct amdgpu_bo_va *bo_va = entry->bo_va;
>>       struct amdgpu_device *adev = entry->adev;
>> +    uint64_t pte_flags = get_pte_flags(adev, mem);
>>       int ret;
>>         ret = kfd_mem_dmamap_attachment(mem, entry);
>>       if (ret)
>>           return ret;
>>   +    if (unlikely(entry->pte_flags != pte_flags)) {
>> +        amdgpu_vm_bo_update_flags(bo_va, pte_flags);
>> +        entry->pte_flags = pte_flags;
>> +    }
>> +
>>       /* Update the page tables  */
>>       ret = amdgpu_vm_bo_update(adev, bo_va, false);
>>       if (ret) {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 59cac347baa3..954a40d5d828 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1862,6 +1862,25 @@ void amdgpu_vm_bo_invalidate(struct 
>> amdgpu_device *adev,
>>       }
>>   }
>>   +/**
>> + * amdgpu_vm_bo_update_flags - Update mapping flags of invalid mappings
>> + *
>> + * @bo_va: identifies the BO and VM
>> + * @flags: new mapping flags
>> + *
>> + * The update is only applied to invalid mappings. This allows 
>> updating the
>> + * mapping flags after a migration to maintain the desired 
>> coherence. The next
>> + * call to amdgpu_vm_bo_update() will apply the new @flags to the 
>> page table.
>> + */
>> +void amdgpu_vm_bo_update_flags(struct amdgpu_bo_va *bo_va,
>> +                   uint64_t flags)
>> +{
>> +    struct amdgpu_bo_va_mapping *mapping;
>> +
>> +    list_for_each_entry(mapping, &bo_va->invalids, list)
>> +        mapping->flags = flags;
>> +}
>> +
>>   /**
>>    * amdgpu_vm_get_block_size - calculate VM page table size as power 
>> of two
>>    *
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 9ecb7f663e19..11793716cd8b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -413,6 +413,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>   bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
>>   void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>>                    struct amdgpu_bo *bo, bool evicted);
>> +void amdgpu_vm_bo_update_flags(struct amdgpu_bo_va *bo_va, uint64_t 
>> flags);
>>   uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t 
>> addr);
>>   struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
>>                          struct amdgpu_bo *bo);
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdkfd: Set pte_flags for actual BO location
  2022-08-29 16:07   ` Felix Kuehling
@ 2022-08-29 18:59     ` Christian König
  2022-08-29 19:30       ` Felix Kuehling
  0 siblings, 1 reply; 7+ messages in thread
From: Christian König @ 2022-08-29 18:59 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx

Am 29.08.22 um 18:07 schrieb Felix Kuehling:
> Am 2022-08-29 um 11:38 schrieb Christian König:
>> Am 27.08.22 um 01:16 schrieb Felix Kuehling:
>>> BOs can be in a different location than was intended at allocation 
>>> time,
>>> for example when restoring fails after an eviction or BOs get pinned in
>>> system memory. On some GPUs the MTYPE for coherent mappings depends on
>>> the actual memory location.
>>>
>>> Use the actual location to determine the pte_flags every time the page
>>> tables are updated.
>>
>> For a workaround ok, but looks a bit awkward. Basically we need 
>> different MTYPE based on the location, right?
>
> Yes. On Aldebaran and Arcturus we need different MTYPEs for 
> fine-grained coherence depending on the location.

It would be much cleaner to have a better description how all this came 
to be in the mapping.

E.g. that we generate the flags in the VM code based on the requirements 
described in the mapping.

Do we have time to clean this up thoughtfully?

Regards,
Christian.

>
> Regards,
>   Felix
>
>
>>
>> Christian.
>>
>>>
>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>> ---
>>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  9 ++++++++-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 19 
>>> +++++++++++++++++++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |  1 +
>>>   3 files changed, 28 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> index cbd593f7d553..5dd89f5a032f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> @@ -405,6 +405,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, 
>>> struct amdgpu_sync *sync)
>>>   static uint64_t get_pte_flags(struct amdgpu_device *adev, struct 
>>> kgd_mem *mem)
>>>   {
>>>       struct amdgpu_device *bo_adev = 
>>> amdgpu_ttm_adev(mem->bo->tbo.bdev);
>>> +    bool is_vram = mem->bo->tbo.resource->mem_type == TTM_PL_VRAM;
>>>       bool coherent = mem->alloc_flags & 
>>> KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
>>>       bool uncached = mem->alloc_flags & 
>>> KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED;
>>>       uint32_t mapping_flags;
>>> @@ -420,7 +421,7 @@ static uint64_t get_pte_flags(struct 
>>> amdgpu_device *adev, struct kgd_mem *mem)
>>>       switch (adev->asic_type) {
>>>       case CHIP_ARCTURUS:
>>>       case CHIP_ALDEBARAN:
>>> -        if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
>>> +        if (is_vram) {
>>>               if (bo_adev == adev) {
>>>                   if (uncached)
>>>                       mapping_flags |= AMDGPU_VM_MTYPE_UC;
>>> @@ -1236,12 +1237,18 @@ static int update_gpuvm_pte(struct kgd_mem 
>>> *mem,
>>>   {
>>>       struct amdgpu_bo_va *bo_va = entry->bo_va;
>>>       struct amdgpu_device *adev = entry->adev;
>>> +    uint64_t pte_flags = get_pte_flags(adev, mem);
>>>       int ret;
>>>         ret = kfd_mem_dmamap_attachment(mem, entry);
>>>       if (ret)
>>>           return ret;
>>>   +    if (unlikely(entry->pte_flags != pte_flags)) {
>>> +        amdgpu_vm_bo_update_flags(bo_va, pte_flags);
>>> +        entry->pte_flags = pte_flags;
>>> +    }
>>> +
>>>       /* Update the page tables  */
>>>       ret = amdgpu_vm_bo_update(adev, bo_va, false);
>>>       if (ret) {
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 59cac347baa3..954a40d5d828 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -1862,6 +1862,25 @@ void amdgpu_vm_bo_invalidate(struct 
>>> amdgpu_device *adev,
>>>       }
>>>   }
>>>   +/**
>>> + * amdgpu_vm_bo_update_flags - Update mapping flags of invalid 
>>> mappings
>>> + *
>>> + * @bo_va: identifies the BO and VM
>>> + * @flags: new mapping flags
>>> + *
>>> + * The update is only applied to invalid mappings. This allows 
>>> updating the
>>> + * mapping flags after a migration to maintain the desired 
>>> coherence. The next
>>> + * call to amdgpu_vm_bo_update() will apply the new @flags to the 
>>> page table.
>>> + */
>>> +void amdgpu_vm_bo_update_flags(struct amdgpu_bo_va *bo_va,
>>> +                   uint64_t flags)
>>> +{
>>> +    struct amdgpu_bo_va_mapping *mapping;
>>> +
>>> +    list_for_each_entry(mapping, &bo_va->invalids, list)
>>> +        mapping->flags = flags;
>>> +}
>>> +
>>>   /**
>>>    * amdgpu_vm_get_block_size - calculate VM page table size as 
>>> power of two
>>>    *
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index 9ecb7f663e19..11793716cd8b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -413,6 +413,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>>   bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
>>>   void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>>>                    struct amdgpu_bo *bo, bool evicted);
>>> +void amdgpu_vm_bo_update_flags(struct amdgpu_bo_va *bo_va, uint64_t 
>>> flags);
>>>   uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t 
>>> addr);
>>>   struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
>>>                          struct amdgpu_bo *bo);
>>


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdkfd: Set pte_flags for actual BO location
  2022-08-29 18:59     ` Christian König
@ 2022-08-29 19:30       ` Felix Kuehling
  2022-08-30  6:00         ` Christian König
  0 siblings, 1 reply; 7+ messages in thread
From: Felix Kuehling @ 2022-08-29 19:30 UTC (permalink / raw)
  To: Christian König, amd-gfx


Am 2022-08-29 um 14:59 schrieb Christian König:
> Am 29.08.22 um 18:07 schrieb Felix Kuehling:
>> Am 2022-08-29 um 11:38 schrieb Christian König:
>>> Am 27.08.22 um 01:16 schrieb Felix Kuehling:
>>>> BOs can be in a different location than was intended at allocation 
>>>> time,
>>>> for example when restoring fails after an eviction or BOs get 
>>>> pinned in
>>>> system memory. On some GPUs the MTYPE for coherent mappings depends on
>>>> the actual memory location.
>>>>
>>>> Use the actual location to determine the pte_flags every time the page
>>>> tables are updated.
>>>
>>> For a workaround ok, but looks a bit awkward. Basically we need 
>>> different MTYPE based on the location, right?
>>
>> Yes. On Aldebaran and Arcturus we need different MTYPEs for 
>> fine-grained coherence depending on the location.
>
> It would be much cleaner to have a better description how all this 
> came to be in the mapping.
>
> E.g. that we generate the flags in the VM code based on the 
> requirements described in the mapping.
>
> Do we have time to clean this up thoughtfully?

Currently we have two places in the KFD code that generates the mapping 
flags. I was planning to eliminate the duplication. I think you're 
suggesting moving it into the amdgpu_vm code instead. Unfortunately it's 
somewhat GPU-specific. So you probably won't like this code in the 
general amdgpu_vm code. Maybe the HW-specific part should be in gmc_v*.c.

The requirements would include:

  * Memory type and mapping (local, system, PCIe P2P, XGMI P2P)
  * Memory model (coarse-grained or fine-grained coherence)

Regards,
   Felix


>
> Regards,
> Christian.
>
>>
>> Regards,
>>   Felix
>>
>>
>>>
>>> Christian.
>>>
>>>>
>>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>> ---
>>>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  9 ++++++++-
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 19 
>>>> +++++++++++++++++++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |  1 +
>>>>   3 files changed, 28 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>> index cbd593f7d553..5dd89f5a032f 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>> @@ -405,6 +405,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, 
>>>> struct amdgpu_sync *sync)
>>>>   static uint64_t get_pte_flags(struct amdgpu_device *adev, struct 
>>>> kgd_mem *mem)
>>>>   {
>>>>       struct amdgpu_device *bo_adev = 
>>>> amdgpu_ttm_adev(mem->bo->tbo.bdev);
>>>> +    bool is_vram = mem->bo->tbo.resource->mem_type == TTM_PL_VRAM;
>>>>       bool coherent = mem->alloc_flags & 
>>>> KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
>>>>       bool uncached = mem->alloc_flags & 
>>>> KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED;
>>>>       uint32_t mapping_flags;
>>>> @@ -420,7 +421,7 @@ static uint64_t get_pte_flags(struct 
>>>> amdgpu_device *adev, struct kgd_mem *mem)
>>>>       switch (adev->asic_type) {
>>>>       case CHIP_ARCTURUS:
>>>>       case CHIP_ALDEBARAN:
>>>> -        if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
>>>> +        if (is_vram) {
>>>>               if (bo_adev == adev) {
>>>>                   if (uncached)
>>>>                       mapping_flags |= AMDGPU_VM_MTYPE_UC;
>>>> @@ -1236,12 +1237,18 @@ static int update_gpuvm_pte(struct kgd_mem 
>>>> *mem,
>>>>   {
>>>>       struct amdgpu_bo_va *bo_va = entry->bo_va;
>>>>       struct amdgpu_device *adev = entry->adev;
>>>> +    uint64_t pte_flags = get_pte_flags(adev, mem);
>>>>       int ret;
>>>>         ret = kfd_mem_dmamap_attachment(mem, entry);
>>>>       if (ret)
>>>>           return ret;
>>>>   +    if (unlikely(entry->pte_flags != pte_flags)) {
>>>> +        amdgpu_vm_bo_update_flags(bo_va, pte_flags);
>>>> +        entry->pte_flags = pte_flags;
>>>> +    }
>>>> +
>>>>       /* Update the page tables  */
>>>>       ret = amdgpu_vm_bo_update(adev, bo_va, false);
>>>>       if (ret) {
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index 59cac347baa3..954a40d5d828 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -1862,6 +1862,25 @@ void amdgpu_vm_bo_invalidate(struct 
>>>> amdgpu_device *adev,
>>>>       }
>>>>   }
>>>>   +/**
>>>> + * amdgpu_vm_bo_update_flags - Update mapping flags of invalid 
>>>> mappings
>>>> + *
>>>> + * @bo_va: identifies the BO and VM
>>>> + * @flags: new mapping flags
>>>> + *
>>>> + * The update is only applied to invalid mappings. This allows 
>>>> updating the
>>>> + * mapping flags after a migration to maintain the desired 
>>>> coherence. The next
>>>> + * call to amdgpu_vm_bo_update() will apply the new @flags to the 
>>>> page table.
>>>> + */
>>>> +void amdgpu_vm_bo_update_flags(struct amdgpu_bo_va *bo_va,
>>>> +                   uint64_t flags)
>>>> +{
>>>> +    struct amdgpu_bo_va_mapping *mapping;
>>>> +
>>>> +    list_for_each_entry(mapping, &bo_va->invalids, list)
>>>> +        mapping->flags = flags;
>>>> +}
>>>> +
>>>>   /**
>>>>    * amdgpu_vm_get_block_size - calculate VM page table size as 
>>>> power of two
>>>>    *
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> index 9ecb7f663e19..11793716cd8b 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> @@ -413,6 +413,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device 
>>>> *adev,
>>>>   bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
>>>>   void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>>>>                    struct amdgpu_bo *bo, bool evicted);
>>>> +void amdgpu_vm_bo_update_flags(struct amdgpu_bo_va *bo_va, 
>>>> uint64_t flags);
>>>>   uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, 
>>>> uint64_t addr);
>>>>   struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
>>>>                          struct amdgpu_bo *bo);
>>>
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdkfd: Set pte_flags for actual BO location
  2022-08-29 19:30       ` Felix Kuehling
@ 2022-08-30  6:00         ` Christian König
  2022-08-30 15:41           ` Felix Kuehling
  0 siblings, 1 reply; 7+ messages in thread
From: Christian König @ 2022-08-30  6:00 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx

Am 29.08.22 um 21:30 schrieb Felix Kuehling:
>
> Am 2022-08-29 um 14:59 schrieb Christian König:
>> Am 29.08.22 um 18:07 schrieb Felix Kuehling:
>>> Am 2022-08-29 um 11:38 schrieb Christian König:
>>>> Am 27.08.22 um 01:16 schrieb Felix Kuehling:
>>>>> BOs can be in a different location than was intended at allocation 
>>>>> time,
>>>>> for example when restoring fails after an eviction or BOs get 
>>>>> pinned in
>>>>> system memory. On some GPUs the MTYPE for coherent mappings 
>>>>> depends on
>>>>> the actual memory location.
>>>>>
>>>>> Use the actual location to determine the pte_flags every time the 
>>>>> page
>>>>> tables are updated.
>>>>
>>>> For a workaround ok, but looks a bit awkward. Basically we need 
>>>> different MTYPE based on the location, right?
>>>
>>> Yes. On Aldebaran and Arcturus we need different MTYPEs for 
>>> fine-grained coherence depending on the location.
>>
>> It would be much cleaner to have a better description how all this 
>> came to be in the mapping.
>>
>> E.g. that we generate the flags in the VM code based on the 
>> requirements described in the mapping.
>>
>> Do we have time to clean this up thoughtfully?
>
> Currently we have two places in the KFD code that generates the 
> mapping flags. I was planning to eliminate the duplication. I think 
> you're suggesting moving it into the amdgpu_vm code instead. 
> Unfortunately it's somewhat GPU-specific. So you probably won't like 
> this code in the general amdgpu_vm code. Maybe the HW-specific part 
> should be in gmc_v*.c.

We have the gmc_v*_get_vm_pte() and gmc_v*_get_vm_pde() functions 
exactly for that.

>
> The requirements would include:
>
>  * Memory type and mapping (local, system, PCIe P2P, XGMI P2P)
>  * Memory model (coarse-grained or fine-grained coherence)

Question is if any of this is a per BO or per mapping information?

For the gfx side we unfortunately have put the MTYPE into the mapping 
(which turned out to be a bad idea).

So as far as I understand it the MTYPE should rather be obtained from 
per buffer flags and the current placement, correct?

Regards,
Christian.

>
> Regards,
>   Felix
>
>
>>
>> Regards,
>> Christian.
>>
>>>
>>> Regards,
>>>   Felix
>>>
>>>
>>>>
>>>> Christian.
>>>>
>>>>>
>>>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>>> ---
>>>>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  9 ++++++++-
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 19 
>>>>> +++++++++++++++++++
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |  1 +
>>>>>   3 files changed, 28 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> index cbd593f7d553..5dd89f5a032f 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> @@ -405,6 +405,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, 
>>>>> struct amdgpu_sync *sync)
>>>>>   static uint64_t get_pte_flags(struct amdgpu_device *adev, struct 
>>>>> kgd_mem *mem)
>>>>>   {
>>>>>       struct amdgpu_device *bo_adev = 
>>>>> amdgpu_ttm_adev(mem->bo->tbo.bdev);
>>>>> +    bool is_vram = mem->bo->tbo.resource->mem_type == TTM_PL_VRAM;
>>>>>       bool coherent = mem->alloc_flags & 
>>>>> KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
>>>>>       bool uncached = mem->alloc_flags & 
>>>>> KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED;
>>>>>       uint32_t mapping_flags;
>>>>> @@ -420,7 +421,7 @@ static uint64_t get_pte_flags(struct 
>>>>> amdgpu_device *adev, struct kgd_mem *mem)
>>>>>       switch (adev->asic_type) {
>>>>>       case CHIP_ARCTURUS:
>>>>>       case CHIP_ALDEBARAN:
>>>>> -        if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
>>>>> +        if (is_vram) {
>>>>>               if (bo_adev == adev) {
>>>>>                   if (uncached)
>>>>>                       mapping_flags |= AMDGPU_VM_MTYPE_UC;
>>>>> @@ -1236,12 +1237,18 @@ static int update_gpuvm_pte(struct kgd_mem 
>>>>> *mem,
>>>>>   {
>>>>>       struct amdgpu_bo_va *bo_va = entry->bo_va;
>>>>>       struct amdgpu_device *adev = entry->adev;
>>>>> +    uint64_t pte_flags = get_pte_flags(adev, mem);
>>>>>       int ret;
>>>>>         ret = kfd_mem_dmamap_attachment(mem, entry);
>>>>>       if (ret)
>>>>>           return ret;
>>>>>   +    if (unlikely(entry->pte_flags != pte_flags)) {
>>>>> +        amdgpu_vm_bo_update_flags(bo_va, pte_flags);
>>>>> +        entry->pte_flags = pte_flags;
>>>>> +    }
>>>>> +
>>>>>       /* Update the page tables  */
>>>>>       ret = amdgpu_vm_bo_update(adev, bo_va, false);
>>>>>       if (ret) {
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> index 59cac347baa3..954a40d5d828 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> @@ -1862,6 +1862,25 @@ void amdgpu_vm_bo_invalidate(struct 
>>>>> amdgpu_device *adev,
>>>>>       }
>>>>>   }
>>>>>   +/**
>>>>> + * amdgpu_vm_bo_update_flags - Update mapping flags of invalid 
>>>>> mappings
>>>>> + *
>>>>> + * @bo_va: identifies the BO and VM
>>>>> + * @flags: new mapping flags
>>>>> + *
>>>>> + * The update is only applied to invalid mappings. This allows 
>>>>> updating the
>>>>> + * mapping flags after a migration to maintain the desired 
>>>>> coherence. The next
>>>>> + * call to amdgpu_vm_bo_update() will apply the new @flags to the 
>>>>> page table.
>>>>> + */
>>>>> +void amdgpu_vm_bo_update_flags(struct amdgpu_bo_va *bo_va,
>>>>> +                   uint64_t flags)
>>>>> +{
>>>>> +    struct amdgpu_bo_va_mapping *mapping;
>>>>> +
>>>>> +    list_for_each_entry(mapping, &bo_va->invalids, list)
>>>>> +        mapping->flags = flags;
>>>>> +}
>>>>> +
>>>>>   /**
>>>>>    * amdgpu_vm_get_block_size - calculate VM page table size as 
>>>>> power of two
>>>>>    *
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> index 9ecb7f663e19..11793716cd8b 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> @@ -413,6 +413,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device 
>>>>> *adev,
>>>>>   bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
>>>>>   void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>>>>>                    struct amdgpu_bo *bo, bool evicted);
>>>>> +void amdgpu_vm_bo_update_flags(struct amdgpu_bo_va *bo_va, 
>>>>> uint64_t flags);
>>>>>   uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, 
>>>>> uint64_t addr);
>>>>>   struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
>>>>>                          struct amdgpu_bo *bo);
>>>>
>>


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdkfd: Set pte_flags for actual BO location
  2022-08-30  6:00         ` Christian König
@ 2022-08-30 15:41           ` Felix Kuehling
  0 siblings, 0 replies; 7+ messages in thread
From: Felix Kuehling @ 2022-08-30 15:41 UTC (permalink / raw)
  To: Christian König, amd-gfx

On 2022-08-30 02:00, Christian König wrote:
> Am 29.08.22 um 21:30 schrieb Felix Kuehling:
>>
>> Am 2022-08-29 um 14:59 schrieb Christian König:
>>> Am 29.08.22 um 18:07 schrieb Felix Kuehling:
>>>> Am 2022-08-29 um 11:38 schrieb Christian König:
>>>>> Am 27.08.22 um 01:16 schrieb Felix Kuehling:
>>>>>> BOs can be in a different location than was intended at 
>>>>>> allocation time,
>>>>>> for example when restoring fails after an eviction or BOs get 
>>>>>> pinned in
>>>>>> system memory. On some GPUs the MTYPE for coherent mappings 
>>>>>> depends on
>>>>>> the actual memory location.
>>>>>>
>>>>>> Use the actual location to determine the pte_flags every time the 
>>>>>> page
>>>>>> tables are updated.
>>>>>
>>>>> For a workaround ok, but looks a bit awkward. Basically we need 
>>>>> different MTYPE based on the location, right?
>>>>
>>>> Yes. On Aldebaran and Arcturus we need different MTYPEs for 
>>>> fine-grained coherence depending on the location.
>>>
>>> It would be much cleaner to have a better description how all this 
>>> came to be in the mapping.
>>>
>>> E.g. that we generate the flags in the VM code based on the 
>>> requirements described in the mapping.
>>>
>>> Do we have time to clean this up thoughtfully?
>>
>> Currently we have two places in the KFD code that generates the 
>> mapping flags. I was planning to eliminate the duplication. I think 
>> you're suggesting moving it into the amdgpu_vm code instead. 
>> Unfortunately it's somewhat GPU-specific. So you probably won't like 
>> this code in the general amdgpu_vm code. Maybe the HW-specific part 
>> should be in gmc_v*.c.
>
> We have the gmc_v*_get_vm_pte() and gmc_v*_get_vm_pde() functions 
> exactly for that.
>
>>
>> The requirements would include:
>>
>>  * Memory type and mapping (local, system, PCIe P2P, XGMI P2P)
>>  * Memory model (coarse-grained or fine-grained coherence)
>
> Question is if any of this is a per BO or per mapping information?

With XGMI, we are sharing the same BO between multiple GPUs. Local and 
remote mappings of the same BO can be different. I think a per-BO 
attribute to decide the memory model makes sense. Then the mapping code 
needs to figure out the flags from the per-BO memory model and the type 
of mapping on the fly. gmc_v*_get_vm_pte() seems to have all the 
parameters necessary to figure this out.


>
> For the gfx side we unfortunately have put the MTYPE into the mapping 
> (which turned out to be a bad idea).
>
> So as far as I understand it the MTYPE should rather be obtained from 
> per buffer flags and the current placement, correct?

Yes. I can work on a patch for that.

Regards,
   Felix


>
> Regards,
> Christian.
>
>>
>> Regards,
>>   Felix
>>
>>
>>>
>>> Regards,
>>> Christian.
>>>
>>>>
>>>> Regards,
>>>>   Felix
>>>>
>>>>
>>>>>
>>>>> Christian.
>>>>>
>>>>>>
>>>>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>>>> ---
>>>>>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  9 ++++++++-
>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 19 
>>>>>> +++++++++++++++++++
>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |  1 +
>>>>>>   3 files changed, 28 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>> index cbd593f7d553..5dd89f5a032f 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>> @@ -405,6 +405,7 @@ static int vm_update_pds(struct amdgpu_vm 
>>>>>> *vm, struct amdgpu_sync *sync)
>>>>>>   static uint64_t get_pte_flags(struct amdgpu_device *adev, 
>>>>>> struct kgd_mem *mem)
>>>>>>   {
>>>>>>       struct amdgpu_device *bo_adev = 
>>>>>> amdgpu_ttm_adev(mem->bo->tbo.bdev);
>>>>>> +    bool is_vram = mem->bo->tbo.resource->mem_type == TTM_PL_VRAM;
>>>>>>       bool coherent = mem->alloc_flags & 
>>>>>> KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
>>>>>>       bool uncached = mem->alloc_flags & 
>>>>>> KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED;
>>>>>>       uint32_t mapping_flags;
>>>>>> @@ -420,7 +421,7 @@ static uint64_t get_pte_flags(struct 
>>>>>> amdgpu_device *adev, struct kgd_mem *mem)
>>>>>>       switch (adev->asic_type) {
>>>>>>       case CHIP_ARCTURUS:
>>>>>>       case CHIP_ALDEBARAN:
>>>>>> -        if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
>>>>>> +        if (is_vram) {
>>>>>>               if (bo_adev == adev) {
>>>>>>                   if (uncached)
>>>>>>                       mapping_flags |= AMDGPU_VM_MTYPE_UC;
>>>>>> @@ -1236,12 +1237,18 @@ static int update_gpuvm_pte(struct 
>>>>>> kgd_mem *mem,
>>>>>>   {
>>>>>>       struct amdgpu_bo_va *bo_va = entry->bo_va;
>>>>>>       struct amdgpu_device *adev = entry->adev;
>>>>>> +    uint64_t pte_flags = get_pte_flags(adev, mem);
>>>>>>       int ret;
>>>>>>         ret = kfd_mem_dmamap_attachment(mem, entry);
>>>>>>       if (ret)
>>>>>>           return ret;
>>>>>>   +    if (unlikely(entry->pte_flags != pte_flags)) {
>>>>>> +        amdgpu_vm_bo_update_flags(bo_va, pte_flags);
>>>>>> +        entry->pte_flags = pte_flags;
>>>>>> +    }
>>>>>> +
>>>>>>       /* Update the page tables  */
>>>>>>       ret = amdgpu_vm_bo_update(adev, bo_va, false);
>>>>>>       if (ret) {
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> index 59cac347baa3..954a40d5d828 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> @@ -1862,6 +1862,25 @@ void amdgpu_vm_bo_invalidate(struct 
>>>>>> amdgpu_device *adev,
>>>>>>       }
>>>>>>   }
>>>>>>   +/**
>>>>>> + * amdgpu_vm_bo_update_flags - Update mapping flags of invalid 
>>>>>> mappings
>>>>>> + *
>>>>>> + * @bo_va: identifies the BO and VM
>>>>>> + * @flags: new mapping flags
>>>>>> + *
>>>>>> + * The update is only applied to invalid mappings. This allows 
>>>>>> updating the
>>>>>> + * mapping flags after a migration to maintain the desired 
>>>>>> coherence. The next
>>>>>> + * call to amdgpu_vm_bo_update() will apply the new @flags to 
>>>>>> the page table.
>>>>>> + */
>>>>>> +void amdgpu_vm_bo_update_flags(struct amdgpu_bo_va *bo_va,
>>>>>> +                   uint64_t flags)
>>>>>> +{
>>>>>> +    struct amdgpu_bo_va_mapping *mapping;
>>>>>> +
>>>>>> +    list_for_each_entry(mapping, &bo_va->invalids, list)
>>>>>> +        mapping->flags = flags;
>>>>>> +}
>>>>>> +
>>>>>>   /**
>>>>>>    * amdgpu_vm_get_block_size - calculate VM page table size as 
>>>>>> power of two
>>>>>>    *
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> index 9ecb7f663e19..11793716cd8b 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> @@ -413,6 +413,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device 
>>>>>> *adev,
>>>>>>   bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
>>>>>>   void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>>>>>>                    struct amdgpu_bo *bo, bool evicted);
>>>>>> +void amdgpu_vm_bo_update_flags(struct amdgpu_bo_va *bo_va, 
>>>>>> uint64_t flags);
>>>>>>   uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, 
>>>>>> uint64_t addr);
>>>>>>   struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
>>>>>>                          struct amdgpu_bo *bo);
>>>>>
>>>
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-08-30 15:41 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-26 23:16 [PATCH] drm/amdkfd: Set pte_flags for actual BO location Felix Kuehling
2022-08-29 15:38 ` Christian König
2022-08-29 16:07   ` Felix Kuehling
2022-08-29 18:59     ` Christian König
2022-08-29 19:30       ` Felix Kuehling
2022-08-30  6:00         ` Christian König
2022-08-30 15:41           ` Felix Kuehling

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.