All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on allocation flag
@ 2021-11-11 21:13 Ramesh Errabolu
  2021-11-11 21:13 ` [PATCH 2/2] drm/amdgpu: Pin MMIO/DOORBELL BO's in GTT domain Ramesh Errabolu
  0 siblings, 1 reply; 8+ messages in thread
From: Ramesh Errabolu @ 2021-11-11 21:13 UTC (permalink / raw)
  To: amd-gfx; +Cc: Ramesh Errabolu

Accounting system to track amount of available memory (system, TTM
and VRAM of a device) relies on BO's domain. The change is to rely
instead on allocation flag indicating BO type - VRAM, GTT, USERPTR,
MMIO or DOORBELL

Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  6 ++
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 95 ++++++++++++-------
 2 files changed, 65 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index d00de575c541..fcbc8a9c9e06 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -301,6 +301,12 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev);
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
 				struct amdgpu_vm *vm);
+
+/**
+ * @amdgpu_amdkfd_release_notify() - Notify KFD when GEM object is released
+ *
+ * Allows KFD to release its resources associated with the GEM object.
+ */
 void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
 void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
 #else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 94fccf0b47ad..cfc84af682b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -120,8 +120,19 @@ static size_t amdgpu_amdkfd_acc_size(uint64_t size)
 		PAGE_ALIGN(size);
 }
 
+/**
+ * @amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size
+ * of buffer including any reserved for control structures
+ *
+ * @adev: Device to which allocated BO belongs to
+ * @size: Size of buffer, in bytes, encapsulated by B0. This should be
+ * equivalent to amdgpu_bo_size(BO)
+ * @alloc_flag: Flag used in allocating a BO as noted above
+ *
+ * Return: returns -ENOMEM in case of error, ZERO otherwise
+ */
 static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
-		uint64_t size, u32 domain, bool sg)
+		uint64_t size, u32 alloc_flag)
 {
 	uint64_t reserved_for_pt =
 		ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
@@ -131,20 +142,24 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 	acc_size = amdgpu_amdkfd_acc_size(size);
 
 	vram_needed = 0;
-	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
-		/* TTM GTT memory */
+	if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
 		system_mem_needed = acc_size + size;
 		ttm_mem_needed = acc_size + size;
-	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
-		/* Userptr */
+	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+		system_mem_needed = acc_size;
+		ttm_mem_needed = acc_size;
+		vram_needed = size;
+	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
 		system_mem_needed = acc_size + size;
 		ttm_mem_needed = acc_size;
-	} else {
-		/* VRAM and SG */
+	} else if (alloc_flag &
+		   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+		    KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
 		system_mem_needed = acc_size;
 		ttm_mem_needed = acc_size;
-		if (domain == AMDGPU_GEM_DOMAIN_VRAM)
-			vram_needed = size;
+	} else {
+		pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
+		return -ENOMEM;
 	}
 
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
@@ -160,64 +175,72 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 	    (adev->kfd.vram_used + vram_needed >
 	     adev->gmc.real_vram_size - reserved_for_pt)) {
 		ret = -ENOMEM;
-	} else {
-		kfd_mem_limit.system_mem_used += system_mem_needed;
-		kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
-		adev->kfd.vram_used += vram_needed;
+		goto release;
 	}
 
+	/* Update memory accounting by decreasing available system
+	 * memory, TTM memory and GPU memory as computed above
+	 */
+	adev->kfd.vram_used += vram_needed;
+	kfd_mem_limit.system_mem_used += system_mem_needed;
+	kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
+
+release:
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 	return ret;
 }
 
 static void unreserve_mem_limit(struct amdgpu_device *adev,
-		uint64_t size, u32 domain, bool sg)
+		uint64_t size, u32 alloc_flag)
 {
 	size_t acc_size;
 
 	acc_size = amdgpu_amdkfd_acc_size(size);
 
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
-	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
+
+	if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
 		kfd_mem_limit.system_mem_used -= (acc_size + size);
 		kfd_mem_limit.ttm_mem_used -= (acc_size + size);
-	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
+	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+		kfd_mem_limit.system_mem_used -= acc_size;
+		kfd_mem_limit.ttm_mem_used -= acc_size;
+		adev->kfd.vram_used -= size;
+	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
 		kfd_mem_limit.system_mem_used -= (acc_size + size);
 		kfd_mem_limit.ttm_mem_used -= acc_size;
-	} else {
+	} else if (alloc_flag &
+		   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+		    KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
 		kfd_mem_limit.system_mem_used -= acc_size;
 		kfd_mem_limit.ttm_mem_used -= acc_size;
-		if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
-			adev->kfd.vram_used -= size;
-			WARN_ONCE(adev->kfd.vram_used < 0,
-				  "kfd VRAM memory accounting unbalanced");
-		}
+	} else {
+		pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
+		goto release;
 	}
-	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
-		  "kfd system memory accounting unbalanced");
+
+	WARN_ONCE(adev->kfd.vram_used < 0,
+		  "KFD VRAM memory accounting unbalanced");
 	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
-		  "kfd TTM memory accounting unbalanced");
+		  "KFD TTM memory accounting unbalanced");
+	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
+		  "KFD system memory accounting unbalanced");
 
+release:
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
 
 void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	u32 domain = bo->preferred_domains;
-	bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
+	u32 alloc_flags = bo->kfd_bo->alloc_flags;
+	u64 size = amdgpu_bo_size(bo);
 
-	if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) {
-		domain = AMDGPU_GEM_DOMAIN_CPU;
-		sg = false;
-	}
-
-	unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
+	unreserve_mem_limit(adev, size, alloc_flags);
 
 	kfree(bo->kfd_bo);
 }
 
-
 /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
  *  reservation object.
  *
@@ -1452,7 +1475,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
 	amdgpu_sync_create(&(*mem)->sync);
 
-	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
+	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
 	if (ret) {
 		pr_debug("Insufficient memory\n");
 		goto err_reserve_limit;
@@ -1508,7 +1531,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	/* Don't unreserve system mem limit twice */
 	goto err_reserve_limit;
 err_bo_create:
-	unreserve_mem_limit(adev, size, alloc_domain, !!sg);
+	unreserve_mem_limit(adev, size, flags);
 err_reserve_limit:
 	mutex_destroy(&(*mem)->lock);
 	kfree(*mem);
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] drm/amdgpu: Pin MMIO/DOORBELL BO's in GTT  domain
  2021-11-11 21:13 [PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on allocation flag Ramesh Errabolu
@ 2021-11-11 21:13 ` Ramesh Errabolu
  2021-11-12 17:58   ` Felix Kuehling
  0 siblings, 1 reply; 8+ messages in thread
From: Ramesh Errabolu @ 2021-11-11 21:13 UTC (permalink / raw)
  To: amd-gfx; +Cc: Ramesh Errabolu

MMIO/DOORBELL BOs encode control data and should be pinned in GTT
domain before enabling PCIe connected peer devices in accessing it

Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 69 +++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index cfc84af682b1..90b985436878 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1295,6 +1295,55 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 	return ret;
 }
 
+/**
+ * amdgpu_amdkfd_gpuvm_pin_bo() - Pins a BO using following criteria
+ * @bo: Handle of buffer object being pinned
+ * @domain: Domain into which BO should be pinned
+ *
+ *   - USERPTR BOs are UNPINNABLE and will return error
+ *   - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their
+ *     PIN count incremented. It is valid to PIN a BO multiple times
+ *
+ * Return: ZERO if successful in pinning, Non-Zero in case of error.
+ */
+static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
+{
+	int ret = 0;
+
+	ret = amdgpu_bo_reserve(bo, false);
+	if (unlikely(ret))
+		return ret;
+
+	ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);
+	if (ret)
+		pr_err("Error in Pinning BO to domain: %d\n", domain);
+
+	amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
+	amdgpu_bo_unreserve(bo);
+
+	return ret;
+}
+
+/**
+ * amdgpu_amdkfd_gpuvm_unpin_bo() - Unpins BO using following criteria
+ * @bo: Handle of buffer object being unpinned
+ *
+ *   - Is a illegal request for USERPTR BOs and is ignored
+ *   - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their
+ *     PIN count decremented. Calls to UNPIN must balance calls to PIN
+ */
+void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
+{
+	int ret = 0;
+
+	ret = amdgpu_bo_reserve(bo, false);
+	if (unlikely(ret))
+		return;
+
+	amdgpu_bo_unpin(bo);
+	amdgpu_bo_unreserve(bo);
+}
+
 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
 					   struct file *filp, u32 pasid,
 					   void **process_info,
@@ -1521,10 +1570,22 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	if (offset)
 		*offset = amdgpu_bo_mmap_offset(bo);
 
+	if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+			KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+		ret = amdgpu_amdkfd_gpuvm_pin_bo(bo, AMDGPU_GEM_DOMAIN_GTT);
+		if (ret) {
+			pr_err("Pinning MMIO/DOORBELL BO during ALLOC FAILED\n");
+			goto err_pin_bo;
+		}
+		bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+		bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+	}
+
 	return 0;
 
 allocate_init_user_pages_failed:
 	remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+err_pin_bo:
 	drm_vma_node_revoke(&gobj->vma_node, drm_priv);
 err_node_allow:
 	drm_gem_object_put(gobj);
@@ -1557,6 +1618,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	bool is_imported = false;
 
 	mutex_lock(&mem->lock);
+
+	/* Unpin MMIO/DOORBELL BO's that were pinnned during allocation */
+	if (mem->alloc_flags &
+	    (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+	     KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+		amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
+	}
+
 	mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
 	is_imported = mem->is_imported;
 	mutex_unlock(&mem->lock);
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: Pin MMIO/DOORBELL BO's in GTT domain
  2021-11-11 21:13 ` [PATCH 2/2] drm/amdgpu: Pin MMIO/DOORBELL BO's in GTT domain Ramesh Errabolu
@ 2021-11-12 17:58   ` Felix Kuehling
  0 siblings, 0 replies; 8+ messages in thread
From: Felix Kuehling @ 2021-11-12 17:58 UTC (permalink / raw)
  To: Ramesh Errabolu, amd-gfx

On 2021-11-11 4:13 p.m., Ramesh Errabolu wrote:
> MMIO/DOORBELL BOs encode control data and should be pinned in GTT
> domain before enabling PCIe connected peer devices in accessing it
>
> Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>

The series is

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>


> ---
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 69 +++++++++++++++++++
>   1 file changed, 69 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index cfc84af682b1..90b985436878 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1295,6 +1295,55 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
>   	return ret;
>   }
>   
> +/**
> + * amdgpu_amdkfd_gpuvm_pin_bo() - Pins a BO using following criteria
> + * @bo: Handle of buffer object being pinned
> + * @domain: Domain into which BO should be pinned
> + *
> + *   - USERPTR BOs are UNPINNABLE and will return error
> + *   - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their
> + *     PIN count incremented. It is valid to PIN a BO multiple times
> + *
> + * Return: ZERO if successful in pinning, Non-Zero in case of error.
> + */
> +static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
> +{
> +	int ret = 0;
> +
> +	ret = amdgpu_bo_reserve(bo, false);
> +	if (unlikely(ret))
> +		return ret;
> +
> +	ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);
> +	if (ret)
> +		pr_err("Error in Pinning BO to domain: %d\n", domain);
> +
> +	amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
> +	amdgpu_bo_unreserve(bo);
> +
> +	return ret;
> +}
> +
> +/**
> + * amdgpu_amdkfd_gpuvm_unpin_bo() - Unpins BO using following criteria
> + * @bo: Handle of buffer object being unpinned
> + *
> + *   - Is a illegal request for USERPTR BOs and is ignored
> + *   - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their
> + *     PIN count decremented. Calls to UNPIN must balance calls to PIN
> + */
> +void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
> +{
> +	int ret = 0;
> +
> +	ret = amdgpu_bo_reserve(bo, false);
> +	if (unlikely(ret))
> +		return;
> +
> +	amdgpu_bo_unpin(bo);
> +	amdgpu_bo_unreserve(bo);
> +}
> +
>   int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
>   					   struct file *filp, u32 pasid,
>   					   void **process_info,
> @@ -1521,10 +1570,22 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>   	if (offset)
>   		*offset = amdgpu_bo_mmap_offset(bo);
>   
> +	if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
> +			KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
> +		ret = amdgpu_amdkfd_gpuvm_pin_bo(bo, AMDGPU_GEM_DOMAIN_GTT);
> +		if (ret) {
> +			pr_err("Pinning MMIO/DOORBELL BO during ALLOC FAILED\n");
> +			goto err_pin_bo;
> +		}
> +		bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
> +		bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
> +	}
> +
>   	return 0;
>   
>   allocate_init_user_pages_failed:
>   	remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
> +err_pin_bo:
>   	drm_vma_node_revoke(&gobj->vma_node, drm_priv);
>   err_node_allow:
>   	drm_gem_object_put(gobj);
> @@ -1557,6 +1618,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>   	bool is_imported = false;
>   
>   	mutex_lock(&mem->lock);
> +
> +	/* Unpin MMIO/DOORBELL BO's that were pinnned during allocation */
> +	if (mem->alloc_flags &
> +	    (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
> +	     KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
> +		amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
> +	}
> +
>   	mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
>   	is_imported = mem->is_imported;
>   	mutex_unlock(&mem->lock);

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on allocation flag
  2021-11-11 20:53     ` Felix Kuehling
@ 2021-11-11 20:58       ` Errabolu, Ramesh
  0 siblings, 0 replies; 8+ messages in thread
From: Errabolu, Ramesh @ 2021-11-11 20:58 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx

[AMD Official Use Only]

Agree with your comments. I will remove commenting the method amdgpu_amdkfd_reserve_system_mem() in my updated patch.

-----Original Message-----
From: Kuehling, Felix <Felix.Kuehling@amd.com> 
Sent: Thursday, November 11, 2021 2:54 PM
To: Errabolu, Ramesh <Ramesh.Errabolu@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on allocation flag

Am 2021-11-11 um 3:45 p.m. schrieb Errabolu, Ramesh:
> [AMD Official Use Only]
>
> Resp inline
>
> Request clarification regarding - amdgpu_amdkfd_reserve_system_mem()
>
> Will send out updated patch upon clarification
>
> Regards,
> Ramesh
>
> -----Original Message-----
> From: Kuehling, Felix <Felix.Kuehling@amd.com>
> Sent: Thursday, November 11, 2021 7:44 AM
> To: amd-gfx@lists.freedesktop.org; Errabolu, Ramesh 
> <Ramesh.Errabolu@amd.com>
> Subject: Re: [PATCH 1/2] drm/amdgpu: Update BO memory accounting to 
> rely on allocation flag
>
> Am 2021-11-09 um 1:13 a.m. schrieb Ramesh Errabolu:
>> Accounting system to track amount of available memory (system, TTM 
>> and VRAM of a device) relies on BO's domain. The change is to rely 
>> instead on allocation flag indicating BO type - VRAM, GTT, USERPTR, 
>> MMIO or DOORBELL
>>
>> Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>
> The code changes look good. Comments about comments inline ...
>
>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  16 +++
>>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 101
>> +++++++++++-------
>>  2 files changed, 79 insertions(+), 38 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index 5f658823a637..8d31a742cd80 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -307,7 +307,23 @@ void
>> amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device 
>> *adev);  void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
>>  void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
>>  				struct amdgpu_vm *vm);
>> +
>> +/**
>> + * @amdgpu_amdkfd_release_notify() - Invoked when GEM object 
>> +reference count
>> + * reaches ZERO. Increases available memory by size of buffer 
>> +including any
>> + * reserved for control structures
> "Increases available memory size ..." is an implementation detail that doesn't matter to the callers of this function. It should not be part of the interface definition. The interface description should be more general, maybe:
>
> Ramesh: Agreed.
>
> * amdgpu_amdkfd_release_notify() - Notify KFD when GEM object is 
> released
> *
> * Allows KFD to release its resources associated with the GEM object.
> * ...
>
> Ramesh: Used your comment as is
>
>> + *
>> + * @note: This api must be invoked on BOs that have been allocated 
>> + via
>> + * KFD interface amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu()
>> + */
>>  void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
>> +
>> +/**
>> + * @amdgpu_amdkfd_reserve_system_mem - Decrease system memory that 
>> +is
>> + * available by an amount specified by input parameter
> This is misleading. This function doesn't change availability of system memory in general because it doesn't allocate any memory. You'll need to be more specific:
>
> * amdgpu_amdkfd_reserve_system_mem() - Decrease system memory limit 
> for KFD applications
>
> Ramesh: Thanks for pointing out the detail. Should this be "Decrease system memory available for KFD applications" as the code seems to track a counter "system_mem_used". Let me know?

That's still not entirely accurate. KFD applications can allocate system memory in different ways: malloc, hipHostMalloc, hipHostRegister, hipMallocManaged. This limit affects system memory managed by the kfd_ioctl_alloc_memory_of_gpu, which must be physically resident while the queues are active. At the HIP API level this includes hipHostMalloc and hipHostRegister.

It does not affect system memory allocated with plain malloc or mmap.

My version that just mentions the limit avoids all those details because it doesn't need to explain what that limit applies to. If you want to go into all those details, I'm not sure this comment is the right place for it. I think it takes a more comprehensive discussion of GPU memory management with user mode queues.

Regards,
  Felix


>
>> + *
>> + * @size: Size of buffer in bytes
> What buffer?
>
> Ramesh: Updated it "Amount to decrease in bytes"
>
>
>> + */
>>  void amdgpu_amdkfd_reserve_system_mem(uint64_t size);  #else  static 
>> inline diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index 94fccf0b47ad..08675f89bfb2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -120,8 +120,22 @@ static size_t amdgpu_amdkfd_acc_size(uint64_t size)
>>  		PAGE_ALIGN(size);
>>  }
>>  
>> +/**
>> + * @amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by 
>> +size
>> + * of buffer including any reserved for control structures
>> + *
>> + * @adev: Device to which allocated BO belongs to
>> + * @size: Size of buffer, in bytes, encapsulated by B0. This should 
>> +be
>> + * equivalent to amdgpu_bo_size(BO)
>> + * @alloc_flag: Flag used in allocating a BO as noted above
>> + *
>> + * @note: This api must be invoked on BOs that have been allocated 
>> +via
>> + * KFD interface amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu()
> Who needs to call it? Your statement sounds like callers of amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu must call this function as well.
> This is very misleading because this function is already called from amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu.
>
> Ramesh: Will remove the "note"
>
>
>> + *
>> + * Return: returns -ENOMEM in case of error, ZERO otherwise */
>>  static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
>> -		uint64_t size, u32 domain, bool sg)
>> +		uint64_t size, u32 alloc_flag)
>>  {
>>  	uint64_t reserved_for_pt =
>>  		ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
>> @@ -131,20 +145,24 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
>>  	acc_size = amdgpu_amdkfd_acc_size(size);
>>  
>>  	vram_needed = 0;
>> -	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
>> -		/* TTM GTT memory */
>> +	if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
>>  		system_mem_needed = acc_size + size;
>>  		ttm_mem_needed = acc_size + size;
>> -	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
>> -		/* Userptr */
>> +	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
>> +		system_mem_needed = acc_size;
>> +		ttm_mem_needed = acc_size;
>> +		vram_needed = size;
>> +	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
>>  		system_mem_needed = acc_size + size;
>>  		ttm_mem_needed = acc_size;
>> -	} else {
>> -		/* VRAM and SG */
>> +	} else if (alloc_flag &
>> +		   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
>> +		    KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
>>  		system_mem_needed = acc_size;
>>  		ttm_mem_needed = acc_size;
>> -		if (domain == AMDGPU_GEM_DOMAIN_VRAM)
>> -			vram_needed = size;
>> +	} else {
>> +		pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
>> +		return -ENOMEM;
>>  	}
>>  
>>  	spin_lock(&kfd_mem_limit.mem_limit_lock);
>> @@ -160,64 +178,71 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
>>  	    (adev->kfd.vram_used + vram_needed >
>>  	     adev->gmc.real_vram_size - reserved_for_pt)) {
>>  		ret = -ENOMEM;
>> -	} else {
>> -		kfd_mem_limit.system_mem_used += system_mem_needed;
>> -		kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
>> -		adev->kfd.vram_used += vram_needed;
>> +		goto release;
>>  	}
>>  
>> +	/* Update memory accounting by decreasing available system
>> +	 * memory, TTM memory and GPU memory as computed above
>> +	 */
>> +	adev->kfd.vram_used += vram_needed;
>> +	kfd_mem_limit.system_mem_used += system_mem_needed;
>> +	kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
>> +
>> +release:
>>  	spin_unlock(&kfd_mem_limit.mem_limit_lock);
>>  	return ret;
>>  }
>>  
>>  static void unreserve_mem_limit(struct amdgpu_device *adev,
>> -		uint64_t size, u32 domain, bool sg)
>> +		uint64_t size, u32 alloc_flag)
>>  {
>>  	size_t acc_size;
>>  
>>  	acc_size = amdgpu_amdkfd_acc_size(size);
>>  
>>  	spin_lock(&kfd_mem_limit.mem_limit_lock);
>> -	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
>> +
>> +	if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
>>  		kfd_mem_limit.system_mem_used -= (acc_size + size);
>>  		kfd_mem_limit.ttm_mem_used -= (acc_size + size);
>> -	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
>> +	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
>> +		kfd_mem_limit.system_mem_used -= acc_size;
>> +		kfd_mem_limit.ttm_mem_used -= acc_size;
>> +		adev->kfd.vram_used -= size;
>> +	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
>>  		kfd_mem_limit.system_mem_used -= (acc_size + size);
>>  		kfd_mem_limit.ttm_mem_used -= acc_size;
>> -	} else {
>> +	} else if (alloc_flag &
>> +		   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
>> +		    KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
>>  		kfd_mem_limit.system_mem_used -= acc_size;
>>  		kfd_mem_limit.ttm_mem_used -= acc_size;
>> -		if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
>> -			adev->kfd.vram_used -= size;
>> -			WARN_ONCE(adev->kfd.vram_used < 0,
>> -				  "kfd VRAM memory accounting unbalanced");
>> -		}
>> +	} else {
>> +		pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
>> +		goto release;
>>  	}
>> -	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
>> -		  "kfd system memory accounting unbalanced");
>> +
>> +	/* Alert user if memory accounting is not per expectation */
> This comment is obvious and unnecessary, and also not even correct.
> These WARN messages are not for the user because the user did not cause them and can do nothing to avoid them. These messages point out bugs elsewhere in the driver. So they are for engineers.
>
> Ramesh: Removed
>
> Regards,
>   Felix
>
>
>> +	WARN_ONCE(adev->kfd.vram_used < 0,
>> +		  "KFD VRAM memory accounting unbalanced");
>>  	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
>> -		  "kfd TTM memory accounting unbalanced");
>> +		  "KFD TTM memory accounting unbalanced");
>> +	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
>> +		  "KFD system memory accounting unbalanced");
>>  
>> +release:
>>  	spin_unlock(&kfd_mem_limit.mem_limit_lock);
>>  }
>>  
>>  void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)  {
>>  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>> -	u32 domain = bo->preferred_domains;
>> -	bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
>> -
>> -	if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) {
>> -		domain = AMDGPU_GEM_DOMAIN_CPU;
>> -		sg = false;
>> -	}
>> -
>> -	unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
>> +	u32 alloc_flags = bo->kfd_bo->alloc_flags;
>> +	u64 size = amdgpu_bo_size(bo);
>>  
>> -	kfree(bo->kfd_bo);
>> +	unreserve_mem_limit(adev, size, alloc_flags);
>>  }
>>  
>> -
>>  /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
>>   *  reservation object.
>>   *
>> @@ -1452,7 +1477,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>>  
>>  	amdgpu_sync_create(&(*mem)->sync);
>>  
>> -	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
>> +	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
>>  	if (ret) {
>>  		pr_debug("Insufficient memory\n");
>>  		goto err_reserve_limit;
>> @@ -1508,7 +1533,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>>  	/* Don't unreserve system mem limit twice */
>>  	goto err_reserve_limit;
>>  err_bo_create:
>> -	unreserve_mem_limit(adev, size, alloc_domain, !!sg);
>> +	unreserve_mem_limit(adev, size, flags);
>>  err_reserve_limit:
>>  	mutex_destroy(&(*mem)->lock);
>>  	kfree(*mem);

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on allocation flag
  2021-11-11 20:45   ` Errabolu, Ramesh
@ 2021-11-11 20:53     ` Felix Kuehling
  2021-11-11 20:58       ` Errabolu, Ramesh
  0 siblings, 1 reply; 8+ messages in thread
From: Felix Kuehling @ 2021-11-11 20:53 UTC (permalink / raw)
  To: Errabolu, Ramesh, amd-gfx

Am 2021-11-11 um 3:45 p.m. schrieb Errabolu, Ramesh:
> [AMD Official Use Only]
>
> Resp inline
>
> Request clarification regarding - amdgpu_amdkfd_reserve_system_mem()
>
> Will send out updated patch upon clarification
>
> Regards,
> Ramesh
>
> -----Original Message-----
> From: Kuehling, Felix <Felix.Kuehling@amd.com> 
> Sent: Thursday, November 11, 2021 7:44 AM
> To: amd-gfx@lists.freedesktop.org; Errabolu, Ramesh <Ramesh.Errabolu@amd.com>
> Subject: Re: [PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on allocation flag
>
> Am 2021-11-09 um 1:13 a.m. schrieb Ramesh Errabolu:
>> Accounting system to track amount of available memory (system, TTM and 
>> VRAM of a device) relies on BO's domain. The change is to rely instead 
>> on allocation flag indicating BO type - VRAM, GTT, USERPTR, MMIO or 
>> DOORBELL
>>
>> Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>
> The code changes look good. Comments about comments inline ...
>
>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  16 +++
>>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 101 
>> +++++++++++-------
>>  2 files changed, 79 insertions(+), 38 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index 5f658823a637..8d31a742cd80 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -307,7 +307,23 @@ void 
>> amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device 
>> *adev);  void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
>>  void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
>>  				struct amdgpu_vm *vm);
>> +
>> +/**
>> + * @amdgpu_amdkfd_release_notify() - Invoked when GEM object 
>> +reference count
>> + * reaches ZERO. Increases available memory by size of buffer 
>> +including any
>> + * reserved for control structures
> "Increases available memory size ..." is an implementation detail that doesn't matter to the callers of this function. It should not be part of the interface definition. The interface description should be more general, maybe:
>
> Ramesh: Agreed.
>
> * amdgpu_amdkfd_release_notify() - Notify KFD when GEM object is released
> *
> * Allows KFD to release its resources associated with the GEM object.
> * ...
>
> Ramesh: Used your comment as is
>
>> + *
>> + * @note: This api must be invoked on BOs that have been allocated 
>> + via
>> + * KFD interface amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu()
>> + */
>>  void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
>> +
>> +/**
>> + * @amdgpu_amdkfd_reserve_system_mem - Decrease system memory that is
>> + * available by an amount specified by input parameter
> This is misleading. This function doesn't change availability of system memory in general because it doesn't allocate any memory. You'll need to be more specific:
>
> * amdgpu_amdkfd_reserve_system_mem() - Decrease system memory limit for KFD applications
>
> Ramesh: Thanks for pointing out the detail. Should this be "Decrease system memory available for KFD applications" as the code seems to track a counter "system_mem_used". Let me know?

That's still not entirely accurate. KFD applications can allocate system
memory in different ways: malloc, hipHostMalloc, hipHostRegister,
hipMallocManaged. This limit affects system memory managed by the
kfd_ioctl_alloc_memory_of_gpu, which must be physically resident while
the queues are active. At the HIP API level this includes hipHostMalloc
and hipHostRegister.

It does not affect system memory allocated with plain malloc or mmap.

My version that just mentions the limit avoids all those details because
it doesn't need to explain what that limit applies to. If you want to go
into all those details, I'm not sure this comment is the right place for
it. I think it takes a more comprehensive discussion of GPU memory
management with user mode queues.

Regards,
  Felix


>
>> + *
>> + * @size: Size of buffer in bytes
> What buffer?
>
> Ramesh: Updated it "Amount to decrease in bytes"
>
>
>> + */
>>  void amdgpu_amdkfd_reserve_system_mem(uint64_t size);  #else  static 
>> inline diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index 94fccf0b47ad..08675f89bfb2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -120,8 +120,22 @@ static size_t amdgpu_amdkfd_acc_size(uint64_t size)
>>  		PAGE_ALIGN(size);
>>  }
>>  
>> +/**
>> + * @amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by 
>> +size
>> + * of buffer including any reserved for control structures
>> + *
>> + * @adev: Device to which allocated BO belongs to
>> + * @size: Size of buffer, in bytes, encapsulated by B0. This should 
>> +be
>> + * equivalent to amdgpu_bo_size(BO)
>> + * @alloc_flag: Flag used in allocating a BO as noted above
>> + *
>> + * @note: This api must be invoked on BOs that have been allocated 
>> +via
>> + * KFD interface amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu()
> Who needs to call it? Your statement sounds like callers of amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu must call this function as well.
> This is very misleading because this function is already called from amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu.
>
> Ramesh: Will remove the "note"
>
>
>> + *
>> + * Return: returns -ENOMEM in case of error, ZERO otherwise */
>>  static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
>> -		uint64_t size, u32 domain, bool sg)
>> +		uint64_t size, u32 alloc_flag)
>>  {
>>  	uint64_t reserved_for_pt =
>>  		ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
>> @@ -131,20 +145,24 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
>>  	acc_size = amdgpu_amdkfd_acc_size(size);
>>  
>>  	vram_needed = 0;
>> -	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
>> -		/* TTM GTT memory */
>> +	if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
>>  		system_mem_needed = acc_size + size;
>>  		ttm_mem_needed = acc_size + size;
>> -	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
>> -		/* Userptr */
>> +	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
>> +		system_mem_needed = acc_size;
>> +		ttm_mem_needed = acc_size;
>> +		vram_needed = size;
>> +	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
>>  		system_mem_needed = acc_size + size;
>>  		ttm_mem_needed = acc_size;
>> -	} else {
>> -		/* VRAM and SG */
>> +	} else if (alloc_flag &
>> +		   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
>> +		    KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
>>  		system_mem_needed = acc_size;
>>  		ttm_mem_needed = acc_size;
>> -		if (domain == AMDGPU_GEM_DOMAIN_VRAM)
>> -			vram_needed = size;
>> +	} else {
>> +		pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
>> +		return -ENOMEM;
>>  	}
>>  
>>  	spin_lock(&kfd_mem_limit.mem_limit_lock);
>> @@ -160,64 +178,71 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
>>  	    (adev->kfd.vram_used + vram_needed >
>>  	     adev->gmc.real_vram_size - reserved_for_pt)) {
>>  		ret = -ENOMEM;
>> -	} else {
>> -		kfd_mem_limit.system_mem_used += system_mem_needed;
>> -		kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
>> -		adev->kfd.vram_used += vram_needed;
>> +		goto release;
>>  	}
>>  
>> +	/* Update memory accounting by decreasing available system
>> +	 * memory, TTM memory and GPU memory as computed above
>> +	 */
>> +	adev->kfd.vram_used += vram_needed;
>> +	kfd_mem_limit.system_mem_used += system_mem_needed;
>> +	kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
>> +
>> +release:
>>  	spin_unlock(&kfd_mem_limit.mem_limit_lock);
>>  	return ret;
>>  }
>>  
>>  static void unreserve_mem_limit(struct amdgpu_device *adev,
>> -		uint64_t size, u32 domain, bool sg)
>> +		uint64_t size, u32 alloc_flag)
>>  {
>>  	size_t acc_size;
>>  
>>  	acc_size = amdgpu_amdkfd_acc_size(size);
>>  
>>  	spin_lock(&kfd_mem_limit.mem_limit_lock);
>> -	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
>> +
>> +	if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
>>  		kfd_mem_limit.system_mem_used -= (acc_size + size);
>>  		kfd_mem_limit.ttm_mem_used -= (acc_size + size);
>> -	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
>> +	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
>> +		kfd_mem_limit.system_mem_used -= acc_size;
>> +		kfd_mem_limit.ttm_mem_used -= acc_size;
>> +		adev->kfd.vram_used -= size;
>> +	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
>>  		kfd_mem_limit.system_mem_used -= (acc_size + size);
>>  		kfd_mem_limit.ttm_mem_used -= acc_size;
>> -	} else {
>> +	} else if (alloc_flag &
>> +		   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
>> +		    KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
>>  		kfd_mem_limit.system_mem_used -= acc_size;
>>  		kfd_mem_limit.ttm_mem_used -= acc_size;
>> -		if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
>> -			adev->kfd.vram_used -= size;
>> -			WARN_ONCE(adev->kfd.vram_used < 0,
>> -				  "kfd VRAM memory accounting unbalanced");
>> -		}
>> +	} else {
>> +		pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
>> +		goto release;
>>  	}
>> -	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
>> -		  "kfd system memory accounting unbalanced");
>> +
>> +	/* Alert user if memory accounting is not per expectation */
> This comment is obvious and unnecessary, and also not even correct.
> These WARN messages are not for the user because the user did not cause them and can do nothing to avoid them. These messages point out bugs elsewhere in the driver. So they are for engineers.
>
> Ramesh: Removed
>
> Regards,
>   Felix
>
>
>> +	WARN_ONCE(adev->kfd.vram_used < 0,
>> +		  "KFD VRAM memory accounting unbalanced");
>>  	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
>> -		  "kfd TTM memory accounting unbalanced");
>> +		  "KFD TTM memory accounting unbalanced");
>> +	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
>> +		  "KFD system memory accounting unbalanced");
>>  
>> +release:
>>  	spin_unlock(&kfd_mem_limit.mem_limit_lock);
>>  }
>>  
>>  void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)  {
>>  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>> -	u32 domain = bo->preferred_domains;
>> -	bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
>> -
>> -	if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) {
>> -		domain = AMDGPU_GEM_DOMAIN_CPU;
>> -		sg = false;
>> -	}
>> -
>> -	unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
>> +	u32 alloc_flags = bo->kfd_bo->alloc_flags;
>> +	u64 size = amdgpu_bo_size(bo);
>>  
>> -	kfree(bo->kfd_bo);
>> +	unreserve_mem_limit(adev, size, alloc_flags);
>>  }
>>  
>> -
>>  /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
>>   *  reservation object.
>>   *
>> @@ -1452,7 +1477,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>>  
>>  	amdgpu_sync_create(&(*mem)->sync);
>>  
>> -	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
>> +	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
>>  	if (ret) {
>>  		pr_debug("Insufficient memory\n");
>>  		goto err_reserve_limit;
>> @@ -1508,7 +1533,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>>  	/* Don't unreserve system mem limit twice */
>>  	goto err_reserve_limit;
>>  err_bo_create:
>> -	unreserve_mem_limit(adev, size, alloc_domain, !!sg);
>> +	unreserve_mem_limit(adev, size, flags);
>>  err_reserve_limit:
>>  	mutex_destroy(&(*mem)->lock);
>>  	kfree(*mem);

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on allocation flag
  2021-11-11 13:43 ` Felix Kuehling
@ 2021-11-11 20:45   ` Errabolu, Ramesh
  2021-11-11 20:53     ` Felix Kuehling
  0 siblings, 1 reply; 8+ messages in thread
From: Errabolu, Ramesh @ 2021-11-11 20:45 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx

[AMD Official Use Only]

Resp inline

Request clarification regarding - amdgpu_amdkfd_reserve_system_mem()

Will send out updated patch upon clarification

Regards,
Ramesh

-----Original Message-----
From: Kuehling, Felix <Felix.Kuehling@amd.com> 
Sent: Thursday, November 11, 2021 7:44 AM
To: amd-gfx@lists.freedesktop.org; Errabolu, Ramesh <Ramesh.Errabolu@amd.com>
Subject: Re: [PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on allocation flag

Am 2021-11-09 um 1:13 a.m. schrieb Ramesh Errabolu:
> Accounting system to track amount of available memory (system, TTM and 
> VRAM of a device) relies on BO's domain. The change is to rely instead 
> on allocation flag indicating BO type - VRAM, GTT, USERPTR, MMIO or 
> DOORBELL
>
> Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>

The code changes look good. Comments about comments inline ...


> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  16 +++
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 101 
> +++++++++++-------
>  2 files changed, 79 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 5f658823a637..8d31a742cd80 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -307,7 +307,23 @@ void 
> amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device 
> *adev);  void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
>  void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
>  				struct amdgpu_vm *vm);
> +
> +/**
> + * @amdgpu_amdkfd_release_notify() - Invoked when GEM object 
> +reference count
> + * reaches ZERO. Increases available memory by size of buffer 
> +including any
> + * reserved for control structures

"Increases available memory size ..." is an implementation detail that doesn't matter to the callers of this function. It should not be part of the interface definition. The interface description should be more general, maybe:

Ramesh: Agreed.

* amdgpu_amdkfd_release_notify() - Notify KFD when GEM object is released
*
* Allows KFD to release its resources associated with the GEM object.
* ...

Ramesh: Used your comment as is

> + *
> + * @note: This api must be invoked on BOs that have been allocated 
> + via
> + * KFD interface amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu()
> + */
>  void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
> +
> +/**
> + * @amdgpu_amdkfd_reserve_system_mem - Decrease system memory that is
> + * available by an amount specified by input parameter

This is misleading. This function doesn't change availability of system memory in general because it doesn't allocate any memory. You'll need to be more specific:

* amdgpu_amdkfd_reserve_system_mem() - Decrease system memory limit for KFD applications

Ramesh: Thanks for pointing out the detail. Should this be "Decrease system memory available for KFD applications" as the code seems to track a counter "system_mem_used". Let me know?

> + *
> + * @size: Size of buffer in bytes

What buffer?

Ramesh: Updated it "Amount to decrease in bytes"


> + */
>  void amdgpu_amdkfd_reserve_system_mem(uint64_t size);  #else  static 
> inline diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 94fccf0b47ad..08675f89bfb2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -120,8 +120,22 @@ static size_t amdgpu_amdkfd_acc_size(uint64_t size)
>  		PAGE_ALIGN(size);
>  }
>  
> +/**
> + * @amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by 
> +size
> + * of buffer including any reserved for control structures
> + *
> + * @adev: Device to which allocated BO belongs to
> + * @size: Size of buffer, in bytes, encapsulated by B0. This should 
> +be
> + * equivalent to amdgpu_bo_size(BO)
> + * @alloc_flag: Flag used in allocating a BO as noted above
> + *
> + * @note: This api must be invoked on BOs that have been allocated 
> +via
> + * KFD interface amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu()

Who needs to call it? Your statement sounds like callers of amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu must call this function as well.
This is very misleading because this function is already called from amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu.

Ramesh: Will remove the "note"


> + *
> + * Return: returns -ENOMEM in case of error, ZERO otherwise */
>  static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
> -		uint64_t size, u32 domain, bool sg)
> +		uint64_t size, u32 alloc_flag)
>  {
>  	uint64_t reserved_for_pt =
>  		ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
> @@ -131,20 +145,24 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
>  	acc_size = amdgpu_amdkfd_acc_size(size);
>  
>  	vram_needed = 0;
> -	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
> -		/* TTM GTT memory */
> +	if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
>  		system_mem_needed = acc_size + size;
>  		ttm_mem_needed = acc_size + size;
> -	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
> -		/* Userptr */
> +	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
> +		system_mem_needed = acc_size;
> +		ttm_mem_needed = acc_size;
> +		vram_needed = size;
> +	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
>  		system_mem_needed = acc_size + size;
>  		ttm_mem_needed = acc_size;
> -	} else {
> -		/* VRAM and SG */
> +	} else if (alloc_flag &
> +		   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
> +		    KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
>  		system_mem_needed = acc_size;
>  		ttm_mem_needed = acc_size;
> -		if (domain == AMDGPU_GEM_DOMAIN_VRAM)
> -			vram_needed = size;
> +	} else {
> +		pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
> +		return -ENOMEM;
>  	}
>  
>  	spin_lock(&kfd_mem_limit.mem_limit_lock);
> @@ -160,64 +178,71 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
>  	    (adev->kfd.vram_used + vram_needed >
>  	     adev->gmc.real_vram_size - reserved_for_pt)) {
>  		ret = -ENOMEM;
> -	} else {
> -		kfd_mem_limit.system_mem_used += system_mem_needed;
> -		kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
> -		adev->kfd.vram_used += vram_needed;
> +		goto release;
>  	}
>  
> +	/* Update memory accounting by decreasing available system
> +	 * memory, TTM memory and GPU memory as computed above
> +	 */
> +	adev->kfd.vram_used += vram_needed;
> +	kfd_mem_limit.system_mem_used += system_mem_needed;
> +	kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
> +
> +release:
>  	spin_unlock(&kfd_mem_limit.mem_limit_lock);
>  	return ret;
>  }
>  
>  static void unreserve_mem_limit(struct amdgpu_device *adev,
> -		uint64_t size, u32 domain, bool sg)
> +		uint64_t size, u32 alloc_flag)
>  {
>  	size_t acc_size;
>  
>  	acc_size = amdgpu_amdkfd_acc_size(size);
>  
>  	spin_lock(&kfd_mem_limit.mem_limit_lock);
> -	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
> +
> +	if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
>  		kfd_mem_limit.system_mem_used -= (acc_size + size);
>  		kfd_mem_limit.ttm_mem_used -= (acc_size + size);
> -	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
> +	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
> +		kfd_mem_limit.system_mem_used -= acc_size;
> +		kfd_mem_limit.ttm_mem_used -= acc_size;
> +		adev->kfd.vram_used -= size;
> +	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
>  		kfd_mem_limit.system_mem_used -= (acc_size + size);
>  		kfd_mem_limit.ttm_mem_used -= acc_size;
> -	} else {
> +	} else if (alloc_flag &
> +		   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
> +		    KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
>  		kfd_mem_limit.system_mem_used -= acc_size;
>  		kfd_mem_limit.ttm_mem_used -= acc_size;
> -		if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
> -			adev->kfd.vram_used -= size;
> -			WARN_ONCE(adev->kfd.vram_used < 0,
> -				  "kfd VRAM memory accounting unbalanced");
> -		}
> +	} else {
> +		pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
> +		goto release;
>  	}
> -	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
> -		  "kfd system memory accounting unbalanced");
> +
> +	/* Alert user if memory accounting is not per expectation */

This comment is obvious and unnecessary, and also not even correct.
These WARN messages are not for the user because the user did not cause them and can do nothing to avoid them. These messages point out bugs elsewhere in the driver. So they are for engineers.

Ramesh: Removed

Regards,
  Felix


> +	WARN_ONCE(adev->kfd.vram_used < 0,
> +		  "KFD VRAM memory accounting unbalanced");
>  	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
> -		  "kfd TTM memory accounting unbalanced");
> +		  "KFD TTM memory accounting unbalanced");
> +	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
> +		  "KFD system memory accounting unbalanced");
>  
> +release:
>  	spin_unlock(&kfd_mem_limit.mem_limit_lock);
>  }
>  
>  void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)  {
>  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> -	u32 domain = bo->preferred_domains;
> -	bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
> -
> -	if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) {
> -		domain = AMDGPU_GEM_DOMAIN_CPU;
> -		sg = false;
> -	}
> -
> -	unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
> +	u32 alloc_flags = bo->kfd_bo->alloc_flags;
> +	u64 size = amdgpu_bo_size(bo);
>  
> -	kfree(bo->kfd_bo);
> +	unreserve_mem_limit(adev, size, alloc_flags);
>  }
>  
> -
>  /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
>   *  reservation object.
>   *
> @@ -1452,7 +1477,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>  
>  	amdgpu_sync_create(&(*mem)->sync);
>  
> -	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
> +	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
>  	if (ret) {
>  		pr_debug("Insufficient memory\n");
>  		goto err_reserve_limit;
> @@ -1508,7 +1533,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>  	/* Don't unreserve system mem limit twice */
>  	goto err_reserve_limit;
>  err_bo_create:
> -	unreserve_mem_limit(adev, size, alloc_domain, !!sg);
> +	unreserve_mem_limit(adev, size, flags);
>  err_reserve_limit:
>  	mutex_destroy(&(*mem)->lock);
>  	kfree(*mem);

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on allocation flag
  2021-11-09  6:13 [PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on allocation flag Ramesh Errabolu
@ 2021-11-11 13:43 ` Felix Kuehling
  2021-11-11 20:45   ` Errabolu, Ramesh
  0 siblings, 1 reply; 8+ messages in thread
From: Felix Kuehling @ 2021-11-11 13:43 UTC (permalink / raw)
  To: amd-gfx, Errabolu, Ramesh

Am 2021-11-09 um 1:13 a.m. schrieb Ramesh Errabolu:
> Accounting system to track amount of available memory (system, TTM
> and VRAM of a device) relies on BO's domain. The change is to rely
> instead on allocation flag indicating BO type - VRAM, GTT, USERPTR,
> MMIO or DOORBELL
>
> Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>

The code changes look good. Comments about comments inline ...


> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  16 +++
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 101 +++++++++++-------
>  2 files changed, 79 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 5f658823a637..8d31a742cd80 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -307,7 +307,23 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev);
>  void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
>  void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
>  				struct amdgpu_vm *vm);
> +
> +/**
> + * @amdgpu_amdkfd_release_notify() - Invoked when GEM object reference count
> + * reaches ZERO. Increases available memory by size of buffer including any
> + * reserved for control structures

"Increases available memory size ..." is an implementation detail that
doesn't matter to the callers of this function. It should not be part of
the interface definition. The interface description should be more
general, maybe:

* amdgpu_amdkfd_release_notify() - Notify KFD when GEM object is released
*
* Allows KFD to release its resources associated with the GEM object.
* ...


> + *
> + * @note: This api must be invoked on BOs that have been allocated via
> + * KFD interface amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu()
> + */
>  void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
> +
> +/**
> + * @amdgpu_amdkfd_reserve_system_mem - Decrease system memory that is
> + * available by an amount specified by input parameter

This is misleading. This function doesn't change availability of system
memory in general because it doesn't allocate any memory. You'll need to
be more specific:

* amdgpu_amdkfd_reserve_system_mem() - Decrease system memory limit for KFD applications


> + *
> + * @size: Size of buffer in bytes

What buffer?


> + */
>  void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
>  #else
>  static inline
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 94fccf0b47ad..08675f89bfb2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -120,8 +120,22 @@ static size_t amdgpu_amdkfd_acc_size(uint64_t size)
>  		PAGE_ALIGN(size);
>  }
>  
> +/**
> + * @amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size
> + * of buffer including any reserved for control structures
> + *
> + * @adev: Device to which allocated BO belongs to
> + * @size: Size of buffer, in bytes, encapsulated by B0. This should be
> + * equivalent to amdgpu_bo_size(BO)
> + * @alloc_flag: Flag used in allocating a BO as noted above
> + *
> + * @note: This api must be invoked on BOs that have been allocated via
> + * KFD interface amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu()

Who needs to call it? Your statement sounds like callers of
amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu must call this function as well.
This is very misleading because this function is already called from
amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu.


> + *
> + * Return: returns -ENOMEM in case of error, ZERO otherwise
> + */
>  static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
> -		uint64_t size, u32 domain, bool sg)
> +		uint64_t size, u32 alloc_flag)
>  {
>  	uint64_t reserved_for_pt =
>  		ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
> @@ -131,20 +145,24 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
>  	acc_size = amdgpu_amdkfd_acc_size(size);
>  
>  	vram_needed = 0;
> -	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
> -		/* TTM GTT memory */
> +	if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
>  		system_mem_needed = acc_size + size;
>  		ttm_mem_needed = acc_size + size;
> -	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
> -		/* Userptr */
> +	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
> +		system_mem_needed = acc_size;
> +		ttm_mem_needed = acc_size;
> +		vram_needed = size;
> +	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
>  		system_mem_needed = acc_size + size;
>  		ttm_mem_needed = acc_size;
> -	} else {
> -		/* VRAM and SG */
> +	} else if (alloc_flag &
> +		   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
> +		    KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
>  		system_mem_needed = acc_size;
>  		ttm_mem_needed = acc_size;
> -		if (domain == AMDGPU_GEM_DOMAIN_VRAM)
> -			vram_needed = size;
> +	} else {
> +		pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
> +		return -ENOMEM;
>  	}
>  
>  	spin_lock(&kfd_mem_limit.mem_limit_lock);
> @@ -160,64 +178,71 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
>  	    (adev->kfd.vram_used + vram_needed >
>  	     adev->gmc.real_vram_size - reserved_for_pt)) {
>  		ret = -ENOMEM;
> -	} else {
> -		kfd_mem_limit.system_mem_used += system_mem_needed;
> -		kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
> -		adev->kfd.vram_used += vram_needed;
> +		goto release;
>  	}
>  
> +	/* Update memory accounting by decreasing available system
> +	 * memory, TTM memory and GPU memory as computed above
> +	 */
> +	adev->kfd.vram_used += vram_needed;
> +	kfd_mem_limit.system_mem_used += system_mem_needed;
> +	kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
> +
> +release:
>  	spin_unlock(&kfd_mem_limit.mem_limit_lock);
>  	return ret;
>  }
>  
>  static void unreserve_mem_limit(struct amdgpu_device *adev,
> -		uint64_t size, u32 domain, bool sg)
> +		uint64_t size, u32 alloc_flag)
>  {
>  	size_t acc_size;
>  
>  	acc_size = amdgpu_amdkfd_acc_size(size);
>  
>  	spin_lock(&kfd_mem_limit.mem_limit_lock);
> -	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
> +
> +	if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
>  		kfd_mem_limit.system_mem_used -= (acc_size + size);
>  		kfd_mem_limit.ttm_mem_used -= (acc_size + size);
> -	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
> +	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
> +		kfd_mem_limit.system_mem_used -= acc_size;
> +		kfd_mem_limit.ttm_mem_used -= acc_size;
> +		adev->kfd.vram_used -= size;
> +	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
>  		kfd_mem_limit.system_mem_used -= (acc_size + size);
>  		kfd_mem_limit.ttm_mem_used -= acc_size;
> -	} else {
> +	} else if (alloc_flag &
> +		   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
> +		    KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
>  		kfd_mem_limit.system_mem_used -= acc_size;
>  		kfd_mem_limit.ttm_mem_used -= acc_size;
> -		if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
> -			adev->kfd.vram_used -= size;
> -			WARN_ONCE(adev->kfd.vram_used < 0,
> -				  "kfd VRAM memory accounting unbalanced");
> -		}
> +	} else {
> +		pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
> +		goto release;
>  	}
> -	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
> -		  "kfd system memory accounting unbalanced");
> +
> +	/* Alert user if memory accounting is not per expectation */

This comment is obvious and unnecessary, and also not even correct.
These WARN messages are not for the user because the user did not cause
them and can do nothing to avoid them. These messages point out bugs
elsewhere in the driver. So they are for engineers.

Regards,
  Felix


> +	WARN_ONCE(adev->kfd.vram_used < 0,
> +		  "KFD VRAM memory accounting unbalanced");
>  	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
> -		  "kfd TTM memory accounting unbalanced");
> +		  "KFD TTM memory accounting unbalanced");
> +	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
> +		  "KFD system memory accounting unbalanced");
>  
> +release:
>  	spin_unlock(&kfd_mem_limit.mem_limit_lock);
>  }
>  
>  void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
>  {
>  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> -	u32 domain = bo->preferred_domains;
> -	bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
> -
> -	if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) {
> -		domain = AMDGPU_GEM_DOMAIN_CPU;
> -		sg = false;
> -	}
> -
> -	unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
> +	u32 alloc_flags = bo->kfd_bo->alloc_flags;
> +	u64 size = amdgpu_bo_size(bo);
>  
> -	kfree(bo->kfd_bo);
> +	unreserve_mem_limit(adev, size, alloc_flags);
>  }
>  
> -
>  /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
>   *  reservation object.
>   *
> @@ -1452,7 +1477,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>  
>  	amdgpu_sync_create(&(*mem)->sync);
>  
> -	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
> +	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
>  	if (ret) {
>  		pr_debug("Insufficient memory\n");
>  		goto err_reserve_limit;
> @@ -1508,7 +1533,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>  	/* Don't unreserve system mem limit twice */
>  	goto err_reserve_limit;
>  err_bo_create:
> -	unreserve_mem_limit(adev, size, alloc_domain, !!sg);
> +	unreserve_mem_limit(adev, size, flags);
>  err_reserve_limit:
>  	mutex_destroy(&(*mem)->lock);
>  	kfree(*mem);

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on allocation flag
@ 2021-11-09  6:13 Ramesh Errabolu
  2021-11-11 13:43 ` Felix Kuehling
  0 siblings, 1 reply; 8+ messages in thread
From: Ramesh Errabolu @ 2021-11-09  6:13 UTC (permalink / raw)
  To: amd-gfx; +Cc: Ramesh Errabolu

Accounting system to track amount of available memory (system, TTM
and VRAM of a device) relies on BO's domain. The change is to rely
instead on allocation flag indicating BO type - VRAM, GTT, USERPTR,
MMIO or DOORBELL

Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  16 +++
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 101 +++++++++++-------
 2 files changed, 79 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 5f658823a637..8d31a742cd80 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -307,7 +307,23 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev);
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
 				struct amdgpu_vm *vm);
+
+/**
+ * @amdgpu_amdkfd_release_notify() - Invoked when GEM object reference count
+ * reaches ZERO. Increases available memory by size of buffer including any
+ * reserved for control structures
+ *
+ * @note: This api must be invoked on BOs that have been allocated via
+ * KFD interface amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu()
+ */
 void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
+
+/**
+ * @amdgpu_amdkfd_reserve_system_mem - Decrease system memory that is
+ * available by an amount specified by input parameter
+ *
+ * @size: Size of buffer in bytes
+ */
 void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
 #else
 static inline
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 94fccf0b47ad..08675f89bfb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -120,8 +120,22 @@ static size_t amdgpu_amdkfd_acc_size(uint64_t size)
 		PAGE_ALIGN(size);
 }
 
+/**
+ * @amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size
+ * of buffer including any reserved for control structures
+ *
+ * @adev: Device to which allocated BO belongs to
+ * @size: Size of buffer, in bytes, encapsulated by B0. This should be
+ * equivalent to amdgpu_bo_size(BO)
+ * @alloc_flag: Flag used in allocating a BO as noted above
+ *
+ * @note: This api must be invoked on BOs that have been allocated via
+ * KFD interface amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu()
+ *
+ * Return: returns -ENOMEM in case of error, ZERO otherwise
+ */
 static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
-		uint64_t size, u32 domain, bool sg)
+		uint64_t size, u32 alloc_flag)
 {
 	uint64_t reserved_for_pt =
 		ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
@@ -131,20 +145,24 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 	acc_size = amdgpu_amdkfd_acc_size(size);
 
 	vram_needed = 0;
-	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
-		/* TTM GTT memory */
+	if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
 		system_mem_needed = acc_size + size;
 		ttm_mem_needed = acc_size + size;
-	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
-		/* Userptr */
+	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+		system_mem_needed = acc_size;
+		ttm_mem_needed = acc_size;
+		vram_needed = size;
+	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
 		system_mem_needed = acc_size + size;
 		ttm_mem_needed = acc_size;
-	} else {
-		/* VRAM and SG */
+	} else if (alloc_flag &
+		   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+		    KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
 		system_mem_needed = acc_size;
 		ttm_mem_needed = acc_size;
-		if (domain == AMDGPU_GEM_DOMAIN_VRAM)
-			vram_needed = size;
+	} else {
+		pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
+		return -ENOMEM;
 	}
 
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
@@ -160,64 +178,71 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 	    (adev->kfd.vram_used + vram_needed >
 	     adev->gmc.real_vram_size - reserved_for_pt)) {
 		ret = -ENOMEM;
-	} else {
-		kfd_mem_limit.system_mem_used += system_mem_needed;
-		kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
-		adev->kfd.vram_used += vram_needed;
+		goto release;
 	}
 
+	/* Update memory accounting by decreasing available system
+	 * memory, TTM memory and GPU memory as computed above
+	 */
+	adev->kfd.vram_used += vram_needed;
+	kfd_mem_limit.system_mem_used += system_mem_needed;
+	kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
+
+release:
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 	return ret;
 }
 
 static void unreserve_mem_limit(struct amdgpu_device *adev,
-		uint64_t size, u32 domain, bool sg)
+		uint64_t size, u32 alloc_flag)
 {
 	size_t acc_size;
 
 	acc_size = amdgpu_amdkfd_acc_size(size);
 
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
-	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
+
+	if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
 		kfd_mem_limit.system_mem_used -= (acc_size + size);
 		kfd_mem_limit.ttm_mem_used -= (acc_size + size);
-	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
+	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+		kfd_mem_limit.system_mem_used -= acc_size;
+		kfd_mem_limit.ttm_mem_used -= acc_size;
+		adev->kfd.vram_used -= size;
+	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
 		kfd_mem_limit.system_mem_used -= (acc_size + size);
 		kfd_mem_limit.ttm_mem_used -= acc_size;
-	} else {
+	} else if (alloc_flag &
+		   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+		    KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
 		kfd_mem_limit.system_mem_used -= acc_size;
 		kfd_mem_limit.ttm_mem_used -= acc_size;
-		if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
-			adev->kfd.vram_used -= size;
-			WARN_ONCE(adev->kfd.vram_used < 0,
-				  "kfd VRAM memory accounting unbalanced");
-		}
+	} else {
+		pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
+		goto release;
 	}
-	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
-		  "kfd system memory accounting unbalanced");
+
+	/* Alert user if memory accounting is not per expectation */
+	WARN_ONCE(adev->kfd.vram_used < 0,
+		  "KFD VRAM memory accounting unbalanced");
 	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
-		  "kfd TTM memory accounting unbalanced");
+		  "KFD TTM memory accounting unbalanced");
+	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
+		  "KFD system memory accounting unbalanced");
 
+release:
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
 
 void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	u32 domain = bo->preferred_domains;
-	bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
-
-	if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) {
-		domain = AMDGPU_GEM_DOMAIN_CPU;
-		sg = false;
-	}
-
-	unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
+	u32 alloc_flags = bo->kfd_bo->alloc_flags;
+	u64 size = amdgpu_bo_size(bo);
 
-	kfree(bo->kfd_bo);
+	unreserve_mem_limit(adev, size, alloc_flags);
 }
 
-
 /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
  *  reservation object.
  *
@@ -1452,7 +1477,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
 	amdgpu_sync_create(&(*mem)->sync);
 
-	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
+	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
 	if (ret) {
 		pr_debug("Insufficient memory\n");
 		goto err_reserve_limit;
@@ -1508,7 +1533,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	/* Don't unreserve system mem limit twice */
 	goto err_reserve_limit;
 err_bo_create:
-	unreserve_mem_limit(adev, size, alloc_domain, !!sg);
+	unreserve_mem_limit(adev, size, flags);
 err_reserve_limit:
 	mutex_destroy(&(*mem)->lock);
 	kfree(*mem);
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-11-12 17:58 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-11 21:13 [PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on allocation flag Ramesh Errabolu
2021-11-11 21:13 ` [PATCH 2/2] drm/amdgpu: Pin MMIO/DOORBELL BO's in GTT domain Ramesh Errabolu
2021-11-12 17:58   ` Felix Kuehling
  -- strict thread matches above, loose matches on Subject: below --
2021-11-09  6:13 [PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on allocation flag Ramesh Errabolu
2021-11-11 13:43 ` Felix Kuehling
2021-11-11 20:45   ` Errabolu, Ramesh
2021-11-11 20:53     ` Felix Kuehling
2021-11-11 20:58       ` Errabolu, Ramesh

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.