amd-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/9] Implement multi-GPU DMA mappings for KFD
@ 2021-04-14  6:47 Felix Kuehling
  2021-04-14  6:47 ` [PATCH 1/9] drm/amdgpu: Rename kfd_bo_va_list to kfd_mem_attachment Felix Kuehling
                   ` (8 more replies)
  0 siblings, 9 replies; 20+ messages in thread
From: Felix Kuehling @ 2021-04-14  6:47 UTC (permalink / raw)
  To: amd-gfx, dri-devel; +Cc: christian.koenig

This patch series fixes DMA-mappings of system memory (GTT and userptr)
for KFD running on multi-GPU systems with IOMMU enabled. One SG-BO per
GPU is needed to maintain the DMA mappings of each BO.

I ran into some reservation issues when unmapping or freeing DMA-buf
imports. There are a few FIXME comments in this patch series where I'm
hoping for some expert advice. Patches 8 and 9 are some related fixes
in TTM and amdgpu_ttm. I'm pretty sure patch 9 is not the right way to
do this.

Felix Kuehling (9):
  drm/amdgpu: Rename kfd_bo_va_list to kfd_mem_attachment
  drm/amdgpu: Keep a bo-reference per-attachment
  drm/amdgpu: Simplify AQL queue mapping
  drm/amdgpu: Add multi-GPU DMA mapping helpers
  drm/amdgpu: DMA map/unmap when updating GPU mappings
  drm/amdgpu: Move kfd_mem_attach outside reservation
  drm/amdgpu: Add DMA mapping of GTT BOs
  drm/ttm: Don't count pages in SG BOs against pages_limit
  drm/amdgpu: Lock the attached dmabuf in unpopulate

 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  18 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 535 ++++++++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       |  13 +
 drivers/gpu/drm/ttm/ttm_tt.c                  |  27 +-
 4 files changed, 420 insertions(+), 173 deletions(-)

-- 
2.31.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 1/9] drm/amdgpu: Rename kfd_bo_va_list to kfd_mem_attachment
  2021-04-14  6:47 [PATCH 0/9] Implement multi-GPU DMA mappings for KFD Felix Kuehling
@ 2021-04-14  6:47 ` Felix Kuehling
  2021-04-14  6:47 ` [PATCH 2/9] drm/amdgpu: Keep a bo-reference per-attachment Felix Kuehling
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 20+ messages in thread
From: Felix Kuehling @ 2021-04-14  6:47 UTC (permalink / raw)
  To: amd-gfx, dri-devel; +Cc: christian.koenig

This name is more fitting, especially for the changes coming next to
support multi-GPU systems with proper DMA mappings. Cleaned up the code
and renamed some related functions and variables to improve readability.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   8 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 209 +++++++++---------
 2 files changed, 104 insertions(+), 113 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 14f68c028126..910c50956e16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -38,10 +38,10 @@ extern uint64_t amdgpu_amdkfd_total_mem_size;
 
 struct amdgpu_device;
 
-struct kfd_bo_va_list {
-	struct list_head bo_list;
+struct kfd_mem_attachment {
+	struct list_head list;
 	struct amdgpu_bo_va *bo_va;
-	void *kgd_dev;
+	struct amdgpu_device *adev;
 	bool is_mapped;
 	uint64_t va;
 	uint64_t pte_flags;
@@ -50,7 +50,7 @@ struct kfd_bo_va_list {
 struct kgd_mem {
 	struct mutex lock;
 	struct amdgpu_bo *bo;
-	struct list_head bo_va_list;
+	struct list_head attachments;
 	/* protected by amdkfd_process_info.lock */
 	struct ttm_validate_buffer validate_list;
 	struct ttm_validate_buffer resv_list;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 6622695a5eed..d021152314ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -75,16 +75,16 @@ static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
 	return (struct amdgpu_device *)kgd;
 }
 
-static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
+static bool kfd_mem_is_attached(struct amdgpu_vm *avm,
 		struct kgd_mem *mem)
 {
-	struct kfd_bo_va_list *entry;
+	struct kfd_mem_attachment *entry;
 
-	list_for_each_entry(entry, &mem->bo_va_list, bo_list)
+	list_for_each_entry(entry, &mem->attachments, list)
 		if (entry->bo_va->base.vm == avm)
-			return false;
+			return true;
 
-	return true;
+	return false;
 }
 
 /* Set memory usage limits. Current, limits are
@@ -471,7 +471,7 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
 	return pte_flags;
 }
 
-/* add_bo_to_vm - Add a BO to a VM
+/* kfd_mem_attach - Add a BO to a VM
  *
  * Everything that needs to bo done only once when a BO is first added
  * to a VM. It can later be mapped and unmapped many times without
@@ -483,15 +483,14 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
  * 4. Alloc page tables and directories if needed
  * 4a.  Validate new page tables and directories
  */
-static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
+static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 		struct amdgpu_vm *vm, bool is_aql,
-		struct kfd_bo_va_list **p_bo_va_entry)
+		struct kfd_mem_attachment **p_attachment)
 {
 	int ret;
-	struct kfd_bo_va_list *bo_va_entry;
+	struct kfd_mem_attachment *attachment;
 	struct amdgpu_bo *bo = mem->bo;
 	uint64_t va = mem->va;
-	struct list_head *list_bo_va = &mem->bo_va_list;
 	unsigned long bo_size = bo->tbo.base.size;
 
 	if (!va) {
@@ -502,29 +501,29 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
 	if (is_aql)
 		va += bo_size;
 
-	bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL);
-	if (!bo_va_entry)
+	attachment = kzalloc(sizeof(*attachment), GFP_KERNEL);
+	if (!attachment)
 		return -ENOMEM;
 
 	pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
 			va + bo_size, vm);
 
 	/* Add BO to VM internal data structures*/
-	bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
-	if (!bo_va_entry->bo_va) {
+	attachment->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
+	if (!attachment->bo_va) {
 		ret = -EINVAL;
 		pr_err("Failed to add BO object to VM. ret == %d\n",
 				ret);
 		goto err_vmadd;
 	}
 
-	bo_va_entry->va = va;
-	bo_va_entry->pte_flags = get_pte_flags(adev, mem);
-	bo_va_entry->kgd_dev = (void *)adev;
-	list_add(&bo_va_entry->bo_list, list_bo_va);
+	attachment->va = va;
+	attachment->pte_flags = get_pte_flags(adev, mem);
+	attachment->adev = adev;
+	list_add(&attachment->list, &mem->attachments);
 
-	if (p_bo_va_entry)
-		*p_bo_va_entry = bo_va_entry;
+	if (p_attachment)
+		*p_attachment = attachment;
 
 	/* Allocate validate page tables if needed */
 	ret = vm_validate_pt_pd_bos(vm);
@@ -536,22 +535,20 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
 	return 0;
 
 err_alloc_pts:
-	amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
-	list_del(&bo_va_entry->bo_list);
+	amdgpu_vm_bo_rmv(adev, attachment->bo_va);
+	list_del(&attachment->list);
 err_vmadd:
-	kfree(bo_va_entry);
+	kfree(attachment);
 	return ret;
 }
 
-static void remove_bo_from_vm(struct amdgpu_device *adev,
-		struct kfd_bo_va_list *entry, unsigned long size)
+static void kfd_mem_detach(struct kfd_mem_attachment *attachment)
 {
-	pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n",
-			entry->va,
-			entry->va + size, entry);
-	amdgpu_vm_bo_rmv(adev, entry->bo_va);
-	list_del(&entry->bo_list);
-	kfree(entry);
+	pr_debug("\t remove VA 0x%llx in entry %p\n",
+			attachment->va, attachment);
+	amdgpu_vm_bo_rmv(attachment->adev, attachment->bo_va);
+	list_del(&attachment->list);
+	kfree(attachment);
 }
 
 static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
@@ -726,7 +723,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 				struct bo_vm_reservation_context *ctx)
 {
 	struct amdgpu_bo *bo = mem->bo;
-	struct kfd_bo_va_list *entry;
+	struct kfd_mem_attachment *entry;
 	unsigned int i;
 	int ret;
 
@@ -738,7 +735,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 	INIT_LIST_HEAD(&ctx->list);
 	INIT_LIST_HEAD(&ctx->duplicates);
 
-	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
+	list_for_each_entry(entry, &mem->attachments, list) {
 		if ((vm && vm != entry->bo_va->base.vm) ||
 			(entry->is_mapped != map_type
 			&& map_type != BO_VM_ALL))
@@ -760,7 +757,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
 
 	i = 0;
-	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
+	list_for_each_entry(entry, &mem->attachments, list) {
 		if ((vm && vm != entry->bo_va->base.vm) ||
 			(entry->is_mapped != map_type
 			&& map_type != BO_VM_ALL))
@@ -815,7 +812,7 @@ static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
 }
 
 static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
-				struct kfd_bo_va_list *entry,
+				struct kfd_mem_attachment *entry,
 				struct amdgpu_sync *sync)
 {
 	struct amdgpu_bo_va *bo_va = entry->bo_va;
@@ -831,7 +828,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
 }
 
 static int update_gpuvm_pte(struct amdgpu_device *adev,
-		struct kfd_bo_va_list *entry,
+		struct kfd_mem_attachment *entry,
 		struct amdgpu_sync *sync)
 {
 	int ret;
@@ -848,7 +845,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
 }
 
 static int map_bo_to_gpuvm(struct amdgpu_device *adev,
-		struct kfd_bo_va_list *entry, struct amdgpu_sync *sync,
+		struct kfd_mem_attachment *entry, struct amdgpu_sync *sync,
 		bool no_update_pte)
 {
 	int ret;
@@ -1235,7 +1232,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		ret = -ENOMEM;
 		goto err;
 	}
-	INIT_LIST_HEAD(&(*mem)->bo_va_list);
+	INIT_LIST_HEAD(&(*mem)->attachments);
 	mutex_init(&(*mem)->lock);
 	(*mem)->aql_queue = !!(flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
 
@@ -1316,7 +1313,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 {
 	struct amdkfd_process_info *process_info = mem->process_info;
 	unsigned long bo_size = mem->bo->tbo.base.size;
-	struct kfd_bo_va_list *entry, *tmp;
+	struct kfd_mem_attachment *entry, *tmp;
 	struct bo_vm_reservation_context ctx;
 	struct ttm_validate_buffer *bo_list_entry;
 	unsigned int mapped_to_gpu_memory;
@@ -1360,9 +1357,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 		mem->va + bo_size * (1 + mem->aql_queue));
 
 	/* Remove from VM internal data structures */
-	list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list)
-		remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev,
-				entry, bo_size);
+	list_for_each_entry_safe(entry, tmp, &mem->attachments, list)
+		kfd_mem_detach(entry);
 
 	ret = unreserve_bo_and_vms(&ctx, false, false);
 
@@ -1404,10 +1400,10 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	int ret;
 	struct amdgpu_bo *bo;
 	uint32_t domain;
-	struct kfd_bo_va_list *entry;
+	struct kfd_mem_attachment *entry;
 	struct bo_vm_reservation_context ctx;
-	struct kfd_bo_va_list *bo_va_entry = NULL;
-	struct kfd_bo_va_list *bo_va_entry_aql = NULL;
+	struct kfd_mem_attachment *attachment = NULL;
+	struct kfd_mem_attachment *attachment_aql = NULL;
 	unsigned long bo_size;
 	bool is_invalid_userptr = false;
 
@@ -1456,21 +1452,20 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	    bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
 		is_invalid_userptr = true;
 
-	if (check_if_add_bo_to_vm(avm, mem)) {
-		ret = add_bo_to_vm(adev, mem, avm, false,
-				&bo_va_entry);
+	if (!kfd_mem_is_attached(avm, mem)) {
+		ret = kfd_mem_attach(adev, mem, avm, false, &attachment);
 		if (ret)
-			goto add_bo_to_vm_failed;
+			goto attach_failed;
 		if (mem->aql_queue) {
-			ret = add_bo_to_vm(adev, mem, avm,
-					true, &bo_va_entry_aql);
+			ret = kfd_mem_attach(adev, mem, avm, true,
+					     &attachment_aql);
 			if (ret)
-				goto add_bo_to_vm_failed_aql;
+				goto attach_failed_aql;
 		}
 	} else {
 		ret = vm_validate_pt_pd_bos(avm);
 		if (unlikely(ret))
-			goto add_bo_to_vm_failed;
+			goto attach_failed;
 	}
 
 	if (mem->mapped_to_gpu_memory == 0 &&
@@ -1486,30 +1481,30 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 		}
 	}
 
-	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
-		if (entry->bo_va->base.vm == vm && !entry->is_mapped) {
-			pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
-					entry->va, entry->va + bo_size,
-					entry);
+	list_for_each_entry(entry, &mem->attachments, list) {
+		if (entry->bo_va->base.vm != vm || entry->is_mapped)
+			continue;
 
-			ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
-					      is_invalid_userptr);
-			if (ret) {
-				pr_err("Failed to map bo to gpuvm\n");
-				goto map_bo_to_gpuvm_failed;
-			}
+		pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
+			 entry->va, entry->va + bo_size, entry);
 
-			ret = vm_update_pds(vm, ctx.sync);
-			if (ret) {
-				pr_err("Failed to update page directories\n");
-				goto map_bo_to_gpuvm_failed;
-			}
+		ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
+				      is_invalid_userptr);
+		if (ret) {
+			pr_err("Failed to map bo to gpuvm\n");
+			goto map_bo_to_gpuvm_failed;
+		}
 
-			entry->is_mapped = true;
-			mem->mapped_to_gpu_memory++;
-			pr_debug("\t INC mapping count %d\n",
-					mem->mapped_to_gpu_memory);
+		ret = vm_update_pds(vm, ctx.sync);
+		if (ret) {
+			pr_err("Failed to update page directories\n");
+			goto map_bo_to_gpuvm_failed;
 		}
+
+		entry->is_mapped = true;
+		mem->mapped_to_gpu_memory++;
+		pr_debug("\t INC mapping count %d\n",
+			 mem->mapped_to_gpu_memory);
 	}
 
 	if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count)
@@ -1521,12 +1516,12 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	goto out;
 
 map_bo_to_gpuvm_failed:
-	if (bo_va_entry_aql)
-		remove_bo_from_vm(adev, bo_va_entry_aql, bo_size);
-add_bo_to_vm_failed_aql:
-	if (bo_va_entry)
-		remove_bo_from_vm(adev, bo_va_entry, bo_size);
-add_bo_to_vm_failed:
+	if (attachment_aql)
+		kfd_mem_detach(attachment_aql);
+attach_failed_aql:
+	if (attachment)
+		kfd_mem_detach(attachment);
+attach_failed:
 	unreserve_bo_and_vms(&ctx, false, false);
 out:
 	mutex_unlock(&mem->process_info->lock);
@@ -1541,7 +1536,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 	struct amdkfd_process_info *process_info =
 		((struct amdgpu_vm *)vm)->process_info;
 	unsigned long bo_size = mem->bo->tbo.base.size;
-	struct kfd_bo_va_list *entry;
+	struct kfd_mem_attachment *entry;
 	struct bo_vm_reservation_context ctx;
 	int ret;
 
@@ -1565,26 +1560,24 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 		mem->va + bo_size * (1 + mem->aql_queue),
 		vm);
 
-	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
-		if (entry->bo_va->base.vm == vm && entry->is_mapped) {
-			pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
-					entry->va,
-					entry->va + bo_size,
-					entry);
+	list_for_each_entry(entry, &mem->attachments, list) {
+		if (entry->bo_va->base.vm != vm || !entry->is_mapped)
+			continue;
 
-			ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync);
-			if (ret == 0) {
-				entry->is_mapped = false;
-			} else {
-				pr_err("failed to unmap VA 0x%llx\n",
-						mem->va);
-				goto unreserve_out;
-			}
+		pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
+			 entry->va, entry->va + bo_size, entry);
 
-			mem->mapped_to_gpu_memory--;
-			pr_debug("\t DEC mapping count %d\n",
-					mem->mapped_to_gpu_memory);
+		ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync);
+		if (ret == 0) {
+			entry->is_mapped = false;
+		} else {
+			pr_err("failed to unmap VA 0x%llx\n", mem->va);
+			goto unreserve_out;
 		}
+
+		mem->mapped_to_gpu_memory--;
+		pr_debug("\t DEC mapping count %d\n",
+			 mem->mapped_to_gpu_memory);
 	}
 
 	/* If BO is unmapped from all VMs, unfence it. It can be evicted if
@@ -1726,7 +1719,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
 	if (mmap_offset)
 		*mmap_offset = amdgpu_bo_mmap_offset(bo);
 
-	INIT_LIST_HEAD(&(*mem)->bo_va_list);
+	INIT_LIST_HEAD(&(*mem)->attachments);
 	mutex_init(&(*mem)->lock);
 
 	(*mem)->alloc_flags =
@@ -1923,7 +1916,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 	list_for_each_entry_safe(mem, tmp_mem,
 				 &process_info->userptr_inval_list,
 				 validate_list.head) {
-		struct kfd_bo_va_list *bo_va_entry;
+		struct kfd_mem_attachment *attachment;
 
 		bo = mem->bo;
 
@@ -1946,13 +1939,13 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 		 * VM faults if the GPU tries to access the invalid
 		 * memory.
 		 */
-		list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
-			if (!bo_va_entry->is_mapped)
+		list_for_each_entry(attachment, &mem->attachments, list) {
+			if (!attachment->is_mapped)
 				continue;
 
 			ret = update_gpuvm_pte((struct amdgpu_device *)
-					       bo_va_entry->kgd_dev,
-					       bo_va_entry, &sync);
+					       attachment->adev,
+					       attachment, &sync);
 			if (ret) {
 				pr_err("%s: update PTE failed\n", __func__);
 				/* make sure this gets validated again */
@@ -2133,7 +2126,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 
 		struct amdgpu_bo *bo = mem->bo;
 		uint32_t domain = mem->domain;
-		struct kfd_bo_va_list *bo_va_entry;
+		struct kfd_mem_attachment *attachment;
 
 		total_size += amdgpu_bo_size(bo);
 
@@ -2153,11 +2146,9 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 			pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
 			goto validate_map_fail;
 		}
-		list_for_each_entry(bo_va_entry, &mem->bo_va_list,
-				    bo_list) {
+		list_for_each_entry(attachment, &mem->attachments, list) {
 			ret = update_gpuvm_pte((struct amdgpu_device *)
-					      bo_va_entry->kgd_dev,
-					      bo_va_entry,
+					      attachment->adev, attachment,
 					      &sync_obj);
 			if (ret) {
 				pr_debug("Memory eviction: update PTE failed. Try again\n");
@@ -2232,7 +2223,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem
 		return -ENOMEM;
 
 	mutex_init(&(*mem)->lock);
-	INIT_LIST_HEAD(&(*mem)->bo_va_list);
+	INIT_LIST_HEAD(&(*mem)->attachments);
 	(*mem)->bo = amdgpu_bo_ref(gws_bo);
 	(*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
 	(*mem)->process_info = process_info;
-- 
2.31.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 2/9] drm/amdgpu: Keep a bo-reference per-attachment
  2021-04-14  6:47 [PATCH 0/9] Implement multi-GPU DMA mappings for KFD Felix Kuehling
  2021-04-14  6:47 ` [PATCH 1/9] drm/amdgpu: Rename kfd_bo_va_list to kfd_mem_attachment Felix Kuehling
@ 2021-04-14  6:47 ` Felix Kuehling
  2021-04-14  6:47 ` [PATCH 3/9] drm/amdgpu: Simplify AQL queue mapping Felix Kuehling
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 20+ messages in thread
From: Felix Kuehling @ 2021-04-14  6:47 UTC (permalink / raw)
  To: amd-gfx, dri-devel; +Cc: christian.koenig

For now they all reference the same BO. For correct DMA mappings they will
refer to different BOs per-GPU.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 22 ++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d021152314ad..40a296ca37b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -487,11 +487,11 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 		struct amdgpu_vm *vm, bool is_aql,
 		struct kfd_mem_attachment **p_attachment)
 {
-	int ret;
-	struct kfd_mem_attachment *attachment;
-	struct amdgpu_bo *bo = mem->bo;
+	unsigned long bo_size = mem->bo->tbo.base.size;
 	uint64_t va = mem->va;
-	unsigned long bo_size = bo->tbo.base.size;
+	struct kfd_mem_attachment *attachment;
+	struct amdgpu_bo *bo;
+	int ret;
 
 	if (!va) {
 		pr_err("Invalid VA when adding BO to VM\n");
@@ -508,6 +508,14 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 	pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
 			va + bo_size, vm);
 
+	/* FIXME: For now all attachments use the same BO. This is incorrect
+	 * because one BO can only have one DMA mapping for one GPU. We need
+	 * one BO per GPU, e.g. a DMABuf import with dynamic attachment. This
+	 * will be addressed one BO-type at a time in subsequent patches.
+	 */
+	bo = mem->bo;
+	drm_gem_object_get(&bo->tbo.base);
+
 	/* Add BO to VM internal data structures*/
 	attachment->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
 	if (!attachment->bo_va) {
@@ -527,7 +535,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 
 	/* Allocate validate page tables if needed */
 	ret = vm_validate_pt_pd_bos(vm);
-	if (ret) {
+	if (unlikely(ret)) {
 		pr_err("validate_pt_pd_bos() failed\n");
 		goto err_alloc_pts;
 	}
@@ -538,15 +546,19 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 	amdgpu_vm_bo_rmv(adev, attachment->bo_va);
 	list_del(&attachment->list);
 err_vmadd:
+	drm_gem_object_put(&bo->tbo.base);
 	kfree(attachment);
 	return ret;
 }
 
 static void kfd_mem_detach(struct kfd_mem_attachment *attachment)
 {
+	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+
 	pr_debug("\t remove VA 0x%llx in entry %p\n",
 			attachment->va, attachment);
 	amdgpu_vm_bo_rmv(attachment->adev, attachment->bo_va);
+	drm_gem_object_put(&bo->tbo.base);
 	list_del(&attachment->list);
 	kfree(attachment);
 }
-- 
2.31.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 3/9] drm/amdgpu: Simplify AQL queue mapping
  2021-04-14  6:47 [PATCH 0/9] Implement multi-GPU DMA mappings for KFD Felix Kuehling
  2021-04-14  6:47 ` [PATCH 1/9] drm/amdgpu: Rename kfd_bo_va_list to kfd_mem_attachment Felix Kuehling
  2021-04-14  6:47 ` [PATCH 2/9] drm/amdgpu: Keep a bo-reference per-attachment Felix Kuehling
@ 2021-04-14  6:47 ` Felix Kuehling
  2021-04-14  6:47 ` [PATCH 4/9] drm/amdgpu: Add multi-GPU DMA mapping helpers Felix Kuehling
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 20+ messages in thread
From: Felix Kuehling @ 2021-04-14  6:47 UTC (permalink / raw)
  To: amd-gfx, dri-devel; +Cc: christian.koenig

Do AQL queue double-mapping with a single attach call. That will make it
easier to create per-GPU BOs later, to be shared between the two BO VA
mappings on the same GPU.

Freeing the attachments is not necessary if map_to_gpu fails. These will be
cleaned up when the kdg_mem object is destroyed in
amdgpu_amdkfd_gpuvm_free_memory_of_gpu.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 103 ++++++++----------
 1 file changed, 48 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 40a296ca37b9..114fbf508707 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -484,70 +484,76 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
  * 4a.  Validate new page tables and directories
  */
 static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
-		struct amdgpu_vm *vm, bool is_aql,
-		struct kfd_mem_attachment **p_attachment)
+		struct amdgpu_vm *vm, bool is_aql)
 {
 	unsigned long bo_size = mem->bo->tbo.base.size;
 	uint64_t va = mem->va;
-	struct kfd_mem_attachment *attachment;
-	struct amdgpu_bo *bo;
-	int ret;
+	struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
+	struct amdgpu_bo *bo[2] = {NULL, NULL};
+	int i, ret;
 
 	if (!va) {
 		pr_err("Invalid VA when adding BO to VM\n");
 		return -EINVAL;
 	}
 
-	if (is_aql)
-		va += bo_size;
-
-	attachment = kzalloc(sizeof(*attachment), GFP_KERNEL);
-	if (!attachment)
-		return -ENOMEM;
+	for (i = 0; i <= is_aql; i++) {
+		attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL);
+		if (unlikely(!attachment[i])) {
+			ret = -ENOMEM;
+			goto unwind;
+		}
 
-	pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
-			va + bo_size, vm);
+		pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
+			 va + bo_size, vm);
 
-	/* FIXME: For now all attachments use the same BO. This is incorrect
-	 * because one BO can only have one DMA mapping for one GPU. We need
-	 * one BO per GPU, e.g. a DMABuf import with dynamic attachment. This
-	 * will be addressed one BO-type at a time in subsequent patches.
-	 */
-	bo = mem->bo;
-	drm_gem_object_get(&bo->tbo.base);
+		/* FIXME: For now all attachments use the same BO. This is
+		 * incorrect because one BO can only have one DMA mapping
+		 * for one GPU. We need one BO per GPU, e.g. a DMABuf
+		 * import with dynamic attachment. This will be addressed
+		 * one BO-type at a time in subsequent patches.
+		 */
+		bo[i] = mem->bo;
+		drm_gem_object_get(&bo[i]->tbo.base);
 
-	/* Add BO to VM internal data structures*/
-	attachment->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
-	if (!attachment->bo_va) {
-		ret = -EINVAL;
-		pr_err("Failed to add BO object to VM. ret == %d\n",
-				ret);
-		goto err_vmadd;
-	}
+		/* Add BO to VM internal data structures */
+		attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+		if (unlikely(!attachment[i]->bo_va)) {
+			ret = -ENOMEM;
+			pr_err("Failed to add BO object to VM. ret == %d\n",
+			       ret);
+			goto unwind;
+		}
 
-	attachment->va = va;
-	attachment->pte_flags = get_pte_flags(adev, mem);
-	attachment->adev = adev;
-	list_add(&attachment->list, &mem->attachments);
+		attachment[i]->va = va;
+		attachment[i]->pte_flags = get_pte_flags(adev, mem);
+		attachment[i]->adev = adev;
+		list_add(&attachment[i]->list, &mem->attachments);
 
-	if (p_attachment)
-		*p_attachment = attachment;
+		va += bo_size;
+	}
 
 	/* Allocate validate page tables if needed */
 	ret = vm_validate_pt_pd_bos(vm);
 	if (unlikely(ret)) {
 		pr_err("validate_pt_pd_bos() failed\n");
-		goto err_alloc_pts;
+		goto unwind;
 	}
 
 	return 0;
 
-err_alloc_pts:
-	amdgpu_vm_bo_rmv(adev, attachment->bo_va);
-	list_del(&attachment->list);
-err_vmadd:
-	drm_gem_object_put(&bo->tbo.base);
-	kfree(attachment);
+unwind:
+	for (; i >= 0; i--) {
+		if (!attachment[i])
+			continue;
+		if (attachment[i]->bo_va) {
+			amdgpu_vm_bo_rmv(adev, attachment[i]->bo_va);
+			list_del(&attachment[i]->list);
+		}
+		if (bo[i])
+			drm_gem_object_put(&bo[i]->tbo.base);
+		kfree(attachment[i]);
+	}
 	return ret;
 }
 
@@ -1414,8 +1420,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	uint32_t domain;
 	struct kfd_mem_attachment *entry;
 	struct bo_vm_reservation_context ctx;
-	struct kfd_mem_attachment *attachment = NULL;
-	struct kfd_mem_attachment *attachment_aql = NULL;
 	unsigned long bo_size;
 	bool is_invalid_userptr = false;
 
@@ -1465,15 +1469,9 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 		is_invalid_userptr = true;
 
 	if (!kfd_mem_is_attached(avm, mem)) {
-		ret = kfd_mem_attach(adev, mem, avm, false, &attachment);
+		ret = kfd_mem_attach(adev, mem, avm, mem->aql_queue);
 		if (ret)
 			goto attach_failed;
-		if (mem->aql_queue) {
-			ret = kfd_mem_attach(adev, mem, avm, true,
-					     &attachment_aql);
-			if (ret)
-				goto attach_failed_aql;
-		}
 	} else {
 		ret = vm_validate_pt_pd_bos(avm);
 		if (unlikely(ret))
@@ -1528,11 +1526,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	goto out;
 
 map_bo_to_gpuvm_failed:
-	if (attachment_aql)
-		kfd_mem_detach(attachment_aql);
-attach_failed_aql:
-	if (attachment)
-		kfd_mem_detach(attachment);
 attach_failed:
 	unreserve_bo_and_vms(&ctx, false, false);
 out:
-- 
2.31.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 4/9] drm/amdgpu: Add multi-GPU DMA mapping helpers
  2021-04-14  6:47 [PATCH 0/9] Implement multi-GPU DMA mappings for KFD Felix Kuehling
                   ` (2 preceding siblings ...)
  2021-04-14  6:47 ` [PATCH 3/9] drm/amdgpu: Simplify AQL queue mapping Felix Kuehling
@ 2021-04-14  6:47 ` Felix Kuehling
  2021-04-14  6:48 ` [PATCH 5/9] drm/amdgpu: DMA map/unmap when updating GPU mappings Felix Kuehling
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 20+ messages in thread
From: Felix Kuehling @ 2021-04-14  6:47 UTC (permalink / raw)
  To: amd-gfx, dri-devel; +Cc: christian.koenig

Add BO-type specific helpers functions to DMA-map and unmap
kfd_mem_attachments. Implement this functionality for userptrs by creating
one SG BO per GPU and filling it with a DMA mapping of the pages from the
original mem->bo.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   8 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 155 ++++++++++++++++--
 2 files changed, 152 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 910c50956e16..fc3514ed1b74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -38,11 +38,17 @@ extern uint64_t amdgpu_amdkfd_total_mem_size;
 
 struct amdgpu_device;
 
+enum kfd_mem_attachment_type {
+	KFD_MEM_ATT_SHARED,	/* Share kgd_mem->bo or another attachment's */
+	KFD_MEM_ATT_USERPTR,	/* SG bo to DMA map pages from a userptr bo */
+};
+
 struct kfd_mem_attachment {
 	struct list_head list;
+	enum kfd_mem_attachment_type type;
+	bool is_mapped;
 	struct amdgpu_bo_va *bo_va;
 	struct amdgpu_device *adev;
-	bool is_mapped;
 	uint64_t va;
 	uint64_t pte_flags;
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 114fbf508707..51502a07fc1d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -471,12 +471,117 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
 	return pte_flags;
 }
 
+static int
+kfd_mem_dmamap_userptr(struct kgd_mem *mem,
+		       struct kfd_mem_attachment *attachment)
+{
+	enum dma_data_direction direction =
+		mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+	struct ttm_operation_ctx ctx = {.interruptible = true};
+	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+	struct amdgpu_device *adev = attachment->adev;
+	struct ttm_tt *src_ttm = mem->bo->tbo.ttm;
+	struct ttm_tt *ttm = bo->tbo.ttm;
+	int ret;
+
+	ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL);
+	if (unlikely(!ttm->sg))
+		return -ENOMEM;
+
+	if (WARN_ON(ttm->num_pages != src_ttm->num_pages))
+		return -EINVAL;
+
+	/* Same sequence as in amdgpu_ttm_tt_pin_userptr */
+	ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages,
+					ttm->num_pages, 0,
+					(u64)ttm->num_pages << PAGE_SHIFT,
+					GFP_KERNEL);
+	if (unlikely(ret))
+		goto release_sg;
+
+	ret = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
+	if (unlikely(ret))
+		goto release_sg;
+
+	drm_prime_sg_to_dma_addr_array(ttm->sg, ttm->dma_address,
+				       ttm->num_pages);
+
+	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (ret)
+		goto release_sg;
+
+	return 0;
+
+release_sg:
+	pr_err("DMA map userptr failed: %d\n", ret);
+	sg_free_table(ttm->sg);
+	kfree(ttm->sg);
+	ttm->sg = NULL;
+	return ret;
+}
+
+static int
+kfd_mem_dmamap_attachment(struct kgd_mem *mem,
+			  struct kfd_mem_attachment *attachment)
+{
+	switch (attachment->type) {
+	case KFD_MEM_ATT_SHARED:
+		return 0;
+	case KFD_MEM_ATT_USERPTR:
+		return kfd_mem_dmamap_userptr(mem, attachment);
+	default:
+		WARN_ON_ONCE(1);
+	}
+	return -EINVAL;
+}
+
+static void
+kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
+			 struct kfd_mem_attachment *attachment)
+{
+	enum dma_data_direction direction =
+		mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+	struct ttm_operation_ctx ctx = {.interruptible = false};
+	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+	struct amdgpu_device *adev = attachment->adev;
+	struct ttm_tt *ttm = bo->tbo.ttm;
+
+	if (unlikely(!ttm->sg))
+		return;
+
+	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+	ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+
+	dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
+	sg_free_table(ttm->sg);
+	ttm->sg = NULL;
+}
+
+static void
+kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
+			    struct kfd_mem_attachment *attachment)
+{
+	switch (attachment->type) {
+	case KFD_MEM_ATT_SHARED:
+		break;
+	case KFD_MEM_ATT_USERPTR:
+		kfd_mem_dmaunmap_userptr(mem, attachment);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+	}
+}
+
 /* kfd_mem_attach - Add a BO to a VM
  *
  * Everything that needs to bo done only once when a BO is first added
  * to a VM. It can later be mapped and unmapped many times without
  * repeating these steps.
  *
+ * 0. Create BO for DMA mapping, if needed
  * 1. Allocate and initialize BO VA entry data structure
  * 2. Add BO to the VM
  * 3. Determine ASIC-specific PTE flags
@@ -486,10 +591,12 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
 static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 		struct amdgpu_vm *vm, bool is_aql)
 {
+	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
 	unsigned long bo_size = mem->bo->tbo.base.size;
 	uint64_t va = mem->va;
 	struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
 	struct amdgpu_bo *bo[2] = {NULL, NULL};
+	struct drm_gem_object *gobj;
 	int i, ret;
 
 	if (!va) {
@@ -507,14 +614,36 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 		pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
 			 va + bo_size, vm);
 
-		/* FIXME: For now all attachments use the same BO. This is
-		 * incorrect because one BO can only have one DMA mapping
-		 * for one GPU. We need one BO per GPU, e.g. a DMABuf
-		 * import with dynamic attachment. This will be addressed
-		 * one BO-type at a time in subsequent patches.
-		 */
-		bo[i] = mem->bo;
-		drm_gem_object_get(&bo[i]->tbo.base);
+		if (adev == bo_adev || (mem->domain == AMDGPU_GEM_DOMAIN_VRAM &&
+					amdgpu_xgmi_same_hive(adev, bo_adev))) {
+			/* Mappings on the local GPU and VRAM mappings in the
+			 * local hive share the original BO
+			 */
+			attachment[i]->type = KFD_MEM_ATT_SHARED;
+			bo[i] = mem->bo;
+			drm_gem_object_get(&bo[i]->tbo.base);
+		} else if (i > 0) {
+			/* Multiple mappings on the same GPU share the BO */
+			attachment[i]->type = KFD_MEM_ATT_SHARED;
+			bo[i] = bo[0];
+			drm_gem_object_get(&bo[i]->tbo.base);
+		} else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
+			/* Create an SG BO to DMA-map userptrs on other GPUs */
+			attachment[i]->type = KFD_MEM_ATT_USERPTR;
+			ret = amdgpu_gem_object_create(adev, bo_size, 1,
+						       AMDGPU_GEM_DOMAIN_CPU,
+						       0, ttm_bo_type_sg,
+						       mem->bo->tbo.base.resv,
+						       &gobj);
+			if (ret)
+				goto unwind;
+			bo[i] = gem_to_amdgpu_bo(gobj);
+		} else {
+			/* FIXME: Need to DMA-map other BO types */
+			attachment[i]->type = KFD_MEM_ATT_SHARED;
+			bo[i] = mem->bo;
+			drm_gem_object_get(&bo[i]->tbo.base);
+		}
 
 		/* Add BO to VM internal data structures */
 		attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
@@ -557,13 +686,19 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 	return ret;
 }
 
-static void kfd_mem_detach(struct kfd_mem_attachment *attachment)
+static void
+kfd_mem_detach(struct kgd_mem *mem, struct kfd_mem_attachment *attachment)
 {
 	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
 
 	pr_debug("\t remove VA 0x%llx in entry %p\n",
 			attachment->va, attachment);
 	amdgpu_vm_bo_rmv(attachment->adev, attachment->bo_va);
+	/* FIXME: For some reason SG BOs don't get individualized. Do this
+	 * now manually. This is probably not the right place to do this.
+	 */
+	if (bo != mem->bo)
+		bo->tbo.base.resv = &bo->tbo.base._resv;
 	drm_gem_object_put(&bo->tbo.base);
 	list_del(&attachment->list);
 	kfree(attachment);
@@ -1376,7 +1511,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 
 	/* Remove from VM internal data structures */
 	list_for_each_entry_safe(entry, tmp, &mem->attachments, list)
-		kfd_mem_detach(entry);
+		kfd_mem_detach(mem, entry);
 
 	ret = unreserve_bo_and_vms(&ctx, false, false);
 
-- 
2.31.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 5/9] drm/amdgpu: DMA map/unmap when updating GPU mappings
  2021-04-14  6:47 [PATCH 0/9] Implement multi-GPU DMA mappings for KFD Felix Kuehling
                   ` (3 preceding siblings ...)
  2021-04-14  6:47 ` [PATCH 4/9] drm/amdgpu: Add multi-GPU DMA mapping helpers Felix Kuehling
@ 2021-04-14  6:48 ` Felix Kuehling
  2021-04-14  6:48 ` [PATCH 6/9] drm/amdgpu: Move kfd_mem_attach outside reservation Felix Kuehling
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 20+ messages in thread
From: Felix Kuehling @ 2021-04-14  6:48 UTC (permalink / raw)
  To: amd-gfx, dri-devel; +Cc: christian.koenig

DMA map kfd_mem_attachments in update_gpuvm_pte. This function is called
with the BO and page tables reserved, so we can safely update the DMA
mapping.

DMA unmap when a BO is unmapped from a GPU and before updating mappings
in restore workers.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 56 ++++++++++---------
 1 file changed, 29 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 51502a07fc1d..3bb2ae185bbb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -964,11 +964,12 @@ static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
 	return ret;
 }
 
-static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
+static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
 				struct kfd_mem_attachment *entry,
 				struct amdgpu_sync *sync)
 {
 	struct amdgpu_bo_va *bo_va = entry->bo_va;
+	struct amdgpu_device *adev = entry->adev;
 	struct amdgpu_vm *vm = bo_va->base.vm;
 
 	amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
@@ -977,15 +978,20 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
 
 	amdgpu_sync_fence(sync, bo_va->last_pt_update);
 
-	return 0;
+	kfd_mem_dmaunmap_attachment(mem, entry);
 }
 
-static int update_gpuvm_pte(struct amdgpu_device *adev,
-		struct kfd_mem_attachment *entry,
-		struct amdgpu_sync *sync)
+static int update_gpuvm_pte(struct kgd_mem *mem,
+			    struct kfd_mem_attachment *entry,
+			    struct amdgpu_sync *sync)
 {
-	int ret;
 	struct amdgpu_bo_va *bo_va = entry->bo_va;
+	struct amdgpu_device *adev = entry->adev;
+	int ret;
+
+	ret = kfd_mem_dmamap_attachment(mem, entry);
+	if (ret)
+		return ret;
 
 	/* Update the page tables  */
 	ret = amdgpu_vm_bo_update(adev, bo_va, false);
@@ -997,14 +1003,15 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
 	return amdgpu_sync_fence(sync, bo_va->last_pt_update);
 }
 
-static int map_bo_to_gpuvm(struct amdgpu_device *adev,
-		struct kfd_mem_attachment *entry, struct amdgpu_sync *sync,
-		bool no_update_pte)
+static int map_bo_to_gpuvm(struct kgd_mem *mem,
+			   struct kfd_mem_attachment *entry,
+			   struct amdgpu_sync *sync,
+			   bool no_update_pte)
 {
 	int ret;
 
 	/* Set virtual address for the allocation */
-	ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0,
+	ret = amdgpu_vm_bo_map(entry->adev, entry->bo_va, entry->va, 0,
 			       amdgpu_bo_size(entry->bo_va->base.bo),
 			       entry->pte_flags);
 	if (ret) {
@@ -1016,7 +1023,7 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev,
 	if (no_update_pte)
 		return 0;
 
-	ret = update_gpuvm_pte(adev, entry, sync);
+	ret = update_gpuvm_pte(mem, entry, sync);
 	if (ret) {
 		pr_err("update_gpuvm_pte() failed\n");
 		goto update_gpuvm_pte_failed;
@@ -1025,7 +1032,7 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev,
 	return 0;
 
 update_gpuvm_pte_failed:
-	unmap_bo_from_gpuvm(adev, entry, sync);
+	unmap_bo_from_gpuvm(mem, entry, sync);
 	return ret;
 }
 
@@ -1633,7 +1640,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 		pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
 			 entry->va, entry->va + bo_size, entry);
 
-		ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
+		ret = map_bo_to_gpuvm(mem, entry, ctx.sync,
 				      is_invalid_userptr);
 		if (ret) {
 			pr_err("Failed to map bo to gpuvm\n");
@@ -1672,7 +1679,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
 {
-	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct amdkfd_process_info *process_info =
 		((struct amdgpu_vm *)vm)->process_info;
 	unsigned long bo_size = mem->bo->tbo.base.size;
@@ -1707,13 +1713,8 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 		pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
 			 entry->va, entry->va + bo_size, entry);
 
-		ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync);
-		if (ret == 0) {
-			entry->is_mapped = false;
-		} else {
-			pr_err("failed to unmap VA 0x%llx\n", mem->va);
-			goto unreserve_out;
-		}
+		unmap_bo_from_gpuvm(mem, entry, ctx.sync);
+		entry->is_mapped = false;
 
 		mem->mapped_to_gpu_memory--;
 		pr_debug("\t DEC mapping count %d\n",
@@ -2083,9 +2084,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 			if (!attachment->is_mapped)
 				continue;
 
-			ret = update_gpuvm_pte((struct amdgpu_device *)
-					       attachment->adev,
-					       attachment, &sync);
+			kfd_mem_dmaunmap_attachment(mem, attachment);
+			ret = update_gpuvm_pte(mem, attachment, &sync);
 			if (ret) {
 				pr_err("%s: update PTE failed\n", __func__);
 				/* make sure this gets validated again */
@@ -2287,9 +2287,11 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 			goto validate_map_fail;
 		}
 		list_for_each_entry(attachment, &mem->attachments, list) {
-			ret = update_gpuvm_pte((struct amdgpu_device *)
-					      attachment->adev, attachment,
-					      &sync_obj);
+			if (!attachment->is_mapped)
+				continue;
+
+			kfd_mem_dmaunmap_attachment(mem, attachment);
+			ret = update_gpuvm_pte(mem, attachment, &sync_obj);
 			if (ret) {
 				pr_debug("Memory eviction: update PTE failed. Try again\n");
 				goto validate_map_fail;
-- 
2.31.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 6/9] drm/amdgpu: Move kfd_mem_attach outside reservation
  2021-04-14  6:47 [PATCH 0/9] Implement multi-GPU DMA mappings for KFD Felix Kuehling
                   ` (4 preceding siblings ...)
  2021-04-14  6:48 ` [PATCH 5/9] drm/amdgpu: DMA map/unmap when updating GPU mappings Felix Kuehling
@ 2021-04-14  6:48 ` Felix Kuehling
  2021-04-14  6:48 ` [PATCH 7/9] drm/amdgpu: Add DMA mapping of GTT BOs Felix Kuehling
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 20+ messages in thread
From: Felix Kuehling @ 2021-04-14  6:48 UTC (permalink / raw)
  To: amd-gfx, dri-devel; +Cc: christian.koenig

This is needed to avoid deadlocks with DMA buf import in the next patch.
Also move PT/PD validation out of kfd_mem_attach, that way the caller
can bo this unconditionally.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 72 +++++++++++--------
 1 file changed, 42 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 3bb2ae185bbb..1416f3c03f1d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -575,6 +575,32 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
 	}
 }
 
+static int
+kfd_mem_attach_userptr(struct amdgpu_device *adev, struct kgd_mem *mem,
+		       struct amdgpu_bo **bo)
+{
+	unsigned long bo_size = mem->bo->tbo.base.size;
+	struct drm_gem_object *gobj;
+	int ret;
+
+	ret = amdgpu_bo_reserve(mem->bo, false);
+	if (ret)
+		return ret;
+
+	ret = amdgpu_gem_object_create(adev, bo_size, 1,
+				       AMDGPU_GEM_DOMAIN_CPU,
+				       0, ttm_bo_type_sg,
+				       mem->bo->tbo.base.resv,
+				       &gobj);
+	if (ret)
+		return ret;
+
+	amdgpu_bo_unreserve(mem->bo);
+
+	*bo = gem_to_amdgpu_bo(gobj);
+	return 0;
+}
+
 /* kfd_mem_attach - Add a BO to a VM
  *
  * Everything that needs to bo done only once when a BO is first added
@@ -596,7 +622,6 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 	uint64_t va = mem->va;
 	struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
 	struct amdgpu_bo *bo[2] = {NULL, NULL};
-	struct drm_gem_object *gobj;
 	int i, ret;
 
 	if (!va) {
@@ -630,14 +655,9 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 		} else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
 			/* Create an SG BO to DMA-map userptrs on other GPUs */
 			attachment[i]->type = KFD_MEM_ATT_USERPTR;
-			ret = amdgpu_gem_object_create(adev, bo_size, 1,
-						       AMDGPU_GEM_DOMAIN_CPU,
-						       0, ttm_bo_type_sg,
-						       mem->bo->tbo.base.resv,
-						       &gobj);
+			ret = kfd_mem_attach_userptr(adev, mem, &bo[i]);
 			if (ret)
 				goto unwind;
-			bo[i] = gem_to_amdgpu_bo(gobj);
 		} else {
 			/* FIXME: Need to DMA-map other BO types */
 			attachment[i]->type = KFD_MEM_ATT_SHARED;
@@ -662,13 +682,6 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 		va += bo_size;
 	}
 
-	/* Allocate validate page tables if needed */
-	ret = vm_validate_pt_pd_bos(vm);
-	if (unlikely(ret)) {
-		pr_err("validate_pt_pd_bos() failed\n");
-		goto unwind;
-	}
-
 	return 0;
 
 unwind:
@@ -1516,12 +1529,12 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
 		mem->va + bo_size * (1 + mem->aql_queue));
 
+	ret = unreserve_bo_and_vms(&ctx, false, false);
+
 	/* Remove from VM internal data structures */
 	list_for_each_entry_safe(entry, tmp, &mem->attachments, list)
 		kfd_mem_detach(mem, entry);
 
-	ret = unreserve_bo_and_vms(&ctx, false, false);
-
 	/* Free the sync object */
 	amdgpu_sync_free(&mem->sync);
 
@@ -1597,6 +1610,12 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 			mem->va + bo_size * (1 + mem->aql_queue),
 			vm, domain_string(domain));
 
+	if (!kfd_mem_is_attached(avm, mem)) {
+		ret = kfd_mem_attach(adev, mem, avm, mem->aql_queue);
+		if (ret)
+			goto out;
+	}
+
 	ret = reserve_bo_and_vm(mem, vm, &ctx);
 	if (unlikely(ret))
 		goto out;
@@ -1610,15 +1629,9 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	    bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
 		is_invalid_userptr = true;
 
-	if (!kfd_mem_is_attached(avm, mem)) {
-		ret = kfd_mem_attach(adev, mem, avm, mem->aql_queue);
-		if (ret)
-			goto attach_failed;
-	} else {
-		ret = vm_validate_pt_pd_bos(avm);
-		if (unlikely(ret))
-			goto attach_failed;
-	}
+	ret = vm_validate_pt_pd_bos(avm);
+	if (unlikely(ret))
+		goto out_unreserve;
 
 	if (mem->mapped_to_gpu_memory == 0 &&
 	    !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
@@ -1629,7 +1642,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 		ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
 		if (ret) {
 			pr_debug("Validate failed\n");
-			goto map_bo_to_gpuvm_failed;
+			goto out_unreserve;
 		}
 	}
 
@@ -1644,13 +1657,13 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 				      is_invalid_userptr);
 		if (ret) {
 			pr_err("Failed to map bo to gpuvm\n");
-			goto map_bo_to_gpuvm_failed;
+			goto out_unreserve;
 		}
 
 		ret = vm_update_pds(vm, ctx.sync);
 		if (ret) {
 			pr_err("Failed to update page directories\n");
-			goto map_bo_to_gpuvm_failed;
+			goto out_unreserve;
 		}
 
 		entry->is_mapped = true;
@@ -1667,8 +1680,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 
 	goto out;
 
-map_bo_to_gpuvm_failed:
-attach_failed:
+out_unreserve:
 	unreserve_bo_and_vms(&ctx, false, false);
 out:
 	mutex_unlock(&mem->process_info->lock);
-- 
2.31.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 7/9] drm/amdgpu: Add DMA mapping of GTT BOs
  2021-04-14  6:47 [PATCH 0/9] Implement multi-GPU DMA mappings for KFD Felix Kuehling
                   ` (5 preceding siblings ...)
  2021-04-14  6:48 ` [PATCH 6/9] drm/amdgpu: Move kfd_mem_attach outside reservation Felix Kuehling
@ 2021-04-14  6:48 ` Felix Kuehling
  2021-04-14  6:48 ` [PATCH 8/9] drm/ttm: Don't count pages in SG BOs against pages_limit Felix Kuehling
  2021-04-14  6:48 ` [PATCH 9/9] drm/amdgpu: Lock the attached dmabuf in unpopulate Felix Kuehling
  8 siblings, 0 replies; 20+ messages in thread
From: Felix Kuehling @ 2021-04-14  6:48 UTC (permalink / raw)
  To: amd-gfx, dri-devel; +Cc: christian.koenig

Use DMABufs with dynamic attachment to DMA-map GTT BOs on other GPUs.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  2 +
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 74 ++++++++++++++++++-
 2 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index fc3514ed1b74..3ea51982b720 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -41,6 +41,7 @@ struct amdgpu_device;
 enum kfd_mem_attachment_type {
 	KFD_MEM_ATT_SHARED,	/* Share kgd_mem->bo or another attachment's */
 	KFD_MEM_ATT_USERPTR,	/* SG bo to DMA map pages from a userptr bo */
+	KFD_MEM_ATT_DMABUF,	/* DMAbuf to DMA map TTM BOs */
 };
 
 struct kfd_mem_attachment {
@@ -56,6 +57,7 @@ struct kfd_mem_attachment {
 struct kgd_mem {
 	struct mutex lock;
 	struct amdgpu_bo *bo;
+	struct dma_buf *dmabuf;
 	struct list_head attachments;
 	/* protected by amdkfd_process_info.lock */
 	struct ttm_validate_buffer validate_list;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 1416f3c03f1d..bb3a96ab8f20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -522,6 +522,16 @@ kfd_mem_dmamap_userptr(struct kgd_mem *mem,
 	return ret;
 }
 
+static int
+kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
+{
+	struct ttm_operation_ctx ctx = {.interruptible = true};
+	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+
+	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+}
+
 static int
 kfd_mem_dmamap_attachment(struct kgd_mem *mem,
 			  struct kfd_mem_attachment *attachment)
@@ -531,6 +541,8 @@ kfd_mem_dmamap_attachment(struct kgd_mem *mem,
 		return 0;
 	case KFD_MEM_ATT_USERPTR:
 		return kfd_mem_dmamap_userptr(mem, attachment);
+	case KFD_MEM_ATT_DMABUF:
+		return kfd_mem_dmamap_dmabuf(attachment);
 	default:
 		WARN_ON_ONCE(1);
 	}
@@ -560,6 +572,19 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
 	ttm->sg = NULL;
 }
 
+static void
+kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
+{
+	struct ttm_operation_ctx ctx = {.interruptible = true};
+	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+
+	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+	ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	/* FIXME: This does not guarantee that amdgpu_ttm_tt_unpopulate is
+	 * called
+	 */
+}
+
 static void
 kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
 			    struct kfd_mem_attachment *attachment)
@@ -570,6 +595,9 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
 	case KFD_MEM_ATT_USERPTR:
 		kfd_mem_dmaunmap_userptr(mem, attachment);
 		break;
+	case KFD_MEM_ATT_DMABUF:
+		kfd_mem_dmaunmap_dmabuf(attachment);
+		break;
 	default:
 		WARN_ON_ONCE(1);
 	}
@@ -601,6 +629,36 @@ kfd_mem_attach_userptr(struct amdgpu_device *adev, struct kgd_mem *mem,
 	return 0;
 }
 
+static int
+kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
+		      struct amdgpu_bo **bo)
+{
+	struct drm_gem_object *gobj;
+
+	if (!mem->dmabuf) {
+		mem->dmabuf = amdgpu_gem_prime_export(&mem->bo->tbo.base,
+			mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+				DRM_RDWR : 0);
+		if (IS_ERR(mem->dmabuf)) {
+			mem->dmabuf = NULL;
+			return PTR_ERR(mem->dmabuf);
+		}
+	}
+
+	gobj = amdgpu_gem_prime_import(&adev->ddev, mem->dmabuf);
+	if (IS_ERR(gobj))
+		return PTR_ERR(gobj);
+
+	/* Import takes an extra reference on the dmabuf. Drop it now to
+	 * avoid leaking it. We only need the one reference in
+	 * kgd_mem->dmabuf.
+	 */
+	dma_buf_put(mem->dmabuf);
+
+	*bo = gem_to_amdgpu_bo(gobj);
+	return 0;
+}
+
 /* kfd_mem_attach - Add a BO to a VM
  *
  * Everything that needs to bo done only once when a BO is first added
@@ -658,8 +716,20 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 			ret = kfd_mem_attach_userptr(adev, mem, &bo[i]);
 			if (ret)
 				goto unwind;
+		} else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT &&
+			   mem->bo->tbo.type != ttm_bo_type_sg) {
+			/* GTT BOs use DMA-mapping ability of dynamic-attach
+			 * DMA bufs. TODO: The same should work for VRAM on
+			 * large-BAR GPUs.
+			 */
+			attachment[i]->type = KFD_MEM_ATT_DMABUF;
+			ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);
+			if (ret)
+				goto unwind;
 		} else {
-			/* FIXME: Need to DMA-map other BO types */
+			/* FIXME: Need to DMA-map other BO types:
+			 * large-BAR VRAM, doorbells, MMIO remap
+			 */
 			attachment[i]->type = KFD_MEM_ATT_SHARED;
 			bo[i] = mem->bo;
 			drm_gem_object_get(&bo[i]->tbo.base);
@@ -1558,6 +1628,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	}
 
 	/* Free the BO*/
+	if (mem->dmabuf)
+		dma_buf_put(mem->dmabuf);
 	drm_gem_object_put(&mem->bo->tbo.base);
 	mutex_destroy(&mem->lock);
 	kfree(mem);
-- 
2.31.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 8/9] drm/ttm: Don't count pages in SG BOs against pages_limit
  2021-04-14  6:47 [PATCH 0/9] Implement multi-GPU DMA mappings for KFD Felix Kuehling
                   ` (6 preceding siblings ...)
  2021-04-14  6:48 ` [PATCH 7/9] drm/amdgpu: Add DMA mapping of GTT BOs Felix Kuehling
@ 2021-04-14  6:48 ` Felix Kuehling
  2021-04-14  6:51   ` Christian König
  2021-04-14  6:48 ` [PATCH 9/9] drm/amdgpu: Lock the attached dmabuf in unpopulate Felix Kuehling
  8 siblings, 1 reply; 20+ messages in thread
From: Felix Kuehling @ 2021-04-14  6:48 UTC (permalink / raw)
  To: amd-gfx, dri-devel; +Cc: christian.koenig

Pages in SG BOs were not allocated by TTM. So don't count them against
TTM's pages limit.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/ttm/ttm_tt.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 5d8820725b75..e8b8c3257392 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -317,9 +317,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
 	if (ttm_tt_is_populated(ttm))
 		return 0;
 
-	atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
-	if (bdev->pool.use_dma32)
-		atomic_long_add(ttm->num_pages, &ttm_dma32_pages_allocated);
+	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
+		atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
+		if (bdev->pool.use_dma32)
+			atomic_long_add(ttm->num_pages,
+					&ttm_dma32_pages_allocated);
+	}
 
 	while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit ||
 	       atomic_long_read(&ttm_dma32_pages_allocated) >
@@ -350,9 +353,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
 	return 0;
 
 error:
-	atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
-	if (bdev->pool.use_dma32)
-		atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
+	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
+		atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
+		if (bdev->pool.use_dma32)
+			atomic_long_sub(ttm->num_pages,
+					&ttm_dma32_pages_allocated);
+	}
 	return ret;
 }
 EXPORT_SYMBOL(ttm_tt_populate);
@@ -382,9 +388,12 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
 	else
 		ttm_pool_free(&bdev->pool, ttm);
 
-	atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
-	if (bdev->pool.use_dma32)
-		atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
+	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
+		atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
+		if (bdev->pool.use_dma32)
+			atomic_long_sub(ttm->num_pages,
+					&ttm_dma32_pages_allocated);
+	}
 
 	ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
 }
-- 
2.31.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 9/9] drm/amdgpu: Lock the attached dmabuf in unpopulate
  2021-04-14  6:47 [PATCH 0/9] Implement multi-GPU DMA mappings for KFD Felix Kuehling
                   ` (7 preceding siblings ...)
  2021-04-14  6:48 ` [PATCH 8/9] drm/ttm: Don't count pages in SG BOs against pages_limit Felix Kuehling
@ 2021-04-14  6:48 ` Felix Kuehling
  8 siblings, 0 replies; 20+ messages in thread
From: Felix Kuehling @ 2021-04-14  6:48 UTC (permalink / raw)
  To: amd-gfx, dri-devel; +Cc: christian.koenig

amdgpu_ttm_tt_unpopulate can be called during bo_destroy. The dmabuf->resv
must not be held by the caller or dma_buf_detach will deadlock. This is
probably not the right fix. I get a recursive lock warning with the
reservation held in ttm_bo_release. Should unmap_attachment move to
backend_unbind instead?

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 936b3cfdde55..257750921eed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1216,9 +1216,22 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
 
 	if (ttm->sg && gtt->gobj->import_attach) {
 		struct dma_buf_attachment *attach;
+		bool locked;
 
 		attach = gtt->gobj->import_attach;
+		/* FIXME: unpopulate can be called during bo_destroy.
+		 * The dmabuf->resv must not be held by the caller or
+		 * dma_buf_detach will deadlock. This is probably not
+		 * the right fix. I get a recursive lock warning with the
+		 * reservation held in ttm_bo_releas.. Should
+		 * unmap_attachment move to backend_unbind instead?
+		 */
+		locked = dma_resv_is_locked(attach->dmabuf->resv);
+		if (!locked)
+			dma_resv_lock(attach->dmabuf->resv, NULL);
 		dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
+		if (!locked)
+			dma_resv_unlock(attach->dmabuf->resv);
 		ttm->sg = NULL;
 		return;
 	}
-- 
2.31.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH 8/9] drm/ttm: Don't count pages in SG BOs against pages_limit
  2021-04-14  6:48 ` [PATCH 8/9] drm/ttm: Don't count pages in SG BOs against pages_limit Felix Kuehling
@ 2021-04-14  6:51   ` Christian König
  2021-04-14  9:15     ` Daniel Vetter
  0 siblings, 1 reply; 20+ messages in thread
From: Christian König @ 2021-04-14  6:51 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx, dri-devel; +Cc: christian.koenig

Am 14.04.21 um 08:48 schrieb Felix Kuehling:
> Pages in SG BOs were not allocated by TTM. So don't count them against
> TTM's pages limit.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

Going to pick that one up for inclusion in drm-misc-next.

Regards,
Christian.

> ---
>   drivers/gpu/drm/ttm/ttm_tt.c | 27 ++++++++++++++++++---------
>   1 file changed, 18 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> index 5d8820725b75..e8b8c3257392 100644
> --- a/drivers/gpu/drm/ttm/ttm_tt.c
> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> @@ -317,9 +317,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
>   	if (ttm_tt_is_populated(ttm))
>   		return 0;
>   
> -	atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
> -	if (bdev->pool.use_dma32)
> -		atomic_long_add(ttm->num_pages, &ttm_dma32_pages_allocated);
> +	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> +		atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
> +		if (bdev->pool.use_dma32)
> +			atomic_long_add(ttm->num_pages,
> +					&ttm_dma32_pages_allocated);
> +	}
>   
>   	while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit ||
>   	       atomic_long_read(&ttm_dma32_pages_allocated) >
> @@ -350,9 +353,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
>   	return 0;
>   
>   error:
> -	atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> -	if (bdev->pool.use_dma32)
> -		atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
> +	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> +		atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> +		if (bdev->pool.use_dma32)
> +			atomic_long_sub(ttm->num_pages,
> +					&ttm_dma32_pages_allocated);
> +	}
>   	return ret;
>   }
>   EXPORT_SYMBOL(ttm_tt_populate);
> @@ -382,9 +388,12 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
>   	else
>   		ttm_pool_free(&bdev->pool, ttm);
>   
> -	atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> -	if (bdev->pool.use_dma32)
> -		atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
> +	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> +		atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> +		if (bdev->pool.use_dma32)
> +			atomic_long_sub(ttm->num_pages,
> +					&ttm_dma32_pages_allocated);
> +	}
>   
>   	ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
>   }

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 8/9] drm/ttm: Don't count pages in SG BOs against pages_limit
  2021-04-14  6:51   ` Christian König
@ 2021-04-14  9:15     ` Daniel Vetter
  2021-04-14  9:19       ` Christian König
  0 siblings, 1 reply; 20+ messages in thread
From: Daniel Vetter @ 2021-04-14  9:15 UTC (permalink / raw)
  To: Christian König; +Cc: Felix Kuehling, dri-devel, amd-gfx, christian.koenig

On Wed, Apr 14, 2021 at 08:51:51AM +0200, Christian König wrote:
> Am 14.04.21 um 08:48 schrieb Felix Kuehling:
> > Pages in SG BOs were not allocated by TTM. So don't count them against
> > TTM's pages limit.
> > 
> > Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> 
> Reviewed-by: Christian König <christian.koenig@amd.com>
> 
> Going to pick that one up for inclusion in drm-misc-next.

See my other email, but why do we need this? A bit more explanation is imo
needed here at least, since we still need to guarantee that allocations
don't over the limit in total for all gpu buffers together. At least until
the shrinker has landed.

And this here just opens up the barn door without any explanation why it's
ok.
-Daniel

> 
> Regards,
> Christian.
> 
> > ---
> >   drivers/gpu/drm/ttm/ttm_tt.c | 27 ++++++++++++++++++---------
> >   1 file changed, 18 insertions(+), 9 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> > index 5d8820725b75..e8b8c3257392 100644
> > --- a/drivers/gpu/drm/ttm/ttm_tt.c
> > +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> > @@ -317,9 +317,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
> >   	if (ttm_tt_is_populated(ttm))
> >   		return 0;
> > -	atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
> > -	if (bdev->pool.use_dma32)
> > -		atomic_long_add(ttm->num_pages, &ttm_dma32_pages_allocated);
> > +	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> > +		atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
> > +		if (bdev->pool.use_dma32)
> > +			atomic_long_add(ttm->num_pages,
> > +					&ttm_dma32_pages_allocated);
> > +	}
> >   	while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit ||
> >   	       atomic_long_read(&ttm_dma32_pages_allocated) >
> > @@ -350,9 +353,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
> >   	return 0;
> >   error:
> > -	atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> > -	if (bdev->pool.use_dma32)
> > -		atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
> > +	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> > +		atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> > +		if (bdev->pool.use_dma32)
> > +			atomic_long_sub(ttm->num_pages,
> > +					&ttm_dma32_pages_allocated);
> > +	}
> >   	return ret;
> >   }
> >   EXPORT_SYMBOL(ttm_tt_populate);
> > @@ -382,9 +388,12 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
> >   	else
> >   		ttm_pool_free(&bdev->pool, ttm);
> > -	atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> > -	if (bdev->pool.use_dma32)
> > -		atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
> > +	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> > +		atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> > +		if (bdev->pool.use_dma32)
> > +			atomic_long_sub(ttm->num_pages,
> > +					&ttm_dma32_pages_allocated);
> > +	}
> >   	ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
> >   }
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 8/9] drm/ttm: Don't count pages in SG BOs against pages_limit
  2021-04-14  9:15     ` Daniel Vetter
@ 2021-04-14  9:19       ` Christian König
  2021-04-14 10:26         ` Daniel Vetter
  0 siblings, 1 reply; 20+ messages in thread
From: Christian König @ 2021-04-14  9:19 UTC (permalink / raw)
  To: Daniel Vetter, Christian König; +Cc: Felix Kuehling, dri-devel, amd-gfx

Am 14.04.21 um 11:15 schrieb Daniel Vetter:
> On Wed, Apr 14, 2021 at 08:51:51AM +0200, Christian König wrote:
>> Am 14.04.21 um 08:48 schrieb Felix Kuehling:
>>> Pages in SG BOs were not allocated by TTM. So don't count them against
>>> TTM's pages limit.
>>>
>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>
>> Going to pick that one up for inclusion in drm-misc-next.
> See my other email, but why do we need this? A bit more explanation is imo
> needed here at least, since we still need to guarantee that allocations
> don't over the limit in total for all gpu buffers together. At least until
> the shrinker has landed.
>
> And this here just opens up the barn door without any explanation why it's
> ok.

The SG based BOs might not even be backed by pages. E.g. exported VRAM.

So either they are exported by a driver which should have accounted for 
the allocation, exported by TTM which already did the accounting or 
doesn't even point to pages at all.

This is really a bug fix to recreate the behavior we had before moving 
the accounting to this place.

Christian.

> -Daniel
>
>> Regards,
>> Christian.
>>
>>> ---
>>>    drivers/gpu/drm/ttm/ttm_tt.c | 27 ++++++++++++++++++---------
>>>    1 file changed, 18 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
>>> index 5d8820725b75..e8b8c3257392 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_tt.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
>>> @@ -317,9 +317,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
>>>    	if (ttm_tt_is_populated(ttm))
>>>    		return 0;
>>> -	atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
>>> -	if (bdev->pool.use_dma32)
>>> -		atomic_long_add(ttm->num_pages, &ttm_dma32_pages_allocated);
>>> +	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
>>> +		atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
>>> +		if (bdev->pool.use_dma32)
>>> +			atomic_long_add(ttm->num_pages,
>>> +					&ttm_dma32_pages_allocated);
>>> +	}
>>>    	while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit ||
>>>    	       atomic_long_read(&ttm_dma32_pages_allocated) >
>>> @@ -350,9 +353,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
>>>    	return 0;
>>>    error:
>>> -	atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
>>> -	if (bdev->pool.use_dma32)
>>> -		atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
>>> +	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
>>> +		atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
>>> +		if (bdev->pool.use_dma32)
>>> +			atomic_long_sub(ttm->num_pages,
>>> +					&ttm_dma32_pages_allocated);
>>> +	}
>>>    	return ret;
>>>    }
>>>    EXPORT_SYMBOL(ttm_tt_populate);
>>> @@ -382,9 +388,12 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
>>>    	else
>>>    		ttm_pool_free(&bdev->pool, ttm);
>>> -	atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
>>> -	if (bdev->pool.use_dma32)
>>> -		atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
>>> +	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
>>> +		atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
>>> +		if (bdev->pool.use_dma32)
>>> +			atomic_long_sub(ttm->num_pages,
>>> +					&ttm_dma32_pages_allocated);
>>> +	}
>>>    	ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
>>>    }
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C3075d7fd16644322a13608d8ff25d59b%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637539885255795187%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=KOnHA1CbNFjjMZR2rgHmGkH%2B7C84YCtA6u9V1wBAay4%3D&amp;reserved=0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 8/9] drm/ttm: Don't count pages in SG BOs against pages_limit
  2021-04-14  9:19       ` Christian König
@ 2021-04-14 10:26         ` Daniel Vetter
  2021-04-14 10:49           ` Christian König
  0 siblings, 1 reply; 20+ messages in thread
From: Daniel Vetter @ 2021-04-14 10:26 UTC (permalink / raw)
  To: Christian König
  Cc: dri-devel, Christian König, Felix Kuehling, amd-gfx, Daniel Vetter

On Wed, Apr 14, 2021 at 11:19:41AM +0200, Christian König wrote:
> Am 14.04.21 um 11:15 schrieb Daniel Vetter:
> > On Wed, Apr 14, 2021 at 08:51:51AM +0200, Christian König wrote:
> > > Am 14.04.21 um 08:48 schrieb Felix Kuehling:
> > > > Pages in SG BOs were not allocated by TTM. So don't count them against
> > > > TTM's pages limit.
> > > > 
> > > > Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> > > Reviewed-by: Christian König <christian.koenig@amd.com>
> > > 
> > > Going to pick that one up for inclusion in drm-misc-next.
> > See my other email, but why do we need this? A bit more explanation is imo
> > needed here at least, since we still need to guarantee that allocations
> > don't over the limit in total for all gpu buffers together. At least until
> > the shrinker has landed.
> > 
> > And this here just opens up the barn door without any explanation why it's
> > ok.
> 
> The SG based BOs might not even be backed by pages. E.g. exported VRAM.
> 
> So either they are exported by a driver which should have accounted for the
> allocation, exported by TTM which already did the accounting or doesn't even
> point to pages at all.
> 
> This is really a bug fix to recreate the behavior we had before moving the
> accounting to this place.

Throw that into the commit message and a-b: me. Ideally with a Fixes: line
or so pointing at the offending commit that broke stuff. Commit messages
should really go into more detail when there's an entire story behind a
small change like this one.
-Daniel

> 
> Christian.
> 
> > -Daniel
> > 
> > > Regards,
> > > Christian.
> > > 
> > > > ---
> > > >    drivers/gpu/drm/ttm/ttm_tt.c | 27 ++++++++++++++++++---------
> > > >    1 file changed, 18 insertions(+), 9 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> > > > index 5d8820725b75..e8b8c3257392 100644
> > > > --- a/drivers/gpu/drm/ttm/ttm_tt.c
> > > > +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> > > > @@ -317,9 +317,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
> > > >    	if (ttm_tt_is_populated(ttm))
> > > >    		return 0;
> > > > -	atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
> > > > -	if (bdev->pool.use_dma32)
> > > > -		atomic_long_add(ttm->num_pages, &ttm_dma32_pages_allocated);
> > > > +	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> > > > +		atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
> > > > +		if (bdev->pool.use_dma32)
> > > > +			atomic_long_add(ttm->num_pages,
> > > > +					&ttm_dma32_pages_allocated);
> > > > +	}
> > > >    	while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit ||
> > > >    	       atomic_long_read(&ttm_dma32_pages_allocated) >
> > > > @@ -350,9 +353,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
> > > >    	return 0;
> > > >    error:
> > > > -	atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> > > > -	if (bdev->pool.use_dma32)
> > > > -		atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
> > > > +	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> > > > +		atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> > > > +		if (bdev->pool.use_dma32)
> > > > +			atomic_long_sub(ttm->num_pages,
> > > > +					&ttm_dma32_pages_allocated);
> > > > +	}
> > > >    	return ret;
> > > >    }
> > > >    EXPORT_SYMBOL(ttm_tt_populate);
> > > > @@ -382,9 +388,12 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
> > > >    	else
> > > >    		ttm_pool_free(&bdev->pool, ttm);
> > > > -	atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> > > > -	if (bdev->pool.use_dma32)
> > > > -		atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
> > > > +	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> > > > +		atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> > > > +		if (bdev->pool.use_dma32)
> > > > +			atomic_long_sub(ttm->num_pages,
> > > > +					&ttm_dma32_pages_allocated);
> > > > +	}
> > > >    	ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
> > > >    }
> > > _______________________________________________
> > > dri-devel mailing list
> > > dri-devel@lists.freedesktop.org
> > > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C3075d7fd16644322a13608d8ff25d59b%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637539885255795187%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=KOnHA1CbNFjjMZR2rgHmGkH%2B7C84YCtA6u9V1wBAay4%3D&amp;reserved=0
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 8/9] drm/ttm: Don't count pages in SG BOs against pages_limit
  2021-04-14 10:26         ` Daniel Vetter
@ 2021-04-14 10:49           ` Christian König
  2021-04-14 12:25             ` Daniel Vetter
  0 siblings, 1 reply; 20+ messages in thread
From: Christian König @ 2021-04-14 10:49 UTC (permalink / raw)
  To: Daniel Vetter, Christian König; +Cc: Felix Kuehling, dri-devel, amd-gfx

Am 14.04.21 um 12:26 schrieb Daniel Vetter:
> On Wed, Apr 14, 2021 at 11:19:41AM +0200, Christian König wrote:
>> Am 14.04.21 um 11:15 schrieb Daniel Vetter:
>>> On Wed, Apr 14, 2021 at 08:51:51AM +0200, Christian König wrote:
>>>> Am 14.04.21 um 08:48 schrieb Felix Kuehling:
>>>>> Pages in SG BOs were not allocated by TTM. So don't count them against
>>>>> TTM's pages limit.
>>>>>
>>>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>
>>>> Going to pick that one up for inclusion in drm-misc-next.
>>> See my other email, but why do we need this? A bit more explanation is imo
>>> needed here at least, since we still need to guarantee that allocations
>>> don't over the limit in total for all gpu buffers together. At least until
>>> the shrinker has landed.
>>>
>>> And this here just opens up the barn door without any explanation why it's
>>> ok.
>> The SG based BOs might not even be backed by pages. E.g. exported VRAM.
>>
>> So either they are exported by a driver which should have accounted for the
>> allocation, exported by TTM which already did the accounting or doesn't even
>> point to pages at all.
>>
>> This is really a bug fix to recreate the behavior we had before moving the
>> accounting to this place.
> Throw that into the commit message and a-b: me. Ideally with a Fixes: line
> or so pointing at the offending commit that broke stuff. Commit messages
> should really go into more detail when there's an entire story behind a
> small change like this one.

Sorry I though that this would be obvious :)

I've already pushed the patch in the morning, but going to keep that in 
mind for the next time.

Christian.

> -Daniel
>
>> Christian.
>>
>>> -Daniel
>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>> ---
>>>>>     drivers/gpu/drm/ttm/ttm_tt.c | 27 ++++++++++++++++++---------
>>>>>     1 file changed, 18 insertions(+), 9 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
>>>>> index 5d8820725b75..e8b8c3257392 100644
>>>>> --- a/drivers/gpu/drm/ttm/ttm_tt.c
>>>>> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
>>>>> @@ -317,9 +317,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
>>>>>     	if (ttm_tt_is_populated(ttm))
>>>>>     		return 0;
>>>>> -	atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
>>>>> -	if (bdev->pool.use_dma32)
>>>>> -		atomic_long_add(ttm->num_pages, &ttm_dma32_pages_allocated);
>>>>> +	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
>>>>> +		atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
>>>>> +		if (bdev->pool.use_dma32)
>>>>> +			atomic_long_add(ttm->num_pages,
>>>>> +					&ttm_dma32_pages_allocated);
>>>>> +	}
>>>>>     	while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit ||
>>>>>     	       atomic_long_read(&ttm_dma32_pages_allocated) >
>>>>> @@ -350,9 +353,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
>>>>>     	return 0;
>>>>>     error:
>>>>> -	atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
>>>>> -	if (bdev->pool.use_dma32)
>>>>> -		atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
>>>>> +	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
>>>>> +		atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
>>>>> +		if (bdev->pool.use_dma32)
>>>>> +			atomic_long_sub(ttm->num_pages,
>>>>> +					&ttm_dma32_pages_allocated);
>>>>> +	}
>>>>>     	return ret;
>>>>>     }
>>>>>     EXPORT_SYMBOL(ttm_tt_populate);
>>>>> @@ -382,9 +388,12 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
>>>>>     	else
>>>>>     		ttm_pool_free(&bdev->pool, ttm);
>>>>> -	atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
>>>>> -	if (bdev->pool.use_dma32)
>>>>> -		atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
>>>>> +	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
>>>>> +		atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
>>>>> +		if (bdev->pool.use_dma32)
>>>>> +			atomic_long_sub(ttm->num_pages,
>>>>> +					&ttm_dma32_pages_allocated);
>>>>> +	}
>>>>>     	ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
>>>>>     }
>>>> _______________________________________________
>>>> dri-devel mailing list
>>>> dri-devel@lists.freedesktop.org
>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C3075d7fd16644322a13608d8ff25d59b%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637539885255795187%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=KOnHA1CbNFjjMZR2rgHmGkH%2B7C84YCtA6u9V1wBAay4%3D&amp;reserved=0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 8/9] drm/ttm: Don't count pages in SG BOs against pages_limit
  2021-04-14 10:49           ` Christian König
@ 2021-04-14 12:25             ` Daniel Vetter
  2021-04-14 12:43               ` Christian König
  2021-04-14 14:41               ` Felix Kuehling
  0 siblings, 2 replies; 20+ messages in thread
From: Daniel Vetter @ 2021-04-14 12:25 UTC (permalink / raw)
  To: Christian König
  Cc: dri-devel, Felix Kuehling, Christian König, amd-gfx list

On Wed, Apr 14, 2021 at 12:49 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 14.04.21 um 12:26 schrieb Daniel Vetter:
> > On Wed, Apr 14, 2021 at 11:19:41AM +0200, Christian König wrote:
> >> Am 14.04.21 um 11:15 schrieb Daniel Vetter:
> >>> On Wed, Apr 14, 2021 at 08:51:51AM +0200, Christian König wrote:
> >>>> Am 14.04.21 um 08:48 schrieb Felix Kuehling:
> >>>>> Pages in SG BOs were not allocated by TTM. So don't count them against
> >>>>> TTM's pages limit.
> >>>>>
> >>>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> >>>> Reviewed-by: Christian König <christian.koenig@amd.com>
> >>>>
> >>>> Going to pick that one up for inclusion in drm-misc-next.
> >>> See my other email, but why do we need this? A bit more explanation is imo
> >>> needed here at least, since we still need to guarantee that allocations
> >>> don't over the limit in total for all gpu buffers together. At least until
> >>> the shrinker has landed.
> >>>
> >>> And this here just opens up the barn door without any explanation why it's
> >>> ok.
> >> The SG based BOs might not even be backed by pages. E.g. exported VRAM.
> >>
> >> So either they are exported by a driver which should have accounted for the
> >> allocation, exported by TTM which already did the accounting or doesn't even
> >> point to pages at all.
> >>
> >> This is really a bug fix to recreate the behavior we had before moving the
> >> accounting to this place.
> > Throw that into the commit message and a-b: me. Ideally with a Fixes: line
> > or so pointing at the offending commit that broke stuff. Commit messages
> > should really go into more detail when there's an entire story behind a
> > small change like this one.
>
> Sorry I though that this would be obvious :)
>
> I've already pushed the patch in the morning, but going to keep that in
> mind for the next time.

I'll keep reminding you to pls elaborate more in commit messages, it's
coming up every once in a while :-)

Also in general I think a few days of letting patches soak out there,
especially shared code, is good curtesy. Some folks demand 2 weeks,
which I think is too much, but less than 24h just means you're
guaranteed to leave out half the globe with their feedback. Which
isn't great.

Driver code I don't care since there you know all the stakeholders ofc.
-Daniel

>
> Christian.
>
> > -Daniel
> >
> >> Christian.
> >>
> >>> -Daniel
> >>>
> >>>> Regards,
> >>>> Christian.
> >>>>
> >>>>> ---
> >>>>>     drivers/gpu/drm/ttm/ttm_tt.c | 27 ++++++++++++++++++---------
> >>>>>     1 file changed, 18 insertions(+), 9 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> >>>>> index 5d8820725b75..e8b8c3257392 100644
> >>>>> --- a/drivers/gpu/drm/ttm/ttm_tt.c
> >>>>> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> >>>>> @@ -317,9 +317,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
> >>>>>           if (ttm_tt_is_populated(ttm))
> >>>>>                   return 0;
> >>>>> - atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
> >>>>> - if (bdev->pool.use_dma32)
> >>>>> -         atomic_long_add(ttm->num_pages, &ttm_dma32_pages_allocated);
> >>>>> + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> >>>>> +         atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
> >>>>> +         if (bdev->pool.use_dma32)
> >>>>> +                 atomic_long_add(ttm->num_pages,
> >>>>> +                                 &ttm_dma32_pages_allocated);
> >>>>> + }
> >>>>>           while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit ||
> >>>>>                  atomic_long_read(&ttm_dma32_pages_allocated) >
> >>>>> @@ -350,9 +353,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
> >>>>>           return 0;
> >>>>>     error:
> >>>>> - atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> >>>>> - if (bdev->pool.use_dma32)
> >>>>> -         atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
> >>>>> + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> >>>>> +         atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> >>>>> +         if (bdev->pool.use_dma32)
> >>>>> +                 atomic_long_sub(ttm->num_pages,
> >>>>> +                                 &ttm_dma32_pages_allocated);
> >>>>> + }
> >>>>>           return ret;
> >>>>>     }
> >>>>>     EXPORT_SYMBOL(ttm_tt_populate);
> >>>>> @@ -382,9 +388,12 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
> >>>>>           else
> >>>>>                   ttm_pool_free(&bdev->pool, ttm);
> >>>>> - atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> >>>>> - if (bdev->pool.use_dma32)
> >>>>> -         atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
> >>>>> + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> >>>>> +         atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> >>>>> +         if (bdev->pool.use_dma32)
> >>>>> +                 atomic_long_sub(ttm->num_pages,
> >>>>> +                                 &ttm_dma32_pages_allocated);
> >>>>> + }
> >>>>>           ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
> >>>>>     }
> >>>> _______________________________________________
> >>>> dri-devel mailing list
> >>>> dri-devel@lists.freedesktop.org
> >>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C3075d7fd16644322a13608d8ff25d59b%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637539885255795187%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=KOnHA1CbNFjjMZR2rgHmGkH%2B7C84YCtA6u9V1wBAay4%3D&amp;reserved=0
>


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 8/9] drm/ttm: Don't count pages in SG BOs against pages_limit
  2021-04-14 12:25             ` Daniel Vetter
@ 2021-04-14 12:43               ` Christian König
  2021-04-14 12:47                 ` Daniel Vetter
  2021-04-14 14:41               ` Felix Kuehling
  1 sibling, 1 reply; 20+ messages in thread
From: Christian König @ 2021-04-14 12:43 UTC (permalink / raw)
  To: Daniel Vetter, Christian König
  Cc: Felix Kuehling, dri-devel, amd-gfx list

Am 14.04.21 um 14:25 schrieb Daniel Vetter:
> On Wed, Apr 14, 2021 at 12:49 PM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Am 14.04.21 um 12:26 schrieb Daniel Vetter:
>>> On Wed, Apr 14, 2021 at 11:19:41AM +0200, Christian König wrote:
>>>> Am 14.04.21 um 11:15 schrieb Daniel Vetter:
>>>>> On Wed, Apr 14, 2021 at 08:51:51AM +0200, Christian König wrote:
>>>>>> Am 14.04.21 um 08:48 schrieb Felix Kuehling:
>>>>>>> Pages in SG BOs were not allocated by TTM. So don't count them against
>>>>>>> TTM's pages limit.
>>>>>>>
>>>>>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>>>
>>>>>> Going to pick that one up for inclusion in drm-misc-next.
>>>>> See my other email, but why do we need this? A bit more explanation is imo
>>>>> needed here at least, since we still need to guarantee that allocations
>>>>> don't over the limit in total for all gpu buffers together. At least until
>>>>> the shrinker has landed.
>>>>>
>>>>> And this here just opens up the barn door without any explanation why it's
>>>>> ok.
>>>> The SG based BOs might not even be backed by pages. E.g. exported VRAM.
>>>>
>>>> So either they are exported by a driver which should have accounted for the
>>>> allocation, exported by TTM which already did the accounting or doesn't even
>>>> point to pages at all.
>>>>
>>>> This is really a bug fix to recreate the behavior we had before moving the
>>>> accounting to this place.
>>> Throw that into the commit message and a-b: me. Ideally with a Fixes: line
>>> or so pointing at the offending commit that broke stuff. Commit messages
>>> should really go into more detail when there's an entire story behind a
>>> small change like this one.
>> Sorry I though that this would be obvious :)
>>
>> I've already pushed the patch in the morning, but going to keep that in
>> mind for the next time.
> I'll keep reminding you to pls elaborate more in commit messages, it's
> coming up every once in a while :-)

Well, describing stuff in a commit message which is obvious is just 
redundant.

I can of course explain the whole background of the code in question in 
the commit message, but for obvious bug fixes like this it is just overkill.

> Also in general I think a few days of letting patches soak out there,
> especially shared code, is good curtesy. Some folks demand 2 weeks,
> which I think is too much, but less than 24h just means you're
> guaranteed to leave out half the globe with their feedback. Which
> isn't great.

Well for structural changes I certainly agree, but not for a bug fix 
which adds a missing check for a special case.

Christian.

>
> Driver code I don't care since there you know all the stakeholders ofc.
> -Daniel
>
>> Christian.
>>
>>> -Daniel
>>>
>>>> Christian.
>>>>
>>>>> -Daniel
>>>>>
>>>>>> Regards,
>>>>>> Christian.
>>>>>>
>>>>>>> ---
>>>>>>>      drivers/gpu/drm/ttm/ttm_tt.c | 27 ++++++++++++++++++---------
>>>>>>>      1 file changed, 18 insertions(+), 9 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
>>>>>>> index 5d8820725b75..e8b8c3257392 100644
>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_tt.c
>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
>>>>>>> @@ -317,9 +317,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
>>>>>>>            if (ttm_tt_is_populated(ttm))
>>>>>>>                    return 0;
>>>>>>> - atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
>>>>>>> - if (bdev->pool.use_dma32)
>>>>>>> -         atomic_long_add(ttm->num_pages, &ttm_dma32_pages_allocated);
>>>>>>> + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
>>>>>>> +         atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
>>>>>>> +         if (bdev->pool.use_dma32)
>>>>>>> +                 atomic_long_add(ttm->num_pages,
>>>>>>> +                                 &ttm_dma32_pages_allocated);
>>>>>>> + }
>>>>>>>            while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit ||
>>>>>>>                   atomic_long_read(&ttm_dma32_pages_allocated) >
>>>>>>> @@ -350,9 +353,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
>>>>>>>            return 0;
>>>>>>>      error:
>>>>>>> - atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
>>>>>>> - if (bdev->pool.use_dma32)
>>>>>>> -         atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
>>>>>>> + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
>>>>>>> +         atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
>>>>>>> +         if (bdev->pool.use_dma32)
>>>>>>> +                 atomic_long_sub(ttm->num_pages,
>>>>>>> +                                 &ttm_dma32_pages_allocated);
>>>>>>> + }
>>>>>>>            return ret;
>>>>>>>      }
>>>>>>>      EXPORT_SYMBOL(ttm_tt_populate);
>>>>>>> @@ -382,9 +388,12 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
>>>>>>>            else
>>>>>>>                    ttm_pool_free(&bdev->pool, ttm);
>>>>>>> - atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
>>>>>>> - if (bdev->pool.use_dma32)
>>>>>>> -         atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
>>>>>>> + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
>>>>>>> +         atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
>>>>>>> +         if (bdev->pool.use_dma32)
>>>>>>> +                 atomic_long_sub(ttm->num_pages,
>>>>>>> +                                 &ttm_dma32_pages_allocated);
>>>>>>> + }
>>>>>>>            ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
>>>>>>>      }
>>>>>> _______________________________________________
>>>>>> dri-devel mailing list
>>>>>> dri-devel@lists.freedesktop.org
>>>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C503f240d409740c1333508d8ff406545%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637539999355330481%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=6sW53%2FGpxk4rZKM7mpHDfgBhreCXY4McypKGqTH13b8%3D&amp;reserved=0
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 8/9] drm/ttm: Don't count pages in SG BOs against pages_limit
  2021-04-14 12:43               ` Christian König
@ 2021-04-14 12:47                 ` Daniel Vetter
  2021-04-14 12:49                   ` Christian König
  0 siblings, 1 reply; 20+ messages in thread
From: Daniel Vetter @ 2021-04-14 12:47 UTC (permalink / raw)
  To: Christian König
  Cc: Christian König, Felix Kuehling, dri-devel, amd-gfx list

On Wed, Apr 14, 2021 at 2:43 PM Christian König
<christian.koenig@amd.com> wrote:
>
> Am 14.04.21 um 14:25 schrieb Daniel Vetter:
> > On Wed, Apr 14, 2021 at 12:49 PM Christian König
> > <ckoenig.leichtzumerken@gmail.com> wrote:
> >> Am 14.04.21 um 12:26 schrieb Daniel Vetter:
> >>> On Wed, Apr 14, 2021 at 11:19:41AM +0200, Christian König wrote:
> >>>> Am 14.04.21 um 11:15 schrieb Daniel Vetter:
> >>>>> On Wed, Apr 14, 2021 at 08:51:51AM +0200, Christian König wrote:
> >>>>>> Am 14.04.21 um 08:48 schrieb Felix Kuehling:
> >>>>>>> Pages in SG BOs were not allocated by TTM. So don't count them against
> >>>>>>> TTM's pages limit.
> >>>>>>>
> >>>>>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> >>>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
> >>>>>>
> >>>>>> Going to pick that one up for inclusion in drm-misc-next.
> >>>>> See my other email, but why do we need this? A bit more explanation is imo
> >>>>> needed here at least, since we still need to guarantee that allocations
> >>>>> don't over the limit in total for all gpu buffers together. At least until
> >>>>> the shrinker has landed.
> >>>>>
> >>>>> And this here just opens up the barn door without any explanation why it's
> >>>>> ok.
> >>>> The SG based BOs might not even be backed by pages. E.g. exported VRAM.
> >>>>
> >>>> So either they are exported by a driver which should have accounted for the
> >>>> allocation, exported by TTM which already did the accounting or doesn't even
> >>>> point to pages at all.
> >>>>
> >>>> This is really a bug fix to recreate the behavior we had before moving the
> >>>> accounting to this place.
> >>> Throw that into the commit message and a-b: me. Ideally with a Fixes: line
> >>> or so pointing at the offending commit that broke stuff. Commit messages
> >>> should really go into more detail when there's an entire story behind a
> >>> small change like this one.
> >> Sorry I though that this would be obvious :)
> >>
> >> I've already pushed the patch in the morning, but going to keep that in
> >> mind for the next time.
> > I'll keep reminding you to pls elaborate more in commit messages, it's
> > coming up every once in a while :-)
>
> Well, describing stuff in a commit message which is obvious is just
> redundant.
>
> I can of course explain the whole background of the code in question in
> the commit message, but for obvious bug fixes like this it is just overkill.
>
> > Also in general I think a few days of letting patches soak out there,
> > especially shared code, is good curtesy. Some folks demand 2 weeks,
> > which I think is too much, but less than 24h just means you're
> > guaranteed to leave out half the globe with their feedback. Which
> > isn't great.
>
> Well for structural changes I certainly agree, but not for a bug fix
> which adds a missing check for a special case.

Well if it's a bugfix should at least indicate that, and regression
fixes should come with Fixes: tags. Obvious for you who screamed at
the code is generally not implying it's obvious for anyone else. So
yeah I think in general more explanations would be good.
-Daniel

>
> Christian.
>
> >
> > Driver code I don't care since there you know all the stakeholders ofc.
> > -Daniel
> >
> >> Christian.
> >>
> >>> -Daniel
> >>>
> >>>> Christian.
> >>>>
> >>>>> -Daniel
> >>>>>
> >>>>>> Regards,
> >>>>>> Christian.
> >>>>>>
> >>>>>>> ---
> >>>>>>>      drivers/gpu/drm/ttm/ttm_tt.c | 27 ++++++++++++++++++---------
> >>>>>>>      1 file changed, 18 insertions(+), 9 deletions(-)
> >>>>>>>
> >>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> >>>>>>> index 5d8820725b75..e8b8c3257392 100644
> >>>>>>> --- a/drivers/gpu/drm/ttm/ttm_tt.c
> >>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> >>>>>>> @@ -317,9 +317,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
> >>>>>>>            if (ttm_tt_is_populated(ttm))
> >>>>>>>                    return 0;
> >>>>>>> - atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
> >>>>>>> - if (bdev->pool.use_dma32)
> >>>>>>> -         atomic_long_add(ttm->num_pages, &ttm_dma32_pages_allocated);
> >>>>>>> + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> >>>>>>> +         atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
> >>>>>>> +         if (bdev->pool.use_dma32)
> >>>>>>> +                 atomic_long_add(ttm->num_pages,
> >>>>>>> +                                 &ttm_dma32_pages_allocated);
> >>>>>>> + }
> >>>>>>>            while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit ||
> >>>>>>>                   atomic_long_read(&ttm_dma32_pages_allocated) >
> >>>>>>> @@ -350,9 +353,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
> >>>>>>>            return 0;
> >>>>>>>      error:
> >>>>>>> - atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> >>>>>>> - if (bdev->pool.use_dma32)
> >>>>>>> -         atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
> >>>>>>> + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> >>>>>>> +         atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> >>>>>>> +         if (bdev->pool.use_dma32)
> >>>>>>> +                 atomic_long_sub(ttm->num_pages,
> >>>>>>> +                                 &ttm_dma32_pages_allocated);
> >>>>>>> + }
> >>>>>>>            return ret;
> >>>>>>>      }
> >>>>>>>      EXPORT_SYMBOL(ttm_tt_populate);
> >>>>>>> @@ -382,9 +388,12 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
> >>>>>>>            else
> >>>>>>>                    ttm_pool_free(&bdev->pool, ttm);
> >>>>>>> - atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> >>>>>>> - if (bdev->pool.use_dma32)
> >>>>>>> -         atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
> >>>>>>> + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> >>>>>>> +         atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> >>>>>>> +         if (bdev->pool.use_dma32)
> >>>>>>> +                 atomic_long_sub(ttm->num_pages,
> >>>>>>> +                                 &ttm_dma32_pages_allocated);
> >>>>>>> + }
> >>>>>>>            ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
> >>>>>>>      }
> >>>>>> _______________________________________________
> >>>>>> dri-devel mailing list
> >>>>>> dri-devel@lists.freedesktop.org
> >>>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C503f240d409740c1333508d8ff406545%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637539999355330481%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=6sW53%2FGpxk4rZKM7mpHDfgBhreCXY4McypKGqTH13b8%3D&amp;reserved=0
> >
>


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 8/9] drm/ttm: Don't count pages in SG BOs against pages_limit
  2021-04-14 12:47                 ` Daniel Vetter
@ 2021-04-14 12:49                   ` Christian König
  0 siblings, 0 replies; 20+ messages in thread
From: Christian König @ 2021-04-14 12:49 UTC (permalink / raw)
  To: Daniel Vetter
  Cc: Christian König, Felix Kuehling, dri-devel, amd-gfx list

Am 14.04.21 um 14:47 schrieb Daniel Vetter:
> On Wed, Apr 14, 2021 at 2:43 PM Christian König
> <christian.koenig@amd.com> wrote:
>> Am 14.04.21 um 14:25 schrieb Daniel Vetter:
>>> On Wed, Apr 14, 2021 at 12:49 PM Christian König
>>> <ckoenig.leichtzumerken@gmail.com> wrote:
>>>> Am 14.04.21 um 12:26 schrieb Daniel Vetter:
>>>>> On Wed, Apr 14, 2021 at 11:19:41AM +0200, Christian König wrote:
>>>>>> Am 14.04.21 um 11:15 schrieb Daniel Vetter:
>>>>>>> On Wed, Apr 14, 2021 at 08:51:51AM +0200, Christian König wrote:
>>>>>>>> Am 14.04.21 um 08:48 schrieb Felix Kuehling:
>>>>>>>>> Pages in SG BOs were not allocated by TTM. So don't count them against
>>>>>>>>> TTM's pages limit.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>>>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>>>>>
>>>>>>>> Going to pick that one up for inclusion in drm-misc-next.
>>>>>>> See my other email, but why do we need this? A bit more explanation is imo
>>>>>>> needed here at least, since we still need to guarantee that allocations
>>>>>>> don't over the limit in total for all gpu buffers together. At least until
>>>>>>> the shrinker has landed.
>>>>>>>
>>>>>>> And this here just opens up the barn door without any explanation why it's
>>>>>>> ok.
>>>>>> The SG based BOs might not even be backed by pages. E.g. exported VRAM.
>>>>>>
>>>>>> So either they are exported by a driver which should have accounted for the
>>>>>> allocation, exported by TTM which already did the accounting or doesn't even
>>>>>> point to pages at all.
>>>>>>
>>>>>> This is really a bug fix to recreate the behavior we had before moving the
>>>>>> accounting to this place.
>>>>> Throw that into the commit message and a-b: me. Ideally with a Fixes: line
>>>>> or so pointing at the offending commit that broke stuff. Commit messages
>>>>> should really go into more detail when there's an entire story behind a
>>>>> small change like this one.
>>>> Sorry I though that this would be obvious :)
>>>>
>>>> I've already pushed the patch in the morning, but going to keep that in
>>>> mind for the next time.
>>> I'll keep reminding you to pls elaborate more in commit messages, it's
>>> coming up every once in a while :-)
>> Well, describing stuff in a commit message which is obvious is just
>> redundant.
>>
>> I can of course explain the whole background of the code in question in
>> the commit message, but for obvious bug fixes like this it is just overkill.
>>
>>> Also in general I think a few days of letting patches soak out there,
>>> especially shared code, is good curtesy. Some folks demand 2 weeks,
>>> which I think is too much, but less than 24h just means you're
>>> guaranteed to leave out half the globe with their feedback. Which
>>> isn't great.
>> Well for structural changes I certainly agree, but not for a bug fix
>> which adds a missing check for a special case.
> Well if it's a bugfix should at least indicate that, and regression
> fixes should come with Fixes: tags. Obvious for you who screamed at
> the code is generally not implying it's obvious for anyone else. So
> yeah I think in general more explanations would be good.

Ok, seconded. The missing Fixes tag is a good point and the wording 
doesn't made it clear that this is a bug fix.

Going to keep that in mind.

Christian.

> -Daniel
>
>> Christian.
>>
>>> Driver code I don't care since there you know all the stakeholders ofc.
>>> -Daniel
>>>
>>>> Christian.
>>>>
>>>>> -Daniel
>>>>>
>>>>>> Christian.
>>>>>>
>>>>>>> -Daniel
>>>>>>>
>>>>>>>> Regards,
>>>>>>>> Christian.
>>>>>>>>
>>>>>>>>> ---
>>>>>>>>>       drivers/gpu/drm/ttm/ttm_tt.c | 27 ++++++++++++++++++---------
>>>>>>>>>       1 file changed, 18 insertions(+), 9 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
>>>>>>>>> index 5d8820725b75..e8b8c3257392 100644
>>>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_tt.c
>>>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
>>>>>>>>> @@ -317,9 +317,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
>>>>>>>>>             if (ttm_tt_is_populated(ttm))
>>>>>>>>>                     return 0;
>>>>>>>>> - atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
>>>>>>>>> - if (bdev->pool.use_dma32)
>>>>>>>>> -         atomic_long_add(ttm->num_pages, &ttm_dma32_pages_allocated);
>>>>>>>>> + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
>>>>>>>>> +         atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
>>>>>>>>> +         if (bdev->pool.use_dma32)
>>>>>>>>> +                 atomic_long_add(ttm->num_pages,
>>>>>>>>> +                                 &ttm_dma32_pages_allocated);
>>>>>>>>> + }
>>>>>>>>>             while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit ||
>>>>>>>>>                    atomic_long_read(&ttm_dma32_pages_allocated) >
>>>>>>>>> @@ -350,9 +353,12 @@ int ttm_tt_populate(struct ttm_device *bdev,
>>>>>>>>>             return 0;
>>>>>>>>>       error:
>>>>>>>>> - atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
>>>>>>>>> - if (bdev->pool.use_dma32)
>>>>>>>>> -         atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
>>>>>>>>> + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
>>>>>>>>> +         atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
>>>>>>>>> +         if (bdev->pool.use_dma32)
>>>>>>>>> +                 atomic_long_sub(ttm->num_pages,
>>>>>>>>> +                                 &ttm_dma32_pages_allocated);
>>>>>>>>> + }
>>>>>>>>>             return ret;
>>>>>>>>>       }
>>>>>>>>>       EXPORT_SYMBOL(ttm_tt_populate);
>>>>>>>>> @@ -382,9 +388,12 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
>>>>>>>>>             else
>>>>>>>>>                     ttm_pool_free(&bdev->pool, ttm);
>>>>>>>>> - atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
>>>>>>>>> - if (bdev->pool.use_dma32)
>>>>>>>>> -         atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
>>>>>>>>> + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
>>>>>>>>> +         atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
>>>>>>>>> +         if (bdev->pool.use_dma32)
>>>>>>>>> +                 atomic_long_sub(ttm->num_pages,
>>>>>>>>> +                                 &ttm_dma32_pages_allocated);
>>>>>>>>> + }
>>>>>>>>>             ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
>>>>>>>>>       }
>>>>>>>> _______________________________________________
>>>>>>>> dri-devel mailing list
>>>>>>>> dri-devel@lists.freedesktop.org
>>>>>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C1be30de6774b44ce302808d8ff437774%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637540012543510114%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=R5IkjKJV%2BmGDul5YqoUCDQKCNaYtbm3oOqT9fTF%2Bguk%3D&amp;reserved=0
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 8/9] drm/ttm: Don't count pages in SG BOs against pages_limit
  2021-04-14 12:25             ` Daniel Vetter
  2021-04-14 12:43               ` Christian König
@ 2021-04-14 14:41               ` Felix Kuehling
  1 sibling, 0 replies; 20+ messages in thread
From: Felix Kuehling @ 2021-04-14 14:41 UTC (permalink / raw)
  To: Daniel Vetter, Christian König
  Cc: dri-devel, Christian König, amd-gfx list

Am 2021-04-14 um 8:25 a.m. schrieb Daniel Vetter:
>> Sorry I though that this would be obvious :)
>>
>> I've already pushed the patch in the morning, but going to keep that in
>> mind for the next time.
> I'll keep reminding you to pls elaborate more in commit messages, it's
> coming up every once in a while :-)

It was actually my patch and commit message. I was not aware of the
history of this bug or the fact that it was a regression. Otherwise I
would have included a "Fixes:" tag. From my point of view it was just a
pretty obvious problem exposed when testing my DMA mapping patch series
for KFD.

Regards,
  Felix


>
> Also in general I think a few days of letting patches soak out there,
> especially shared code, is good curtesy. Some folks demand 2 weeks,
> which I think is too much, but less than 24h just means you're
> guaranteed to leave out half the globe with their feedback. Which
> isn't great.
>
> Driver code I don't care since there you know all the stakeholders ofc.
> -Daniel
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2021-04-14 14:42 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-14  6:47 [PATCH 0/9] Implement multi-GPU DMA mappings for KFD Felix Kuehling
2021-04-14  6:47 ` [PATCH 1/9] drm/amdgpu: Rename kfd_bo_va_list to kfd_mem_attachment Felix Kuehling
2021-04-14  6:47 ` [PATCH 2/9] drm/amdgpu: Keep a bo-reference per-attachment Felix Kuehling
2021-04-14  6:47 ` [PATCH 3/9] drm/amdgpu: Simplify AQL queue mapping Felix Kuehling
2021-04-14  6:47 ` [PATCH 4/9] drm/amdgpu: Add multi-GPU DMA mapping helpers Felix Kuehling
2021-04-14  6:48 ` [PATCH 5/9] drm/amdgpu: DMA map/unmap when updating GPU mappings Felix Kuehling
2021-04-14  6:48 ` [PATCH 6/9] drm/amdgpu: Move kfd_mem_attach outside reservation Felix Kuehling
2021-04-14  6:48 ` [PATCH 7/9] drm/amdgpu: Add DMA mapping of GTT BOs Felix Kuehling
2021-04-14  6:48 ` [PATCH 8/9] drm/ttm: Don't count pages in SG BOs against pages_limit Felix Kuehling
2021-04-14  6:51   ` Christian König
2021-04-14  9:15     ` Daniel Vetter
2021-04-14  9:19       ` Christian König
2021-04-14 10:26         ` Daniel Vetter
2021-04-14 10:49           ` Christian König
2021-04-14 12:25             ` Daniel Vetter
2021-04-14 12:43               ` Christian König
2021-04-14 12:47                 ` Daniel Vetter
2021-04-14 12:49                   ` Christian König
2021-04-14 14:41               ` Felix Kuehling
2021-04-14  6:48 ` [PATCH 9/9] drm/amdgpu: Lock the attached dmabuf in unpopulate Felix Kuehling

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).