* [PATCH 1/5] drm/amdgpu: Return EINVAL if no PT BO
From: Harish Kasiviswanathan @ 2017-05-15 21:31 UTC
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Harish Kasiviswanathan

Return -EINVAL instead of silently aborting the update when no page
table BO is found. This is also useful for the upcoming changes where
page tables can be updated by the CPU.
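
The update helpers change from void to int so that callers can
propagate the failure; a minimal sketch of the resulting caller
pattern (mirroring the amdgpu_vm_bo_update_mapping() hunk below):

	r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
	if (r)
		goto error_free;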

Change-Id: I07510ed60c94cf1944ee96bb4b16c40ec88ea17c
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 48 +++++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 88420dc..c644e54 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1164,8 +1164,9 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
  * @flags: mapping flags
  *
  * Update the page tables in the range @start - @end.
+ * Returns 0 for success, -EINVAL for failure.
  */
-static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
+static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 				  uint64_t start, uint64_t end,
 				  uint64_t dst, uint64_t flags)
 {
@@ -1183,12 +1184,12 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	pt = amdgpu_vm_get_pt(params, addr);
 	if (!pt) {
 		pr_err("PT not found, aborting update_ptes\n");
-		return;
+		return -EINVAL;
 	}
 
 	if (params->shadow) {
 		if (!pt->shadow)
-			return;
+			return 0;
 		pt = pt->shadow;
 	}
 	if ((addr & ~mask) == (end & ~mask))
@@ -1210,12 +1211,12 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		pt = amdgpu_vm_get_pt(params, addr);
 		if (!pt) {
 			pr_err("PT not found, aborting update_ptes\n");
-			return;
+			return -EINVAL;
 		}
 
 		if (params->shadow) {
 			if (!pt->shadow)
-				return;
+				return 0;
 			pt = pt->shadow;
 		}
 
@@ -1250,6 +1251,8 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 
 	params->func(params, cur_pe_start, cur_dst, cur_nptes,
 		     AMDGPU_GPU_PAGE_SIZE, flags);
+
+	return 0;
 }
 
 /*
@@ -1261,11 +1264,14 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
  * @end: last PTE to handle
  * @dst: addr those PTEs should point to
  * @flags: hw mapping flags
+ * Returns 0 for success, -EINVAL for failure.
  */
-static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
+static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 				uint64_t start, uint64_t end,
 				uint64_t dst, uint64_t flags)
 {
+	int r;
+
 	/**
 	 * The MC L1 TLB supports variable sized pages, based on a fragment
 	 * field in the PTE. When this field is set to a non-zero value, page
@@ -1294,28 +1300,30 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 
 	/* system pages are non continuously */
 	if (params->src || !(flags & AMDGPU_PTE_VALID) ||
-	    (frag_start >= frag_end)) {
-
-		amdgpu_vm_update_ptes(params, start, end, dst, flags);
-		return;
-	}
+	    (frag_start >= frag_end))
+		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
 
 	/* handle the 4K area at the beginning */
 	if (start != frag_start) {
-		amdgpu_vm_update_ptes(params, start, frag_start,
-				      dst, flags);
+		r = amdgpu_vm_update_ptes(params, start, frag_start,
+					  dst, flags);
+		if (r)
+			return r;
 		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
 	}
 
 	/* handle the area in the middle */
-	amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
-			      flags | frag_flags);
+	r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
+				  flags | frag_flags);
+	if (r)
+		return r;
 
 	/* handle the 4K area at the end */
 	if (frag_end != end) {
 		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
-		amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
+		r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
 	}
+	return r;
 }
 
 /**
@@ -1436,9 +1444,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		goto error_free;
 
 	params.shadow = true;
-	amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
+	r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
+	if (r)
+		goto error_free;
 	params.shadow = false;
-	amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
+	r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
+	if (r)
+		goto error_free;
 
 	amdgpu_ring_pad_ib(ring, params.ib);
 	WARN_ON(params.ib->length_dw > ndw);
-- 
1.9.1


* [PATCH 2/5] drm/amdgpu: Add vm context module param
From: Harish Kasiviswanathan @ 2017-05-15 21:32 UTC
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Harish Kasiviswanathan

Add VM update mode module param (amdgpu.vm_update_mode) that can be
used to control how VM PDEs/PTEs are updated for Graphics and Compute.

BIT0 controls Graphics and BIT1 Compute.
 BIT0 [= 0] Graphics updated by SDMA [= 1] by CPU
 BIT1 [= 0] Compute updated by SDMA [= 1] by CPU

By default, vm_update_mode = 2 only on large BAR systems, indicating
that Graphics VMs will be updated via SDMA and Compute VMs will be
updated via CPU. For all other systems, vm_update_mode = 0 by default.
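
A sketch of how the two bits are consumed, mirroring the check this
patch adds to amdgpu_vm_init() with the AMDGPU_VM_USE_CPU_FOR_*
defines introduced below:

	/* sketch: decoding vm_update_mode (BIT0 = GFX, BIT1 = Compute) */
	bool gfx_uses_cpu = !!(vm_update_mode &
			       AMDGPU_VM_USE_CPU_FOR_GFX);
	bool compute_uses_cpu = !!(vm_update_mode &
				   AMDGPU_VM_USE_CPU_FOR_COMPUTE);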

Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 35 ++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  | 20 ++++++++++++++++++-
 5 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index fadeb55..fd84410 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -94,6 +94,7 @@
 extern int amdgpu_vm_block_size;
 extern int amdgpu_vm_fault_stop;
 extern int amdgpu_vm_debug;
+extern int amdgpu_vm_update_mode;
 extern int amdgpu_dc;
 extern int amdgpu_sched_jobs;
 extern int amdgpu_sched_hw_submission;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 130c45d..8d28a35 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -94,6 +94,7 @@
 int amdgpu_vm_fault_stop = 0;
 int amdgpu_vm_debug = 0;
 int amdgpu_vram_page_split = 512;
+int amdgpu_vm_update_mode = -1;
 int amdgpu_exp_hw_support = 0;
 int amdgpu_dc = -1;
 int amdgpu_sched_jobs = 32;
@@ -180,6 +181,9 @@
 MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)");
 module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
 
+MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except for large BAR(LB)), 1 = Graphics only, 2 = Compute only (default for LB), 3 = Both)");
+module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444);
+
 MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 1024, -1 = disable)");
 module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index d167949..8f6c20f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -774,7 +774,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 		goto out_suspend;
 	}
 
-	r = amdgpu_vm_init(adev, &fpriv->vm);
+	r = amdgpu_vm_init(adev, &fpriv->vm,
+			   AMDGPU_VM_CONTEXT_GFX);
 	if (r) {
 		kfree(fpriv);
 		goto out_suspend;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c644e54..9c89cb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -721,6 +721,11 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 	return true;
 }
 
+static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
+{
+	return (adev->mc.real_vram_size == adev->mc.visible_vram_size);
+}
+
 /**
  * amdgpu_vm_flush - hardware flush the vm
  *
@@ -2291,10 +2296,12 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size)
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
+ * @vm_context: Indicates whether it is a GFX or Compute context
  *
  * Init @vm fields.
  */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		   int vm_context)
 {
 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
 		AMDGPU_VM_PTE_COUNT(adev) * 8);
@@ -2323,6 +2330,16 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	if (r)
 		return r;
 
+	if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
+		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+						AMDGPU_VM_USE_CPU_FOR_COMPUTE);
+	else
+		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+						AMDGPU_VM_USE_CPU_FOR_GFX);
+	DRM_DEBUG_DRIVER("VM update mode is %s\n",
+			 vm->use_cpu_for_update ? "CPU" : "SDMA");
+	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
+		  "CPU update of VM recommended only for large BAR system\n");
 	vm->last_dir_update = NULL;
 
 	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
@@ -2454,6 +2471,22 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 	atomic64_set(&adev->vm_manager.client_counter, 0);
 	spin_lock_init(&adev->vm_manager.prt_lock);
 	atomic_set(&adev->vm_manager.num_prt_users, 0);
+
+	/* If not overridden by the user, Compute VM tables are updated by
+	 * the CPU only on large BAR systems by default
+	 */
+#ifdef CONFIG_X86_64
+	if (amdgpu_vm_update_mode == -1) {
+		if (amdgpu_vm_is_large_bar(adev))
+			adev->vm_manager.vm_update_mode =
+				AMDGPU_VM_USE_CPU_FOR_COMPUTE;
+		else
+			adev->vm_manager.vm_update_mode = 0;
+	}
+#else
+	adev->vm_manager.vm_update_mode = 0;
+#endif
+
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index afe9073..9aa00d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -87,6 +87,14 @@
 /* max vmids dedicated for process */
 #define AMDGPU_VM_MAX_RESERVED_VMID	1
 
+#define AMDGPU_VM_CONTEXT_GFX 0
+#define AMDGPU_VM_CONTEXT_COMPUTE 1
+
+/* See vm_update_mode */
+#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
+#define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
+
+
 struct amdgpu_vm_pt {
 	struct amdgpu_bo	*bo;
 	uint64_t		addr;
@@ -129,6 +137,9 @@ struct amdgpu_vm {
 	struct amdgpu_vm_id	*reserved_vmid[AMDGPU_MAX_VMHUBS];
 	/* each VM will map on CSA */
 	struct amdgpu_bo_va *csa_bo_va;
+
+	/* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
+	bool                    use_cpu_for_update;
 };
 
 struct amdgpu_vm_id {
@@ -184,11 +195,18 @@ struct amdgpu_vm_manager {
 	/* partial resident texture handling */
 	spinlock_t				prt_lock;
 	atomic_t				num_prt_users;
+
+	/* controls how VM page tables are updated for Graphics and Compute.
+	 * BIT0[= 0] Graphics updated by SDMA [= 1] by CPU
+	 * BIT1[= 0] Compute updated by SDMA [= 1] by CPU
+	 */
+	int					vm_update_mode;
 };
 
 void amdgpu_vm_manager_init(struct amdgpu_device *adev);
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		   int vm_context);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
-- 
1.9.1


* [PATCH 3/5] drm/amdgpu: Add amdgpu_sync_wait
From: Harish Kasiviswanathan @ 2017-05-15 21:32 UTC
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Harish Kasiviswanathan

Add a helper function useful for CPU update of VM page tables. It is
also useful if the kernel has to wait synchronously until the VM page
tables are updated.
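
A minimal usage sketch, matching the amdgpu_vm_bo_wait() helper that a
later patch in this series builds on top of this (wait for all fences
on a BO's reservation object before a CPU update):

	struct amdgpu_sync sync;

	amdgpu_sync_create(&sync);
	amdgpu_sync_resv(adev, &sync, bo->tbo.resv, AMDGPU_FENCE_OWNER_VM);
	amdgpu_sync_wait(&sync);
	amdgpu_sync_free(&sync);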

Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 19 +++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h |  1 +
 2 files changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 5c8d302..e5b202e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -296,6 +296,25 @@ struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
 	return NULL;
 }
 
+int amdgpu_sync_wait(struct amdgpu_sync *sync)
+{
+	struct amdgpu_sync_entry *e;
+	struct hlist_node *tmp;
+	int i, r;
+
+	hash_for_each_safe(sync->fences, i, tmp, e, node) {
+		r = fence_wait(e->fence, false);
+		if (r)
+			return r;
+
+		hash_del(&e->node);
+		fence_put(e->fence);
+		kmem_cache_free(amdgpu_sync_slab, e);
+	}
+
+	return 0;
+}
+
 /**
  * amdgpu_sync_free - free the sync object
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index 405f379..6c9c489 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -49,6 +49,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 				     struct amdgpu_ring *ring);
 struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
+int amdgpu_sync_wait(struct amdgpu_sync *sync);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
 int amdgpu_sync_init(void);
 void amdgpu_sync_fini(void);
-- 
1.9.1


* [PATCH 4/5] drm/amdgpu: Support page directory update via CPU
From: Harish Kasiviswanathan @ 2017-05-15 21:32 UTC
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Harish Kasiviswanathan

If the amdgpu.vm_update_mode param is set to use CPU, then Page
Directories will be updated by the CPU instead of SDMA.
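
The update path is chosen once per walk by pointing params.func at the
right callback; a condensed sketch of the dispatch this patch adds to
amdgpu_vm_update_level() (elided parts marked):

	if (vm->use_cpu_for_update && !shadow) {
		r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
		if (r)
			return r;
		amdgpu_vm_bo_wait(adev, parent->bo);
		params.func = amdgpu_vm_cpu_set_ptes;	/* CPU writes PDEs */
	} else {
		/* ... IB allocation as before ... */
		params.func = amdgpu_vm_do_set_ptes;	/* SDMA path */
	}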

Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 151 ++++++++++++++++++++++++---------
 1 file changed, 109 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9c89cb2..d72a624 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -271,6 +271,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 				  uint64_t saddr, uint64_t eaddr,
 				  unsigned level)
 {
+	u64 flags;
 	unsigned shift = (adev->vm_manager.num_level - level) *
 		adev->vm_manager.block_size;
 	unsigned pt_idx, from, to;
@@ -299,6 +300,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 	saddr = saddr & ((1 << shift) - 1);
 	eaddr = eaddr & ((1 << shift) - 1);
 
+	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+			AMDGPU_GEM_CREATE_VRAM_CLEARED;
+	if (vm->use_cpu_for_update)
+		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+	else
+		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
+				AMDGPU_GEM_CREATE_SHADOW);
+
 	/* walk over the address space and allocate the page tables */
 	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
 		struct reservation_object *resv = vm->root.bo->tbo.resv;
@@ -310,10 +319,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 					     amdgpu_vm_bo_size(adev, level),
 					     AMDGPU_GPU_PAGE_SIZE, true,
 					     AMDGPU_GEM_DOMAIN_VRAM,
-					     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
-					     AMDGPU_GEM_CREATE_SHADOW |
-					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
-					     AMDGPU_GEM_CREATE_VRAM_CLEARED,
+					     flags,
 					     NULL, resv, &pt);
 			if (r)
 				return r;
@@ -952,6 +958,43 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
 	return result;
 }
 
+/**
+ * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
+ *
+ * @params: see amdgpu_pte_update_params definition
+ * @pe: kmap addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: hw access flags
+ */
+static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
+				   uint64_t pe, uint64_t addr,
+				   unsigned count, uint32_t incr,
+				   uint64_t flags)
+{
+	unsigned int i;
+
+	for (i = 0; i < count; i++) {
+		amdgpu_gart_set_pte_pde(params->adev, (void *)pe,
+					i, addr, flags);
+		addr += incr;
+	}
+
+	mb();
+	amdgpu_gart_flush_gpu_tlb(params->adev, 0);
+}
+
+static void amdgpu_vm_bo_wait(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+{
+	struct amdgpu_sync sync;
+
+	amdgpu_sync_create(&sync);
+	amdgpu_sync_resv(adev, &sync, bo->tbo.resv, AMDGPU_FENCE_OWNER_VM);
+	amdgpu_sync_wait(&sync);
+	amdgpu_sync_free(&sync);
+}
+
 /*
  * amdgpu_vm_update_level - update a single level in the hierarchy
  *
@@ -981,34 +1024,50 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 
 	if (!parent->entries)
 		return 0;
-	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
 
-	/* padding, etc. */
-	ndw = 64;
+	memset(&params, 0, sizeof(params));
+	params.adev = adev;
+	shadow = parent->bo->shadow;
 
-	/* assume the worst case */
-	ndw += parent->last_entry_used * 6;
+	WARN_ON(vm->use_cpu_for_update && shadow);
+	if (vm->use_cpu_for_update && !shadow) {
+		r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
+		if (r)
+			return r;
+		amdgpu_vm_bo_wait(adev, parent->bo);
+		params.func = amdgpu_vm_cpu_set_ptes;
+	} else {
+		if (shadow) {
+			r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
+			if (r)
+				return r;
+		}
+		ring = container_of(vm->entity.sched, struct amdgpu_ring,
+				    sched);
 
-	pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+		/* padding, etc. */
+		ndw = 64;
 
-	shadow = parent->bo->shadow;
-	if (shadow) {
-		r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
+		/* assume the worst case */
+		ndw += parent->last_entry_used * 6;
+
+		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+
+		if (shadow) {
+			shadow_addr = amdgpu_bo_gpu_offset(shadow);
+			ndw *= 2;
+		} else {
+			shadow_addr = 0;
+		}
+
+		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
 		if (r)
 			return r;
-		shadow_addr = amdgpu_bo_gpu_offset(shadow);
-		ndw *= 2;
-	} else {
-		shadow_addr = 0;
-	}
 
-	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
-	if (r)
-		return r;
+		params.ib = &job->ibs[0];
+		params.func = amdgpu_vm_do_set_ptes;
+	}
 
-	memset(&params, 0, sizeof(params));
-	params.adev = adev;
-	params.ib = &job->ibs[0];
 
 	/* walk over the address space and update the directory */
 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
@@ -1043,15 +1102,15 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 					amdgpu_vm_adjust_mc_addr(adev, last_pt);
 
 				if (shadow)
-					amdgpu_vm_do_set_ptes(&params,
-							      last_shadow,
-							      pt_addr, count,
-							      incr,
-							      AMDGPU_PTE_VALID);
-
-				amdgpu_vm_do_set_ptes(&params, last_pde,
-						      pt_addr, count, incr,
-						      AMDGPU_PTE_VALID);
+					params.func(&params,
+						    last_shadow,
+						    pt_addr, count,
+						    incr,
+						    AMDGPU_PTE_VALID);
+
+				params.func(&params, last_pde,
+					    pt_addr, count, incr,
+					    AMDGPU_PTE_VALID);
 			}
 
 			count = 1;
@@ -1067,14 +1126,16 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt);
 
 		if (vm->root.bo->shadow)
-			amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr,
-					      count, incr, AMDGPU_PTE_VALID);
+			params.func(&params, last_shadow, pt_addr,
+				    count, incr, AMDGPU_PTE_VALID);
 
-		amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr,
-				      count, incr, AMDGPU_PTE_VALID);
+		params.func(&params, last_pde, pt_addr,
+			    count, incr, AMDGPU_PTE_VALID);
 	}
 
-	if (params.ib->length_dw == 0) {
+	if (params.func == amdgpu_vm_cpu_set_ptes)
+		amdgpu_bo_kunmap(parent->bo);
+	else if (params.ib->length_dw == 0) {
 		amdgpu_job_free(job);
 	} else {
 		amdgpu_ring_pad_ib(ring, params.ib);
@@ -2309,6 +2370,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	struct amdgpu_ring *ring;
 	struct amd_sched_rq *rq;
 	int r, i;
+	u64 flags;
 
 	vm->va = RB_ROOT;
 	vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
@@ -2342,12 +2404,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		  "CPU update of VM recommended only for large BAR system\n");
 	vm->last_dir_update = NULL;
 
+	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+			AMDGPU_GEM_CREATE_VRAM_CLEARED;
+	if (vm->use_cpu_for_update)
+		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+	else
+		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
+				AMDGPU_GEM_CREATE_SHADOW);
+
 	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
 			     AMDGPU_GEM_DOMAIN_VRAM,
-			     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
-			     AMDGPU_GEM_CREATE_SHADOW |
-			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
-			     AMDGPU_GEM_CREATE_VRAM_CLEARED,
+			     flags,
 			     NULL, NULL, &vm->root.bo);
 	if (r)
 		goto error_free_sched_entity;
-- 
1.9.1


* [PATCH 5/5] drm/amdgpu: Support page table update via CPU
From: Harish Kasiviswanathan @ 2017-05-15 21:32 UTC
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Harish Kasiviswanathan

Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 86 +++++++++++++++++++++++++++++++++-
 1 file changed, 85 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d72a624..e98d558 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -79,6 +79,12 @@ struct amdgpu_pte_update_params {
 		     uint64_t flags);
 	/* indicate update pt or its shadow */
 	bool shadow;
+	/* The next two are used during VM update by CPU
+	 *  DMA addresses to use for mapping
+	 *  Kernel pointer of PD/PT BO that needs to be updated
+	 */
+	dma_addr_t *pages_addr;
+	void *kptr;
 };
 
 /* Helper to disable partial resident texture feature from a fence callback */
@@ -974,10 +980,14 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
 				   uint64_t flags)
 {
 	unsigned int i;
+	uint64_t value;
 
 	for (i = 0; i < count; i++) {
+		value = params->pages_addr ?
+			amdgpu_vm_map_gart(params->pages_addr, addr) :
+			addr;
 		amdgpu_gart_set_pte_pde(params->adev, (void *)pe,
-					i, addr, flags);
+					i, value, flags);
 		addr += incr;
 	}
 
@@ -1220,6 +1230,59 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
 }
 
 /**
+ * amdgpu_vm_update_ptes_cpu - Update the page tables in the range
+ *  start - @end using CPU.
+ * See amdgpu_vm_update_ptes for parameter description.
+ *
+ */
+static int amdgpu_vm_update_ptes_cpu(struct amdgpu_pte_update_params *params,
+				     uint64_t start, uint64_t end,
+				     uint64_t dst, uint64_t flags)
+{
+	struct amdgpu_device *adev = params->adev;
+	const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
+	void *pe_ptr;
+	uint64_t addr;
+	struct amdgpu_bo *pt;
+	unsigned int nptes;
+	int r;
+
+	/* initialize the variables */
+	addr = start;
+
+	/* walk over the address space and update the page tables */
+	while (addr < end) {
+		pt = amdgpu_vm_get_pt(params, addr);
+		if (!pt) {
+			pr_err("PT not found, aborting update_ptes\n");
+			return -EINVAL;
+		}
+
+		WARN_ON(params->shadow);
+
+		r = amdgpu_bo_kmap(pt, &pe_ptr);
+		if (r)
+			return r;
+
+		pe_ptr += (addr & mask) * 8;
+
+		if ((addr & ~mask) == (end & ~mask))
+			nptes = end - addr;
+		else
+			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
+
+		params->func(params, (uint64_t)pe_ptr, dst, nptes,
+			     AMDGPU_GPU_PAGE_SIZE, flags);
+
+		amdgpu_bo_kunmap(pt);
+		addr += nptes;
+		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+/**
  * amdgpu_vm_update_ptes - make sure that page tables are valid
  *
  * @params: see amdgpu_pte_update_params definition
@@ -1245,6 +1308,10 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	unsigned nptes; /* next number of ptes to be updated */
 	uint64_t next_pe_start;
 
+	if (params->func == amdgpu_vm_cpu_set_ptes)
+		return amdgpu_vm_update_ptes_cpu(params, start, end,
+						 dst, flags);
+
 	/* initialize the variables */
 	addr = start;
 	pt = amdgpu_vm_get_pt(params, addr);
@@ -1431,6 +1498,23 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	params.vm = vm;
 	params.src = src;
 
+	if (vm->use_cpu_for_update) {
+		/* params.src is used as flag to indicate system Memory */
+		if (pages_addr)
+			params.src = ~0;
+
+		/* Wait for PT BOs to be free. PTs share the same resv. object
+		 * as the root PD BO
+		 */
+		amdgpu_vm_bo_wait(adev, vm->root.bo);
+		params.func = amdgpu_vm_cpu_set_ptes;
+		params.pages_addr = pages_addr;
+		params.shadow = false;
+		r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
+		if (!r)
+			return r;
+	}
+
 	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
 
 	/* sync to everything on unmapping */
-- 
1.9.1


* Re: [PATCH 4/5] drm/amdgpu: Support page directory update via CPU
From: zhoucm1 @ 2017-05-16 02:49 UTC
  To: Harish Kasiviswanathan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2017-05-16 05:32, Harish Kasiviswanathan wrote:
> If the amdgpu.vm_update_mode param is set to use CPU, then Page
> Directories will be updated by the CPU instead of SDMA.
>
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 151 ++++++++++++++++++++++++---------
>   1 file changed, 109 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9c89cb2..d72a624 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -271,6 +271,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   				  uint64_t saddr, uint64_t eaddr,
>   				  unsigned level)
>   {
> +	u64 flags;
>   	unsigned shift = (adev->vm_manager.num_level - level) *
>   		adev->vm_manager.block_size;
>   	unsigned pt_idx, from, to;
> @@ -299,6 +300,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   	saddr = saddr & ((1 << shift) - 1);
>   	eaddr = eaddr & ((1 << shift) - 1);
>   
> +	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> +			AMDGPU_GEM_CREATE_VRAM_CLEARED;
> +	if (vm->use_cpu_for_update)
> +		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
I think the shadow flag is needed for the CPU case as well; it is used
to back up the VM BO and is meaningful on GPU reset.
Same comment for the PD BO.

Regards,
David Zhou
> +	else
> +		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> +				AMDGPU_GEM_CREATE_SHADOW);
> +
>   	/* walk over the address space and allocate the page tables */
>   	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
>   		struct reservation_object *resv = vm->root.bo->tbo.resv;
> @@ -310,10 +319,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   					     amdgpu_vm_bo_size(adev, level),
>   					     AMDGPU_GPU_PAGE_SIZE, true,
>   					     AMDGPU_GEM_DOMAIN_VRAM,
> -					     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> -					     AMDGPU_GEM_CREATE_SHADOW |
> -					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> -					     AMDGPU_GEM_CREATE_VRAM_CLEARED,
> +					     flags,
>   					     NULL, resv, &pt);
>   			if (r)
>   				return r;
> @@ -952,6 +958,43 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
>   	return result;
>   }
>   
> +/**
> + * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
> + *
> + * @params: see amdgpu_pte_update_params definition
> + * @pe: kmap addr of the page entry
> + * @addr: dst addr to write into pe
> + * @count: number of page entries to update
> + * @incr: increase next addr by incr bytes
> + * @flags: hw access flags
> + */
> +static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
> +				   uint64_t pe, uint64_t addr,
> +				   unsigned count, uint32_t incr,
> +				   uint64_t flags)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < count; i++) {
> +		amdgpu_gart_set_pte_pde(params->adev, (void *)pe,
> +					i, addr, flags);
> +		addr += incr;
> +	}
> +
> +	mb();
> +	amdgpu_gart_flush_gpu_tlb(params->adev, 0);
> +}
> +
> +static void amdgpu_vm_bo_wait(struct amdgpu_device *adev, struct amdgpu_bo *bo)
> +{
> +	struct amdgpu_sync sync;
> +
> +	amdgpu_sync_create(&sync);
> +	amdgpu_sync_resv(adev, &sync, bo->tbo.resv, AMDGPU_FENCE_OWNER_VM);
> +	amdgpu_sync_wait(&sync);
> +	amdgpu_sync_free(&sync);
> +}
> +
>   /*
>    * amdgpu_vm_update_level - update a single level in the hierarchy
>    *
> @@ -981,34 +1024,50 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   
>   	if (!parent->entries)
>   		return 0;
> -	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
>   
> -	/* padding, etc. */
> -	ndw = 64;
> +	memset(&params, 0, sizeof(params));
> +	params.adev = adev;
> +	shadow = parent->bo->shadow;
>   
> -	/* assume the worst case */
> -	ndw += parent->last_entry_used * 6;
> +	WARN_ON(vm->use_cpu_for_update && shadow);
> +	if (vm->use_cpu_for_update && !shadow) {
> +		r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
> +		if (r)
> +			return r;
> +		amdgpu_vm_bo_wait(adev, parent->bo);
> +		params.func = amdgpu_vm_cpu_set_ptes;
> +	} else {
> +		if (shadow) {
> +			r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
> +			if (r)
> +				return r;
> +		}
> +		ring = container_of(vm->entity.sched, struct amdgpu_ring,
> +				    sched);
>   
> -	pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> +		/* padding, etc. */
> +		ndw = 64;
>   
> -	shadow = parent->bo->shadow;
> -	if (shadow) {
> -		r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
> +		/* assume the worst case */
> +		ndw += parent->last_entry_used * 6;
> +
> +		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> +
> +		if (shadow) {
> +			shadow_addr = amdgpu_bo_gpu_offset(shadow);
> +			ndw *= 2;
> +		} else {
> +			shadow_addr = 0;
> +		}
> +
> +		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
>   		if (r)
>   			return r;
> -		shadow_addr = amdgpu_bo_gpu_offset(shadow);
> -		ndw *= 2;
> -	} else {
> -		shadow_addr = 0;
> -	}
>   
> -	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
> -	if (r)
> -		return r;
> +		params.ib = &job->ibs[0];
> +		params.func = amdgpu_vm_do_set_ptes;
> +	}
>   
> -	memset(&params, 0, sizeof(params));
> -	params.adev = adev;
> -	params.ib = &job->ibs[0];
>   
>   	/* walk over the address space and update the directory */
>   	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
> @@ -1043,15 +1102,15 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   					amdgpu_vm_adjust_mc_addr(adev, last_pt);
>   
>   				if (shadow)
> -					amdgpu_vm_do_set_ptes(&params,
> -							      last_shadow,
> -							      pt_addr, count,
> -							      incr,
> -							      AMDGPU_PTE_VALID);
> -
> -				amdgpu_vm_do_set_ptes(&params, last_pde,
> -						      pt_addr, count, incr,
> -						      AMDGPU_PTE_VALID);
> +					params.func(&params,
> +						    last_shadow,
> +						    pt_addr, count,
> +						    incr,
> +						    AMDGPU_PTE_VALID);
> +
> +				params.func(&params, last_pde,
> +					    pt_addr, count, incr,
> +					    AMDGPU_PTE_VALID);
>   			}
>   
>   			count = 1;
> @@ -1067,14 +1126,16 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   		uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt);
>   
>   		if (vm->root.bo->shadow)
> -			amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr,
> -					      count, incr, AMDGPU_PTE_VALID);
> +			params.func(&params, last_shadow, pt_addr,
> +				    count, incr, AMDGPU_PTE_VALID);
>   
> -		amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr,
> -				      count, incr, AMDGPU_PTE_VALID);
> +		params.func(&params, last_pde, pt_addr,
> +			    count, incr, AMDGPU_PTE_VALID);
>   	}
>   
> -	if (params.ib->length_dw == 0) {
> +	if (params.func == amdgpu_vm_cpu_set_ptes)
> +		amdgpu_bo_kunmap(parent->bo);
> +	else if (params.ib->length_dw == 0) {
>   		amdgpu_job_free(job);
>   	} else {
>   		amdgpu_ring_pad_ib(ring, params.ib);
> @@ -2309,6 +2370,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	struct amdgpu_ring *ring;
>   	struct amd_sched_rq *rq;
>   	int r, i;
> +	u64 flags;
>   
>   	vm->va = RB_ROOT;
>   	vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
> @@ -2342,12 +2404,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		  "CPU update of VM recommended only for large BAR system\n");
>   	vm->last_dir_update = NULL;
>   
> +	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> +			AMDGPU_GEM_CREATE_VRAM_CLEARED;
> +	if (vm->use_cpu_for_update)
> +		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> +	else
> +		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> +				AMDGPU_GEM_CREATE_SHADOW);
> +
>   	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
>   			     AMDGPU_GEM_DOMAIN_VRAM,
> -			     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> -			     AMDGPU_GEM_CREATE_SHADOW |
> -			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> -			     AMDGPU_GEM_CREATE_VRAM_CLEARED,
> +			     flags,
>   			     NULL, NULL, &vm->root.bo);
>   	if (r)
>   		goto error_free_sched_entity;


* Re: [PATCH 3/5] drm/amdgpu: Add amdgpu_sync_wait
From: Christian König @ 2017-05-16 12:49 UTC
  To: Harish Kasiviswanathan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2017-05-15 23:32, Harish Kasiviswanathan wrote:
> Helper function useful for CPU update of VM page tables. Also useful if
> kernel have to synchronously wait till VM page tables are updated.
>
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 19 +++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h |  1 +
>   2 files changed, 20 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> index 5c8d302..e5b202e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> @@ -296,6 +296,25 @@ struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
>   	return NULL;
>   }
>   
> +int amdgpu_sync_wait(struct amdgpu_sync *sync)
> +{
> +	struct amdgpu_sync_entry *e;
> +	struct hlist_node *tmp;
> +	int i, r;
> +
> +	hash_for_each_safe(sync->fences, i, tmp, e, node) {
> +		r = fence_wait(e->fence, false);

Depending on how you are going to use this, you might want to wait
interruptibly here.
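
For reference, assuming the pre-rename fence API used here, the
interruptible variant is the same call with intr set:

	r = fence_wait(e->fence, true);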

Apart from that, the patch is Reviewed-by: Christian König
<christian.koenig@amd.com>.

Regards,
Christian.

> +		if (r)
> +			return r;
> +
> +		hash_del(&e->node);
> +		fence_put(e->fence);
> +		kmem_cache_free(amdgpu_sync_slab, e);
> +	}
> +
> +	return 0;
> +}
> +
>   /**
>    * amdgpu_sync_free - free the sync object
>    *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> index 405f379..6c9c489 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> @@ -49,6 +49,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
>   struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
>   				     struct amdgpu_ring *ring);
>   struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
> +int amdgpu_sync_wait(struct amdgpu_sync *sync);
>   void amdgpu_sync_free(struct amdgpu_sync *sync);
>   int amdgpu_sync_init(void);
>   void amdgpu_sync_fini(void);



* Re: [PATCH 4/5] drm/amdgpu: Support page directory update via CPU
From: Christian König @ 2017-05-16 12:52 UTC
  To: Harish Kasiviswanathan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2017-05-15 23:32, Harish Kasiviswanathan wrote:
> If the amdgpu.vm_update_mode param is set to use CPU, then Page
> Directories will be updated by the CPU instead of SDMA.
>
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 151 ++++++++++++++++++++++++---------
>   1 file changed, 109 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9c89cb2..d72a624 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -271,6 +271,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   				  uint64_t saddr, uint64_t eaddr,
>   				  unsigned level)
>   {
> +	u64 flags;

Reverse tree order, i.e. put it at the end of the variable
declarations.
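
Reverse tree order means keeping the longest declaration lines first;
a sketch of the requested ordering for the declarations quoted above:

	unsigned shift = (adev->vm_manager.num_level - level) *
		adev->vm_manager.block_size;
	unsigned pt_idx, from, to;
	u64 flags;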

>   	unsigned shift = (adev->vm_manager.num_level - level) *
>   		adev->vm_manager.block_size;
>   	unsigned pt_idx, from, to;
> @@ -299,6 +300,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   	saddr = saddr & ((1 << shift) - 1);
>   	eaddr = eaddr & ((1 << shift) - 1);
>   
> +	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> +			AMDGPU_GEM_CREATE_VRAM_CLEARED;
> +	if (vm->use_cpu_for_update)
> +		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> +	else
> +		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> +				AMDGPU_GEM_CREATE_SHADOW);
> +
>   	/* walk over the address space and allocate the page tables */
>   	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
>   		struct reservation_object *resv = vm->root.bo->tbo.resv;
> @@ -310,10 +319,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   					     amdgpu_vm_bo_size(adev, level),
>   					     AMDGPU_GPU_PAGE_SIZE, true,
>   					     AMDGPU_GEM_DOMAIN_VRAM,
> -					     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> -					     AMDGPU_GEM_CREATE_SHADOW |
> -					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> -					     AMDGPU_GEM_CREATE_VRAM_CLEARED,
> +					     flags,
>   					     NULL, resv, &pt);
>   			if (r)
>   				return r;
> @@ -952,6 +958,43 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
>   	return result;
>   }
>   
> +/**
> + * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
> + *
> + * @params: see amdgpu_pte_update_params definition
> + * @pe: kmap addr of the page entry
> + * @addr: dst addr to write into pe
> + * @count: number of page entries to update
> + * @incr: increase next addr by incr bytes
> + * @flags: hw access flags
> + */
> +static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
> +				   uint64_t pe, uint64_t addr,
> +				   unsigned count, uint32_t incr,
> +				   uint64_t flags)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < count; i++) {
> +		amdgpu_gart_set_pte_pde(params->adev, (void *)pe,
> +					i, addr, flags);
> +		addr += incr;
> +	}
> +
> +	mb();
> +	amdgpu_gart_flush_gpu_tlb(params->adev, 0);
> +}
> +
A comment describing what the function does would be nice here.

With those two nitpicks fixed, the patch is Reviewed-by: Christian König
<christian.koenig@amd.com>.

Regards,
Christian.

> +static void amdgpu_vm_bo_wait(struct amdgpu_device *adev, struct amdgpu_bo *bo)
> +{
> +	struct amdgpu_sync sync;
> +
> +	amdgpu_sync_create(&sync);
> +	amdgpu_sync_resv(adev, &sync, bo->tbo.resv, AMDGPU_FENCE_OWNER_VM);
> +	amdgpu_sync_wait(&sync);
> +	amdgpu_sync_free(&sync);
> +}
> +
>   /*
>    * amdgpu_vm_update_level - update a single level in the hierarchy
>    *
> @@ -981,34 +1024,50 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   
>   	if (!parent->entries)
>   		return 0;
> -	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
>   
> -	/* padding, etc. */
> -	ndw = 64;
> +	memset(&params, 0, sizeof(params));
> +	params.adev = adev;
> +	shadow = parent->bo->shadow;
>   
> -	/* assume the worst case */
> -	ndw += parent->last_entry_used * 6;
> +	WARN_ON(vm->use_cpu_for_update && shadow);
> +	if (vm->use_cpu_for_update && !shadow) {
> +		r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
> +		if (r)
> +			return r;
> +		amdgpu_vm_bo_wait(adev, parent->bo);
> +		params.func = amdgpu_vm_cpu_set_ptes;
> +	} else {
> +		if (shadow) {
> +			r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
> +			if (r)
> +				return r;
> +		}
> +		ring = container_of(vm->entity.sched, struct amdgpu_ring,
> +				    sched);
>   
> -	pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> +		/* padding, etc. */
> +		ndw = 64;
>   
> -	shadow = parent->bo->shadow;
> -	if (shadow) {
> -		r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
> +		/* assume the worst case */
> +		ndw += parent->last_entry_used * 6;
> +
> +		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> +
> +		if (shadow) {
> +			shadow_addr = amdgpu_bo_gpu_offset(shadow);
> +			ndw *= 2;
> +		} else {
> +			shadow_addr = 0;
> +		}
> +
> +		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
>   		if (r)
>   			return r;
> -		shadow_addr = amdgpu_bo_gpu_offset(shadow);
> -		ndw *= 2;
> -	} else {
> -		shadow_addr = 0;
> -	}
>   
> -	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
> -	if (r)
> -		return r;
> +		params.ib = &job->ibs[0];
> +		params.func = amdgpu_vm_do_set_ptes;
> +	}
>   
> -	memset(&params, 0, sizeof(params));
> -	params.adev = adev;
> -	params.ib = &job->ibs[0];
>   
>   	/* walk over the address space and update the directory */
>   	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
> @@ -1043,15 +1102,15 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   					amdgpu_vm_adjust_mc_addr(adev, last_pt);
>   
>   				if (shadow)
> -					amdgpu_vm_do_set_ptes(&params,
> -							      last_shadow,
> -							      pt_addr, count,
> -							      incr,
> -							      AMDGPU_PTE_VALID);
> -
> -				amdgpu_vm_do_set_ptes(&params, last_pde,
> -						      pt_addr, count, incr,
> -						      AMDGPU_PTE_VALID);
> +					params.func(&params,
> +						    last_shadow,
> +						    pt_addr, count,
> +						    incr,
> +						    AMDGPU_PTE_VALID);
> +
> +				params.func(&params, last_pde,
> +					    pt_addr, count, incr,
> +					    AMDGPU_PTE_VALID);
>   			}
>   
>   			count = 1;
> @@ -1067,14 +1126,16 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   		uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt);
>   
>   		if (vm->root.bo->shadow)
> -			amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr,
> -					      count, incr, AMDGPU_PTE_VALID);
> +			params.func(&params, last_shadow, pt_addr,
> +				    count, incr, AMDGPU_PTE_VALID);
>   
> -		amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr,
> -				      count, incr, AMDGPU_PTE_VALID);
> +		params.func(&params, last_pde, pt_addr,
> +			    count, incr, AMDGPU_PTE_VALID);
>   	}
>   
> -	if (params.ib->length_dw == 0) {
> +	if (params.func == amdgpu_vm_cpu_set_ptes)
> +		amdgpu_bo_kunmap(parent->bo);
> +	else if (params.ib->length_dw == 0) {
>   		amdgpu_job_free(job);
>   	} else {
>   		amdgpu_ring_pad_ib(ring, params.ib);
> @@ -2309,6 +2370,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	struct amdgpu_ring *ring;
>   	struct amd_sched_rq *rq;
>   	int r, i;
> +	u64 flags;
>   
>   	vm->va = RB_ROOT;
>   	vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
> @@ -2342,12 +2404,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		  "CPU update of VM recommended only for large BAR system\n");
>   	vm->last_dir_update = NULL;
>   
> +	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> +			AMDGPU_GEM_CREATE_VRAM_CLEARED;
> +	if (vm->use_cpu_for_update)
> +		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> +	else
> +		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> +				AMDGPU_GEM_CREATE_SHADOW);
> +
>   	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
>   			     AMDGPU_GEM_DOMAIN_VRAM,
> -			     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> -			     AMDGPU_GEM_CREATE_SHADOW |
> -			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> -			     AMDGPU_GEM_CREATE_VRAM_CLEARED,
> +			     flags,
>   			     NULL, NULL, &vm->root.bo);
>   	if (r)
>   		goto error_free_sched_entity;



* Re: [PATCH 5/5] drm/amdgpu: Support page table update via CPU
From: Christian König @ 2017-05-16 12:55 UTC
  To: Harish Kasiviswanathan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2017-05-15 23:32, Harish Kasiviswanathan wrote:
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 86 +++++++++++++++++++++++++++++++++-
>   1 file changed, 85 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index d72a624..e98d558 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -79,6 +79,12 @@ struct amdgpu_pte_update_params {
>   		     uint64_t flags);
>   	/* indicate update pt or its shadow */
>   	bool shadow;
> +	/* The next two are used during VM update by CPU
> +	 *  DMA addresses to use for mapping
> +	 *  Kernel pointer of PD/PT BO that needs to be updated
> +	 */
> +	dma_addr_t *pages_addr;
> +	void *kptr;
>   };
>   
>   /* Helper to disable partial resident texture feature from a fence callback */
> @@ -974,10 +980,14 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
>   				   uint64_t flags)
>   {
>   	unsigned int i;
> +	uint64_t value;
>   
>   	for (i = 0; i < count; i++) {
> +		value = params->pages_addr ?
> +			amdgpu_vm_map_gart(params->pages_addr, addr) :
> +			addr;
>   		amdgpu_gart_set_pte_pde(params->adev, (void *)pe,
> -					i, addr, flags);
> +					i, value, flags);
>   		addr += incr;
>   	}
>   
> @@ -1220,6 +1230,59 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
>   }
>   
>   /**
> + * amdgpu_vm_update_ptes_cpu - Update the page tables in the range
> + *  start - @end using CPU.
> + * See amdgpu_vm_update_ptes for parameter description.
> + *
> + */
> +static int amdgpu_vm_update_ptes_cpu(struct amdgpu_pte_update_params *params,
> +				     uint64_t start, uint64_t end,
> +				     uint64_t dst, uint64_t flags)
> +{
> +	struct amdgpu_device *adev = params->adev;
> +	const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
> +	void *pe_ptr;
> +	uint64_t addr;
> +	struct amdgpu_bo *pt;
> +	unsigned int nptes;
> +	int r;
> +
> +	/* initialize the variables */
> +	addr = start;
> +
> +	/* walk over the address space and update the page tables */
> +	while (addr < end) {
> +		pt = amdgpu_vm_get_pt(params, addr);
> +		if (!pt) {
> +			pr_err("PT not found, aborting update_ptes\n");
> +			return -EINVAL;
> +		}
> +
> +		WARN_ON(params->shadow);
> +
> +		r = amdgpu_bo_kmap(pt, &pe_ptr);
> +		if (r)
> +			return r;
> +
> +		pe_ptr += (addr & mask) * 8;
> +
> +		if ((addr & ~mask) == (end & ~mask))
> +			nptes = end - addr;
> +		else
> +			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
> +
> +		params->func(params, (uint64_t)pe_ptr, dst, nptes,
> +			     AMDGPU_GPU_PAGE_SIZE, flags);
> +
> +		amdgpu_bo_kunmap(pt);
> +		addr += nptes;
> +		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
>    * amdgpu_vm_update_ptes - make sure that page tables are valid
>    *
>    * @params: see amdgpu_pte_update_params definition
> @@ -1245,6 +1308,10 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>   	unsigned nptes; /* next number of ptes to be updated */
>   	uint64_t next_pe_start;
>   
> +	if (params->func == amdgpu_vm_cpu_set_ptes)
> +		return amdgpu_vm_update_ptes_cpu(params, start, end,
> +						 dst, flags);
> +
>   	/* initialize the variables */
>   	addr = start;
>   	pt = amdgpu_vm_get_pt(params, addr);
> @@ -1431,6 +1498,23 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	params.vm = vm;
>   	params.src = src;
>   
> +	if (vm->use_cpu_for_update) {
> +		/* params.src is used as flag to indicate system Memory */
> +		if (pages_addr)
> +			params.src = ~0;
> +
> +		/* Wait for PT BOs to be free. PTs share the same resv. object
> +		 * as the root PD BO
> +		 */
> +		amdgpu_vm_bo_wait(adev, vm->root.bo);
> +		params.func = amdgpu_vm_cpu_set_ptes;
> +		params.pages_addr = pages_addr;
> +		params.shadow = false;
> +		r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
> +		if (!r)
> +			return r;
> +	}
> +
>   	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
>   
>   	/* sync to everything on unmapping */



* RE: [PATCH 4/5] drm/amdgpu: Support page directory update via CPU
From: Kasiviswanathan, Harish @ 2017-05-16 21:02 UTC
  To: Zhou, David(ChunMing), amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



-----Original Message-----
From: Zhou, David(ChunMing) 
Sent: Monday, May 15, 2017 10:50 PM
To: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 4/5] drm/amdgpu: Support page directory update via CPU



On 2017-05-16 05:32, Harish Kasiviswanathan wrote:
> If the amdgpu.vm_update_mode param is set to use CPU, then Page
> Directories will be updated by the CPU instead of SDMA.
>
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 151 ++++++++++++++++++++++++---------
>   1 file changed, 109 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9c89cb2..d72a624 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -271,6 +271,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   				  uint64_t saddr, uint64_t eaddr,
>   				  unsigned level)
>   {
> +	u64 flags;
>   	unsigned shift = (adev->vm_manager.num_level - level) *
>   		adev->vm_manager.block_size;
>   	unsigned pt_idx, from, to;
> @@ -299,6 +300,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   	saddr = saddr & ((1 << shift) - 1);
>   	eaddr = eaddr & ((1 << shift) - 1);
>   
> +	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> +			AMDGPU_GEM_CREATE_VRAM_CLEARED;
> +	if (vm->use_cpu_for_update)
> +		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
I think the shadow flag is needed for the CPU case as well; it is used
to back up the VM BO and is meaningful on GPU reset.
Same comment for the PD BO.

[HK]: Yes, support for shadow BOs is desirable and could be implemented
as a separate commit. To support shadow BOs, the caller should
explicitly add the shadow BOs to ttm_eu_reserve_buffer(..) to remove
them from the TTM swap list, or ttm_bo_kmap has to be modified. This
implementation of CPU update of VM page tables is mainly for KFD usage;
Graphics will use it for experimental and testing purposes only. From
KFD's viewpoint, shadow BOs are not useful because if the GPU is reset,
all queue information is lost (since submissions are done by user
space) and it is not possible to recover.

Regards,
David Zhou
> +	else
> +		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> +				AMDGPU_GEM_CREATE_SHADOW);
> +
>   	/* walk over the address space and allocate the page tables */
>   	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
>   		struct reservation_object *resv = vm->root.bo->tbo.resv;
> @@ -310,10 +319,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   					     amdgpu_vm_bo_size(adev, level),
>   					     AMDGPU_GPU_PAGE_SIZE, true,
>   					     AMDGPU_GEM_DOMAIN_VRAM,
> -					     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> -					     AMDGPU_GEM_CREATE_SHADOW |
> -					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> -					     AMDGPU_GEM_CREATE_VRAM_CLEARED,
> +					     flags,
>   					     NULL, resv, &pt);
>   			if (r)
>   				return r;
> @@ -952,6 +958,43 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
>   	return result;
>   }
>   
> +/**
> + * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
> + *
> + * @params: see amdgpu_pte_update_params definition
> + * @pe: kmap addr of the page entry
> + * @addr: dst addr to write into pe
> + * @count: number of page entries to update
> + * @incr: increase next addr by incr bytes
> + * @flags: hw access flags
> + */
> +static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
> +				   uint64_t pe, uint64_t addr,
> +				   unsigned count, uint32_t incr,
> +				   uint64_t flags)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < count; i++) {
> +		amdgpu_gart_set_pte_pde(params->adev, (void *)pe,
> +					i, addr, flags);
> +		addr += incr;
> +	}
> +
> +	mb();
> +	amdgpu_gart_flush_gpu_tlb(params->adev, 0);
> +}
> +
> +static void amdgpu_vm_bo_wait(struct amdgpu_device *adev, struct amdgpu_bo *bo)
> +{
> +	struct amdgpu_sync sync;
> +
> +	amdgpu_sync_create(&sync);
> +	amdgpu_sync_resv(adev, &sync, bo->tbo.resv, AMDGPU_FENCE_OWNER_VM);
> +	amdgpu_sync_wait(&sync);
> +	amdgpu_sync_free(&sync);
> +}
> +
>   /*
>    * amdgpu_vm_update_level - update a single level in the hierarchy
>    *
> @@ -981,34 +1024,50 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   
>   	if (!parent->entries)
>   		return 0;
> -	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
>   
> -	/* padding, etc. */
> -	ndw = 64;
> +	memset(&params, 0, sizeof(params));
> +	params.adev = adev;
> +	shadow = parent->bo->shadow;
>   
> -	/* assume the worst case */
> -	ndw += parent->last_entry_used * 6;
> +	WARN_ON(vm->use_cpu_for_update && shadow);
> +	if (vm->use_cpu_for_update && !shadow) {
> +		r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
> +		if (r)
> +			return r;
> +		amdgpu_vm_bo_wait(adev, parent->bo);
> +		params.func = amdgpu_vm_cpu_set_ptes;
> +	} else {
> +		if (shadow) {
> +			r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
> +			if (r)
> +				return r;
> +		}
> +		ring = container_of(vm->entity.sched, struct amdgpu_ring,
> +				    sched);
>   
> -	pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> +		/* padding, etc. */
> +		ndw = 64;
>   
> -	shadow = parent->bo->shadow;
> -	if (shadow) {
> -		r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
> +		/* assume the worst case */
> +		ndw += parent->last_entry_used * 6;
> +
> +		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> +
> +		if (shadow) {
> +			shadow_addr = amdgpu_bo_gpu_offset(shadow);
> +			ndw *= 2;
> +		} else {
> +			shadow_addr = 0;
> +		}
> +
> +		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
>   		if (r)
>   			return r;
> -		shadow_addr = amdgpu_bo_gpu_offset(shadow);
> -		ndw *= 2;
> -	} else {
> -		shadow_addr = 0;
> -	}
>   
> -	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
> -	if (r)
> -		return r;
> +		params.ib = &job->ibs[0];
> +		params.func = amdgpu_vm_do_set_ptes;
> +	}
>   
> -	memset(&params, 0, sizeof(params));
> -	params.adev = adev;
> -	params.ib = &job->ibs[0];
>   
>   	/* walk over the address space and update the directory */
>   	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
> @@ -1043,15 +1102,15 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   					amdgpu_vm_adjust_mc_addr(adev, last_pt);
>   
>   				if (shadow)
> -					amdgpu_vm_do_set_ptes(&params,
> -							      last_shadow,
> -							      pt_addr, count,
> -							      incr,
> -							      AMDGPU_PTE_VALID);
> -
> -				amdgpu_vm_do_set_ptes(&params, last_pde,
> -						      pt_addr, count, incr,
> -						      AMDGPU_PTE_VALID);
> +					params.func(&params,
> +						    last_shadow,
> +						    pt_addr, count,
> +						    incr,
> +						    AMDGPU_PTE_VALID);
> +
> +				params.func(&params, last_pde,
> +					    pt_addr, count, incr,
> +					    AMDGPU_PTE_VALID);
>   			}
>   
>   			count = 1;
> @@ -1067,14 +1126,16 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   		uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt);
>   
>   		if (vm->root.bo->shadow)
> -			amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr,
> -					      count, incr, AMDGPU_PTE_VALID);
> +			params.func(&params, last_shadow, pt_addr,
> +				    count, incr, AMDGPU_PTE_VALID);
>   
> -		amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr,
> -				      count, incr, AMDGPU_PTE_VALID);
> +		params.func(&params, last_pde, pt_addr,
> +			    count, incr, AMDGPU_PTE_VALID);
>   	}
>   
> -	if (params.ib->length_dw == 0) {
> +	if (params.func == amdgpu_vm_cpu_set_ptes)
> +		amdgpu_bo_kunmap(parent->bo);
> +	else if (params.ib->length_dw == 0) {
>   		amdgpu_job_free(job);
>   	} else {
>   		amdgpu_ring_pad_ib(ring, params.ib);
> @@ -2309,6 +2370,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	struct amdgpu_ring *ring;
>   	struct amd_sched_rq *rq;
>   	int r, i;
> +	u64 flags;
>   
>   	vm->va = RB_ROOT;
>   	vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
> @@ -2342,12 +2404,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		  "CPU update of VM recommended only for large BAR system\n");
>   	vm->last_dir_update = NULL;
>   
> +	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> +			AMDGPU_GEM_CREATE_VRAM_CLEARED;
> +	if (vm->use_cpu_for_update)
> +		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> +	else
> +		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> +				AMDGPU_GEM_CREATE_SHADOW);
> +
>   	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
>   			     AMDGPU_GEM_DOMAIN_VRAM,
> -			     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> -			     AMDGPU_GEM_CREATE_SHADOW |
> -			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> -			     AMDGPU_GEM_CREATE_VRAM_CLEARED,
> +			     flags,
>   			     NULL, NULL, &vm->root.bo);
>   	if (r)
>   		goto error_free_sched_entity;


* Re: [PATCH 4/5] drm/amdgpu: Support page directory update via CPU
       [not found]             ` <CY1PR1201MB1034A467A20010323B44EAEC8CE60-JBJ/M6OpXY+irIVeHNVyQ2rFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
@ 2017-05-17  1:54               ` zhoucm1
       [not found]                 ` <591BAD6C.2070605-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: zhoucm1 @ 2017-05-17  1:54 UTC (permalink / raw)
  To: Kasiviswanathan, Harish, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW





On 2017-05-17 05:02, Kasiviswanathan, Harish wrote:
>
>
> -----Original Message-----
> From: Zhou, David(ChunMing)
> Sent: Monday, May 15, 2017 10:50 PM
> To: Kasiviswanathan, Harish <Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>; 
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> Subject: Re: [PATCH 4/5] drm/amdgpu: Support page directory update via CPU
>
>
>
> On 2017-05-16 05:32, Harish Kasiviswanathan wrote:
> > [snip: patch quoted in full above]
> > +     flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> > +                     AMDGPU_GEM_CREATE_VRAM_CLEARED;
> > +     if (vm->use_cpu_for_update)
> > +             flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> I think the shadow flag is needed for the CPU case as well; it is used to
> back up the VM BO and is meaningful on GPU reset.
> Same comment for the PD BO.
>
> [HK]: Yes, support for shadow BOs is desirable and could be implemented
> as a separate commit. To support shadow BOs, the caller would have to
> explicitly add them to ttm_eu_reserve_buffer(..) so the BO is removed
> from the TTM swap list, or ttm_bo_kmap would have to be modified. This
> implementation of CPU updates of VM page tables is mainly for KFD usage;
> graphics will use it for experimental and testing purposes. From KFD's
> point of view, shadow BOs are not useful: if the GPU is reset, all queue
> information is lost (since submissions are done by user space) and
> recovery is not possible.
Either way is fine to me.

David Zhou
> [snip: remainder of quoted message]

* Re: [PATCH 4/5] drm/amdgpu: Support page directory update via CPU
       [not found]                 ` <591BAD6C.2070605-5C7GfCeVMHo@public.gmane.org>
@ 2017-05-17  8:48                   ` Christian König
       [not found]                     ` <bec8a5a3-7d62-2ff5-96b9-2c03afec1483-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Christian König @ 2017-05-17  8:48 UTC (permalink / raw)
  To: zhoucm1, Kasiviswanathan, Harish,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2017-05-17 03:54, zhoucm1 wrote:
>
>
> On 2017-05-17 05:02, Kasiviswanathan, Harish wrote:
>>
>>
>> [snip: quoted patch and shadow-BO discussion, see above]
> Either way is fine to me.

Actually I'm thinking about if we shouldn't completely drop the shadow 
handling.

When VRAM is lost we now completely drop all jobs, so for new jobs we 
can recreate the page table content from the VM structures as well.

When VRAM is not lost we don't need to restore the page tables.

What do you think?

Regards,
Christian.
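
A sketch of that idea; the list walk below is illustrative only, and the
bo_va_list name is a placeholder rather than a field in this tree:

	static int amdgpu_vm_recreate_tables(struct amdgpu_device *adev,
					     struct amdgpu_vm *vm)
	{
		struct amdgpu_bo_va *bo_va;
		int r;

		/* Freshly allocated PDs/PTs are cleared on creation, so it is
		 * enough to write every known mapping again from the VM
		 * bookkeeping instead of copying back a shadow BO. */
		list_for_each_entry(bo_va, &vm->bo_va_list, vm_status) {
			r = amdgpu_vm_bo_update(adev, bo_va, false);
			if (r)
				return r;
		}
		return 0;
	}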

> [snip: remainder of quoted message]

* Re: [PATCH 4/5] drm/amdgpu: Support page directory update via CPU
       [not found]                     ` <bec8a5a3-7d62-2ff5-96b9-2c03afec1483-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-05-17  8:53                       ` zhoucm1
       [not found]                         ` <591C0F9E.2030800-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: zhoucm1 @ 2017-05-17  8:53 UTC (permalink / raw)
  To: Christian König, Kasiviswanathan, Harish,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW





On 2017-05-17 16:48, Christian König wrote:
> On 2017-05-17 03:54, zhoucm1 wrote:
>>> [snip: quoted patch and earlier discussion trimmed]
>> Either way is fine to me.
>
> Actually I'm thinking about if we shouldn't completely drop the shadow 
> handling.
>
> When VRAM is lost we now completely drop all jobs, so for new jobs we 
> can recreate the page table content from the VM structures as well.
For KGD, I agree. But if a process uses both KGD and KFD, I still
think the shadow BO is needed.

>
> When VRAM is not lost we don't need to restore the page tables.
In fact, our 'vram lost' detection isn't exact; I was told by another
team that they have seen cases where only part of VRAM was lost. So
restoring the page tables still seems necessary even when VRAM is not
reported lost.

Regards,
David Zhou
> [snip: remainder of quoted message]

* Re: [PATCH 4/5] drm/amdgpu: Support page directory update via CPU
       [not found]                         ` <591C0F9E.2030800-5C7GfCeVMHo@public.gmane.org>
@ 2017-05-17  8:59                           ` Christian König
  0 siblings, 0 replies; 16+ messages in thread
From: Christian König @ 2017-05-17  8:59 UTC (permalink / raw)
  To: zhoucm1, Kasiviswanathan, Harish,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2017-05-17 10:53, zhoucm1 wrote:
>
>
> On 2017-05-17 16:48, Christian König wrote:
>> On 2017-05-17 03:54, zhoucm1 wrote:
>>>> [snip: quoted patch and earlier discussion trimmed]
>>> Either way is fine to me.
>>
>> Actually I'm thinking about if we shouldn't completely drop the 
>> shadow handling.
>>
>> When VRAM is lost we now completely drop all jobs, so for new jobs we 
>> can recreate the page table content from the VM structures as well.
> For KGD, I agree. But if a process uses both KGD and KFD, I still
> think the shadow BO is needed.
>
>>
>> When VRAM is not lost we don't need to restore the page tables.
> In fact, our 'vram lost' detection isn't exact; I was told by another
> team that they have seen cases where only part of VRAM was lost. So
> restoring the page tables still seems necessary even when VRAM is not
> reported lost.

Ok, random VRAM corruption caused by a GPU reset is a good argument. So 
we should keep this feature.

Regards,
Christian.

>
> Regards,
> David Zhou
>>
>> What do you think?
>
>> Regards,
>> Christian.
>>
>>>
>>> David Zhou
>>>>
>>>> Regards,
>>>> David Zhou
>>>> > +     else
>>>> > +             flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>>> > + AMDGPU_GEM_CREATE_SHADOW);
>>>> > +
>>>> >        /* walk over the address space and allocate the page tables */
>>>> >        for (pt_idx = from; pt_idx <= to; ++pt_idx) {
>>>> >                struct reservation_object *resv = 
>>>> vm->root.bo->tbo.resv;
>>>> > @@ -310,10 +319,7 @@ static int amdgpu_vm_alloc_levels(struct 
>>>> amdgpu_device *adev,
>>>> > amdgpu_vm_bo_size(adev, level),
>>>> > AMDGPU_GPU_PAGE_SIZE, true,
>>>> > AMDGPU_GEM_DOMAIN_VRAM,
>>>> > - AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>>> > - AMDGPU_GEM_CREATE_SHADOW |
>>>> > - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>>>> > - AMDGPU_GEM_CREATE_VRAM_CLEARED,
>>>> > +                                          flags,
>>>> >                                             NULL, resv, &pt);
>>>> >                        if (r)
>>>> >                                return r;
>>>> > @@ -952,6 +958,43 @@ static uint64_t amdgpu_vm_map_gart(const 
>>>> dma_addr_t *pages_addr, uint64_t addr)
>>>> >        return result;
>>>> >   }
>>>> >
>>>> > +/**
>>>> > + * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
>>>> > + *
>>>> > + * @params: see amdgpu_pte_update_params definition
>>>> > + * @pe: kmap addr of the page entry
>>>> > + * @addr: dst addr to write into pe
>>>> > + * @count: number of page entries to update
>>>> > + * @incr: increase next addr by incr bytes
>>>> > + * @flags: hw access flags
>>>> > + */
>>>> > +static void amdgpu_vm_cpu_set_ptes(struct 
>>>> amdgpu_pte_update_params *params,
>>>> > +                                uint64_t pe, uint64_t addr,
>>>> > +                                unsigned count, uint32_t incr,
>>>> > +                                uint64_t flags)
>>>> > +{
>>>> > +     unsigned int i;
>>>> > +
>>>> > +     for (i = 0; i < count; i++) {
>>>> > + amdgpu_gart_set_pte_pde(params->adev, (void *)pe,
>>>> > +                                     i, addr, flags);
>>>> > +             addr += incr;
>>>> > +     }
>>>> > +
>>>> > +     mb();
>>>> > +     amdgpu_gart_flush_gpu_tlb(params->adev, 0);
>>>> > +}
>>>> > +
>>>> > +static void amdgpu_vm_bo_wait(struct amdgpu_device *adev, struct 
>>>> amdgpu_bo *bo)
>>>> > +{
>>>> > +     struct amdgpu_sync sync;
>>>> > +
>>>> > +     amdgpu_sync_create(&sync);
>>>> > +     amdgpu_sync_resv(adev, &sync, bo->tbo.resv, 
>>>> AMDGPU_FENCE_OWNER_VM);
>>>> > +     amdgpu_sync_wait(&sync);
>>>> > +     amdgpu_sync_free(&sync);
>>>> > +}
>>>> > +
>>>> >   /*
>>>> >    * amdgpu_vm_update_level - update a single level in the hierarchy
>>>> >    *
>>>> > @@ -981,34 +1024,50 @@ static int amdgpu_vm_update_level(struct 
>>>> amdgpu_device *adev,
>>>> >
>>>> >        if (!parent->entries)
>>>> >                return 0;
>>>> > -     ring = container_of(vm->entity.sched, struct amdgpu_ring, 
>>>> sched);
>>>> >
>>>> > -     /* padding, etc. */
>>>> > -     ndw = 64;
>>>> > +     memset(&params, 0, sizeof(params));
>>>> > +     params.adev = adev;
>>>> > +     shadow = parent->bo->shadow;
>>>> >
>>>> > -     /* assume the worst case */
>>>> > -     ndw += parent->last_entry_used * 6;
>>>> > +     WARN_ON(vm->use_cpu_for_update && shadow);
>>>> > +     if (vm->use_cpu_for_update && !shadow) {
>>>> > +             r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
>>>> > +             if (r)
>>>> > +                     return r;
>>>> > +             amdgpu_vm_bo_wait(adev, parent->bo);
>>>> > +             params.func = amdgpu_vm_cpu_set_ptes;
>>>> > +     } else {
>>>> > +             if (shadow) {
>>>> > +                     r = amdgpu_ttm_bind(&shadow->tbo, 
>>>> &shadow->tbo.mem);
>>>> > +                     if (r)
>>>> > +                             return r;
>>>> > +             }
>>>> > +             ring = container_of(vm->entity.sched, struct 
>>>> amdgpu_ring,
>>>> > +                                 sched);
>>>> >
>>>> > -     pd_addr = amdgpu_bo_gpu_offset(parent->bo);
>>>> > +             /* padding, etc. */
>>>> > +             ndw = 64;
>>>> >
>>>> > -     shadow = parent->bo->shadow;
>>>> > -     if (shadow) {
>>>> > -             r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
>>>> > +             /* assume the worst case */
>>>> > +             ndw += parent->last_entry_used * 6;
>>>> > +
>>>> > +             pd_addr = amdgpu_bo_gpu_offset(parent->bo);
>>>> > +
>>>> > +             if (shadow) {
>>>> > +                     shadow_addr = amdgpu_bo_gpu_offset(shadow);
>>>> > +                     ndw *= 2;
>>>> > +             } else {
>>>> > +                     shadow_addr = 0;
>>>> > +             }
>>>> > +
>>>> > +             r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
>>>> >                if (r)
>>>> >                        return r;
>>>> > -             shadow_addr = amdgpu_bo_gpu_offset(shadow);
>>>> > -             ndw *= 2;
>>>> > -     } else {
>>>> > -             shadow_addr = 0;
>>>> > -     }
>>>> >
>>>> > -     r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
>>>> > -     if (r)
>>>> > -             return r;
>>>> > +             params.ib = &job->ibs[0];
>>>> > +             params.func = amdgpu_vm_do_set_ptes;
>>>> > +     }
>>>> >
>>>> > -     memset(&params, 0, sizeof(params));
>>>> > -     params.adev = adev;
>>>> > -     params.ib = &job->ibs[0];
>>>> >
>>>> >        /* walk over the address space and update the directory */
>>>> >        for (pt_idx = 0; pt_idx <= parent->last_entry_used; 
>>>> ++pt_idx) {
>>>> > @@ -1043,15 +1102,15 @@ static int amdgpu_vm_update_level(struct 
>>>> amdgpu_device *adev,
>>>> > amdgpu_vm_adjust_mc_addr(adev, last_pt);
>>>> >
>>>> >                                if (shadow)
>>>> > - amdgpu_vm_do_set_ptes(&params,
>>>> > - last_shadow,
>>>> > - pt_addr, count,
>>>> > - incr,
>>>> > - AMDGPU_PTE_VALID);
>>>> > -
>>>> > - amdgpu_vm_do_set_ptes(&params, last_pde,
>>>> > - pt_addr, count, incr,
>>>> > - AMDGPU_PTE_VALID);
>>>> > + params.func(&params,
>>>> > + last_shadow,
>>>> > + pt_addr, count,
>>>> > + incr,
>>>> > + AMDGPU_PTE_VALID);
>>>> > +
>>>> > + params.func(&params, last_pde,
>>>> > + pt_addr, count, incr,
>>>> > + AMDGPU_PTE_VALID);
>>>> >                        }
>>>> >
>>>> >                        count = 1;
>>>> > @@ -1067,14 +1126,16 @@ static int amdgpu_vm_update_level(struct 
>>>> amdgpu_device *adev,
>>>> >                uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, 
>>>> last_pt);
>>>> >
>>>> >                if (vm->root.bo->shadow)
>>>> > - amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr,
>>>> > - count, incr, AMDGPU_PTE_VALID);
>>>> > +                     params.func(&params, last_shadow, pt_addr,
>>>> > +                                 count, incr, AMDGPU_PTE_VALID);
>>>> >
>>>> > -             amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr,
>>>> > -                                   count, incr, AMDGPU_PTE_VALID);
>>>> > +             params.func(&params, last_pde, pt_addr,
>>>> > +                         count, incr, AMDGPU_PTE_VALID);
>>>> >        }
>>>> >
>>>> > -     if (params.ib->length_dw == 0) {
>>>> > +     if (params.func == amdgpu_vm_cpu_set_ptes)
>>>> > +             amdgpu_bo_kunmap(parent->bo);
>>>> > +     else if (params.ib->length_dw == 0) {
>>>> >                amdgpu_job_free(job);
>>>> >        } else {
>>>> >                amdgpu_ring_pad_ib(ring, params.ib);
>>>> > @@ -2309,6 +2370,7 @@ int amdgpu_vm_init(struct amdgpu_device 
>>>> *adev, struct amdgpu_vm *vm,
>>>> >        struct amdgpu_ring *ring;
>>>> >        struct amd_sched_rq *rq;
>>>> >        int r, i;
>>>> > +     u64 flags;
>>>> >
>>>> >        vm->va = RB_ROOT;
>>>> >        vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
>>>> > @@ -2342,12 +2404,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>> >                  "CPU update of VM recommended only for large BAR system\n");
>>>> >        vm->last_dir_update = NULL;
>>>> >
>>>> > +     flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>>>> > +             AMDGPU_GEM_CREATE_VRAM_CLEARED;
>>>> > +     if (vm->use_cpu_for_update)
>>>> > +             flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
>>>> > +     else
>>>> > +             flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>>> > +                       AMDGPU_GEM_CREATE_SHADOW);
>>>> > +
>>>> >        r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
>>>> >                             AMDGPU_GEM_DOMAIN_VRAM,
>>>> > -                          AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>>> > -                          AMDGPU_GEM_CREATE_SHADOW |
>>>> > -                          AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>>>> > -                          AMDGPU_GEM_CREATE_VRAM_CLEARED,
>>>> > +                          flags,
>>>> >                             NULL, NULL, &vm->root.bo);
>>>> >        if (r)
>>>> >                goto error_free_sched_entity;
>>>>
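Taken together, the hunks above make amdgpu_vm_update_level() path-agnostic:
params.func is chosen once at setup time and the rest of the directory walk
does not care whether it writes through the CPU or builds an SDMA IB. A
minimal sketch of the resulting flow, using the names from the diff
(declarations trimmed, most error handling elided; a reading aid, not the
patch itself):

	/* Sketch: dispatch between CPU and SDMA page-directory updates. */
	if (vm->use_cpu_for_update) {
		/* CPU path: write PDEs through a kernel mapping of the BO */
		r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
		if (r)
			return r;
		params.func = amdgpu_vm_cpu_set_ptes;
	} else {
		/* SDMA path: build an IB and let the ring execute it */
		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
		if (r)
			return r;
		params.ib = &job->ibs[0];
		params.func = amdgpu_vm_do_set_ptes;
	}

	/* ... walk the directory, batching contiguous entries ... */
	params.func(&params, last_pde, pt_addr, count, incr, AMDGPU_PTE_VALID);

	/* Teardown differs per path: unmap for CPU, pad and submit
	 * (or free an empty job) for SDMA.
	 */
	if (params.func == amdgpu_vm_cpu_set_ptes)
		amdgpu_bo_kunmap(parent->bo);
	else if (params.ib->length_dw == 0)
		amdgpu_job_free(job);

The amdgpu_vm_init() hunk complements this: a CPU-updated VM creates its
page-directory BO with AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED so it can be
kmapped, while the SDMA path keeps AMDGPU_GEM_CREATE_NO_CPU_ACCESS plus a
SHADOW copy.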
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* Re: [PATCH 2/5] drm/amdgpu: Add vm context module param
       [not found]     ` <1494883923-25876-2-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
@ 2017-05-24  9:41       ` Christian König
       [not found]         ` <e8c14f45-1072-6b55-4370-c3c8d44dd2fc-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Christian König @ 2017-05-24  9:41 UTC (permalink / raw)
  To: Harish Kasiviswanathan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 15.05.2017 at 23:32, Harish Kasiviswanathan wrote:
> Add a VM update mode module param (amdgpu.vm_update_mode) that can be
> used to control how VM PDEs/PTEs are updated for Graphics and Compute.
>
> BIT0 controls Graphics and BIT1 Compute.
>   BIT0 [= 0] Graphics updated by SDMA [= 1] by CPU
>   BIT1 [= 0] Compute updated by SDMA [= 1] by CPU
>
> By default, vm_update_mode = 2 only on large BAR systems, meaning
> Graphics VMs are updated via SDMA and Compute VMs via CPU. On all
> other systems vm_update_mode defaults to 0.
>
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  4 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  3 ++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 35 ++++++++++++++++++++++++++++++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  | 20 ++++++++++++++++++-
>   5 files changed, 60 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index fadeb55..fd84410 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -94,6 +94,7 @@
>   extern int amdgpu_vm_block_size;
>   extern int amdgpu_vm_fault_stop;
>   extern int amdgpu_vm_debug;
> +extern int amdgpu_vm_update_mode;
>   extern int amdgpu_dc;
>   extern int amdgpu_sched_jobs;
>   extern int amdgpu_sched_hw_submission;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 130c45d..8d28a35 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -94,6 +94,7 @@
>   int amdgpu_vm_fault_stop = 0;
>   int amdgpu_vm_debug = 0;
>   int amdgpu_vram_page_split = 512;
> +int amdgpu_vm_update_mode = -1;
>   int amdgpu_exp_hw_support = 0;
>   int amdgpu_dc = -1;
>   int amdgpu_sched_jobs = 32;
> @@ -180,6 +181,9 @@
>   MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)");
>   module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
>   
> +MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except for large BAR(LB)), 1 = Graphics only, 2 = Compute only (default for LB), 3 = Both)");
> +module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444);
> +
>   MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 1024, -1 = disable)");
>   module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index d167949..8f6c20f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -774,7 +774,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
>   		goto out_suspend;
>   	}
>   
> -	r = amdgpu_vm_init(adev, &fpriv->vm);
> +	r = amdgpu_vm_init(adev, &fpriv->vm,
> +			   AMDGPU_VM_CONTEXT_GFX);
>   	if (r) {
>   		kfree(fpriv);
>   		goto out_suspend;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index c644e54..9c89cb2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -721,6 +721,11 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
>   	return true;
>   }
>   
> +static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
> +{
> +	return (adev->mc.real_vram_size == adev->mc.visible_vram_size);
> +}
> +
>   /**
>    * amdgpu_vm_flush - hardware flush the vm
>    *
> @@ -2291,10 +2296,12 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size)
>    *
>    * @adev: amdgpu_device pointer
>    * @vm: requested vm
> + * @vm_context: Indicates if it is a GFX or Compute context
>    *
>    * Init @vm fields.
>    */
> -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> +		   int vm_context)
>   {
>   	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
>   		AMDGPU_VM_PTE_COUNT(adev) * 8);
> @@ -2323,6 +2330,16 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   	if (r)
>   		return r;
>   
> +	if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
> +		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
> +						AMDGPU_VM_USE_CPU_FOR_COMPUTE);
> +	else
> +		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
> +						AMDGPU_VM_USE_CPU_FOR_GFX);
> +	DRM_DEBUG_DRIVER("VM update mode is %s\n",
> +			 vm->use_cpu_for_update ? "CPU" : "SDMA");
> +	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
> +		  "CPU update of VM recommended only for large BAR system\n");
>   	vm->last_dir_update = NULL;
>   
>   	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
> @@ -2454,6 +2471,22 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
>   	atomic64_set(&adev->vm_manager.client_counter, 0);
>   	spin_lock_init(&adev->vm_manager.prt_lock);
>   	atomic_set(&adev->vm_manager.num_prt_users, 0);
> +
> +	/* If not overridden by the user, by default, only in large BAR systems
> +	 * Compute VM tables will be updated by CPU
> +	 */
> +#ifdef CONFIG_X86_64
> +	if (amdgpu_vm_update_mode == -1) {
> +		if (amdgpu_vm_is_large_bar(adev))
> +			adev->vm_manager.vm_update_mode =
> +				AMDGPU_VM_USE_CPU_FOR_COMPUTE;
> +		else
> +			adev->vm_manager.vm_update_mode = 0;
> +	}

Aren't you missing the else case here?

In other words, when amdgpu_vm_update_mode is not -1 we should take its
value for vm_update_mode.
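
A minimal sketch of the fix being asked for (reconstructed from the comment
above; the actual pushed commit may differ in detail):

#ifdef CONFIG_X86_64
	if (amdgpu_vm_update_mode == -1) {
		/* No explicit request: CPU updates only pay off for
		 * Compute VMs on large BAR systems.
		 */
		if (amdgpu_vm_is_large_bar(adev))
			adev->vm_manager.vm_update_mode =
				AMDGPU_VM_USE_CPU_FOR_COMPUTE;
		else
			adev->vm_manager.vm_update_mode = 0;
	} else {
		/* The user asked for something specific; honor it. */
		adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
	}
#else
	adev->vm_manager.vm_update_mode = 0;
#endif

This keeps the large-BAR heuristic purely as a default, so an explicit
amdgpu.vm_update_mode setting from the user always wins.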

With that fixed, the patch is Reviewed-by: Christian König <christian.koenig@amd.com>.

Regards,
Christian.

> +#else
> +	adev->vm_manager.vm_update_mode = 0;
> +#endif
> +
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index afe9073..9aa00d9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -87,6 +87,14 @@
>   /* max vmids dedicated for process */
>   #define AMDGPU_VM_MAX_RESERVED_VMID	1
>   
> +#define AMDGPU_VM_CONTEXT_GFX 0
> +#define AMDGPU_VM_CONTEXT_COMPUTE 1
> +
> +/* See vm_update_mode */
> +#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
> +#define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
> +
> +
>   struct amdgpu_vm_pt {
>   	struct amdgpu_bo	*bo;
>   	uint64_t		addr;
> @@ -129,6 +137,9 @@ struct amdgpu_vm {
>   	struct amdgpu_vm_id	*reserved_vmid[AMDGPU_MAX_VMHUBS];
>   	/* each VM will map on CSA */
>   	struct amdgpu_bo_va *csa_bo_va;
> +
> +	/* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
> +	bool                    use_cpu_for_update;
>   };
>   
>   struct amdgpu_vm_id {
> @@ -184,11 +195,18 @@ struct amdgpu_vm_manager {
>   	/* partial resident texture handling */
>   	spinlock_t				prt_lock;
>   	atomic_t				num_prt_users;
> +
> +	/* controls how VM page tables are updated for Graphics and Compute.
> +	 * BIT0[= 0] Graphics updated by SDMA [= 1] by CPU
> +	 * BIT1[= 0] Compute updated by SDMA [= 1] by CPU
> +	 */
> +	int					vm_update_mode;
>   };
>   
>   void amdgpu_vm_manager_init(struct amdgpu_device *adev);
>   void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
> -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
> +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> +		   int vm_context);
>   void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
>   void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
>   			 struct list_head *validated,


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* RE: [PATCH 2/5] drm/amdgpu: Add vm context module param
       [not found]         ` <e8c14f45-1072-6b55-4370-c3c8d44dd2fc-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-05-24 17:49           ` Kasiviswanathan, Harish
  0 siblings, 0 replies; 16+ messages in thread
From: Kasiviswanathan, Harish @ 2017-05-24 17:49 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



-----Original Message-----
From: Christian König [mailto:deathsimple@vodafone.de] 
Sent: Wednesday, May 24, 2017 5:41 AM
To: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/5] drm/amdgpu: Add vm context module param

On 15.05.2017 at 23:32, Harish Kasiviswanathan wrote:
> Add a VM update mode module param (amdgpu.vm_update_mode) that can be
> used to control how VM PDEs/PTEs are updated for Graphics and Compute.
>
> BIT0 controls Graphics and BIT1 Compute.
>   BIT0 [= 0] Graphics updated by SDMA [= 1] by CPU
>   BIT1 [= 0] Compute updated by SDMA [= 1] by CPU
>
> By default, vm_update_mode = 2 only on large BAR systems, meaning
> Graphics VMs are updated via SDMA and Compute VMs via CPU. On all
> other systems vm_update_mode defaults to 0.
> [...]
> @@ -2454,6 +2471,22 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
>   	atomic64_set(&adev->vm_manager.client_counter, 0);
>   	spin_lock_init(&adev->vm_manager.prt_lock);
>   	atomic_set(&adev->vm_manager.num_prt_users, 0);
> +
> +	/* If not overridden by the user, by default, only in large BAR systems
> +	 * Compute VM tables will be updated by CPU
> +	 */
> +#ifdef CONFIG_X86_64
> +	if (amdgpu_vm_update_mode == -1) {
> +		if (amdgpu_vm_is_large_bar(adev))
> +			adev->vm_manager.vm_update_mode =
> +				AMDGPU_VM_USE_CPU_FOR_COMPUTE;
> +		else
> +			adev->vm_manager.vm_update_mode = 0;
> +	}

Aren't you missing the else case here?

In other words, when amdgpu_vm_update_mode is not -1 we should take its value for vm_update_mode.

[HK]: Good catch. Thanks for the review. I have fixed it and pushed the code.


With that fixed, the patch is Reviewed-by: Christian König <christian.koenig@amd.com>.

Regards,
Christian.

> +#else
> +	adev->vm_manager.vm_update_mode = 0;
> +#endif
> +
>   }
> [...]


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


end of thread (last message: 2017-05-24 17:49 UTC)

Thread overview: 16+ messages
2017-05-15 21:31 [PATCH 1/5] drm/amdgpu: Return EINVAL if no PT BO Harish Kasiviswanathan
     [not found] ` <1494883923-25876-1-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
2017-05-15 21:32   ` [PATCH 2/5] drm/amdgpu: Add vm context module param Harish Kasiviswanathan
     [not found]     ` <1494883923-25876-2-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
2017-05-24  9:41       ` Christian König
     [not found]         ` <e8c14f45-1072-6b55-4370-c3c8d44dd2fc-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-24 17:49           ` Kasiviswanathan, Harish
2017-05-15 21:32   ` [PATCH 3/5] drm/amdgpu: Add amdgpu_sync_wait Harish Kasiviswanathan
     [not found]     ` <1494883923-25876-3-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
2017-05-16 12:49       ` Christian König
2017-05-15 21:32   ` [PATCH 4/5] drm/amdgpu: Support page directory update via CPU Harish Kasiviswanathan
     [not found]     ` <1494883923-25876-4-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
2017-05-16  2:49       ` zhoucm1
     [not found]         ` <591A68D7.80009-5C7GfCeVMHo@public.gmane.org>
2017-05-16 21:02           ` Kasiviswanathan, Harish
     [not found]             ` <CY1PR1201MB1034A467A20010323B44EAEC8CE60-JBJ/M6OpXY+irIVeHNVyQ2rFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2017-05-17  1:54               ` zhoucm1
     [not found]                 ` <591BAD6C.2070605-5C7GfCeVMHo@public.gmane.org>
2017-05-17  8:48                   ` Christian König
     [not found]                     ` <bec8a5a3-7d62-2ff5-96b9-2c03afec1483-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-17  8:53                       ` zhoucm1
     [not found]                         ` <591C0F9E.2030800-5C7GfCeVMHo@public.gmane.org>
2017-05-17  8:59                           ` Christian König
2017-05-16 12:52       ` Christian König
2017-05-15 21:32   ` [PATCH 5/5] drm/amdgpu: Support page table " Harish Kasiviswanathan
     [not found]     ` <1494883923-25876-5-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
2017-05-16 12:55       ` Christian König
