All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] drm/amdgpu: restrict userptr even more
@ 2017-08-30 15:00 Christian König
       [not found] ` <1504105209-1558-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2017-08-30 15:00 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

Don't allow them to be GEM imported into another process.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index d028806..e32a2b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -112,7 +112,13 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
 	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
 	struct amdgpu_bo_va *bo_va;
+	struct mm_struct *mm;
 	int r;
+
+	mm = amdgpu_ttm_tt_get_usermm(abo->tbo.ttm);
+	if (mm && mm != current->mm)
+		return -EPERM;
+
 	r = amdgpu_bo_reserve(abo, false);
 	if (r)
 		return r;
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/4] drm/amdgpu: add support for per VM BOs v2
       [not found] ` <1504105209-1558-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-30 15:00   ` Christian König
       [not found]     ` <1504105209-1558-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-08-30 15:00   ` [PATCH 3/4] drm/amdgpu: add IOCTL interface for per VM BOs v3 Christian König
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2017-08-30 15:00 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

Per VM BOs are handled like VM PDs and PTs. They are always valid and don't
need to be specified in the BO lists.

v2: validate PDs/PTs first

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 ++++++++++++++++++++++++----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  5 ++-
 3 files changed, 60 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index f68ac56..48e18cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -813,7 +813,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 
 	}
 
-	r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync);
+	r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync);
 
 	if (amdgpu_vm_debug && p->bo_list) {
 		/* Invalidate all BOs to test for userspace bugs */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 4cdfb70..6cd20e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -189,14 +189,18 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			spin_unlock(&glob->lru_lock);
 		}
 
-		if (vm->use_cpu_for_update) {
+		if (bo->tbo.type == ttm_bo_type_kernel &&
+		    vm->use_cpu_for_update) {
 			r = amdgpu_bo_kmap(bo, NULL);
 			if (r)
 				return r;
 		}
 
 		spin_lock(&vm->status_lock);
-		list_move(&bo_base->vm_status, &vm->relocated);
+		if (bo->tbo.type != ttm_bo_type_kernel)
+			list_move(&bo_base->vm_status, &vm->moved);
+		else
+			list_move(&bo_base->vm_status, &vm->relocated);
 	}
 	spin_unlock(&vm->status_lock);
 
@@ -1994,20 +1998,23 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_vm_clear_moved - clear moved BOs in the PT
+ * amdgpu_vm_handle_moved - handle moved BOs in the PT
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
+ * @sync: sync object to add fences to
  *
- * Make sure all moved BOs are cleared in the PT.
+ * Make sure all BOs which are moved are updated in the PTs.
  * Returns 0 for success.
  *
- * PTs have to be reserved and mutex must be locked!
+ * PTs have to be reserved!
  */
-int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			    struct amdgpu_sync *sync)
+int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
+			   struct amdgpu_vm *vm,
+			   struct amdgpu_sync *sync)
 {
 	struct amdgpu_bo_va *bo_va = NULL;
+	bool clear;
 	int r = 0;
 
 	spin_lock(&vm->status_lock);
@@ -2016,7 +2023,10 @@ int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			struct amdgpu_bo_va, base.vm_status);
 		spin_unlock(&vm->status_lock);
 
-		r = amdgpu_vm_bo_update(adev, bo_va, true);
+		/* Per VM BOs never need to bo cleared in the page tables */
+		clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
+
+		r = amdgpu_vm_bo_update(adev, bo_va, clear);
 		if (r)
 			return r;
 
@@ -2068,6 +2078,37 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
 	return bo_va;
 }
 
+
+/**
+ * amdgpu_vm_bo_insert_mapping - insert a new mapping
+ *
+ * @adev: amdgpu_device pointer
+ * @bo_va: bo_va to store the address
+ * @mapping: the mapping to insert
+ *
+ * Insert a new mapping into all structures.
+ */
+static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
+				    struct amdgpu_bo_va *bo_va,
+				    struct amdgpu_bo_va_mapping *mapping)
+{
+	struct amdgpu_vm *vm = bo_va->base.vm;
+	struct amdgpu_bo *bo = bo_va->base.bo;
+
+	list_add(&mapping->list, &bo_va->invalids);
+	amdgpu_vm_it_insert(mapping, &vm->va);
+
+	if (mapping->flags & AMDGPU_PTE_PRT)
+		amdgpu_vm_prt_get(adev);
+
+	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
+		spin_lock(&vm->status_lock);
+		list_move(&bo_va->base.vm_status, &vm->moved);
+		spin_unlock(&vm->status_lock);
+	}
+	trace_amdgpu_vm_bo_map(bo_va, mapping);
+}
+
 /**
  * amdgpu_vm_bo_map - map bo inside a vm
  *
@@ -2119,18 +2160,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	if (!mapping)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&mapping->list);
 	mapping->start = saddr;
 	mapping->last = eaddr;
 	mapping->offset = offset;
 	mapping->flags = flags;
 
-	list_add(&mapping->list, &bo_va->invalids);
-	amdgpu_vm_it_insert(mapping, &vm->va);
-
-	if (flags & AMDGPU_PTE_PRT)
-		amdgpu_vm_prt_get(adev);
-	trace_amdgpu_vm_bo_map(bo_va, mapping);
+	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
 
 	return 0;
 }
@@ -2157,7 +2192,6 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
 {
 	struct amdgpu_bo_va_mapping *mapping;
 	struct amdgpu_bo *bo = bo_va->base.bo;
-	struct amdgpu_vm *vm = bo_va->base.vm;
 	uint64_t eaddr;
 	int r;
 
@@ -2191,12 +2225,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
 	mapping->offset = offset;
 	mapping->flags = flags;
 
-	list_add(&mapping->list, &bo_va->invalids);
-	amdgpu_vm_it_insert(mapping, &vm->va);
-
-	if (flags & AMDGPU_PTE_PRT)
-		amdgpu_vm_prt_get(adev);
-	trace_amdgpu_vm_bo_map(bo_va, mapping);
+	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
 
 	return 0;
 }
@@ -2411,7 +2440,11 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 		bo_base->moved = true;
 		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
 			spin_lock(&bo_base->vm->status_lock);
-			list_move(&bo_base->vm_status, &vm->evicted);
+			if (bo->tbo.type == ttm_bo_type_kernel)
+				list_move(&bo_base->vm_status, &vm->evicted);
+			else
+				list_move_tail(&bo_base->vm_status,
+					       &vm->evicted);
 			spin_unlock(&bo_base->vm->status_lock);
 			continue;
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index c3753af..90b7741 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -249,8 +249,9 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			  struct amdgpu_vm *vm,
 			  struct dma_fence **fence);
-int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			  struct amdgpu_sync *sync);
+int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
+			   struct amdgpu_vm *vm,
+			   struct amdgpu_sync *sync);
 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			struct amdgpu_bo_va *bo_va,
 			bool clear);
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/4] drm/amdgpu: add IOCTL interface for per VM BOs v3
       [not found] ` <1504105209-1558-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-08-30 15:00   ` [PATCH 2/4] drm/amdgpu: add support for per VM BOs v2 Christian König
@ 2017-08-30 15:00   ` Christian König
       [not found]     ` <1504105209-1558-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-08-30 15:00   ` [PATCH 4/4] drm/amdgpu: bump version for support of local BOs Christian König
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2017-08-30 15:00 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

Add the IOCTL interface so that applications can allocate per VM BOs.

Still WIP since not all corner cases are tested yet, but this reduces average
CS overhead for 10K BOs from 21ms down to 48us.

v2: add some extra checks, remove the WIP tag
v3: rename new flag to AMDGPU_GEM_CREATE_VM_ALWAYS_VALID

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 63 ++++++++++++++++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
 include/uapi/drm/amdgpu_drm.h             |  2 +
 5 files changed, 55 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b1e817c..21cab36 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
  */
 void amdgpu_gem_force_release(struct amdgpu_device *adev);
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
-				int alignment, u32 initial_domain,
-				u64 flags, bool kernel,
-				struct drm_gem_object **obj);
+			     int alignment, u32 initial_domain,
+			     u64 flags, bool kernel,
+			     struct reservation_object *resv,
+			     struct drm_gem_object **obj);
 
 int amdgpu_mode_dumb_create(struct drm_file *file_priv,
 			    struct drm_device *dev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 0e907ea..7256f83 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
 				       AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 				       AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
 				       AMDGPU_GEM_CREATE_VRAM_CLEARED,
-				       true, &gobj);
+				       true, NULL, &gobj);
 	if (ret) {
 		pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index e32a2b5..f1e61b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
 }
 
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
-				int alignment, u32 initial_domain,
-				u64 flags, bool kernel,
-				struct drm_gem_object **obj)
+			     int alignment, u32 initial_domain,
+			     u64 flags, bool kernel,
+			     struct reservation_object *resv,
+			     struct drm_gem_object **obj)
 {
-	struct amdgpu_bo *robj;
+	struct amdgpu_bo *bo;
 	int r;
 
 	*obj = NULL;
@@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 
 retry:
 	r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
-			     flags, NULL, NULL, 0, &robj);
+			     flags, NULL, resv, 0, &bo);
 	if (r) {
 		if (r != -ERESTARTSYS) {
 			if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
@@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 		}
 		return r;
 	}
-	*obj = &robj->gem_base;
+	*obj = &bo->gem_base;
 
 	return 0;
 }
@@ -119,6 +120,10 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
 	if (mm && mm != current->mm)
 		return -EPERM;
 
+	if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID &&
+	    abo->tbo.resv != vm->root.base.bo->tbo.resv)
+		return -EPERM;
+
 	r = amdgpu_bo_reserve(abo, false);
 	if (r)
 		return r;
@@ -142,13 +147,14 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 	struct amdgpu_vm *vm = &fpriv->vm;
 
 	struct amdgpu_bo_list_entry vm_pd;
-	struct list_head list;
+	struct list_head list, duplicates;
 	struct ttm_validate_buffer tv;
 	struct ww_acquire_ctx ticket;
 	struct amdgpu_bo_va *bo_va;
 	int r;
 
 	INIT_LIST_HEAD(&list);
+	INIT_LIST_HEAD(&duplicates);
 
 	tv.bo = &bo->tbo;
 	tv.shared = true;
@@ -156,7 +162,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 
 	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
+	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
 	if (r) {
 		dev_err(adev->dev, "leaking bo va because "
 			"we fail to reserve bo (%d)\n", r);
@@ -191,9 +197,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	struct amdgpu_vm *vm = &fpriv->vm;
 	union drm_amdgpu_gem_create *args = data;
 	uint64_t flags = args->in.domain_flags;
 	uint64_t size = args->in.bo_size;
+	struct reservation_object *resv = NULL;
 	struct drm_gem_object *gobj;
 	uint32_t handle;
 	int r;
@@ -202,7 +211,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 	if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 		      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 		      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
-		      AMDGPU_GEM_CREATE_VRAM_CLEARED))
+		      AMDGPU_GEM_CREATE_VRAM_CLEARED |
+		      AMDGPU_GEM_CREATE_VM_ALWAYS_VALID))
 		return -EINVAL;
 
 	/* reject invalid gem domains */
@@ -229,9 +239,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 	}
 	size = roundup(size, PAGE_SIZE);
 
+	if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
+		r = amdgpu_bo_reserve(vm->root.base.bo, false);
+		if (r)
+			return r;
+
+		resv = vm->root.base.bo->tbo.resv;
+	}
+
 	r = amdgpu_gem_object_create(adev, size, args->in.alignment,
 				     (u32)(0xffffffff & args->in.domains),
-				     flags, false, &gobj);
+				     flags, false, resv, &gobj);
+	if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
+		if (!r) {
+			struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
+
+			abo->parent = amdgpu_bo_ref(vm->root.base.bo);
+		}
+		amdgpu_bo_unreserve(vm->root.base.bo);
+	}
 	if (r)
 		return r;
 
@@ -273,9 +299,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 	}
 
 	/* create a gem object to contain this object in */
-	r = amdgpu_gem_object_create(adev, args->size, 0,
-				     AMDGPU_GEM_DOMAIN_CPU, 0,
-				     0, &gobj);
+	r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
+				     0, 0, NULL, &gobj);
 	if (r)
 		return r;
 
@@ -527,7 +552,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	struct amdgpu_bo_list_entry vm_pd;
 	struct ttm_validate_buffer tv;
 	struct ww_acquire_ctx ticket;
-	struct list_head list;
+	struct list_head list, duplicates;
 	uint64_t va_flags;
 	int r = 0;
 
@@ -563,6 +588,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	}
 
 	INIT_LIST_HEAD(&list);
+	INIT_LIST_HEAD(&duplicates);
 	if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
 	    !(args->flags & AMDGPU_VM_PAGE_PRT)) {
 		gobj = drm_gem_object_lookup(filp, args->handle);
@@ -579,7 +605,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
+	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
 	if (r)
 		goto error_unref;
 
@@ -645,6 +671,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *filp)
 {
+	struct amdgpu_device *adev = dev->dev_private;
 	struct drm_amdgpu_gem_op *args = data;
 	struct drm_gem_object *gobj;
 	struct amdgpu_bo *robj;
@@ -692,6 +719,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 		if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
 			robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
 
+		if (robj->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
+			amdgpu_vm_bo_invalidate(adev, robj, true);
+
 		amdgpu_bo_unreserve(robj);
 		break;
 	default:
@@ -721,8 +751,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
 	r = amdgpu_gem_object_create(adev, args->size, 0,
 				     AMDGPU_GEM_DOMAIN_VRAM,
 				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
-				     ttm_bo_type_device,
-				     &gobj);
+				     false, NULL, &gobj);
 	if (r)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 5b3f928..7e08264 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 {
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
 
-	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
+	    bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
 		return ERR_PTR(-EPERM);
 
 	return drm_gem_prime_export(dev, gobj, flags);
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index d0ee739..fe839d9 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -89,6 +89,8 @@ extern "C" {
 #define AMDGPU_GEM_CREATE_SHADOW		(1 << 4)
 /* Flag that allocating the BO should use linear VRAM */
 #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
+/* Flag that BO is always valid in this VM */
+#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID	(1 << 6)
 
 struct drm_amdgpu_gem_create_in  {
 	/** the requested memory size */
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 4/4] drm/amdgpu: bump version for support of local BOs
       [not found] ` <1504105209-1558-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-08-30 15:00   ` [PATCH 2/4] drm/amdgpu: add support for per VM BOs v2 Christian König
  2017-08-30 15:00   ` [PATCH 3/4] drm/amdgpu: add IOCTL interface for per VM BOs v3 Christian König
@ 2017-08-30 15:00   ` Christian König
       [not found]     ` <1504105209-1558-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-08-30 21:00   ` [PATCH 1/4] drm/amdgpu: restrict userptr even more Felix Kuehling
  2017-08-31  2:10   ` zhoucm1
  4 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2017-08-30 15:00 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 21116fc..5035305c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -69,9 +69,10 @@
  * - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS.
  * - 3.18.0 - Export gpu always on cu bitmap
  * - 3.19.0 - Add support for UVD MJPEG decode
+ * - 3.20.0 - Add support for local BOs
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	19
+#define KMS_DRIVER_MINOR	20
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: restrict userptr even more
       [not found] ` <1504105209-1558-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
                     ` (2 preceding siblings ...)
  2017-08-30 15:00   ` [PATCH 4/4] drm/amdgpu: bump version for support of local BOs Christian König
@ 2017-08-30 21:00   ` Felix Kuehling
  2017-08-31  2:10   ` zhoucm1
  4 siblings, 0 replies; 14+ messages in thread
From: Felix Kuehling @ 2017-08-30 21:00 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Christian König
  Cc: Bridgman, John

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>

John and I were wondering whether this is redundant because
amdgpu_gem_prime_export already prevents exporting of userptr BOs.
However, I just realized this is not for prime buffer sharing but the
GEM_FLINK buffer sharing API. It doesn't have a driver callback in
GEM_FLINK, so the only chance to prevent sharing userptrs is in GEM_OPEN.

As I understand it, this doesn't add any new fundamental limitation,
just enforces it in a different situation.

Regards,
  Felix


On 2017-08-30 11:00 AM, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Don't allow them to be GEM imported into another process.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 6 ++++++
>  1 file changed, 6 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index d028806..e32a2b5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -112,7 +112,13 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
>  	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
>  	struct amdgpu_vm *vm = &fpriv->vm;
>  	struct amdgpu_bo_va *bo_va;
> +	struct mm_struct *mm;
>  	int r;
> +
> +	mm = amdgpu_ttm_tt_get_usermm(abo->tbo.ttm);
> +	if (mm && mm != current->mm)
> +		return -EPERM;
> +
>  	r = amdgpu_bo_reserve(abo, false);
>  	if (r)
>  		return r;

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/4] drm/amdgpu: add support for per VM BOs v2
       [not found]     ` <1504105209-1558-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-30 22:50       ` Felix Kuehling
       [not found]         ` <0f0164e2-2a6e-9805-54d2-c2220614657f-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 14+ messages in thread
From: Felix Kuehling @ 2017-08-30 22:50 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Christian König

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>

Some more thoughts inline, but nothing that should be addressed in this
change.

Regards,
  Felix


On 2017-08-30 11:00 AM, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Per VM BOs are handled like VM PDs and PTs. They are always valid and don't
> need to be specified in the BO lists.
>
> v2: validate PDs/PTs first
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 ++++++++++++++++++++++++----------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  5 ++-
>  3 files changed, 60 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index f68ac56..48e18cc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -813,7 +813,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
>  
>  	}
>  
> -	r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync);
> +	r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync);
>  
>  	if (amdgpu_vm_debug && p->bo_list) {
>  		/* Invalidate all BOs to test for userspace bugs */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 4cdfb70..6cd20e7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -189,14 +189,18 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>  			spin_unlock(&glob->lru_lock);
>  		}
>  
> -		if (vm->use_cpu_for_update) {
> +		if (bo->tbo.type == ttm_bo_type_kernel &&
> +		    vm->use_cpu_for_update) {
>  			r = amdgpu_bo_kmap(bo, NULL);
>  			if (r)
>  				return r;
>  		}
>  
>  		spin_lock(&vm->status_lock);
> -		list_move(&bo_base->vm_status, &vm->relocated);
> +		if (bo->tbo.type != ttm_bo_type_kernel)
> +			list_move(&bo_base->vm_status, &vm->moved);
> +		else
> +			list_move(&bo_base->vm_status, &vm->relocated);
>  	}
>  	spin_unlock(&vm->status_lock);
>  
> @@ -1994,20 +1998,23 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>  }
>  
>  /**
> - * amdgpu_vm_clear_moved - clear moved BOs in the PT
> + * amdgpu_vm_handle_moved - handle moved BOs in the PT
>   *
>   * @adev: amdgpu_device pointer
>   * @vm: requested vm
> + * @sync: sync object to add fences to
>   *
> - * Make sure all moved BOs are cleared in the PT.
> + * Make sure all BOs which are moved are updated in the PTs.
>   * Returns 0 for success.
>   *
> - * PTs have to be reserved and mutex must be locked!
> + * PTs have to be reserved!
>   */
> -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> -			    struct amdgpu_sync *sync)
> +int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
> +			   struct amdgpu_vm *vm,
> +			   struct amdgpu_sync *sync)
>  {
>  	struct amdgpu_bo_va *bo_va = NULL;
> +	bool clear;
>  	int r = 0;
>  
>  	spin_lock(&vm->status_lock);
> @@ -2016,7 +2023,10 @@ int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>  			struct amdgpu_bo_va, base.vm_status);
>  		spin_unlock(&vm->status_lock);
>  
> -		r = amdgpu_vm_bo_update(adev, bo_va, true);
> +		/* Per VM BOs never need to bo cleared in the page tables */
> +		clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
> +
> +		r = amdgpu_vm_bo_update(adev, bo_va, clear);

Just thinking out loud: Per VM BOs don't need to be cleared because they
have separate lists for evicted and moved (but valid) BOs. If we had a
similar thing for non-VM BOs (but without the automatic validation),
this case could be optimized. Then moved but valid BOs could be updated
in the page table instead of clearing them here and updating the page
table again later.

>  		if (r)
>  			return r;
>  
> @@ -2068,6 +2078,37 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
>  	return bo_va;
>  }
>  
> +
> +/**
> + * amdgpu_vm_bo_insert_mapping - insert a new mapping
> + *
> + * @adev: amdgpu_device pointer
> + * @bo_va: bo_va to store the address
> + * @mapping: the mapping to insert
> + *
> + * Insert a new mapping into all structures.
> + */
> +static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
> +				    struct amdgpu_bo_va *bo_va,
> +				    struct amdgpu_bo_va_mapping *mapping)
> +{
> +	struct amdgpu_vm *vm = bo_va->base.vm;
> +	struct amdgpu_bo *bo = bo_va->base.bo;
> +
> +	list_add(&mapping->list, &bo_va->invalids);
> +	amdgpu_vm_it_insert(mapping, &vm->va);
> +
> +	if (mapping->flags & AMDGPU_PTE_PRT)
> +		amdgpu_vm_prt_get(adev);
> +
> +	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
> +		spin_lock(&vm->status_lock);
> +		list_move(&bo_va->base.vm_status, &vm->moved);
> +		spin_unlock(&vm->status_lock);
> +	}
> +	trace_amdgpu_vm_bo_map(bo_va, mapping);
> +}
> +
>  /**
>   * amdgpu_vm_bo_map - map bo inside a vm
>   *
> @@ -2119,18 +2160,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
>  	if (!mapping)
>  		return -ENOMEM;
>  
> -	INIT_LIST_HEAD(&mapping->list);
>  	mapping->start = saddr;
>  	mapping->last = eaddr;
>  	mapping->offset = offset;
>  	mapping->flags = flags;
>  
> -	list_add(&mapping->list, &bo_va->invalids);
> -	amdgpu_vm_it_insert(mapping, &vm->va);
> -
> -	if (flags & AMDGPU_PTE_PRT)
> -		amdgpu_vm_prt_get(adev);
> -	trace_amdgpu_vm_bo_map(bo_va, mapping);
> +	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>  
>  	return 0;
>  }
> @@ -2157,7 +2192,6 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
>  {
>  	struct amdgpu_bo_va_mapping *mapping;
>  	struct amdgpu_bo *bo = bo_va->base.bo;
> -	struct amdgpu_vm *vm = bo_va->base.vm;
>  	uint64_t eaddr;
>  	int r;
>  
> @@ -2191,12 +2225,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
>  	mapping->offset = offset;
>  	mapping->flags = flags;
>  
> -	list_add(&mapping->list, &bo_va->invalids);
> -	amdgpu_vm_it_insert(mapping, &vm->va);
> -
> -	if (flags & AMDGPU_PTE_PRT)
> -		amdgpu_vm_prt_get(adev);
> -	trace_amdgpu_vm_bo_map(bo_va, mapping);
> +	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>  
>  	return 0;
>  }
> @@ -2411,7 +2440,11 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>  		bo_base->moved = true;
>  		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>  			spin_lock(&bo_base->vm->status_lock);
> -			list_move(&bo_base->vm_status, &vm->evicted);
> +			if (bo->tbo.type == ttm_bo_type_kernel)
> +				list_move(&bo_base->vm_status, &vm->evicted);
> +			else
> +				list_move_tail(&bo_base->vm_status,
> +					       &vm->evicted);
>  			spin_unlock(&bo_base->vm->status_lock);
>  			continue;
>  		}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index c3753af..90b7741 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -249,8 +249,9 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>  int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>  			  struct amdgpu_vm *vm,
>  			  struct dma_fence **fence);
> -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> -			  struct amdgpu_sync *sync);
> +int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
> +			   struct amdgpu_vm *vm,
> +			   struct amdgpu_sync *sync);
>  int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>  			struct amdgpu_bo_va *bo_va,
>  			bool clear);

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] drm/amdgpu: add IOCTL interface for per VM BOs v3
       [not found]     ` <1504105209-1558-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-30 22:55       ` Felix Kuehling
       [not found]         ` <5ec19bf2-75a2-91f2-e3d7-aeb5719f53e7-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 14+ messages in thread
From: Felix Kuehling @ 2017-08-30 22:55 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2017-08-30 11:00 AM, Christian König wrote:
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>  {
>  	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>  
> -	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
> +	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
> +	    bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
>  		return ERR_PTR(-EPERM);
>  
>  	return drm_gem_prime_export(dev, gobj, flags);

Is this limitation necessary? If it weren't for this, I'd use per-VM BOs
for KFD, because we always need to validate all our BOs when we restore
from an eviction anyway. But we need to be able to support buffer
sharing at the same time. And we don't know which buffers an application
plans to shared at allocation time.

Either way, we could address this later. This patch is Reviewed-by:
Felix Kuehling <Felix.Kuehling@amd.com>

Regards,
  Felix

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/4] drm/amdgpu: bump version for support of local BOs
       [not found]     ` <1504105209-1558-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-30 23:30       ` Felix Kuehling
  0 siblings, 0 replies; 14+ messages in thread
From: Felix Kuehling @ 2017-08-30 23:30 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Christian König

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>


On 2017-08-30 11:00 AM, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 21116fc..5035305c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -69,9 +69,10 @@
>   * - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS.
>   * - 3.18.0 - Export gpu always on cu bitmap
>   * - 3.19.0 - Add support for UVD MJPEG decode
> + * - 3.20.0 - Add support for local BOs
>   */
>  #define KMS_DRIVER_MAJOR	3
> -#define KMS_DRIVER_MINOR	19
> +#define KMS_DRIVER_MINOR	20
>  #define KMS_DRIVER_PATCHLEVEL	0
>  
>  int amdgpu_vram_limit = 0;

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: restrict userptr even more
       [not found] ` <1504105209-1558-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
                     ` (3 preceding siblings ...)
  2017-08-30 21:00   ` [PATCH 1/4] drm/amdgpu: restrict userptr even more Felix Kuehling
@ 2017-08-31  2:10   ` zhoucm1
  4 siblings, 0 replies; 14+ messages in thread
From: zhoucm1 @ 2017-08-31  2:10 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Acked-by: Chunming Zhou <david1.zhou@amd.com>, For the other three, we'd 
better to wait for our UMD guys tests.


On 2017年08月30日 23:00, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Don't allow them to be GEM imported into another process.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 6 ++++++
>   1 file changed, 6 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index d028806..e32a2b5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -112,7 +112,13 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
>   	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
>   	struct amdgpu_vm *vm = &fpriv->vm;
>   	struct amdgpu_bo_va *bo_va;
> +	struct mm_struct *mm;
>   	int r;
> +
> +	mm = amdgpu_ttm_tt_get_usermm(abo->tbo.ttm);
> +	if (mm && mm != current->mm)
> +		return -EPERM;
> +
>   	r = amdgpu_bo_reserve(abo, false);
>   	if (r)
>   		return r;

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/4] drm/amdgpu: add support for per VM BOs v2
       [not found]         ` <0f0164e2-2a6e-9805-54d2-c2220614657f-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-31 12:20           ` Christian König
       [not found]             ` <663386b1-c6e8-5fa8-16d4-b3b9a58aee2b-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2017-08-31 12:20 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Christian König

Am 31.08.2017 um 00:50 schrieb Felix Kuehling:
> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>
> Some more thoughts inline, but nothing that should be addressed in this
> change.
>
> Regards,
>    Felix
>
>
> On 2017-08-30 11:00 AM, Christian König wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> Per VM BOs are handled like VM PDs and PTs. They are always valid and don't
>> need to be specified in the BO lists.
>>
>> v2: validate PDs/PTs first
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 ++++++++++++++++++++++++----------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  5 ++-
>>   3 files changed, 60 insertions(+), 26 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index f68ac56..48e18cc 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -813,7 +813,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
>>   
>>   	}
>>   
>> -	r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync);
>> +	r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync);
>>   
>>   	if (amdgpu_vm_debug && p->bo_list) {
>>   		/* Invalidate all BOs to test for userspace bugs */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 4cdfb70..6cd20e7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -189,14 +189,18 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>   			spin_unlock(&glob->lru_lock);
>>   		}
>>   
>> -		if (vm->use_cpu_for_update) {
>> +		if (bo->tbo.type == ttm_bo_type_kernel &&
>> +		    vm->use_cpu_for_update) {
>>   			r = amdgpu_bo_kmap(bo, NULL);
>>   			if (r)
>>   				return r;
>>   		}
>>   
>>   		spin_lock(&vm->status_lock);
>> -		list_move(&bo_base->vm_status, &vm->relocated);
>> +		if (bo->tbo.type != ttm_bo_type_kernel)
>> +			list_move(&bo_base->vm_status, &vm->moved);
>> +		else
>> +			list_move(&bo_base->vm_status, &vm->relocated);
>>   	}
>>   	spin_unlock(&vm->status_lock);
>>   
>> @@ -1994,20 +1998,23 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>   }
>>   
>>   /**
>> - * amdgpu_vm_clear_moved - clear moved BOs in the PT
>> + * amdgpu_vm_handle_moved - handle moved BOs in the PT
>>    *
>>    * @adev: amdgpu_device pointer
>>    * @vm: requested vm
>> + * @sync: sync object to add fences to
>>    *
>> - * Make sure all moved BOs are cleared in the PT.
>> + * Make sure all BOs which are moved are updated in the PTs.
>>    * Returns 0 for success.
>>    *
>> - * PTs have to be reserved and mutex must be locked!
>> + * PTs have to be reserved!
>>    */
>> -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> -			    struct amdgpu_sync *sync)
>> +int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>> +			   struct amdgpu_vm *vm,
>> +			   struct amdgpu_sync *sync)
>>   {
>>   	struct amdgpu_bo_va *bo_va = NULL;
>> +	bool clear;
>>   	int r = 0;
>>   
>>   	spin_lock(&vm->status_lock);
>> @@ -2016,7 +2023,10 @@ int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>   			struct amdgpu_bo_va, base.vm_status);
>>   		spin_unlock(&vm->status_lock);
>>   
>> -		r = amdgpu_vm_bo_update(adev, bo_va, true);
>> +		/* Per VM BOs never need to bo cleared in the page tables */
>> +		clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
>> +
>> +		r = amdgpu_vm_bo_update(adev, bo_va, clear);
> Just thinking out loud: Per VM BOs don't need to be cleared because they
> have separate lists for evicted and moved (but valid) BOs.

Actually the reason is that we know they are reserved together with the 
page directory.

> If we had a
> similar thing for non-VM BOs (but without the automatic validation),
> this case could be optimized. Then moved but valid BOs could be updated
> in the page table instead of clearing them here and updating the page
> table again later.

Well, that won't work because other BOs can change their location while 
we try to use their location.

But what I'm speculating about for a while now is to use trylock on the 
BOs here. That might not work all the time, but clearly most of the time.

Going to give that a try now,
Christian.

>
>>   		if (r)
>>   			return r;
>>   
>> @@ -2068,6 +2078,37 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
>>   	return bo_va;
>>   }
>>   
>> +
>> +/**
>> + * amdgpu_vm_bo_insert_mapping - insert a new mapping
>> + *
>> + * @adev: amdgpu_device pointer
>> + * @bo_va: bo_va to store the address
>> + * @mapping: the mapping to insert
>> + *
>> + * Insert a new mapping into all structures.
>> + */
>> +static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
>> +				    struct amdgpu_bo_va *bo_va,
>> +				    struct amdgpu_bo_va_mapping *mapping)
>> +{
>> +	struct amdgpu_vm *vm = bo_va->base.vm;
>> +	struct amdgpu_bo *bo = bo_va->base.bo;
>> +
>> +	list_add(&mapping->list, &bo_va->invalids);
>> +	amdgpu_vm_it_insert(mapping, &vm->va);
>> +
>> +	if (mapping->flags & AMDGPU_PTE_PRT)
>> +		amdgpu_vm_prt_get(adev);
>> +
>> +	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>> +		spin_lock(&vm->status_lock);
>> +		list_move(&bo_va->base.vm_status, &vm->moved);
>> +		spin_unlock(&vm->status_lock);
>> +	}
>> +	trace_amdgpu_vm_bo_map(bo_va, mapping);
>> +}
>> +
>>   /**
>>    * amdgpu_vm_bo_map - map bo inside a vm
>>    *
>> @@ -2119,18 +2160,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
>>   	if (!mapping)
>>   		return -ENOMEM;
>>   
>> -	INIT_LIST_HEAD(&mapping->list);
>>   	mapping->start = saddr;
>>   	mapping->last = eaddr;
>>   	mapping->offset = offset;
>>   	mapping->flags = flags;
>>   
>> -	list_add(&mapping->list, &bo_va->invalids);
>> -	amdgpu_vm_it_insert(mapping, &vm->va);
>> -
>> -	if (flags & AMDGPU_PTE_PRT)
>> -		amdgpu_vm_prt_get(adev);
>> -	trace_amdgpu_vm_bo_map(bo_va, mapping);
>> +	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>>   
>>   	return 0;
>>   }
>> @@ -2157,7 +2192,6 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
>>   {
>>   	struct amdgpu_bo_va_mapping *mapping;
>>   	struct amdgpu_bo *bo = bo_va->base.bo;
>> -	struct amdgpu_vm *vm = bo_va->base.vm;
>>   	uint64_t eaddr;
>>   	int r;
>>   
>> @@ -2191,12 +2225,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
>>   	mapping->offset = offset;
>>   	mapping->flags = flags;
>>   
>> -	list_add(&mapping->list, &bo_va->invalids);
>> -	amdgpu_vm_it_insert(mapping, &vm->va);
>> -
>> -	if (flags & AMDGPU_PTE_PRT)
>> -		amdgpu_vm_prt_get(adev);
>> -	trace_amdgpu_vm_bo_map(bo_va, mapping);
>> +	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>>   
>>   	return 0;
>>   }
>> @@ -2411,7 +2440,11 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>>   		bo_base->moved = true;
>>   		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>>   			spin_lock(&bo_base->vm->status_lock);
>> -			list_move(&bo_base->vm_status, &vm->evicted);
>> +			if (bo->tbo.type == ttm_bo_type_kernel)
>> +				list_move(&bo_base->vm_status, &vm->evicted);
>> +			else
>> +				list_move_tail(&bo_base->vm_status,
>> +					       &vm->evicted);
>>   			spin_unlock(&bo_base->vm->status_lock);
>>   			continue;
>>   		}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index c3753af..90b7741 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -249,8 +249,9 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>>   int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>   			  struct amdgpu_vm *vm,
>>   			  struct dma_fence **fence);
>> -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> -			  struct amdgpu_sync *sync);
>> +int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>> +			   struct amdgpu_vm *vm,
>> +			   struct amdgpu_sync *sync);
>>   int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>   			struct amdgpu_bo_va *bo_va,
>>   			bool clear);
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] drm/amdgpu: add IOCTL interface for per VM BOs v3
       [not found]         ` <5ec19bf2-75a2-91f2-e3d7-aeb5719f53e7-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-31 12:25           ` Christian König
  0 siblings, 0 replies; 14+ messages in thread
From: Christian König @ 2017-08-31 12:25 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 31.08.2017 um 00:55 schrieb Felix Kuehling:
> On 2017-08-30 11:00 AM, Christian König wrote:
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>>   {
>>   	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>   
>> -	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>> +	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>> +	    bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
>>   		return ERR_PTR(-EPERM);
>>   
>>   	return drm_gem_prime_export(dev, gobj, flags);
> Is this limitation necessary?

Not really, there are just two issue why I would like to have it at 
least for the GFX side:

1. The root page directory stays allocated while we wait for all the per 
VM BOs to be destroyed.

This can be fixed with reservation object reference counting, but I 
didn't had time yet to implement this.

2. The implicit fencing causes a bunch of problems for displayable 
textures. So any sharing of such BOs with the X server can cause 
undefined delays.

We need to fix this on the userspace level and add something like a 
AMDGPU_GEM_CREATE_UNSYCED flag for BO allocation.

Regards,
Christian.

> If it weren't for this, I'd use per-VM BOs
> for KFD, because we always need to validate all our BOs when we restore
> from an eviction anyway. But we need to be able to support buffer
> sharing at the same time. And we don't know which buffers an application
> plans to shared at allocation time.
>
> Either way, we could address this later. This patch is Reviewed-by:
> Felix Kuehling <Felix.Kuehling@amd.com>
>
> Regards,
>    Felix
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/4] drm/amdgpu: add support for per VM BOs v2
       [not found]             ` <663386b1-c6e8-5fa8-16d4-b3b9a58aee2b-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-31 13:20               ` Kuehling, Felix
       [not found]                 ` <DM5PR1201MB023534198780B550F5E8CBC2929D0-grEf7a3NxMBd8L2jMOIKKmrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
  0 siblings, 1 reply; 14+ messages in thread
From: Kuehling, Felix @ 2017-08-31 13:20 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Koenig, Christian

> > Just thinking out loud: Per VM BOs don't need to be cleared because they
> > have separate lists for evicted and moved (but valid) BOs.
>
> Actually the reason is that we know they are reserved together with the
> page directory.

I mentioned separate lists because that allows us to distinguish valid and invalid BOs. Non-VM BOs are all on the same list. So it's harder to tell, which ones are still valid but have moved, and which ones have been evicted and must be revalidated.

Regards,
  Felix

________________________________________
From: Christian König <deathsimple@vodafone.de>
Sent: Thursday, August 31, 2017 8:20:58 AM
To: Kuehling, Felix; amd-gfx@lists.freedesktop.org; Koenig, Christian
Subject: Re: [PATCH 2/4] drm/amdgpu: add support for per VM BOs v2

Am 31.08.2017 um 00:50 schrieb Felix Kuehling:
> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>
> Some more thoughts inline, but nothing that should be addressed in this
> change.
>
> Regards,
>    Felix
>
>
> On 2017-08-30 11:00 AM, Christian König wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> Per VM BOs are handled like VM PDs and PTs. They are always valid and don't
>> need to be specified in the BO lists.
>>
>> v2: validate PDs/PTs first
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 ++++++++++++++++++++++++----------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  5 ++-
>>   3 files changed, 60 insertions(+), 26 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index f68ac56..48e18cc 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -813,7 +813,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
>>
>>      }
>>
>> -    r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync);
>> +    r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync);
>>
>>      if (amdgpu_vm_debug && p->bo_list) {
>>              /* Invalidate all BOs to test for userspace bugs */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 4cdfb70..6cd20e7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -189,14 +189,18 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>                      spin_unlock(&glob->lru_lock);
>>              }
>>
>> -            if (vm->use_cpu_for_update) {
>> +            if (bo->tbo.type == ttm_bo_type_kernel &&
>> +                vm->use_cpu_for_update) {
>>                      r = amdgpu_bo_kmap(bo, NULL);
>>                      if (r)
>>                              return r;
>>              }
>>
>>              spin_lock(&vm->status_lock);
>> -            list_move(&bo_base->vm_status, &vm->relocated);
>> +            if (bo->tbo.type != ttm_bo_type_kernel)
>> +                    list_move(&bo_base->vm_status, &vm->moved);
>> +            else
>> +                    list_move(&bo_base->vm_status, &vm->relocated);
>>      }
>>      spin_unlock(&vm->status_lock);
>>
>> @@ -1994,20 +1998,23 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>   }
>>
>>   /**
>> - * amdgpu_vm_clear_moved - clear moved BOs in the PT
>> + * amdgpu_vm_handle_moved - handle moved BOs in the PT
>>    *
>>    * @adev: amdgpu_device pointer
>>    * @vm: requested vm
>> + * @sync: sync object to add fences to
>>    *
>> - * Make sure all moved BOs are cleared in the PT.
>> + * Make sure all BOs which are moved are updated in the PTs.
>>    * Returns 0 for success.
>>    *
>> - * PTs have to be reserved and mutex must be locked!
>> + * PTs have to be reserved!
>>    */
>> -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> -                        struct amdgpu_sync *sync)
>> +int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>> +                       struct amdgpu_vm *vm,
>> +                       struct amdgpu_sync *sync)
>>   {
>>      struct amdgpu_bo_va *bo_va = NULL;
>> +    bool clear;
>>      int r = 0;
>>
>>      spin_lock(&vm->status_lock);
>> @@ -2016,7 +2023,10 @@ int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>                      struct amdgpu_bo_va, base.vm_status);
>>              spin_unlock(&vm->status_lock);
>>
>> -            r = amdgpu_vm_bo_update(adev, bo_va, true);
>> +            /* Per VM BOs never need to bo cleared in the page tables */
>> +            clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
>> +
>> +            r = amdgpu_vm_bo_update(adev, bo_va, clear);
> Just thinking out loud: Per VM BOs don't need to be cleared because they
> have separate lists for evicted and moved (but valid) BOs.

Actually the reason is that we know they are reserved together with the
page directory.

> If we had a
> similar thing for non-VM BOs (but without the automatic validation),
> this case could be optimized. Then moved but valid BOs could be updated
> in the page table instead of clearing them here and updating the page
> table again later.

Well, that won't work because other BOs can change their location while
we try to use their location.

But what I'm speculating about for a while now is to use trylock on the
BOs here. That might not work all the time, but clearly most of the time.

Going to give that a try now,
Christian.

>
>>              if (r)
>>                      return r;
>>
>> @@ -2068,6 +2078,37 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
>>      return bo_va;
>>   }
>>
>> +
>> +/**
>> + * amdgpu_vm_bo_insert_mapping - insert a new mapping
>> + *
>> + * @adev: amdgpu_device pointer
>> + * @bo_va: bo_va to store the address
>> + * @mapping: the mapping to insert
>> + *
>> + * Insert a new mapping into all structures.
>> + */
>> +static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
>> +                                struct amdgpu_bo_va *bo_va,
>> +                                struct amdgpu_bo_va_mapping *mapping)
>> +{
>> +    struct amdgpu_vm *vm = bo_va->base.vm;
>> +    struct amdgpu_bo *bo = bo_va->base.bo;
>> +
>> +    list_add(&mapping->list, &bo_va->invalids);
>> +    amdgpu_vm_it_insert(mapping, &vm->va);
>> +
>> +    if (mapping->flags & AMDGPU_PTE_PRT)
>> +            amdgpu_vm_prt_get(adev);
>> +
>> +    if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>> +            spin_lock(&vm->status_lock);
>> +            list_move(&bo_va->base.vm_status, &vm->moved);
>> +            spin_unlock(&vm->status_lock);
>> +    }
>> +    trace_amdgpu_vm_bo_map(bo_va, mapping);
>> +}
>> +
>>   /**
>>    * amdgpu_vm_bo_map - map bo inside a vm
>>    *
>> @@ -2119,18 +2160,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
>>      if (!mapping)
>>              return -ENOMEM;
>>
>> -    INIT_LIST_HEAD(&mapping->list);
>>      mapping->start = saddr;
>>      mapping->last = eaddr;
>>      mapping->offset = offset;
>>      mapping->flags = flags;
>>
>> -    list_add(&mapping->list, &bo_va->invalids);
>> -    amdgpu_vm_it_insert(mapping, &vm->va);
>> -
>> -    if (flags & AMDGPU_PTE_PRT)
>> -            amdgpu_vm_prt_get(adev);
>> -    trace_amdgpu_vm_bo_map(bo_va, mapping);
>> +    amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>>
>>      return 0;
>>   }
>> @@ -2157,7 +2192,6 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
>>   {
>>      struct amdgpu_bo_va_mapping *mapping;
>>      struct amdgpu_bo *bo = bo_va->base.bo;
>> -    struct amdgpu_vm *vm = bo_va->base.vm;
>>      uint64_t eaddr;
>>      int r;
>>
>> @@ -2191,12 +2225,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
>>      mapping->offset = offset;
>>      mapping->flags = flags;
>>
>> -    list_add(&mapping->list, &bo_va->invalids);
>> -    amdgpu_vm_it_insert(mapping, &vm->va);
>> -
>> -    if (flags & AMDGPU_PTE_PRT)
>> -            amdgpu_vm_prt_get(adev);
>> -    trace_amdgpu_vm_bo_map(bo_va, mapping);
>> +    amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>>
>>      return 0;
>>   }
>> @@ -2411,7 +2440,11 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>>              bo_base->moved = true;
>>              if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>>                      spin_lock(&bo_base->vm->status_lock);
>> -                    list_move(&bo_base->vm_status, &vm->evicted);
>> +                    if (bo->tbo.type == ttm_bo_type_kernel)
>> +                            list_move(&bo_base->vm_status, &vm->evicted);
>> +                    else
>> +                            list_move_tail(&bo_base->vm_status,
>> +                                           &vm->evicted);
>>                      spin_unlock(&bo_base->vm->status_lock);
>>                      continue;
>>              }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index c3753af..90b7741 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -249,8 +249,9 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>>   int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>                        struct amdgpu_vm *vm,
>>                        struct dma_fence **fence);
>> -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> -                      struct amdgpu_sync *sync);
>> +int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>> +                       struct amdgpu_vm *vm,
>> +                       struct amdgpu_sync *sync);
>>   int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>                      struct amdgpu_bo_va *bo_va,
>>                      bool clear);
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/4] drm/amdgpu: add support for per VM BOs v2
       [not found]                 ` <DM5PR1201MB023534198780B550F5E8CBC2929D0-grEf7a3NxMBd8L2jMOIKKmrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
@ 2017-08-31 13:23                   ` Christian König
  0 siblings, 0 replies; 14+ messages in thread
From: Christian König @ 2017-08-31 13:23 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Koenig, Christian

Am 31.08.2017 um 15:20 schrieb Kuehling, Felix:
>>> Just thinking out loud: Per VM BOs don't need to be cleared because they
>>> have separate lists for evicted and moved (but valid) BOs.
>> Actually the reason is that we know they are reserved together with the
>> page directory.
> I mentioned separate lists because that allows us to distinguish valid and invalid BOs. Non-VM BOs are all on the same list. So it's harder to tell, which ones are still valid but have moved, and which ones have been evicted and must be revalidated.

Actually that's not an issue, even evicted BOs are handled correctly 
here and shouldn't be revalidated just because somebody uses the VM.

It's just that we could as well set the new location instead of clearing 
the VM page tables of BOs which aren't part of the current working set.

Regards,
Christian.

>
> Regards,
>    Felix
>
> ________________________________________
> From: Christian König <deathsimple@vodafone.de>
> Sent: Thursday, August 31, 2017 8:20:58 AM
> To: Kuehling, Felix; amd-gfx@lists.freedesktop.org; Koenig, Christian
> Subject: Re: [PATCH 2/4] drm/amdgpu: add support for per VM BOs v2
>
> Am 31.08.2017 um 00:50 schrieb Felix Kuehling:
>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>
>> Some more thoughts inline, but nothing that should be addressed in this
>> change.
>>
>> Regards,
>>     Felix
>>
>>
>> On 2017-08-30 11:00 AM, Christian König wrote:
>>> From: Christian König <christian.koenig@amd.com>
>>>
>>> Per VM BOs are handled like VM PDs and PTs. They are always valid and don't
>>> need to be specified in the BO lists.
>>>
>>> v2: validate PDs/PTs first
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  2 +-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 ++++++++++++++++++++++++----------
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  5 ++-
>>>    3 files changed, 60 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> index f68ac56..48e18cc 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> @@ -813,7 +813,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
>>>
>>>       }
>>>
>>> -    r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync);
>>> +    r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync);
>>>
>>>       if (amdgpu_vm_debug && p->bo_list) {
>>>               /* Invalidate all BOs to test for userspace bugs */
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 4cdfb70..6cd20e7 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -189,14 +189,18 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>                       spin_unlock(&glob->lru_lock);
>>>               }
>>>
>>> -            if (vm->use_cpu_for_update) {
>>> +            if (bo->tbo.type == ttm_bo_type_kernel &&
>>> +                vm->use_cpu_for_update) {
>>>                       r = amdgpu_bo_kmap(bo, NULL);
>>>                       if (r)
>>>                               return r;
>>>               }
>>>
>>>               spin_lock(&vm->status_lock);
>>> -            list_move(&bo_base->vm_status, &vm->relocated);
>>> +            if (bo->tbo.type != ttm_bo_type_kernel)
>>> +                    list_move(&bo_base->vm_status, &vm->moved);
>>> +            else
>>> +                    list_move(&bo_base->vm_status, &vm->relocated);
>>>       }
>>>       spin_unlock(&vm->status_lock);
>>>
>>> @@ -1994,20 +1998,23 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>>    }
>>>
>>>    /**
>>> - * amdgpu_vm_clear_moved - clear moved BOs in the PT
>>> + * amdgpu_vm_handle_moved - handle moved BOs in the PT
>>>     *
>>>     * @adev: amdgpu_device pointer
>>>     * @vm: requested vm
>>> + * @sync: sync object to add fences to
>>>     *
>>> - * Make sure all moved BOs are cleared in the PT.
>>> + * Make sure all BOs which are moved are updated in the PTs.
>>>     * Returns 0 for success.
>>>     *
>>> - * PTs have to be reserved and mutex must be locked!
>>> + * PTs have to be reserved!
>>>     */
>>> -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>> -                        struct amdgpu_sync *sync)
>>> +int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>>> +                       struct amdgpu_vm *vm,
>>> +                       struct amdgpu_sync *sync)
>>>    {
>>>       struct amdgpu_bo_va *bo_va = NULL;
>>> +    bool clear;
>>>       int r = 0;
>>>
>>>       spin_lock(&vm->status_lock);
>>> @@ -2016,7 +2023,10 @@ int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>                       struct amdgpu_bo_va, base.vm_status);
>>>               spin_unlock(&vm->status_lock);
>>>
>>> -            r = amdgpu_vm_bo_update(adev, bo_va, true);
>>> +            /* Per VM BOs never need to bo cleared in the page tables */
>>> +            clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
>>> +
>>> +            r = amdgpu_vm_bo_update(adev, bo_va, clear);
>> Just thinking out loud: Per VM BOs don't need to be cleared because they
>> have separate lists for evicted and moved (but valid) BOs.
> Actually the reason is that we know they are reserved together with the
> page directory.
>
>> If we had a
>> similar thing for non-VM BOs (but without the automatic validation),
>> this case could be optimized. Then moved but valid BOs could be updated
>> in the page table instead of clearing them here and updating the page
>> table again later.
> Well, that won't work because other BOs can change their location while
> we try to use their location.
>
> But what I'm speculating about for a while now is to use trylock on the
> BOs here. That might not work all the time, but clearly most of the time.
>
> Going to give that a try now,
> Christian.
>
>>>               if (r)
>>>                       return r;
>>>
>>> @@ -2068,6 +2078,37 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
>>>       return bo_va;
>>>    }
>>>
>>> +
>>> +/**
>>> + * amdgpu_vm_bo_insert_mapping - insert a new mapping
>>> + *
>>> + * @adev: amdgpu_device pointer
>>> + * @bo_va: bo_va to store the address
>>> + * @mapping: the mapping to insert
>>> + *
>>> + * Insert a new mapping into all structures.
>>> + */
>>> +static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
>>> +                                struct amdgpu_bo_va *bo_va,
>>> +                                struct amdgpu_bo_va_mapping *mapping)
>>> +{
>>> +    struct amdgpu_vm *vm = bo_va->base.vm;
>>> +    struct amdgpu_bo *bo = bo_va->base.bo;
>>> +
>>> +    list_add(&mapping->list, &bo_va->invalids);
>>> +    amdgpu_vm_it_insert(mapping, &vm->va);
>>> +
>>> +    if (mapping->flags & AMDGPU_PTE_PRT)
>>> +            amdgpu_vm_prt_get(adev);
>>> +
>>> +    if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>>> +            spin_lock(&vm->status_lock);
>>> +            list_move(&bo_va->base.vm_status, &vm->moved);
>>> +            spin_unlock(&vm->status_lock);
>>> +    }
>>> +    trace_amdgpu_vm_bo_map(bo_va, mapping);
>>> +}
>>> +
>>>    /**
>>>     * amdgpu_vm_bo_map - map bo inside a vm
>>>     *
>>> @@ -2119,18 +2160,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
>>>       if (!mapping)
>>>               return -ENOMEM;
>>>
>>> -    INIT_LIST_HEAD(&mapping->list);
>>>       mapping->start = saddr;
>>>       mapping->last = eaddr;
>>>       mapping->offset = offset;
>>>       mapping->flags = flags;
>>>
>>> -    list_add(&mapping->list, &bo_va->invalids);
>>> -    amdgpu_vm_it_insert(mapping, &vm->va);
>>> -
>>> -    if (flags & AMDGPU_PTE_PRT)
>>> -            amdgpu_vm_prt_get(adev);
>>> -    trace_amdgpu_vm_bo_map(bo_va, mapping);
>>> +    amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>>>
>>>       return 0;
>>>    }
>>> @@ -2157,7 +2192,6 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
>>>    {
>>>       struct amdgpu_bo_va_mapping *mapping;
>>>       struct amdgpu_bo *bo = bo_va->base.bo;
>>> -    struct amdgpu_vm *vm = bo_va->base.vm;
>>>       uint64_t eaddr;
>>>       int r;
>>>
>>> @@ -2191,12 +2225,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
>>>       mapping->offset = offset;
>>>       mapping->flags = flags;
>>>
>>> -    list_add(&mapping->list, &bo_va->invalids);
>>> -    amdgpu_vm_it_insert(mapping, &vm->va);
>>> -
>>> -    if (flags & AMDGPU_PTE_PRT)
>>> -            amdgpu_vm_prt_get(adev);
>>> -    trace_amdgpu_vm_bo_map(bo_va, mapping);
>>> +    amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>>>
>>>       return 0;
>>>    }
>>> @@ -2411,7 +2440,11 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>>>               bo_base->moved = true;
>>>               if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>>>                       spin_lock(&bo_base->vm->status_lock);
>>> -                    list_move(&bo_base->vm_status, &vm->evicted);
>>> +                    if (bo->tbo.type == ttm_bo_type_kernel)
>>> +                            list_move(&bo_base->vm_status, &vm->evicted);
>>> +                    else
>>> +                            list_move_tail(&bo_base->vm_status,
>>> +                                           &vm->evicted);
>>>                       spin_unlock(&bo_base->vm->status_lock);
>>>                       continue;
>>>               }
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index c3753af..90b7741 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -249,8 +249,9 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>>>    int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>>                         struct amdgpu_vm *vm,
>>>                         struct dma_fence **fence);
>>> -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>> -                      struct amdgpu_sync *sync);
>>> +int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>>> +                       struct amdgpu_vm *vm,
>>> +                       struct amdgpu_sync *sync);
>>>    int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>>                       struct amdgpu_bo_va *bo_va,
>>>                       bool clear);
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 4/4] drm/amdgpu: bump version for support of local BOs
       [not found] ` <1504026462-1788-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-29 17:07   ` Christian König
  0 siblings, 0 replies; 14+ messages in thread
From: Christian König @ 2017-08-29 17:07 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 21116fc..5035305c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -69,9 +69,10 @@
  * - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS.
  * - 3.18.0 - Export gpu always on cu bitmap
  * - 3.19.0 - Add support for UVD MJPEG decode
+ * - 3.20.0 - Add support for local BOs
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	19
+#define KMS_DRIVER_MINOR	20
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-08-31 13:23 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-30 15:00 [PATCH 1/4] drm/amdgpu: restrict userptr even more Christian König
     [not found] ` <1504105209-1558-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-30 15:00   ` [PATCH 2/4] drm/amdgpu: add support for per VM BOs v2 Christian König
     [not found]     ` <1504105209-1558-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-30 22:50       ` Felix Kuehling
     [not found]         ` <0f0164e2-2a6e-9805-54d2-c2220614657f-5C7GfCeVMHo@public.gmane.org>
2017-08-31 12:20           ` Christian König
     [not found]             ` <663386b1-c6e8-5fa8-16d4-b3b9a58aee2b-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-31 13:20               ` Kuehling, Felix
     [not found]                 ` <DM5PR1201MB023534198780B550F5E8CBC2929D0-grEf7a3NxMBd8L2jMOIKKmrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2017-08-31 13:23                   ` Christian König
2017-08-30 15:00   ` [PATCH 3/4] drm/amdgpu: add IOCTL interface for per VM BOs v3 Christian König
     [not found]     ` <1504105209-1558-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-30 22:55       ` Felix Kuehling
     [not found]         ` <5ec19bf2-75a2-91f2-e3d7-aeb5719f53e7-5C7GfCeVMHo@public.gmane.org>
2017-08-31 12:25           ` Christian König
2017-08-30 15:00   ` [PATCH 4/4] drm/amdgpu: bump version for support of local BOs Christian König
     [not found]     ` <1504105209-1558-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-30 23:30       ` Felix Kuehling
2017-08-30 21:00   ` [PATCH 1/4] drm/amdgpu: restrict userptr even more Felix Kuehling
2017-08-31  2:10   ` zhoucm1
  -- strict thread matches above, loose matches on Subject: below --
2017-08-29 17:07 Christian König
     [not found] ` <1504026462-1788-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-29 17:07   ` [PATCH 4/4] drm/amdgpu: bump version for support of local BOs Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.