* [PATCH 1/3] drm/i915: Convert execbuf to use struct-of-array packing for critical fields
@ 2017-06-16 16:02 Chris Wilson
  2017-06-16 16:02 ` [PATCH 2/3] drm/i915: Simplify eb_lookup_vmas() Chris Wilson
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Chris Wilson @ 2017-06-16 16:02 UTC (permalink / raw)
  To: intel-gfx

When userspace is doing most of the work, avoiding relocs (using
NO_RELOC) and opting out of implicit synchronisation (using ASYNC), we
still spend a lot of time processing the arrays in execbuf, even though
we should now have nothing to do most of the time. One issue that
becomes readily apparent when profiling anv is that iterating over the
large execobj[] is unfriendly to the CPU's loop prefetchers, which much
prefer iterating over a pair of small arrays rather than one big array.
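
As a condensed sketch (not the final code, which follows below), the
change turns the fields touched on every iteration from an
array-of-structs walk into parallel, densely-packed arrays:

	/* before: each iteration drags in a whole 56-byte execobject */
	for (i = 0; i < count; i++)
		if (eb->exec[i].flags & EXEC_OBJECT_ASYNC)
			continue;

	/* after: the hot flags pack 16 to a cacheline in their own array */
	for (i = 0; i < count; i++)
		if (eb->flags[i] & EXEC_OBJECT_ASYNC)
			continue;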

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_evict.c      |   4 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 187 +++++++++++++++--------------
 drivers/gpu/drm/i915/i915_vma.h            |   2 +-
 3 files changed, 97 insertions(+), 96 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index a193f1b36c67..4df039ef2ce3 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -318,8 +318,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 		/* Overlap of objects in the same batch? */
 		if (i915_vma_is_pinned(vma)) {
 			ret = -ENOSPC;
-			if (vma->exec_entry &&
-			    vma->exec_entry->flags & EXEC_OBJECT_PINNED)
+			if (vma->exec_flags &&
+			    *vma->exec_flags & EXEC_OBJECT_PINNED)
 				ret = -EINVAL;
 			break;
 		}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index eb46dfa374a7..9da0d209dd38 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -191,6 +191,8 @@ struct i915_execbuffer {
 	struct drm_file *file; /** per-file lookup tables and limits */
 	struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
 	struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
+	struct i915_vma **vma;
+	unsigned int *flags;
 
 	struct intel_engine_cs *engine; /** engine to queue the request to */
 	struct i915_gem_context *ctx; /** context for building the request */
@@ -245,14 +247,6 @@ struct i915_execbuffer {
 };
 
 /*
- * As an alternative to creating a hashtable of handle-to-vma for a batch,
- * we used the last available reserved field in the execobject[] and stash
- * a link from the execobj to its vma.
- */
-#define __exec_to_vma(ee) (ee)->rsvd2
-#define exec_to_vma(ee) u64_to_ptr(struct i915_vma, __exec_to_vma(ee))
-
-/*
  * Used to convert any address to canonical form.
  * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
  * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
@@ -315,7 +309,7 @@ static bool
 eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
 		 const struct i915_vma *vma)
 {
-	if (!(entry->flags & __EXEC_OBJECT_HAS_PIN))
+	if (!(*vma->exec_flags & __EXEC_OBJECT_HAS_PIN))
 		return true;
 
 	if (vma->node.size < entry->pad_to_size)
@@ -341,7 +335,7 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
 
 static inline void
 eb_pin_vma(struct i915_execbuffer *eb,
-	   struct drm_i915_gem_exec_object2 *entry,
+	   const struct drm_i915_gem_exec_object2 *entry,
 	   struct i915_vma *vma)
 {
 	u64 flags;
@@ -365,33 +359,30 @@ eb_pin_vma(struct i915_execbuffer *eb,
 		}
 
 		if (i915_vma_pin_fence(vma))
-			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+			*vma->exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
 
-	entry->flags |= __EXEC_OBJECT_HAS_PIN;
+	*vma->exec_flags |= __EXEC_OBJECT_HAS_PIN;
 }
 
-static inline void
-__eb_unreserve_vma(struct i915_vma *vma,
-		   const struct drm_i915_gem_exec_object2 *entry)
+static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
 {
-	GEM_BUG_ON(!(entry->flags & __EXEC_OBJECT_HAS_PIN));
+	GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
 
-	if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE))
+	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
 		i915_vma_unpin_fence(vma);
 
 	__i915_vma_unpin(vma);
 }
 
 static inline void
-eb_unreserve_vma(struct i915_vma *vma,
-		 struct drm_i915_gem_exec_object2 *entry)
+eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags)
 {
-	if (!(entry->flags & __EXEC_OBJECT_HAS_PIN))
+	if (!(*flags & __EXEC_OBJECT_HAS_PIN))
 		return;
 
-	__eb_unreserve_vma(vma, entry);
-	entry->flags &= ~__EXEC_OBJECT_RESERVED;
+	__eb_unreserve_vma(vma, *flags);
+	*flags &= ~__EXEC_OBJECT_RESERVED;
 }
 
 static int
@@ -421,7 +412,7 @@ eb_validate_vma(struct i915_execbuffer *eb,
 		entry->pad_to_size = 0;
 	}
 
-	if (unlikely(vma->exec_entry)) {
+	if (unlikely(vma->exec_flags)) {
 		DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
 			  entry->handle, (int)(entry - eb->exec));
 		return -EINVAL;
@@ -438,10 +429,9 @@ eb_validate_vma(struct i915_execbuffer *eb,
 }
 
 static int
-eb_add_vma(struct i915_execbuffer *eb,
-	   struct drm_i915_gem_exec_object2 *entry,
-	   struct i915_vma *vma)
+eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
 {
+	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 	int err;
 
 	GEM_BUG_ON(i915_vma_is_closed(vma));
@@ -480,13 +470,14 @@ eb_add_vma(struct i915_execbuffer *eb,
 	 * to the vma inside the execobj so that we can use a direct lookup
 	 * to find the right target VMA when doing relocations.
 	 */
-	vma->exec_entry = entry;
-	__exec_to_vma(entry) = (uintptr_t)vma;
+	eb->vma[i] = vma;
+	eb->flags[i] = entry->flags;
+	vma->exec_flags = &eb->flags[i];
 
 	err = 0;
 	eb_pin_vma(eb, entry, vma);
 	if (eb_vma_misplaced(entry, vma)) {
-		eb_unreserve_vma(vma, entry);
+		eb_unreserve_vma(vma, vma->exec_flags);
 
 		list_add_tail(&vma->exec_link, &eb->unbound);
 		if (drm_mm_node_allocated(&vma->node))
@@ -520,7 +511,8 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
 static int eb_reserve_vma(const struct i915_execbuffer *eb,
 			  struct i915_vma *vma)
 {
-	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+	struct drm_i915_gem_exec_object2 *entry =
+		&eb->exec[vma->exec_flags - eb->flags];
 	u64 flags;
 	int err;
 
@@ -554,7 +546,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
 		eb->args->flags |= __EXEC_HAS_RELOC;
 	}
 
-	entry->flags |= __EXEC_OBJECT_HAS_PIN;
+	*vma->exec_flags |= __EXEC_OBJECT_HAS_PIN;
 	GEM_BUG_ON(eb_vma_misplaced(entry, vma));
 
 	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
@@ -565,7 +557,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
 		}
 
 		if (i915_vma_pin_fence(vma))
-			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+			*vma->exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
 
 	return 0;
@@ -608,18 +600,18 @@ static int eb_reserve(struct i915_execbuffer *eb)
 		INIT_LIST_HEAD(&eb->unbound);
 		INIT_LIST_HEAD(&last);
 		for (i = 0; i < count; i++) {
-			struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+			unsigned int flags = eb->flags[i];
+			struct i915_vma *vma = eb->vma[i];
 
-			if (entry->flags & EXEC_OBJECT_PINNED &&
-			    entry->flags & __EXEC_OBJECT_HAS_PIN)
+			if (flags & EXEC_OBJECT_PINNED &&
+			    flags & __EXEC_OBJECT_HAS_PIN)
 				continue;
 
-			vma = exec_to_vma(entry);
-			eb_unreserve_vma(vma, entry);
+			eb_unreserve_vma(vma, &eb->flags[i]);
 
-			if (entry->flags & EXEC_OBJECT_PINNED)
+			if (flags & EXEC_OBJECT_PINNED)
 				list_add(&vma->exec_link, &eb->unbound);
-			else if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
+			else if (flags & __EXEC_OBJECT_NEEDS_MAP)
 				list_add_tail(&vma->exec_link, &eb->unbound);
 			else
 				list_add_tail(&vma->exec_link, &last);
@@ -707,23 +699,23 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
 
 	for (i = 0; i < count; i++) {
-		__exec_to_vma(&eb->exec[i]) = 0;
-
 		hlist_for_each_entry(vma,
 				     ht_head(lut, eb->exec[i].handle),
 				     ctx_node) {
 			if (vma->ctx_handle != eb->exec[i].handle)
 				continue;
 
-			err = eb_add_vma(eb, &eb->exec[i], vma);
+			err = eb_add_vma(eb, i, vma);
 			if (unlikely(err))
 				return err;
-
 			goto next_vma;
 		}
 
 		if (slow_pass < 0)
 			slow_pass = i;
+
+		eb->vma[i] = NULL;
+
 next_vma: ;
 	}
 
@@ -739,7 +731,7 @@ next_vma: ;
 	for (i = slow_pass; i < count; i++) {
 		struct drm_i915_gem_object *obj;
 
-		if (__exec_to_vma(&eb->exec[i]))
+		if (eb->vma[i])
 			continue;
 
 		obj = to_intel_bo(idr_find(idr, eb->exec[i].handle));
@@ -751,14 +743,17 @@ next_vma: ;
 			goto err;
 		}
 
-		__exec_to_vma(&eb->exec[i]) = INTERMEDIATE | (uintptr_t)obj;
+		eb->vma[i] =
+			(struct i915_vma *)ptr_pack_bits(obj, INTERMEDIATE, 1);
 	}
 	spin_unlock(&eb->file->table_lock);
 
 	for (i = slow_pass; i < count; i++) {
 		struct drm_i915_gem_object *obj;
+		unsigned int is_obj;
 
-		if (!(__exec_to_vma(&eb->exec[i]) & INTERMEDIATE))
+		obj = (typeof(obj))ptr_unpack_bits(eb->vma[i], &is_obj, 1);
+		if (!is_obj)
 			continue;
 
 		/*
@@ -769,8 +764,6 @@ next_vma: ;
 		 * from the (obj, vm) we don't run the risk of creating
 		 * duplicated vmas for the same vm.
 		 */
-		obj = u64_to_ptr(typeof(*obj),
-				 __exec_to_vma(&eb->exec[i]) & ~INTERMEDIATE);
 		vma = i915_vma_instance(obj, eb->vm, NULL);
 		if (unlikely(IS_ERR(vma))) {
 			DRM_DEBUG("Failed to lookup VMA\n");
@@ -794,14 +787,14 @@ next_vma: ;
 			i915_vma_get(vma);
 		}
 
-		err = eb_add_vma(eb, &eb->exec[i], vma);
+		err = eb_add_vma(eb, i, vma);
 		if (unlikely(err))
 			goto err;
 
 		/* Only after we validated the user didn't use our bits */
 		if (vma->ctx != eb->ctx) {
 			i915_vma_get(vma);
-			eb->exec[i].flags |= __EXEC_OBJECT_HAS_REF;
+			eb->flags[i] |= __EXEC_OBJECT_HAS_REF;
 		}
 	}
 
@@ -815,7 +808,8 @@ next_vma: ;
 out:
 	/* take note of the batch buffer before we might reorder the lists */
 	i = eb_batch_index(eb);
-	eb->batch = exec_to_vma(&eb->exec[i]);
+	eb->batch = eb->vma[i];
+	GEM_BUG_ON(eb->batch->exec_flags != &eb->flags[i]);
 
 	/*
 	 * SNA is doing fancy tricks with compressing batch buffers, which leads
@@ -836,8 +830,8 @@ next_vma: ;
 
 err:
 	for (i = slow_pass; i < count; i++) {
-		if (__exec_to_vma(&eb->exec[i]) & INTERMEDIATE)
-			__exec_to_vma(&eb->exec[i]) = 0;
+		if (ptr_unmask_bits(eb->vma[i], 1))
+			eb->vma[i] = NULL;
 	}
 	lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
 	return err;
@@ -850,7 +844,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
 	if (eb->lut_size < 0) {
 		if (handle >= -eb->lut_size)
 			return NULL;
-		return exec_to_vma(&eb->exec[handle]);
+		return eb->vma[handle];
 	} else {
 		struct hlist_head *head;
 		struct i915_vma *vma;
@@ -870,23 +864,20 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
 	unsigned int i;
 
 	for (i = 0; i < count; i++) {
-		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
-		struct i915_vma *vma = exec_to_vma(entry);
+		struct i915_vma *vma = eb->vma[i];
+		unsigned int flags = eb->flags[i];
 
 		if (!vma)
 			continue;
 
-		GEM_BUG_ON(vma->exec_entry != entry);
-		vma->exec_entry = NULL;
+		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
+		vma->exec_flags = NULL;
 
-		if (entry->flags & __EXEC_OBJECT_HAS_PIN)
-			__eb_unreserve_vma(vma, entry);
+		if (flags & __EXEC_OBJECT_HAS_PIN)
+			__eb_unreserve_vma(vma, flags);
 
-		if (entry->flags & __EXEC_OBJECT_HAS_REF)
+		if (flags & __EXEC_OBJECT_HAS_REF)
 			i915_vma_put(vma);
-
-		entry->flags &=
-			~(__EXEC_OBJECT_RESERVED | __EXEC_OBJECT_HAS_REF);
 	}
 }
 
@@ -1375,7 +1366,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	}
 
 	if (reloc->write_domain) {
-		target->exec_entry->flags |= EXEC_OBJECT_WRITE;
+		*target->exec_flags |= EXEC_OBJECT_WRITE;
 
 		/*
 		 * Sandybridge PPGTT errata: We need a global gtt mapping
@@ -1427,7 +1418,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	 * do relocations we are already stalling, disable the user's opt
 	 * of our synchronisation.
 	 */
-	vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC;
+	*vma->exec_flags &= ~EXEC_OBJECT_ASYNC;
 
 	/* and update the user's relocation entry */
 	return relocate_entry(vma, reloc, eb, target);
@@ -1438,7 +1429,8 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
 	struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
 	struct drm_i915_gem_relocation_entry __user *urelocs;
-	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+	const struct drm_i915_gem_exec_object2 *entry =
+		&eb->exec[vma->exec_flags - eb->flags];
 	unsigned int remain;
 
 	urelocs = u64_to_user_ptr(entry->relocs_ptr);
@@ -1521,7 +1513,8 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
 static int
 eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma)
 {
-	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+	const struct drm_i915_gem_exec_object2 *entry =
+		&eb->exec[vma->exec_flags - eb->flags];
 	struct drm_i915_gem_relocation_entry *relocs =
 		u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
 	unsigned int i;
@@ -1725,6 +1718,8 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
 	if (err)
 		goto err;
 
+	GEM_BUG_ON(!eb->batch);
+
 	list_for_each_entry(vma, &eb->relocs, reloc_link) {
 		if (!have_copy) {
 			pagefault_disable();
@@ -1818,11 +1813,10 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 	int err;
 
 	for (i = 0; i < count; i++) {
-		const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
-		struct i915_vma *vma = exec_to_vma(entry);
-		struct drm_i915_gem_object *obj = vma->obj;
+		unsigned int flags = eb->flags[i];
+		struct drm_i915_gem_object *obj;
 
-		if (entry->flags & EXEC_OBJECT_CAPTURE) {
+		if (flags & EXEC_OBJECT_CAPTURE) {
 			struct i915_gem_capture_list *capture;
 
 			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
@@ -1830,33 +1824,34 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 				return -ENOMEM;
 
 			capture->next = eb->request->capture_list;
-			capture->vma = vma;
+			capture->vma = eb->vma[i];
 			eb->request->capture_list = capture;
 		}
 
-		if (entry->flags & EXEC_OBJECT_ASYNC)
-			goto skip_flushes;
+		if (flags & EXEC_OBJECT_ASYNC)
+			continue;
 
+		obj = eb->vma[i]->obj;
 		if (unlikely(obj->cache_dirty && !obj->cache_coherent))
 			i915_gem_clflush_object(obj, 0);
 
 		err = i915_gem_request_await_object
-			(eb->request, obj, entry->flags & EXEC_OBJECT_WRITE);
+			(eb->request, obj, flags & EXEC_OBJECT_WRITE);
 		if (err)
 			return err;
-
-skip_flushes:
-		i915_vma_move_to_active(vma, eb->request, entry->flags);
-		__eb_unreserve_vma(vma, entry);
-		vma->exec_entry = NULL;
 	}
 
 	for (i = 0; i < count; i++) {
-		const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
-		struct i915_vma *vma = exec_to_vma(entry);
+		struct i915_vma *vma = eb->vma[i];
+		unsigned int flags = eb->flags[i];
+
+		i915_vma_move_to_active(vma, eb->request, flags);
+		eb_export_fence(vma, eb->request, flags);
+
+		__eb_unreserve_vma(vma, flags);
+		vma->exec_flags = NULL;
 
-		eb_export_fence(vma, eb->request, entry->flags);
-		if (unlikely(entry->flags & __EXEC_OBJECT_HAS_REF))
+		if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
 			i915_vma_put(vma);
 	}
 	eb->exec = NULL;
@@ -1983,11 +1978,11 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
 	if (IS_ERR(vma))
 		goto out;
 
-	vma->exec_entry =
-		memset(&eb->exec[eb->buffer_count++],
-		       0, sizeof(*vma->exec_entry));
-	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
-	__exec_to_vma(vma->exec_entry) = (uintptr_t)i915_vma_get(vma);
+	eb->vma[eb->buffer_count] = i915_vma_get(vma);
+	eb->flags[eb->buffer_count] =
+		__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
+	vma->exec_flags = &eb->flags[eb->buffer_count];
+	eb->buffer_count++;
 
 out:
 	i915_gem_object_unpin_pages(shadow_batch_obj);
@@ -2128,6 +2123,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		args->flags |= __EXEC_HAS_RELOC;
 	eb.exec = exec;
 	eb.ctx = NULL;
+	eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1);
+	eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);
 	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
 	if (USES_FULL_PPGTT(eb.i915))
 		eb.invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
@@ -2211,7 +2208,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (err < 0)
 		goto err_vma;
 
-	if (unlikely(eb.batch->exec_entry->flags & EXEC_OBJECT_WRITE)) {
+	if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) {
 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
 		err = -EINVAL;
 		goto err_vma;
@@ -2354,7 +2351,9 @@ int
 i915_gem_execbuffer(struct drm_device *dev, void *data,
 		    struct drm_file *file)
 {
-	const size_t sz = sizeof(struct drm_i915_gem_exec_object2);
+	const size_t sz = (sizeof(struct drm_i915_gem_exec_object2) +
+			   sizeof(struct i915_vma *) +
+			   sizeof(unsigned int));
 	struct drm_i915_gem_execbuffer *args = data;
 	struct drm_i915_gem_execbuffer2 exec2;
 	struct drm_i915_gem_exec_object *exec_list = NULL;
@@ -2445,7 +2444,9 @@ int
 i915_gem_execbuffer2(struct drm_device *dev, void *data,
 		     struct drm_file *file)
 {
-	const size_t sz = sizeof(struct drm_i915_gem_exec_object2);
+	const size_t sz = (sizeof(struct drm_i915_gem_exec_object2) +
+			   sizeof(struct i915_vma *) +
+			   sizeof(unsigned int));
 	struct drm_i915_gem_execbuffer2 *args = data;
 	struct drm_i915_gem_exec_object2 *exec2_list;
 	int err;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 4a673fc1a432..5f1356e93c6c 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -112,7 +112,7 @@ struct i915_vma {
 	/**
 	 * Used for performing relocations during execbuffer insertion.
 	 */
-	struct drm_i915_gem_exec_object2 *exec_entry;
+	unsigned int *exec_flags;
 	struct hlist_node exec_node;
 	u32 exec_handle;
 
-- 
2.11.0


* [PATCH 2/3] drm/i915: Simplify eb_lookup_vmas()
  2017-06-16 16:02 [PATCH 1/3] drm/i915: Convert execbuf to use struct-of-array packing for critical fields Chris Wilson
@ 2017-06-16 16:02 ` Chris Wilson
  2017-06-20 10:11   ` Tvrtko Ursulin
  2017-06-16 16:02 ` [PATCH 3/3] drm/i915: Replace execbuf vma ht with an idr Chris Wilson
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2017-06-16 16:02 UTC (permalink / raw)
  To: intel-gfx

Since the introduction of a lockless lookup of an object
(i915_gem_object_get_rcu() in fbbd37b36fa5 ("drm/i915: Move object
release to a freelist + worker")), we no longer need to split the
object/vma lookup into 3 phases, so combine them into a much simpler
single loop.
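
In outline the loop becomes (condensed from the diff below;
lookup_ctx_ht() stands in for the open-coded hlist walk, and error
handling is elided):

	for (i = 0; i < eb->buffer_count; i++) {
		u32 handle = eb->exec[i].handle;
		struct i915_vma *vma;

		vma = lookup_ctx_ht(eb->ctx, handle); /* fast path */
		if (!vma) {
			/* lockless lookup, safe against concurrent free */
			obj = i915_gem_object_lookup(eb->file, handle);
			vma = i915_vma_instance(obj, eb->vm, NULL);
			/* first come, first served: publish vma in the ht */
		}

		err = eb_add_vma(eb, i, vma, flags);
	}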

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 140 +++++++++--------------------
 1 file changed, 42 insertions(+), 98 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 9da0d209dd38..18d6581bc6c5 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -47,16 +47,16 @@ enum {
 #define DBG_FORCE_RELOC 0 /* choose one of the above! */
 };
 
-#define __EXEC_OBJECT_HAS_REF		BIT(31)
-#define __EXEC_OBJECT_HAS_PIN		BIT(30)
-#define __EXEC_OBJECT_HAS_FENCE		BIT(29)
-#define __EXEC_OBJECT_NEEDS_MAP		BIT(28)
-#define __EXEC_OBJECT_NEEDS_BIAS	BIT(27)
-#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 27) /* all of the above */
+#define __EXEC_OBJECT_VALIDATED		BIT(31)
+#define __EXEC_OBJECT_HAS_REF		BIT(30)
+#define __EXEC_OBJECT_HAS_PIN		BIT(29)
+#define __EXEC_OBJECT_HAS_FENCE		BIT(28)
+#define __EXEC_OBJECT_NEEDS_MAP		BIT(27)
+#define __EXEC_OBJECT_NEEDS_BIAS	BIT(26)
+#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 26) /* all of the above */
 #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
 
 #define __EXEC_HAS_RELOC	BIT(31)
-#define __EXEC_VALIDATED	BIT(30)
 #define UPDATE			PIN_OFFSET_FIXED
 
 #define BATCH_OFFSET_BIAS (256*1024)
@@ -429,14 +429,16 @@ eb_validate_vma(struct i915_execbuffer *eb,
 }
 
 static int
-eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
+eb_add_vma(struct i915_execbuffer *eb,
+	   unsigned int i, struct i915_vma *vma,
+	   unsigned int flags)
 {
 	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 	int err;
 
 	GEM_BUG_ON(i915_vma_is_closed(vma));
 
-	if (!(eb->args->flags & __EXEC_VALIDATED)) {
+	if (!(flags & __EXEC_OBJECT_VALIDATED)) {
 		err = eb_validate_vma(eb, entry, vma);
 		if (unlikely(err))
 			return err;
@@ -471,7 +473,7 @@ eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
 	 * to find the right target VMA when doing relocations.
 	 */
 	eb->vma[i] = vma;
-	eb->flags[i] = entry->flags;
+	eb->flags[i] = entry->flags | flags;
 	vma->exec_flags = &eb->flags[i];
 
 	err = 0;
@@ -680,15 +682,11 @@ static int eb_select_context(struct i915_execbuffer *eb)
 	return 0;
 }
 
-static int eb_lookup_vmas(struct i915_execbuffer *eb)
+static int eb_lookup_vmas(struct i915_execbuffer *eb, unsigned int flags)
 {
-#define INTERMEDIATE BIT(0)
-	const unsigned int count = eb->buffer_count;
 	struct i915_gem_context_vma_lut *lut = &eb->ctx->vma_lut;
-	struct i915_vma *vma;
-	struct idr *idr;
+	struct drm_i915_gem_object *uninitialized_var(obj);
 	unsigned int i;
-	int slow_pass = -1;
 	int err;
 
 	INIT_LIST_HEAD(&eb->relocs);
@@ -698,85 +696,39 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 		flush_work(&lut->resize);
 	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
 
-	for (i = 0; i < count; i++) {
-		hlist_for_each_entry(vma,
-				     ht_head(lut, eb->exec[i].handle),
-				     ctx_node) {
-			if (vma->ctx_handle != eb->exec[i].handle)
-				continue;
-
-			err = eb_add_vma(eb, i, vma);
-			if (unlikely(err))
-				return err;
-			goto next_vma;
-		}
-
-		if (slow_pass < 0)
-			slow_pass = i;
-
-		eb->vma[i] = NULL;
-
-next_vma: ;
-	}
+	for (i = 0; i < eb->buffer_count; i++) {
+		u32 handle = eb->exec[i].handle;
+		struct hlist_head *hl = ht_head(lut, handle);
+		struct i915_vma *vma;
 
-	if (slow_pass < 0)
-		goto out;
+		hlist_for_each_entry(vma, hl, ctx_node) {
+			GEM_BUG_ON(vma->ctx != eb->ctx);
 
-	spin_lock(&eb->file->table_lock);
-	/*
-	 * Grab a reference to the object and release the lock so we can lookup
-	 * or create the VMA without using GFP_ATOMIC
-	 */
-	idr = &eb->file->object_idr;
-	for (i = slow_pass; i < count; i++) {
-		struct drm_i915_gem_object *obj;
+			if (vma->ctx_handle != handle)
+				continue;
 
-		if (eb->vma[i])
-			continue;
+			goto add_vma;
+		}
 
-		obj = to_intel_bo(idr_find(idr, eb->exec[i].handle));
+		obj = i915_gem_object_lookup(eb->file, handle);
 		if (unlikely(!obj)) {
-			spin_unlock(&eb->file->table_lock);
-			DRM_DEBUG("Invalid object handle %d at index %d\n",
-				  eb->exec[i].handle, i);
 			err = -ENOENT;
-			goto err;
+			goto err_vma;
 		}
 
-		eb->vma[i] =
-			(struct i915_vma *)ptr_pack_bits(obj, INTERMEDIATE, 1);
-	}
-	spin_unlock(&eb->file->table_lock);
-
-	for (i = slow_pass; i < count; i++) {
-		struct drm_i915_gem_object *obj;
-		unsigned int is_obj;
-
-		obj = (typeof(obj))ptr_unpack_bits(eb->vma[i], &is_obj, 1);
-		if (!is_obj)
-			continue;
+		flags |= __EXEC_OBJECT_HAS_REF;
 
-		/*
-		 * NOTE: We can leak any vmas created here when something fails
-		 * later on. But that's no issue since vma_unbind can deal with
-		 * vmas which are not actually bound. And since only
-		 * lookup_or_create exists as an interface to get at the vma
-		 * from the (obj, vm) we don't run the risk of creating
-		 * duplicated vmas for the same vm.
-		 */
 		vma = i915_vma_instance(obj, eb->vm, NULL);
 		if (unlikely(IS_ERR(vma))) {
-			DRM_DEBUG("Failed to lookup VMA\n");
 			err = PTR_ERR(vma);
-			goto err;
+			goto err_obj;
 		}
 
 		/* First come, first served */
 		if (!vma->ctx) {
 			vma->ctx = eb->ctx;
-			vma->ctx_handle = eb->exec[i].handle;
-			hlist_add_head(&vma->ctx_node,
-				       ht_head(lut, eb->exec[i].handle));
+			vma->ctx_handle = handle;
+			hlist_add_head(&vma->ctx_node, hl);
 			lut->ht_count++;
 			lut->ht_size |= I915_CTX_RESIZE_IN_PROGRESS;
 			if (i915_vma_is_ggtt(vma)) {
@@ -784,18 +736,14 @@ next_vma: ;
 				obj->vma_hashed = vma;
 			}
 
-			i915_vma_get(vma);
+			/* transfer ref to ctx */
+			flags &= ~__EXEC_OBJECT_HAS_REF;
 		}
 
-		err = eb_add_vma(eb, i, vma);
+add_vma:
+		err = eb_add_vma(eb, i, vma, flags);
 		if (unlikely(err))
-			goto err;
-
-		/* Only after we validated the user didn't use our bits */
-		if (vma->ctx != eb->ctx) {
-			i915_vma_get(vma);
-			eb->flags[i] |= __EXEC_OBJECT_HAS_REF;
-		}
+			goto err_vma;
 	}
 
 	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
@@ -805,7 +753,6 @@ next_vma: ;
 			lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
 	}
 
-out:
 	/* take note of the batch buffer before we might reorder the lists */
 	i = eb_batch_index(eb);
 	eb->batch = eb->vma[i];
@@ -825,17 +772,14 @@ next_vma: ;
 	if (eb->reloc_cache.has_fence)
 		eb->exec[i].flags |= EXEC_OBJECT_NEEDS_FENCE;
 
-	eb->args->flags |= __EXEC_VALIDATED;
 	return eb_reserve(eb);
 
-err:
-	for (i = slow_pass; i < count; i++) {
-		if (ptr_unmask_bits(eb->vma[i], 1))
-			eb->vma[i] = NULL;
-	}
+err_obj:
+	i915_gem_object_put(obj);
+err_vma:
+	eb->vma[i] = NULL;
 	lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
 	return err;
-#undef INTERMEDIATE
 }
 
 static struct i915_vma *
@@ -868,7 +812,7 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
 		unsigned int flags = eb->flags[i];
 
 		if (!vma)
-			continue;
+			break;
 
 		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
 		vma->exec_flags = NULL;
@@ -1714,7 +1658,7 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
 	}
 
 	/* reacquire the objects */
-	err = eb_lookup_vmas(eb);
+	err = eb_lookup_vmas(eb, __EXEC_OBJECT_VALIDATED);
 	if (err)
 		goto err;
 
@@ -1768,7 +1712,7 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
 
 static int eb_relocate(struct i915_execbuffer *eb)
 {
-	if (eb_lookup_vmas(eb))
+	if (eb_lookup_vmas(eb, 0))
 		goto slow;
 
 	/* The objects are in their final locations, apply the relocations. */
-- 
2.11.0


* [PATCH 3/3] drm/i915: Replace execbuf vma ht with an idr
  2017-06-16 16:02 [PATCH 1/3] drm/i915: Convert execbuf to use struct-of-array packing for critical fields Chris Wilson
  2017-06-16 16:02 ` [PATCH 2/3] drm/i915: Simplify eb_lookup_vmas() Chris Wilson
@ 2017-06-16 16:02 ` Chris Wilson
  2017-06-20 11:26   ` Tvrtko Ursulin
  2017-06-16 16:43 ` ✓ Fi.CI.BAT: success for series starting with [1/3] drm/i915: Convert execbuf to use struct-of-array packing for critical fields Patchwork
  2017-06-19 12:06 ` [PATCH 1/3] " Tvrtko Ursulin
  3 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2017-06-16 16:02 UTC (permalink / raw)
  To: intel-gfx

This was the competing idea long ago, but it was only with the rewrite
of the idr as a radixtree, using the radixtree directly ourselves, and
the realisation that we can store the vma directly in the radixtree
(needing only a list for the reverse mapping) that the patch became
performant enough to displace using a hashtable. Though the vma ht is
fast and doesn't require an extra allocation (as we can embed the node
inside the vma), it does require a thread for resizing and
serialization. That is hairy enough to investigate alternatives and
favour them if equivalent in performance. One advantage of allocating
an indirection entry is that we can support a single shared bo between
many clients, something that was previously done on a first-come,
first-served basis for shared GGTT vma. To offset the extra
allocations, we create yet another kmem_cache for them.
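
Condensed, the lookup in eb_lookup_vmas() becomes (error handling
elided):

	vma = radix_tree_lookup(&ctx->handles_vma, handle);
	if (!vma) {
		obj = i915_gem_object_lookup(eb->file, handle);
		vma = i915_vma_instance(obj, eb->vm, NULL);

		lut = kmem_cache_alloc(i915->luts, GFP_KERNEL);
		radix_tree_insert(&ctx->handles_vma, handle, vma);

		/* reverse mappings, walked on object/context close */
		list_add(&lut->obj_link, &obj->lut_list);
		list_add(&lut->ctx_link, &ctx->handles_list);
		lut->ctx = ctx;
		lut->handle = handle;
	}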

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c           |  6 --
 drivers/gpu/drm/i915/i915_drv.h               |  1 +
 drivers/gpu/drm/i915/i915_gem.c               | 32 ++++++++--
 drivers/gpu/drm/i915/i915_gem_context.c       | 86 +++++----------------------
 drivers/gpu/drm/i915/i915_gem_context.h       | 30 ++--------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    | 70 +++++++---------------
 drivers/gpu/drm/i915/i915_gem_object.h        | 10 +++-
 drivers/gpu/drm/i915/i915_vma.c               | 22 -------
 drivers/gpu/drm/i915/i915_vma.h               |  4 --
 drivers/gpu/drm/i915/selftests/mock_context.c | 15 ++---
 lib/radix-tree.c                              |  1 +
 11 files changed, 81 insertions(+), 196 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 4577b0af6886..a6ba2100bb88 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1998,12 +1998,6 @@ static int i915_context_status(struct seq_file *m, void *unused)
 			seq_putc(m, '\n');
 		}
 
-		seq_printf(m,
-			   "\tvma hashtable size=%u (actual %lu), count=%u\n",
-			   ctx->vma_lut.ht_size,
-			   BIT(ctx->vma_lut.ht_bits),
-			   ctx->vma_lut.ht_count);
-
 		seq_putc(m, '\n');
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cd15697c1ca4..de8dd0c06ce6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2083,6 +2083,7 @@ struct drm_i915_private {
 
 	struct kmem_cache *objects;
 	struct kmem_cache *vmas;
+	struct kmem_cache *luts;
 	struct kmem_cache *requests;
 	struct kmem_cache *dependencies;
 	struct kmem_cache *priorities;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7dcac3bfb771..3388c65b54a3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3252,19 +3252,33 @@ i915_gem_idle_work_handler(struct work_struct *work)
 
 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 {
+	struct drm_i915_private *i915 = to_i915(gem->dev);
 	struct drm_i915_gem_object *obj = to_intel_bo(gem);
 	struct drm_i915_file_private *fpriv = file->driver_priv;
 	struct i915_vma *vma, *vn;
+	struct i915_lut_handle *lut, *ln;
 
 	mutex_lock(&obj->base.dev->struct_mutex);
+
+	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
+		struct i915_gem_context *ctx = lut->ctx;
+
+		if (ctx->file_priv != fpriv)
+			continue;
+
+		radix_tree_delete(&ctx->handles_vma, lut->handle);
+		i915_gem_object_put(obj);
+
+		list_del(&lut->obj_link);
+		list_del(&lut->ctx_link);
+
+		kmem_cache_free(i915->luts, lut);
+	}
+
 	list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
 		if (vma->vm->file == fpriv)
 			i915_vma_close(vma);
 
-	vma = obj->vma_hashed;
-	if (vma && vma->ctx->file_priv == fpriv)
-		i915_vma_unlink_ctx(vma);
-
 	if (i915_gem_object_is_active(obj) &&
 	    !i915_gem_object_has_active_reference(obj)) {
 		i915_gem_object_set_active_reference(obj);
@@ -4259,6 +4273,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	INIT_LIST_HEAD(&obj->global_link);
 	INIT_LIST_HEAD(&obj->userfault_link);
 	INIT_LIST_HEAD(&obj->vma_list);
+	INIT_LIST_HEAD(&obj->lut_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
 
 	obj->ops = ops;
@@ -4897,12 +4912,16 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 	if (!dev_priv->vmas)
 		goto err_objects;
 
+	dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0);
+	if (!dev_priv->luts)
+		goto err_vmas;
+
 	dev_priv->requests = KMEM_CACHE(drm_i915_gem_request,
 					SLAB_HWCACHE_ALIGN |
 					SLAB_RECLAIM_ACCOUNT |
 					SLAB_TYPESAFE_BY_RCU);
 	if (!dev_priv->requests)
-		goto err_vmas;
+		goto err_luts;
 
 	dev_priv->dependencies = KMEM_CACHE(i915_dependency,
 					    SLAB_HWCACHE_ALIGN |
@@ -4949,6 +4968,8 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 	kmem_cache_destroy(dev_priv->dependencies);
 err_requests:
 	kmem_cache_destroy(dev_priv->requests);
+err_luts:
+	kmem_cache_destroy(dev_priv->luts);
 err_vmas:
 	kmem_cache_destroy(dev_priv->vmas);
 err_objects:
@@ -4971,6 +4992,7 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
 	kmem_cache_destroy(dev_priv->priorities);
 	kmem_cache_destroy(dev_priv->dependencies);
 	kmem_cache_destroy(dev_priv->requests);
+	kmem_cache_destroy(dev_priv->luts);
 	kmem_cache_destroy(dev_priv->vmas);
 	kmem_cache_destroy(dev_priv->objects);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 39ed58a21fc1..dc182a8dd4e8 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -93,69 +93,21 @@
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
-/* Initial size (as log2) to preallocate the handle->object hashtable */
-#define VMA_HT_BITS 2u /* 4 x 2 pointers, 64 bytes minimum */
-
-static void resize_vma_ht(struct work_struct *work)
+static void lut_close(struct i915_gem_context *ctx)
 {
-	struct i915_gem_context_vma_lut *lut =
-		container_of(work, typeof(*lut), resize);
-	unsigned int bits, new_bits, size, i;
-	struct hlist_head *new_ht;
-
-	GEM_BUG_ON(!(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS));
-
-	bits = 1 + ilog2(4*lut->ht_count/3 + 1);
-	new_bits = min_t(unsigned int,
-			 max(bits, VMA_HT_BITS),
-			 sizeof(unsigned int) * BITS_PER_BYTE - 1);
-	if (new_bits == lut->ht_bits)
-		goto out;
-
-	new_ht = kzalloc(sizeof(*new_ht)<<new_bits, GFP_KERNEL | __GFP_NOWARN);
-	if (!new_ht)
-		new_ht = vzalloc(sizeof(*new_ht)<<new_bits);
-	if (!new_ht)
-		/* Pretend resize succeeded and stop calling us for a bit! */
-		goto out;
+	struct i915_lut_handle *lut, *ln;
+	struct radix_tree_iter iter;
+	void __rcu **slot;
 
-	size = BIT(lut->ht_bits);
-	for (i = 0; i < size; i++) {
-		struct i915_vma *vma;
-		struct hlist_node *tmp;
-
-		hlist_for_each_entry_safe(vma, tmp, &lut->ht[i], ctx_node)
-			hlist_add_head(&vma->ctx_node,
-				       &new_ht[hash_32(vma->ctx_handle,
-						       new_bits)]);
+	list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
+		list_del(&lut->obj_link);
+		kmem_cache_free(ctx->i915->luts, lut);
 	}
-	kvfree(lut->ht);
-	lut->ht = new_ht;
-	lut->ht_bits = new_bits;
-out:
-	smp_store_release(&lut->ht_size, BIT(bits));
-	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
-}
 
-static void vma_lut_free(struct i915_gem_context *ctx)
-{
-	struct i915_gem_context_vma_lut *lut = &ctx->vma_lut;
-	unsigned int i, size;
-
-	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)
-		cancel_work_sync(&lut->resize);
-
-	size = BIT(lut->ht_bits);
-	for (i = 0; i < size; i++) {
-		struct i915_vma *vma;
-
-		hlist_for_each_entry(vma, &lut->ht[i], ctx_node) {
-			vma->obj->vma_hashed = NULL;
-			vma->ctx = NULL;
-			i915_vma_put(vma);
-		}
+	radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
+		i915_vma_put(rcu_dereference_raw(*slot));
+		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
 	}
-	kvfree(lut->ht);
 }
 
 void i915_gem_context_free(struct kref *ctx_ref)
@@ -167,7 +119,6 @@ void i915_gem_context_free(struct kref *ctx_ref)
 	trace_i915_context_free(ctx);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 
-	vma_lut_free(ctx);
 	i915_ppgtt_put(ctx->ppgtt);
 
 	for (i = 0; i < I915_NUM_ENGINES; i++) {
@@ -195,8 +146,11 @@ void i915_gem_context_free(struct kref *ctx_ref)
 static void context_close(struct i915_gem_context *ctx)
 {
 	i915_gem_context_set_closed(ctx);
+
+	lut_close(ctx);
 	if (ctx->ppgtt)
 		i915_ppgtt_close(&ctx->ppgtt->base);
+
 	ctx->file_priv = ERR_PTR(-EBADF);
 	i915_gem_context_put(ctx);
 }
@@ -269,16 +223,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	ctx->i915 = dev_priv;
 	ctx->priority = I915_PRIORITY_NORMAL;
 
-	ctx->vma_lut.ht_bits = VMA_HT_BITS;
-	ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
-	BUILD_BUG_ON(BIT(VMA_HT_BITS) == I915_CTX_RESIZE_IN_PROGRESS);
-	ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
-				  sizeof(*ctx->vma_lut.ht),
-				  GFP_KERNEL);
-	if (!ctx->vma_lut.ht)
-		goto err_out;
-
-	INIT_WORK(&ctx->vma_lut.resize, resize_vma_ht);
+	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
+	INIT_LIST_HEAD(&ctx->handles_list);
 
 	/* Default context will never have a file_priv */
 	ret = DEFAULT_CONTEXT_HANDLE;
@@ -328,8 +274,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	put_pid(ctx->pid);
 	idr_remove(&file_priv->context_idr, ctx->user_handle);
 err_lut:
-	kvfree(ctx->vma_lut.ht);
-err_out:
 	context_close(ctx);
 	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 82c99ba92ad3..cd0c5d8e0db7 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -27,6 +27,7 @@
 
 #include <linux/bitops.h>
 #include <linux/list.h>
+#include <linux/radix-tree.h>
 
 struct pid;
 
@@ -143,32 +144,6 @@ struct i915_gem_context {
 	/** ggtt_offset_bias: placement restriction for context objects */
 	u32 ggtt_offset_bias;
 
-	struct i915_gem_context_vma_lut {
-		/** ht_size: last request size to allocate the hashtable for. */
-		unsigned int ht_size;
-#define I915_CTX_RESIZE_IN_PROGRESS BIT(0)
-		/** ht_bits: real log2(size) of hashtable. */
-		unsigned int ht_bits;
-		/** ht_count: current number of entries inside the hashtable */
-		unsigned int ht_count;
-
-		/** ht: the array of buckets comprising the simple hashtable */
-		struct hlist_head *ht;
-
-		/**
-		 * resize: After an execbuf completes, we check the load factor
-		 * of the hashtable. If the hashtable is too full, or too empty,
-		 * we schedule a task to resize the hashtable. During the
-		 * resize, the entries are moved between different buckets and
-		 * so we cannot simultaneously read the hashtable as it is
-		 * being resized (unlike rhashtable). Therefore we treat the
-		 * active work as a strong barrier, pausing a subsequent
-		 * execbuf to wait for the resize worker to complete, if
-		 * required.
-		 */
-		struct work_struct resize;
-	} vma_lut;
-
 	/** engine: per-engine logical HW state */
 	struct intel_context {
 		struct i915_vma *state;
@@ -199,6 +174,9 @@ struct i915_gem_context {
 
 	/** remap_slice: Bitmask of cache lines that need remapping */
 	u8 remap_slice;
+
+	struct radix_tree_root handles_vma;
+	struct list_head handles_list;
 };
 
 static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 18d6581bc6c5..adcea065985c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -637,19 +637,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
 	} while (1);
 }
 
-static inline struct hlist_head *
-ht_head(const  struct i915_gem_context_vma_lut *lut, u32 handle)
-{
-	return &lut->ht[hash_32(handle, lut->ht_bits)];
-}
-
-static inline bool
-ht_needs_resize(const struct i915_gem_context_vma_lut *lut)
-{
-	return (4*lut->ht_count > 3*lut->ht_size ||
-		4*lut->ht_count + 1 < lut->ht_size);
-}
-
 static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
 {
 	if (eb->args->flags & I915_EXEC_BATCH_FIRST)
@@ -684,31 +671,22 @@ static int eb_select_context(struct i915_execbuffer *eb)
 
 static int eb_lookup_vmas(struct i915_execbuffer *eb, unsigned int flags)
 {
-	struct i915_gem_context_vma_lut *lut = &eb->ctx->vma_lut;
-	struct drm_i915_gem_object *uninitialized_var(obj);
+	struct drm_i915_gem_object *obj;
+	struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
 	unsigned int i;
 	int err;
 
 	INIT_LIST_HEAD(&eb->relocs);
 	INIT_LIST_HEAD(&eb->unbound);
 
-	if (unlikely(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS))
-		flush_work(&lut->resize);
-	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
-
 	for (i = 0; i < eb->buffer_count; i++) {
 		u32 handle = eb->exec[i].handle;
-		struct hlist_head *hl = ht_head(lut, handle);
 		struct i915_vma *vma;
+		struct i915_lut_handle *lut;
 
-		hlist_for_each_entry(vma, hl, ctx_node) {
-			GEM_BUG_ON(vma->ctx != eb->ctx);
-
-			if (vma->ctx_handle != handle)
-				continue;
-
+		vma = radix_tree_lookup(handles_vma, handle);
+		if (likely(vma))
 			goto add_vma;
-		}
 
 		obj = i915_gem_object_lookup(eb->file, handle);
 		if (unlikely(!obj)) {
@@ -716,43 +694,36 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb, unsigned int flags)
 			goto err_vma;
 		}
 
-		flags |= __EXEC_OBJECT_HAS_REF;
-
 		vma = i915_vma_instance(obj, eb->vm, NULL);
 		if (unlikely(IS_ERR(vma))) {
 			err = PTR_ERR(vma);
 			goto err_obj;
 		}
 
-		/* First come, first served */
-		if (!vma->ctx) {
-			vma->ctx = eb->ctx;
-			vma->ctx_handle = handle;
-			hlist_add_head(&vma->ctx_node, hl);
-			lut->ht_count++;
-			lut->ht_size |= I915_CTX_RESIZE_IN_PROGRESS;
-			if (i915_vma_is_ggtt(vma)) {
-				GEM_BUG_ON(obj->vma_hashed);
-				obj->vma_hashed = vma;
-			}
+		lut = kmem_cache_alloc(eb->i915->luts, GFP_KERNEL);
+		if (unlikely(!lut)) {
+			err = -ENOMEM;
+			goto err_obj;
+		}
 
-			/* transfer ref to ctx */
-			flags &= ~__EXEC_OBJECT_HAS_REF;
+		/* transfer ref to ctx */
+		err = radix_tree_insert(handles_vma, handle, vma);
+		if (unlikely(err)) {
+			kfree(lut);
+			goto err_obj;
 		}
 
+		list_add(&lut->obj_link, &obj->lut_list);
+		list_add(&lut->ctx_link, &eb->ctx->handles_list);
+		lut->ctx = eb->ctx;
+		lut->handle = handle;
+
 add_vma:
 		err = eb_add_vma(eb, i, vma, flags);
 		if (unlikely(err))
 			goto err_vma;
 	}
 
-	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
-		if (ht_needs_resize(lut))
-			queue_work(system_highpri_wq, &lut->resize);
-		else
-			lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
-	}
-
 	/* take note of the batch buffer before we might reorder the lists */
 	i = eb_batch_index(eb);
 	eb->batch = eb->vma[i];
@@ -778,7 +749,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb, unsigned int flags)
 	i915_gem_object_put(obj);
 err_vma:
 	eb->vma[i] = NULL;
-	lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 5b19a4916a4d..8188d3cf5e11 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -35,6 +35,13 @@
 
 #include "i915_selftest.h"
 
+struct i915_lut_handle {
+	struct list_head obj_link;
+	struct list_head ctx_link;
+	struct i915_gem_context *ctx;
+	u32 handle;
+};
+
 struct drm_i915_gem_object_ops {
 	unsigned int flags;
 #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
@@ -86,7 +93,8 @@ struct drm_i915_gem_object {
 	 * They are also added to @vma_list for easy iteration.
 	 */
 	struct rb_root vma_tree;
-	struct i915_vma *vma_hashed;
+
+	struct list_head lut_list;
 
 	/** Stolen memory for this object, instead of being backed by shmem. */
 	struct drm_mm_node *stolen;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 532c709febbd..93329a809764 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -591,33 +591,11 @@ static void i915_vma_destroy(struct i915_vma *vma)
 	kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
 }
 
-void i915_vma_unlink_ctx(struct i915_vma *vma)
-{
-	struct i915_gem_context *ctx = vma->ctx;
-
-	if (ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
-		cancel_work_sync(&ctx->vma_lut.resize);
-		ctx->vma_lut.ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
-	}
-
-	__hlist_del(&vma->ctx_node);
-	ctx->vma_lut.ht_count--;
-
-	if (i915_vma_is_ggtt(vma))
-		vma->obj->vma_hashed = NULL;
-	vma->ctx = NULL;
-
-	i915_vma_put(vma);
-}
-
 void i915_vma_close(struct i915_vma *vma)
 {
 	GEM_BUG_ON(i915_vma_is_closed(vma));
 	vma->flags |= I915_VMA_CLOSED;
 
-	if (vma->ctx)
-		i915_vma_unlink_ctx(vma);
-
 	list_del(&vma->obj_link);
 	rb_erase(&vma->obj_node, &vma->obj->vma_tree);
 
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 5f1356e93c6c..6d35cf7ddcfb 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -115,10 +115,6 @@ struct i915_vma {
 	unsigned int *exec_flags;
 	struct hlist_node exec_node;
 	u32 exec_handle;
-
-	struct i915_gem_context *ctx;
-	struct hlist_node ctx_node;
-	u32 ctx_handle;
 };
 
 struct i915_vma *
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index f8b9cc212b02..3ee31ef84607 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -40,18 +40,13 @@ mock_context(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&ctx->link);
 	ctx->i915 = i915;
 
-	ctx->vma_lut.ht_bits = VMA_HT_BITS;
-	ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
-	ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
-				  sizeof(*ctx->vma_lut.ht),
-				  GFP_KERNEL);
-	if (!ctx->vma_lut.ht)
-		goto err_free;
+	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
+	INIT_LIST_HEAD(&ctx->handles_list);
 
 	ret = ida_simple_get(&i915->context_hw_ida,
 			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
 	if (ret < 0)
-		goto err_vma_ht;
+		goto err_handles;
 	ctx->hw_id = ret;
 
 	if (name) {
@@ -66,9 +61,7 @@ mock_context(struct drm_i915_private *i915,
 
 	return ctx;
 
-err_vma_ht:
-	kvfree(ctx->vma_lut.ht);
-err_free:
+err_handles:
 	kfree(ctx);
 	return NULL;
 
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 898e87998417..3527eb364964 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -2022,6 +2022,7 @@ void radix_tree_iter_delete(struct radix_tree_root *root,
 	if (__radix_tree_delete(root, iter->node, slot))
 		iter->index = iter->next_index;
 }
+EXPORT_SYMBOL(radix_tree_iter_delete);
 
 /**
  * radix_tree_delete_item - delete an item from a radix tree
-- 
2.11.0


* ✓ Fi.CI.BAT: success for series starting with [1/3] drm/i915: Convert execbuf to use struct-of-array packing for critical fields
  2017-06-16 16:02 [PATCH 1/3] drm/i915: Convert execbuf to use struct-of-array packing for critical fields Chris Wilson
  2017-06-16 16:02 ` [PATCH 2/3] drm/i915: Simplify eb_lookup_vmas() Chris Wilson
  2017-06-16 16:02 ` [PATCH 3/3] drm/i915: Replace execbuf vma ht with an idr Chris Wilson
@ 2017-06-16 16:43 ` Patchwork
  2017-06-19 12:06 ` [PATCH 1/3] " Tvrtko Ursulin
  3 siblings, 0 replies; 10+ messages in thread
From: Patchwork @ 2017-06-16 16:43 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: Convert execbuf to use struct-of-array packing for critical fields
URL   : https://patchwork.freedesktop.org/series/25921/
State : success

== Summary ==

Series 25921v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/25921/revisions/1/mbox/

Test gem_exec_flush:
        Subgroup basic-batch-kernel-default-uc:
                fail       -> PASS       (fi-snb-2600) fdo#100007

fdo#100007 https://bugs.freedesktop.org/show_bug.cgi?id=100007

fi-bdw-5557u     total:278  pass:267  dwarn:0   dfail:0   fail:0   skip:11  time:468s
fi-bdw-gvtdvm    total:278  pass:256  dwarn:8   dfail:0   fail:0   skip:14  time:485s
fi-bsw-n3050     total:278  pass:242  dwarn:0   dfail:0   fail:0   skip:36  time:582s
fi-bxt-j4205     total:278  pass:259  dwarn:0   dfail:0   fail:0   skip:19  time:548s
fi-byt-j1900     total:278  pass:254  dwarn:0   dfail:0   fail:0   skip:24  time:491s
fi-byt-n2820     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time:489s
fi-glk-2a        total:278  pass:259  dwarn:0   dfail:0   fail:0   skip:19  time:588s
fi-hsw-4770      total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time:431s
fi-hsw-4770r     total:278  pass:261  dwarn:0   dfail:0   fail:0   skip:16  time:424s
fi-ilk-650       total:278  pass:227  dwarn:0   dfail:0   fail:0   skip:50  time:463s
fi-ivb-3520m     total:278  pass:259  dwarn:0   dfail:0   fail:0   skip:18  time:502s
fi-ivb-3770      total:278  pass:259  dwarn:0   dfail:0   fail:0   skip:18  time:519s
fi-kbl-7500u     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time:470s
fi-kbl-7560u     total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time:567s
fi-kbl-r         total:278  pass:259  dwarn:1   dfail:0   fail:0   skip:18  time:577s
fi-skl-6260u     total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time:475s
fi-skl-6700hq    total:278  pass:228  dwarn:1   dfail:0   fail:27  skip:22  time:435s
fi-skl-6700k     total:278  pass:256  dwarn:4   dfail:0   fail:0   skip:18  time:511s
fi-skl-6770hq    total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time:509s
fi-skl-gvtdvm    total:278  pass:265  dwarn:0   dfail:0   fail:0   skip:13  time:507s
fi-snb-2520m     total:278  pass:249  dwarn:0   dfail:0   fail:0   skip:28  time:632s
fi-snb-2600      total:278  pass:248  dwarn:0   dfail:0   fail:0   skip:29  time:400s

5c57aae3824c148c466226fc53305fa1f2c274c1 drm-tip: 2017y-06m-16d-15h-56m-29s UTC integration manifest
51d76ab drm/i915: Replace execbuf vma ht with an idr
57b25be drm/i915: Simplify eb_lookup_vmas()
9620ceb drm/i915: Convert execbuf to use struct-of-array packing for critical fields

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_4973/

* Re: [PATCH 1/3] drm/i915: Convert execbuf to use struct-of-array packing for critical fields
  2017-06-16 16:02 [PATCH 1/3] drm/i915: Convert execbuf to use struct-of-array packing for critical fields Chris Wilson
                   ` (2 preceding siblings ...)
  2017-06-16 16:43 ` ✓ Fi.CI.BAT: success for series starting with [1/3] drm/i915: Convert execbuf to use struct-of-array packing for critical fields Patchwork
@ 2017-06-19 12:06 ` Tvrtko Ursulin
  2017-06-19 12:22   ` Chris Wilson
  3 siblings, 1 reply; 10+ messages in thread
From: Tvrtko Ursulin @ 2017-06-19 12:06 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 16/06/2017 17:02, Chris Wilson wrote:
> When userspace is doing most of the work, avoiding relocs (using
> NO_RELOC) and opting out of implicit synchronisation (using ASYNC), we
> still spend a lot of time processing the arrays in execbuf, even though
> we should now have nothing to do most of the time. One issue that
> becomes readily apparent when profiling anv is that iterating over the
> large execobj[] is unfriendly to the CPU's loop prefetchers, which much
> prefer iterating over a pair of small arrays rather than one big array.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_evict.c      |   4 +-
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 187 +++++++++++++++--------------
>   drivers/gpu/drm/i915/i915_vma.h            |   2 +-
>   3 files changed, 97 insertions(+), 96 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index a193f1b36c67..4df039ef2ce3 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -318,8 +318,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
>   		/* Overlap of objects in the same batch? */
>   		if (i915_vma_is_pinned(vma)) {
>   			ret = -ENOSPC;
> -			if (vma->exec_entry &&
> -			    vma->exec_entry->flags & EXEC_OBJECT_PINNED)
> +			if (vma->exec_flags &&
> +			    *vma->exec_flags & EXEC_OBJECT_PINNED)
>   				ret = -EINVAL;
>   			break;
>   		}
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index eb46dfa374a7..9da0d209dd38 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -191,6 +191,8 @@ struct i915_execbuffer {
>   	struct drm_file *file; /** per-file lookup tables and limits */
>   	struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
>   	struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
> +	struct i915_vma **vma;
> +	unsigned int *flags;
>   
>   	struct intel_engine_cs *engine; /** engine to queue the request to */
>   	struct i915_gem_context *ctx; /** context for building the request */
> @@ -245,14 +247,6 @@ struct i915_execbuffer {
>   };
>   
>   /*
> - * As an alternative to creating a hashtable of handle-to-vma for a batch,
> - * we used the last available reserved field in the execobject[] and stash
> - * a link from the execobj to its vma.
> - */
> -#define __exec_to_vma(ee) (ee)->rsvd2
> -#define exec_to_vma(ee) u64_to_ptr(struct i915_vma, __exec_to_vma(ee))
> -
> -/*
>    * Used to convert any address to canonical form.
>    * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
>    * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
> @@ -315,7 +309,7 @@ static bool
>   eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
>   		 const struct i915_vma *vma)
>   {
> -	if (!(entry->flags & __EXEC_OBJECT_HAS_PIN))
> +	if (!(*vma->exec_flags & __EXEC_OBJECT_HAS_PIN))
>   		return true;
>   
>   	if (vma->node.size < entry->pad_to_size)
> @@ -341,7 +335,7 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
>   
>   static inline void
>   eb_pin_vma(struct i915_execbuffer *eb,
> -	   struct drm_i915_gem_exec_object2 *entry,
> +	   const struct drm_i915_gem_exec_object2 *entry,
>   	   struct i915_vma *vma)
>   {
>   	u64 flags;
> @@ -365,33 +359,30 @@ eb_pin_vma(struct i915_execbuffer *eb,
>   		}
>   
>   		if (i915_vma_pin_fence(vma))
> -			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
> +			*vma->exec_flags |= __EXEC_OBJECT_HAS_FENCE;
>   	}
>   
> -	entry->flags |= __EXEC_OBJECT_HAS_PIN;
> +	*vma->exec_flags |= __EXEC_OBJECT_HAS_PIN;
>   }
>   
> -static inline void
> -__eb_unreserve_vma(struct i915_vma *vma,
> -		   const struct drm_i915_gem_exec_object2 *entry)
> +static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
>   {
> -	GEM_BUG_ON(!(entry->flags & __EXEC_OBJECT_HAS_PIN));
> +	GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
>   
> -	if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE))
> +	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
>   		i915_vma_unpin_fence(vma);
>   
>   	__i915_vma_unpin(vma);
>   }
>   
>   static inline void
> -eb_unreserve_vma(struct i915_vma *vma,
> -		 struct drm_i915_gem_exec_object2 *entry)
> +eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags)
>   {
> -	if (!(entry->flags & __EXEC_OBJECT_HAS_PIN))
> +	if (!(*flags & __EXEC_OBJECT_HAS_PIN))
>   		return;
>   
> -	__eb_unreserve_vma(vma, entry);
> -	entry->flags &= ~__EXEC_OBJECT_RESERVED;
> +	__eb_unreserve_vma(vma, *flags);
> +	*flags &= ~__EXEC_OBJECT_RESERVED;
>   }
>   
>   static int
> @@ -421,7 +412,7 @@ eb_validate_vma(struct i915_execbuffer *eb,
>   		entry->pad_to_size = 0;
>   	}
>   
> -	if (unlikely(vma->exec_entry)) {
> +	if (unlikely(vma->exec_flags)) {
>   		DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
>   			  entry->handle, (int)(entry - eb->exec));
>   		return -EINVAL;
> @@ -438,10 +429,9 @@ eb_validate_vma(struct i915_execbuffer *eb,
>   }
>   
>   static int
> -eb_add_vma(struct i915_execbuffer *eb,
> -	   struct drm_i915_gem_exec_object2 *entry,
> -	   struct i915_vma *vma)
> +eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
>   {
> +	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
>   	int err;
>   
>   	GEM_BUG_ON(i915_vma_is_closed(vma));
> @@ -480,13 +470,14 @@ eb_add_vma(struct i915_execbuffer *eb,
>   	 * to the vma inside the execobj so that we can use a direct lookup
>   	 * to find the right target VMA when doing relocations.
>   	 */
> -	vma->exec_entry = entry;
> -	__exec_to_vma(entry) = (uintptr_t)vma;
> +	eb->vma[i] = vma;
> +	eb->flags[i] = entry->flags;
> +	vma->exec_flags = &eb->flags[i];
>   
>   	err = 0;
>   	eb_pin_vma(eb, entry, vma);
>   	if (eb_vma_misplaced(entry, vma)) {
> -		eb_unreserve_vma(vma, entry);
> +		eb_unreserve_vma(vma, vma->exec_flags);
>   
>   		list_add_tail(&vma->exec_link, &eb->unbound);
>   		if (drm_mm_node_allocated(&vma->node))
> @@ -520,7 +511,8 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
>   static int eb_reserve_vma(const struct i915_execbuffer *eb,
>   			  struct i915_vma *vma)
>   {
> -	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> +	struct drm_i915_gem_exec_object2 *entry =
> +		&eb->exec[vma->exec_flags - eb->flags];

There are three instances of this, so maybe "entry = exec_entry(eb,
vma)"? An added benefit is that the exec_entry() implementation could
host a comment explaining the magic.
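
Something like this minimal sketch, say (the helper name and the comment
wording are just a suggestion; the pointer arithmetic is lifted straight
from the patch):

static inline struct drm_i915_gem_exec_object2 *
exec_entry(const struct i915_execbuffer *eb, const struct i915_vma *vma)
{
	/*
	 * eb->flags[] is allocated alongside eb->exec[], one entry per
	 * execobj, and vma->exec_flags points into it; the pointer
	 * difference is therefore the index of this vma in the execobj
	 * array.
	 */
	return &eb->exec[vma->exec_flags - eb->flags];
}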

>   	u64 flags;
>   	int err;
>   
> @@ -554,7 +546,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
>   		eb->args->flags |= __EXEC_HAS_RELOC;
>   	}
>   
> -	entry->flags |= __EXEC_OBJECT_HAS_PIN;
> +	*vma->exec_flags |= __EXEC_OBJECT_HAS_PIN;
>   	GEM_BUG_ON(eb_vma_misplaced(entry, vma));
>   
>   	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
> @@ -565,7 +557,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
>   		}
>   
>   		if (i915_vma_pin_fence(vma))
> -			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
> +			*vma->exec_flags |= __EXEC_OBJECT_HAS_FENCE;
>   	}
>   
>   	return 0;
> @@ -608,18 +600,18 @@ static int eb_reserve(struct i915_execbuffer *eb)
>   		INIT_LIST_HEAD(&eb->unbound);
>   		INIT_LIST_HEAD(&last);
>   		for (i = 0; i < count; i++) {
> -			struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
> +			unsigned int flags = eb->flags[i];
> +			struct i915_vma *vma = eb->vma[i];
>   
> -			if (entry->flags & EXEC_OBJECT_PINNED &&
> -			    entry->flags & __EXEC_OBJECT_HAS_PIN)
> +			if (flags & EXEC_OBJECT_PINNED &&
> +			    flags & __EXEC_OBJECT_HAS_PIN)
>   				continue;
>   
> -			vma = exec_to_vma(entry);
> -			eb_unreserve_vma(vma, entry);
> +			eb_unreserve_vma(vma, &eb->flags[i]);
>   
> -			if (entry->flags & EXEC_OBJECT_PINNED)
> +			if (flags & EXEC_OBJECT_PINNED)
>   				list_add(&vma->exec_link, &eb->unbound);
> -			else if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
> +			else if (flags & __EXEC_OBJECT_NEEDS_MAP)
>   				list_add_tail(&vma->exec_link, &eb->unbound);
>   			else
>   				list_add_tail(&vma->exec_link, &last);
> @@ -707,23 +699,23 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
>   	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
>   
>   	for (i = 0; i < count; i++) {
> -		__exec_to_vma(&eb->exec[i]) = 0;
> -
>   		hlist_for_each_entry(vma,
>   				     ht_head(lut, eb->exec[i].handle),
>   				     ctx_node) {
>   			if (vma->ctx_handle != eb->exec[i].handle)
>   				continue;
>   
> -			err = eb_add_vma(eb, &eb->exec[i], vma);
> +			err = eb_add_vma(eb, i, vma);
>   			if (unlikely(err))
>   				return err;
> -
>   			goto next_vma;
>   		}
>   
>   		if (slow_pass < 0)
>   			slow_pass = i;
> +
> +		eb->vma[i] = NULL;
> +
>   next_vma: ;
>   	}
>   
> @@ -739,7 +731,7 @@ next_vma: ;
>   	for (i = slow_pass; i < count; i++) {
>   		struct drm_i915_gem_object *obj;
>   
> -		if (__exec_to_vma(&eb->exec[i]))
> +		if (eb->vma[i])
>   			continue;
>   
>   		obj = to_intel_bo(idr_find(idr, eb->exec[i].handle));
> @@ -751,14 +743,17 @@ next_vma: ;
>   			goto err;
>   		}
>   
> -		__exec_to_vma(&eb->exec[i]) = INTERMEDIATE | (uintptr_t)obj;
> +		eb->vma[i] =
> +			(struct i915_vma *)ptr_pack_bits(obj, INTERMEDIATE, 1);
>   	}
>   	spin_unlock(&eb->file->table_lock);
>   
>   	for (i = slow_pass; i < count; i++) {
>   		struct drm_i915_gem_object *obj;
> +		unsigned int is_obj;
>   
> -		if (!(__exec_to_vma(&eb->exec[i]) & INTERMEDIATE))
> +		obj = (typeof(obj))ptr_unpack_bits(eb->vma[i], &is_obj, 1);
> +		if (!is_obj)
>   			continue;
>   
>   		/*
> @@ -769,8 +764,6 @@ next_vma: ;
>   		 * from the (obj, vm) we don't run the risk of creating
>   		 * duplicated vmas for the same vm.
>   		 */
> -		obj = u64_to_ptr(typeof(*obj),
> -				 __exec_to_vma(&eb->exec[i]) & ~INTERMEDIATE);
>   		vma = i915_vma_instance(obj, eb->vm, NULL);
>   		if (unlikely(IS_ERR(vma))) {
>   			DRM_DEBUG("Failed to lookup VMA\n");
> @@ -794,14 +787,14 @@ next_vma: ;
>   			i915_vma_get(vma);
>   		}
>   
> -		err = eb_add_vma(eb, &eb->exec[i], vma);
> +		err = eb_add_vma(eb, i, vma);
>   		if (unlikely(err))
>   			goto err;
>   
>   		/* Only after we validated the user didn't use our bits */
>   		if (vma->ctx != eb->ctx) {
>   			i915_vma_get(vma);
> -			eb->exec[i].flags |= __EXEC_OBJECT_HAS_REF;
> +			eb->flags[i] |= __EXEC_OBJECT_HAS_REF;
>   		}
>   	}
>   
> @@ -815,7 +808,8 @@ next_vma: ;
>   out:
>   	/* take note of the batch buffer before we might reorder the lists */
>   	i = eb_batch_index(eb);
> -	eb->batch = exec_to_vma(&eb->exec[i]);
> +	eb->batch = eb->vma[i];
> +	GEM_BUG_ON(eb->batch->exec_flags != &eb->flags[i]);
>   
>   	/*
>   	 * SNA is doing fancy tricks with compressing batch buffers, which leads
> @@ -836,8 +830,8 @@ next_vma: ;
>   
>   err:
>   	for (i = slow_pass; i < count; i++) {
> -		if (__exec_to_vma(&eb->exec[i]) & INTERMEDIATE)
> -			__exec_to_vma(&eb->exec[i]) = 0;
> +		if (ptr_unmask_bits(eb->vma[i], 1))
> +			eb->vma[i] = NULL;
>   	}
>   	lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
>   	return err;
> @@ -850,7 +844,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
>   	if (eb->lut_size < 0) {
>   		if (handle >= -eb->lut_size)
>   			return NULL;
> -		return exec_to_vma(&eb->exec[handle]);
> +		return eb->vma[handle];
>   	} else {
>   		struct hlist_head *head;
>   		struct i915_vma *vma;
> @@ -870,23 +864,20 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
>   	unsigned int i;
>   
>   	for (i = 0; i < count; i++) {
> -		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
> -		struct i915_vma *vma = exec_to_vma(entry);
> +		struct i915_vma *vma = eb->vma[i];
> +		unsigned int flags = eb->flags[i];
>   
>   		if (!vma)
>   			continue;
>   
> -		GEM_BUG_ON(vma->exec_entry != entry);
> -		vma->exec_entry = NULL;
> +		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
> +		vma->exec_flags = NULL;
>   
> -		if (entry->flags & __EXEC_OBJECT_HAS_PIN)
> -			__eb_unreserve_vma(vma, entry);
> +		if (flags & __EXEC_OBJECT_HAS_PIN)
> +			__eb_unreserve_vma(vma, flags);
>   
> -		if (entry->flags & __EXEC_OBJECT_HAS_REF)
> +		if (flags & __EXEC_OBJECT_HAS_REF)
>   			i915_vma_put(vma);
> -
> -		entry->flags &=
> -			~(__EXEC_OBJECT_RESERVED | __EXEC_OBJECT_HAS_REF);

No need to clear these from eb->flags[i] to ensure no double free?

>   	}
>   }
>   
> @@ -1375,7 +1366,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
>   	}
>   
>   	if (reloc->write_domain) {
> -		target->exec_entry->flags |= EXEC_OBJECT_WRITE;
> +		*target->exec_flags |= EXEC_OBJECT_WRITE;
>   
>   		/*
>   		 * Sandybridge PPGTT errata: We need a global gtt mapping
> @@ -1427,7 +1418,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
>   	 * do relocations we are already stalling, disable the user's opt
>   	 * of our synchronisation.
>   	 */
> -	vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC;
> +	*vma->exec_flags &= ~EXEC_OBJECT_ASYNC;
>   
>   	/* and update the user's relocation entry */
>   	return relocate_entry(vma, reloc, eb, target);
> @@ -1438,7 +1429,8 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
>   #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
>   	struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
>   	struct drm_i915_gem_relocation_entry __user *urelocs;
> -	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> +	const struct drm_i915_gem_exec_object2 *entry =
> +		&eb->exec[vma->exec_flags - eb->flags];
>   	unsigned int remain;
>   
>   	urelocs = u64_to_user_ptr(entry->relocs_ptr);
> @@ -1521,7 +1513,8 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
>   static int
>   eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma)
>   {
> -	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> +	const struct drm_i915_gem_exec_object2 *entry =
> +		&eb->exec[vma->exec_flags - eb->flags];
>   	struct drm_i915_gem_relocation_entry *relocs =
>   		u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
>   	unsigned int i;
> @@ -1725,6 +1718,8 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
>   	if (err)
>   		goto err;
>   
> +	GEM_BUG_ON(!eb->batch);
> +
>   	list_for_each_entry(vma, &eb->relocs, reloc_link) {
>   		if (!have_copy) {
>   			pagefault_disable();
> @@ -1818,11 +1813,10 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
>   	int err;
>   
>   	for (i = 0; i < count; i++) {
> -		const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
> -		struct i915_vma *vma = exec_to_vma(entry);
> -		struct drm_i915_gem_object *obj = vma->obj;
> +		unsigned int flags = eb->flags[i];
> +		struct drm_i915_gem_object *obj;
>   
> -		if (entry->flags & EXEC_OBJECT_CAPTURE) {
> +		if (flags & EXEC_OBJECT_CAPTURE) {
>   			struct i915_gem_capture_list *capture;
>   
>   			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
> @@ -1830,33 +1824,34 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
>   				return -ENOMEM;
>   
>   			capture->next = eb->request->capture_list;
> -			capture->vma = vma;
> +			capture->vma = eb->vma[i];
>   			eb->request->capture_list = capture;
>   		}
>   
> -		if (entry->flags & EXEC_OBJECT_ASYNC)
> -			goto skip_flushes;
> +		if (flags & EXEC_OBJECT_ASYNC)
> +			continue;
>   
> +		obj = eb->vma[i]->obj;
>   		if (unlikely(obj->cache_dirty && !obj->cache_coherent))
>   			i915_gem_clflush_object(obj, 0);
>   
>   		err = i915_gem_request_await_object
> -			(eb->request, obj, entry->flags & EXEC_OBJECT_WRITE);
> +			(eb->request, obj, flags & EXEC_OBJECT_WRITE);
>   		if (err)
>   			return err;
> -
> -skip_flushes:
> -		i915_vma_move_to_active(vma, eb->request, entry->flags);
> -		__eb_unreserve_vma(vma, entry);
> -		vma->exec_entry = NULL;

Grumble, it would have been better if this change had been in a separate patch.

>   	}
>   
>   	for (i = 0; i < count; i++) {
> -		const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
> -		struct i915_vma *vma = exec_to_vma(entry);
> +		struct i915_vma *vma = eb->vma[i];
> +		unsigned int flags = eb->flags[i];
> +
> +		i915_vma_move_to_active(vma, eb->request, flags);
> +		eb_export_fence(vma, eb->request, flags);
> +
> +		__eb_unreserve_vma(vma, flags);
> +		vma->exec_flags = NULL;
>   
> -		eb_export_fence(vma, eb->request, entry->flags);
> -		if (unlikely(entry->flags & __EXEC_OBJECT_HAS_REF))
> +		if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
>   			i915_vma_put(vma);
>   	}
>   	eb->exec = NULL;
> @@ -1983,11 +1978,11 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
>   	if (IS_ERR(vma))
>   		goto out;
>   
> -	vma->exec_entry =
> -		memset(&eb->exec[eb->buffer_count++],
> -		       0, sizeof(*vma->exec_entry));
> -	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
> -	__exec_to_vma(vma->exec_entry) = (uintptr_t)i915_vma_get(vma);
> +	eb->vma[eb->buffer_count] = i915_vma_get(vma);
> +	eb->flags[eb->buffer_count] =
> +		__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
> +	vma->exec_flags = &eb->flags[eb->buffer_count];
> +	eb->buffer_count++;
>   
>   out:
>   	i915_gem_object_unpin_pages(shadow_batch_obj);
> @@ -2128,6 +2123,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   		args->flags |= __EXEC_HAS_RELOC;
>   	eb.exec = exec;
>   	eb.ctx = NULL;
> +	eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1);
> +	eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);
>   	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
>   	if (USES_FULL_PPGTT(eb.i915))
>   		eb.invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
> @@ -2211,7 +2208,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   	if (err < 0)
>   		goto err_vma;
>   
> -	if (unlikely(eb.batch->exec_entry->flags & EXEC_OBJECT_WRITE)) {
> +	if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) {
>   		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
>   		err = -EINVAL;
>   		goto err_vma;
> @@ -2354,7 +2351,9 @@ int
>   i915_gem_execbuffer(struct drm_device *dev, void *data,
>   		    struct drm_file *file)
>   {
> -	const size_t sz = sizeof(struct drm_i915_gem_exec_object2);
> +	const size_t sz = (sizeof(struct drm_i915_gem_exec_object2) +
> +			   sizeof(struct i915_vma *) +
> +			   sizeof(unsigned int));
>   	struct drm_i915_gem_execbuffer *args = data;
>   	struct drm_i915_gem_execbuffer2 exec2;
>   	struct drm_i915_gem_exec_object *exec_list = NULL;
> @@ -2445,7 +2444,9 @@ int
>   i915_gem_execbuffer2(struct drm_device *dev, void *data,
>   		     struct drm_file *file)
>   {
> -	const size_t sz = sizeof(struct drm_i915_gem_exec_object2);
> +	const size_t sz = (sizeof(struct drm_i915_gem_exec_object2) +
> +			   sizeof(struct i915_vma *) +
> +			   sizeof(unsigned int));
>   	struct drm_i915_gem_execbuffer2 *args = data;
>   	struct drm_i915_gem_exec_object2 *exec2_list;
>   	int err;
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 4a673fc1a432..5f1356e93c6c 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -112,7 +112,7 @@ struct i915_vma {
>   	/**
>   	 * Used for performing relocations during execbuffer insertion.
>   	 */
> -	struct drm_i915_gem_exec_object2 *exec_entry;
> +	unsigned int *exec_flags;
>   	struct hlist_node exec_node;
>   	u32 exec_handle;
>   
> 

Regards,

Tvrtko

* Re: [PATCH 1/3] drm/i915: Convert execbuf to use struct-of-array packing for critical fields
  2017-06-19 12:06 ` [PATCH 1/3] " Tvrtko Ursulin
@ 2017-06-19 12:22   ` Chris Wilson
  2017-06-20 10:15     ` Tvrtko Ursulin
  0 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2017-06-19 12:22 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2017-06-19 13:06:18)
> 
> On 16/06/2017 17:02, Chris Wilson wrote:
> > @@ -520,7 +511,8 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
> >   static int eb_reserve_vma(const struct i915_execbuffer *eb,
> >                         struct i915_vma *vma)
> >   {
> > -     struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> > +     struct drm_i915_gem_exec_object2 *entry =
> > +             &eb->exec[vma->exec_flags - eb->flags];
> 
> There are three instances of this, so maybe "entry = exec_entry(eb,
> vma)"? An added benefit is that the exec_entry() implementation could
> host a comment explaining the magic.

You were meant to jump in and suggest some way I would store the index.
:)

> > @@ -870,23 +864,20 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
> >       unsigned int i;
> >   
> >       for (i = 0; i < count; i++) {
> > -             struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
> > -             struct i915_vma *vma = exec_to_vma(entry);
> > +             struct i915_vma *vma = eb->vma[i];
> > +             unsigned int flags = eb->flags[i];
> >   
> >               if (!vma)
> >                       continue;
> >   
> > -             GEM_BUG_ON(vma->exec_entry != entry);
> > -             vma->exec_entry = NULL;
> > +             GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
> > +             vma->exec_flags = NULL;
> >   
> > -             if (entry->flags & __EXEC_OBJECT_HAS_PIN)
> > -                     __eb_unreserve_vma(vma, entry);
> > +             if (flags & __EXEC_OBJECT_HAS_PIN)
> > +                     __eb_unreserve_vma(vma, flags);
> >   
> > -             if (entry->flags & __EXEC_OBJECT_HAS_REF)
> > +             if (flags & __EXEC_OBJECT_HAS_REF)
> >                       i915_vma_put(vma);
> > -
> > -             entry->flags &=
> > -                     ~(__EXEC_OBJECT_RESERVED | __EXEC_OBJECT_HAS_REF);
> 
> No need to clear these from eb->flags[i] to ensure no double free?

No, the other path that may drop the vma prevents the double-free by
setting count to 0. Along the error path we don't have the vma set for
incomplete loads.

> > @@ -1830,33 +1824,34 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
> >                               return -ENOMEM;
> >   
> >                       capture->next = eb->request->capture_list;
> > -                     capture->vma = vma;
> > +                     capture->vma = eb->vma[i];
> >                       eb->request->capture_list = capture;
> >               }
> >   
> > -             if (entry->flags & EXEC_OBJECT_ASYNC)
> > -                     goto skip_flushes;
> > +             if (flags & EXEC_OBJECT_ASYNC)
> > +                     continue;
> >   
> > +             obj = eb->vma[i]->obj;
> >               if (unlikely(obj->cache_dirty && !obj->cache_coherent))
> >                       i915_gem_clflush_object(obj, 0);
> >   
> >               err = i915_gem_request_await_object
> > -                     (eb->request, obj, entry->flags & EXEC_OBJECT_WRITE);
> > +                     (eb->request, obj, flags & EXEC_OBJECT_WRITE);
> >               if (err)
> >                       return err;
> > -
> > -skip_flushes:
> > -             i915_vma_move_to_active(vma, eb->request, entry->flags);
> > -             __eb_unreserve_vma(vma, entry);
> > -             vma->exec_entry = NULL;
> 
> Grumble, it would have been better if this change had been in a separate patch.

Grumble. It was more meaningful in an earlier version of the patch,
where there was a dance here with flags.
-Chris

* Re: [PATCH 2/3] drm/i915: Simplify eb_lookup_vmas()
  2017-06-16 16:02 ` [PATCH 2/3] drm/i915: Simplify eb_lookup_vmas() Chris Wilson
@ 2017-06-20 10:11   ` Tvrtko Ursulin
  0 siblings, 0 replies; 10+ messages in thread
From: Tvrtko Ursulin @ 2017-06-20 10:11 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 16/06/2017 17:02, Chris Wilson wrote:
> Since the introduction of the ability to perform a lockless lookup of an
> object (i915_gem_object_get_rcu() in fbbd37b36fa5 ("drm/i915: Move object
> release to a freelist + worker")), we no longer need to split the
> object/vma lookup into 3 phases and so can combine them into a much
> simpler single loop.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 140 +++++++++--------------------
>   1 file changed, 42 insertions(+), 98 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 9da0d209dd38..18d6581bc6c5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -47,16 +47,16 @@ enum {
>   #define DBG_FORCE_RELOC 0 /* choose one of the above! */
>   };
>   
> -#define __EXEC_OBJECT_HAS_REF		BIT(31)
> -#define __EXEC_OBJECT_HAS_PIN		BIT(30)
> -#define __EXEC_OBJECT_HAS_FENCE		BIT(29)
> -#define __EXEC_OBJECT_NEEDS_MAP		BIT(28)
> -#define __EXEC_OBJECT_NEEDS_BIAS	BIT(27)
> -#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 27) /* all of the above */
> +#define __EXEC_OBJECT_VALIDATED		BIT(31)
> +#define __EXEC_OBJECT_HAS_REF		BIT(30)
> +#define __EXEC_OBJECT_HAS_PIN		BIT(29)
> +#define __EXEC_OBJECT_HAS_FENCE		BIT(28)
> +#define __EXEC_OBJECT_NEEDS_MAP		BIT(27)
> +#define __EXEC_OBJECT_NEEDS_BIAS	BIT(26)
> +#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 26) /* all of the above */
>   #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
>   
>   #define __EXEC_HAS_RELOC	BIT(31)
> -#define __EXEC_VALIDATED	BIT(30)
>   #define UPDATE			PIN_OFFSET_FIXED
>   
>   #define BATCH_OFFSET_BIAS (256*1024)
> @@ -429,14 +429,16 @@ eb_validate_vma(struct i915_execbuffer *eb,
>   }
>   
>   static int
> -eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
> +eb_add_vma(struct i915_execbuffer *eb,
> +	   unsigned int i, struct i915_vma *vma,
> +	   unsigned int flags)
>   {
>   	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
>   	int err;
>   
>   	GEM_BUG_ON(i915_vma_is_closed(vma));
>   
> -	if (!(eb->args->flags & __EXEC_VALIDATED)) {
> +	if (!(flags & __EXEC_OBJECT_VALIDATED)) {
>   		err = eb_validate_vma(eb, entry, vma);
>   		if (unlikely(err))
>   			return err;
> @@ -471,7 +473,7 @@ eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
>   	 * to find the right target VMA when doing relocations.
>   	 */
>   	eb->vma[i] = vma;
> -	eb->flags[i] = entry->flags;
> +	eb->flags[i] = entry->flags | flags;
>   	vma->exec_flags = &eb->flags[i];
>   
>   	err = 0;
> @@ -680,15 +682,11 @@ static int eb_select_context(struct i915_execbuffer *eb)
>   	return 0;
>   }
>   
> -static int eb_lookup_vmas(struct i915_execbuffer *eb)
> +static int eb_lookup_vmas(struct i915_execbuffer *eb, unsigned int flags)
>   {
> -#define INTERMEDIATE BIT(0)
> -	const unsigned int count = eb->buffer_count;
>   	struct i915_gem_context_vma_lut *lut = &eb->ctx->vma_lut;
> -	struct i915_vma *vma;
> -	struct idr *idr;
> +	struct drm_i915_gem_object *uninitialized_var(obj);
>   	unsigned int i;
> -	int slow_pass = -1;
>   	int err;
>   
>   	INIT_LIST_HEAD(&eb->relocs);
> @@ -698,85 +696,39 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
>   		flush_work(&lut->resize);
>   	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
>   
> -	for (i = 0; i < count; i++) {
> -		hlist_for_each_entry(vma,
> -				     ht_head(lut, eb->exec[i].handle),
> -				     ctx_node) {
> -			if (vma->ctx_handle != eb->exec[i].handle)
> -				continue;
> -
> -			err = eb_add_vma(eb, i, vma);
> -			if (unlikely(err))
> -				return err;
> -			goto next_vma;
> -		}
> -
> -		if (slow_pass < 0)
> -			slow_pass = i;
> -
> -		eb->vma[i] = NULL;
> -
> -next_vma: ;
> -	}
> +	for (i = 0; i < eb->buffer_count; i++) {
> +		u32 handle = eb->exec[i].handle;
> +		struct hlist_head *hl = ht_head(lut, handle);
> +		struct i915_vma *vma;
>   
> -	if (slow_pass < 0)
> -		goto out;
> +		hlist_for_each_entry(vma, hl, ctx_node) {
> +			GEM_BUG_ON(vma->ctx != eb->ctx);
>   
> -	spin_lock(&eb->file->table_lock);
> -	/*
> -	 * Grab a reference to the object and release the lock so we can lookup
> -	 * or create the VMA without using GFP_ATOMIC
> -	 */
> -	idr = &eb->file->object_idr;
> -	for (i = slow_pass; i < count; i++) {
> -		struct drm_i915_gem_object *obj;
> +			if (vma->ctx_handle != handle)
> +				continue;
>   
> -		if (eb->vma[i])
> -			continue;
> +			goto add_vma;
> +		}
>   
> -		obj = to_intel_bo(idr_find(idr, eb->exec[i].handle));
> +		obj = i915_gem_object_lookup(eb->file, handle);
>   		if (unlikely(!obj)) {
> -			spin_unlock(&eb->file->table_lock);
> -			DRM_DEBUG("Invalid object handle %d at index %d\n",
> -				  eb->exec[i].handle, i);
>   			err = -ENOENT;
> -			goto err;
> +			goto err_vma;
>   		}
>   
> -		eb->vma[i] =
> -			(struct i915_vma *)ptr_pack_bits(obj, INTERMEDIATE, 1);
> -	}
> -	spin_unlock(&eb->file->table_lock);
> -
> -	for (i = slow_pass; i < count; i++) {
> -		struct drm_i915_gem_object *obj;
> -		unsigned int is_obj;
> -
> -		obj = (typeof(obj))ptr_unpack_bits(eb->vma[i], &is_obj, 1);
> -		if (!is_obj)
> -			continue;
> +		flags |= __EXEC_OBJECT_HAS_REF;
>   
> -		/*
> -		 * NOTE: We can leak any vmas created here when something fails
> -		 * later on. But that's no issue since vma_unbind can deal with
> -		 * vmas which are not actually bound. And since only
> -		 * lookup_or_create exists as an interface to get at the vma
> -		 * from the (obj, vm) we don't run the risk of creating
> -		 * duplicated vmas for the same vm.
> -		 */
>   		vma = i915_vma_instance(obj, eb->vm, NULL);
>   		if (unlikely(IS_ERR(vma))) {
> -			DRM_DEBUG("Failed to lookup VMA\n");
>   			err = PTR_ERR(vma);
> -			goto err;
> +			goto err_obj;
>   		}
>   
>   		/* First come, first served */
>   		if (!vma->ctx) {
>   			vma->ctx = eb->ctx;
> -			vma->ctx_handle = eb->exec[i].handle;
> -			hlist_add_head(&vma->ctx_node,
> -				       ht_head(lut, eb->exec[i].handle));
> +			vma->ctx_handle = handle;
> +			hlist_add_head(&vma->ctx_node, hl);
>   			lut->ht_count++;
>   			lut->ht_size |= I915_CTX_RESIZE_IN_PROGRESS;
>   			if (i915_vma_is_ggtt(vma)) {
> @@ -784,18 +736,14 @@ next_vma: ;
>   				obj->vma_hashed = vma;
>   			}
>   
> -			i915_vma_get(vma);
> +			/* transfer ref to ctx */
> +			flags &= ~__EXEC_OBJECT_HAS_REF;
>   		}
>   
> -		err = eb_add_vma(eb, i, vma);
> +add_vma:
> +		err = eb_add_vma(eb, i, vma, flags);
>   		if (unlikely(err))
> -			goto err;
> -
> -		/* Only after we validated the user didn't use our bits */
> -		if (vma->ctx != eb->ctx) {
> -			i915_vma_get(vma);
> -			eb->flags[i] |= __EXEC_OBJECT_HAS_REF;
> -		}
> +			goto err_vma;
>   	}
>   
>   	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
> @@ -805,7 +753,6 @@ next_vma: ;
>   			lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
>   	}
>   
> -out:
>   	/* take note of the batch buffer before we might reorder the lists */
>   	i = eb_batch_index(eb);
>   	eb->batch = eb->vma[i];
> @@ -825,17 +772,14 @@ next_vma: ;
>   	if (eb->reloc_cache.has_fence)
>   		eb->exec[i].flags |= EXEC_OBJECT_NEEDS_FENCE;
>   
> -	eb->args->flags |= __EXEC_VALIDATED;
>   	return eb_reserve(eb);
>   
> -err:
> -	for (i = slow_pass; i < count; i++) {
> -		if (ptr_unmask_bits(eb->vma[i], 1))
> -			eb->vma[i] = NULL;
> -	}
> +err_obj:
> +	i915_gem_object_put(obj);
> +err_vma:
> +	eb->vma[i] = NULL;
>   	lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
>   	return err;
> -#undef INTERMEDIATE
>   }
>   
>   static struct i915_vma *
> @@ -868,7 +812,7 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
>   		unsigned int flags = eb->flags[i];
>   
>   		if (!vma)
> -			continue;
> +			break;
>   
>   		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
>   		vma->exec_flags = NULL;
> @@ -1714,7 +1658,7 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
>   	}
>   
>   	/* reacquire the objects */
> -	err = eb_lookup_vmas(eb);
> +	err = eb_lookup_vmas(eb, __EXEC_OBJECT_VALIDATED);
>   	if (err)
>   		goto err;
>   
> @@ -1768,7 +1712,7 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
>   
>   static int eb_relocate(struct i915_execbuffer *eb)
>   {
> -	if (eb_lookup_vmas(eb))
> +	if (eb_lookup_vmas(eb, 0))
>   		goto slow;
>   
>   	/* The objects are in their final locations, apply the relocations. */
> 

I couldn't parse the diff mentally, but I looked at the resulting code
and it looked simple enough to me.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko



* Re: [PATCH 1/3] drm/i915: Convert execbuf to use struct-of-array packing for critical fields
  2017-06-19 12:22   ` Chris Wilson
@ 2017-06-20 10:15     ` Tvrtko Ursulin
  0 siblings, 0 replies; 10+ messages in thread
From: Tvrtko Ursulin @ 2017-06-20 10:15 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/06/2017 13:22, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2017-06-19 13:06:18)
>>
>> On 16/06/2017 17:02, Chris Wilson wrote:
>>> @@ -520,7 +511,8 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
>>>    static int eb_reserve_vma(const struct i915_execbuffer *eb,
>>>                          struct i915_vma *vma)
>>>    {
>>> -     struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
>>> +     struct drm_i915_gem_exec_object2 *entry =
>>> +             &eb->exec[vma->exec_flags - eb->flags];
>>
>> There are three instances of this, so maybe "entry = exec_entry(eb,
>> vma)"? An added benefit is that the exec_entry() implementation could
>> host a comment explaining the magic.
> 
> You were meant to jump in and suggest some way I would store the index.
> :)

No ideas I'm afraid, maybe it is just too hot. :)

>>> @@ -870,23 +864,20 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
>>>        unsigned int i;
>>>    
>>>        for (i = 0; i < count; i++) {
>>> -             struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
>>> -             struct i915_vma *vma = exec_to_vma(entry);
>>> +             struct i915_vma *vma = eb->vma[i];
>>> +             unsigned int flags = eb->flags[i];
>>>    
>>>                if (!vma)
>>>                        continue;
>>>    
>>> -             GEM_BUG_ON(vma->exec_entry != entry);
>>> -             vma->exec_entry = NULL;
>>> +             GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
>>> +             vma->exec_flags = NULL;
>>>    
>>> -             if (entry->flags & __EXEC_OBJECT_HAS_PIN)
>>> -                     __eb_unreserve_vma(vma, entry);
>>> +             if (flags & __EXEC_OBJECT_HAS_PIN)
>>> +                     __eb_unreserve_vma(vma, flags);
>>>    
>>> -             if (entry->flags & __EXEC_OBJECT_HAS_REF)
>>> +             if (flags & __EXEC_OBJECT_HAS_REF)
>>>                        i915_vma_put(vma);
>>> -
>>> -             entry->flags &=
>>> -                     ~(__EXEC_OBJECT_RESERVED | __EXEC_OBJECT_HAS_REF);
>>
>> No need to clear these from eb->flags[i] to ensure no double free?
> 
> No, the other path that may drop the vma prevents the double-free by
> setting count to 0. Along the error path we don't have the vma set for
> incomplete loads.

Cool.

>>> @@ -1830,33 +1824,34 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
>>>                                return -ENOMEM;
>>>    
>>>                        capture->next = eb->request->capture_list;
>>> -                     capture->vma = vma;
>>> +                     capture->vma = eb->vma[i];
>>>                        eb->request->capture_list = capture;
>>>                }
>>>    
>>> -             if (entry->flags & EXEC_OBJECT_ASYNC)
>>> -                     goto skip_flushes;
>>> +             if (flags & EXEC_OBJECT_ASYNC)
>>> +                     continue;
>>>    
>>> +             obj = eb->vma[i]->obj;
>>>                if (unlikely(obj->cache_dirty && !obj->cache_coherent))
>>>                        i915_gem_clflush_object(obj, 0);
>>>    
>>>                err = i915_gem_request_await_object
>>> -                     (eb->request, obj, entry->flags & EXEC_OBJECT_WRITE);
>>> +                     (eb->request, obj, flags & EXEC_OBJECT_WRITE);
>>>                if (err)
>>>                        return err;
>>> -
>>> -skip_flushes:
>>> -             i915_vma_move_to_active(vma, eb->request, entry->flags);
>>> -             __eb_unreserve_vma(vma, entry);
>>> -             vma->exec_entry = NULL;
>>
>> Grumble, it would have been better if this change had been in a separate patch.
> 
> Grumble. It was more meaningful in an earlier version of the patch,
> where there was a dance here with flags.

Can't spot any issues anyway:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko



* Re: [PATCH 3/3] drm/i915: Replace execbuf vma ht with an idr
  2017-06-16 16:02 ` [PATCH 3/3] drm/i915: Replace execbuf vma ht with an idr Chris Wilson
@ 2017-06-20 11:26   ` Tvrtko Ursulin
  2017-06-21 10:30     ` Chris Wilson
  0 siblings, 1 reply; 10+ messages in thread
From: Tvrtko Ursulin @ 2017-06-20 11:26 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 16/06/2017 17:02, Chris Wilson wrote:
> This was the competing idea long ago, but it was only with the rewrite
> of the idr as a radixtree, using the radixtree directly ourselves, and
> the realisation that we can store the vma directly in the radixtree and
> only need a list for the reverse mapping, that the patch became
> performant enough to displace using a hashtable. Though the vma ht is
> fast and doesn't require an extra allocation (as we can embed the node
> inside the vma), it does require a thread for resizing and
> serialization. That is hairy enough to investigate alternatives and
> favour them if equivalent in performance. One advantage of allocating
> an indirection entry is that we can support a single shared bo between
> many clients, something that was done on a first-come, first-served
> basis for shared GGTT vma previously. To offset the extra allocations,
> we create yet another kmem_cache for them.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c           |  6 --
>   drivers/gpu/drm/i915/i915_drv.h               |  1 +
>   drivers/gpu/drm/i915/i915_gem.c               | 32 ++++++++--
>   drivers/gpu/drm/i915/i915_gem_context.c       | 86 +++++----------------------
>   drivers/gpu/drm/i915/i915_gem_context.h       | 30 ++--------
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c    | 70 +++++++---------------
>   drivers/gpu/drm/i915/i915_gem_object.h        | 10 +++-
>   drivers/gpu/drm/i915/i915_vma.c               | 22 -------
>   drivers/gpu/drm/i915/i915_vma.h               |  4 --
>   drivers/gpu/drm/i915/selftests/mock_context.c | 15 ++---
>   lib/radix-tree.c                              |  1 +
>   11 files changed, 81 insertions(+), 196 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 4577b0af6886..a6ba2100bb88 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1998,12 +1998,6 @@ static int i915_context_status(struct seq_file *m, void *unused)
>   			seq_putc(m, '\n');
>   		}
>   
> -		seq_printf(m,
> -			   "\tvma hashtable size=%u (actual %lu), count=%u\n",
> -			   ctx->vma_lut.ht_size,
> -			   BIT(ctx->vma_lut.ht_bits),
> -			   ctx->vma_lut.ht_count);
> -
>   		seq_putc(m, '\n');
>   	}
>   
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index cd15697c1ca4..de8dd0c06ce6 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2083,6 +2083,7 @@ struct drm_i915_private {
>   
>   	struct kmem_cache *objects;
>   	struct kmem_cache *vmas;
> +	struct kmem_cache *luts;
>   	struct kmem_cache *requests;
>   	struct kmem_cache *dependencies;
>   	struct kmem_cache *priorities;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 7dcac3bfb771..3388c65b54a3 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3252,19 +3252,33 @@ i915_gem_idle_work_handler(struct work_struct *work)
>   
>   void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
>   {
> +	struct drm_i915_private *i915 = to_i915(gem->dev);
>   	struct drm_i915_gem_object *obj = to_intel_bo(gem);
>   	struct drm_i915_file_private *fpriv = file->driver_priv;
>   	struct i915_vma *vma, *vn;
> +	struct i915_lut_handle *lut, *ln;
>   
>   	mutex_lock(&obj->base.dev->struct_mutex);
> +
> +	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
> +		struct i915_gem_context *ctx = lut->ctx;
> +
> +		if (ctx->file_priv != fpriv)
> +			continue;
> +
> +		radix_tree_delete(&ctx->handles_vma, lut->handle);
> +		i915_gem_object_put(obj);
> +
> +		list_del(&lut->obj_link);
> +		list_del(&lut->ctx_link);
> +
> +		kmem_cache_free(i915->luts, lut);
> +	}
> +
>   	list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
>   		if (vma->vm->file == fpriv)
>   			i915_vma_close(vma);
>   
> -	vma = obj->vma_hashed;
> -	if (vma && vma->ctx->file_priv == fpriv)
> -		i915_vma_unlink_ctx(vma);
> -
>   	if (i915_gem_object_is_active(obj) &&
>   	    !i915_gem_object_has_active_reference(obj)) {
>   		i915_gem_object_set_active_reference(obj);
> @@ -4259,6 +4273,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>   	INIT_LIST_HEAD(&obj->global_link);
>   	INIT_LIST_HEAD(&obj->userfault_link);
>   	INIT_LIST_HEAD(&obj->vma_list);
> +	INIT_LIST_HEAD(&obj->lut_list);
>   	INIT_LIST_HEAD(&obj->batch_pool_link);
>   
>   	obj->ops = ops;
> @@ -4897,12 +4912,16 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
>   	if (!dev_priv->vmas)
>   		goto err_objects;
>   
> +	dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0);
> +	if (!dev_priv->luts)
> +		goto err_vmas;
> +
>   	dev_priv->requests = KMEM_CACHE(drm_i915_gem_request,
>   					SLAB_HWCACHE_ALIGN |
>   					SLAB_RECLAIM_ACCOUNT |
>   					SLAB_TYPESAFE_BY_RCU);
>   	if (!dev_priv->requests)
> -		goto err_vmas;
> +		goto err_luts;
>   
>   	dev_priv->dependencies = KMEM_CACHE(i915_dependency,
>   					    SLAB_HWCACHE_ALIGN |
> @@ -4949,6 +4968,8 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
>   	kmem_cache_destroy(dev_priv->dependencies);
>   err_requests:
>   	kmem_cache_destroy(dev_priv->requests);
> +err_luts:
> +	kmem_cache_destroy(dev_priv->luts);
>   err_vmas:
>   	kmem_cache_destroy(dev_priv->vmas);
>   err_objects:
> @@ -4971,6 +4992,7 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
>   	kmem_cache_destroy(dev_priv->priorities);
>   	kmem_cache_destroy(dev_priv->dependencies);
>   	kmem_cache_destroy(dev_priv->requests);
> +	kmem_cache_destroy(dev_priv->luts);
>   	kmem_cache_destroy(dev_priv->vmas);
>   	kmem_cache_destroy(dev_priv->objects);
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 39ed58a21fc1..dc182a8dd4e8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -93,69 +93,21 @@
>   
>   #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
>   
> -/* Initial size (as log2) to preallocate the handle->object hashtable */
> -#define VMA_HT_BITS 2u /* 4 x 2 pointers, 64 bytes minimum */
> -
> -static void resize_vma_ht(struct work_struct *work)
> +static void lut_close(struct i915_gem_context *ctx)
>   {
> -	struct i915_gem_context_vma_lut *lut =
> -		container_of(work, typeof(*lut), resize);
> -	unsigned int bits, new_bits, size, i;
> -	struct hlist_head *new_ht;
> -
> -	GEM_BUG_ON(!(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS));
> -
> -	bits = 1 + ilog2(4*lut->ht_count/3 + 1);
> -	new_bits = min_t(unsigned int,
> -			 max(bits, VMA_HT_BITS),
> -			 sizeof(unsigned int) * BITS_PER_BYTE - 1);
> -	if (new_bits == lut->ht_bits)
> -		goto out;
> -
> -	new_ht = kzalloc(sizeof(*new_ht)<<new_bits, GFP_KERNEL | __GFP_NOWARN);
> -	if (!new_ht)
> -		new_ht = vzalloc(sizeof(*new_ht)<<new_bits);
> -	if (!new_ht)
> -		/* Pretend resize succeeded and stop calling us for a bit! */
> -		goto out;
> +	struct i915_lut_handle *lut, *ln;
> +	struct radix_tree_iter iter;
> +	void __rcu **slot;
>   
> -	size = BIT(lut->ht_bits);
> -	for (i = 0; i < size; i++) {
> -		struct i915_vma *vma;
> -		struct hlist_node *tmp;
> -
> -		hlist_for_each_entry_safe(vma, tmp, &lut->ht[i], ctx_node)
> -			hlist_add_head(&vma->ctx_node,
> -				       &new_ht[hash_32(vma->ctx_handle,
> -						       new_bits)]);
> +	list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
> +		list_del(&lut->obj_link);

I assume you deliberately did not bother unlinking from the ctx list
since all that is getting zapped anyway?
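
(For the record, a fully symmetric teardown would presumably look
something like this sketch, with the second list_del being the redundant
part:

	list_del(&lut->obj_link);
	list_del(&lut->ctx_link); /* redundant: the whole list is freed */
	kmem_cache_free(ctx->i915->luts, lut);

but that is exactly the unlink being skipped.)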

> +		kmem_cache_free(ctx->i915->luts, lut);
>   	}
> -	kvfree(lut->ht);
> -	lut->ht = new_ht;
> -	lut->ht_bits = new_bits;
> -out:
> -	smp_store_release(&lut->ht_size, BIT(bits));
> -	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
> -}
>   
> -static void vma_lut_free(struct i915_gem_context *ctx)
> -{
> -	struct i915_gem_context_vma_lut *lut = &ctx->vma_lut;
> -	unsigned int i, size;
> -
> -	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)
> -		cancel_work_sync(&lut->resize);
> -
> -	size = BIT(lut->ht_bits);
> -	for (i = 0; i < size; i++) {
> -		struct i915_vma *vma;
> -
> -		hlist_for_each_entry(vma, &lut->ht[i], ctx_node) {
> -			vma->obj->vma_hashed = NULL;
> -			vma->ctx = NULL;
> -			i915_vma_put(vma);
> -		}
> +	radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
> +		i915_vma_put(rcu_dereference_raw(*slot));
> +		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
>   	}
> -	kvfree(lut->ht);
>   }
>   
>   void i915_gem_context_free(struct kref *ctx_ref)
> @@ -167,7 +119,6 @@ void i915_gem_context_free(struct kref *ctx_ref)
>   	trace_i915_context_free(ctx);
>   	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
>   
> -	vma_lut_free(ctx);
>   	i915_ppgtt_put(ctx->ppgtt);
>   
>   	for (i = 0; i < I915_NUM_ENGINES; i++) {
> @@ -195,8 +146,11 @@ void i915_gem_context_free(struct kref *ctx_ref)
>   static void context_close(struct i915_gem_context *ctx)
>   {
>   	i915_gem_context_set_closed(ctx);
> +
> +	lut_close(ctx);
>   	if (ctx->ppgtt)
>   		i915_ppgtt_close(&ctx->ppgtt->base);
> +
>   	ctx->file_priv = ERR_PTR(-EBADF);
>   	i915_gem_context_put(ctx);
>   }
> @@ -269,16 +223,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
>   	ctx->i915 = dev_priv;
>   	ctx->priority = I915_PRIORITY_NORMAL;
>   
> -	ctx->vma_lut.ht_bits = VMA_HT_BITS;
> -	ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
> -	BUILD_BUG_ON(BIT(VMA_HT_BITS) == I915_CTX_RESIZE_IN_PROGRESS);
> -	ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
> -				  sizeof(*ctx->vma_lut.ht),
> -				  GFP_KERNEL);
> -	if (!ctx->vma_lut.ht)
> -		goto err_out;
> -
> -	INIT_WORK(&ctx->vma_lut.resize, resize_vma_ht);
> +	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> +	INIT_LIST_HEAD(&ctx->handles_list);
>   
>   	/* Default context will never have a file_priv */
>   	ret = DEFAULT_CONTEXT_HANDLE;
> @@ -328,8 +274,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
>   	put_pid(ctx->pid);
>   	idr_remove(&file_priv->context_idr, ctx->user_handle);
>   err_lut:
> -	kvfree(ctx->vma_lut.ht);
> -err_out:
>   	context_close(ctx);
>   	return ERR_PTR(ret);
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 82c99ba92ad3..cd0c5d8e0db7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -27,6 +27,7 @@
>   
>   #include <linux/bitops.h>
>   #include <linux/list.h>
> +#include <linux/radix-tree.h>
>   
>   struct pid;
>   
> @@ -143,32 +144,6 @@ struct i915_gem_context {
>   	/** ggtt_offset_bias: placement restriction for context objects */
>   	u32 ggtt_offset_bias;
>   
> -	struct i915_gem_context_vma_lut {
> -		/** ht_size: last request size to allocate the hashtable for. */
> -		unsigned int ht_size;
> -#define I915_CTX_RESIZE_IN_PROGRESS BIT(0)
> -		/** ht_bits: real log2(size) of hashtable. */
> -		unsigned int ht_bits;
> -		/** ht_count: current number of entries inside the hashtable */
> -		unsigned int ht_count;
> -
> -		/** ht: the array of buckets comprising the simple hashtable */
> -		struct hlist_head *ht;
> -
> -		/**
> -		 * resize: After an execbuf completes, we check the load factor
> -		 * of the hashtable. If the hashtable is too full, or too empty,
> -		 * we schedule a task to resize the hashtable. During the
> -		 * resize, the entries are moved between different buckets and
> -		 * so we cannot simultaneously read the hashtable as it is
> -		 * being resized (unlike rhashtable). Therefore we treat the
> -		 * active work as a strong barrier, pausing a subsequent
> -		 * execbuf to wait for the resize worker to complete, if
> -		 * required.
> -		 */
> -		struct work_struct resize;
> -	} vma_lut;
> -
>   	/** engine: per-engine logical HW state */
>   	struct intel_context {
>   		struct i915_vma *state;
> @@ -199,6 +174,9 @@ struct i915_gem_context {
>   
>   	/** remap_slice: Bitmask of cache lines that need remapping */
>   	u8 remap_slice;
> +
> +	struct radix_tree_root handles_vma;
> +	struct list_head handles_list;

Add kernel doc for the two please.
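
For instance, something along these lines (the wording is only a sketch
inferred from the commit message):

	/** handles_vma: radixtree mapping a userspace handle to its vma */
	struct radix_tree_root handles_vma;

	/**
	 * handles_list: reverse mapping of handles_vma; the list of
	 * i915_lut_handle entries owned by this context, used to tear
	 * the radixtree down when the context is closed.
	 */
	struct list_head handles_list;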

>   };
>   
>   static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 18d6581bc6c5..adcea065985c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -637,19 +637,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
>   	} while (1);
>   }
>   
> -static inline struct hlist_head *
> -ht_head(const  struct i915_gem_context_vma_lut *lut, u32 handle)
> -{
> -	return &lut->ht[hash_32(handle, lut->ht_bits)];
> -}
> -
> -static inline bool
> -ht_needs_resize(const struct i915_gem_context_vma_lut *lut)
> -{
> -	return (4*lut->ht_count > 3*lut->ht_size ||
> -		4*lut->ht_count + 1 < lut->ht_size);
> -}
> -
>   static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
>   {
>   	if (eb->args->flags & I915_EXEC_BATCH_FIRST)
> @@ -684,31 +671,22 @@ static int eb_select_context(struct i915_execbuffer *eb)
>   
>   static int eb_lookup_vmas(struct i915_execbuffer *eb, unsigned int flags)
>   {
> -	struct i915_gem_context_vma_lut *lut = &eb->ctx->vma_lut;
> -	struct drm_i915_gem_object *uninitialized_var(obj);
> +	struct drm_i915_gem_object *obj;
> +	struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
>   	unsigned int i;
>   	int err;
>   
>   	INIT_LIST_HEAD(&eb->relocs);
>   	INIT_LIST_HEAD(&eb->unbound);
>   
> -	if (unlikely(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS))
> -		flush_work(&lut->resize);
> -	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
> -
>   	for (i = 0; i < eb->buffer_count; i++) {
>   		u32 handle = eb->exec[i].handle;
> -		struct hlist_head *hl = ht_head(lut, handle);
>   		struct i915_vma *vma;
> +		struct i915_lut_handle *lut;
>   
> -		hlist_for_each_entry(vma, hl, ctx_node) {
> -			GEM_BUG_ON(vma->ctx != eb->ctx);
> -
> -			if (vma->ctx_handle != handle)
> -				continue;
> -
> +		vma = radix_tree_lookup(handles_vma, handle);
> +		if (likely(vma))
>   			goto add_vma;
> -		}
>   
>   		obj = i915_gem_object_lookup(eb->file, handle);
>   		if (unlikely(!obj)) {
> @@ -716,43 +694,36 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb, unsigned int flags)
>   			goto err_vma;
>   		}
>   
> -		flags |= __EXEC_OBJECT_HAS_REF;
> -
>   		vma = i915_vma_instance(obj, eb->vm, NULL);
>   		if (unlikely(IS_ERR(vma))) {
>   			err = PTR_ERR(vma);
>   			goto err_obj;
>   		}
>   
> -		/* First come, first served */
> -		if (!vma->ctx) {
> -			vma->ctx = eb->ctx;
> -			vma->ctx_handle = handle;
> -			hlist_add_head(&vma->ctx_node, hl);
> -			lut->ht_count++;
> -			lut->ht_size |= I915_CTX_RESIZE_IN_PROGRESS;
> -			if (i915_vma_is_ggtt(vma)) {
> -				GEM_BUG_ON(obj->vma_hashed);
> -				obj->vma_hashed = vma;
> -			}
> +		lut = kmem_cache_alloc(eb->i915->luts, GFP_KERNEL);
> +		if (unlikely(!lut)) {
> +			err = -ENOMEM;
> +			goto err_obj;
> +		}
>   
> -			/* transfer ref to ctx */
> -			flags &= ~__EXEC_OBJECT_HAS_REF;
> +		/* transfer ref to ctx */
> +		err = radix_tree_insert(handles_vma, handle, vma);
> +		if (unlikely(err)) {
> +			kfree(lut);
> +			goto err_obj;
>   		}
>   
> +		list_add(&lut->obj_link, &obj->lut_list);
> +		list_add(&lut->ctx_link, &eb->ctx->handles_list);
> +		lut->ctx = eb->ctx;
> +		lut->handle = handle;
> +
>   add_vma:
>   		err = eb_add_vma(eb, i, vma, flags);
>   		if (unlikely(err))
>   			goto err_vma;
>   	}
>   
> -	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
> -		if (ht_needs_resize(lut))
> -			queue_work(system_highpri_wq, &lut->resize);
> -		else
> -			lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
> -	}
> -
>   	/* take note of the batch buffer before we might reorder the lists */
>   	i = eb_batch_index(eb);
>   	eb->batch = eb->vma[i];
> @@ -778,7 +749,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb, unsigned int flags)
>   	i915_gem_object_put(obj);
>   err_vma:
>   	eb->vma[i] = NULL;
> -	lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
>   	return err;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
> index 5b19a4916a4d..8188d3cf5e11 100644
> --- a/drivers/gpu/drm/i915/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> @@ -35,6 +35,13 @@
>   
>   #include "i915_selftest.h"
>   
> +struct i915_lut_handle {
> +	struct list_head obj_link;
> +	struct list_head ctx_link;
> +	struct i915_gem_context *ctx;
> +	u32 handle;
> +};
> +
>   struct drm_i915_gem_object_ops {
>   	unsigned int flags;
>   #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
> @@ -86,7 +93,8 @@ struct drm_i915_gem_object {
>   	 * They are also added to @vma_list for easy iteration.
>   	 */
>   	struct rb_root vma_tree;
> -	struct i915_vma *vma_hashed;
> +
> +	struct list_head lut_list;

And some kernel doc here please.
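
E.g., again only a sketch of what it could say:

	/**
	 * lut_list: list of i915_lut_handle entries referencing this
	 * object, one per (context, handle) pair, so that the
	 * handle->vma luts can be pruned when the object is closed.
	 */
	struct list_head lut_list;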

>   
>   	/** Stolen memory for this object, instead of being backed by shmem. */
>   	struct drm_mm_node *stolen;
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 532c709febbd..93329a809764 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -591,33 +591,11 @@ static void i915_vma_destroy(struct i915_vma *vma)
>   	kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
>   }
>   
> -void i915_vma_unlink_ctx(struct i915_vma *vma)
> -{
> -	struct i915_gem_context *ctx = vma->ctx;
> -
> -	if (ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
> -		cancel_work_sync(&ctx->vma_lut.resize);
> -		ctx->vma_lut.ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
> -	}
> -
> -	__hlist_del(&vma->ctx_node);
> -	ctx->vma_lut.ht_count--;
> -
> -	if (i915_vma_is_ggtt(vma))
> -		vma->obj->vma_hashed = NULL;
> -	vma->ctx = NULL;
> -
> -	i915_vma_put(vma);
> -}
> -
>   void i915_vma_close(struct i915_vma *vma)
>   {
>   	GEM_BUG_ON(i915_vma_is_closed(vma));
>   	vma->flags |= I915_VMA_CLOSED;
>   
> -	if (vma->ctx)
> -		i915_vma_unlink_ctx(vma);
> -
>   	list_del(&vma->obj_link);
>   	rb_erase(&vma->obj_node, &vma->obj->vma_tree);
>   
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 5f1356e93c6c..6d35cf7ddcfb 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -115,10 +115,6 @@ struct i915_vma {
>   	unsigned int *exec_flags;
>   	struct hlist_node exec_node;
>   	u32 exec_handle;
> -
> -	struct i915_gem_context *ctx;
> -	struct hlist_node ctx_node;
> -	u32 ctx_handle;
>   };
>   
>   struct i915_vma *
> diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
> index f8b9cc212b02..3ee31ef84607 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_context.c
> @@ -40,18 +40,13 @@ mock_context(struct drm_i915_private *i915,
>   	INIT_LIST_HEAD(&ctx->link);
>   	ctx->i915 = i915;
>   
> -	ctx->vma_lut.ht_bits = VMA_HT_BITS;
> -	ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
> -	ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
> -				  sizeof(*ctx->vma_lut.ht),
> -				  GFP_KERNEL);
> -	if (!ctx->vma_lut.ht)
> -		goto err_free;
> +	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> +	INIT_LIST_HEAD(&ctx->handles_list);
>   
>   	ret = ida_simple_get(&i915->context_hw_ida,
>   			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
>   	if (ret < 0)
> -		goto err_vma_ht;
> +		goto err_handles;
>   	ctx->hw_id = ret;
>   
>   	if (name) {
> @@ -66,9 +61,7 @@ mock_context(struct drm_i915_private *i915,
>   
>   	return ctx;
>   
> -err_vma_ht:
> -	kvfree(ctx->vma_lut.ht);
> -err_free:
> +err_handles:
>   	kfree(ctx);
>   	return NULL;
>   
> diff --git a/lib/radix-tree.c b/lib/radix-tree.c
> index 898e87998417..3527eb364964 100644
> --- a/lib/radix-tree.c
> +++ b/lib/radix-tree.c
> @@ -2022,6 +2022,7 @@ void radix_tree_iter_delete(struct radix_tree_root *root,
>   	if (__radix_tree_delete(root, iter->node, slot))
>   		iter->index = iter->next_index;
>   }
> +EXPORT_SYMBOL(radix_tree_iter_delete);
>   
>   /**
>    * radix_tree_delete_item - delete an item from a radix tree
> 

Very nice! Much shorter and more elegant (and so more readable) than the
previous state of affairs.

With the kerneldoc added:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [PATCH 3/3] drm/i915: Replace execbuf vma ht with an idr
  2017-06-20 11:26   ` Tvrtko Ursulin
@ 2017-06-21 10:30     ` Chris Wilson
  0 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2017-06-21 10:30 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2017-06-20 12:26:44)
> 
> On 16/06/2017 17:02, Chris Wilson wrote:
> > -static void resize_vma_ht(struct work_struct *work)
> > +static void lut_close(struct i915_gem_context *ctx)
> >   {
> > -     struct i915_gem_context_vma_lut *lut =
> > -             container_of(work, typeof(*lut), resize);
> > -     unsigned int bits, new_bits, size, i;
> > -     struct hlist_head *new_ht;
> > -
> > -     GEM_BUG_ON(!(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS));
> > -
> > -     bits = 1 + ilog2(4*lut->ht_count/3 + 1);
> > -     new_bits = min_t(unsigned int,
> > -                      max(bits, VMA_HT_BITS),
> > -                      sizeof(unsigned int) * BITS_PER_BYTE - 1);
> > -     if (new_bits == lut->ht_bits)
> > -             goto out;
> > -
> > -     new_ht = kzalloc(sizeof(*new_ht)<<new_bits, GFP_KERNEL | __GFP_NOWARN);
> > -     if (!new_ht)
> > -             new_ht = vzalloc(sizeof(*new_ht)<<new_bits);
> > -     if (!new_ht)
> > -             /* Pretend resize succeeded and stop calling us for a bit! */
> > -             goto out;
> > +     struct i915_lut_handle *lut, *ln;
> > +     struct radix_tree_iter iter;
> > +     void __rcu **slot;
> >   
> > -     size = BIT(lut->ht_bits);
> > -     for (i = 0; i < size; i++) {
> > -             struct i915_vma *vma;
> > -             struct hlist_node *tmp;
> > -
> > -             hlist_for_each_entry_safe(vma, tmp, &lut->ht[i], ctx_node)
> > -                     hlist_add_head(&vma->ctx_node,
> > -                                    &new_ht[hash_32(vma->ctx_handle,
> > -                                                    new_bits)]);
> > +     list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
> > +             list_del(&lut->obj_link);
> 
> I assume you deliberately did not bother unlinking from the ctx list
> since all that is getting zapped anyway?

Yes, it is not obvious though. The context is marked as closed so we
should thereafter prevent further usage of the ctx->handles_list.

At the moment, this is governed by struct_mutex but in the future it is
likely to be replaced by file->mutex or ctx->mutex.

If I add a GEM_BUG_ON(eb->ctx->closed) to eb_lookup_vmas(), that should
help explain the relationship.
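
i.e. something like the following at the top of eb_lookup_vmas(), as a
sketch (the exact predicate may differ; i915_gem_context_is_closed() is
the existing accessor):

	/* The luts die with the context; no lookups once it is closed. */
	GEM_BUG_ON(i915_gem_context_is_closed(eb->ctx));
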
-Chris

end of thread

Thread overview: 10+ messages
2017-06-16 16:02 [PATCH 1/3] drm/i915: Convert execbuf to use struct-of-array packing for critical fields Chris Wilson
2017-06-16 16:02 ` [PATCH 2/3] drm/i915: Simplify eb_lookup_vmas() Chris Wilson
2017-06-20 10:11   ` Tvrtko Ursulin
2017-06-16 16:02 ` [PATCH 3/3] drm/i915: Replace execbuf vma ht with an idr Chris Wilson
2017-06-20 11:26   ` Tvrtko Ursulin
2017-06-21 10:30     ` Chris Wilson
2017-06-16 16:43 ` ✓ Fi.CI.BAT: success for series starting with [1/3] drm/i915: Convert execbuf to use struct-of-array packing for critical fields Patchwork
2017-06-19 12:06 ` [PATCH 1/3] " Tvrtko Ursulin
2017-06-19 12:22   ` Chris Wilson
2017-06-20 10:15     ` Tvrtko Ursulin
