All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/i915: Generalise GPU activity tracking
@ 2019-01-30 20:50 Chris Wilson
  2019-01-30 20:50 ` [PATCH 2/3] drm/i915: Release the active tracker tree upon idling Chris Wilson
                   ` (6 more replies)
  0 siblings, 7 replies; 15+ messages in thread
From: Chris Wilson @ 2019-01-30 20:50 UTC (permalink / raw)
  To: intel-gfx

We currently track GPU memory usage inside VMA, such that we never
release memory used by the GPU until after it has finished accessing it.
However, we may want to track other resources aside from VMA, or we may
want to split a VMA into multiple independent regions and track each
separately. For this purpose, generalise our request tracking (akin to
struct reservation_object) so that we can embed it into other objects.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
Ideas for names?

i915_activity_tracker
i915_active_object
i915_active_reference // tempting
gpu_read_lock (i915_read_lock was the first name I tried)

Fwiw, by the end there will be nothing i915 specific to this so perhaps
dma_read_lock isn't so bad?

i915_active_request -> dma_read_fence

Possibilities there.
-Chris
---
 drivers/gpu/drm/i915/Makefile                 |   4 +-
 drivers/gpu/drm/i915/i915_active.c            | 228 ++++++++++++++++++
 drivers/gpu/drm/i915/i915_active.h            |  62 +++++
 drivers/gpu/drm/i915/i915_active_types.h      |  26 ++
 drivers/gpu/drm/i915/i915_gem_gtt.c           |   3 +-
 drivers/gpu/drm/i915/i915_vma.c               | 173 +++----------
 drivers/gpu/drm/i915/i915_vma.h               |   9 +-
 drivers/gpu/drm/i915/selftests/i915_active.c  | 158 ++++++++++++
 .../drm/i915/selftests/i915_live_selftests.h  |   3 +-
 9 files changed, 512 insertions(+), 154 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_active.c
 create mode 100644 drivers/gpu/drm/i915/i915_active.h
 create mode 100644 drivers/gpu/drm/i915/i915_active_types.h
 create mode 100644 drivers/gpu/drm/i915/selftests/i915_active.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 210d0e8777b6..1787e1299b1b 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -57,7 +57,9 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
 i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 
 # GEM code
-i915-y += i915_cmd_parser.o \
+i915-y += \
+	  i915_active.o \
+	  i915_cmd_parser.o \
 	  i915_gem_batch_pool.o \
 	  i915_gem_clflush.o \
 	  i915_gem_context.o \
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
new file mode 100644
index 000000000000..91950d778cab
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -0,0 +1,228 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_active.h"
+
+#define BKL(ref) (&(ref)->i915->drm.struct_mutex)
+
+struct active_node {
+	struct i915_gem_active base;
+	struct i915_active *ref;
+	struct rb_node node;
+	u64 timeline;
+};
+
+static void
+__active_retire(struct i915_active *ref)
+{
+	GEM_BUG_ON(!ref->count);
+	if (!--ref->count)
+		ref->retire(ref);
+}
+
+static void
+node_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+	__active_retire(container_of(base, struct active_node, base)->ref);
+}
+
+static void
+last_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+	__active_retire(container_of(base, struct i915_active, last));
+}
+
+static struct i915_gem_active *
+active_instance(struct i915_active *ref, u64 idx)
+{
+	struct active_node *node;
+	struct rb_node **p, *parent;
+	struct i915_request *old;
+
+	/*
+	 * We track the most recently used timeline to skip a rbtree search
+	 * for the common case, under typical loads we never need the rbtree
+	 * at all. We can reuse the last slot if it is empty, that is
+	 * after the previous activity has been retired, or if it matches the
+	 * current timeline.
+	 *
+	 * Note that we allow the timeline to be active simultaneously in
+	 * the rbtree and the last cache. We do this to avoid having
+	 * to search and replace the rbtree element for a new timeline, with
+	 * the cost being that we must be aware that the ref may be retired
+	 * twice for the same timeline (as the older rbtree element will be
+	 * retired before the new request added to last).
+	 */
+	old = i915_gem_active_raw(&ref->last, BKL(ref));
+	if (!old || old->fence.context == idx)
+		goto out;
+
+	/* Move the currently active fence into the rbtree */
+	idx = old->fence.context;
+
+	parent = NULL;
+	p = &ref->tree.rb_node;
+	while (*p) {
+		parent = *p;
+
+		node = rb_entry(parent, struct active_node, node);
+		if (node->timeline == idx)
+			goto replace;
+
+		if (node->timeline < idx)
+			p = &parent->rb_right;
+		else
+			p = &parent->rb_left;
+	}
+
+	node = kmalloc(sizeof(*node), GFP_KERNEL);
+
+	/* kmalloc may retire the ref->last (thanks shrinker)! */
+	if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) {
+		kfree(node);
+		goto out;
+	}
+
+	if (unlikely(!node))
+		return ERR_PTR(-ENOMEM);
+
+	init_request_active(&node->base, node_retire);
+	node->ref = ref;
+	node->timeline = idx;
+
+	rb_link_node(&node->node, parent, p);
+	rb_insert_color(&node->node, &ref->tree);
+
+replace:
+	/*
+	 * Overwrite the previous active slot in the rbtree with last,
+	 * leaving last zeroed. If the previous slot is still active,
+	 * we must be careful as we now only expect to receive one retire
+	 * callback not two, and so must undo the active counting for the
+	 * overwritten slot.
+	 */
+	if (i915_gem_active_isset(&node->base)) {
+		/* Retire ourselves from the old rq->active_list */
+		__list_del_entry(&node->base.link);
+		ref->count--;
+		GEM_BUG_ON(!ref->count);
+	}
+	GEM_BUG_ON(list_empty(&ref->last.link));
+	list_replace_init(&ref->last.link, &node->base.link);
+	node->base.request = fetch_and_zero(&ref->last.request);
+
+out:
+	return &ref->last;
+}
+
+void i915_active_init(struct drm_i915_private *i915,
+		      struct i915_active *ref,
+		      void (*retire)(struct i915_active *ref))
+{
+	ref->i915 = i915;
+	ref->retire = retire;
+	ref->tree = RB_ROOT;
+	init_request_active(&ref->last, last_retire);
+	ref->count = 0;
+}
+
+int i915_active_ref(struct i915_active *ref,
+		    u64 timeline,
+		    struct i915_request *rq)
+{
+	struct i915_gem_active *active;
+
+	active = active_instance(ref, timeline);
+	if (IS_ERR(active))
+		return PTR_ERR(active);
+
+	if (!i915_gem_active_isset(active))
+		ref->count++;
+	i915_gem_active_set(active, rq);
+
+	GEM_BUG_ON(!ref->count);
+	return 0;
+}
+
+bool i915_active_acquire(struct i915_active *ref)
+{
+	lockdep_assert_held(BKL(ref));
+	return !ref->count++;
+}
+
+void i915_active_release(struct i915_active *ref)
+{
+	lockdep_assert_held(BKL(ref));
+	__active_retire(ref);
+}
+
+int i915_active_wait(struct i915_active *ref)
+{
+	struct active_node *it, *n;
+	int ret = 0;
+
+	if (i915_active_acquire(ref))
+		goto out_release;
+
+	ret = i915_gem_active_retire(&ref->last, BKL(ref));
+	if (ret)
+		goto out_release;
+
+	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+		ret = i915_gem_active_retire(&it->base, BKL(ref));
+		if (ret)
+			break;
+	}
+
+out_release:
+	i915_active_release(ref);
+	return ret;
+}
+
+static int __i915_request_await_active(struct i915_request *rq,
+				       struct i915_gem_active *active)
+{
+	struct i915_request *barrier =
+		i915_gem_active_raw(active, &rq->i915->drm.struct_mutex);
+
+	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
+}
+
+int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
+{
+	struct active_node *it, *n;
+	int ret;
+
+	ret = __i915_request_await_active(rq, &ref->last);
+	if (ret)
+		return ret;
+
+	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+		ret = __i915_request_await_active(rq, &it->base);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+void i915_active_fini(struct i915_active *ref)
+{
+	struct active_node *it, *n;
+
+	GEM_BUG_ON(i915_gem_active_isset(&ref->last));
+
+	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+		GEM_BUG_ON(i915_gem_active_isset(&it->base));
+		kfree(it);
+	}
+	ref->tree = RB_ROOT;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_active.c"
+#endif
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
new file mode 100644
index 000000000000..8bd7cf757d1a
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -0,0 +1,62 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef _I915_ACTIVE_H_
+#define _I915_ACTIVE_H_
+
+#include "i915_active_types.h"
+
+/*
+ * GPU activity tracking
+ *
+ * Each set of commands submitted to the GPU comprises a single request that
+ * signals a fence upon completion. struct i915_request combines the
+ * command submission, scheduling and fence signaling roles. If we want to see
+ * if a particular task is complete, we need to grab the fence (struct
+ * i915_request) for that task and check or wait for it to be signaled. More
+ * often though we want to track the status of a bunch of tasks, for example
+ * to wait for the GPU to finish accessing some memory across a variety of
+ * different command pipelines from different clients. We could choose to
+ * track every single request associated with the task, but knowing that
+ * each request belongs to an ordered timeline (later requests within a
+ * timeline must wait for earlier requests), we need only track the
+ * latest request in each timeline to determine the overall status of the
+ * task.
+ *
+ * struct i915_active provides this tracking across timelines. It builds a
+ * composite shared-fence, and is updated as new work is submitted to the task,
+ * forming a snapshot of the current status. It should be embedded into the
+ * different resources that need to track their associated GPU activity to
+ * provide a callback when that GPU activity has ceased, or otherwise to
+ * provide a serialisation point either for request submission or for CPU
+ * synchronisation.
+ */
+
+void i915_active_init(struct drm_i915_private *i915,
+		      struct i915_active *ref,
+		      void (*retire)(struct i915_active *ref));
+
+int i915_active_ref(struct i915_active *ref,
+		    u64 timeline,
+		    struct i915_request *rq);
+
+int i915_active_wait(struct i915_active *ref);
+
+int i915_request_await_active(struct i915_request *rq,
+			      struct i915_active *ref);
+
+bool i915_active_acquire(struct i915_active *ref);
+void i915_active_release(struct i915_active *ref);
+
+static inline bool
+i915_active_is_idle(const struct i915_active *ref)
+{
+	return !ref->count;
+}
+
+void i915_active_fini(struct i915_active *ref);
+
+#endif /* _I915_ACTIVE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
new file mode 100644
index 000000000000..411e502ed8dd
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -0,0 +1,26 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef _I915_ACTIVE_TYPES_H_
+#define _I915_ACTIVE_TYPES_H_
+
+#include <linux/rbtree.h>
+
+#include "i915_request.h"
+
+struct drm_i915_private;
+
+struct i915_active {
+	struct drm_i915_private *i915;
+
+	struct rb_root tree;
+	struct i915_gem_active last;
+	unsigned int count;
+
+	void (*retire)(struct i915_active *ref);
+};
+
+#endif /* _I915_ACTIVE_TYPES_H_ */
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 49b00996a15e..e625659c03a2 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1917,14 +1917,13 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 	if (!vma)
 		return ERR_PTR(-ENOMEM);
 
+	i915_active_init(i915, &vma->active, NULL);
 	init_request_active(&vma->last_fence, NULL);
 
 	vma->vm = &ggtt->vm;
 	vma->ops = &pd_vma_ops;
 	vma->private = ppgtt;
 
-	vma->active = RB_ROOT;
-
 	vma->size = size;
 	vma->fence_size = size;
 	vma->flags = I915_VMA_GGTT;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index d83b8ad5f859..d4772061e642 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -63,22 +63,23 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
 
 #endif
 
-struct i915_vma_active {
-	struct i915_gem_active base;
-	struct i915_vma *vma;
-	struct rb_node node;
-	u64 timeline;
-};
+static void obj_bump_mru(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 
-static void
-__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
+	spin_lock(&i915->mm.obj_lock);
+	if (obj->bind_count)
+		list_move_tail(&obj->mm.link, &i915->mm.bound_list);
+	spin_unlock(&i915->mm.obj_lock);
+
+	obj->mm.dirty = true; /* be paranoid  */
+}
+
+static void __i915_vma_retire(struct i915_active *ref)
 {
+	struct i915_vma *vma = container_of(ref, typeof(*vma), active);
 	struct drm_i915_gem_object *obj = vma->obj;
 
-	GEM_BUG_ON(!i915_vma_is_active(vma));
-	if (--vma->active_count)
-		return;
-
 	GEM_BUG_ON(!i915_gem_object_is_active(obj));
 	if (--obj->active_count)
 		return;
@@ -90,16 +91,12 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
 		reservation_object_unlock(obj->resv);
 	}
 
-	/* Bump our place on the bound list to keep it roughly in LRU order
+	/*
+	 * Bump our place on the bound list to keep it roughly in LRU order
 	 * so that we don't steal from recently used but inactive objects
 	 * (unless we are forced to ofc!)
 	 */
-	spin_lock(&rq->i915->mm.obj_lock);
-	if (obj->bind_count)
-		list_move_tail(&obj->mm.link, &rq->i915->mm.bound_list);
-	spin_unlock(&rq->i915->mm.obj_lock);
-
-	obj->mm.dirty = true; /* be paranoid  */
+	obj_bump_mru(obj);
 
 	if (i915_gem_object_has_active_reference(obj)) {
 		i915_gem_object_clear_active_reference(obj);
@@ -107,21 +104,6 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
 	}
 }
 
-static void
-i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
-{
-	struct i915_vma_active *active =
-		container_of(base, typeof(*active), base);
-
-	__i915_vma_retire(active->vma, rq);
-}
-
-static void
-i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
-{
-	__i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
-}
-
 static struct i915_vma *
 vma_create(struct drm_i915_gem_object *obj,
 	   struct i915_address_space *vm,
@@ -137,10 +119,9 @@ vma_create(struct drm_i915_gem_object *obj,
 	if (vma == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	vma->active = RB_ROOT;
-
-	init_request_active(&vma->last_active, i915_vma_last_retire);
+	i915_active_init(vm->i915, &vma->active, __i915_vma_retire);
 	init_request_active(&vma->last_fence, NULL);
+
 	vma->vm = vm;
 	vma->ops = &vm->vma_ops;
 	vma->obj = obj;
@@ -823,7 +804,6 @@ void i915_vma_reopen(struct i915_vma *vma)
 static void __i915_vma_destroy(struct i915_vma *vma)
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
-	struct i915_vma_active *iter, *n;
 
 	GEM_BUG_ON(vma->node.allocated);
 	GEM_BUG_ON(vma->fence);
@@ -843,10 +823,7 @@ static void __i915_vma_destroy(struct i915_vma *vma)
 		spin_unlock(&obj->vma.lock);
 	}
 
-	rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
-		GEM_BUG_ON(i915_gem_active_isset(&iter->base));
-		kfree(iter);
-	}
+	i915_active_fini(&vma->active);
 
 	kmem_cache_free(i915->vmas, vma);
 }
@@ -931,104 +908,15 @@ static void export_fence(struct i915_vma *vma,
 	reservation_object_unlock(resv);
 }
 
-static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
-{
-	struct i915_vma_active *active;
-	struct rb_node **p, *parent;
-	struct i915_request *old;
-
-	/*
-	 * We track the most recently used timeline to skip a rbtree search
-	 * for the common case, under typical loads we never need the rbtree
-	 * at all. We can reuse the last_active slot if it is empty, that is
-	 * after the previous activity has been retired, or if the active
-	 * matches the current timeline.
-	 *
-	 * Note that we allow the timeline to be active simultaneously in
-	 * the rbtree and the last_active cache. We do this to avoid having
-	 * to search and replace the rbtree element for a new timeline, with
-	 * the cost being that we must be aware that the vma may be retired
-	 * twice for the same timeline (as the older rbtree element will be
-	 * retired before the new request added to last_active).
-	 */
-	old = i915_gem_active_raw(&vma->last_active,
-				  &vma->vm->i915->drm.struct_mutex);
-	if (!old || old->fence.context == idx)
-		goto out;
-
-	/* Move the currently active fence into the rbtree */
-	idx = old->fence.context;
-
-	parent = NULL;
-	p = &vma->active.rb_node;
-	while (*p) {
-		parent = *p;
-
-		active = rb_entry(parent, struct i915_vma_active, node);
-		if (active->timeline == idx)
-			goto replace;
-
-		if (active->timeline < idx)
-			p = &parent->rb_right;
-		else
-			p = &parent->rb_left;
-	}
-
-	active = kmalloc(sizeof(*active), GFP_KERNEL);
-
-	/* kmalloc may retire the vma->last_active request (thanks shrinker)! */
-	if (unlikely(!i915_gem_active_raw(&vma->last_active,
-					  &vma->vm->i915->drm.struct_mutex))) {
-		kfree(active);
-		goto out;
-	}
-
-	if (unlikely(!active))
-		return ERR_PTR(-ENOMEM);
-
-	init_request_active(&active->base, i915_vma_retire);
-	active->vma = vma;
-	active->timeline = idx;
-
-	rb_link_node(&active->node, parent, p);
-	rb_insert_color(&active->node, &vma->active);
-
-replace:
-	/*
-	 * Overwrite the previous active slot in the rbtree with last_active,
-	 * leaving last_active zeroed. If the previous slot is still active,
-	 * we must be careful as we now only expect to receive one retire
-	 * callback not two, and so much undo the active counting for the
-	 * overwritten slot.
-	 */
-	if (i915_gem_active_isset(&active->base)) {
-		/* Retire ourselves from the old rq->active_list */
-		__list_del_entry(&active->base.link);
-		vma->active_count--;
-		GEM_BUG_ON(!vma->active_count);
-	}
-	GEM_BUG_ON(list_empty(&vma->last_active.link));
-	list_replace_init(&vma->last_active.link, &active->base.link);
-	active->base.request = fetch_and_zero(&vma->last_active.request);
-
-out:
-	return &vma->last_active;
-}
-
 int i915_vma_move_to_active(struct i915_vma *vma,
 			    struct i915_request *rq,
 			    unsigned int flags)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
-	struct i915_gem_active *active;
 
 	lockdep_assert_held(&rq->i915->drm.struct_mutex);
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
-	active = active_instance(vma, rq->fence.context);
-	if (IS_ERR(active))
-		return PTR_ERR(active);
-
 	/*
 	 * Add a reference if we're newly entering the active list.
 	 * The order in which we add operations to the retirement queue is
@@ -1037,9 +925,15 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	if (!i915_gem_active_isset(active) && !vma->active_count++)
+	if (!vma->active.count)
 		obj->active_count++;
-	i915_gem_active_set(active, rq);
+
+	if (unlikely(i915_active_ref(&vma->active, rq->fence.context, rq))) {
+		if (!vma->active.count)
+			obj->active_count--;
+		return -ENOMEM;
+	}
+
 	GEM_BUG_ON(!i915_vma_is_active(vma));
 	GEM_BUG_ON(!obj->active_count);
 
@@ -1073,8 +967,6 @@ int i915_vma_unbind(struct i915_vma *vma)
 	 */
 	might_sleep();
 	if (i915_vma_is_active(vma)) {
-		struct i915_vma_active *active, *n;
-
 		/*
 		 * When a closed VMA is retired, it is unbound - eek.
 		 * In order to prevent it from being recursively closed,
@@ -1090,19 +982,10 @@ int i915_vma_unbind(struct i915_vma *vma)
 		 */
 		__i915_vma_pin(vma);
 
-		ret = i915_gem_active_retire(&vma->last_active,
-					     &vma->vm->i915->drm.struct_mutex);
+		ret = i915_active_wait(&vma->active);
 		if (ret)
 			goto unpin;
 
-		rbtree_postorder_for_each_entry_safe(active, n,
-						     &vma->active, node) {
-			ret = i915_gem_active_retire(&active->base,
-						     &vma->vm->i915->drm.struct_mutex);
-			if (ret)
-				goto unpin;
-		}
-
 		ret = i915_gem_active_retire(&vma->last_fence,
 					     &vma->vm->i915->drm.struct_mutex);
 unpin:
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 5793abe509a2..3c03d4569481 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -34,6 +34,7 @@
 #include "i915_gem_fence_reg.h"
 #include "i915_gem_object.h"
 
+#include "i915_active.h"
 #include "i915_request.h"
 
 enum i915_cache_level;
@@ -108,9 +109,7 @@ struct i915_vma {
 #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
 #define I915_VMA_GGTT_WRITE	BIT(15)
 
-	unsigned int active_count;
-	struct rb_root active;
-	struct i915_gem_active last_active;
+	struct i915_active active;
 	struct i915_gem_active last_fence;
 
 	/**
@@ -154,9 +153,9 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags);
 #define I915_VMA_RELEASE_MAP BIT(0)
 
-static inline bool i915_vma_is_active(struct i915_vma *vma)
+static inline bool i915_vma_is_active(const struct i915_vma *vma)
 {
-	return vma->active_count;
+	return !i915_active_is_idle(&vma->active);
 }
 
 int __must_check i915_vma_move_to_active(struct i915_vma *vma,
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
new file mode 100644
index 000000000000..c05ca366729a
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -0,0 +1,158 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "../i915_selftest.h"
+
+#include "igt_flush_test.h"
+#include "lib_sw_fence.h"
+
+struct live_active {
+	struct i915_active base;
+	bool retired;
+};
+
+static void __live_active_retire(struct i915_active *base)
+{
+	struct live_active *active = container_of(base, typeof(*active), base);
+
+	active->retired = true;
+}
+
+static int __live_active_setup(struct drm_i915_private *i915,
+			       struct live_active *active)
+{
+	struct intel_engine_cs *engine;
+	struct i915_sw_fence *submit;
+	enum intel_engine_id id;
+	unsigned int count = 0;
+	int err = 0;
+
+	i915_active_init(i915, &active->base, __live_active_retire);
+	active->retired = false;
+
+	if (!i915_active_acquire(&active->base)) {
+		pr_err("First i915_active_acquire should report being idle\n");
+		return -EINVAL;
+	}
+
+	submit = heap_fence_create(GFP_KERNEL);
+
+	for_each_engine(engine, i915, id) {
+		struct i915_request *rq;
+
+		rq = i915_request_alloc(engine, i915->kernel_context);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
+						       submit,
+						       GFP_KERNEL);
+		if (err < 0) {
+			pr_err("Failed to allocate submission fence!\n");
+			i915_request_add(rq);
+			break;
+		}
+
+		err = i915_active_ref(&active->base, rq->fence.context, rq);
+		if (err) {
+			pr_err("Failed to track active ref!\n");
+			i915_request_add(rq);
+			break;
+		}
+
+		i915_request_add(rq);
+		count++;
+	}
+
+	i915_active_release(&active->base);
+	if (active->retired) {
+		pr_err("i915_active retired before submission!\n");
+		err = -EINVAL;
+	}
+	if (active->base.count != count) {
+		pr_err("i915_active not tracking all requests, found %d, expected %d\n",
+		       active->base.count, count);
+		err = -EINVAL;
+	}
+
+	i915_sw_fence_commit(submit);
+	heap_fence_put(submit);
+
+	return err;
+}
+
+static int live_active_wait(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct live_active active;
+	intel_wakeref_t wakeref;
+	int err;
+
+	/* Check that we get a callback when requests retire upon waiting */
+
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(i915);
+
+	err = __live_active_setup(i915, &active);
+
+	i915_active_wait(&active.base);
+	if (!active.retired) {
+		pr_err("i915_active not retired after waiting!\n");
+		err = -EINVAL;
+	}
+
+	i915_active_fini(&active.base);
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	intel_runtime_pm_put(i915, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
+static int live_active_retire(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct live_active active;
+	intel_wakeref_t wakeref;
+	int err;
+
+	/* Check that we get a callback when requests are indirectly retired */
+
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(i915);
+
+	err = __live_active_setup(i915, &active);
+
+	/* waits for & retires all requests */
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	if (!active.retired) {
+		pr_err("i915_active not retired after flushing!\n");
+		err = -EINVAL;
+	}
+
+	i915_active_fini(&active.base);
+	intel_runtime_pm_put(i915, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
+int i915_active_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(live_active_wait),
+		SUBTEST(live_active_retire),
+	};
+
+	if (i915_terminally_wedged(&i915->gpu_error))
+		return 0;
+
+	return i915_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index 76b4f87fc853..6d766925ad04 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -12,8 +12,9 @@
 selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */
 selftest(uncore, intel_uncore_live_selftests)
 selftest(workarounds, intel_workarounds_live_selftests)
-selftest(requests, i915_request_live_selftests)
 selftest(timelines, i915_timeline_live_selftests)
+selftest(requests, i915_request_live_selftests)
+selftest(active, i915_active_live_selftests)
 selftest(objects, i915_gem_object_live_selftests)
 selftest(dmabuf, i915_gem_dmabuf_live_selftests)
 selftest(coherency, i915_gem_coherency_live_selftests)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/3] drm/i915: Release the active tracker tree upon idling
  2019-01-30 20:50 [PATCH 1/3] drm/i915: Generalise GPU activity tracking Chris Wilson
@ 2019-01-30 20:50 ` Chris Wilson
  2019-01-31 11:33   ` Tvrtko Ursulin
  2019-01-30 20:50 ` [PATCH 3/3] drm/i915: Allocate active tracking nodes from a slabcache Chris Wilson
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2019-01-30 20:50 UTC (permalink / raw)
  To: intel-gfx

As soon as we detect that the active tracker is idle and we prepare to
call the retire callback, release the storage for our tree of
per-timeline nodes. We expect these to be infrequently used and quick
to allocate, so there is little benefit in keeping the tree cached and
we would prefer to return the pages back to the system in a timely
fashion.

This also means that when we finalize the struct as a whole, we know
that, as the activity tracker must be idle, the tree has already been
released. Indeed we can reduce i915_active_fini() to just the assertions
that there is nothing to do.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_active.c | 33 +++++++++++++++++++++---------
 drivers/gpu/drm/i915/i915_active.h |  4 ++++
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 91950d778cab..b1fefe98f9a6 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -16,12 +16,29 @@ struct active_node {
 	u64 timeline;
 };
 
+static void
+__active_park(struct i915_active *ref)
+{
+	struct active_node *it, *n;
+
+	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+		GEM_BUG_ON(i915_gem_active_isset(&it->base));
+		kfree(it);
+	}
+	ref->tree = RB_ROOT;
+}
+
 static void
 __active_retire(struct i915_active *ref)
 {
 	GEM_BUG_ON(!ref->count);
-	if (!--ref->count)
-		ref->retire(ref);
+	if (--ref->count)
+		return;
+
+	/* return the unused nodes to our slabcache */
+	__active_park(ref);
+
+	ref->retire(ref);
 }
 
 static void
@@ -210,18 +227,14 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
 	return 0;
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 void i915_active_fini(struct i915_active *ref)
 {
-	struct active_node *it, *n;
-
 	GEM_BUG_ON(i915_gem_active_isset(&ref->last));
-
-	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
-		GEM_BUG_ON(i915_gem_active_isset(&it->base));
-		kfree(it);
-	}
-	ref->tree = RB_ROOT;
+	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
+	GEM_BUG_ON(ref->count);
 }
+#endif
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/i915_active.c"
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 8bd7cf757d1a..6130c6770d10 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -57,6 +57,10 @@ i915_active_is_idle(const struct i915_active *ref)
 	return !ref->count;
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 void i915_active_fini(struct i915_active *ref);
+#else
+static inline void i915_active_fini(struct i915_active *ref) { }
+#endif
 
 #endif /* _I915_ACTIVE_H_ */
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/3] drm/i915: Allocate active tracking nodes from a slabcache
  2019-01-30 20:50 [PATCH 1/3] drm/i915: Generalise GPU activity tracking Chris Wilson
  2019-01-30 20:50 ` [PATCH 2/3] drm/i915: Release the active tracker tree upon idling Chris Wilson
@ 2019-01-30 20:50 ` Chris Wilson
  2019-01-31  9:39   ` kbuild test robot
                     ` (2 more replies)
  2019-01-30 22:56 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: Generalise GPU activity tracking Patchwork
                   ` (4 subsequent siblings)
  6 siblings, 3 replies; 15+ messages in thread
From: Chris Wilson @ 2019-01-30 20:50 UTC (permalink / raw)
  To: intel-gfx

Wrap the active tracking for GPU references in a slabcache for faster
allocations, and hopefully better fragmentation reduction.

v3: Nothing device specific left, it's just a slabcache that we can
make global.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_active.c | 31 +++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_active.h |  3 +++
 drivers/gpu/drm/i915/i915_pci.c    |  3 +++
 3 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index b1fefe98f9a6..5818ddf88462 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -9,6 +9,17 @@
 
 #define BKL(ref) (&(ref)->i915->drm.struct_mutex)
 
+/*
+ * Active refs memory management
+ *
+ * To be more economical with memory, we reap all the i915_active trees as
+ * they idle (when we know the active requests are inactive) and allocate the
+ * nodes from a local slab cache to hopefully reduce the fragmentation.
+ */
+static struct i915_global_active {
+	struct kmem_cache *slab_cache;
+} global;
+
 struct active_node {
 	struct i915_gem_active base;
 	struct i915_active *ref;
@@ -23,7 +34,7 @@ __active_park(struct i915_active *ref)
 
 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
 		GEM_BUG_ON(i915_gem_active_isset(&it->base));
-		kfree(it);
+		kmem_cache_free(global.slab_cache, it);
 	}
 	ref->tree = RB_ROOT;
 }
@@ -96,11 +107,11 @@ active_instance(struct i915_active *ref, u64 idx)
 			p = &parent->rb_left;
 	}
 
-	node = kmalloc(sizeof(*node), GFP_KERNEL);
+	node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
 
 	/* kmalloc may retire the ref->last (thanks shrinker)! */
 	if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) {
-		kfree(node);
+		kmem_cache_free(global.slab_cache, node);
 		goto out;
 	}
 
@@ -234,6 +245,20 @@ void i915_active_fini(struct i915_active *ref)
 	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
 	GEM_BUG_ON(ref->count);
 }
+
+int __init i915_global_active_init(void)
+{
+	global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
+	if (!global.slab_cache)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void __exit i915_global_active_exit(void)
+{
+	kmem_cache_destroy(global.slab_cache);
+}
 #endif
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 6130c6770d10..48fdb1497883 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -63,4 +63,7 @@ void i915_active_fini(struct i915_active *ref);
 static inline void i915_active_fini(struct i915_active *ref) { }
 #endif
 
+int i915_global_active_init(void);
+void i915_global_active_exit(void);
+
 #endif /* _I915_ACTIVE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 44c23ac60347..751a787c83d1 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -793,6 +793,8 @@ static int __init i915_init(void)
 	bool use_kms = true;
 	int err;
 
+	i915_global_active_init();
+
 	err = i915_mock_selftests();
 	if (err)
 		return err > 0 ? 0 : err;
@@ -824,6 +826,7 @@ static void __exit i915_exit(void)
 		return;
 
 	pci_unregister_driver(&i915_pci_driver);
+	i915_global_active_exit();
 }
 
 module_init(i915_init);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: Generalise GPU activity tracking
  2019-01-30 20:50 [PATCH 1/3] drm/i915: Generalise GPU activity tracking Chris Wilson
  2019-01-30 20:50 ` [PATCH 2/3] drm/i915: Release the active tracker tree upon idling Chris Wilson
  2019-01-30 20:50 ` [PATCH 3/3] drm/i915: Allocate active tracking nodes from a slabcache Chris Wilson
@ 2019-01-30 22:56 ` Patchwork
  2019-01-30 22:58 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2019-01-30 22:56 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: Generalise GPU activity tracking
URL   : https://patchwork.freedesktop.org/series/56010/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
204bf30b25ef drm/i915: Generalise GPU activity tracking
-:31: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#31: 
new file mode 100644

-:36: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#36: FILE: drivers/gpu/drm/i915/i915_active.c:1:
+/*

-:270: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#270: FILE: drivers/gpu/drm/i915/i915_active.h:1:
+/*

-:338: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#338: FILE: drivers/gpu/drm/i915/i915_active_types.h:1:
+/*

-:693: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#693: FILE: drivers/gpu/drm/i915/selftests/i915_active.c:1:
+/*

total: 0 errors, 5 warnings, 0 checks, 791 lines checked
1df4bc330f06 drm/i915: Release the active tracker tree upon idling
944d08bd5ed1 drm/i915: Allocate active tracking nodes from a slabcache

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [1/3] drm/i915: Generalise GPU activity tracking
  2019-01-30 20:50 [PATCH 1/3] drm/i915: Generalise GPU activity tracking Chris Wilson
                   ` (2 preceding siblings ...)
  2019-01-30 22:56 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: Generalise GPU activity tracking Patchwork
@ 2019-01-30 22:58 ` Patchwork
  2019-01-30 23:19 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2019-01-30 22:58 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: Generalise GPU activity tracking
URL   : https://patchwork.freedesktop.org/series/56010/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Generalise GPU activity tracking
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Release the active tracker tree upon idling
Okay!

Commit: drm/i915: Allocate active tracking nodes from a slabcache
Okay!

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/3] drm/i915: Generalise GPU activity tracking
  2019-01-30 20:50 [PATCH 1/3] drm/i915: Generalise GPU activity tracking Chris Wilson
                   ` (3 preceding siblings ...)
  2019-01-30 22:58 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2019-01-30 23:19 ` Patchwork
  2019-01-31  9:06 ` ✓ Fi.CI.IGT: " Patchwork
  2019-01-31 11:25 ` [PATCH 1/3] " Tvrtko Ursulin
  6 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2019-01-30 23:19 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: Generalise GPU activity tracking
URL   : https://patchwork.freedesktop.org/series/56010/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5514 -> Patchwork_12098
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/56010/revisions/1/mbox/

Known issues
------------

  Here are the changes found in Patchwork_12098 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@kms_busy@basic-flip-a:
    - fi-gdg-551:         PASS -> FAIL [fdo#103182]

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - fi-byt-clapper:     PASS -> FAIL [fdo#103191] / [fdo#107362]

  
#### Possible fixes ####

  * igt@kms_busy@basic-flip-b:
    - fi-gdg-551:         FAIL [fdo#103182] -> PASS

  * igt@kms_flip@basic-flip-vs-modeset:
    - fi-skl-6700hq:      DMESG-WARN [fdo#105998] -> PASS

  * igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence:
    - fi-byt-clapper:     FAIL [fdo#103191] / [fdo#107362] -> PASS

  
  [fdo#103182]: https://bugs.freedesktop.org/show_bug.cgi?id=103182
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#105998]: https://bugs.freedesktop.org/show_bug.cgi?id=105998
  [fdo#107362]: https://bugs.freedesktop.org/show_bug.cgi?id=107362


Participating hosts (44 -> 41)
------------------------------

  Missing    (3): fi-ilk-m540 fi-byt-squawks fi-bsw-cyan 


Build changes
-------------

    * Linux: CI_DRM_5514 -> Patchwork_12098

  CI_DRM_5514: 8a7e6109652e58be9c43e2a7a4d318a7c5b34fef @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4801: 6f6bacf12759fb319ade3ba37861ae711f8a5cd9 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12098: 944d08bd5ed1a45e46666f0b9eb59bf45559aa2c @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

944d08bd5ed1 drm/i915: Allocate active tracking nodes from a slabcache
1df4bc330f06 drm/i915: Release the active tracker tree upon idling
204bf30b25ef drm/i915: Generalise GPU activity tracking

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12098/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✓ Fi.CI.IGT: success for series starting with [1/3] drm/i915: Generalise GPU activity tracking
  2019-01-30 20:50 [PATCH 1/3] drm/i915: Generalise GPU activity tracking Chris Wilson
                   ` (4 preceding siblings ...)
  2019-01-30 23:19 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-01-31  9:06 ` Patchwork
  2019-01-31 11:25 ` [PATCH 1/3] " Tvrtko Ursulin
  6 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2019-01-31  9:06 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: Generalise GPU activity tracking
URL   : https://patchwork.freedesktop.org/series/56010/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5514_full -> Patchwork_12098_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  

Known issues
------------

  Here are the changes found in Patchwork_12098_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@kms_cursor_crc@cursor-128x42-random:
    - shard-hsw:          PASS -> INCOMPLETE [fdo#103540]

  * igt@kms_cursor_crc@cursor-128x42-sliding:
    - shard-apl:          PASS -> FAIL [fdo#103232] +1

  * igt@kms_plane@pixel-format-pipe-b-planes-source-clamping:
    - shard-apl:          PASS -> FAIL [fdo#108948]

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes:
    - shard-glk:          PASS -> INCOMPLETE [fdo#103359] / [k.org#198133] +1

  * igt@kms_plane@plane-position-covered-pipe-c-planes:
    - shard-glk:          PASS -> FAIL [fdo#103166]

  * igt@kms_plane_multiple@atomic-pipe-a-tiling-y:
    - shard-apl:          PASS -> FAIL [fdo#103166] +1

  
#### Possible fixes ####

  * igt@gem_workarounds@suspend-resume:
    - shard-kbl:          INCOMPLETE [fdo#103665] -> PASS

  * igt@kms_ccs@pipe-a-crc-sprite-planes-basic:
    - shard-apl:          FAIL [fdo#106510] / [fdo#108145] -> PASS

  * igt@kms_color@pipe-a-degamma:
    - shard-apl:          FAIL [fdo#104782] / [fdo#108145] -> PASS

  * igt@kms_cursor_crc@cursor-256x85-onscreen:
    - shard-apl:          FAIL [fdo#103232] -> PASS +1

  * igt@kms_cursor_crc@cursor-256x85-random:
    - shard-glk:          FAIL [fdo#103232] -> PASS

  * igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic:
    - shard-glk:          FAIL [fdo#104873] -> PASS

  * igt@kms_flip@flip-vs-expired-vblank:
    - shard-glk:          FAIL [fdo#105363] -> PASS

  * igt@kms_plane_multiple@atomic-pipe-a-tiling-y:
    - shard-glk:          FAIL [fdo#103166] -> PASS +1

  * igt@kms_setmode@basic:
    - shard-apl:          FAIL [fdo#99912] -> PASS

  * igt@kms_universal_plane@universal-plane-pipe-b-functional:
    - shard-apl:          FAIL [fdo#103166] -> PASS +2

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103359]: https://bugs.freedesktop.org/show_bug.cgi?id=103359
  [fdo#103540]: https://bugs.freedesktop.org/show_bug.cgi?id=103540
  [fdo#103665]: https://bugs.freedesktop.org/show_bug.cgi?id=103665
  [fdo#104782]: https://bugs.freedesktop.org/show_bug.cgi?id=104782
  [fdo#104873]: https://bugs.freedesktop.org/show_bug.cgi?id=104873
  [fdo#105363]: https://bugs.freedesktop.org/show_bug.cgi?id=105363
  [fdo#106510]: https://bugs.freedesktop.org/show_bug.cgi?id=106510
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108948]: https://bugs.freedesktop.org/show_bug.cgi?id=108948
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912
  [k.org#198133]: https://bugzilla.kernel.org/show_bug.cgi?id=198133


Participating hosts (7 -> 4)
------------------------------

  Missing    (3): shard-snb shard-skl shard-iclb 


Build changes
-------------

    * Linux: CI_DRM_5514 -> Patchwork_12098

  CI_DRM_5514: 8a7e6109652e58be9c43e2a7a4d318a7c5b34fef @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4801: 6f6bacf12759fb319ade3ba37861ae711f8a5cd9 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12098: 944d08bd5ed1a45e46666f0b9eb59bf45559aa2c @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12098/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] drm/i915: Allocate active tracking nodes from a slabcache
  2019-01-30 20:50 ` [PATCH 3/3] drm/i915: Allocate active tracking nodes from a slabcache Chris Wilson
@ 2019-01-31  9:39   ` kbuild test robot
  2019-01-31  9:50   ` kbuild test robot
  2019-01-31 11:39   ` Tvrtko Ursulin
  2 siblings, 0 replies; 15+ messages in thread
From: kbuild test robot @ 2019-01-31  9:39 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, kbuild-all

[-- Attachment #1: Type: text/plain, Size: 1136 bytes --]

Hi Chris,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[also build test ERROR on next-20190130]
[cannot apply to v5.0-rc4]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-Generalise-GPU-activity-tracking/20190131-160548
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: i386-defconfig (attached as .config)
compiler: gcc-8 (Debian 8.2.0-14) 8.2.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

   ld: drivers/gpu/drm/i915/i915_pci.o: in function `i915_exit':
>> i915_pci.c:(.exit.text+0x17): undefined reference to `i915_global_active_exit'
   ld: drivers/gpu/drm/i915/i915_pci.o: in function `i915_init':
>> i915_pci.c:(.init.text+0x5): undefined reference to `i915_global_active_init'

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 26963 bytes --]

[-- Attachment #3: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] drm/i915: Allocate active tracking nodes from a slabcache
  2019-01-30 20:50 ` [PATCH 3/3] drm/i915: Allocate active tracking nodes from a slabcache Chris Wilson
  2019-01-31  9:39   ` kbuild test robot
@ 2019-01-31  9:50   ` kbuild test robot
  2019-01-31 11:39   ` Tvrtko Ursulin
  2 siblings, 0 replies; 15+ messages in thread
From: kbuild test robot @ 2019-01-31  9:50 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, kbuild-all

[-- Attachment #1: Type: text/plain, Size: 1018 bytes --]

Hi Chris,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[also build test ERROR on next-20190130]
[cannot apply to v5.0-rc4]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-Generalise-GPU-activity-tracking/20190131-160548
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: i386-randconfig-s1-201904 (attached as .config)
compiler: gcc-6 (Debian 6.5.0-2) 6.5.0 20181026
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

>> ERROR: "i915_global_active_exit" [drivers/gpu/drm/i915/i915.ko] undefined!
>> ERROR: "i915_global_active_init" [drivers/gpu/drm/i915/i915.ko] undefined!

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 30338 bytes --]

[-- Attachment #3: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/3] drm/i915: Generalise GPU activity tracking
  2019-01-30 20:50 [PATCH 1/3] drm/i915: Generalise GPU activity tracking Chris Wilson
                   ` (5 preceding siblings ...)
  2019-01-31  9:06 ` ✓ Fi.CI.IGT: " Patchwork
@ 2019-01-31 11:25 ` Tvrtko Ursulin
  2019-01-31 11:32   ` Chris Wilson
  6 siblings, 1 reply; 15+ messages in thread
From: Tvrtko Ursulin @ 2019-01-31 11:25 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 30/01/2019 20:50, Chris Wilson wrote:
> We currently track GPU memory usage inside VMA, such that we never
> release memory used by the GPU until after it has finished accessing it.
> However, we may want to track other resources aside from VMA, or we may
> want to split a VMA into multiple independent regions and track each
> separately. For this purpose, generalise our request tracking (akin to
> struct reservation_object) so that we can embed it into other objects.
> 

Changelog where art thou?

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> Ideas for names?
> 
> i915_activity_tracker
> i915_active_object
> i915_active_reference // tempting
> gpu_read_lock (i915_read_lock was the first name I tried)
> 
> Fwiw, by the end there will be nothing i915 specific to this so perhaps
> dma_read_lock isn't so bad?
> 
> i915_active_request -> dma_read_fence
> 
> Possibilities there.

i915_active_reference sounds good.

i915_active_reference_ref though? :) i915_active_reference_

> -Chris
> ---
>   drivers/gpu/drm/i915/Makefile                 |   4 +-
>   drivers/gpu/drm/i915/i915_active.c            | 228 ++++++++++++++++++
>   drivers/gpu/drm/i915/i915_active.h            |  62 +++++
>   drivers/gpu/drm/i915/i915_active_types.h      |  26 ++
>   drivers/gpu/drm/i915/i915_gem_gtt.c           |   3 +-
>   drivers/gpu/drm/i915/i915_vma.c               | 173 +++----------
>   drivers/gpu/drm/i915/i915_vma.h               |   9 +-
>   drivers/gpu/drm/i915/selftests/i915_active.c  | 158 ++++++++++++
>   .../drm/i915/selftests/i915_live_selftests.h  |   3 +-
>   9 files changed, 512 insertions(+), 154 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/i915_active.c
>   create mode 100644 drivers/gpu/drm/i915/i915_active.h
>   create mode 100644 drivers/gpu/drm/i915/i915_active_types.h
>   create mode 100644 drivers/gpu/drm/i915/selftests/i915_active.c
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 210d0e8777b6..1787e1299b1b 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -57,7 +57,9 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
>   i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
>   
>   # GEM code
> -i915-y += i915_cmd_parser.o \
> +i915-y += \
> +	  i915_active.o \
> +	  i915_cmd_parser.o \
>   	  i915_gem_batch_pool.o \
>   	  i915_gem_clflush.o \
>   	  i915_gem_context.o \
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> new file mode 100644
> index 000000000000..91950d778cab
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -0,0 +1,228 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "i915_drv.h"
> +#include "i915_active.h"
> +
> +#define BKL(ref) (&(ref)->i915->drm.struct_mutex)
> +
> +struct active_node {
> +	struct i915_gem_active base;
> +	struct i915_active *ref;
> +	struct rb_node node;
> +	u64 timeline;
> +};
> +
> +static void
> +__active_retire(struct i915_active *ref)
> +{
> +	GEM_BUG_ON(!ref->count);
> +	if (!--ref->count)
> +		ref->retire(ref);
> +}
> +
> +static void
> +node_retire(struct i915_gem_active *base, struct i915_request *rq)
> +{
> +	__active_retire(container_of(base, struct active_node, base)->ref);
> +}
> +
> +static void
> +last_retire(struct i915_gem_active *base, struct i915_request *rq)
> +{
> +	__active_retire(container_of(base, struct i915_active, last));
> +}
> +
> +static struct i915_gem_active *
> +active_instance(struct i915_active *ref, u64 idx)
> +{
> +	struct active_node *node;
> +	struct rb_node **p, *parent;
> +	struct i915_request *old;
> +
> +	/*
> +	 * We track the most recently used timeline to skip a rbtree search
> +	 * for the common case, under typical loads we never need the rbtree
> +	 * at all. We can reuse the last slot if it is empty, that is
> +	 * after the previous activity has been retired, or if it matches the
> +	 * current timeline.
> +	 *
> +	 * Note that we allow the timeline to be active simultaneously in
> +	 * the rbtree and the last cache. We do this to avoid having
> +	 * to search and replace the rbtree element for a new timeline, with
> +	 * the cost being that we must be aware that the ref may be retired
> +	 * twice for the same timeline (as the older rbtree element will be
> +	 * retired before the new request added to last).
> +	 */
> +	old = i915_gem_active_raw(&ref->last, BKL(ref));
> +	if (!old || old->fence.context == idx)
> +		goto out;
> +
> +	/* Move the currently active fence into the rbtree */
> +	idx = old->fence.context;
> +
> +	parent = NULL;
> +	p = &ref->tree.rb_node;
> +	while (*p) {
> +		parent = *p;
> +
> +		node = rb_entry(parent, struct active_node, node);
> +		if (node->timeline == idx)
> +			goto replace;
> +
> +		if (node->timeline < idx)
> +			p = &parent->rb_right;
> +		else
> +			p = &parent->rb_left;
> +	}
> +
> +	node = kmalloc(sizeof(*node), GFP_KERNEL);
> +
> +	/* kmalloc may retire the ref->last (thanks shrinker)! */
> +	if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) {
> +		kfree(node);
> +		goto out;
> +	}
> +
> +	if (unlikely(!node))
> +		return ERR_PTR(-ENOMEM);
> +
> +	init_request_active(&node->base, node_retire);
> +	node->ref = ref;
> +	node->timeline = idx;
> +
> +	rb_link_node(&node->node, parent, p);
> +	rb_insert_color(&node->node, &ref->tree);
> +
> +replace:
> +	/*
> +	 * Overwrite the previous active slot in the rbtree with last,
> +	 * leaving last zeroed. If the previous slot is still active,
> +	 * we must be careful as we now only expect to receive one retire
> +	 * callback not two, and so must undo the active counting for the
> +	 * overwritten slot.
> +	 */
> +	if (i915_gem_active_isset(&node->base)) {
> +		/* Retire ourselves from the old rq->active_list */
> +		__list_del_entry(&node->base.link);
> +		ref->count--;
> +		GEM_BUG_ON(!ref->count);
> +	}
> +	GEM_BUG_ON(list_empty(&ref->last.link));
> +	list_replace_init(&ref->last.link, &node->base.link);
> +	node->base.request = fetch_and_zero(&ref->last.request);
> +
> +out:
> +	return &ref->last;
> +}
> +
> +void i915_active_init(struct drm_i915_private *i915,
> +		      struct i915_active *ref,
> +		      void (*retire)(struct i915_active *ref))
> +{
> +	ref->i915 = i915;
> +	ref->retire = retire;
> +	ref->tree = RB_ROOT;
> +	init_request_active(&ref->last, last_retire);
> +	ref->count = 0;
> +}
> +
> +int i915_active_ref(struct i915_active *ref,
> +		    u64 timeline,
> +		    struct i915_request *rq)
> +{
> +	struct i915_gem_active *active;
> +
> +	active = active_instance(ref, timeline);
> +	if (IS_ERR(active))
> +		return PTR_ERR(active);
> +
> +	if (!i915_gem_active_isset(active))
> +		ref->count++;
> +	i915_gem_active_set(active, rq);
> +
> +	GEM_BUG_ON(!ref->count);
> +	return 0;
> +}
> +
> +bool i915_active_acquire(struct i915_active *ref)
> +{
> +	lockdep_assert_held(BKL(ref));
> +	return !ref->count++;
> +}
> +
> +void i915_active_release(struct i915_active *ref)
> +{
> +	lockdep_assert_held(BKL(ref));
> +	__active_retire(ref);
> +}
> +
> +int i915_active_wait(struct i915_active *ref)
> +{
> +	struct active_node *it, *n;
> +	int ret = 0;
> +
> +	if (i915_active_acquire(ref))
> +		goto out_release;
> +
> +	ret = i915_gem_active_retire(&ref->last, BKL(ref));
> +	if (ret)
> +		goto out_release;
> +
> +	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
> +		ret = i915_gem_active_retire(&it->base, BKL(ref));
> +		if (ret)
> +			break;
> +	}
> +
> +out_release:
> +	i915_active_release(ref);
> +	return ret;
> +}
> +
> +static int __i915_request_await_active(struct i915_request *rq,
> +				       struct i915_gem_active *active)
> +{
> +	struct i915_request *barrier =
> +		i915_gem_active_raw(active, &rq->i915->drm.struct_mutex);
> +
> +	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
> +}
> +
> +int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
> +{
> +	struct active_node *it, *n;
> +	int ret;
> +
> +	ret = __i915_request_await_active(rq, &ref->last);
> +	if (ret)
> +		return ret;
> +
> +	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
> +		ret = __i915_request_await_active(rq, &it->base);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +void i915_active_fini(struct i915_active *ref)
> +{
> +	struct active_node *it, *n;
> +
> +	GEM_BUG_ON(i915_gem_active_isset(&ref->last));
> +
> +	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
> +		GEM_BUG_ON(i915_gem_active_isset(&it->base));
> +		kfree(it);
> +	}
> +	ref->tree = RB_ROOT;
> +}
> +
> +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> +#include "selftests/i915_active.c"
> +#endif
> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> new file mode 100644
> index 000000000000..8bd7cf757d1a
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_active.h
> @@ -0,0 +1,62 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef _I915_ACTIVE_H_
> +#define _I915_ACTIVE_H_
> +
> +#include "i915_active_types.h"
> +
> +/*
> + * GPU activity tracking
> + *
> + * Each set of commands submitted to the GPU comprises a single request that
> + * signals a fence upon completion. struct i915_request combines the
> + * command submission, scheduling and fence signaling roles. If we want to see
> + * if a particular task is complete, we need to grab the fence (struct
> + * i915_request) for that task and check or wait for it to be signaled. More
> + * often though we want to track the status of a bunch of tasks, for example
> + * to wait for the GPU to finish accessing some memory across a variety of
> + * different command pipelines from different clients. We could choose to
> + * track every single request associated with the task, but knowing that
> + * each request belongs to an ordered timeline (later requests within a
> + * timeline must wait for earlier requests), we need only track the
> + * latest request in each timeline to determine the overall status of the
> + * task.
> + *
> + * struct i915_active provides this tracking across timelines. It builds a
> + * composite shared-fence, and is updated as new work is submitted to the task,
> + * forming a snapshot of the current status. It should be embedded into the
> + * different resources that need to track their associated GPU activity to
> + * provide a callback when that GPU activity has ceased, or otherwise to
> + * provide a serialisation point either for request submission or for CPU
> + * synchronisation.
> + */
> +
> +void i915_active_init(struct drm_i915_private *i915,
> +		      struct i915_active *ref,
> +		      void (*retire)(struct i915_active *ref));
> +
> +int i915_active_ref(struct i915_active *ref,
> +		    u64 timeline,
> +		    struct i915_request *rq);
> +
> +int i915_active_wait(struct i915_active *ref);
> +
> +int i915_request_await_active(struct i915_request *rq,
> +			      struct i915_active *ref);
> +
> +bool i915_active_acquire(struct i915_active *ref);
> +void i915_active_release(struct i915_active *ref);
> +
> +static inline bool
> +i915_active_is_idle(const struct i915_active *ref)
> +{
> +	return !ref->count;
> +}
> +
> +void i915_active_fini(struct i915_active *ref);
> +
> +#endif /* _I915_ACTIVE_H_ */
> diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
> new file mode 100644
> index 000000000000..411e502ed8dd
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_active_types.h
> @@ -0,0 +1,26 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef _I915_ACTIVE_TYPES_H_
> +#define _I915_ACTIVE_TYPES_H_
> +
> +#include <linux/rbtree.h>
> +
> +#include "i915_request.h"
> +
> +struct drm_i915_private;
> +
> +struct i915_active {
> +	struct drm_i915_private *i915;
> +
> +	struct rb_root tree;
> +	struct i915_gem_active last;
> +	unsigned int count;
> +
> +	void (*retire)(struct i915_active *ref);
> +};
> +
> +#endif /* _I915_ACTIVE_TYPES_H_ */
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 49b00996a15e..e625659c03a2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1917,14 +1917,13 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
>   	if (!vma)
>   		return ERR_PTR(-ENOMEM);
>   
> +	i915_active_init(i915, &vma->active, NULL);
>   	init_request_active(&vma->last_fence, NULL);
>   
>   	vma->vm = &ggtt->vm;
>   	vma->ops = &pd_vma_ops;
>   	vma->private = ppgtt;
>   
> -	vma->active = RB_ROOT;
> -
>   	vma->size = size;
>   	vma->fence_size = size;
>   	vma->flags = I915_VMA_GGTT;
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index d83b8ad5f859..d4772061e642 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -63,22 +63,23 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
>   
>   #endif
>   
> -struct i915_vma_active {
> -	struct i915_gem_active base;
> -	struct i915_vma *vma;
> -	struct rb_node node;
> -	u64 timeline;
> -};
> +static void obj_bump_mru(struct drm_i915_gem_object *obj)
> +{
> +	struct drm_i915_private *i915 = to_i915(obj->base.dev);
>   
> -static void
> -__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
> +	spin_lock(&i915->mm.obj_lock);
> +	if (obj->bind_count)
> +		list_move_tail(&obj->mm.link, &i915->mm.bound_list);
> +	spin_unlock(&i915->mm.obj_lock);
> +
> +	obj->mm.dirty = true; /* be paranoid  */
> +}
> +
> +static void __i915_vma_retire(struct i915_active *ref)
>   {
> +	struct i915_vma *vma = container_of(ref, typeof(*vma), active);
>   	struct drm_i915_gem_object *obj = vma->obj;
>   
> -	GEM_BUG_ON(!i915_vma_is_active(vma));
> -	if (--vma->active_count)
> -		return;
> -
>   	GEM_BUG_ON(!i915_gem_object_is_active(obj));
>   	if (--obj->active_count)
>   		return;
> @@ -90,16 +91,12 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
>   		reservation_object_unlock(obj->resv);
>   	}
>   
> -	/* Bump our place on the bound list to keep it roughly in LRU order
> +	/*
> +	 * Bump our place on the bound list to keep it roughly in LRU order
>   	 * so that we don't steal from recently used but inactive objects
>   	 * (unless we are forced to ofc!)
>   	 */
> -	spin_lock(&rq->i915->mm.obj_lock);
> -	if (obj->bind_count)
> -		list_move_tail(&obj->mm.link, &rq->i915->mm.bound_list);
> -	spin_unlock(&rq->i915->mm.obj_lock);
> -
> -	obj->mm.dirty = true; /* be paranoid  */
> +	obj_bump_mru(obj);
>   
>   	if (i915_gem_object_has_active_reference(obj)) {
>   		i915_gem_object_clear_active_reference(obj);
> @@ -107,21 +104,6 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
>   	}
>   }
>   
> -static void
> -i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
> -{
> -	struct i915_vma_active *active =
> -		container_of(base, typeof(*active), base);
> -
> -	__i915_vma_retire(active->vma, rq);
> -}
> -
> -static void
> -i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
> -{
> -	__i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
> -}
> -
>   static struct i915_vma *
>   vma_create(struct drm_i915_gem_object *obj,
>   	   struct i915_address_space *vm,
> @@ -137,10 +119,9 @@ vma_create(struct drm_i915_gem_object *obj,
>   	if (vma == NULL)
>   		return ERR_PTR(-ENOMEM);
>   
> -	vma->active = RB_ROOT;
> -
> -	init_request_active(&vma->last_active, i915_vma_last_retire);
> +	i915_active_init(vm->i915, &vma->active, __i915_vma_retire);
>   	init_request_active(&vma->last_fence, NULL);
> +
>   	vma->vm = vm;
>   	vma->ops = &vm->vma_ops;
>   	vma->obj = obj;
> @@ -823,7 +804,6 @@ void i915_vma_reopen(struct i915_vma *vma)
>   static void __i915_vma_destroy(struct i915_vma *vma)
>   {
>   	struct drm_i915_private *i915 = vma->vm->i915;
> -	struct i915_vma_active *iter, *n;
>   
>   	GEM_BUG_ON(vma->node.allocated);
>   	GEM_BUG_ON(vma->fence);
> @@ -843,10 +823,7 @@ static void __i915_vma_destroy(struct i915_vma *vma)
>   		spin_unlock(&obj->vma.lock);
>   	}
>   
> -	rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
> -		GEM_BUG_ON(i915_gem_active_isset(&iter->base));
> -		kfree(iter);
> -	}
> +	i915_active_fini(&vma->active);
>   
>   	kmem_cache_free(i915->vmas, vma);
>   }
> @@ -931,104 +908,15 @@ static void export_fence(struct i915_vma *vma,
>   	reservation_object_unlock(resv);
>   }
>   
> -static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
> -{
> -	struct i915_vma_active *active;
> -	struct rb_node **p, *parent;
> -	struct i915_request *old;
> -
> -	/*
> -	 * We track the most recently used timeline to skip a rbtree search
> -	 * for the common case, under typical loads we never need the rbtree
> -	 * at all. We can reuse the last_active slot if it is empty, that is
> -	 * after the previous activity has been retired, or if the active
> -	 * matches the current timeline.
> -	 *
> -	 * Note that we allow the timeline to be active simultaneously in
> -	 * the rbtree and the last_active cache. We do this to avoid having
> -	 * to search and replace the rbtree element for a new timeline, with
> -	 * the cost being that we must be aware that the vma may be retired
> -	 * twice for the same timeline (as the older rbtree element will be
> -	 * retired before the new request added to last_active).
> -	 */
> -	old = i915_gem_active_raw(&vma->last_active,
> -				  &vma->vm->i915->drm.struct_mutex);
> -	if (!old || old->fence.context == idx)
> -		goto out;
> -
> -	/* Move the currently active fence into the rbtree */
> -	idx = old->fence.context;
> -
> -	parent = NULL;
> -	p = &vma->active.rb_node;
> -	while (*p) {
> -		parent = *p;
> -
> -		active = rb_entry(parent, struct i915_vma_active, node);
> -		if (active->timeline == idx)
> -			goto replace;
> -
> -		if (active->timeline < idx)
> -			p = &parent->rb_right;
> -		else
> -			p = &parent->rb_left;
> -	}
> -
> -	active = kmalloc(sizeof(*active), GFP_KERNEL);
> -
> -	/* kmalloc may retire the vma->last_active request (thanks shrinker)! */
> -	if (unlikely(!i915_gem_active_raw(&vma->last_active,
> -					  &vma->vm->i915->drm.struct_mutex))) {
> -		kfree(active);
> -		goto out;
> -	}
> -
> -	if (unlikely(!active))
> -		return ERR_PTR(-ENOMEM);
> -
> -	init_request_active(&active->base, i915_vma_retire);
> -	active->vma = vma;
> -	active->timeline = idx;
> -
> -	rb_link_node(&active->node, parent, p);
> -	rb_insert_color(&active->node, &vma->active);
> -
> -replace:
> -	/*
> -	 * Overwrite the previous active slot in the rbtree with last_active,
> -	 * leaving last_active zeroed. If the previous slot is still active,
> -	 * we must be careful as we now only expect to receive one retire
> -	 * callback not two, and so much undo the active counting for the
> -	 * overwritten slot.
> -	 */
> -	if (i915_gem_active_isset(&active->base)) {
> -		/* Retire ourselves from the old rq->active_list */
> -		__list_del_entry(&active->base.link);
> -		vma->active_count--;
> -		GEM_BUG_ON(!vma->active_count);
> -	}
> -	GEM_BUG_ON(list_empty(&vma->last_active.link));
> -	list_replace_init(&vma->last_active.link, &active->base.link);
> -	active->base.request = fetch_and_zero(&vma->last_active.request);
> -
> -out:
> -	return &vma->last_active;
> -}
> -
>   int i915_vma_move_to_active(struct i915_vma *vma,
>   			    struct i915_request *rq,
>   			    unsigned int flags)
>   {
>   	struct drm_i915_gem_object *obj = vma->obj;
> -	struct i915_gem_active *active;
>   
>   	lockdep_assert_held(&rq->i915->drm.struct_mutex);
>   	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>   
> -	active = active_instance(vma, rq->fence.context);
> -	if (IS_ERR(active))
> -		return PTR_ERR(active);
> -
>   	/*
>   	 * Add a reference if we're newly entering the active list.
>   	 * The order in which we add operations to the retirement queue is
> @@ -1037,9 +925,15 @@ int i915_vma_move_to_active(struct i915_vma *vma,
>   	 * add the active reference first and queue for it to be dropped
>   	 * *last*.
>   	 */
> -	if (!i915_gem_active_isset(active) && !vma->active_count++)
> +	if (!vma->active.count)
>   		obj->active_count++;
> -	i915_gem_active_set(active, rq);
> +
> +	if (unlikely(i915_active_ref(&vma->active, rq->fence.context, rq))) {
> +		if (!vma->active.count)
> +			obj->active_count--;
> +		return -ENOMEM;
> +	}

You did not like streamlining the obj->active_count fiddling by making 
i915_active_ref return the old ref count or error?

> +
>   	GEM_BUG_ON(!i915_vma_is_active(vma));
>   	GEM_BUG_ON(!obj->active_count);
>   
> @@ -1073,8 +967,6 @@ int i915_vma_unbind(struct i915_vma *vma)
>   	 */
>   	might_sleep();
>   	if (i915_vma_is_active(vma)) {
> -		struct i915_vma_active *active, *n;
> -
>   		/*
>   		 * When a closed VMA is retired, it is unbound - eek.
>   		 * In order to prevent it from being recursively closed,
> @@ -1090,19 +982,10 @@ int i915_vma_unbind(struct i915_vma *vma)
>   		 */
>   		__i915_vma_pin(vma);
>   
> -		ret = i915_gem_active_retire(&vma->last_active,
> -					     &vma->vm->i915->drm.struct_mutex);
> +		ret = i915_active_wait(&vma->active);
>   		if (ret)
>   			goto unpin;
>   
> -		rbtree_postorder_for_each_entry_safe(active, n,
> -						     &vma->active, node) {
> -			ret = i915_gem_active_retire(&active->base,
> -						     &vma->vm->i915->drm.struct_mutex);
> -			if (ret)
> -				goto unpin;
> -		}
> -
>   		ret = i915_gem_active_retire(&vma->last_fence,
>   					     &vma->vm->i915->drm.struct_mutex);
>   unpin:
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 5793abe509a2..3c03d4569481 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -34,6 +34,7 @@
>   #include "i915_gem_fence_reg.h"
>   #include "i915_gem_object.h"
>   
> +#include "i915_active.h"
>   #include "i915_request.h"
>   
>   enum i915_cache_level;
> @@ -108,9 +109,7 @@ struct i915_vma {
>   #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
>   #define I915_VMA_GGTT_WRITE	BIT(15)
>   
> -	unsigned int active_count;
> -	struct rb_root active;
> -	struct i915_gem_active last_active;
> +	struct i915_active active;
>   	struct i915_gem_active last_fence;
>   
>   	/**
> @@ -154,9 +153,9 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
>   void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags);
>   #define I915_VMA_RELEASE_MAP BIT(0)
>   
> -static inline bool i915_vma_is_active(struct i915_vma *vma)
> +static inline bool i915_vma_is_active(const struct i915_vma *vma)
>   {
> -	return vma->active_count;
> +	return !i915_active_is_idle(&vma->active);
>   }
>   
>   int __must_check i915_vma_move_to_active(struct i915_vma *vma,
> diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
> new file mode 100644
> index 000000000000..c05ca366729a
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/selftests/i915_active.c
> @@ -0,0 +1,158 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2018 Intel Corporation
> + */
> +
> +#include "../i915_selftest.h"
> +
> +#include "igt_flush_test.h"
> +#include "lib_sw_fence.h"
> +
> +struct live_active {
> +	struct i915_active base;
> +	bool retired;
> +};
> +
> +static void __live_active_retire(struct i915_active *base)
> +{
> +	struct live_active *active = container_of(base, typeof(*active), base);
> +
> +	active->retired = true;
> +}
> +
> +static int __live_active_setup(struct drm_i915_private *i915,
> +			       struct live_active *active)
> +{
> +	struct intel_engine_cs *engine;
> +	struct i915_sw_fence *submit;
> +	enum intel_engine_id id;
> +	unsigned int count = 0;
> +	int err = 0;
> +
> +	i915_active_init(i915, &active->base, __live_active_retire);
> +	active->retired = false;
> +
> +	if (!i915_active_acquire(&active->base)) {
> +		pr_err("First i915_active_acquire should report being idle\n");
> +		return -EINVAL;
> +	}
> +
> +	submit = heap_fence_create(GFP_KERNEL);
> +
> +	for_each_engine(engine, i915, id) {
> +		struct i915_request *rq;
> +
> +		rq = i915_request_alloc(engine, i915->kernel_context);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			break;
> +		}
> +
> +		err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
> +						       submit,
> +						       GFP_KERNEL);
> +		if (err < 0) {
> +			pr_err("Failed to allocate submission fence!\n");
> +			i915_request_add(rq);
> +			break;
> +		}
> +
> +		err = i915_active_ref(&active->base, rq->fence.context, rq);
> +		if (err) {
> +			pr_err("Failed to track active ref!\n");
> +			i915_request_add(rq);
> +			break;
> +		}
> +
> +		i915_request_add(rq);

I guess you could consolidate all the i915_request_add calls into a 
single instance just after err = i915_sw_fence_await..

> +		count++;
> +	}
> +
> +	i915_active_release(&active->base);
> +	if (active->retired) {

If rq allocation failed, we get here due to i915_active_release retiring 
the tracker.

> +		pr_err("i915_active retired before submission!\n");
> +		err = -EINVAL;
> +	}
> +	if (active->base.count != count) {
> +		pr_err("i915_active not tracking all requests, found %d, expected %d\n",
> +		       active->base.count, count);
> +		err = -EINVAL;
> +	}
> +
> +	i915_sw_fence_commit(submit);
> +	heap_fence_put(submit);
> +
> +	return err;
> +}
> +
> +static int live_active_wait(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct live_active active;
> +	intel_wakeref_t wakeref;
> +	int err;
> +
> +	/* Check that we get a callback when requests retire upon waiting */
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +	wakeref = intel_runtime_pm_get(i915);
> +
> +	err = __live_active_setup(i915, &active);
> +
> +	i915_active_wait(&active.base);
> +	if (!active.retired) {
> +		pr_err("i915_active not retired after waiting!\n");
> +		err = -EINVAL;
> +	}
> +
> +	i915_active_fini(&active.base);
> +	if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +		err = -EIO;
> +
> +	intel_runtime_pm_put(i915, wakeref);
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	return err;
> +}
> +
> +static int live_active_retire(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct live_active active;
> +	intel_wakeref_t wakeref;
> +	int err;
> +
> +	/* Check that we get a callback when requests are indirectly retired */
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +	wakeref = intel_runtime_pm_get(i915);
> +
> +	err = __live_active_setup(i915, &active);
> +
> +	/* waits for & retires all requests */
> +	if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +		err = -EIO;
> +
> +	if (!active.retired) {
> +		pr_err("i915_active not retired after flushing!\n");
> +		err = -EINVAL;
> +	}
> +
> +	i915_active_fini(&active.base);
> +	intel_runtime_pm_put(i915, wakeref);
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	return err;
> +}
> +
> +int i915_active_live_selftests(struct drm_i915_private *i915)
> +{
> +	static const struct i915_subtest tests[] = {
> +		SUBTEST(live_active_wait),
> +		SUBTEST(live_active_retire),
> +	};
> +
> +	if (i915_terminally_wedged(&i915->gpu_error))
> +		return 0;
> +
> +	return i915_subtests(tests, i915);
> +}
> diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> index 76b4f87fc853..6d766925ad04 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> @@ -12,8 +12,9 @@
>   selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */
>   selftest(uncore, intel_uncore_live_selftests)
>   selftest(workarounds, intel_workarounds_live_selftests)
> -selftest(requests, i915_request_live_selftests)
>   selftest(timelines, i915_timeline_live_selftests)
> +selftest(requests, i915_request_live_selftests)
> +selftest(active, i915_active_live_selftests)
>   selftest(objects, i915_gem_object_live_selftests)
>   selftest(dmabuf, i915_gem_dmabuf_live_selftests)
>   selftest(coherency, i915_gem_coherency_live_selftests)
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/3] drm/i915: Generalise GPU activity tracking
  2019-01-31 11:25 ` [PATCH 1/3] " Tvrtko Ursulin
@ 2019-01-31 11:32   ` Chris Wilson
  2019-01-31 11:40     ` Tvrtko Ursulin
  0 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2019-01-31 11:32 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-01-31 11:25:10)
> 
> On 30/01/2019 20:50, Chris Wilson wrote:
> > We currently track GPU memory usage inside VMA, such that we never
> > release memory used by the GPU until after it has finished accessing it.
> > However, we may want to track other resources aside from VMA, or we may
> > want to split a VMA into multiple independent regions and track each
> > separately. For this purpose, generalise our request tracking (akin to
> > struct reservation_object) so that we can embed it into other objects.
> > 
> 
> Changelog where art thou?

The changes are patches 2 & 3!

This patch is just about moving code, we haven't come up with a new name
so nothing to change here yet. But I wanted to discuss the idea of
immediately parking and using global slabs.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/3] drm/i915: Release the active tracker tree upon idling
  2019-01-30 20:50 ` [PATCH 2/3] drm/i915: Release the active tracker tree upon idling Chris Wilson
@ 2019-01-31 11:33   ` Tvrtko Ursulin
  0 siblings, 0 replies; 15+ messages in thread
From: Tvrtko Ursulin @ 2019-01-31 11:33 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 30/01/2019 20:50, Chris Wilson wrote:
> As soon as we detect that the active tracker is idle and we prepare to
> call the retire callback, release the storage for our tree of
> per-timeline nodes. We expect these to be infrequently usage and quick

s/usage/used/

> to allocate, so there is little benefit in keeping the tree cached and
> we would prefer to return the pages back to the system in a timely
> fashion.
> 
> This also means that when we finalize the struct as a whole, we know as
> the activity tracker must be idle, the tree has already been released.
> Indeed we can reduce i915_active_fini() just the assertions that there
> is nothing to do.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_active.c | 33 +++++++++++++++++++++---------
>   drivers/gpu/drm/i915/i915_active.h |  4 ++++
>   2 files changed, 27 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index 91950d778cab..b1fefe98f9a6 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -16,12 +16,29 @@ struct active_node {
>   	u64 timeline;
>   };
>   
> +static void
> +__active_park(struct i915_active *ref)
> +{
> +	struct active_node *it, *n;
> +
> +	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
> +		GEM_BUG_ON(i915_gem_active_isset(&it->base));
> +		kfree(it);
> +	}
> +	ref->tree = RB_ROOT;
> +}
> +
>   static void
>   __active_retire(struct i915_active *ref)
>   {
>   	GEM_BUG_ON(!ref->count);
> -	if (!--ref->count)
> -		ref->retire(ref);
> +	if (--ref->count)
> +		return;
> +
> +	/* return the unused nodes to our slabcache */
> +	__active_park(ref);
> +
> +	ref->retire(ref);
>   }
>   
>   static void
> @@ -210,18 +227,14 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
>   	return 0;
>   }
>   
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
>   void i915_active_fini(struct i915_active *ref)
>   {
> -	struct active_node *it, *n;
> -
>   	GEM_BUG_ON(i915_gem_active_isset(&ref->last));
> -
> -	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
> -		GEM_BUG_ON(i915_gem_active_isset(&it->base));
> -		kfree(it);
> -	}
> -	ref->tree = RB_ROOT;
> +	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
> +	GEM_BUG_ON(ref->count);
>   }
> +#endif
>   
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>   #include "selftests/i915_active.c"
> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> index 8bd7cf757d1a..6130c6770d10 100644
> --- a/drivers/gpu/drm/i915/i915_active.h
> +++ b/drivers/gpu/drm/i915/i915_active.h
> @@ -57,6 +57,10 @@ i915_active_is_idle(const struct i915_active *ref)
>   	return !ref->count;
>   }
>   
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
>   void i915_active_fini(struct i915_active *ref);
> +#else
> +static inline void i915_active_fini(struct i915_active *ref) { }
> +#endif
>   
>   #endif /* _I915_ACTIVE_H_ */
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] drm/i915: Allocate active tracking nodes from a slabcache
  2019-01-30 20:50 ` [PATCH 3/3] drm/i915: Allocate active tracking nodes from a slabcache Chris Wilson
  2019-01-31  9:39   ` kbuild test robot
  2019-01-31  9:50   ` kbuild test robot
@ 2019-01-31 11:39   ` Tvrtko Ursulin
  2019-01-31 11:41     ` Chris Wilson
  2 siblings, 1 reply; 15+ messages in thread
From: Tvrtko Ursulin @ 2019-01-31 11:39 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx



On 30/01/2019 20:50, Chris Wilson wrote:
> Wrap the active tracking for a GPU references in a slabcache for faster
> allocations, and hopefully better fragmentation reduction.
>

v2 where art thou? :)

> v3: Nothing device specific left, it's just a slabcache that we can
> make global.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_active.c | 31 +++++++++++++++++++++++++++---
>   drivers/gpu/drm/i915/i915_active.h |  3 +++
>   drivers/gpu/drm/i915/i915_pci.c    |  3 +++
>   3 files changed, 34 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index b1fefe98f9a6..5818ddf88462 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -9,6 +9,17 @@
>   
>   #define BKL(ref) (&(ref)->i915->drm.struct_mutex)
>   
> +/*
> + * Active refs memory management
> + *
> + * To be more economical with memory, we reap all the i915_active trees as
> + * they idle (when we know the active requests are inactive) and allocate the
> + * nodes from a local slab cache to hopefully reduce the fragmentation.
> + */
> +static struct i915_global_active {
> +	struct kmem_cache *slab_cache;
> +} global;
> +
>   struct active_node {
>   	struct i915_gem_active base;
>   	struct i915_active *ref;
> @@ -23,7 +34,7 @@ __active_park(struct i915_active *ref)
>   
>   	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
>   		GEM_BUG_ON(i915_gem_active_isset(&it->base));
> -		kfree(it);
> +		kmem_cache_free(global.slab_cache, it);
>   	}
>   	ref->tree = RB_ROOT;
>   }
> @@ -96,11 +107,11 @@ active_instance(struct i915_active *ref, u64 idx)
>   			p = &parent->rb_left;
>   	}
>   
> -	node = kmalloc(sizeof(*node), GFP_KERNEL);
> +	node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
>   
>   	/* kmalloc may retire the ref->last (thanks shrinker)! */
>   	if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) {
> -		kfree(node);
> +		kmem_cache_free(global.slab_cache, node);
>   		goto out;
>   	}
>   
> @@ -234,6 +245,20 @@ void i915_active_fini(struct i915_active *ref)
>   	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
>   	GEM_BUG_ON(ref->count);
>   }
> +
> +int __init i915_global_active_init(void)
> +{
> +	global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
> +	if (!global.slab_cache)
> +		return -ENOMEM;
> +
> +	return 0;
> +}
> +
> +void __exit i915_global_active_exit(void)
> +{
> +	kmem_cache_destroy(global.slab_cache);
> +}
>   #endif
>   
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> index 6130c6770d10..48fdb1497883 100644
> --- a/drivers/gpu/drm/i915/i915_active.h
> +++ b/drivers/gpu/drm/i915/i915_active.h
> @@ -63,4 +63,7 @@ void i915_active_fini(struct i915_active *ref);
>   static inline void i915_active_fini(struct i915_active *ref) { }
>   #endif
>   
> +int i915_global_active_init(void);
> +void i915_global_active_exit(void);
> +
>   #endif /* _I915_ACTIVE_H_ */
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 44c23ac60347..751a787c83d1 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c

Add explicit #include "i915_active.h"?

> @@ -793,6 +793,8 @@ static int __init i915_init(void)
>   	bool use_kms = true;
>   	int err;
>   
> +	i915_global_active_init();
> +
>   	err = i915_mock_selftests();
>   	if (err)
>   		return err > 0 ? 0 : err;
> @@ -824,6 +826,7 @@ static void __exit i915_exit(void)
>   		return;
>   
>   	pci_unregister_driver(&i915_pci_driver);
> +	i915_global_active_exit();
>   }
>   
>   module_init(i915_init);
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/3] drm/i915: Generalise GPU activity tracking
  2019-01-31 11:32   ` Chris Wilson
@ 2019-01-31 11:40     ` Tvrtko Ursulin
  0 siblings, 0 replies; 15+ messages in thread
From: Tvrtko Ursulin @ 2019-01-31 11:40 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 31/01/2019 11:32, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-01-31 11:25:10)
>>
>> On 30/01/2019 20:50, Chris Wilson wrote:
>>> We currently track GPU memory usage inside VMA, such that we never
>>> release memory used by the GPU until after it has finished accessing it.
>>> However, we may want to track other resources aside from VMA, or we may
>>> want to split a VMA into multiple independent regions and track each
>>> separately. For this purpose, generalise our request tracking (akin to
>>> struct reservation_object) so that we can embed it into other objects.
>>>
>>
>> Changelog where art thou?
> 
> The changes are patches 2 & 3!
> 
> This patch is just about moving code, we haven't come up with a new name
> so nothing to change here yet. But I wanted to discuss to the idea of
> immediately parking and using global slabs.

There's a new GEM_BUG_ON! ;)

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] drm/i915: Allocate active tracking nodes from a slabcache
  2019-01-31 11:39   ` Tvrtko Ursulin
@ 2019-01-31 11:41     ` Chris Wilson
  0 siblings, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2019-01-31 11:41 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-01-31 11:39:46)
> 
> 
> On 30/01/2019 20:50, Chris Wilson wrote:
> > Wrap the active tracking for a GPU references in a slabcache for faster
> > allocations, and hopefully better fragmentation reduction.
> >
> 
> v2 where art thou? :)

You killed v2!

> > v3: Nothing device specific left, it's just a slabcache that we can
> > make global.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> > diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> > index 6130c6770d10..48fdb1497883 100644
> > --- a/drivers/gpu/drm/i915/i915_active.h
> > +++ b/drivers/gpu/drm/i915/i915_active.h
> > @@ -63,4 +63,7 @@ void i915_active_fini(struct i915_active *ref);
> >   static inline void i915_active_fini(struct i915_active *ref) { }
> >   #endif
> >   
> > +int i915_global_active_init(void);
> > +void i915_global_active_exit(void);
> > +
> >   #endif /* _I915_ACTIVE_H_ */
> > diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> > index 44c23ac60347..751a787c83d1 100644
> > --- a/drivers/gpu/drm/i915/i915_pci.c
> > +++ b/drivers/gpu/drm/i915/i915_pci.c
> 
> Add explicit #include "i915_active.h"?

Indeed.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2019-01-31 11:41 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-30 20:50 [PATCH 1/3] drm/i915: Generalise GPU activity tracking Chris Wilson
2019-01-30 20:50 ` [PATCH 2/3] drm/i915: Release the active tracker tree upon idling Chris Wilson
2019-01-31 11:33   ` Tvrtko Ursulin
2019-01-30 20:50 ` [PATCH 3/3] drm/i915: Allocate active tracking nodes from a slabcache Chris Wilson
2019-01-31  9:39   ` kbuild test robot
2019-01-31  9:50   ` kbuild test robot
2019-01-31 11:39   ` Tvrtko Ursulin
2019-01-31 11:41     ` Chris Wilson
2019-01-30 22:56 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: Generalise GPU activity tracking Patchwork
2019-01-30 22:58 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-01-30 23:19 ` ✓ Fi.CI.BAT: success " Patchwork
2019-01-31  9:06 ` ✓ Fi.CI.IGT: " Patchwork
2019-01-31 11:25 ` [PATCH 1/3] " Tvrtko Ursulin
2019-01-31 11:32   ` Chris Wilson
2019-01-31 11:40     ` Tvrtko Ursulin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.