All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt
@ 2019-07-23 18:38 Chris Wilson
  2019-07-23 18:38 ` [PATCH 02/23] drm/i915/gt: Provide a local intel_context.vm Chris Wilson
                   ` (26 more replies)
  0 siblings, 27 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx

The aliasing_ppgtt provides a PIN_USER alias for the global gtt, so move
it under the i915_ggtt to simplify later transformations to enable
intel_context.vm.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  7 +-
 .../drm/i915/gem/selftests/i915_gem_context.c |  2 +-
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    | 69 ++++++++++++-------
 drivers/gpu/drm/i915/i915_drv.h               |  3 -
 drivers/gpu/drm/i915/i915_gem_gtt.c           | 36 +++++-----
 drivers/gpu/drm/i915/i915_gem_gtt.h           |  3 +
 drivers/gpu/drm/i915/i915_vma.c               |  2 +-
 7 files changed, 71 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index ffb59d96d4d8..0f6b0678f548 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -459,8 +459,7 @@ __create_context(struct drm_i915_private *i915)
 	i915_gem_context_set_recoverable(ctx);
 
 	ctx->ring_size = 4 * PAGE_SIZE;
-	ctx->desc_template =
-		default_desc_template(i915, &i915->mm.aliasing_ppgtt->vm);
+	ctx->desc_template = default_desc_template(i915, NULL);
 
 	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
 		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
@@ -2258,8 +2257,8 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 		args->size = 0;
 		if (ctx->vm)
 			args->value = ctx->vm->total;
-		else if (to_i915(dev)->mm.aliasing_ppgtt)
-			args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total;
+		else if (to_i915(dev)->ggtt.alias)
+			args->value = to_i915(dev)->ggtt.alias->vm.total;
 		else
 			args->value = to_i915(dev)->ggtt.vm.total;
 		break;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index db7856f0f31e..bbd17d4b8ffd 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -1190,7 +1190,7 @@ static int igt_ctx_readonly(void *arg)
 		goto out_unlock;
 	}
 
-	vm = ctx->vm ?: &i915->mm.aliasing_ppgtt->vm;
+	vm = ctx->vm ?: &i915->ggtt.alias->vm;
 	if (!vm || !vm->has_read_only) {
 		err = 0;
 		goto out_unlock;
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 1de19dac4a14..b056f25c66f2 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1392,30 +1392,41 @@ static void ring_context_destroy(struct kref *ref)
 	intel_context_free(ce);
 }
 
-static int __context_pin_ppgtt(struct i915_gem_context *ctx)
+static struct i915_address_space *vm_alias(struct intel_context *ce)
+{
+	struct i915_address_space *vm;
+
+	vm = ce->gem_context->vm;
+	if (!vm)
+		vm = &ce->engine->gt->ggtt->alias->vm;
+
+	return vm;
+}
+
+static int __context_pin_ppgtt(struct intel_context *ce)
 {
 	struct i915_address_space *vm;
 	int err = 0;
 
-	vm = ctx->vm ?: &ctx->i915->mm.aliasing_ppgtt->vm;
+	vm = vm_alias(ce);
 	if (vm)
 		err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)));
 
 	return err;
 }
 
-static void __context_unpin_ppgtt(struct i915_gem_context *ctx)
+static void __context_unpin_ppgtt(struct intel_context *ce)
 {
 	struct i915_address_space *vm;
 
-	vm = ctx->vm ?: &ctx->i915->mm.aliasing_ppgtt->vm;
+	vm = vm_alias(ce);
 	if (vm)
 		gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
 }
 
 static void ring_context_unpin(struct intel_context *ce)
 {
-	__context_unpin_ppgtt(ce->gem_context);
+	__context_unpin_ppgtt(ce);
 }
 
 static struct i915_vma *
@@ -1509,7 +1520,7 @@ static int ring_context_pin(struct intel_context *ce)
 	if (err)
 		return err;
 
-	err = __context_pin_ppgtt(ce->gem_context);
+	err = __context_pin_ppgtt(ce);
 	if (err)
 		goto err_active;
 
@@ -1701,7 +1712,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 	return 0;
 }
 
-static int remap_l3(struct i915_request *rq, int slice)
+static int remap_l3_slice(struct i915_request *rq, int slice)
 {
 	u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
 	int i;
@@ -1729,15 +1740,34 @@ static int remap_l3(struct i915_request *rq, int slice)
 	return 0;
 }
 
+static int remap_l3(struct i915_request *rq)
+{
+	struct i915_gem_context *ctx = rq->gem_context;
+	int i, err;
+
+	if (!ctx->remap_slice)
+		return 0;
+
+	for (i = 0; i < MAX_L3_SLICES; i++) {
+		if (!(ctx->remap_slice & BIT(i)))
+			continue;
+
+		err = remap_l3_slice(rq, i);
+		if (err)
+			return err;
+	}
+
+	ctx->remap_slice = 0;
+	return 0;
+}
+
 static int switch_context(struct i915_request *rq)
 {
 	struct intel_engine_cs *engine = rq->engine;
-	struct i915_gem_context *ctx = rq->gem_context;
-	struct i915_address_space *vm =
-		ctx->vm ?: &rq->i915->mm.aliasing_ppgtt->vm;
+	struct i915_address_space *vm = vm_alias(rq->hw_context);
 	unsigned int unwind_mm = 0;
 	u32 hw_flags = 0;
-	int ret, i;
+	int ret;
 
 	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
 
@@ -1781,7 +1811,7 @@ static int switch_context(struct i915_request *rq)
 		 * as nothing actually executes using the kernel context; it
 		 * is purely used for flushing user contexts.
 		 */
-		if (i915_gem_context_is_kernel(ctx))
+		if (i915_gem_context_is_kernel(rq->gem_context))
 			hw_flags = MI_RESTORE_INHIBIT;
 
 		ret = mi_set_context(rq, hw_flags);
@@ -1815,18 +1845,9 @@ static int switch_context(struct i915_request *rq)
 			goto err_mm;
 	}
 
-	if (ctx->remap_slice) {
-		for (i = 0; i < MAX_L3_SLICES; i++) {
-			if (!(ctx->remap_slice & BIT(i)))
-				continue;
-
-			ret = remap_l3(rq, i);
-			if (ret)
-				goto err_mm;
-		}
-
-		ctx->remap_slice = 0;
-	}
+	ret = remap_l3(rq);
+	if (ret)
+		goto err_mm;
 
 	return 0;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0e44cc4b2ca1..269a1b32b48b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -784,9 +784,6 @@ struct i915_gem_mm {
 	 */
 	struct vfsmount *gemfs;
 
-	/** PPGTT used for aliasing the PPGTT with the GTT */
-	struct i915_ppgtt *aliasing_ppgtt;
-
 	struct notifier_block oom_notifier;
 	struct notifier_block vmap_notifier;
 	struct shrinker shrinker;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 4dd1fa956143..8304b98b0bf8 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2446,18 +2446,18 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 		pte_flags |= PTE_READ_ONLY;
 
 	if (flags & I915_VMA_LOCAL_BIND) {
-		struct i915_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
+		struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;
 
 		if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
-			ret = appgtt->vm.allocate_va_range(&appgtt->vm,
-							   vma->node.start,
-							   vma->size);
+			ret = alias->vm.allocate_va_range(&alias->vm,
+							  vma->node.start,
+							  vma->size);
 			if (ret)
 				return ret;
 		}
 
-		appgtt->vm.insert_entries(&appgtt->vm, vma, cache_level,
-					  pte_flags);
+		alias->vm.insert_entries(&alias->vm, vma,
+					 cache_level, pte_flags);
 	}
 
 	if (flags & I915_VMA_GLOBAL_BIND) {
@@ -2485,7 +2485,8 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
 	}
 
 	if (vma->flags & I915_VMA_LOCAL_BIND) {
-		struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->vm;
+		struct i915_address_space *vm =
+			&i915_vm_to_ggtt(vma->vm)->alias->vm;
 
 		vm->clear_range(vm, vma->node.start, vma->size);
 	}
@@ -2542,13 +2543,12 @@ static void i915_gtt_color_adjust(const struct drm_mm_node *node,
 		*end -= I915_GTT_PAGE_SIZE;
 }
 
-static int init_aliasing_ppgtt(struct drm_i915_private *i915)
+static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
 {
-	struct i915_ggtt *ggtt = &i915->ggtt;
 	struct i915_ppgtt *ppgtt;
 	int err;
 
-	ppgtt = i915_ppgtt_create(i915);
+	ppgtt = i915_ppgtt_create(ggtt->vm.i915);
 	if (IS_ERR(ppgtt))
 		return PTR_ERR(ppgtt);
 
@@ -2567,7 +2567,7 @@ static int init_aliasing_ppgtt(struct drm_i915_private *i915)
 	if (err)
 		goto err_ppgtt;
 
-	i915->mm.aliasing_ppgtt = ppgtt;
+	ggtt->alias = ppgtt;
 
 	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
 	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
@@ -2582,14 +2582,14 @@ static int init_aliasing_ppgtt(struct drm_i915_private *i915)
 	return err;
 }
 
-static void fini_aliasing_ppgtt(struct drm_i915_private *i915)
+static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
 {
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct drm_i915_private *i915 = ggtt->vm.i915;
 	struct i915_ppgtt *ppgtt;
 
 	mutex_lock(&i915->drm.struct_mutex);
 
-	ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt);
+	ppgtt = fetch_and_zero(&ggtt->alias);
 	if (!ppgtt)
 		goto out;
 
@@ -2706,7 +2706,7 @@ int i915_init_ggtt(struct drm_i915_private *i915)
 		return ret;
 
 	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
-		ret = init_aliasing_ppgtt(i915);
+		ret = init_aliasing_ppgtt(&i915->ggtt);
 		if (ret)
 			cleanup_init_ggtt(&i915->ggtt);
 	}
@@ -2752,7 +2752,7 @@ void i915_ggtt_driver_release(struct drm_i915_private *i915)
 {
 	struct pagevec *pvec;
 
-	fini_aliasing_ppgtt(i915);
+	fini_aliasing_ppgtt(&i915->ggtt);
 
 	ggtt_cleanup_hw(&i915->ggtt);
 
@@ -3585,7 +3585,7 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm,
 	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
 	GEM_BUG_ON(range_overflows(offset, size, vm->total));
-	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
+	GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm);
 	GEM_BUG_ON(drm_mm_node_allocated(node));
 
 	node->size = size;
@@ -3682,7 +3682,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 	GEM_BUG_ON(start >= end);
 	GEM_BUG_ON(start > 0  && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
-	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
+	GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm);
 	GEM_BUG_ON(drm_mm_node_allocated(node));
 
 	if (unlikely(range_overflows(start, size, end)))
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index cea59ef1a365..51274483502e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -394,6 +394,9 @@ struct i915_ggtt {
 	void __iomem *gsm;
 	void (*invalidate)(struct i915_ggtt *ggtt);
 
+	/** PPGTT used for aliasing the PPGTT with the GTT */
+	struct i915_ppgtt *alias;
+
 	bool do_idle_maps;
 
 	int mtrr;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index ee73baf29415..eb16a1a93bbc 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -104,7 +104,7 @@ vma_create(struct drm_i915_gem_object *obj,
 	struct rb_node *rb, **p;
 
 	/* The aliasing_ppgtt should never be used directly! */
-	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
+	GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm);
 
 	vma = i915_vma_alloc();
 	if (vma == NULL)
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 02/23] drm/i915/gt: Provide a local intel_context.vm
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-23 18:38 ` [PATCH 03/23] drm/i915: Remove lrc default desc from GEM context Chris Wilson
                   ` (25 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx

Track the currently bound address space used by the HW context. Minor
conversions to use the local intel_context.vm are made, leaving behind
some more surgery required to make intel_context the primary through the
selftests.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_client_blt.c    |  4 +---
 drivers/gpu/drm/i915/gem/i915_gem_context.c       | 15 +++++++++++----
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c    | 11 +++--------
 drivers/gpu/drm/i915/gem/i915_gem_object_blt.c    |  6 +-----
 .../gpu/drm/i915/gem/selftests/i915_gem_context.c |  2 +-
 drivers/gpu/drm/i915/gt/intel_context.c           |  4 ++++
 drivers/gpu/drm/i915/gt/intel_context_types.h     |  4 +++-
 drivers/gpu/drm/i915/gt/intel_lrc.c               |  9 +++------
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c        |  6 +++---
 drivers/gpu/drm/i915/gvt/scheduler.c              |  2 +-
 10 files changed, 31 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index 6f537e8e4dea..2312a0c6af89 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -250,13 +250,11 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
 				     u32 value)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct i915_gem_context *ctx = ce->gem_context;
-	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
 	struct clear_pages_work *work;
 	struct i915_sleeve *sleeve;
 	int err;
 
-	sleeve = create_sleeve(vm, obj, pages, page_sizes);
+	sleeve = create_sleeve(ce->vm, obj, pages, page_sizes);
 	if (IS_ERR(sleeve))
 		return PTR_ERR(sleeve);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 0f6b0678f548..b28c7ca681a8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -475,10 +475,18 @@ static struct i915_address_space *
 __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
 {
 	struct i915_address_space *old = ctx->vm;
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
 
 	ctx->vm = i915_vm_get(vm);
 	ctx->desc_template = default_desc_template(ctx->i915, vm);
 
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		i915_vm_put(ce->vm);
+		ce->vm = i915_vm_get(vm);
+	}
+	i915_gem_context_unlock_engines(ctx);
+
 	return old;
 }
 
@@ -1004,7 +1012,7 @@ static void set_ppgtt_barrier(void *data)
 
 static int emit_ppgtt_update(struct i915_request *rq, void *data)
 {
-	struct i915_address_space *vm = rq->gem_context->vm;
+	struct i915_address_space *vm = rq->hw_context->vm;
 	struct intel_engine_cs *engine = rq->engine;
 	u32 base = engine->mmio_base;
 	u32 *cs;
@@ -1113,9 +1121,8 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
 				   set_ppgtt_barrier,
 				   old);
 	if (err) {
-		ctx->vm = old;
-		ctx->desc_template = default_desc_template(ctx->i915, old);
-		i915_vm_put(vm);
+		i915_vm_put(__set_ppgtt(ctx, old));
+		i915_vm_put(old);
 	}
 
 unlock:
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 8a2047c4e7c3..cbd7c6e3a1f8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -223,7 +223,6 @@ struct i915_execbuffer {
 	struct intel_engine_cs *engine; /** engine to queue the request to */
 	struct intel_context *context; /* logical state for the request */
 	struct i915_gem_context *gem_context; /** caller's context */
-	struct i915_address_space *vm; /** GTT and vma for the request */
 
 	struct i915_request *request; /** our request to build */
 	struct i915_vma *batch; /** identity of the batch obj/vma */
@@ -697,7 +696,7 @@ static int eb_reserve(struct i915_execbuffer *eb)
 
 		case 1:
 			/* Too fragmented, unbind everything and retry */
-			err = i915_gem_evict_vm(eb->vm);
+			err = i915_gem_evict_vm(eb->context->vm);
 			if (err)
 				return err;
 			break;
@@ -725,12 +724,8 @@ static int eb_select_context(struct i915_execbuffer *eb)
 		return -ENOENT;
 
 	eb->gem_context = ctx;
-	if (ctx->vm) {
-		eb->vm = ctx->vm;
+	if (ctx->vm)
 		eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
-	} else {
-		eb->vm = &eb->i915->ggtt.vm;
-	}
 
 	eb->context_flags = 0;
 	if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags))
@@ -832,7 +827,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 			goto err_vma;
 		}
 
-		vma = i915_vma_instance(obj, eb->vm, NULL);
+		vma = i915_vma_instance(obj, eb->context->vm, NULL);
 		if (IS_ERR(vma)) {
 			err = PTR_ERR(vma);
 			goto err_obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
index cb42e3a312e2..685064af32d1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
@@ -47,15 +47,11 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
 			     struct intel_context *ce,
 			     u32 value)
 {
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct i915_gem_context *ctx = ce->gem_context;
-	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
 	struct i915_request *rq;
 	struct i915_vma *vma;
 	int err;
 
-	/* XXX: ce->vm please */
-	vma = i915_vma_instance(obj, vm, NULL);
+	vma = i915_vma_instance(obj, ce->vm, NULL);
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index bbd17d4b8ffd..7f9f6701b32c 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -747,7 +747,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 
 	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
 
-	vma = i915_vma_instance(obj, ce->gem_context->vm, NULL);
+	vma = i915_vma_instance(obj, ce->vm, NULL);
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 9292b6ca5e9c..9e4f51ce52ff 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -191,6 +191,8 @@ intel_context_init(struct intel_context *ce,
 	kref_init(&ce->ref);
 
 	ce->gem_context = ctx;
+	ce->vm = i915_vm_get(ctx->vm ?: &engine->gt->ggtt->vm);
+
 	ce->engine = engine;
 	ce->ops = engine->cops;
 	ce->sseu = engine->sseu;
@@ -206,6 +208,8 @@ intel_context_init(struct intel_context *ce,
 
 void intel_context_fini(struct intel_context *ce)
 {
+	i915_vm_put(ce->vm);
+
 	mutex_destroy(&ce->pin_mutex);
 	i915_active_fini(&ce->active);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 4c0e211c715d..68a7e979b1a9 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -36,7 +36,6 @@ struct intel_context_ops {
 struct intel_context {
 	struct kref ref;
 
-	struct i915_gem_context *gem_context;
 	struct intel_engine_cs *engine;
 	struct intel_engine_cs *inflight;
 #define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2)
@@ -44,6 +43,9 @@ struct intel_context {
 #define intel_context_inflight_inc(ce) ptr_count_inc(&(ce)->inflight)
 #define intel_context_inflight_dec(ce) ptr_count_dec(&(ce)->inflight)
 
+	struct i915_address_space *vm;
+	struct i915_gem_context *gem_context;
+
 	struct list_head signal_link;
 	struct list_head signals;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 884dfc1cb033..632344c163a8 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1606,8 +1606,6 @@ __execlists_context_pin(struct intel_context *ce,
 	void *vaddr;
 	int ret;
 
-	GEM_BUG_ON(!ce->gem_context->vm);
-
 	ret = execlists_context_deferred_alloc(ce, engine);
 	if (ret)
 		goto err;
@@ -1717,8 +1715,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
 static int emit_pdps(struct i915_request *rq)
 {
 	const struct intel_engine_cs * const engine = rq->engine;
-	struct i915_ppgtt * const ppgtt =
-		i915_vm_to_ppgtt(rq->gem_context->vm);
+	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->hw_context->vm);
 	int err, i;
 	u32 *cs;
 
@@ -1791,7 +1788,7 @@ static int execlists_request_alloc(struct i915_request *request)
 	 */
 
 	/* Unconditionally invalidate GPU caches and TLBs. */
-	if (i915_vm_is_4lvl(request->gem_context->vm))
+	if (i915_vm_is_4lvl(request->hw_context->vm))
 		ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
 	else
 		ret = emit_pdps(request);
@@ -2941,7 +2938,7 @@ static void execlists_init_reg_state(u32 *regs,
 				     struct intel_engine_cs *engine,
 				     struct intel_ring *ring)
 {
-	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->gem_context->vm);
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
 	bool rcs = engine->class == RENDER_CLASS;
 	u32 base = engine->mmio_base;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index b056f25c66f2..38ec11ae6ed7 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1396,9 +1396,9 @@ static struct i915_address_space *vm_alias(struct intel_context *ce)
 {
 	struct i915_address_space *vm;
 
-	vm = ce->gem_context->vm;
-	if (!vm)
-		vm = &ce->engine->gt->ggtt->alias->vm;
+	vm = ce->vm;
+	if (i915_is_ggtt(vm))
+		vm = &i915_vm_to_ggtt(vm)->alias->vm;
 
 	return vm;
 }
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 2144fb46d0e1..f68798ab1e7c 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -1156,7 +1156,7 @@ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu)
 
 	intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0);
 
-	i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(s->shadow[0]->gem_context->vm));
+	i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(s->shadow[0]->vm));
 	for_each_engine(engine, vgpu->gvt->dev_priv, id)
 		intel_context_unpin(s->shadow[id]);
 
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 03/23] drm/i915: Remove lrc default desc from GEM context
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
  2019-07-23 18:38 ` [PATCH 02/23] drm/i915/gt: Provide a local intel_context.vm Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-24  9:20   ` Tvrtko Ursulin
  2019-07-23 18:38 ` [PATCH 04/23] drm/i915: Push the ring creation flags to the backend Chris Wilson
                   ` (24 subsequent siblings)
  26 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx

We only compute the lrc_descriptor() on pinning the context, i.e.
infrequently, so we do not benefit from storing the template as the
addressing mode is also fixed for the lifetime of the intel_context.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 28 ++-----------------
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  2 --
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 12 +++++---
 drivers/gpu/drm/i915/gvt/scheduler.c          |  3 --
 4 files changed, 10 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index b28c7ca681a8..1b3dc7258ef2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -397,30 +397,6 @@ static void context_close(struct i915_gem_context *ctx)
 	i915_gem_context_put(ctx);
 }
 
-static u32 default_desc_template(const struct drm_i915_private *i915,
-				 const struct i915_address_space *vm)
-{
-	u32 address_mode;
-	u32 desc;
-
-	desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
-
-	address_mode = INTEL_LEGACY_32B_CONTEXT;
-	if (vm && i915_vm_is_4lvl(vm))
-		address_mode = INTEL_LEGACY_64B_CONTEXT;
-	desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT;
-
-	if (IS_GEN(i915, 8))
-		desc |= GEN8_CTX_L3LLC_COHERENT;
-
-	/* TODO: WaDisableLiteRestore when we start using semaphore
-	 * signalling between Command Streamers
-	 * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE;
-	 */
-
-	return desc;
-}
-
 static struct i915_gem_context *
 __create_context(struct drm_i915_private *i915)
 {
@@ -459,7 +435,6 @@ __create_context(struct drm_i915_private *i915)
 	i915_gem_context_set_recoverable(ctx);
 
 	ctx->ring_size = 4 * PAGE_SIZE;
-	ctx->desc_template = default_desc_template(i915, NULL);
 
 	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
 		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
@@ -478,8 +453,9 @@ __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
 
+	GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old));
+
 	ctx->vm = i915_vm_get(vm);
-	ctx->desc_template = default_desc_template(ctx->i915, vm);
 
 	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
 		i915_vm_put(ce->vm);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 0ee61482ef94..a02d98494078 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -171,8 +171,6 @@ struct i915_gem_context {
 
 	/** ring_size: size for allocating the per-engine ring buffer */
 	u32 ring_size;
-	/** desc_template: invariant fields for the HW context descriptor */
-	u32 desc_template;
 
 	/** guilty_count: How many times this context has caused a GPU hang. */
 	atomic_t guilty_count;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 632344c163a8..5fdac40015cf 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -418,13 +418,17 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
 	BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
 
-	desc = ctx->desc_template;				/* bits  0-11 */
-	GEM_BUG_ON(desc & GENMASK_ULL(63, 12));
+	desc = INTEL_LEGACY_32B_CONTEXT;
+	if (i915_vm_is_4lvl(ce->vm))
+		desc = INTEL_LEGACY_64B_CONTEXT;
+	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
+
+	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
+	if (IS_GEN(engine->i915, 8))
+		desc |= GEN8_CTX_L3LLC_COHERENT;
 
 	desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
 								/* bits 12-31 */
-	GEM_BUG_ON(desc & GENMASK_ULL(63, 32));
-
 	/*
 	 * The following 32bits are copied into the OA reports (dword 2).
 	 * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index f68798ab1e7c..4c018fb1359c 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -291,9 +291,6 @@ shadow_context_descriptor_update(struct intel_context *ce,
 	 * Update bits 0-11 of the context descriptor which includes flags
 	 * like GEN8_CTX_* cached in desc_template
 	 */
-	desc &= U64_MAX << 12;
-	desc |= ce->gem_context->desc_template & ((1ULL << 12) - 1);
-
 	desc &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
 	desc |= workload->ctx_desc.addressing_mode <<
 		GEN8_CTX_ADDRESSING_MODE_SHIFT;
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 04/23] drm/i915: Push the ring creation flags to the backend
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
  2019-07-23 18:38 ` [PATCH 02/23] drm/i915/gt: Provide a local intel_context.vm Chris Wilson
  2019-07-23 18:38 ` [PATCH 03/23] drm/i915: Remove lrc default desc from GEM context Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-24 11:11   ` Tvrtko Ursulin
  2019-07-23 18:38 ` [PATCH 05/23] drm/i915: Flush extra hard after writing relocations through the GTT Chris Wilson
                   ` (23 subsequent siblings)
  26 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx

Push the ring creation flags from the outer GEM context to the inner
intel_context to avoid an unsightly back-reference from inside the
backend.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 25 +++++++++++++------
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  3 ---
 drivers/gpu/drm/i915/gt/intel_context.c       |  1 +
 drivers/gpu/drm/i915/gt/intel_context.h       |  5 ++++
 drivers/gpu/drm/i915/gt/intel_lrc.c           |  5 ++--
 drivers/gpu/drm/i915/i915_debugfs.c           | 23 +++++++++++------
 6 files changed, 41 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 1b3dc7258ef2..18b226bc5e3a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -296,6 +296,8 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
 			return ERR_CAST(ce);
 		}
 
+		ce->ring = __intel_context_ring_size(SZ_16K);
+
 		e->engines[id] = ce;
 	}
 	e->num_engines = id;
@@ -434,8 +436,6 @@ __create_context(struct drm_i915_private *i915)
 	i915_gem_context_set_bannable(ctx);
 	i915_gem_context_set_recoverable(ctx);
 
-	ctx->ring_size = 4 * PAGE_SIZE;
-
 	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
 		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
 
@@ -565,8 +565,15 @@ i915_gem_context_create_gvt(struct drm_device *dev)
 	i915_gem_context_set_closed(ctx); /* not user accessible */
 	i915_gem_context_clear_bannable(ctx);
 	i915_gem_context_set_force_single_submission(ctx);
-	if (!USES_GUC_SUBMISSION(to_i915(dev)))
-		ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */
+	if (!USES_GUC_SUBMISSION(to_i915(dev))) {
+		const unsigned long ring_size = 512 * SZ_4K; /* max */
+		struct i915_gem_engines_iter it;
+		struct intel_context *ce;
+
+		for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
+			ce->ring = __intel_context_ring_size(ring_size);
+		i915_gem_context_unlock_engines(ctx);
+	}
 
 	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
 out:
@@ -605,7 +612,6 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 
 	i915_gem_context_clear_bannable(ctx);
 	ctx->sched.priority = I915_USER_PRIORITY(prio);
-	ctx->ring_size = PAGE_SIZE;
 
 	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
 
@@ -1589,6 +1595,7 @@ set_engines(struct i915_gem_context *ctx,
 	for (n = 0; n < num_engines; n++) {
 		struct i915_engine_class_instance ci;
 		struct intel_engine_cs *engine;
+		struct intel_context *ce;
 
 		if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) {
 			__free_engines(set.engines, n);
@@ -1611,11 +1618,15 @@ set_engines(struct i915_gem_context *ctx,
 			return -ENOENT;
 		}
 
-		set.engines->engines[n] = intel_context_create(ctx, engine);
-		if (!set.engines->engines[n]) {
+		ce = intel_context_create(ctx, engine);
+		if (!ce) {
 			__free_engines(set.engines, n);
 			return -ENOMEM;
 		}
+
+		ce->ring = __intel_context_ring_size(SZ_16K);
+
+		set.engines->engines[n] = ce;
 	}
 	set.engines->num_engines = num_engines;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index a02d98494078..260d59cc3de8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -169,9 +169,6 @@ struct i915_gem_context {
 
 	struct i915_sched_attr sched;
 
-	/** ring_size: size for allocating the per-engine ring buffer */
-	u32 ring_size;
-
 	/** guilty_count: How many times this context has caused a GPU hang. */
 	atomic_t guilty_count;
 	/**
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 9e4f51ce52ff..295fa0ddbcac 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -196,6 +196,7 @@ intel_context_init(struct intel_context *ce,
 	ce->engine = engine;
 	ce->ops = engine->cops;
 	ce->sseu = engine->sseu;
+	ce->ring = __intel_context_ring_size(SZ_4K);
 
 	INIT_LIST_HEAD(&ce->signal_link);
 	INIT_LIST_HEAD(&ce->signals);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 23c7e4c0ce7c..3f54eb3d10ab 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -145,4 +145,9 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
 
 struct i915_request *intel_context_create_request(struct intel_context *ce);
 
+static inline struct intel_ring *__intel_context_ring_size(u64 sz)
+{
+	return u64_to_ptr(struct intel_ring, sz);
+}
+
 #endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 5fdac40015cf..3f1b20cc50c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -3140,9 +3140,8 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
 		goto error_deref_obj;
 	}
 
-	ring = intel_engine_create_ring(engine,
-					timeline,
-					ce->gem_context->ring_size);
+	ring = intel_engine_create_ring(engine, timeline,
+					(unsigned long)ce->ring);
 	intel_timeline_put(timeline);
 	if (IS_ERR(ring)) {
 		ret = PTR_ERR(ring);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 6d3911469801..e237bcecfa1f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -328,10 +328,14 @@ static void print_context_stats(struct seq_file *m,
 
 		for_each_gem_engine(ce,
 				    i915_gem_context_lock_engines(ctx), it) {
-			if (ce->state)
-				per_file_stats(0, ce->state->obj, &kstats);
-			if (ce->ring)
+			intel_context_lock_pinned(ce);
+			if (intel_context_is_pinned(ce)) {
+				if (ce->state)
+					per_file_stats(0,
+						       ce->state->obj, &kstats);
 				per_file_stats(0, ce->ring->vma->obj, &kstats);
+			}
+			intel_context_unlock_pinned(ce);
 		}
 		i915_gem_context_unlock_engines(ctx);
 
@@ -1677,12 +1681,15 @@ static int i915_context_status(struct seq_file *m, void *unused)
 
 		for_each_gem_engine(ce,
 				    i915_gem_context_lock_engines(ctx), it) {
-			seq_printf(m, "%s: ", ce->engine->name);
-			if (ce->state)
-				describe_obj(m, ce->state->obj);
-			if (ce->ring)
+			intel_context_lock_pinned(ce);
+			if (intel_context_is_pinned(ce)) {
+				seq_printf(m, "%s: ", ce->engine->name);
+				if (ce->state)
+					describe_obj(m, ce->state->obj);
 				describe_ctx_ring(m, ce->ring);
-			seq_putc(m, '\n');
+				seq_putc(m, '\n');
+			}
+			intel_context_unlock_pinned(ce);
 		}
 		i915_gem_context_unlock_engines(ctx);
 
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 05/23] drm/i915: Flush extra hard after writing relocations through the GTT
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (2 preceding siblings ...)
  2019-07-23 18:38 ` [PATCH 04/23] drm/i915: Push the ring creation flags to the backend Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-23 18:38 ` [PATCH 06/23] drm/i915: Hide unshrinkable context objects from the shrinker Chris Wilson
                   ` (22 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: tvrtko.ursulin, Chris Wilson, stable

Recently discovered in commit bdae33b8b82b ("drm/i915: Use maximum write
flush for pwrite_gtt") was that we needed to issue our full write barrier
before changing the GGTT PTE to ensure that our indirect writes through
the GTT landed before the PTE changed (and the writes end up in a
different page). That also applies to our GGTT relocation path.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index cbd7c6e3a1f8..4db4463089ce 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1014,11 +1014,12 @@ static void reloc_cache_reset(struct reloc_cache *cache)
 		kunmap_atomic(vaddr);
 		i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
 	} else {
-		wmb();
+		struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+
+		intel_gt_flush_ggtt_writes(ggtt->vm.gt);
 		io_mapping_unmap_atomic((void __iomem *)vaddr);
-		if (cache->node.allocated) {
-			struct i915_ggtt *ggtt = cache_to_ggtt(cache);
 
+		if (cache->node.allocated) {
 			ggtt->vm.clear_range(&ggtt->vm,
 					     cache->node.start,
 					     cache->node.size);
@@ -1073,6 +1074,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 	void *vaddr;
 
 	if (cache->vaddr) {
+		intel_gt_flush_ggtt_writes(ggtt->vm.gt);
 		io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
 	} else {
 		struct i915_vma *vma;
@@ -1114,7 +1116,6 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 
 	offset = cache->node.start;
 	if (cache->node.allocated) {
-		wmb();
 		ggtt->vm.insert_page(&ggtt->vm,
 				     i915_gem_object_get_dma_address(obj, page),
 				     offset, I915_CACHE_NONE, 0);
-- 
2.22.0


^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 06/23] drm/i915: Hide unshrinkable context objects from the shrinker
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (3 preceding siblings ...)
  2019-07-23 18:38 ` [PATCH 05/23] drm/i915: Flush extra hard after writing relocations through the GTT Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-23 18:38 ` [PATCH 07/23] drm/i915/gt: Move the [class][inst] lookup for engines onto the GT Chris Wilson
                   ` (21 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx

The shrinker cannot touch objects used by the contexts (logical state
and ring). Currently we mark those as "pin_global" to let the shrinker
skip over them, however, if we remove them from the shrinker lists
entirely, we don't even have to include them in our shrink accounting.

By keeping the unshrinkable objects in our shrinker tracking, we report
a large number of objects available to be shrunk, and leave the shrinker
deeply unsatisfied when we fail to reclaim those. The shrinker will
persist in trying to reclaim the unavailable objects, forcing the system
into a livelock (not even hitting the dread oomkiller).

v2: Extend unshrinkable protection for perma-pinned scratch and guc
allocations (Tvrtko)
v3: Notice that we should be pinned when marking unshrinkable and so the
link cannot be empty; merge duplicate paths.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c   | 11 +---
 drivers/gpu/drm/i915/gem/i915_gem_object.h   |  4 ++
 drivers/gpu/drm/i915/gem/i915_gem_pages.c    | 13 +----
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 58 ++++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_context.c      |  4 +-
 drivers/gpu/drm/i915/gt/intel_gt.c           |  3 +-
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c   | 17 +++---
 drivers/gpu/drm/i915/gt/uc/intel_guc.c       |  2 +-
 drivers/gpu/drm/i915/i915_debugfs.c          |  3 +-
 drivers/gpu/drm/i915/i915_vma.c              | 16 ++++++
 drivers/gpu/drm/i915/i915_vma.h              |  4 ++
 11 files changed, 102 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index d5197a2a106f..4ea97fca9c35 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -63,6 +63,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	spin_lock_init(&obj->vma.lock);
 	INIT_LIST_HEAD(&obj->vma.list);
 
+	INIT_LIST_HEAD(&obj->mm.link);
+
 	INIT_LIST_HEAD(&obj->lut_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
 
@@ -273,14 +275,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	 * or else we may oom whilst there are plenty of deferred
 	 * freed objects.
 	 */
-	if (i915_gem_object_has_pages(obj) &&
-	    i915_gem_object_is_shrinkable(obj)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&i915->mm.obj_lock, flags);
-		list_del_init(&obj->mm.link);
-		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
-	}
+	i915_gem_object_make_unshrinkable(obj);
 
 	/*
 	 * Since we require blocking on struct_mutex to unbind the freed
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 67aea07ea019..3714cf234d64 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -394,6 +394,10 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     unsigned int flags);
 void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
 
+void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj);
+void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
+void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj);
+
 static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
 	if (obj->cache_dirty)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index b36ad269f4ea..92ad3cc220e3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -153,24 +153,13 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
 struct sg_table *
 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
 {
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct sg_table *pages;
 
 	pages = fetch_and_zero(&obj->mm.pages);
 	if (IS_ERR_OR_NULL(pages))
 		return pages;
 
-	if (i915_gem_object_is_shrinkable(obj)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&i915->mm.obj_lock, flags);
-
-		list_del(&obj->mm.link);
-		i915->mm.shrink_count--;
-		i915->mm.shrink_memory -= obj->base.size;
-
-		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
-	}
+	i915_gem_object_make_unshrinkable(obj);
 
 	if (obj->mm.mapping) {
 		void *ptr;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 3f4c6bdcc3c3..5ab7df53c2a0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -530,3 +530,61 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
 	if (unlock)
 		mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
 }
+
+#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+
+void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
+{
+	/*
+	 * We can only be called while the pages are pinned or when
+	 * the pages are released. If pinned, we should only be called
+	 * from a single caller under controlled conditions; and on release
+	 * only one caller may release us. Neither the two may cross.
+	 */
+	if (!list_empty(&obj->mm.link)) { /* pinned by caller */
+		struct drm_i915_private *i915 = obj_to_i915(obj);
+		unsigned long flags;
+
+		spin_lock_irqsave(&i915->mm.obj_lock, flags);
+		GEM_BUG_ON(list_empty(&obj->mm.link));
+
+		list_del_init(&obj->mm.link);
+		i915->mm.shrink_count--;
+		i915->mm.shrink_memory -= obj->base.size;
+
+		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+	}
+}
+
+static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
+					      struct list_head *head)
+{
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+	GEM_BUG_ON(!list_empty(&obj->mm.link));
+
+	if (i915_gem_object_is_shrinkable(obj)) {
+		struct drm_i915_private *i915 = obj_to_i915(obj);
+		unsigned long flags;
+
+		spin_lock_irqsave(&i915->mm.obj_lock, flags);
+		GEM_BUG_ON(!kref_read(&obj->base.refcount));
+
+		list_add_tail(&obj->mm.link, head);
+		i915->mm.shrink_count++;
+		i915->mm.shrink_memory += obj->base.size;
+
+		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+	}
+}
+
+void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
+{
+	__i915_gem_object_make_shrinkable(obj,
+					  &obj_to_i915(obj)->mm.shrink_list);
+}
+
+void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
+{
+	__i915_gem_object_make_shrinkable(obj,
+					  &obj_to_i915(obj)->mm.purge_list);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 295fa0ddbcac..c39ad7d19197 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -118,7 +118,7 @@ static int __context_pin_state(struct i915_vma *vma)
 	 * And mark it as a globally pinned object to let the shrinker know
 	 * it cannot reclaim the object until we release it.
 	 */
-	vma->obj->pin_global++;
+	i915_vma_make_unshrinkable(vma);
 	vma->obj->mm.dirty = true;
 
 	return 0;
@@ -126,8 +126,8 @@ static int __context_pin_state(struct i915_vma *vma)
 
 static void __context_unpin_state(struct i915_vma *vma)
 {
-	vma->obj->pin_global--;
 	__i915_vma_unpin(vma);
+	i915_vma_make_shrinkable(vma);
 }
 
 static void __intel_context_retire(struct i915_active *active)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index f7e69db4019d..de0d6ad5f93c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -231,7 +231,8 @@ int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
 	if (ret)
 		goto err_unref;
 
-	gt->scratch = vma;
+	gt->scratch = i915_vma_make_unshrinkable(vma);
+
 	return 0;
 
 err_unref:
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 38ec11ae6ed7..d8efb88f33f3 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1238,7 +1238,7 @@ int intel_ring_pin(struct intel_ring *ring)
 		goto err_ring;
 	}
 
-	vma->obj->pin_global++;
+	i915_vma_make_unshrinkable(vma);
 
 	GEM_BUG_ON(ring->vaddr);
 	ring->vaddr = addr;
@@ -1267,6 +1267,8 @@ void intel_ring_reset(struct intel_ring *ring, u32 tail)
 
 void intel_ring_unpin(struct intel_ring *ring)
 {
+	struct i915_vma *vma = ring->vma;
+
 	if (!atomic_dec_and_test(&ring->pin_count))
 		return;
 
@@ -1275,18 +1277,17 @@ void intel_ring_unpin(struct intel_ring *ring)
 	/* Discard any unused bytes beyond that submitted to hw. */
 	intel_ring_reset(ring, ring->tail);
 
-	GEM_BUG_ON(!ring->vma);
-	i915_vma_unset_ggtt_write(ring->vma);
-	if (i915_vma_is_map_and_fenceable(ring->vma))
-		i915_vma_unpin_iomap(ring->vma);
+	i915_vma_unset_ggtt_write(vma);
+	if (i915_vma_is_map_and_fenceable(vma))
+		i915_vma_unpin_iomap(vma);
 	else
-		i915_gem_object_unpin_map(ring->vma->obj);
+		i915_gem_object_unpin_map(vma->obj);
 
 	GEM_BUG_ON(!ring->vaddr);
 	ring->vaddr = NULL;
 
-	ring->vma->obj->pin_global--;
-	i915_vma_unpin(ring->vma);
+	i915_vma_unpin(vma);
+	i915_vma_make_purgeable(vma);
 
 	intel_timeline_unpin(ring->timeline);
 }
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 83f2c197375f..9c1712748527 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -597,7 +597,7 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
 		goto err;
 	}
 
-	return vma;
+	return i915_vma_make_unshrinkable(vma);
 
 err:
 	i915_gem_object_put(obj);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e237bcecfa1f..defa98c3b323 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -367,8 +367,9 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 	struct drm_i915_private *i915 = node_to_i915(m->private);
 	int ret;
 
-	seq_printf(m, "%u shrinkable objects, %llu bytes\n",
+	seq_printf(m, "%u shrinkable [%u free] objects, %llu bytes\n",
 		   i915->mm.shrink_count,
+		   atomic_read(&i915->mm.free_count),
 		   i915->mm.shrink_memory);
 
 	seq_putc(m, '\n');
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index eb16a1a93bbc..b52f71e0ade6 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1030,6 +1030,22 @@ int i915_vma_unbind(struct i915_vma *vma)
 	return 0;
 }
 
+struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma)
+{
+	i915_gem_object_make_unshrinkable(vma->obj);
+	return vma;
+}
+
+void i915_vma_make_shrinkable(struct i915_vma *vma)
+{
+	i915_gem_object_make_shrinkable(vma->obj);
+}
+
+void i915_vma_make_purgeable(struct i915_vma *vma)
+{
+	i915_gem_object_make_purgeable(vma->obj);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/i915_vma.c"
 #endif
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 4b769db649bf..5c4224749bde 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -459,4 +459,8 @@ void i915_vma_parked(struct drm_i915_private *i915);
 struct i915_vma *i915_vma_alloc(void);
 void i915_vma_free(struct i915_vma *vma);
 
+struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
+void i915_vma_make_shrinkable(struct i915_vma *vma);
+void i915_vma_make_purgeable(struct i915_vma *vma);
+
 #endif
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 07/23] drm/i915/gt: Move the [class][inst] lookup for engines onto the GT
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (4 preceding siblings ...)
  2019-07-23 18:38 ` [PATCH 06/23] drm/i915: Hide unshrinkable context objects from the shrinker Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-25 21:21   ` Daniele Ceraolo Spurio
  2019-07-26  9:22   ` Tvrtko Ursulin
  2019-07-23 18:38 ` [PATCH 08/23] drm/i915: Introduce for_each_user_engine() Chris Wilson
                   ` (20 subsequent siblings)
  26 siblings, 2 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx

To maintain a fast lookup from a GT centric irq handler, we want the
engine lookup tables on the intel_gt. To avoid having multiple copies of
the same multi-dimensional lookup table, move the generic user engine
lookup into an rbtree (for fast and flexible indexing).

v2: Split uabi_instance cf uabi_class

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 drivers/gpu/drm/i915/Makefile                |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_context.c  |  3 +-
 drivers/gpu/drm/i915/gt/intel_engine.h       |  3 -
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    | 53 +++++-----------
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  9 ++-
 drivers/gpu/drm/i915/gt/intel_engine_user.c  | 66 ++++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_engine_user.h  | 20 ++++++
 drivers/gpu/drm/i915/gt/intel_gt_types.h     |  4 ++
 drivers/gpu/drm/i915/gt/selftest_lrc.c       | 15 +++--
 drivers/gpu/drm/i915/i915_drv.h              |  7 ++-
 drivers/gpu/drm/i915/i915_irq.c              |  2 +-
 drivers/gpu/drm/i915/i915_pmu.c              |  3 +-
 drivers/gpu/drm/i915/i915_query.c            |  2 +-
 drivers/gpu/drm/i915/i915_trace.h            | 10 +--
 14 files changed, 138 insertions(+), 60 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_user.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_user.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 524516251a40..fafc3763dc2d 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -74,6 +74,7 @@ gt-y += \
 	gt/intel_context.o \
 	gt/intel_engine_cs.o \
 	gt/intel_engine_pm.o \
+	gt/intel_engine_user.o \
 	gt/intel_gt.o \
 	gt/intel_gt_pm.o \
 	gt/intel_hangcheck.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 18b226bc5e3a..e31431fa141e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -70,6 +70,7 @@
 #include <drm/i915_drm.h>
 
 #include "gt/intel_lrc_reg.h"
+#include "gt/intel_engine_user.h"
 
 #include "i915_gem_context.h"
 #include "i915_globals.h"
@@ -1740,7 +1741,7 @@ get_engines(struct i915_gem_context *ctx,
 
 		if (e->engines[n]) {
 			ci.engine_class = e->engines[n]->engine->uabi_class;
-			ci.engine_instance = e->engines[n]->engine->instance;
+			ci.engine_instance = e->engines[n]->engine->uabi_instance;
 		}
 
 		if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index db5c73ce86ee..30856383e4c5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -432,9 +432,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		       struct drm_printer *m,
 		       const char *header, ...);
 
-struct intel_engine_cs *
-intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
-
 static inline void intel_engine_context_in(struct intel_engine_cs *engine)
 {
 	unsigned long flags;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 65cbf1d9118d..ed5c4e161e6e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -32,6 +32,7 @@
 
 #include "intel_engine.h"
 #include "intel_engine_pm.h"
+#include "intel_engine_user.h"
 #include "intel_context.h"
 #include "intel_lrc.h"
 #include "intel_reset.h"
@@ -285,9 +286,7 @@ static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
 	intel_engine_set_hwsp_writemask(engine, ~0u);
 }
 
-static int
-intel_engine_setup(struct drm_i915_private *dev_priv,
-		   enum intel_engine_id id)
+static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 {
 	const struct engine_info *info = &intel_engines[id];
 	struct intel_engine_cs *engine;
@@ -303,10 +302,9 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 	if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
 		return -EINVAL;
 
-	if (GEM_DEBUG_WARN_ON(dev_priv->engine_class[info->class][info->instance]))
+	if (GEM_DEBUG_WARN_ON(gt->engine_class[info->class][info->instance]))
 		return -EINVAL;
 
-	GEM_BUG_ON(dev_priv->engine[id]);
 	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
 	if (!engine)
 		return -ENOMEM;
@@ -315,12 +313,12 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 
 	engine->id = id;
 	engine->mask = BIT(id);
-	engine->i915 = dev_priv;
-	engine->gt = &dev_priv->gt;
-	engine->uncore = &dev_priv->uncore;
+	engine->i915 = gt->i915;
+	engine->gt = gt;
+	engine->uncore = gt->uncore;
 	__sprint_engine_name(engine->name, info);
 	engine->hw_id = engine->guc_id = info->hw_id;
-	engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases);
+	engine->mmio_base = __engine_mmio_base(gt->i915, info->mmio_bases);
 	engine->class = info->class;
 	engine->instance = info->instance;
 
@@ -331,13 +329,14 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 	engine->destroy = (typeof(engine->destroy))kfree;
 
 	engine->uabi_class = intel_engine_classes[info->class].uabi_class;
+	engine->uabi_instance = info->instance;
 
-	engine->context_size = intel_engine_context_size(dev_priv,
+	engine->context_size = intel_engine_context_size(gt->i915,
 							 engine->class);
 	if (WARN_ON(engine->context_size > BIT(20)))
 		engine->context_size = 0;
 	if (engine->context_size)
-		DRIVER_CAPS(dev_priv)->has_logical_contexts = true;
+		DRIVER_CAPS(gt->i915)->has_logical_contexts = true;
 
 	/* Nothing to do here, execute in order of dependencies */
 	engine->schedule = NULL;
@@ -349,8 +348,11 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 	/* Scrub mmio state on takeover */
 	intel_engine_sanitize_mmio(engine);
 
-	dev_priv->engine_class[info->class][info->instance] = engine;
-	dev_priv->engine[id] = engine;
+	engine->gt->engine_class[info->class][info->instance] = engine;
+
+	intel_engine_add_user(engine);
+	gt->i915->engine[id] = engine;
+
 	return 0;
 }
 
@@ -433,7 +435,7 @@ int intel_engines_init_mmio(struct drm_i915_private *i915)
 		if (!HAS_ENGINE(i915, i))
 			continue;
 
-		err = intel_engine_setup(i915, i);
+		err = intel_engine_setup(&i915->gt, i);
 		if (err)
 			goto cleanup;
 
@@ -1501,29 +1503,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	intel_engine_print_breadcrumbs(engine, m);
 }
 
-static u8 user_class_map[] = {
-	[I915_ENGINE_CLASS_RENDER] = RENDER_CLASS,
-	[I915_ENGINE_CLASS_COPY] = COPY_ENGINE_CLASS,
-	[I915_ENGINE_CLASS_VIDEO] = VIDEO_DECODE_CLASS,
-	[I915_ENGINE_CLASS_VIDEO_ENHANCE] = VIDEO_ENHANCEMENT_CLASS,
-};
-
-struct intel_engine_cs *
-intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
-{
-	if (class >= ARRAY_SIZE(user_class_map))
-		return NULL;
-
-	class = user_class_map[class];
-
-	GEM_BUG_ON(class > MAX_ENGINE_CLASS);
-
-	if (instance > MAX_ENGINE_INSTANCE)
-		return NULL;
-
-	return i915->engine_class[class][instance];
-}
-
 /**
  * intel_enable_engine_stats() - Enable engine busy tracking on engine
  * @engine: engine to enable stats collection
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 8be63019d707..9c927fa408aa 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -12,6 +12,7 @@
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/llist.h>
+#include <linux/rbtree.h>
 #include <linux/timer.h>
 #include <linux/types.h>
 
@@ -267,15 +268,19 @@ struct intel_engine_cs {
 	unsigned int guc_id;
 	intel_engine_mask_t mask;
 
-	u8 uabi_class;
-
 	u8 class;
 	u8 instance;
+
+	u8 uabi_class;
+	u8 uabi_instance;
+
 	u32 context_size;
 	u32 mmio_base;
 
 	u32 uabi_capabilities;
 
+	struct rb_node uabi_node;
+
 	struct intel_sseu sseu;
 
 	struct intel_ring *buffer;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
new file mode 100644
index 000000000000..f74fb4d2fa0d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -0,0 +1,66 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_engine.h"
+#include "intel_engine_user.h"
+
+struct intel_engine_cs *
+intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
+{
+	struct rb_node *p = i915->uabi_engines.rb_node;
+
+	while (p) {
+		struct intel_engine_cs *it =
+			rb_entry(p, typeof(*it), uabi_node);
+
+		if (class < it->uabi_class)
+			p = p->rb_left;
+		else if (class > it->uabi_class ||
+			 instance > it->uabi_instance)
+			p = p->rb_right;
+		else if (instance < it->uabi_instance)
+			p = p->rb_left;
+		else
+			return it;
+	}
+
+	return NULL;
+}
+
+void intel_engine_add_user(struct intel_engine_cs *engine)
+{
+	struct rb_root *root = &engine->i915->uabi_engines;
+	struct rb_node **p, *parent;
+
+	parent = NULL;
+	p = &root->rb_node;
+	while (*p) {
+		struct intel_engine_cs *it;
+
+		parent = *p;
+		it = rb_entry(parent, typeof(*it), uabi_node);
+
+		/* All user class:instance identifiers must be unique */
+		GEM_BUG_ON(it->uabi_class == engine->uabi_class &&
+			   it->uabi_instance == engine->uabi_instance);
+
+		if (engine->uabi_class < it->uabi_class)
+			p = &parent->rb_left;
+		else if (engine->uabi_class > it->uabi_class ||
+			 engine->uabi_instance > it->uabi_instance)
+			p = &parent->rb_right;
+		else
+			p = &parent->rb_left;
+	}
+
+	rb_link_node(&engine->uabi_node, parent, p);
+	rb_insert_color(&engine->uabi_node, root);
+
+	GEM_BUG_ON(intel_engine_lookup_user(engine->i915,
+					    engine->uabi_class,
+					    engine->uabi_instance) != engine);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.h b/drivers/gpu/drm/i915/gt/intel_engine_user.h
new file mode 100644
index 000000000000..091dc8a4a39f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.h
@@ -0,0 +1,20 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_USER_H
+#define INTEL_ENGINE_USER_H
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct intel_engine_cs;
+
+struct intel_engine_cs *
+intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
+
+void intel_engine_add_user(struct intel_engine_cs *engine);
+
+#endif /* INTEL_ENGINE_USER_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 34d4a868e4f1..5fd11e361d03 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -21,6 +21,7 @@
 
 struct drm_i915_private;
 struct i915_ggtt;
+struct intel_engine_cs;
 struct intel_uncore;
 
 struct intel_hangcheck {
@@ -76,6 +77,9 @@ struct intel_gt {
 	u32 pm_ier;
 
 	u32 pm_guc_events;
+
+	struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1]
+					    [MAX_ENGINE_INSTANCE + 1];
 };
 
 enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 60f27e52d267..eb40a58665be 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1773,6 +1773,7 @@ static int live_virtual_engine(void *arg)
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
 	struct intel_engine_cs *engine;
+	struct intel_gt *gt = &i915->gt;
 	enum intel_engine_id id;
 	unsigned int class, inst;
 	int err = -ENODEV;
@@ -1796,10 +1797,10 @@ static int live_virtual_engine(void *arg)
 
 		nsibling = 0;
 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
-			if (!i915->engine_class[class][inst])
+			if (!gt->engine_class[class][inst])
 				continue;
 
-			siblings[nsibling++] = i915->engine_class[class][inst];
+			siblings[nsibling++] = gt->engine_class[class][inst];
 		}
 		if (nsibling < 2)
 			continue;
@@ -1920,6 +1921,7 @@ static int live_virtual_mask(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	struct intel_gt *gt = &i915->gt;
 	unsigned int class, inst;
 	int err = 0;
 
@@ -1933,10 +1935,10 @@ static int live_virtual_mask(void *arg)
 
 		nsibling = 0;
 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
-			if (!i915->engine_class[class][inst])
+			if (!gt->engine_class[class][inst])
 				break;
 
-			siblings[nsibling++] = i915->engine_class[class][inst];
+			siblings[nsibling++] = gt->engine_class[class][inst];
 		}
 		if (nsibling < 2)
 			continue;
@@ -2097,6 +2099,7 @@ static int live_virtual_bond(void *arg)
 	};
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	struct intel_gt *gt = &i915->gt;
 	unsigned int class, inst;
 	int err = 0;
 
@@ -2111,11 +2114,11 @@ static int live_virtual_bond(void *arg)
 
 		nsibling = 0;
 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
-			if (!i915->engine_class[class][inst])
+			if (!gt->engine_class[class][inst])
 				break;
 
 			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
-			siblings[nsibling++] = i915->engine_class[class][inst];
+			siblings[nsibling++] = gt->engine_class[class][inst];
 		}
 		if (nsibling < 2)
 			continue;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 269a1b32b48b..12a7fdabc2f0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1371,11 +1371,12 @@ struct drm_i915_private {
 	wait_queue_head_t gmbus_wait_queue;
 
 	struct pci_dev *bridge_dev;
-	struct intel_engine_cs *engine[I915_NUM_ENGINES];
+
 	/* Context used internally to idle the GPU and setup initial state */
 	struct i915_gem_context *kernel_context;
-	struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1]
-					    [MAX_ENGINE_INSTANCE + 1];
+
+	struct intel_engine_cs *engine[I915_NUM_ENGINES];
+	struct rb_root uabi_engines;
 
 	struct resource mch_res;
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 11c73af92597..4d98e8597637 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3109,7 +3109,7 @@ gen11_engine_irq_handler(struct intel_gt *gt, const u8 class,
 	struct intel_engine_cs *engine;
 
 	if (instance <= MAX_ENGINE_INSTANCE)
-		engine = gt->i915->engine_class[class][instance];
+		engine = gt->engine_class[class][instance];
 	else
 		engine = NULL;
 
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index eff86483bec0..bdf7963a043b 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -8,6 +8,7 @@
 #include <linux/pm_runtime.h>
 
 #include "gt/intel_engine.h"
+#include "gt/intel_engine_user.h"
 
 #include "i915_drv.h"
 #include "i915_pmu.h"
@@ -926,7 +927,7 @@ create_event_attributes(struct drm_i915_private *i915)
 			i915_iter =
 				add_i915_attr(i915_iter, str,
 					      __I915_PMU_ENGINE(engine->uabi_class,
-								engine->instance,
+								engine->uabi_instance,
 								engine_events[i].sample));
 
 			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index 7b7016171057..70b1ad38e615 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -127,7 +127,7 @@ query_engine_info(struct drm_i915_private *i915,
 
 	for_each_engine(engine, i915, id) {
 		info.engine.engine_class = engine->uabi_class;
-		info.engine.engine_instance = engine->instance;
+		info.engine.engine_instance = engine->uabi_instance;
 		info.capabilities = engine->uabi_capabilities;
 
 		if (__copy_to_user(info_ptr, &info, sizeof(info)))
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index da18b8d6b80c..1d11245c4c87 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -677,7 +677,7 @@ TRACE_EVENT(i915_request_queue,
 			   __entry->dev = rq->i915->drm.primary->index;
 			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
-			   __entry->instance = rq->engine->instance;
+			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
 			   __entry->flags = flags;
@@ -706,7 +706,7 @@ DECLARE_EVENT_CLASS(i915_request,
 			   __entry->dev = rq->i915->drm.primary->index;
 			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
-			   __entry->instance = rq->engine->instance;
+			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
 			   ),
@@ -751,7 +751,7 @@ TRACE_EVENT(i915_request_in,
 			   __entry->dev = rq->i915->drm.primary->index;
 			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
-			   __entry->instance = rq->engine->instance;
+			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
 			   __entry->prio = rq->sched.attr.priority;
@@ -782,7 +782,7 @@ TRACE_EVENT(i915_request_out,
 			   __entry->dev = rq->i915->drm.primary->index;
 			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
-			   __entry->instance = rq->engine->instance;
+			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
 			   __entry->completed = i915_request_completed(rq);
@@ -847,7 +847,7 @@ TRACE_EVENT(i915_request_wait_begin,
 			   __entry->dev = rq->i915->drm.primary->index;
 			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
-			   __entry->instance = rq->engine->instance;
+			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
 			   __entry->flags = flags;
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 08/23] drm/i915: Introduce for_each_user_engine()
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (5 preceding siblings ...)
  2019-07-23 18:38 ` [PATCH 07/23] drm/i915/gt: Move the [class][inst] lookup for engines onto the GT Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-23 18:38 ` [PATCH 09/23] drm/i915: Use intel_engine_lookup_user for probing HAS_BSD etc Chris Wilson
                   ` (19 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx

Now that we have a compact tree representation for uabi engines, make
use of it for walking all user engines from catchall user interfaces
like debugfs and capabilities.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  6 ++---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  2 +-
 drivers/gpu/drm/i915/i915_cmd_parser.c        |  3 +--
 drivers/gpu/drm/i915/i915_debugfs.c           | 26 +++++++------------
 drivers/gpu/drm/i915/i915_drv.h               |  8 ++++++
 drivers/gpu/drm/i915/i915_query.c             |  3 +--
 6 files changed, 22 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index ed5c4e161e6e..3ea0b5debe94 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -695,12 +695,11 @@ void intel_engines_set_scheduler_caps(struct drm_i915_private *i915)
 #undef MAP
 	};
 	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	u32 enabled, disabled;
 
 	enabled = 0;
 	disabled = 0;
-	for_each_engine(engine, i915, id) { /* all engines must agree! */
+	for_each_user_engine(engine, i915) { /* all engines must agree! */
 		int i;
 
 		if (engine->schedule)
@@ -1195,11 +1194,10 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	unsigned int which;
 
 	which = 0;
-	for_each_engine(engine, i915, id)
+	for_each_user_engine(engine, i915)
 		if (engine->default_state)
 			which |= BIT(engine->uabi_class);
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index a0f2a01365bc..c99cdbcce64e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -488,7 +488,7 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 			   ring_tail, rq->fence.seqno);
 	guc_ring_doorbell(client);
 
-	client->submissions[engine->id] += 1;
+	client->submissions[engine->guc_id] += 1;
 }
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index a28bcd2d7c09..730c1ed6d2a7 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1352,11 +1352,10 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
 int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	bool active = false;
 
 	/* If the command parser is not enabled, report 0 - unsupported */
-	for_each_engine(engine, dev_priv, id) {
+	for_each_user_engine(engine, dev_priv) {
 		if (intel_engine_needs_cmd_parser(engine)) {
 			active = true;
 			break;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index defa98c3b323..f9a9f589d631 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -301,10 +301,9 @@ static void print_batch_pool_stats(struct seq_file *m,
 	struct drm_i915_gem_object *obj;
 	struct intel_engine_cs *engine;
 	struct file_stats stats = {};
-	enum intel_engine_id id;
 	int j;
 
-	for_each_engine(engine, dev_priv, id) {
+	for_each_user_engine(engine, dev_priv) {
 		for (j = 0; j < ARRAY_SIZE(engine->batch_pool.cache_list); j++) {
 			list_for_each_entry(obj,
 					    &engine->batch_pool.cache_list[j],
@@ -391,7 +390,6 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
 	struct drm_device *dev = &dev_priv->drm;
 	struct drm_i915_gem_object *obj;
 	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	int total = 0;
 	int ret, j;
 
@@ -399,7 +397,7 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
 	if (ret)
 		return ret;
 
-	for_each_engine(engine, dev_priv, id) {
+	for_each_user_engine(engine, dev_priv) {
 		for (j = 0; j < ARRAY_SIZE(engine->batch_pool.cache_list); j++) {
 			int count;
 
@@ -486,7 +484,6 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	intel_wakeref_t wakeref;
 	int i, pipe;
 
@@ -689,7 +686,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 			   I915_READ(GEN11_GUNIT_CSME_INTR_MASK));
 
 	} else if (INTEL_GEN(dev_priv) >= 6) {
-		for_each_engine(engine, dev_priv, id) {
+		for_each_user_engine(engine, dev_priv) {
 			seq_printf(m,
 				   "Graphics Interrupt mask (%s):	%08x\n",
 				   engine->name, ENGINE_READ(engine, RING_IMR));
@@ -1083,7 +1080,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	struct intel_gt *gt = &i915->gt;
 	struct intel_engine_cs *engine;
 	intel_wakeref_t wakeref;
-	enum intel_engine_id id;
 
 	seq_printf(m, "Reset flags: %lx\n", gt->reset.flags);
 	if (test_bit(I915_WEDGED, &gt->reset.flags))
@@ -1108,7 +1104,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	seq_printf(m, "GT active? %s\n", yesno(gt->awake));
 
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-		for_each_engine(engine, i915, id) {
+		for_each_user_engine(engine, i915) {
 			struct intel_instdone instdone;
 
 			seq_printf(m, "%s: %d ms ago\n",
@@ -1964,7 +1960,6 @@ static void i915_guc_client_info(struct seq_file *m,
 				 struct intel_guc_client *client)
 {
 	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	u64 tot = 0;
 
 	seq_printf(m, "\tPriority %d, GuC stage index: %u, PD offset 0x%x\n",
@@ -1972,8 +1967,8 @@ static void i915_guc_client_info(struct seq_file *m,
 	seq_printf(m, "\tDoorbell id %d, offset: 0x%lx\n",
 		client->doorbell_id, client->doorbell_offset);
 
-	for_each_engine(engine, dev_priv, id) {
-		u64 submissions = client->submissions[id];
+	for_each_user_engine(engine, dev_priv) {
+		u64 submissions = client->submissions[engine->guc_id];
 		tot += submissions;
 		seq_printf(m, "\tSubmissions: %llu %s\n",
 				submissions, engine->name);
@@ -2013,7 +2008,6 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data)
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	const struct intel_guc *guc = &dev_priv->gt.uc.guc;
 	struct guc_stage_desc *desc = guc->stage_desc_pool_vaddr;
-	intel_engine_mask_t tmp;
 	int index;
 
 	if (!USES_GUC_SUBMISSION(dev_priv))
@@ -2042,7 +2036,7 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data)
 			   desc->wq_addr, desc->wq_size);
 		seq_putc(m, '\n');
 
-		for_each_engine(engine, dev_priv, tmp) {
+		for_each_user_engine(engine, dev_priv) {
 			u32 guc_engine_id = engine->guc_id;
 			struct guc_execlist_context *lrc =
 						&desc->lrc[guc_engine_id];
@@ -2875,7 +2869,6 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct intel_engine_cs *engine;
 	intel_wakeref_t wakeref;
-	enum intel_engine_id id;
 	struct drm_printer p;
 
 	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
@@ -2887,7 +2880,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 		   RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz);
 
 	p = drm_seq_file_printer(m);
-	for_each_engine(engine, dev_priv, id)
+	for_each_user_engine(engine, dev_priv)
 		intel_engine_dump(engine, &p, "%s\n", engine->name);
 
 	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
@@ -2968,9 +2961,8 @@ static int i915_wa_registers(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *i915 = node_to_i915(m->private);
 	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 
-	for_each_engine(engine, i915, id) {
+	for_each_user_engine(engine, i915) {
 		const struct i915_wa_list *wal = &engine->ctx_wa_list;
 		const struct i915_wa *wa;
 		unsigned int count;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 12a7fdabc2f0..b8363c483caf 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1915,6 +1915,14 @@ static inline struct drm_i915_private *wopcm_to_i915(struct intel_wopcm *wopcm)
 	     ((engine__) = (dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : \
 	     0;)
 
+#define rb_to_uabi_engine(rb) \
+	rb_entry_safe(rb, struct intel_engine_cs, uabi_node)
+
+#define for_each_user_engine(engine__, i915__) \
+	for ((engine__) = rb_to_uabi_engine(rb_first(&(i915__)->uabi_engines));\
+	     (engine__); \
+	     (engine__) = rb_to_uabi_engine(rb_next(&(engine__)->uabi_node)))
+
 enum hdmi_force_audio {
 	HDMI_AUDIO_OFF_DVI = -2,	/* no aux data for HDMI-DVI converter */
 	HDMI_AUDIO_OFF,			/* force turn off HDMI audio */
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index 70b1ad38e615..8abba3a31767 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -105,7 +105,6 @@ query_engine_info(struct drm_i915_private *i915,
 	struct drm_i915_query_engine_info query;
 	struct drm_i915_engine_info info = { };
 	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	int len, ret;
 
 	if (query_item->flags)
@@ -125,7 +124,7 @@ query_engine_info(struct drm_i915_private *i915,
 
 	info_ptr = &query_ptr->engines[0];
 
-	for_each_engine(engine, i915, id) {
+	for_each_user_engine(engine, i915) {
 		info.engine.engine_class = engine->uabi_class;
 		info.engine.engine_instance = engine->uabi_instance;
 		info.capabilities = engine->uabi_capabilities;
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 09/23] drm/i915: Use intel_engine_lookup_user for probing HAS_BSD etc
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (6 preceding siblings ...)
  2019-07-23 18:38 ` [PATCH 08/23] drm/i915: Introduce for_each_user_engine() Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-23 18:38 ` [PATCH 10/23] drm/i915: Isolate i915_getparam_ioctl() Chris Wilson
                   ` (18 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx

Use the same mechanism to determine if a backend engine exists for a
uabi mapping as used internally.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f2d3d754af37..d3c6805045d0 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -61,6 +61,7 @@
 
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_ioctls.h"
+#include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_reset.h"
@@ -371,16 +372,20 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 		value = dev_priv->overlay ? 1 : 0;
 		break;
 	case I915_PARAM_HAS_BSD:
-		value = !!dev_priv->engine[VCS0];
+		value = !!intel_engine_lookup_user(dev_priv,
+						   I915_ENGINE_CLASS_VIDEO, 0);
 		break;
 	case I915_PARAM_HAS_BLT:
-		value = !!dev_priv->engine[BCS0];
+		value = !!intel_engine_lookup_user(dev_priv,
+						   I915_ENGINE_CLASS_COPY, 0);
 		break;
 	case I915_PARAM_HAS_VEBOX:
-		value = !!dev_priv->engine[VECS0];
+		value = !!intel_engine_lookup_user(dev_priv,
+						   I915_ENGINE_CLASS_VIDEO_ENHANCE, 0);
 		break;
 	case I915_PARAM_HAS_BSD2:
-		value = !!dev_priv->engine[VCS1];
+		value = !!intel_engine_lookup_user(dev_priv,
+						   I915_ENGINE_CLASS_VIDEO, 1);
 		break;
 	case I915_PARAM_HAS_LLC:
 		value = HAS_LLC(dev_priv);
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 10/23] drm/i915: Isolate i915_getparam_ioctl()
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (7 preceding siblings ...)
  2019-07-23 18:38 ` [PATCH 09/23] drm/i915: Use intel_engine_lookup_user for probing HAS_BSD etc Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-23 18:38 ` [PATCH 11/23] drm/i915: Only include active engines in the capture state Chris Wilson
                   ` (17 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx

This giant switch has tendrils all over the struct and does not fit
into the rest of the driver bring-up and control of i915_drv.c. Push it
to one side so that it can grow in peace.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/Makefile        |   1 +
 drivers/gpu/drm/i915/i915_drv.c      | 169 ---------------------------
 drivers/gpu/drm/i915/i915_drv.h      |   3 +
 drivers/gpu/drm/i915/i915_getparam.c | 168 ++++++++++++++++++++++++++
 4 files changed, 172 insertions(+), 169 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_getparam.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index fafc3763dc2d..d2c1dcda20a1 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -42,6 +42,7 @@ subdir-ccflags-y += -I$(srctree)/$(src)
 # core driver code
 i915-y += i915_drv.o \
 	  i915_irq.o \
+	  i915_getparam.o \
 	  i915_params.o \
 	  i915_pci.o \
 	  i915_scatterlist.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index d3c6805045d0..a36fa0a3017f 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -61,22 +61,15 @@
 
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_ioctls.h"
-#include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
-#include "gt/intel_reset.h"
-#include "gt/intel_workarounds.h"
-#include "gt/uc/intel_uc.h"
 
 #include "i915_debugfs.h"
 #include "i915_drv.h"
 #include "i915_irq.h"
-#include "i915_pmu.h"
 #include "i915_query.h"
-#include "i915_trace.h"
 #include "i915_vgpu.h"
 #include "intel_csr.h"
-#include "intel_drv.h"
 #include "intel_pm.h"
 
 static struct drm_driver driver;
@@ -343,168 +336,6 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv)
 	pci_dev_put(pch);
 }
 
-static int i915_getparam_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file_priv)
-{
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-	const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
-	drm_i915_getparam_t *param = data;
-	int value;
-
-	switch (param->param) {
-	case I915_PARAM_IRQ_ACTIVE:
-	case I915_PARAM_ALLOW_BATCHBUFFER:
-	case I915_PARAM_LAST_DISPATCH:
-	case I915_PARAM_HAS_EXEC_CONSTANTS:
-		/* Reject all old ums/dri params. */
-		return -ENODEV;
-	case I915_PARAM_CHIPSET_ID:
-		value = pdev->device;
-		break;
-	case I915_PARAM_REVISION:
-		value = pdev->revision;
-		break;
-	case I915_PARAM_NUM_FENCES_AVAIL:
-		value = dev_priv->ggtt.num_fences;
-		break;
-	case I915_PARAM_HAS_OVERLAY:
-		value = dev_priv->overlay ? 1 : 0;
-		break;
-	case I915_PARAM_HAS_BSD:
-		value = !!intel_engine_lookup_user(dev_priv,
-						   I915_ENGINE_CLASS_VIDEO, 0);
-		break;
-	case I915_PARAM_HAS_BLT:
-		value = !!intel_engine_lookup_user(dev_priv,
-						   I915_ENGINE_CLASS_COPY, 0);
-		break;
-	case I915_PARAM_HAS_VEBOX:
-		value = !!intel_engine_lookup_user(dev_priv,
-						   I915_ENGINE_CLASS_VIDEO_ENHANCE, 0);
-		break;
-	case I915_PARAM_HAS_BSD2:
-		value = !!intel_engine_lookup_user(dev_priv,
-						   I915_ENGINE_CLASS_VIDEO, 1);
-		break;
-	case I915_PARAM_HAS_LLC:
-		value = HAS_LLC(dev_priv);
-		break;
-	case I915_PARAM_HAS_WT:
-		value = HAS_WT(dev_priv);
-		break;
-	case I915_PARAM_HAS_ALIASING_PPGTT:
-		value = INTEL_PPGTT(dev_priv);
-		break;
-	case I915_PARAM_HAS_SEMAPHORES:
-		value = !!(dev_priv->caps.scheduler & I915_SCHEDULER_CAP_SEMAPHORES);
-		break;
-	case I915_PARAM_HAS_SECURE_BATCHES:
-		value = capable(CAP_SYS_ADMIN);
-		break;
-	case I915_PARAM_CMD_PARSER_VERSION:
-		value = i915_cmd_parser_get_version(dev_priv);
-		break;
-	case I915_PARAM_SUBSLICE_TOTAL:
-		value = intel_sseu_subslice_total(sseu);
-		if (!value)
-			return -ENODEV;
-		break;
-	case I915_PARAM_EU_TOTAL:
-		value = sseu->eu_total;
-		if (!value)
-			return -ENODEV;
-		break;
-	case I915_PARAM_HAS_GPU_RESET:
-		value = i915_modparams.enable_hangcheck &&
-			intel_has_gpu_reset(dev_priv);
-		if (value && intel_has_reset_engine(dev_priv))
-			value = 2;
-		break;
-	case I915_PARAM_HAS_RESOURCE_STREAMER:
-		value = 0;
-		break;
-	case I915_PARAM_HAS_POOLED_EU:
-		value = HAS_POOLED_EU(dev_priv);
-		break;
-	case I915_PARAM_MIN_EU_IN_POOL:
-		value = sseu->min_eu_in_pool;
-		break;
-	case I915_PARAM_HUC_STATUS:
-		value = intel_huc_check_status(&dev_priv->gt.uc.huc);
-		if (value < 0)
-			return value;
-		break;
-	case I915_PARAM_MMAP_GTT_VERSION:
-		/* Though we've started our numbering from 1, and so class all
-		 * earlier versions as 0, in effect their value is undefined as
-		 * the ioctl will report EINVAL for the unknown param!
-		 */
-		value = i915_gem_mmap_gtt_version();
-		break;
-	case I915_PARAM_HAS_SCHEDULER:
-		value = dev_priv->caps.scheduler;
-		break;
-
-	case I915_PARAM_MMAP_VERSION:
-		/* Remember to bump this if the version changes! */
-	case I915_PARAM_HAS_GEM:
-	case I915_PARAM_HAS_PAGEFLIPPING:
-	case I915_PARAM_HAS_EXECBUF2: /* depends on GEM */
-	case I915_PARAM_HAS_RELAXED_FENCING:
-	case I915_PARAM_HAS_COHERENT_RINGS:
-	case I915_PARAM_HAS_RELAXED_DELTA:
-	case I915_PARAM_HAS_GEN7_SOL_RESET:
-	case I915_PARAM_HAS_WAIT_TIMEOUT:
-	case I915_PARAM_HAS_PRIME_VMAP_FLUSH:
-	case I915_PARAM_HAS_PINNED_BATCHES:
-	case I915_PARAM_HAS_EXEC_NO_RELOC:
-	case I915_PARAM_HAS_EXEC_HANDLE_LUT:
-	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
-	case I915_PARAM_HAS_EXEC_SOFTPIN:
-	case I915_PARAM_HAS_EXEC_ASYNC:
-	case I915_PARAM_HAS_EXEC_FENCE:
-	case I915_PARAM_HAS_EXEC_CAPTURE:
-	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
-	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
-	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
-		/* For the time being all of these are always true;
-		 * if some supported hardware does not have one of these
-		 * features this value needs to be provided from
-		 * INTEL_INFO(), a feature macro, or similar.
-		 */
-		value = 1;
-		break;
-	case I915_PARAM_HAS_CONTEXT_ISOLATION:
-		value = intel_engines_has_context_isolation(dev_priv);
-		break;
-	case I915_PARAM_SLICE_MASK:
-		value = sseu->slice_mask;
-		if (!value)
-			return -ENODEV;
-		break;
-	case I915_PARAM_SUBSLICE_MASK:
-		value = sseu->subslice_mask[0];
-		if (!value)
-			return -ENODEV;
-		break;
-	case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
-		value = 1000 * RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz;
-		break;
-	case I915_PARAM_MMAP_GTT_COHERENT:
-		value = INTEL_INFO(dev_priv)->has_coherent_ggtt;
-		break;
-	default:
-		DRM_DEBUG("Unknown parameter %d\n", param->param);
-		return -EINVAL;
-	}
-
-	if (put_user(value, param->value))
-		return -EFAULT;
-
-	return 0;
-}
-
 static int i915_get_bridge_dev(struct drm_i915_private *dev_priv)
 {
 	int domain = pci_domain_nr(dev_priv->drm.pdev->bus);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b8363c483caf..c85e0d5adab8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2405,6 +2405,9 @@ static inline bool intel_vgpu_active(struct drm_i915_private *dev_priv)
 	return dev_priv->vgpu.active;
 }
 
+int i915_getparam_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv);
+
 /* i915_gem.c */
 int i915_gem_init_userptr(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
new file mode 100644
index 000000000000..5d9101376a3d
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -0,0 +1,168 @@
+/*
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "gt/intel_engine_user.h"
+
+#include "i915_drv.h"
+
+int i915_getparam_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
+	drm_i915_getparam_t *param = data;
+	int value;
+
+	switch (param->param) {
+	case I915_PARAM_IRQ_ACTIVE:
+	case I915_PARAM_ALLOW_BATCHBUFFER:
+	case I915_PARAM_LAST_DISPATCH:
+	case I915_PARAM_HAS_EXEC_CONSTANTS:
+		/* Reject all old ums/dri params. */
+		return -ENODEV;
+	case I915_PARAM_CHIPSET_ID:
+		value = i915->drm.pdev->device;
+		break;
+	case I915_PARAM_REVISION:
+		value = i915->drm.pdev->revision;
+		break;
+	case I915_PARAM_NUM_FENCES_AVAIL:
+		value = i915->ggtt.num_fences;
+		break;
+	case I915_PARAM_HAS_OVERLAY:
+		value = !!i915->overlay;
+		break;
+	case I915_PARAM_HAS_BSD:
+		value = !!intel_engine_lookup_user(i915,
+						   I915_ENGINE_CLASS_VIDEO, 0);
+		break;
+	case I915_PARAM_HAS_BLT:
+		value = !!intel_engine_lookup_user(i915,
+						   I915_ENGINE_CLASS_COPY, 0);
+		break;
+	case I915_PARAM_HAS_VEBOX:
+		value = !!intel_engine_lookup_user(i915,
+						   I915_ENGINE_CLASS_VIDEO_ENHANCE, 0);
+		break;
+	case I915_PARAM_HAS_BSD2:
+		value = !!intel_engine_lookup_user(i915,
+						   I915_ENGINE_CLASS_VIDEO, 1);
+		break;
+	case I915_PARAM_HAS_LLC:
+		value = HAS_LLC(i915);
+		break;
+	case I915_PARAM_HAS_WT:
+		value = HAS_WT(i915);
+		break;
+	case I915_PARAM_HAS_ALIASING_PPGTT:
+		value = INTEL_PPGTT(i915);
+		break;
+	case I915_PARAM_HAS_SEMAPHORES:
+		value = !!(i915->caps.scheduler & I915_SCHEDULER_CAP_SEMAPHORES);
+		break;
+	case I915_PARAM_HAS_SECURE_BATCHES:
+		value = capable(CAP_SYS_ADMIN);
+		break;
+	case I915_PARAM_CMD_PARSER_VERSION:
+		value = i915_cmd_parser_get_version(i915);
+		break;
+	case I915_PARAM_SUBSLICE_TOTAL:
+		value = intel_sseu_subslice_total(sseu);
+		if (!value)
+			return -ENODEV;
+		break;
+	case I915_PARAM_EU_TOTAL:
+		value = sseu->eu_total;
+		if (!value)
+			return -ENODEV;
+		break;
+	case I915_PARAM_HAS_GPU_RESET:
+		value = i915_modparams.enable_hangcheck &&
+			intel_has_gpu_reset(i915);
+		if (value && intel_has_reset_engine(i915))
+			value = 2;
+		break;
+	case I915_PARAM_HAS_RESOURCE_STREAMER:
+		value = 0;
+		break;
+	case I915_PARAM_HAS_POOLED_EU:
+		value = HAS_POOLED_EU(i915);
+		break;
+	case I915_PARAM_MIN_EU_IN_POOL:
+		value = sseu->min_eu_in_pool;
+		break;
+	case I915_PARAM_HUC_STATUS:
+		value = intel_huc_check_status(&i915->gt.uc.huc);
+		if (value < 0)
+			return value;
+		break;
+	case I915_PARAM_MMAP_GTT_VERSION:
+		/* Though we've started our numbering from 1, and so class all
+		 * earlier versions as 0, in effect their value is undefined as
+		 * the ioctl will report EINVAL for the unknown param!
+		 */
+		value = i915_gem_mmap_gtt_version();
+		break;
+	case I915_PARAM_HAS_SCHEDULER:
+		value = i915->caps.scheduler;
+		break;
+
+	case I915_PARAM_MMAP_VERSION:
+		/* Remember to bump this if the version changes! */
+	case I915_PARAM_HAS_GEM:
+	case I915_PARAM_HAS_PAGEFLIPPING:
+	case I915_PARAM_HAS_EXECBUF2: /* depends on GEM */
+	case I915_PARAM_HAS_RELAXED_FENCING:
+	case I915_PARAM_HAS_COHERENT_RINGS:
+	case I915_PARAM_HAS_RELAXED_DELTA:
+	case I915_PARAM_HAS_GEN7_SOL_RESET:
+	case I915_PARAM_HAS_WAIT_TIMEOUT:
+	case I915_PARAM_HAS_PRIME_VMAP_FLUSH:
+	case I915_PARAM_HAS_PINNED_BATCHES:
+	case I915_PARAM_HAS_EXEC_NO_RELOC:
+	case I915_PARAM_HAS_EXEC_HANDLE_LUT:
+	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
+	case I915_PARAM_HAS_EXEC_SOFTPIN:
+	case I915_PARAM_HAS_EXEC_ASYNC:
+	case I915_PARAM_HAS_EXEC_FENCE:
+	case I915_PARAM_HAS_EXEC_CAPTURE:
+	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
+	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
+	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
+		/* For the time being all of these are always true;
+		 * if some supported hardware does not have one of these
+		 * features this value needs to be provided from
+		 * INTEL_INFO(), a feature macro, or similar.
+		 */
+		value = 1;
+		break;
+	case I915_PARAM_HAS_CONTEXT_ISOLATION:
+		value = intel_engines_has_context_isolation(i915);
+		break;
+	case I915_PARAM_SLICE_MASK:
+		value = sseu->slice_mask;
+		if (!value)
+			return -ENODEV;
+		break;
+	case I915_PARAM_SUBSLICE_MASK:
+		value = sseu->subslice_mask[0];
+		if (!value)
+			return -ENODEV;
+		break;
+	case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
+		value = 1000 * RUNTIME_INFO(i915)->cs_timestamp_frequency_khz;
+		break;
+	case I915_PARAM_MMAP_GTT_COHERENT:
+		value = INTEL_INFO(i915)->has_coherent_ggtt;
+		break;
+	default:
+		DRM_DEBUG("Unknown parameter %d\n", param->param);
+		return -EINVAL;
+	}
+
+	if (put_user(value, param->value))
+		return -EFAULT;
+
+	return 0;
+}
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 11/23] drm/i915: Only include active engines in the capture state
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (8 preceding siblings ...)
  2019-07-23 18:38 ` [PATCH 10/23] drm/i915: Isolate i915_getparam_ioctl() Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-23 18:38 ` [PATCH 12/23] drm/i915: Teach execbuffer to take the engine wakeref not GT Chris Wilson
                   ` (16 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx

Skip printing out idle engines that did not contribute to the GPU hang.
As the number of engines gets ever larger, we have increasing noise in
the error state where typically there is only one guilty request on one
engine that we need to inspect.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 241 +++++++++++---------------
 drivers/gpu/drm/i915/i915_gpu_error.h |   7 +-
 2 files changed, 106 insertions(+), 142 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 2193687eac72..92038063ac74 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -49,27 +49,6 @@
 #define ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
 #define ATOMIC_MAYFAIL (GFP_ATOMIC | __GFP_NOWARN)
 
-static inline const struct intel_engine_cs *
-engine_lookup(const struct drm_i915_private *i915, unsigned int id)
-{
-	if (id >= I915_NUM_ENGINES)
-		return NULL;
-
-	return i915->engine[id];
-}
-
-static inline const char *
-__engine_name(const struct intel_engine_cs *engine)
-{
-	return engine ? engine->name : "";
-}
-
-static const char *
-engine_name(const struct drm_i915_private *i915, unsigned int id)
-{
-	return __engine_name(engine_lookup(i915, id));
-}
-
 static void __sg_set_buf(struct scatterlist *sg,
 			 void *addr, unsigned int len, loff_t it)
 {
@@ -449,7 +428,7 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m,
 	err_printf(m, "  INSTDONE: 0x%08x\n",
 		   ee->instdone.instdone);
 
-	if (ee->engine_id != RCS0 || INTEL_GEN(m->i915) <= 3)
+	if (ee->engine->class != RENDER_CLASS || INTEL_GEN(m->i915) <= 3)
 		return;
 
 	err_printf(m, "  SC_INSTDONE: 0x%08x\n",
@@ -503,8 +482,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 {
 	int n;
 
-	err_printf(m, "%s command stream:\n",
-		   engine_name(m->i915, ee->engine_id));
+	err_printf(m, "%s command stream:\n", ee->engine->name);
 	err_printf(m, "  IDLE?: %s\n", yesno(ee->idle));
 	err_printf(m, "  START: 0x%08x\n", ee->start);
 	err_printf(m, "  HEAD:  0x%08x [0x%08x]\n", ee->head, ee->rq_head);
@@ -580,9 +558,9 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
 }
 
 static void print_error_obj(struct drm_i915_error_state_buf *m,
-			    struct intel_engine_cs *engine,
+			    const struct intel_engine_cs *engine,
 			    const char *name,
-			    struct drm_i915_error_object *obj)
+			    const struct drm_i915_error_object *obj)
 {
 	char out[ASCII85_BUFSZ];
 	int page;
@@ -679,7 +657,7 @@ static void err_free_sgl(struct scatterlist *sgl)
 static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 			       struct i915_gpu_state *error)
 {
-	struct drm_i915_error_object *obj;
+	const struct drm_i915_error_engine *ee;
 	struct timespec64 ts;
 	int i, j;
 
@@ -703,15 +681,12 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 		   jiffies_to_msecs(jiffies - error->capture),
 		   jiffies_to_msecs(error->capture - error->epoch));
 
-	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
-		if (!error->engine[i].context.pid)
-			continue;
-
+	for (ee = error->engine; ee; ee = ee->next)
 		err_printf(m, "Active process (on ring %s): %s [%d]\n",
-			   engine_name(m->i915, i),
-			   error->engine[i].context.comm,
-			   error->engine[i].context.pid);
-	}
+			   ee->engine->name,
+			   ee->context.comm,
+			   ee->context.pid);
+
 	err_printf(m, "Reset count: %u\n", error->reset_count);
 	err_printf(m, "Suspend count: %u\n", error->suspend_count);
 	err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform));
@@ -760,17 +735,15 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 	if (IS_GEN(m->i915, 7))
 		err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
 
-	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
-		if (error->engine[i].engine_id != -1)
-			error_print_engine(m, &error->engine[i], error->epoch);
-	}
+	for (ee = error->engine; ee; ee = ee->next)
+		error_print_engine(m, ee, error->epoch);
 
-	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
-		const struct drm_i915_error_engine *ee = &error->engine[i];
+	for (ee = error->engine; ee; ee = ee->next) {
+		const struct drm_i915_error_object *obj;
 
 		obj = ee->batchbuffer;
 		if (obj) {
-			err_puts(m, m->i915->engine[i]->name);
+			err_puts(m, ee->engine->name);
 			if (ee->context.pid)
 				err_printf(m, " (submitted by %s [%d])",
 					   ee->context.comm,
@@ -778,16 +751,15 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 			err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
 				   upper_32_bits(obj->gtt_offset),
 				   lower_32_bits(obj->gtt_offset));
-			print_error_obj(m, m->i915->engine[i], NULL, obj);
+			print_error_obj(m, ee->engine, NULL, obj);
 		}
 
 		for (j = 0; j < ee->user_bo_count; j++)
-			print_error_obj(m, m->i915->engine[i],
-					"user", ee->user_bo[j]);
+			print_error_obj(m, ee->engine, "user", ee->user_bo[j]);
 
 		if (ee->num_requests) {
 			err_printf(m, "%s --- %d requests\n",
-				   m->i915->engine[i]->name,
+				   ee->engine->name,
 				   ee->num_requests);
 			for (j = 0; j < ee->num_requests; j++)
 				error_print_request(m, " ",
@@ -795,22 +767,13 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 						    error->epoch);
 		}
 
-		print_error_obj(m, m->i915->engine[i],
-				"ringbuffer", ee->ringbuffer);
-
-		print_error_obj(m, m->i915->engine[i],
-				"HW Status", ee->hws_page);
-
-		print_error_obj(m, m->i915->engine[i],
-				"HW context", ee->ctx);
-
-		print_error_obj(m, m->i915->engine[i],
-				"WA context", ee->wa_ctx);
-
-		print_error_obj(m, m->i915->engine[i],
+		print_error_obj(m, ee->engine, "ringbuffer", ee->ringbuffer);
+		print_error_obj(m, ee->engine, "HW Status", ee->hws_page);
+		print_error_obj(m, ee->engine, "HW context", ee->ctx);
+		print_error_obj(m, ee->engine, "WA context", ee->wa_ctx);
+		print_error_obj(m, ee->engine,
 				"WA batchbuffer", ee->wa_batchbuffer);
-
-		print_error_obj(m, m->i915->engine[i],
+		print_error_obj(m, ee->engine,
 				"NULL context", ee->default_state);
 	}
 
@@ -959,13 +922,15 @@ void __i915_gpu_state_free(struct kref *error_ref)
 {
 	struct i915_gpu_state *error =
 		container_of(error_ref, typeof(*error), ref);
-	long i, j;
+	long i;
 
-	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
-		struct drm_i915_error_engine *ee = &error->engine[i];
+	while (error->engine) {
+		struct drm_i915_error_engine *ee = error->engine;
 
-		for (j = 0; j < ee->user_bo_count; j++)
-			i915_error_object_free(ee->user_bo[j]);
+		error->engine = ee->next;
+
+		for (i = 0; i < ee->user_bo_count; i++)
+			i915_error_object_free(ee->user_bo[i]);
 		kfree(ee->user_bo);
 
 		i915_error_object_free(ee->batchbuffer);
@@ -976,6 +941,7 @@ void __i915_gpu_state_free(struct kref *error_ref)
 		i915_error_object_free(ee->wa_ctx);
 
 		kfree(ee->requests);
+		kfree(ee);
 	}
 
 	kfree(error->overlay);
@@ -1055,23 +1021,17 @@ i915_error_object_create(struct drm_i915_private *i915,
  *
  * It's only a small step better than a random number in its current form.
  */
-static u32 i915_error_generate_code(struct i915_gpu_state *error,
-				    intel_engine_mask_t engine_mask)
+static u32 i915_error_generate_code(struct i915_gpu_state *error)
 {
+	const struct drm_i915_error_engine *ee = error->engine;
+
 	/*
 	 * IPEHR would be an ideal way to detect errors, as it's the gross
 	 * measure of "the command that hung." However, has some very common
 	 * synchronization commands which almost always appear in the case
 	 * strictly a client bug. Use instdone to differentiate those some.
 	 */
-	if (engine_mask) {
-		struct drm_i915_error_engine *ee =
-			&error->engine[ffs(engine_mask)];
-
-		return ee->ipehr ^ ee->instdone.instdone;
-	}
-
-	return 0;
+	return ee ? ee->ipehr ^ ee->instdone.instdone : 0;
 }
 
 static void gem_record_fences(struct i915_gpu_state *error)
@@ -1281,9 +1241,11 @@ static void error_record_engine_execlists(const struct intel_engine_cs *engine,
 	ee->num_ports = n;
 }
 
-static void record_context(struct drm_i915_error_context *e,
-			   struct i915_gem_context *ctx)
+static bool record_context(struct drm_i915_error_context *e,
+			   const struct i915_request *rq)
 {
+	const struct i915_gem_context *ctx = rq->gem_context;
+
 	if (ctx->pid) {
 		struct task_struct *task;
 
@@ -1300,6 +1262,8 @@ static void record_context(struct drm_i915_error_context *e,
 	e->sched_attr = ctx->sched;
 	e->guilty = atomic_read(&ctx->guilty_count);
 	e->active = atomic_read(&ctx->active_count);
+
+	return i915_gem_context_no_error_capture(ctx);
 }
 
 static void
@@ -1364,74 +1328,68 @@ static void
 gem_record_rings(struct i915_gpu_state *error, struct compress *compress)
 {
 	struct drm_i915_private *i915 = error->i915;
-	int i;
+	struct intel_engine_cs *engine;
+	struct drm_i915_error_engine *ee;
+
+	ee = kzalloc(sizeof(*ee), GFP_KERNEL);
+	if (!ee)
+		return;
 
-	for (i = 0; i < I915_NUM_ENGINES; i++) {
-		struct intel_engine_cs *engine = i915->engine[i];
-		struct drm_i915_error_engine *ee = &error->engine[i];
+	for_each_user_engine(engine, i915) {
 		struct i915_request *request;
 		unsigned long flags;
 
-		ee->engine_id = -1;
-
-		if (!engine)
-			continue;
-
-		ee->engine_id = i;
-
 		/* Refill our page pool before entering atomic section */
 		pool_refill(&compress->pool, ALLOW_FAIL);
 
-		error_record_engine_registers(error, engine, ee);
-		error_record_engine_execlists(engine, ee);
-
 		spin_lock_irqsave(&engine->active.lock, flags);
 		request = intel_engine_find_active_request(engine);
-		if (request) {
-			struct i915_gem_context *ctx = request->gem_context;
-			struct intel_ring *ring = request->ring;
-
-			record_context(&ee->context, ctx);
+		if (!request) {
+			spin_unlock_irqrestore(&engine->active.lock, flags);
+			continue;
+		}
 
-			/* We need to copy these to an anonymous buffer
-			 * as the simplest method to avoid being overwritten
-			 * by userspace.
-			 */
-			ee->batchbuffer =
-				i915_error_object_create(i915,
-							 request->batch,
-							 compress);
+		error->simulated |= record_context(&ee->context, request);
 
-			if (HAS_BROKEN_CS_TLB(i915))
-				ee->wa_batchbuffer =
-				  i915_error_object_create(i915,
-							   engine->gt->scratch,
-							   compress);
-			request_record_user_bo(request, ee, compress);
+		/*
+		 * We need to copy these to an anonymous buffer
+		 * as the simplest method to avoid being overwritten
+		 * by userspace.
+		 */
+		ee->batchbuffer =
+			i915_error_object_create(i915,
+						 request->batch,
+						 compress);
 
-			ee->ctx =
+		if (HAS_BROKEN_CS_TLB(i915))
+			ee->wa_batchbuffer =
 				i915_error_object_create(i915,
-							 request->hw_context->state,
+							 engine->gt->scratch,
 							 compress);
+		request_record_user_bo(request, ee, compress);
 
-			error->simulated |=
-				i915_gem_context_no_error_capture(ctx);
+		ee->ctx =
+			i915_error_object_create(i915,
+						 request->hw_context->state,
+						 compress);
 
-			ee->rq_head = request->head;
-			ee->rq_post = request->postfix;
-			ee->rq_tail = request->tail;
+		ee->rq_head = request->head;
+		ee->rq_post = request->postfix;
+		ee->rq_tail = request->tail;
 
-			ee->cpu_ring_head = ring->head;
-			ee->cpu_ring_tail = ring->tail;
-			ee->ringbuffer =
-				i915_error_object_create(i915,
-							 ring->vma,
-							 compress);
+		ee->cpu_ring_head = request->ring->head;
+		ee->cpu_ring_tail = request->ring->tail;
+		ee->ringbuffer =
+			i915_error_object_create(i915,
+						 request->ring->vma,
+						 compress);
 
-			engine_record_requests(engine, request, ee);
-		}
+		engine_record_requests(engine, request, ee);
 		spin_unlock_irqrestore(&engine->active.lock, flags);
 
+		error_record_engine_registers(error, engine, ee);
+		error_record_engine_execlists(engine, ee);
+
 		ee->hws_page =
 			i915_error_object_create(i915,
 						 engine->status_page.vma,
@@ -1444,7 +1402,18 @@ gem_record_rings(struct i915_gpu_state *error, struct compress *compress)
 
 		ee->default_state =
 			capture_object(i915, engine->default_state, compress);
+
+		ee->engine = engine;
+
+		ee->next = error->engine;
+		error->engine = ee;
+
+		ee = kzalloc(sizeof(*ee), GFP_KERNEL);
+		if (!ee)
+			return;
 	}
+
+	kfree(ee);
 }
 
 static void
@@ -1573,24 +1542,18 @@ error_msg(struct i915_gpu_state *error,
 	  intel_engine_mask_t engines, const char *msg)
 {
 	int len;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(error->engine); i++)
-		if (!error->engine[i].context.pid)
-			engines &= ~BIT(i);
 
 	len = scnprintf(error->error_msg, sizeof(error->error_msg),
 			"GPU HANG: ecode %d:%x:0x%08x",
 			INTEL_GEN(error->i915), engines,
-			i915_error_generate_code(error, engines));
-	if (engines) {
+			i915_error_generate_code(error));
+	if (error->engine) {
 		/* Just show the first executing process, more is confusing */
-		i = __ffs(engines);
 		len += scnprintf(error->error_msg + len,
 				 sizeof(error->error_msg) - len,
 				 ", in %s [%d]",
-				 error->engine[i].context.comm,
-				 error->engine[i].context.pid);
+				 error->engine->context.comm,
+				 error->engine->context.pid);
 	}
 	if (msg)
 		len += scnprintf(error->error_msg + len,
@@ -1631,12 +1594,10 @@ static void capture_params(struct i915_gpu_state *error)
 
 static unsigned long capture_find_epoch(const struct i915_gpu_state *error)
 {
+	const struct drm_i915_error_engine *ee;
 	unsigned long epoch = error->capture;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
-		const struct drm_i915_error_engine *ee = &error->engine[i];
 
+	for (ee = error->engine; ee; ee = ee->next) {
 		if (ee->hangcheck_timestamp &&
 		    time_before(ee->hangcheck_timestamp, epoch))
 			epoch = ee->hangcheck_timestamp;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index a24c35107d16..df9f57766626 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -81,7 +81,8 @@ struct i915_gpu_state {
 	struct intel_display_error_state *display;
 
 	struct drm_i915_error_engine {
-		int engine_id;
+		const struct intel_engine_cs *engine;
+
 		/* Software tracked state */
 		bool idle;
 		unsigned long hangcheck_timestamp;
@@ -158,7 +159,9 @@ struct i915_gpu_state {
 				u32 pp_dir_base;
 			};
 		} vm_info;
-	} engine[I915_NUM_ENGINES];
+
+		struct drm_i915_error_engine *next;
+	} *engine;
 
 	struct scatterlist *sgl, *fit;
 };
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 12/23] drm/i915: Teach execbuffer to take the engine wakeref not GT
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (9 preceding siblings ...)
  2019-07-23 18:38 ` [PATCH 11/23] drm/i915: Only include active engines in the capture state Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-23 18:38 ` [PATCH 13/23] drm/i915/gt: Track timeline activeness in enter/exit Chris Wilson
                   ` (15 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we would like to couple into the engine wakeref to
free the batch pool on idling. The caveat here is that we therefore want
to track the engine wakeref more precisely and to hold it instead of the
broader GT wakeref as we process the ioctl.

v2: Avoid introducing odd semantics for a shortlived timeline->mutex
acquisition interface.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 42 +++++++++++++------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 4db4463089ce..8d90498eaf46 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2139,14 +2139,40 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
 	if (err)
 		return err;
 
+	/*
+	 * Take a local wakeref for preparing to dispatch the execbuf as
+	 * we expect to access the hardware fairly frequently in the
+	 * process, and require the engine to be kept awake between accesses.
+	 * Upon dispatch, we acquire another prolonged wakeref that we hold
+	 * until the timeline is idle, which in turn releases the wakeref
+	 * taken on the engine, and the parent device.
+	 */
+	err = intel_context_timeline_lock(ce);
+	if (err)
+		goto err_unpin;
+
+	intel_context_enter(ce);
+	intel_context_timeline_unlock(ce);
+
 	eb->engine = ce->engine;
 	eb->context = ce;
 	return 0;
+
+err_unpin:
+	intel_context_unpin(ce);
+	return err;
 }
 
 static void eb_unpin_context(struct i915_execbuffer *eb)
 {
-	intel_context_unpin(eb->context);
+	struct intel_context *ce = eb->context;
+	struct intel_timeline *tl = ce->ring->timeline;
+
+	mutex_lock(&tl->mutex);
+	intel_context_exit(ce);
+	mutex_unlock(&tl->mutex);
+
+	intel_context_unpin(ce);
 }
 
 static unsigned int
@@ -2426,18 +2452,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_destroy;
 
-	/*
-	 * Take a local wakeref for preparing to dispatch the execbuf as
-	 * we expect to access the hardware fairly frequently in the
-	 * process. Upon first dispatch, we acquire another prolonged
-	 * wakeref that we hold until the GPU has been idle for at least
-	 * 100ms.
-	 */
-	intel_gt_pm_get(&eb.i915->gt);
-
 	err = i915_mutex_lock_interruptible(dev);
 	if (err)
-		goto err_rpm;
+		goto err_context;
 
 	err = eb_select_engine(&eb, file, args);
 	if (unlikely(err))
@@ -2602,8 +2619,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	eb_unpin_context(&eb);
 err_unlock:
 	mutex_unlock(&dev->struct_mutex);
-err_rpm:
-	intel_gt_pm_put(&eb.i915->gt);
+err_context:
 	i915_gem_context_put(eb.gem_context);
 err_destroy:
 	eb_destroy(&eb);
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 13/23] drm/i915/gt: Track timeline activeness in enter/exit
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (10 preceding siblings ...)
  2019-07-23 18:38 ` [PATCH 12/23] drm/i915: Teach execbuffer to take the engine wakeref not GT Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-23 18:38 ` [PATCH 14/23] drm/i915/gt: Convert timeline tracking to spinlock Chris Wilson
                   ` (14 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx

Lift moving the timeline to/from the active_list on enter/exit in order
to shorten the active tracking span in comparison to the existing
pin/unpin.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        |  1 -
 drivers/gpu/drm/i915/gt/intel_context.c       |  2 +
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     |  1 +
 drivers/gpu/drm/i915/gt/intel_lrc.c           |  4 +
 drivers/gpu/drm/i915/gt/intel_timeline.c      | 98 +++++++------------
 drivers/gpu/drm/i915/gt/intel_timeline.h      |  3 +-
 .../gpu/drm/i915/gt/intel_timeline_types.h    | 18 ++++
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 -
 8 files changed, 63 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index b5561cbdc5ea..cccb02b857de 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -39,7 +39,6 @@ static void i915_gem_park(struct drm_i915_private *i915)
 		i915_gem_batch_pool_fini(&engine->batch_pool);
 	}
 
-	intel_timelines_park(i915);
 	i915_vma_parked(i915);
 
 	i915_globals_park();
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index c39ad7d19197..0ce5880ace0a 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -243,10 +243,12 @@ int __init i915_global_context_init(void)
 void intel_context_enter_engine(struct intel_context *ce)
 {
 	intel_engine_pm_get(ce->engine);
+	intel_timeline_enter(ce->ring->timeline);
 }
 
 void intel_context_exit_engine(struct intel_context *ce)
 {
+	intel_timeline_exit(ce->ring->timeline);
 	intel_engine_pm_put(ce->engine);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index e74fbf04a68d..072f65e6a09e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -89,6 +89,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 
 	/* Check again on the next retirement. */
 	engine->wakeref_serial = engine->serial + 1;
+	intel_timeline_enter(rq->timeline);
 
 	i915_request_add_barriers(rq);
 	__i915_request_commit(rq);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 3f1b20cc50c2..5c73b93be4fb 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -3253,6 +3253,8 @@ static void virtual_context_enter(struct intel_context *ce)
 
 	for (n = 0; n < ve->num_siblings; n++)
 		intel_engine_pm_get(ve->siblings[n]);
+
+	intel_timeline_enter(ce->ring->timeline);
 }
 
 static void virtual_context_exit(struct intel_context *ce)
@@ -3260,6 +3262,8 @@ static void virtual_context_exit(struct intel_context *ce)
 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
 	unsigned int n;
 
+	intel_timeline_exit(ce->ring->timeline);
+
 	for (n = 0; n < ve->num_siblings; n++)
 		intel_engine_pm_put(ve->siblings[n]);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 6daa9eb59e19..4af0b9801d91 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -278,64 +278,11 @@ void intel_timelines_init(struct drm_i915_private *i915)
 	timelines_init(&i915->gt);
 }
 
-static void timeline_add_to_active(struct intel_timeline *tl)
-{
-	struct intel_gt_timelines *gt = &tl->gt->timelines;
-
-	mutex_lock(&gt->mutex);
-	list_add(&tl->link, &gt->active_list);
-	mutex_unlock(&gt->mutex);
-}
-
-static void timeline_remove_from_active(struct intel_timeline *tl)
-{
-	struct intel_gt_timelines *gt = &tl->gt->timelines;
-
-	mutex_lock(&gt->mutex);
-	list_del(&tl->link);
-	mutex_unlock(&gt->mutex);
-}
-
-static void timelines_park(struct intel_gt *gt)
-{
-	struct intel_gt_timelines *timelines = &gt->timelines;
-	struct intel_timeline *timeline;
-
-	mutex_lock(&timelines->mutex);
-	list_for_each_entry(timeline, &timelines->active_list, link) {
-		/*
-		 * All known fences are completed so we can scrap
-		 * the current sync point tracking and start afresh,
-		 * any attempt to wait upon a previous sync point
-		 * will be skipped as the fence was signaled.
-		 */
-		i915_syncmap_free(&timeline->sync);
-	}
-	mutex_unlock(&timelines->mutex);
-}
-
-/**
- * intel_timelines_park - called when the driver idles
- * @i915: the drm_i915_private device
- *
- * When the driver is completely idle, we know that all of our sync points
- * have been signaled and our tracking is then entirely redundant. Any request
- * to wait upon an older sync point will be completed instantly as we know
- * the fence is signaled and therefore we will not even look them up in the
- * sync point map.
- */
-void intel_timelines_park(struct drm_i915_private *i915)
-{
-	timelines_park(&i915->gt);
-}
-
 void intel_timeline_fini(struct intel_timeline *timeline)
 {
 	GEM_BUG_ON(timeline->pin_count);
 	GEM_BUG_ON(!list_empty(&timeline->requests));
 
-	i915_syncmap_free(&timeline->sync);
-
 	if (timeline->hwsp_cacheline)
 		cacheline_free(timeline->hwsp_cacheline);
 	else
@@ -370,6 +317,7 @@ int intel_timeline_pin(struct intel_timeline *tl)
 	if (tl->pin_count++)
 		return 0;
 	GEM_BUG_ON(!tl->pin_count);
+	GEM_BUG_ON(tl->active_count);
 
 	err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
 	if (err)
@@ -380,7 +328,6 @@ int intel_timeline_pin(struct intel_timeline *tl)
 		offset_in_page(tl->hwsp_offset);
 
 	cacheline_acquire(tl->hwsp_cacheline);
-	timeline_add_to_active(tl);
 
 	return 0;
 
@@ -389,6 +336,40 @@ int intel_timeline_pin(struct intel_timeline *tl)
 	return err;
 }
 
+void intel_timeline_enter(struct intel_timeline *tl)
+{
+	struct intel_gt_timelines *timelines = &tl->gt->timelines;
+
+	GEM_BUG_ON(!tl->pin_count);
+	if (tl->active_count++)
+		return;
+	GEM_BUG_ON(!tl->active_count); /* overflow? */
+
+	mutex_lock(&timelines->mutex);
+	list_add(&tl->link, &timelines->active_list);
+	mutex_unlock(&timelines->mutex);
+}
+
+void intel_timeline_exit(struct intel_timeline *tl)
+{
+	struct intel_gt_timelines *timelines = &tl->gt->timelines;
+
+	GEM_BUG_ON(!tl->active_count);
+	if (--tl->active_count)
+		return;
+
+	mutex_lock(&timelines->mutex);
+	list_del(&tl->link);
+	mutex_unlock(&timelines->mutex);
+
+	/*
+	 * Since this timeline is idle, all barriers upon which we were waiting
+	 * must also be complete and so we can discard the last used barriers
+	 * without loss of information.
+	 */
+	i915_syncmap_free(&tl->sync);
+}
+
 static u32 timeline_advance(struct intel_timeline *tl)
 {
 	GEM_BUG_ON(!tl->pin_count);
@@ -546,16 +527,9 @@ void intel_timeline_unpin(struct intel_timeline *tl)
 	if (--tl->pin_count)
 		return;
 
-	timeline_remove_from_active(tl);
+	GEM_BUG_ON(tl->active_count);
 	cacheline_release(tl->hwsp_cacheline);
 
-	/*
-	 * Since this timeline is idle, all bariers upon which we were waiting
-	 * must also be complete and so we can discard the last used barriers
-	 * without loss of information.
-	 */
-	i915_syncmap_free(&tl->sync);
-
 	__i915_vma_unpin(tl->hwsp_ggtt);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
index e08cebf64833..f583af1ba18d 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
@@ -77,9 +77,11 @@ static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl,
 }
 
 int intel_timeline_pin(struct intel_timeline *tl);
+void intel_timeline_enter(struct intel_timeline *tl);
 int intel_timeline_get_seqno(struct intel_timeline *tl,
 			     struct i915_request *rq,
 			     u32 *seqno);
+void intel_timeline_exit(struct intel_timeline *tl);
 void intel_timeline_unpin(struct intel_timeline *tl);
 
 int intel_timeline_read_hwsp(struct i915_request *from,
@@ -87,7 +89,6 @@ int intel_timeline_read_hwsp(struct i915_request *from,
 			     u32 *hwsp_offset);
 
 void intel_timelines_init(struct drm_i915_private *i915);
-void intel_timelines_park(struct drm_i915_private *i915);
 void intel_timelines_fini(struct drm_i915_private *i915);
 
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index 9a71aea7a338..b1a9f0c54bf0 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -25,7 +25,25 @@ struct intel_timeline {
 
 	struct mutex mutex; /* protects the flow of requests */
 
+	/*
+	 * pin_count and active_count track essentially the same thing:
+	 * How many requests are in flight or may be under construction.
+	 *
+	 * We need two distinct counters so that we can assign different
+	 * lifetimes to the events for different use-cases. For example,
+	 * we want to permanently keep the timeline pinned for the kernel
+	 * context so that we can issue requests at any time without having
+	 * to acquire space in the GGTT. However, we want to keep tracking
+	 * the activity (to be able to detect when we become idle) along that
+	 * permanently pinned timeline and so end up requiring two counters.
+	 *
+	 * Note that the active_count is protected by the intel_timeline.mutex,
+	 * but the pin_count is protected by a combination of serialisation
+	 * from the intel_context caller plus internal atomicity.
+	 */
 	unsigned int pin_count;
+	unsigned int active_count;
+
 	const u32 *hwsp_seqno;
 	struct i915_vma *hwsp_ggtt;
 	u32 hwsp_offset;
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index f0a840030382..d54113697745 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -816,8 +816,6 @@ static int live_hwsp_recycle(void *arg)
 
 			if (err)
 				goto out;
-
-			intel_timelines_park(i915); /* Encourage recycling! */
 		} while (!__igt_timeout(end_time, NULL));
 	}
 
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 14/23] drm/i915/gt: Convert timeline tracking to spinlock
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (11 preceding siblings ...)
  2019-07-23 18:38 ` [PATCH 13/23] drm/i915/gt: Track timeline activeness in enter/exit Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-23 18:38 ` [PATCH 15/23] drm/i915/gt: Guard timeline pinning with its own mutex Chris Wilson
                   ` (13 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx

Convert the list manipulation of active to use spinlocks so that we can
perform the updates from underneath a quick interrupt callback.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_gt_types.h |  2 +-
 drivers/gpu/drm/i915/gt/intel_reset.c    | 10 ++++++++--
 drivers/gpu/drm/i915/gt/intel_timeline.c | 12 +++++-------
 drivers/gpu/drm/i915/i915_gem.c          | 20 ++++++++++----------
 4 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 5fd11e361d03..b13f63e52203 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -40,7 +40,7 @@ struct intel_gt {
 	struct intel_uc uc;
 
 	struct intel_gt_timelines {
-		struct mutex mutex; /* protects list */
+		spinlock_t lock; /* protects active_list */
 		struct list_head active_list;
 
 		/* Pack multiple timelines' seqnos into the same page */
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 55e2ddcbd215..7d195a5f7da3 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -813,7 +813,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 	 *
 	 * No more can be submitted until we reset the wedged bit.
 	 */
-	mutex_lock(&timelines->mutex);
+	spin_lock(&timelines->lock);
 	list_for_each_entry(tl, &timelines->active_list, link) {
 		struct i915_request *rq;
 
@@ -821,6 +821,8 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 		if (!rq)
 			continue;
 
+		spin_unlock(&timelines->lock);
+
 		/*
 		 * All internal dependencies (i915_requests) will have
 		 * been flushed by the set-wedge, but we may be stuck waiting
@@ -830,8 +832,12 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 		 */
 		dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT);
 		i915_request_put(rq);
+
+		/* Restart iteration after dropping lock */
+		spin_lock(&timelines->lock);
+		tl = list_entry(&timelines->active_list, typeof(*tl), link);
 	}
-	mutex_unlock(&timelines->mutex);
+	spin_unlock(&timelines->lock);
 
 	intel_gt_sanitize(gt, false);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 4af0b9801d91..355dfc52c804 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -266,7 +266,7 @@ static void timelines_init(struct intel_gt *gt)
 {
 	struct intel_gt_timelines *timelines = &gt->timelines;
 
-	mutex_init(&timelines->mutex);
+	spin_lock_init(&timelines->lock);
 	INIT_LIST_HEAD(&timelines->active_list);
 
 	spin_lock_init(&timelines->hwsp_lock);
@@ -345,9 +345,9 @@ void intel_timeline_enter(struct intel_timeline *tl)
 		return;
 	GEM_BUG_ON(!tl->active_count); /* overflow? */
 
-	mutex_lock(&timelines->mutex);
+	spin_lock(&timelines->lock);
 	list_add(&tl->link, &timelines->active_list);
-	mutex_unlock(&timelines->mutex);
+	spin_unlock(&timelines->lock);
 }
 
 void intel_timeline_exit(struct intel_timeline *tl)
@@ -358,9 +358,9 @@ void intel_timeline_exit(struct intel_timeline *tl)
 	if (--tl->active_count)
 		return;
 
-	mutex_lock(&timelines->mutex);
+	spin_lock(&timelines->lock);
 	list_del(&tl->link);
-	mutex_unlock(&timelines->mutex);
+	spin_unlock(&timelines->lock);
 
 	/*
 	 * Since this timeline is idle, all bariers upon which we were waiting
@@ -548,8 +548,6 @@ static void timelines_fini(struct intel_gt *gt)
 
 	GEM_BUG_ON(!list_empty(&timelines->active_list));
 	GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
-
-	mutex_destroy(&timelines->mutex);
 }
 
 void intel_timelines_fini(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 01dd0d1d9bf6..880b9af5f334 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -907,20 +907,20 @@ static int wait_for_engines(struct intel_gt *gt)
 
 static long
 wait_for_timelines(struct drm_i915_private *i915,
-		   unsigned int flags, long timeout)
+		   unsigned int wait, long timeout)
 {
-	struct intel_gt_timelines *gt = &i915->gt.timelines;
+	struct intel_gt_timelines *timelines = &i915->gt.timelines;
 	struct intel_timeline *tl;
 
-	mutex_lock(&gt->mutex);
-	list_for_each_entry(tl, &gt->active_list, link) {
+	spin_lock(&timelines->lock);
+	list_for_each_entry(tl, &timelines->active_list, link) {
 		struct i915_request *rq;
 
 		rq = i915_active_request_get_unlocked(&tl->last_request);
 		if (!rq)
 			continue;
 
-		mutex_unlock(&gt->mutex);
+		spin_unlock(&timelines->lock);
 
 		/*
 		 * "Race-to-idle".
@@ -931,19 +931,19 @@ wait_for_timelines(struct drm_i915_private *i915,
 		 * want to complete as quickly as possible to avoid prolonged
 		 * stalls, so allow the gpu to boost to maximum clocks.
 		 */
-		if (flags & I915_WAIT_FOR_IDLE_BOOST)
+		if (wait & I915_WAIT_FOR_IDLE_BOOST)
 			gen6_rps_boost(rq);
 
-		timeout = i915_request_wait(rq, flags, timeout);
+		timeout = i915_request_wait(rq, wait, timeout);
 		i915_request_put(rq);
 		if (timeout < 0)
 			return timeout;
 
 		/* restart after reacquiring the lock */
-		mutex_lock(&gt->mutex);
-		tl = list_entry(&gt->active_list, typeof(*tl), link);
+		spin_lock(&timelines->lock);
+		tl = list_entry(&timelines->active_list, typeof(*tl), link);
 	}
-	mutex_unlock(&gt->mutex);
+	spin_unlock(&timelines->lock);
 
 	return timeout;
 }
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 15/23] drm/i915/gt: Guard timeline pinning with its own mutex
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (12 preceding siblings ...)
  2019-07-23 18:38 ` [PATCH 14/23] drm/i915/gt: Convert timeline tracking to spinlock Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-23 18:38 ` [PATCH 16/23] drm/i915/gt: Add to timeline requires the timeline mutex Chris Wilson
                   ` (12 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx

In preparation for removing struct_mutex from around context retirement,
we need to make timeline pinning safe. Since multiple engines/contexts
can share a single timeline, it needs to be protected by a mutex.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_timeline.c      | 27 +++++++++----------
 .../gpu/drm/i915/gt/intel_timeline_types.h    |  2 +-
 drivers/gpu/drm/i915/gt/mock_engine.c         |  6 ++---
 3 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 355dfc52c804..7b476cd55dac 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -211,9 +211,9 @@ int intel_timeline_init(struct intel_timeline *timeline,
 	void *vaddr;
 
 	kref_init(&timeline->kref);
+	atomic_set(&timeline->pin_count, 0);
 
 	timeline->gt = gt;
-	timeline->pin_count = 0;
 
 	timeline->has_initial_breadcrumb = !hwsp;
 	timeline->hwsp_cacheline = NULL;
@@ -280,7 +280,7 @@ void intel_timelines_init(struct drm_i915_private *i915)
 
 void intel_timeline_fini(struct intel_timeline *timeline)
 {
-	GEM_BUG_ON(timeline->pin_count);
+	GEM_BUG_ON(atomic_read(&timeline->pin_count));
 	GEM_BUG_ON(!list_empty(&timeline->requests));
 
 	if (timeline->hwsp_cacheline)
@@ -314,33 +314,31 @@ int intel_timeline_pin(struct intel_timeline *tl)
 {
 	int err;
 
-	if (tl->pin_count++)
+	if (atomic_add_unless(&tl->pin_count, 1, 0))
 		return 0;
-	GEM_BUG_ON(!tl->pin_count);
-	GEM_BUG_ON(tl->active_count);
 
 	err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
 	if (err)
-		goto unpin;
+		return err;
 
 	tl->hwsp_offset =
 		i915_ggtt_offset(tl->hwsp_ggtt) +
 		offset_in_page(tl->hwsp_offset);
 
 	cacheline_acquire(tl->hwsp_cacheline);
+	if (atomic_fetch_inc(&tl->pin_count)) {
+		cacheline_release(tl->hwsp_cacheline);
+		__i915_vma_unpin(tl->hwsp_ggtt);
+	}
 
 	return 0;
-
-unpin:
-	tl->pin_count = 0;
-	return err;
 }
 
 void intel_timeline_enter(struct intel_timeline *tl)
 {
 	struct intel_gt_timelines *timelines = &tl->gt->timelines;
 
-	GEM_BUG_ON(!tl->pin_count);
+	GEM_BUG_ON(!atomic_read(&tl->pin_count));
 	if (tl->active_count++)
 		return;
 	GEM_BUG_ON(!tl->active_count); /* overflow? */
@@ -372,7 +370,7 @@ void intel_timeline_exit(struct intel_timeline *tl)
 
 static u32 timeline_advance(struct intel_timeline *tl)
 {
-	GEM_BUG_ON(!tl->pin_count);
+	GEM_BUG_ON(!atomic_read(&tl->pin_count));
 	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);
 
 	return tl->seqno += 1 + tl->has_initial_breadcrumb;
@@ -523,11 +521,10 @@ int intel_timeline_read_hwsp(struct i915_request *from,
 
 void intel_timeline_unpin(struct intel_timeline *tl)
 {
-	GEM_BUG_ON(!tl->pin_count);
-	if (--tl->pin_count)
+	GEM_BUG_ON(!atomic_read(&tl->pin_count));
+	if (!atomic_dec_and_test(&tl->pin_count))
 		return;
 
-	GEM_BUG_ON(tl->active_count);
 	cacheline_release(tl->hwsp_cacheline);
 
 	__i915_vma_unpin(tl->hwsp_ggtt);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index b1a9f0c54bf0..2b1baf2fcc8e 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -41,7 +41,7 @@ struct intel_timeline {
 	 * but the pin_count is protected by a combination of serialisation
 	 * from the intel_context caller plus internal atomicity.
 	 */
-	unsigned int pin_count;
+	atomic_t pin_count;
 	unsigned int active_count;
 
 	const u32 *hwsp_seqno;
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 10cb312462e5..042b456cde49 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -38,13 +38,13 @@ struct mock_ring {
 
 static void mock_timeline_pin(struct intel_timeline *tl)
 {
-	tl->pin_count++;
+	atomic_inc(&tl->pin_count);
 }
 
 static void mock_timeline_unpin(struct intel_timeline *tl)
 {
-	GEM_BUG_ON(!tl->pin_count);
-	tl->pin_count--;
+	GEM_BUG_ON(!atomic_read(&tl->pin_count));
+	atomic_dec(&tl->pin_count);
 }
 
 static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 16/23] drm/i915/gt: Add to timeline requires the timeline mutex
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (13 preceding siblings ...)
  2019-07-23 18:38 ` [PATCH 15/23] drm/i915/gt: Guard timeline pinning with its own mutex Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-23 18:38 ` [PATCH 17/23] drm/i915: Protect request retirement with timeline->mutex Chris Wilson
                   ` (11 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx

Modifying a remote context requires careful serialisation with requests
on that context, and that serialisation requires us to take their
timeline->mutex. Make it so.

Note that while struct_mutex rules, we can't create more than one
request in parallel, but that age is soon coming to an end.

v2: Though it doesn't affect the current users, contexts may share
timelines so check if we already hold the right mutex.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 0ce5880ace0a..bcd69c187313 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -261,10 +261,18 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
 	/* Only suitable for use in remotely modifying this context */
 	GEM_BUG_ON(rq->hw_context == ce);
 
+	if (rq->timeline != tl) { /* beware timeline sharing */
+		err = mutex_lock_interruptible_nested(&tl->mutex,
+						      SINGLE_DEPTH_NESTING);
+		if (err)
+			return err;
+	}
+	lockdep_assert_held(&tl->mutex);
+
 	/* Queue this switch after all other activity by this context. */
 	err = i915_active_request_set(&tl->last_request, rq);
 	if (err)
-		return err;
+		goto unlock;
 
 	/*
 	 * Guarantee context image and the timeline remains pinned until the
@@ -274,7 +282,12 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
 	 * words transfer the pinned ce object to tracked active request.
 	 */
 	GEM_BUG_ON(i915_active_is_idle(&ce->active));
-	return i915_active_ref(&ce->active, rq->fence.context, rq);
+	err = i915_active_ref(&ce->active, rq->fence.context, rq);
+
+unlock:
+	if (rq->timeline != tl)
+		mutex_unlock(&tl->mutex);
+	return err;
 }
 
 struct i915_request *intel_context_create_request(struct intel_context *ce)
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 17/23] drm/i915: Protect request retirement with timeline->mutex
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (14 preceding siblings ...)
  2019-07-23 18:38 ` [PATCH 16/23] drm/i915/gt: Add to timeline requires the timeline mutex Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-23 18:38 ` [PATCH 18/23] drm/i915: Replace struct_mutex for batch pool serialisation Chris Wilson
                   ` (10 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx

Forgo the struct_mutex requirement for request retirement as we have
been transitioning over to only using the timeline->mutex for
controlling the lifetime of a request on that timeline.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 183 ++++++++++--------
 drivers/gpu/drm/i915/gt/intel_context.h       |  18 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   1 -
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   2 -
 drivers/gpu/drm/i915/gt/intel_gt.c            |   1 -
 drivers/gpu/drm/i915/gt/intel_gt_types.h      |   2 -
 drivers/gpu/drm/i915/gt/intel_lrc.c           |   1 +
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |  13 +-
 drivers/gpu/drm/i915/gt/mock_engine.c         |   1 -
 drivers/gpu/drm/i915/i915_request.c           | 151 +++++++--------
 drivers/gpu/drm/i915/i915_request.h           |   3 -
 11 files changed, 197 insertions(+), 179 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 8d90498eaf46..44add172cdc8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -734,63 +734,6 @@ static int eb_select_context(struct i915_execbuffer *eb)
 	return 0;
 }
 
-static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
-{
-	struct i915_request *rq;
-
-	/*
-	 * Completely unscientific finger-in-the-air estimates for suitable
-	 * maximum user request size (to avoid blocking) and then backoff.
-	 */
-	if (intel_ring_update_space(ring) >= PAGE_SIZE)
-		return NULL;
-
-	/*
-	 * Find a request that after waiting upon, there will be at least half
-	 * the ring available. The hysteresis allows us to compete for the
-	 * shared ring and should mean that we sleep less often prior to
-	 * claiming our resources, but not so long that the ring completely
-	 * drains before we can submit our next request.
-	 */
-	list_for_each_entry(rq, &ring->request_list, ring_link) {
-		if (__intel_ring_space(rq->postfix,
-				       ring->emit, ring->size) > ring->size / 2)
-			break;
-	}
-	if (&rq->ring_link == &ring->request_list)
-		return NULL; /* weird, we will check again later for real */
-
-	return i915_request_get(rq);
-}
-
-static int eb_wait_for_ring(const struct i915_execbuffer *eb)
-{
-	struct i915_request *rq;
-	int ret = 0;
-
-	/*
-	 * Apply a light amount of backpressure to prevent excessive hogs
-	 * from blocking waiting for space whilst holding struct_mutex and
-	 * keeping all of their resources pinned.
-	 */
-
-	rq = __eb_wait_for_ring(eb->context->ring);
-	if (rq) {
-		mutex_unlock(&eb->i915->drm.struct_mutex);
-
-		if (i915_request_wait(rq,
-				      I915_WAIT_INTERRUPTIBLE,
-				      MAX_SCHEDULE_TIMEOUT) < 0)
-			ret = -EINTR;
-
-		i915_request_put(rq);
-
-		mutex_lock(&eb->i915->drm.struct_mutex);
-	}
-
-	return ret;
-}
-
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
 	struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
@@ -2118,8 +2061,73 @@ static const enum intel_engine_id user_ring_map[] = {
 	[I915_EXEC_VEBOX]	= VECS0
 };
 
-static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
+static struct i915_request *eb_throttle(struct intel_context *ce)
 {
+	struct intel_ring *ring = ce->ring;
+	struct intel_timeline *tl = ring->timeline;
+	struct i915_request *rq;
+
+	/*
+	 * Completely unscientific finger-in-the-air estimates for suitable
+	 * maximum user request size (to avoid blocking) and then backoff.
+	 */
+	if (intel_ring_update_space(ring) >= PAGE_SIZE)
+		return NULL;
+
+	/*
+	 * Find a request that after waiting upon, there will be at least half
+	 * the ring available. The hysteresis allows us to compete for the
+	 * shared ring and should mean that we sleep less often prior to
+	 * claiming our resources, but not so long that the ring completely
+	 * drains before we can submit our next request.
+	 */
+	list_for_each_entry(rq, &tl->requests, link) {
+		if (rq->ring != ring)
+			continue;
+
+		if (__intel_ring_space(rq->postfix,
+				       ring->emit, ring->size) > ring->size / 2)
+			break;
+	}
+	if (&rq->link == &tl->requests)
+		return NULL; /* weird, we will check again later for real */
+
+	return i915_request_get(rq);
+}
+
+static int
+__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
+{
+	int err;
+
+	if (likely(atomic_inc_not_zero(&ce->pin_count)))
+		return 0;
+
+	err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex);
+	if (err)
+		return err;
+
+	err = __intel_context_do_pin(ce);
+	mutex_unlock(&eb->i915->drm.struct_mutex);
+
+	return err;
+}
+
+static void
+__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce)
+{
+	if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
+		return;
+
+	mutex_lock(&eb->i915->drm.struct_mutex);
+	intel_context_unpin(ce);
+	mutex_unlock(&eb->i915->drm.struct_mutex);
+}
+
+static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
+{
+	struct intel_timeline *tl;
+	struct i915_request *rq;
 	int err;
 
 	/*
@@ -2135,7 +2143,7 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
-	err = intel_context_pin(ce);
+	err = __eb_pin_context(eb, ce);
 	if (err)
 		return err;
 
@@ -2147,23 +2155,43 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
 	 * until the timeline is idle, which in turn releases the wakeref
 	 * taken on the engine, and the parent device.
 	 */
-	err = intel_context_timeline_lock(ce);
-	if (err)
+	tl = intel_context_timeline_lock(ce);
+	if (IS_ERR(tl)) {
+		err = PTR_ERR(tl);
 		goto err_unpin;
+	}
 
 	intel_context_enter(ce);
-	intel_context_timeline_unlock(ce);
+	rq = eb_throttle(ce);
+
+	intel_context_timeline_unlock(tl);
+
+	if (rq) {
+		if (i915_request_wait(rq,
+				      I915_WAIT_INTERRUPTIBLE,
+				      MAX_SCHEDULE_TIMEOUT) < 0) {
+			i915_request_put(rq);
+			err = -EINTR;
+			goto err_exit;
+		}
+
+		i915_request_put(rq);
+	}
 
 	eb->engine = ce->engine;
 	eb->context = ce;
 	return 0;
 
+err_exit:
+	mutex_lock(&tl->mutex);
+	intel_context_exit(ce);
+	intel_context_timeline_unlock(tl);
 err_unpin:
-	intel_context_unpin(ce);
+	__eb_unpin_context(eb, ce);
 	return err;
 }
 
-static void eb_unpin_context(struct i915_execbuffer *eb)
+static void eb_unpin_engine(struct i915_execbuffer *eb)
 {
 	struct intel_context *ce = eb->context;
 	struct intel_timeline *tl = ce->ring->timeline;
@@ -2172,7 +2200,7 @@ static void eb_unpin_context(struct i915_execbuffer *eb)
 	intel_context_exit(ce);
 	mutex_unlock(&tl->mutex);
 
-	intel_context_unpin(ce);
+	__eb_unpin_context(eb, ce);
 }
 
 static unsigned int
@@ -2217,9 +2245,9 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
 }
 
 static int
-eb_select_engine(struct i915_execbuffer *eb,
-		 struct drm_file *file,
-		 struct drm_i915_gem_execbuffer2 *args)
+eb_pin_engine(struct i915_execbuffer *eb,
+	      struct drm_file *file,
+	      struct drm_i915_gem_execbuffer2 *args)
 {
 	struct intel_context *ce;
 	unsigned int idx;
@@ -2234,7 +2262,7 @@ eb_select_engine(struct i915_execbuffer *eb,
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
-	err = eb_pin_context(eb, ce);
+	err = __eb_pin_engine(eb, ce);
 	intel_context_put(ce);
 
 	return err;
@@ -2452,16 +2480,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_destroy;
 
-	err = i915_mutex_lock_interruptible(dev);
-	if (err)
-		goto err_context;
-
-	err = eb_select_engine(&eb, file, args);
+	err = eb_pin_engine(&eb, file, args);
 	if (unlikely(err))
-		goto err_unlock;
+		goto err_context;
 
-	err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
-	if (unlikely(err))
+	err = i915_mutex_lock_interruptible(dev);
+	if (err)
 		goto err_engine;
 
 	err = eb_relocate(&eb);
@@ -2615,10 +2639,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_vma:
 	if (eb.exec)
 		eb_release_vmas(&eb);
-err_engine:
-	eb_unpin_context(&eb);
-err_unlock:
 	mutex_unlock(&dev->struct_mutex);
+err_engine:
+	eb_unpin_engine(&eb);
 err_context:
 	i915_gem_context_put(eb.gem_context);
 err_destroy:
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 3f54eb3d10ab..0772f77394c5 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -12,6 +12,7 @@
 #include "i915_active.h"
 #include "intel_context_types.h"
 #include "intel_engine_types.h"
+#include "intel_timeline_types.h"
 
 void intel_context_init(struct intel_context *ce,
 			struct i915_gem_context *ctx,
@@ -127,17 +128,24 @@ static inline void intel_context_put(struct intel_context *ce)
 	kref_put(&ce->ref, ce->ops->destroy);
 }
 
-static inline int __must_check
+static inline struct intel_timeline *__must_check
 intel_context_timeline_lock(struct intel_context *ce)
 	__acquires(&ce->ring->timeline->mutex)
 {
-	return mutex_lock_interruptible(&ce->ring->timeline->mutex);
+	struct intel_timeline *tl = ce->ring->timeline;
+	int err;
+
+	err = mutex_lock_interruptible(&tl->mutex);
+	if (err)
+		return ERR_PTR(err);
+
+	return tl;
 }
 
-static inline void intel_context_timeline_unlock(struct intel_context *ce)
-	__releases(&ce->ring->timeline->mutex)
+static inline void intel_context_timeline_unlock(struct intel_timeline *tl)
+	__releases(&tl->mutex)
 {
-	mutex_unlock(&ce->ring->timeline->mutex);
+	mutex_unlock(&tl->mutex);
 }
 
 int intel_context_prepare_remote_request(struct intel_context *ce,
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 3ea0b5debe94..368ea5087171 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -745,7 +745,6 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
 				engine->status_page.vma))
 		goto out_frame;
 
-	INIT_LIST_HEAD(&frame->ring.request_list);
 	frame->ring.timeline = &frame->timeline;
 	frame->ring.vaddr = frame->cs;
 	frame->ring.size = sizeof(frame->cs);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 9c927fa408aa..3c84973275ae 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -70,8 +70,6 @@ struct intel_ring {
 	void *vaddr;
 
 	struct intel_timeline *timeline;
-	struct list_head request_list;
-	struct list_head active_link;
 
 	/*
 	 * As we have two types of rings, one global to the engine used
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index de0d6ad5f93c..f663c796b299 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -14,7 +14,6 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 	gt->i915 = i915;
 	gt->uncore = &i915->uncore;
 
-	INIT_LIST_HEAD(&gt->active_rings);
 	INIT_LIST_HEAD(&gt->closed_vma);
 
 	spin_lock_init(&gt->closed_lock);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index b13f63e52203..bfdabf49b1e7 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -48,8 +48,6 @@ struct intel_gt {
 		struct list_head hwsp_free_list;
 	} timelines;
 
-	struct list_head active_rings;
-
 	struct intel_wakeref wakeref;
 
 	struct list_head closed_vma;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 5c73b93be4fb..39c9c11536d5 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1578,6 +1578,7 @@ static void execlists_context_unpin(struct intel_context *ce)
 {
 	i915_gem_context_unpin_hw_id(ce->gem_context);
 	i915_gem_object_unpin_map(ce->state->obj);
+	intel_ring_reset(ce->ring, ce->ring->tail);
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index d8efb88f33f3..9d9be5fed9fc 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1275,7 +1275,7 @@ void intel_ring_unpin(struct intel_ring *ring)
 	GEM_TRACE("ring:%llx unpin\n", ring->timeline->fence_context);
 
 	/* Discard any unused bytes beyond that submitted to hw. */
-	intel_ring_reset(ring, ring->tail);
+	intel_ring_reset(ring, ring->emit);
 
 	i915_vma_unset_ggtt_write(vma);
 	if (i915_vma_is_map_and_fenceable(vma))
@@ -1340,7 +1340,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine,
 		return ERR_PTR(-ENOMEM);
 
 	kref_init(&ring->ref);
-	INIT_LIST_HEAD(&ring->request_list);
 	ring->timeline = intel_timeline_get(timeline);
 
 	ring->size = size;
@@ -1888,21 +1887,25 @@ static int ring_request_alloc(struct i915_request *request)
 
 static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
 {
+	struct intel_timeline *tl = ring->timeline;
 	struct i915_request *target;
 	long timeout;
 
 	if (intel_ring_update_space(ring) >= bytes)
 		return 0;
 
-	GEM_BUG_ON(list_empty(&ring->request_list));
-	list_for_each_entry(target, &ring->request_list, ring_link) {
+	GEM_BUG_ON(list_empty(&tl->requests));
+	list_for_each_entry(target, &tl->requests, link) {
+		if (target->ring != ring)
+			continue;
+
 		/* Would completion of this request free enough space? */
 		if (bytes <= __intel_ring_space(target->postfix,
 						ring->emit, ring->size))
 			break;
 	}
 
-	if (WARN_ON(&target->ring_link == &ring->request_list))
+	if (GEM_WARN_ON(&target->link == &tl->requests))
 		return -ENOSPC;
 
 	timeout = i915_request_wait(target,
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 042b456cde49..42776672c59b 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -68,7 +68,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 	ring->base.timeline = &ring->timeline;
 	atomic_set(&ring->base.pin_count, 1);
 
-	INIT_LIST_HEAD(&ring->base.request_list);
 	intel_ring_update_space(&ring->base);
 
 	return &ring->base;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 8ac7d14ec8c9..92313a59563c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -180,40 +180,6 @@ i915_request_remove_from_client(struct i915_request *request)
 	spin_unlock(&file_priv->mm.lock);
 }
 
-static void advance_ring(struct i915_request *request)
-{
-	struct intel_ring *ring = request->ring;
-	unsigned int tail;
-
-	/*
-	 * We know the GPU must have read the request to have
-	 * sent us the seqno + interrupt, so use the position
-	 * of tail of the request to update the last known position
-	 * of the GPU head.
-	 *
-	 * Note this requires that we are always called in request
-	 * completion order.
-	 */
-	GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list));
-	if (list_is_last(&request->ring_link, &ring->request_list)) {
-		/*
-		 * We may race here with execlists resubmitting this request
-		 * as we retire it. The resubmission will move the ring->tail
-		 * forwards (to request->wa_tail). We either read the
-		 * current value that was written to hw, or the value that
-		 * is just about to be. Either works, if we miss the last two
-		 * noops - they are safe to be replayed on a reset.
-		 */
-		tail = READ_ONCE(request->tail);
-		list_del(&ring->active_link);
-	} else {
-		tail = request->postfix;
-	}
-	list_del_init(&request->ring_link);
-
-	ring->head = tail;
-}
-
 static void free_capture_list(struct i915_request *request)
 {
 	struct i915_capture_list *capture;
@@ -231,7 +197,7 @@ static bool i915_request_retire(struct i915_request *rq)
 {
 	struct i915_active_request *active, *next;
 
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	lockdep_assert_held(&rq->timeline->mutex);
 	if (!i915_request_completed(rq))
 		return false;
 
@@ -243,7 +209,17 @@ static bool i915_request_retire(struct i915_request *rq)
 	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
 	trace_i915_request_retire(rq);
 
-	advance_ring(rq);
+	/*
+	 * We know the GPU must have read the request to have
+	 * sent us the seqno + interrupt, so use the position
+	 * of tail of the request to update the last known position
+	 * of the GPU head.
+	 *
+	 * Note this requires that we are always called in request
+	 * completion order.
+	 */
+	GEM_BUG_ON(!list_is_first(&rq->link, &rq->timeline->requests));
+	rq->ring->head = rq->postfix;
 
 	/*
 	 * Walk through the active list, calling retire on each. This allows
@@ -320,7 +296,7 @@ static bool i915_request_retire(struct i915_request *rq)
 
 void i915_request_retire_upto(struct i915_request *rq)
 {
-	struct intel_ring *ring = rq->ring;
+	struct intel_timeline * const tl = rq->timeline;
 	struct i915_request *tmp;
 
 	GEM_TRACE("%s fence %llx:%lld, current %d\n",
@@ -328,15 +304,11 @@ void i915_request_retire_upto(struct i915_request *rq)
 		  rq->fence.context, rq->fence.seqno,
 		  hwsp_seqno(rq));
 
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	lockdep_assert_held(&tl->mutex);
 	GEM_BUG_ON(!i915_request_completed(rq));
 
-	if (list_empty(&rq->ring_link))
-		return;
-
 	do {
-		tmp = list_first_entry(&ring->request_list,
-				       typeof(*tmp), ring_link);
+		tmp = list_first_entry(&tl->requests, typeof(*tmp), link);
 	} while (i915_request_retire(tmp) && tmp != rq);
 }
 
@@ -563,29 +535,28 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
-static void ring_retire_requests(struct intel_ring *ring)
+static void retire_requests(struct intel_timeline *tl)
 {
 	struct i915_request *rq, *rn;
 
-	list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link)
+	list_for_each_entry_safe(rq, rn, &tl->requests, link)
 		if (!i915_request_retire(rq))
 			break;
 }
 
 static noinline struct i915_request *
-request_alloc_slow(struct intel_context *ce, gfp_t gfp)
+request_alloc_slow(struct intel_timeline *tl, gfp_t gfp)
 {
-	struct intel_ring *ring = ce->ring;
 	struct i915_request *rq;
 
-	if (list_empty(&ring->request_list))
+	if (list_empty(&tl->requests))
 		goto out;
 
 	if (!gfpflags_allow_blocking(gfp))
 		goto out;
 
 	/* Move our oldest request to the slab-cache (if not in use!) */
-	rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link);
+	rq = list_first_entry(&tl->requests, typeof(*rq), link);
 	i915_request_retire(rq);
 
 	rq = kmem_cache_alloc(global.slab_requests,
@@ -594,11 +565,11 @@ request_alloc_slow(struct intel_context *ce, gfp_t gfp)
 		return rq;
 
 	/* Ratelimit ourselves to prevent oom from malicious clients */
-	rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link);
+	rq = list_last_entry(&tl->requests, typeof(*rq), link);
 	cond_synchronize_rcu(rq->rcustate);
 
 	/* Retire our old requests in the hope that we free some */
-	ring_retire_requests(ring);
+	retire_requests(tl);
 
 out:
 	return kmem_cache_alloc(global.slab_requests, gfp);
@@ -649,7 +620,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq = kmem_cache_alloc(global.slab_requests,
 			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 	if (unlikely(!rq)) {
-		rq = request_alloc_slow(ce, gfp);
+		rq = request_alloc_slow(tl, gfp);
 		if (!rq) {
 			ret = -ENOMEM;
 			goto err_unreserve;
@@ -741,15 +712,15 @@ struct i915_request *
 i915_request_create(struct intel_context *ce)
 {
 	struct i915_request *rq;
-	int err;
+	struct intel_timeline *tl;
 
-	err = intel_context_timeline_lock(ce);
-	if (err)
-		return ERR_PTR(err);
+	tl = intel_context_timeline_lock(ce);
+	if (IS_ERR(tl))
+		return ERR_CAST(tl);
 
 	/* Move our oldest request to the slab-cache (if not in use!) */
-	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
-	if (!list_is_last(&rq->ring_link, &ce->ring->request_list))
+	rq = list_first_entry(&tl->requests, typeof(*rq), link);
+	if (!list_is_last(&rq->link, &tl->requests))
 		i915_request_retire(rq);
 
 	intel_context_enter(ce);
@@ -759,22 +730,22 @@ i915_request_create(struct intel_context *ce)
 		goto err_unlock;
 
 	/* Check that we do not interrupt ourselves with a new request */
-	rq->cookie = lockdep_pin_lock(&ce->ring->timeline->mutex);
+	rq->cookie = lockdep_pin_lock(&tl->mutex);
 
 	return rq;
 
 err_unlock:
-	intel_context_timeline_unlock(ce);
+	intel_context_timeline_unlock(tl);
 	return rq;
 }
 
 static int
 i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
 {
-	if (list_is_first(&signal->ring_link, &signal->ring->request_list))
+	if (list_is_first(&signal->link, &signal->ring->timeline->requests))
 		return 0;
 
-	signal = list_prev_entry(signal, ring_link);
+	signal = list_prev_entry(signal, link);
 	if (intel_timeline_sync_is_later(rq->timeline, &signal->fence))
 		return 0;
 
@@ -1167,6 +1138,7 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 	 */
 	GEM_BUG_ON(rq->reserved_space > ring->space);
 	rq->reserved_space = 0;
+	rq->emitted_jiffies = jiffies;
 
 	/*
 	 * Record the position of the start of the breadcrumb so that
@@ -1180,11 +1152,6 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 
 	prev = __i915_request_add_to_timeline(rq);
 
-	list_add_tail(&rq->ring_link, &ring->request_list);
-	if (list_is_first(&rq->ring_link, &ring->request_list))
-		list_add(&ring->active_link, &rq->i915->gt.active_rings);
-	rq->emitted_jiffies = jiffies;
-
 	/*
 	 * Let the backend know a new request has arrived that may need
 	 * to adjust the existing execution schedule due to a high priority
@@ -1237,10 +1204,11 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 
 void i915_request_add(struct i915_request *rq)
 {
+	struct intel_timeline * const tl = rq->timeline;
 	struct i915_request *prev;
 
-	lockdep_assert_held(&rq->timeline->mutex);
-	lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie);
+	lockdep_assert_held(&tl->mutex);
+	lockdep_unpin_lock(&tl->mutex, rq->cookie);
 
 	trace_i915_request_add(rq);
 
@@ -1263,10 +1231,10 @@ void i915_request_add(struct i915_request *rq)
 	 * work on behalf of others -- but instead we should benefit from
 	 * improved resource management. (Well, that's the theory at least.)
 	 */
-	if (prev && i915_request_completed(prev))
+	if (prev && i915_request_completed(prev) && prev->timeline == tl)
 		i915_request_retire_upto(prev);
 
-	mutex_unlock(&rq->timeline->mutex);
+	mutex_unlock(&tl->mutex);
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
@@ -1486,18 +1454,43 @@ long i915_request_wait(struct i915_request *rq,
 
 bool i915_retire_requests(struct drm_i915_private *i915)
 {
-	struct intel_ring *ring, *tmp;
+	struct intel_gt_timelines *timelines = &i915->gt.timelines;
+	struct intel_timeline *tl, *tn;
+	LIST_HEAD(free);
+
+	spin_lock(&timelines->lock);
+	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
+		if (!mutex_trylock(&tl->mutex))
+			continue;
+
+		intel_timeline_get(tl);
+		GEM_BUG_ON(!tl->active_count);
+		tl->active_count++; /* pin the list element */
+		spin_unlock(&timelines->lock);
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
+		retire_requests(tl);
 
-	list_for_each_entry_safe(ring, tmp,
-				 &i915->gt.active_rings, active_link) {
-		intel_ring_get(ring); /* last rq holds reference! */
-		ring_retire_requests(ring);
-		intel_ring_put(ring);
+		spin_lock(&timelines->lock);
+
+		/* Restart iteration after dropping lock */
+		list_safe_reset_next(tl, tn, link);
+		if (!--tl->active_count)
+			list_del(&tl->link);
+
+		mutex_unlock(&tl->mutex);
+
+		/* Defer the final release to after the spinlock */
+		if (refcount_dec_and_test(&tl->kref.refcount)) {
+			GEM_BUG_ON(tl->active_count);
+			list_add(&tl->link, &free);
+		}
 	}
+	spin_unlock(&timelines->lock);
+
+	list_for_each_entry_safe(tl, tn, &free, link)
+		__intel_timeline_free(&tl->kref);
 
-	return !list_empty(&i915->gt.active_rings);
+	return !list_empty(&timelines->active_list);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 313df3c37158..22e506e960e0 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -223,9 +223,6 @@ struct i915_request {
 	/** timeline->request entry for this request */
 	struct list_head link;
 
-	/** ring->request_list entry for this request */
-	struct list_head ring_link;
-
 	struct drm_i915_file_private *file_priv;
 	/** file_priv list entry for this request */
 	struct list_head client_link;
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 18/23] drm/i915: Replace struct_mutex for batch pool serialisation
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (15 preceding siblings ...)
  2019-07-23 18:38 ` [PATCH 17/23] drm/i915: Protect request retirement with timeline->mutex Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-23 18:38 ` [PATCH 19/23] drm/i915/gt: Mark context->active_count as protected by timeline->mutex Chris Wilson
                   ` (9 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Switch to tracking activity via i915_active on individual nodes, only
keeping a list of retired objects in the cache, and reaping the cache
when the engine itself idles.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   2 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  58 +++---
 drivers/gpu/drm/i915/gem/i915_gem_object.c    |   1 -
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   1 -
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        |   4 +-
 drivers/gpu/drm/i915/gt/intel_engine.h        |   1 -
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  11 +-
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   2 +
 drivers/gpu/drm/i915/gt/intel_engine_pool.c   | 177 ++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_engine_pool.h   |  34 ++++
 .../gpu/drm/i915/gt/intel_engine_pool_types.h |  29 +++
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   6 +-
 drivers/gpu/drm/i915/gt/mock_engine.c         |   3 +
 drivers/gpu/drm/i915/i915_debugfs.c           |  66 -------
 drivers/gpu/drm/i915/i915_gem_batch_pool.c    | 132 -------------
 drivers/gpu/drm/i915/i915_gem_batch_pool.h    |  26 ---
 16 files changed, 290 insertions(+), 263 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_pool.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_pool.h
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_pool_types.h
 delete mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c
 delete mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index d2c1dcda20a1..e23b568a453e 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -73,6 +73,7 @@ obj-y += gt/
 gt-y += \
 	gt/intel_breadcrumbs.o \
 	gt/intel_context.o \
+	gt/intel_engine_pool.o \
 	gt/intel_engine_cs.o \
 	gt/intel_engine_pm.o \
 	gt/intel_engine_user.o \
@@ -127,7 +128,6 @@ i915-y += \
 	  $(gem-y) \
 	  i915_active.o \
 	  i915_cmd_parser.o \
-	  i915_gem_batch_pool.o \
 	  i915_gem_evict.o \
 	  i915_gem_fence_reg.o \
 	  i915_gem_gtt.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 44add172cdc8..19f0f21ee59e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -16,6 +16,7 @@
 
 #include "gem/i915_gem_ioctls.h"
 #include "gt/intel_context.h"
+#include "gt/intel_engine_pool.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
 
@@ -1141,25 +1142,26 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 			     unsigned int len)
 {
 	struct reloc_cache *cache = &eb->reloc_cache;
-	struct drm_i915_gem_object *obj;
+	struct intel_engine_pool_node *pool;
 	struct i915_request *rq;
 	struct i915_vma *batch;
 	u32 *cmd;
 	int err;
 
-	obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
+	pool = intel_engine_pool_get(&eb->engine->pool, PAGE_SIZE);
+	if (IS_ERR(pool))
+		return PTR_ERR(pool);
 
-	cmd = i915_gem_object_pin_map(obj,
+	cmd = i915_gem_object_pin_map(pool->obj,
 				      cache->has_llc ?
 				      I915_MAP_FORCE_WB :
 				      I915_MAP_FORCE_WC);
-	i915_gem_object_unpin_pages(obj);
-	if (IS_ERR(cmd))
-		return PTR_ERR(cmd);
+	if (IS_ERR(cmd)) {
+		err = PTR_ERR(cmd);
+		goto out_pool;
+	}
 
-	batch = i915_vma_instance(obj, vma->vm, NULL);
+	batch = i915_vma_instance(pool->obj, vma->vm, NULL);
 	if (IS_ERR(batch)) {
 		err = PTR_ERR(batch);
 		goto err_unmap;
@@ -1175,6 +1177,10 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 		goto err_unpin;
 	}
 
+	err = intel_engine_pool_mark_active(pool, rq);
+	if (err)
+		goto err_request;
+
 	err = reloc_move_to_gpu(rq, vma);
 	if (err)
 		goto err_request;
@@ -1200,7 +1206,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	cache->rq_size = 0;
 
 	/* Return with batch mapping (cmd) still pinned */
-	return 0;
+	goto out_pool;
 
 skip_request:
 	i915_request_skip(rq, err);
@@ -1209,7 +1215,9 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 err_unpin:
 	i915_vma_unpin(batch);
 err_unmap:
-	i915_gem_object_unpin_map(obj);
+	i915_gem_object_unpin_map(pool->obj);
+out_pool:
+	intel_engine_pool_put(pool);
 	return err;
 }
 
@@ -1953,18 +1961,17 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
 
 static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
 {
-	struct drm_i915_gem_object *shadow_batch_obj;
+	struct intel_engine_pool_node *pool;
 	struct i915_vma *vma;
 	int err;
 
-	shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool,
-						   PAGE_ALIGN(eb->batch_len));
-	if (IS_ERR(shadow_batch_obj))
-		return ERR_CAST(shadow_batch_obj);
+	pool = intel_engine_pool_get(&eb->engine->pool, eb->batch_len);
+	if (IS_ERR(pool))
+		return ERR_CAST(pool);
 
 	err = intel_engine_cmd_parser(eb->engine,
 				      eb->batch->obj,
-				      shadow_batch_obj,
+				      pool->obj,
 				      eb->batch_start_offset,
 				      eb->batch_len,
 				      is_master);
@@ -1973,12 +1980,12 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
 			vma = NULL;
 		else
 			vma = ERR_PTR(err);
-		goto out;
+		goto err;
 	}
 
-	vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
+	vma = i915_gem_object_ggtt_pin(pool->obj, NULL, 0, 0, 0);
 	if (IS_ERR(vma))
-		goto out;
+		goto err;
 
 	eb->vma[eb->buffer_count] = i915_vma_get(vma);
 	eb->flags[eb->buffer_count] =
@@ -1986,8 +1993,11 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
 	vma->exec_flags = &eb->flags[eb->buffer_count];
 	eb->buffer_count++;
 
-out:
-	i915_gem_object_unpin_pages(shadow_batch_obj);
+	vma->private = pool;
+	return vma;
+
+err:
+	intel_engine_pool_put(pool);
 	return vma;
 }
 
@@ -2612,6 +2622,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	 * to explicitly hold another reference here.
 	 */
 	eb.request->batch = eb.batch;
+	if (eb.batch->private)
+		intel_engine_pool_mark_active(eb.batch->private, eb.request);
 
 	trace_i915_request_queue(eb.request, eb.batch_flags);
 	err = eb_submit(&eb);
@@ -2636,6 +2648,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_batch_unpin:
 	if (eb.batch_flags & I915_DISPATCH_SECURE)
 		i915_vma_unpin(eb.batch);
+	if (eb.batch->private)
+		intel_engine_pool_put(eb.batch->private);
 err_vma:
 	if (eb.exec)
 		eb_release_vmas(&eb);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 4ea97fca9c35..eccd7f4768f8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -66,7 +66,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	INIT_LIST_HEAD(&obj->mm.link);
 
 	INIT_LIST_HEAD(&obj->lut_list);
-	INIT_LIST_HEAD(&obj->batch_pool_link);
 
 	init_rcu_head(&obj->rcu);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 34b51fad02de..d474c6ac4100 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -114,7 +114,6 @@ struct drm_i915_gem_object {
 	unsigned int userfault_count;
 	struct list_head userfault_link;
 
-	struct list_head batch_pool_link;
 	I915_SELFTEST_DECLARE(struct list_head st_link);
 
 	/*
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index cccb02b857de..d80cde458e42 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -34,10 +34,8 @@ static void i915_gem_park(struct drm_i915_private *i915)
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, i915, id)
 		call_idle_barriers(engine); /* cleanup after wedging */
-		i915_gem_batch_pool_fini(&engine->batch_pool);
-	}
 
 	i915_vma_parked(i915);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 30856383e4c5..52f29e618696 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -9,7 +9,6 @@
 #include <linux/random.h>
 #include <linux/seqlock.h>
 
-#include "i915_gem_batch_pool.h"
 #include "i915_pmu.h"
 #include "i915_reg.h"
 #include "i915_request.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 368ea5087171..405464bef5a8 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -32,6 +32,7 @@
 
 #include "intel_engine.h"
 #include "intel_engine_pm.h"
+#include "intel_engine_pool.h"
 #include "intel_engine_user.h"
 #include "intel_context.h"
 #include "intel_lrc.h"
@@ -494,11 +495,6 @@ int intel_engines_init(struct drm_i915_private *i915)
 	return err;
 }
 
-static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
-{
-	i915_gem_batch_pool_init(&engine->batch_pool, engine);
-}
-
 void intel_engine_init_execlists(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -624,10 +620,11 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
 	intel_engine_init_breadcrumbs(engine);
 	intel_engine_init_execlists(engine);
 	intel_engine_init_hangcheck(engine);
-	intel_engine_init_batch_pool(engine);
 	intel_engine_init_cmd_parser(engine);
 	intel_engine_init__pm(engine);
 
+	intel_engine_pool_init(&engine->pool);
+
 	/* Use the whole device by default */
 	engine->sseu =
 		intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);
@@ -869,9 +866,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 
 	cleanup_status_page(engine);
 
+	intel_engine_pool_fini(&engine->pool);
 	intel_engine_fini_breadcrumbs(engine);
 	intel_engine_cleanup_cmd_parser(engine);
-	i915_gem_batch_pool_fini(&engine->batch_pool);
 
 	if (engine->default_state)
 		i915_gem_object_put(engine->default_state);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 072f65e6a09e..76c828d11b05 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -7,6 +7,7 @@
 #include "i915_drv.h"
 
 #include "intel_engine.h"
+#include "intel_engine_pool.h"
 #include "intel_engine_pm.h"
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
@@ -117,6 +118,7 @@ static int __engine_park(struct intel_wakeref *wf)
 	GEM_TRACE("%s\n", engine->name);
 
 	intel_engine_disarm_breadcrumbs(engine);
+	intel_engine_pool_park(&engine->pool);
 
 	/* Must be reset upon idling, or we may miss the busy wakeup. */
 	GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
new file mode 100644
index 000000000000..03d90b49584a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
@@ -0,0 +1,177 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#include "gem/i915_gem_object.h"
+
+#include "i915_drv.h"
+#include "intel_engine_pm.h"
+#include "intel_engine_pool.h"
+
+static struct intel_engine_cs *to_engine(struct intel_engine_pool *pool)
+{
+	return container_of(pool, struct intel_engine_cs, pool);
+}
+
+static struct list_head *
+bucket_for_size(struct intel_engine_pool *pool, size_t sz)
+{
+	int n;
+
+	/*
+	 * Compute a power-of-two bucket, but throw everything greater than
+	 * 16KiB into the same bucket: i.e. the buckets hold objects of
+	 * (1 page, 2 pages, 4 pages, 8+ pages).
+	 */
+	n = fls(sz >> PAGE_SHIFT) - 1;
+	if (n >= ARRAY_SIZE(pool->cache_list))
+		n = ARRAY_SIZE(pool->cache_list) - 1;
+
+	return &pool->cache_list[n];
+}
+
+static void node_free(struct intel_engine_pool_node *node)
+{
+	i915_gem_object_put(node->obj);
+	i915_active_fini(&node->active);
+	kfree(node);
+}
+
+static int pool_active(struct i915_active *ref)
+{
+	struct intel_engine_pool_node *node =
+		container_of(ref, typeof(*node), active);
+	struct reservation_object *resv = node->obj->base.resv;
+	int err;
+
+	if (reservation_object_trylock(resv)) {
+		reservation_object_add_excl_fence(resv, NULL);
+		reservation_object_unlock(resv);
+	}
+
+	err = i915_gem_object_pin_pages(node->obj);
+	if (err)
+		return err;
+
+	/* Hide this pinned object from the shrinker until retired */
+	i915_gem_object_make_unshrinkable(node->obj);
+
+	return 0;
+}
+
+static void pool_retire(struct i915_active *ref)
+{
+	struct intel_engine_pool_node *node =
+		container_of(ref, typeof(*node), active);
+	struct intel_engine_pool *pool = node->pool;
+	struct list_head *list = bucket_for_size(pool, node->obj->base.size);
+	unsigned long flags;
+
+	GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool)));
+
+	i915_gem_object_unpin_pages(node->obj);
+
+	/* Return this object to the shrinker pool */
+	i915_gem_object_make_purgeable(node->obj);
+
+	spin_lock_irqsave(&pool->lock, flags);
+	list_add(&node->link, list);
+	spin_unlock_irqrestore(&pool->lock, flags);
+}
+
+static struct intel_engine_pool_node *
+node_create(struct intel_engine_pool *pool, size_t sz)
+{
+	struct intel_engine_cs *engine = to_engine(pool);
+	struct intel_engine_pool_node *node;
+	struct drm_i915_gem_object *obj;
+
+	node = kmalloc(sizeof(*node),
+		       GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+	if (!node)
+		return ERR_PTR(-ENOMEM);
+
+	node->pool = pool;
+	i915_active_init(engine->i915, &node->active, pool_active, pool_retire);
+
+	obj = i915_gem_object_create_internal(engine->i915, sz);
+	if (IS_ERR(obj)) {
+		i915_active_fini(&node->active);
+		kfree(node);
+		return ERR_CAST(obj);
+	}
+
+	node->obj = obj;
+	return node;
+}
+
+struct intel_engine_pool_node *
+intel_engine_pool_get(struct intel_engine_pool *pool, size_t size)
+{
+	struct intel_engine_pool_node *node;
+	struct list_head *list;
+	unsigned long flags;
+	int ret;
+
+	GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool)));
+
+	size = PAGE_ALIGN(size);
+	list = bucket_for_size(pool, size);
+
+	spin_lock_irqsave(&pool->lock, flags);
+	list_for_each_entry(node, list, link) {
+		if (node->obj->base.size < size)
+			continue;
+		list_del(&node->link);
+		break;
+	}
+	spin_unlock_irqrestore(&pool->lock, flags);
+
+	if (&node->link == list) {
+		node = node_create(pool, size);
+		if (IS_ERR(node))
+			return node;
+	}
+
+	ret = i915_active_acquire(&node->active);
+	if (ret) {
+		node_free(node);
+		return ERR_PTR(ret);
+	}
+
+	return node;
+}
+
+void intel_engine_pool_init(struct intel_engine_pool *pool)
+{
+	int n;
+
+	spin_lock_init(&pool->lock);
+	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
+		INIT_LIST_HEAD(&pool->cache_list[n]);
+}
+
+void intel_engine_pool_park(struct intel_engine_pool *pool)
+{
+	int n;
+
+	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
+		struct list_head *list = &pool->cache_list[n];
+		struct intel_engine_pool_node *node, *nn;
+
+		list_for_each_entry_safe(node, nn, list, link)
+			node_free(node);
+
+		INIT_LIST_HEAD(list);
+	}
+}
+
+void intel_engine_pool_fini(struct intel_engine_pool *pool)
+{
+	int n;
+
+	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
+		GEM_BUG_ON(!list_empty(&pool->cache_list[n]));
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
new file mode 100644
index 000000000000..f7a0a660c1c9
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
@@ -0,0 +1,34 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_POOL_H
+#define INTEL_ENGINE_POOL_H
+
+#include "intel_engine_pool_types.h"
+#include "i915_active.h"
+#include "i915_request.h"
+
+struct intel_engine_pool_node *
+intel_engine_pool_get(struct intel_engine_pool *pool, size_t size);
+
+static inline int
+intel_engine_pool_mark_active(struct intel_engine_pool_node *node,
+			      struct i915_request *rq)
+{
+	return i915_active_ref(&node->active, rq->fence.context, rq);
+}
+
+static inline void
+intel_engine_pool_put(struct intel_engine_pool_node *node)
+{
+	i915_active_release(&node->active);
+}
+
+void intel_engine_pool_init(struct intel_engine_pool *pool);
+void intel_engine_pool_park(struct intel_engine_pool *pool);
+void intel_engine_pool_fini(struct intel_engine_pool *pool);
+
+#endif /* INTEL_ENGINE_POOL_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h b/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h
new file mode 100644
index 000000000000..e31ee361b76f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h
@@ -0,0 +1,29 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_POOL_TYPES_H
+#define INTEL_ENGINE_POOL_TYPES_H
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+#include "i915_active_types.h"
+
+struct drm_i915_gem_object;
+
+struct intel_engine_pool {
+	spinlock_t lock;
+	struct list_head cache_list[4];
+};
+
+struct intel_engine_pool_node {
+	struct i915_active active;
+	struct drm_i915_gem_object *obj;
+	struct list_head link;
+	struct intel_engine_pool *pool;
+};
+
+#endif /* INTEL_ENGINE_POOL_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 3c84973275ae..d764efc20807 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -17,12 +17,12 @@
 #include <linux/types.h>
 
 #include "i915_gem.h"
-#include "i915_gem_batch_pool.h"
 #include "i915_pmu.h"
 #include "i915_priolist_types.h"
 #include "i915_selftest.h"
-#include "gt/intel_timeline_types.h"
+#include "intel_engine_pool_types.h"
 #include "intel_sseu.h"
+#include "intel_timeline_types.h"
 #include "intel_wakeref.h"
 #include "intel_workarounds_types.h"
 
@@ -357,7 +357,7 @@ struct intel_engine_cs {
 	 * when the command parser is enabled. Prevents the client from
 	 * modifying the batch contents after software parsing.
 	 */
-	struct i915_gem_batch_pool batch_pool;
+	struct intel_engine_pool pool;
 
 	struct intel_hw_status_page status_page;
 	struct i915_ctx_workarounds wa_ctx;
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 42776672c59b..4aabc75b92c2 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -27,6 +27,7 @@
 #include "i915_drv.h"
 #include "intel_context.h"
 #include "intel_engine_pm.h"
+#include "intel_engine_pool.h"
 
 #include "mock_engine.h"
 #include "selftests/mock_request.h"
@@ -292,6 +293,8 @@ int mock_engine_init(struct intel_engine_cs *engine)
 	intel_engine_init_execlists(engine);
 	intel_engine_init__pm(engine);
 
+	intel_engine_pool_init(&engine->pool);
+
 	engine->kernel_context =
 		i915_gem_context_get_engine(i915->kernel_context, engine->id);
 	if (IS_ERR(engine->kernel_context))
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index f9a9f589d631..99fdef854c9e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -295,26 +295,6 @@ static int per_file_stats(int id, void *ptr, void *data)
 			   stats.closed); \
 } while (0)
 
-static void print_batch_pool_stats(struct seq_file *m,
-				   struct drm_i915_private *dev_priv)
-{
-	struct drm_i915_gem_object *obj;
-	struct intel_engine_cs *engine;
-	struct file_stats stats = {};
-	int j;
-
-	for_each_user_engine(engine, dev_priv) {
-		for (j = 0; j < ARRAY_SIZE(engine->batch_pool.cache_list); j++) {
-			list_for_each_entry(obj,
-					    &engine->batch_pool.cache_list[j],
-					    batch_pool_link)
-				per_file_stats(0, obj, &stats);
-		}
-	}
-
-	print_file_stats(m, "[k]batch pool", stats);
-}
-
 static void print_context_stats(struct seq_file *m,
 				struct drm_i915_private *i915)
 {
@@ -377,57 +357,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 	if (ret)
 		return ret;
 
-	print_batch_pool_stats(m, i915);
 	print_context_stats(m, i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	return 0;
 }
 
-static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
-{
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	struct drm_i915_gem_object *obj;
-	struct intel_engine_cs *engine;
-	int total = 0;
-	int ret, j;
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
-
-	for_each_user_engine(engine, dev_priv) {
-		for (j = 0; j < ARRAY_SIZE(engine->batch_pool.cache_list); j++) {
-			int count;
-
-			count = 0;
-			list_for_each_entry(obj,
-					    &engine->batch_pool.cache_list[j],
-					    batch_pool_link)
-				count++;
-			seq_printf(m, "%s cache[%d]: %d objects\n",
-				   engine->name, j, count);
-
-			list_for_each_entry(obj,
-					    &engine->batch_pool.cache_list[j],
-					    batch_pool_link) {
-				seq_puts(m, "   ");
-				describe_obj(m, obj);
-				seq_putc(m, '\n');
-			}
-
-			total += count;
-		}
-	}
-
-	seq_printf(m, "total: %d\n", total);
-
-	mutex_unlock(&dev->struct_mutex);
-
-	return 0;
-}
-
 static void gen8_display_interrupt_info(struct seq_file *m)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
@@ -4379,7 +4314,6 @@ static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_gem_objects", i915_gem_object_info, 0},
 	{"i915_gem_fence_regs", i915_gem_fence_regs_info, 0},
 	{"i915_gem_interrupt", i915_interrupt_info, 0},
-	{"i915_gem_batch_pool", i915_gem_batch_pool_info, 0},
 	{"i915_guc_info", i915_guc_info, 0},
 	{"i915_guc_load_status", i915_guc_load_status_info, 0},
 	{"i915_guc_log_dump", i915_guc_log_dump, 0},
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
deleted file mode 100644
index b17f23991253..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2014-2018 Intel Corporation
- */
-
-#include "i915_gem_batch_pool.h"
-#include "i915_drv.h"
-
-/**
- * DOC: batch pool
- *
- * In order to submit batch buffers as 'secure', the software command parser
- * must ensure that a batch buffer cannot be modified after parsing. It does
- * this by copying the user provided batch buffer contents to a kernel owned
- * buffer from which the hardware will actually execute, and by carefully
- * managing the address space bindings for such buffers.
- *
- * The batch pool framework provides a mechanism for the driver to manage a
- * set of scratch buffers to use for this purpose. The framework can be
- * extended to support other uses cases should they arise.
- */
-
-/**
- * i915_gem_batch_pool_init() - initialize a batch buffer pool
- * @pool: the batch buffer pool
- * @engine: the associated request submission engine
- */
-void i915_gem_batch_pool_init(struct i915_gem_batch_pool *pool,
-			      struct intel_engine_cs *engine)
-{
-	int n;
-
-	pool->engine = engine;
-
-	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
-		INIT_LIST_HEAD(&pool->cache_list[n]);
-}
-
-/**
- * i915_gem_batch_pool_fini() - clean up a batch buffer pool
- * @pool: the pool to clean up
- *
- * Note: Callers must hold the struct_mutex.
- */
-void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
-{
-	int n;
-
-	lockdep_assert_held(&pool->engine->i915->drm.struct_mutex);
-
-	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
-		struct drm_i915_gem_object *obj, *next;
-
-		list_for_each_entry_safe(obj, next,
-					 &pool->cache_list[n],
-					 batch_pool_link)
-			i915_gem_object_put(obj);
-
-		INIT_LIST_HEAD(&pool->cache_list[n]);
-	}
-}
-
-/**
- * i915_gem_batch_pool_get() - allocate a buffer from the pool
- * @pool: the batch buffer pool
- * @size: the minimum desired size of the returned buffer
- *
- * Returns an inactive buffer from @pool with at least @size bytes,
- * with the pages pinned. The caller must i915_gem_object_unpin_pages()
- * on the returned object.
- *
- * Note: Callers must hold the struct_mutex
- *
- * Return: the buffer object or an error pointer
- */
-struct drm_i915_gem_object *
-i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
-			size_t size)
-{
-	struct drm_i915_gem_object *obj;
-	struct list_head *list;
-	int n, ret;
-
-	lockdep_assert_held(&pool->engine->i915->drm.struct_mutex);
-
-	/* Compute a power-of-two bucket, but throw everything greater than
-	 * 16KiB into the same bucket: i.e. the the buckets hold objects of
-	 * (1 page, 2 pages, 4 pages, 8+ pages).
-	 */
-	n = fls(size >> PAGE_SHIFT) - 1;
-	if (n >= ARRAY_SIZE(pool->cache_list))
-		n = ARRAY_SIZE(pool->cache_list) - 1;
-	list = &pool->cache_list[n];
-
-	list_for_each_entry(obj, list, batch_pool_link) {
-		struct reservation_object *resv = obj->base.resv;
-
-		/* The batches are strictly LRU ordered */
-		if (!reservation_object_test_signaled_rcu(resv, true))
-			break;
-
-		/*
-		 * The object is now idle, clear the array of shared
-		 * fences before we add a new request. Although, we
-		 * remain on the same engine, we may be on a different
-		 * timeline and so may continually grow the array,
-		 * trapping a reference to all the old fences, rather
-		 * than replace the existing fence.
-		 */
-		if (rcu_access_pointer(resv->fence)) {
-			reservation_object_lock(resv, NULL);
-			reservation_object_add_excl_fence(resv, NULL);
-			reservation_object_unlock(resv);
-		}
-
-		if (obj->base.size >= size)
-			goto found;
-	}
-
-	obj = i915_gem_object_create_internal(pool->engine->i915, size);
-	if (IS_ERR(obj))
-		return obj;
-
-found:
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ERR_PTR(ret);
-
-	list_move_tail(&obj->batch_pool_link, list);
-	return obj;
-}
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
deleted file mode 100644
index feeeeeaa54d8..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2014-2018 Intel Corporation
- */
-
-#ifndef I915_GEM_BATCH_POOL_H
-#define I915_GEM_BATCH_POOL_H
-
-#include <linux/types.h>
-
-struct drm_i915_gem_object;
-struct intel_engine_cs;
-
-struct i915_gem_batch_pool {
-	struct intel_engine_cs *engine;
-	struct list_head cache_list[4];
-};
-
-void i915_gem_batch_pool_init(struct i915_gem_batch_pool *pool,
-			      struct intel_engine_cs *engine);
-void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
-struct drm_i915_gem_object *
-i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
-
-#endif /* I915_GEM_BATCH_POOL_H */
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 19/23] drm/i915/gt: Mark context->active_count as protected by timeline->mutex
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (16 preceding siblings ...)
  2019-07-23 18:38 ` [PATCH 18/23] drm/i915: Replace struct_mutex for batch pool serialisation Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-23 18:38 ` [PATCH 20/23] drm/i915: Forgo last_fence active request tracking Chris Wilson
                   ` (8 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx

We use timeline->mutex to protect modifications to
context->active_count, and the associated enable/disable callbacks.
Due to complications with the engine-pm barrier, there is a path where we used
a "superlock" to provide serialised protection and so could not
unconditionally assert with lockdep that it was always held. However,
we can mark the mutex as taken (noting that we may be nested underneath
ourselves) which means we can be reassured the right timeline->mutex is
always treated as held and let lockdep roam free.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.h       |  3 +++
 drivers/gpu/drm/i915/gt/intel_context_types.h |  2 +-
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     | 12 ++++++++++++
 drivers/gpu/drm/i915/gt/intel_timeline.c      |  4 ++++
 4 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 0772f77394c5..08e512fc4ed0 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -89,17 +89,20 @@ void intel_context_exit_engine(struct intel_context *ce);
 
 static inline void intel_context_enter(struct intel_context *ce)
 {
+	lockdep_assert_held(&ce->ring->timeline->mutex);
 	if (!ce->active_count++)
 		ce->ops->enter(ce);
 }
 
 static inline void intel_context_mark_active(struct intel_context *ce)
 {
+	lockdep_assert_held(&ce->ring->timeline->mutex);
 	++ce->active_count;
 }
 
 static inline void intel_context_exit(struct intel_context *ce)
 {
+	lockdep_assert_held(&ce->ring->timeline->mutex);
 	GEM_BUG_ON(!ce->active_count);
 	if (!--ce->active_count)
 		ce->ops->exit(ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 68a7e979b1a9..92afc207ec80 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -55,7 +55,7 @@ struct intel_context {
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
 
-	unsigned int active_count; /* notionally protected by timeline->mutex */
+	unsigned int active_count; /* protected by timeline->mutex */
 
 	atomic_t pin_count;
 	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 76c828d11b05..1218b7d53b88 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -59,6 +59,16 @@ void intel_engine_park(struct intel_engine_cs *engine)
 	}
 }
 
+static inline void __timeline_mark_lock(struct intel_context *ce)
+{
+	mutex_acquire(&ce->ring->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
+}
+
+static inline void __timeline_mark_unlock(struct intel_context *ce)
+{
+	mutex_release(&ce->ring->timeline->mutex.dep_map, 0, _THIS_IP_);
+}
+
 static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 {
 	struct i915_request *rq;
@@ -83,6 +93,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	 * retiring the last request, thus all rings should be empty and
 	 * all timelines idle.
 	 */
+	__timeline_mark_lock(engine->kernel_context);
 	rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT);
 	if (IS_ERR(rq))
 		/* Context switch failed, hope for the best! Maybe reset? */
@@ -94,6 +105,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 
 	i915_request_add_barriers(rq);
 	__i915_request_commit(rq);
+	__timeline_mark_unlock(engine->kernel_context);
 
 	return false;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 7b476cd55dac..eafd94d5e211 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -338,6 +338,8 @@ void intel_timeline_enter(struct intel_timeline *tl)
 {
 	struct intel_gt_timelines *timelines = &tl->gt->timelines;
 
+	lockdep_assert_held(&tl->mutex);
+
 	GEM_BUG_ON(!atomic_read(&tl->pin_count));
 	if (tl->active_count++)
 		return;
@@ -352,6 +354,8 @@ void intel_timeline_exit(struct intel_timeline *tl)
 {
 	struct intel_gt_timelines *timelines = &tl->gt->timelines;
 
+	lockdep_assert_held(&tl->mutex);
+
 	GEM_BUG_ON(!tl->active_count);
 	if (--tl->active_count)
 		return;
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 20/23] drm/i915: Forgo last_fence active request tracking
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (17 preceding siblings ...)
  2019-07-23 18:38 ` [PATCH 19/23] drm/i915/gt: Mark context->active_count as protected by timeline->mutex Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-23 18:38 ` [PATCH 21/23] drm/i915/overlay: Switch to using i915_active tracking Chris Wilson
                   ` (7 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

We were using the last_fence to track the last request that used this
vma that might be interpreted by a fence register and forced ourselves
to wait for this request before modifying any fence register that
overlapped our vma. Due to the requirement that we need to track any XY_BLT
command, linear or tiled, this in effect meant that we had to track the
vma for its active lifespan anyway, so we can forgo the explicit
last_fence tracking and just use the whole vma->active.

Another solution would be to pipeline the register updates, which would
help resolve some long-running stalls for gen3 (but only gen2 and gen3!)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c       |  4 +---
 drivers/gpu/drm/i915/i915_gem_fence_reg.c |  6 ++----
 drivers/gpu/drm/i915/i915_gem_gtt.c       |  1 -
 drivers/gpu/drm/i915/i915_vma.c           | 13 -------------
 drivers/gpu/drm/i915/i915_vma.h           |  1 -
 5 files changed, 3 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 99fdef854c9e..caedd865631d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -210,9 +210,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 			}
 		}
 		if (vma->fence)
-			seq_printf(m, " , fence: %d%s",
-				   vma->fence->id,
-				   i915_active_request_isset(&vma->last_fence) ? "*" : "");
+			seq_printf(m, " , fence: %d", vma->fence->id);
 		seq_puts(m, ")");
 
 		spin_lock(&obj->vma.lock);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index bcac359ec661..c9654f1a468f 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -230,16 +230,14 @@ static int fence_update(struct i915_fence_reg *fence,
 			 i915_gem_object_get_tiling(vma->obj)))
 			return -EINVAL;
 
-		ret = i915_active_request_retire(&vma->last_fence,
-					     &vma->obj->base.dev->struct_mutex);
+		ret = i915_active_wait(&vma->active);
 		if (ret)
 			return ret;
 	}
 
 	old = xchg(&fence->vma, NULL);
 	if (old) {
-		ret = i915_active_request_retire(&old->last_fence,
-					     &old->obj->base.dev->struct_mutex);
+		ret = i915_active_wait(&old->active);
 		if (ret) {
 			fence->vma = old;
 			return ret;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8304b98b0bf8..2847d71223de 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1868,7 +1868,6 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 		return ERR_PTR(-ENOMEM);
 
 	i915_active_init(i915, &vma->active, NULL, NULL);
-	INIT_ACTIVE_REQUEST(&vma->last_fence);
 
 	vma->vm = &ggtt->vm;
 	vma->ops = &pd_vma_ops;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index b52f71e0ade6..85a8e6fd34d5 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -119,7 +119,6 @@ vma_create(struct drm_i915_gem_object *obj,
 
 	i915_active_init(vm->i915, &vma->active,
 			 __i915_vma_active, __i915_vma_retire);
-	INIT_ACTIVE_REQUEST(&vma->last_fence);
 
 	/* Declare ourselves safe for use inside shrinkers */
 	if (IS_ENABLED(CONFIG_LOCKDEP)) {
@@ -801,8 +800,6 @@ static void __i915_vma_destroy(struct i915_vma *vma)
 	GEM_BUG_ON(vma->node.allocated);
 	GEM_BUG_ON(vma->fence);
 
-	GEM_BUG_ON(i915_active_request_isset(&vma->last_fence));
-
 	mutex_lock(&vma->vm->mutex);
 	list_del(&vma->vm_link);
 	mutex_unlock(&vma->vm->mutex);
@@ -938,9 +935,6 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	obj->read_domains |= I915_GEM_GPU_DOMAINS;
 	obj->mm.dirty = true;
 
-	if (flags & EXEC_OBJECT_NEEDS_FENCE)
-		__i915_active_request_set(&vma->last_fence, rq);
-
 	export_fence(vma, rq, flags);
 
 	GEM_BUG_ON(!i915_vma_is_active(vma));
@@ -973,14 +967,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 		 * before we are finished).
 		 */
 		__i915_vma_pin(vma);
-
 		ret = i915_active_wait(&vma->active);
-		if (ret)
-			goto unpin;
-
-		ret = i915_active_request_retire(&vma->last_fence,
-					      &vma->vm->i915->drm.struct_mutex);
-unpin:
 		__i915_vma_unpin(vma);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 5c4224749bde..b3d2121be947 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -111,7 +111,6 @@ struct i915_vma {
 #define I915_VMA_GGTT_WRITE	BIT(14)
 
 	struct i915_active active;
-	struct i915_active_request last_fence;
 
 	/**
 	 * Support different GGTT views into the same object.
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 21/23] drm/i915/overlay: Switch to using i915_active tracking
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (18 preceding siblings ...)
  2019-07-23 18:38 ` [PATCH 20/23] drm/i915: Forgo last_fence active request tracking Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-23 18:38 ` [PATCH 22/23] drm/i915: Extract intel_frontbuffer active tracking Chris Wilson
                   ` (6 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx

Remove the raw i915_active_request tracking in favour of the higher
level i915_active tracking for the sole purpose of making the lockless
transition easier in later patches.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/display/intel_overlay.c | 129 +++++++++----------
 drivers/gpu/drm/i915/i915_active.h           |  19 ---
 2 files changed, 64 insertions(+), 84 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 07929726b780..9b3f4f6644f3 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -191,7 +191,8 @@ struct intel_overlay {
 	struct overlay_registers __iomem *regs;
 	u32 flip_addr;
 	/* flip handling */
-	struct i915_active_request last_flip;
+	struct i915_active last_flip;
+	void (*flip_complete)(struct intel_overlay *ovl);
 };
 
 static void i830_overlay_clock_gating(struct drm_i915_private *dev_priv,
@@ -217,30 +218,25 @@ static void i830_overlay_clock_gating(struct drm_i915_private *dev_priv,
 				  PCI_DEVFN(0, 0), I830_CLOCK_GATE, val);
 }
 
-static void intel_overlay_submit_request(struct intel_overlay *overlay,
-					 struct i915_request *rq,
-					 i915_active_retire_fn retire)
+static struct i915_request *
+alloc_request(struct intel_overlay *overlay, void (*fn)(struct intel_overlay *))
 {
-	GEM_BUG_ON(i915_active_request_peek(&overlay->last_flip,
-					    &overlay->i915->drm.struct_mutex));
-	i915_active_request_set_retire_fn(&overlay->last_flip, retire,
-					  &overlay->i915->drm.struct_mutex);
-	__i915_active_request_set(&overlay->last_flip, rq);
-	i915_request_add(rq);
-}
+	struct i915_request *rq;
+	int err;
 
-static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
-					 struct i915_request *rq,
-					 i915_active_retire_fn retire)
-{
-	intel_overlay_submit_request(overlay, rq, retire);
-	return i915_active_request_retire(&overlay->last_flip,
-					  &overlay->i915->drm.struct_mutex);
-}
+	overlay->flip_complete = fn;
 
-static struct i915_request *alloc_request(struct intel_overlay *overlay)
-{
-	return i915_request_create(overlay->context);
+	rq = i915_request_create(overlay->context);
+	if (IS_ERR(rq))
+		return rq;
+
+	err = i915_active_ref(&overlay->last_flip, rq->fence.context, rq);
+	if (err) {
+		i915_request_add(rq);
+		return ERR_PTR(err);
+	}
+
+	return rq;
 }
 
 /* overlay needs to be disable in OCMD reg */
@@ -252,7 +248,7 @@ static int intel_overlay_on(struct intel_overlay *overlay)
 
 	WARN_ON(overlay->active);
 
-	rq = alloc_request(overlay);
+	rq = alloc_request(overlay, NULL);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
@@ -273,7 +269,9 @@ static int intel_overlay_on(struct intel_overlay *overlay)
 	*cs++ = MI_NOOP;
 	intel_ring_advance(rq, cs);
 
-	return intel_overlay_do_wait_request(overlay, rq, NULL);
+	i915_request_add(rq);
+
+	return i915_active_wait(&overlay->last_flip);
 }
 
 static void intel_overlay_flip_prepare(struct intel_overlay *overlay,
@@ -317,7 +315,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
 	if (tmp & (1 << 17))
 		DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
 
-	rq = alloc_request(overlay);
+	rq = alloc_request(overlay, NULL);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
@@ -332,8 +330,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
 	intel_ring_advance(rq, cs);
 
 	intel_overlay_flip_prepare(overlay, vma);
-
-	intel_overlay_submit_request(overlay, rq, NULL);
+	i915_request_add(rq);
 
 	return 0;
 }
@@ -354,20 +351,13 @@ static void intel_overlay_release_old_vma(struct intel_overlay *overlay)
 }
 
 static void
-intel_overlay_release_old_vid_tail(struct i915_active_request *active,
-				   struct i915_request *rq)
+intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
 {
-	struct intel_overlay *overlay =
-		container_of(active, typeof(*overlay), last_flip);
-
 	intel_overlay_release_old_vma(overlay);
 }
 
-static void intel_overlay_off_tail(struct i915_active_request *active,
-				   struct i915_request *rq)
+static void intel_overlay_off_tail(struct intel_overlay *overlay)
 {
-	struct intel_overlay *overlay =
-		container_of(active, typeof(*overlay), last_flip);
 	struct drm_i915_private *dev_priv = overlay->i915;
 
 	intel_overlay_release_old_vma(overlay);
@@ -380,6 +370,16 @@ static void intel_overlay_off_tail(struct i915_active_request *active,
 		i830_overlay_clock_gating(dev_priv, true);
 }
 
+static void
+intel_overlay_last_flip_retire(struct i915_active *active)
+{
+	struct intel_overlay *overlay =
+		container_of(active, typeof(*overlay), last_flip);
+
+	if (overlay->flip_complete)
+		overlay->flip_complete(overlay);
+}
+
 /* overlay needs to be disabled in OCMD reg */
 static int intel_overlay_off(struct intel_overlay *overlay)
 {
@@ -394,7 +394,7 @@ static int intel_overlay_off(struct intel_overlay *overlay)
 	 * of the hw. Do it in both cases */
 	flip_addr |= OFC_UPDATE;
 
-	rq = alloc_request(overlay);
+	rq = alloc_request(overlay, intel_overlay_off_tail);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
@@ -417,17 +417,16 @@ static int intel_overlay_off(struct intel_overlay *overlay)
 	intel_ring_advance(rq, cs);
 
 	intel_overlay_flip_prepare(overlay, NULL);
+	i915_request_add(rq);
 
-	return intel_overlay_do_wait_request(overlay, rq,
-					     intel_overlay_off_tail);
+	return i915_active_wait(&overlay->last_flip);
 }
 
 /* recover from an interruption due to a signal
  * We have to be careful not to repeat work forever an make forward progess. */
 static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
 {
-	return i915_active_request_retire(&overlay->last_flip,
-					  &overlay->i915->drm.struct_mutex);
+	return i915_active_wait(&overlay->last_flip);
 }
 
 /* Wait for pending overlay flip and release old frame.
@@ -437,43 +436,40 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
 static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 {
 	struct drm_i915_private *dev_priv = overlay->i915;
+	struct i915_request *rq;
 	u32 *cs;
-	int ret;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
-	/* Only wait if there is actually an old frame to release to
+	/*
+	 * Only wait if there is actually an old frame to release to
 	 * guarantee forward progress.
 	 */
 	if (!overlay->old_vma)
 		return 0;
 
-	if (I915_READ(GEN2_ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
-		/* synchronous slowpath */
-		struct i915_request *rq;
+	if (!(I915_READ(GEN2_ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT)) {
+		intel_overlay_release_old_vid_tail(overlay);
+		return 0;
+	}
 
-		rq = alloc_request(overlay);
-		if (IS_ERR(rq))
-			return PTR_ERR(rq);
+	rq = alloc_request(overlay, intel_overlay_release_old_vid_tail);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
 
-		cs = intel_ring_begin(rq, 2);
-		if (IS_ERR(cs)) {
-			i915_request_add(rq);
-			return PTR_ERR(cs);
-		}
+	cs = intel_ring_begin(rq, 2);
+	if (IS_ERR(cs)) {
+		i915_request_add(rq);
+		return PTR_ERR(cs);
+	}
 
-		*cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP;
-		*cs++ = MI_NOOP;
-		intel_ring_advance(rq, cs);
+	*cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP;
+	*cs++ = MI_NOOP;
+	intel_ring_advance(rq, cs);
 
-		ret = intel_overlay_do_wait_request(overlay, rq,
-						    intel_overlay_release_old_vid_tail);
-		if (ret)
-			return ret;
-	} else
-		intel_overlay_release_old_vid_tail(&overlay->last_flip, NULL);
+	i915_request_add(rq);
 
-	return 0;
+	return i915_active_wait(&overlay->last_flip);
 }
 
 void intel_overlay_reset(struct drm_i915_private *dev_priv)
@@ -1375,7 +1371,9 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
 	overlay->contrast = 75;
 	overlay->saturation = 146;
 
-	INIT_ACTIVE_REQUEST(&overlay->last_flip);
+	i915_active_init(dev_priv,
+			 &overlay->last_flip,
+			 NULL, intel_overlay_last_flip_retire);
 
 	ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv));
 	if (ret)
@@ -1409,6 +1407,7 @@ void intel_overlay_cleanup(struct drm_i915_private *dev_priv)
 	WARN_ON(overlay->active);
 
 	i915_gem_object_put(overlay->reg_bo);
+	i915_active_fini(&overlay->last_flip);
 
 	kfree(overlay);
 }
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 134166d31251..911a8338007a 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -89,25 +89,6 @@ int __must_check
 i915_active_request_set(struct i915_active_request *active,
 			struct i915_request *rq);
 
-/**
- * i915_active_request_set_retire_fn - updates the retirement callback
- * @active - the active tracker
- * @fn - the routine called when the request is retired
- * @mutex - struct_mutex used to guard retirements
- *
- * i915_active_request_set_retire_fn() updates the function pointer that
- * is called when the final request associated with the @active tracker
- * is retired.
- */
-static inline void
-i915_active_request_set_retire_fn(struct i915_active_request *active,
-				  i915_active_retire_fn fn,
-				  struct mutex *mutex)
-{
-	lockdep_assert_held(mutex);
-	active->retire = fn ?: i915_active_retire_noop;
-}
-
 /**
  * i915_active_request_raw - return the active request
  * @active - the active tracker
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 22/23] drm/i915: Extract intel_frontbuffer active tracking
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (19 preceding siblings ...)
  2019-07-23 18:38 ` [PATCH 21/23] drm/i915/overlay: Switch to using i915_active tracking Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-23 18:38 ` [PATCH 23/23] drm/i915: Markup expected timeline locks for i915_active Chris Wilson
                   ` (5 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx

Move the active tracking for the frontbuffer operations out of the
i915_gem_object and into its own first class (refcounted) object. In the
process of detangling, we switch from low level request tracking to the
easier i915_active -- with the plan that this avoids any potential
atomic callbacks as the frontbuffer tracking wishes to sleep as it
flushes.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/display/intel_display.c  |  70 +++--
 drivers/gpu/drm/i915/display/intel_fbdev.c    |  40 ++-
 .../gpu/drm/i915/display/intel_frontbuffer.c  | 255 +++++++++++++-----
 .../gpu/drm/i915/display/intel_frontbuffer.h  |  70 +++--
 drivers/gpu/drm/i915/display/intel_overlay.c  |   8 +-
 drivers/gpu/drm/i915/gem/i915_gem_clflush.c   |   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_domain.c    |  14 +-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c      |   4 -
 drivers/gpu/drm/i915/gem/i915_gem_object.c    |  27 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |   2 +-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   8 +-
 drivers/gpu/drm/i915/i915_debugfs.c           |   5 -
 drivers/gpu/drm/i915/i915_drv.h               |   4 -
 drivers/gpu/drm/i915/i915_gem.c               |  47 +---
 drivers/gpu/drm/i915/i915_vma.c               |   6 +-
 drivers/gpu/drm/i915/intel_drv.h              |   1 +
 16 files changed, 306 insertions(+), 257 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index e25b82d07d4f..219ae5a60b31 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -3049,12 +3049,13 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_i915_gem_object *obj = NULL;
 	struct drm_mode_fb_cmd2 mode_cmd = { 0 };
 	struct drm_framebuffer *fb = &plane_config->fb->base;
 	u32 base_aligned = round_down(plane_config->base, PAGE_SIZE);
 	u32 size_aligned = round_up(plane_config->base + plane_config->size,
 				    PAGE_SIZE);
+	struct drm_i915_gem_object *obj;
+	bool ret = false;
 
 	size_aligned -= base_aligned;
 
@@ -3096,7 +3097,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
 		break;
 	default:
 		MISSING_CASE(plane_config->tiling);
-		return false;
+		goto out;
 	}
 
 	mode_cmd.pixel_format = fb->format->format;
@@ -3108,16 +3109,15 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
 
 	if (intel_framebuffer_init(to_intel_framebuffer(fb), obj, &mode_cmd)) {
 		DRM_DEBUG_KMS("intel fb init failed\n");
-		goto out_unref_obj;
+		goto out;
 	}
 
 
 	DRM_DEBUG_KMS("initial plane fb obj %p\n", obj);
-	return true;
-
-out_unref_obj:
+	ret = true;
+out:
 	i915_gem_object_put(obj);
-	return false;
+	return ret;
 }
 
 static void
@@ -3174,6 +3174,12 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
 	intel_disable_plane(plane, crtc_state);
 }
 
+static struct intel_frontbuffer *
+to_intel_frontbuffer(struct drm_framebuffer *fb)
+{
+	return fb ? to_intel_framebuffer(fb)->frontbuffer : NULL;
+}
+
 static void
 intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 			     struct intel_initial_plane_config *plane_config)
@@ -3181,7 +3187,6 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 	struct drm_device *dev = intel_crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_crtc *c;
-	struct drm_i915_gem_object *obj;
 	struct drm_plane *primary = intel_crtc->base.primary;
 	struct drm_plane_state *plane_state = primary->state;
 	struct intel_plane *intel_plane = to_intel_plane(primary);
@@ -3257,8 +3262,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 		return;
 	}
 
-	obj = intel_fb_obj(fb);
-	intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
+	intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_DIRTYFB);
 
 	plane_state->src_x = 0;
 	plane_state->src_y = 0;
@@ -3273,14 +3277,14 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 	intel_state->base.src = drm_plane_state_src(plane_state);
 	intel_state->base.dst = drm_plane_state_dest(plane_state);
 
-	if (i915_gem_object_is_tiled(obj))
+	if (plane_config->tiling)
 		dev_priv->preserve_bios_swizzle = true;
 
 	plane_state->fb = fb;
 	plane_state->crtc = &intel_crtc->base;
 
 	atomic_or(to_intel_plane(primary)->frontbuffer_bit,
-		  &obj->frontbuffer_bits);
+		  &to_intel_frontbuffer(fb)->bits);
 }
 
 static int skl_max_plane_width(const struct drm_framebuffer *fb,
@@ -14129,9 +14133,9 @@ static void intel_atomic_track_fbs(struct intel_atomic_state *state)
 
 	for_each_oldnew_intel_plane_in_state(state, plane, old_plane_state,
 					     new_plane_state, i)
-		i915_gem_track_fb(intel_fb_obj(old_plane_state->base.fb),
-				  intel_fb_obj(new_plane_state->base.fb),
-				  plane->frontbuffer_bit);
+		intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->base.fb),
+					to_intel_frontbuffer(new_plane_state->base.fb),
+					plane->frontbuffer_bit);
 }
 
 static int intel_atomic_commit(struct drm_device *dev,
@@ -14415,7 +14419,7 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 		return ret;
 
 	fb_obj_bump_render_priority(obj);
-	intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
+	intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_DIRTYFB);
 
 	if (!new_state->fence) { /* implicit fencing */
 		struct dma_fence *fence;
@@ -14678,13 +14682,12 @@ intel_legacy_cursor_update(struct drm_plane *plane,
 			   struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
-	int ret;
 	struct drm_plane_state *old_plane_state, *new_plane_state;
 	struct intel_plane *intel_plane = to_intel_plane(plane);
-	struct drm_framebuffer *old_fb;
 	struct intel_crtc_state *crtc_state =
 		to_intel_crtc_state(crtc->state);
 	struct intel_crtc_state *new_crtc_state;
+	int ret;
 
 	/*
 	 * When crtc is inactive or there is a modeset pending,
@@ -14752,11 +14755,10 @@ intel_legacy_cursor_update(struct drm_plane *plane,
 	if (ret)
 		goto out_unlock;
 
-	intel_fb_obj_flush(intel_fb_obj(fb), ORIGIN_FLIP);
-
-	old_fb = old_plane_state->fb;
-	i915_gem_track_fb(intel_fb_obj(old_fb), intel_fb_obj(fb),
-			  intel_plane->frontbuffer_bit);
+	intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_FLIP);
+	intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->fb),
+				to_intel_frontbuffer(fb),
+				intel_plane->frontbuffer_bit);
 
 	/* Swap plane state */
 	plane->state = new_plane_state;
@@ -15536,15 +15538,9 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv)
 static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
-	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 
 	drm_framebuffer_cleanup(fb);
-
-	i915_gem_object_lock(obj);
-	WARN_ON(!obj->framebuffer_references--);
-	i915_gem_object_unlock(obj);
-
-	i915_gem_object_put(obj);
+	intel_frontbuffer_put(intel_fb->frontbuffer);
 
 	kfree(intel_fb);
 }
@@ -15572,7 +15568,7 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb,
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 
 	i915_gem_object_flush_if_display(obj);
-	intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
+	intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_DIRTYFB);
 
 	return 0;
 }
@@ -15594,8 +15590,11 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 	int ret = -EINVAL;
 	int i;
 
+	intel_fb->frontbuffer = intel_frontbuffer_get(obj);
+	if (!intel_fb->frontbuffer)
+		return -ENOMEM;
+
 	i915_gem_object_lock(obj);
-	obj->framebuffer_references++;
 	tiling = i915_gem_object_get_tiling(obj);
 	stride = i915_gem_object_get_stride(obj);
 	i915_gem_object_unlock(obj);
@@ -15712,9 +15711,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 	return 0;
 
 err:
-	i915_gem_object_lock(obj);
-	obj->framebuffer_references--;
-	i915_gem_object_unlock(obj);
+	intel_frontbuffer_put(intel_fb->frontbuffer);
 	return ret;
 }
 
@@ -15732,8 +15729,7 @@ intel_user_framebuffer_create(struct drm_device *dev,
 		return ERR_PTR(-ENOENT);
 
 	fb = intel_framebuffer_create(obj, &mode_cmd);
-	if (IS_ERR(fb))
-		i915_gem_object_put(obj);
+	i915_gem_object_put(obj);
 
 	return fb;
 }
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index 1edd44ee32b2..4b57cdd76699 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -47,13 +47,14 @@
 #include "intel_fbdev.h"
 #include "intel_frontbuffer.h"
 
-static void intel_fbdev_invalidate(struct intel_fbdev *ifbdev)
+static struct intel_frontbuffer *to_frontbuffer(struct intel_fbdev *ifbdev)
 {
-	struct drm_i915_gem_object *obj = intel_fb_obj(&ifbdev->fb->base);
-	unsigned int origin =
-		ifbdev->vma_flags & PLANE_HAS_FENCE ? ORIGIN_GTT : ORIGIN_CPU;
+	return ifbdev->fb->frontbuffer;
+}
 
-	intel_fb_obj_invalidate(obj, origin);
+static void intel_fbdev_invalidate(struct intel_fbdev *ifbdev)
+{
+	intel_frontbuffer_invalidate(to_frontbuffer(ifbdev), ORIGIN_CPU);
 }
 
 static int intel_fbdev_set_par(struct fb_info *info)
@@ -120,7 +121,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_mode_fb_cmd2 mode_cmd = {};
 	struct drm_i915_gem_object *obj;
-	int size, ret;
+	int size;
 
 	/* we don't do packed 24bpp */
 	if (sizes->surface_bpp == 24)
@@ -147,24 +148,16 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
 		obj = i915_gem_object_create_shmem(dev_priv, size);
 	if (IS_ERR(obj)) {
 		DRM_ERROR("failed to allocate framebuffer\n");
-		ret = PTR_ERR(obj);
-		goto err;
+		return PTR_ERR(obj);
 	}
 
 	fb = intel_framebuffer_create(obj, &mode_cmd);
-	if (IS_ERR(fb)) {
-		ret = PTR_ERR(fb);
-		goto err_obj;
-	}
+	i915_gem_object_put(obj);
+	if (IS_ERR(fb))
+		return PTR_ERR(fb);
 
 	ifbdev->fb = to_intel_framebuffer(fb);
-
 	return 0;
-
-err_obj:
-	i915_gem_object_put(obj);
-err:
-	return ret;
 }
 
 static int intelfb_create(struct drm_fb_helper *helper,
@@ -180,7 +173,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	const struct i915_ggtt_view view = {
 		.type = I915_GGTT_VIEW_NORMAL,
 	};
-	struct drm_framebuffer *fb;
 	intel_wakeref_t wakeref;
 	struct fb_info *info;
 	struct i915_vma *vma;
@@ -226,8 +218,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 		goto out_unlock;
 	}
 
-	fb = &ifbdev->fb->base;
-	intel_fb_obj_flush(intel_fb_obj(fb), ORIGIN_DIRTYFB);
+	intel_frontbuffer_flush(to_frontbuffer(ifbdev), ORIGIN_DIRTYFB);
 
 	info = drm_fb_helper_alloc_fbi(helper);
 	if (IS_ERR(info)) {
@@ -236,7 +227,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 		goto out_unpin;
 	}
 
-	ifbdev->helper.fb = fb;
+	ifbdev->helper.fb = &ifbdev->fb->base;
 
 	info->fbops = &intelfb_ops;
 
@@ -262,13 +253,14 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	 * If the object is stolen however, it will be full of whatever
 	 * garbage was left in there.
 	 */
-	if (intel_fb_obj(fb)->stolen && !prealloc)
+	if (vma->obj->stolen && !prealloc)
 		memset_io(info->screen_base, 0, info->screen_size);
 
 	/* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
 
 	DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08x\n",
-		      fb->width, fb->height, i915_ggtt_offset(vma));
+		      ifbdev->fb->base.width, ifbdev->fb->base.height,
+		      i915_ggtt_offset(vma));
 	ifbdev->vma = vma;
 	ifbdev->vma_flags = flags;
 
diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
index 44273c10cea5..d11031811a48 100644
--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
@@ -30,11 +30,11 @@
  * Many features require us to track changes to the currently active
  * frontbuffer, especially rendering targeted at the frontbuffer.
  *
- * To be able to do so GEM tracks frontbuffers using a bitmask for all possible
- * frontbuffer slots through i915_gem_track_fb(). The function in this file are
- * then called when the contents of the frontbuffer are invalidated, when
- * frontbuffer rendering has stopped again to flush out all the changes and when
- * the frontbuffer is exchanged with a flip. Subsystems interested in
+ * To be able to do so we track frontbuffers using a bitmask for all possible
+ * frontbuffer slots through intel_frontbuffer_track(). The functions in this
+ * file are then called when the contents of the frontbuffer are invalidated,
+ * when frontbuffer rendering has stopped again to flush out all the changes
+ * and when the frontbuffer is exchanged with a flip. Subsystems interested in
  * frontbuffer changes (e.g. PSR, FBC, DRRS) should directly put their callbacks
  * into the relevant places and filter for the frontbuffer slots that they are
  * interested int.
@@ -63,28 +63,9 @@
 #include "intel_frontbuffer.h"
 #include "intel_psr.h"
 
-void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
-			       enum fb_op_origin origin,
-			       unsigned int frontbuffer_bits)
-{
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-
-	if (origin == ORIGIN_CS) {
-		spin_lock(&dev_priv->fb_tracking.lock);
-		dev_priv->fb_tracking.busy_bits |= frontbuffer_bits;
-		dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits;
-		spin_unlock(&dev_priv->fb_tracking.lock);
-	}
-
-	might_sleep();
-	intel_psr_invalidate(dev_priv, frontbuffer_bits, origin);
-	intel_edp_drrs_invalidate(dev_priv, frontbuffer_bits);
-	intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin);
-}
-
 /**
- * intel_frontbuffer_flush - flush frontbuffer
- * @dev_priv: i915 device
+ * frontbuffer_flush - flush frontbuffer
+ * @i915: i915 device
  * @frontbuffer_bits: frontbuffer plane tracking bits
  * @origin: which operation caused the flush
  *
@@ -94,45 +75,27 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
  *
  * Can be called without any locks held.
  */
-static void intel_frontbuffer_flush(struct drm_i915_private *dev_priv,
-				    unsigned frontbuffer_bits,
-				    enum fb_op_origin origin)
+static void frontbuffer_flush(struct drm_i915_private *i915,
+			      unsigned int frontbuffer_bits,
+			      enum fb_op_origin origin)
 {
 	/* Delay flushing when rings are still busy.*/
-	spin_lock(&dev_priv->fb_tracking.lock);
-	frontbuffer_bits &= ~dev_priv->fb_tracking.busy_bits;
-	spin_unlock(&dev_priv->fb_tracking.lock);
+	spin_lock(&i915->fb_tracking.lock);
+	frontbuffer_bits &= ~i915->fb_tracking.busy_bits;
+	spin_unlock(&i915->fb_tracking.lock);
 
 	if (!frontbuffer_bits)
 		return;
 
 	might_sleep();
-	intel_edp_drrs_flush(dev_priv, frontbuffer_bits);
-	intel_psr_flush(dev_priv, frontbuffer_bits, origin);
-	intel_fbc_flush(dev_priv, frontbuffer_bits, origin);
-}
-
-void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
-			  enum fb_op_origin origin,
-			  unsigned int frontbuffer_bits)
-{
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-
-	if (origin == ORIGIN_CS) {
-		spin_lock(&dev_priv->fb_tracking.lock);
-		/* Filter out new bits since rendering started. */
-		frontbuffer_bits &= dev_priv->fb_tracking.busy_bits;
-		dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
-		spin_unlock(&dev_priv->fb_tracking.lock);
-	}
-
-	if (frontbuffer_bits)
-		intel_frontbuffer_flush(dev_priv, frontbuffer_bits, origin);
+	intel_edp_drrs_flush(i915, frontbuffer_bits);
+	intel_psr_flush(i915, frontbuffer_bits, origin);
+	intel_fbc_flush(i915, frontbuffer_bits, origin);
 }
 
 /**
  * intel_frontbuffer_flip_prepare - prepare asynchronous frontbuffer flip
- * @dev_priv: i915 device
+ * @i915: i915 device
  * @frontbuffer_bits: frontbuffer plane tracking bits
  *
  * This function gets called after scheduling a flip on @obj. The actual
@@ -142,19 +105,19 @@ void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
  *
  * Can be called without any locks held.
  */
-void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv,
+void intel_frontbuffer_flip_prepare(struct drm_i915_private *i915,
 				    unsigned frontbuffer_bits)
 {
-	spin_lock(&dev_priv->fb_tracking.lock);
-	dev_priv->fb_tracking.flip_bits |= frontbuffer_bits;
+	spin_lock(&i915->fb_tracking.lock);
+	i915->fb_tracking.flip_bits |= frontbuffer_bits;
 	/* Remove stale busy bits due to the old buffer. */
-	dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
-	spin_unlock(&dev_priv->fb_tracking.lock);
+	i915->fb_tracking.busy_bits &= ~frontbuffer_bits;
+	spin_unlock(&i915->fb_tracking.lock);
 }
 
 /**
  * intel_frontbuffer_flip_complete - complete asynchronous frontbuffer flip
- * @dev_priv: i915 device
+ * @i915: i915 device
  * @frontbuffer_bits: frontbuffer plane tracking bits
  *
  * This function gets called after the flip has been latched and will complete
@@ -162,23 +125,22 @@ void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv,
  *
  * Can be called without any locks held.
  */
-void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv,
+void intel_frontbuffer_flip_complete(struct drm_i915_private *i915,
 				     unsigned frontbuffer_bits)
 {
-	spin_lock(&dev_priv->fb_tracking.lock);
+	spin_lock(&i915->fb_tracking.lock);
 	/* Mask any cancelled flips. */
-	frontbuffer_bits &= dev_priv->fb_tracking.flip_bits;
-	dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits;
-	spin_unlock(&dev_priv->fb_tracking.lock);
+	frontbuffer_bits &= i915->fb_tracking.flip_bits;
+	i915->fb_tracking.flip_bits &= ~frontbuffer_bits;
+	spin_unlock(&i915->fb_tracking.lock);
 
 	if (frontbuffer_bits)
-		intel_frontbuffer_flush(dev_priv,
-					frontbuffer_bits, ORIGIN_FLIP);
+		frontbuffer_flush(i915, frontbuffer_bits, ORIGIN_FLIP);
 }
 
 /**
  * intel_frontbuffer_flip - synchronous frontbuffer flip
- * @dev_priv: i915 device
+ * @i915: i915 device
  * @frontbuffer_bits: frontbuffer plane tracking bits
  *
  * This function gets called after scheduling a flip on @obj. This is for
@@ -187,13 +149,160 @@ void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv,
  *
  * Can be called without any locks held.
  */
-void intel_frontbuffer_flip(struct drm_i915_private *dev_priv,
+void intel_frontbuffer_flip(struct drm_i915_private *i915,
 			    unsigned frontbuffer_bits)
 {
-	spin_lock(&dev_priv->fb_tracking.lock);
+	spin_lock(&i915->fb_tracking.lock);
 	/* Remove stale busy bits due to the old buffer. */
-	dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
-	spin_unlock(&dev_priv->fb_tracking.lock);
+	i915->fb_tracking.busy_bits &= ~frontbuffer_bits;
+	spin_unlock(&i915->fb_tracking.lock);
 
-	intel_frontbuffer_flush(dev_priv, frontbuffer_bits, ORIGIN_FLIP);
+	frontbuffer_flush(i915, frontbuffer_bits, ORIGIN_FLIP);
+}
+
+void __intel_fb_invalidate(struct intel_frontbuffer *front,
+			   enum fb_op_origin origin,
+			   unsigned int frontbuffer_bits)
+{
+	struct drm_i915_private *i915 = to_i915(front->obj->base.dev);
+
+	if (origin == ORIGIN_CS) {
+		spin_lock(&i915->fb_tracking.lock);
+		i915->fb_tracking.busy_bits |= frontbuffer_bits;
+		i915->fb_tracking.flip_bits &= ~frontbuffer_bits;
+		spin_unlock(&i915->fb_tracking.lock);
+	}
+
+	might_sleep();
+	intel_psr_invalidate(i915, frontbuffer_bits, origin);
+	intel_edp_drrs_invalidate(i915, frontbuffer_bits);
+	intel_fbc_invalidate(i915, frontbuffer_bits, origin);
+}
+
+void __intel_fb_flush(struct intel_frontbuffer *front,
+		      enum fb_op_origin origin,
+		      unsigned int frontbuffer_bits)
+{
+	struct drm_i915_private *i915 = to_i915(front->obj->base.dev);
+
+	if (origin == ORIGIN_CS) {
+		spin_lock(&i915->fb_tracking.lock);
+		/* Filter out new bits since rendering started. */
+		frontbuffer_bits &= i915->fb_tracking.busy_bits;
+		i915->fb_tracking.busy_bits &= ~frontbuffer_bits;
+		spin_unlock(&i915->fb_tracking.lock);
+	}
+
+	if (frontbuffer_bits)
+		frontbuffer_flush(i915, frontbuffer_bits, origin);
+}
+
+static int frontbuffer_active(struct i915_active *ref)
+{
+	struct intel_frontbuffer *front =
+		container_of(ref, typeof(*front), write);
+
+	kref_get(&front->ref);
+	return 0;
+}
+
+static void frontbuffer_retire(struct i915_active *ref)
+{
+	struct intel_frontbuffer *front =
+		container_of(ref, typeof(*front), write);
+
+	intel_frontbuffer_flush(front, ORIGIN_CS);
+	intel_frontbuffer_put(front);
+}
+
+static void frontbuffer_release(struct kref *ref)
+	__releases(&to_i915(front->obj->base.dev)->fb_tracking.lock)
+{
+	struct intel_frontbuffer *front =
+		container_of(ref, typeof(*front), ref);
+
+	front->obj->frontbuffer = NULL;
+	spin_unlock(&to_i915(front->obj->base.dev)->fb_tracking.lock);
+
+	i915_gem_object_put(front->obj);
+	kfree(front);
+}
+
+struct intel_frontbuffer *
+intel_frontbuffer_get(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct intel_frontbuffer *front;
+
+	spin_lock(&i915->fb_tracking.lock);
+	front = obj->frontbuffer;
+	if (front)
+		kref_get(&front->ref);
+	spin_unlock(&i915->fb_tracking.lock);
+	if (front)
+		return front;
+
+	front = kmalloc(sizeof(*front), GFP_KERNEL);
+	if (!front)
+		return NULL;
+
+	front->obj = obj;
+	kref_init(&front->ref);
+	atomic_set(&front->bits, 0);
+	i915_active_init(i915, &front->write,
+			 frontbuffer_active, frontbuffer_retire);
+
+	spin_lock(&i915->fb_tracking.lock);
+	if (obj->frontbuffer) {
+		kfree(front);
+		front = obj->frontbuffer;
+		kref_get(&front->ref);
+	} else {
+		i915_gem_object_get(obj);
+		obj->frontbuffer = front;
+	}
+	spin_unlock(&i915->fb_tracking.lock);
+
+	return front;
+}
+
+void intel_frontbuffer_put(struct intel_frontbuffer *front)
+{
+	kref_put_lock(&front->ref,
+		      frontbuffer_release,
+		      &to_i915(front->obj->base.dev)->fb_tracking.lock);
+}
+
+/**
+ * intel_frontbuffer_track - update frontbuffer tracking
+ * @old: current buffer for the frontbuffer slots
+ * @new: new buffer for the frontbuffer slots
+ * @frontbuffer_bits: bitmask of frontbuffer slots
+ *
+ * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
+ * from @old and setting them in @new. Both @old and @new can be NULL.
+ */
+void intel_frontbuffer_track(struct intel_frontbuffer *old,
+			     struct intel_frontbuffer *new,
+			     unsigned int frontbuffer_bits)
+{
+	/*
+	 * Control of individual bits within the mask are guarded by
+	 * the owning plane->mutex, i.e. we can never see concurrent
+	 * manipulation of individual bits. But since the bitfield as a whole
+	 * is updated using RMW, we need to use atomics in order to update
+	 * the bits.
+	 */
+	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
+		     BITS_PER_TYPE(atomic_t));
+
+	if (old) {
+		WARN_ON(!(atomic_read(&old->bits) & frontbuffer_bits));
+		atomic_andnot(frontbuffer_bits, &old->bits);
+	}
+
+	if (new) {
+		WARN_ON(atomic_read(&new->bits) & frontbuffer_bits);
+		atomic_or(frontbuffer_bits, &new->bits);
+	}
 }
diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.h b/drivers/gpu/drm/i915/display/intel_frontbuffer.h
index 5727320c8084..adc64d61a4a5 100644
--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.h
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.h
@@ -24,7 +24,10 @@
 #ifndef __INTEL_FRONTBUFFER_H__
 #define __INTEL_FRONTBUFFER_H__
 
-#include "gem/i915_gem_object.h"
+#include <linux/atomic.h>
+#include <linux/kref.h>
+
+#include "i915_active.h"
 
 struct drm_i915_private;
 struct drm_i915_gem_object;
@@ -37,23 +40,30 @@ enum fb_op_origin {
 	ORIGIN_DIRTYFB,
 };
 
-void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv,
+struct intel_frontbuffer {
+	struct kref ref;
+	atomic_t bits;
+	struct i915_active write;
+	struct drm_i915_gem_object *obj;
+};
+
+void intel_frontbuffer_flip_prepare(struct drm_i915_private *i915,
 				    unsigned frontbuffer_bits);
-void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv,
+void intel_frontbuffer_flip_complete(struct drm_i915_private *i915,
 				     unsigned frontbuffer_bits);
-void intel_frontbuffer_flip(struct drm_i915_private *dev_priv,
+void intel_frontbuffer_flip(struct drm_i915_private *i915,
 			    unsigned frontbuffer_bits);
 
-void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
-			       enum fb_op_origin origin,
-			       unsigned int frontbuffer_bits);
-void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
-			  enum fb_op_origin origin,
-			  unsigned int frontbuffer_bits);
+struct intel_frontbuffer *
+intel_frontbuffer_get(struct drm_i915_gem_object *obj);
+
+void __intel_fb_invalidate(struct intel_frontbuffer *front,
+			   enum fb_op_origin origin,
+			   unsigned int frontbuffer_bits);
 
 /**
- * intel_fb_obj_invalidate - invalidate frontbuffer object
- * @obj: GEM object to invalidate
+ * intel_frontbuffer_invalidate - invalidate frontbuffer object
+ * @front: frontbuffer to invalidate
  * @origin: which operation caused the invalidation
  *
  * This function gets called every time rendering on the given object starts and
@@ -62,37 +72,53 @@ void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
  * until the rendering completes or a flip on this frontbuffer plane is
  * scheduled.
  */
-static inline bool intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
-					   enum fb_op_origin origin)
+static inline bool intel_frontbuffer_invalidate(struct intel_frontbuffer *front,
+						enum fb_op_origin origin)
 {
 	unsigned int frontbuffer_bits;
 
-	frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
+	if (!front)
+		return false;
+
+	frontbuffer_bits = atomic_read(&front->bits);
 	if (!frontbuffer_bits)
 		return false;
 
-	__intel_fb_obj_invalidate(obj, origin, frontbuffer_bits);
+	__intel_fb_invalidate(front, origin, frontbuffer_bits);
 	return true;
 }
 
+void __intel_fb_flush(struct intel_frontbuffer *front,
+		      enum fb_op_origin origin,
+		      unsigned int frontbuffer_bits);
+
 /**
- * intel_fb_obj_flush - flush frontbuffer object
- * @obj: GEM object to flush
+ * intel_frontbuffer_flush - flush frontbuffer object
+ * @front: frontbuffer to flush
  * @origin: which operation caused the flush
  *
  * This function gets called every time rendering on the given object has
  * completed and frontbuffer caching can be started again.
  */
-static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
-				      enum fb_op_origin origin)
+static inline void intel_frontbuffer_flush(struct intel_frontbuffer *front,
+					   enum fb_op_origin origin)
 {
 	unsigned int frontbuffer_bits;
 
-	frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
+	if (!front)
+		return;
+
+	frontbuffer_bits = atomic_read(&front->bits);
 	if (!frontbuffer_bits)
 		return;
 
-	__intel_fb_obj_flush(obj, origin, frontbuffer_bits);
+	__intel_fb_flush(front, origin, frontbuffer_bits);
 }
 
+void intel_frontbuffer_track(struct intel_frontbuffer *old,
+			     struct intel_frontbuffer *new,
+			     unsigned int frontbuffer_bits);
+
+void intel_frontbuffer_put(struct intel_frontbuffer *front);
+
 #endif /* __INTEL_FRONTBUFFER_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 9b3f4f6644f3..1a15fa34205c 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -281,9 +281,9 @@ static void intel_overlay_flip_prepare(struct intel_overlay *overlay,
 
 	WARN_ON(overlay->old_vma);
 
-	i915_gem_track_fb(overlay->vma ? overlay->vma->obj : NULL,
-			  vma ? vma->obj : NULL,
-			  INTEL_FRONTBUFFER_OVERLAY(pipe));
+	intel_frontbuffer_track(overlay->vma ? overlay->vma->obj->frontbuffer : NULL,
+				vma ? vma->obj->frontbuffer : NULL,
+				INTEL_FRONTBUFFER_OVERLAY(pipe));
 
 	intel_frontbuffer_flip_prepare(overlay->i915,
 				       INTEL_FRONTBUFFER_OVERLAY(pipe));
@@ -768,7 +768,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 		ret = PTR_ERR(vma);
 		goto out_pin_section;
 	}
-	intel_fb_obj_flush(new_bo, ORIGIN_DIRTYFB);
+	intel_frontbuffer_flush(new_bo->frontbuffer, ORIGIN_DIRTYFB);
 
 	ret = i915_vma_put_fence(vma);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index 5295285d5843..a65d401f891c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -48,7 +48,7 @@ static void __i915_do_clflush(struct drm_i915_gem_object *obj)
 {
 	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
 	drm_clflush_sg(obj->mm.pages);
-	intel_fb_obj_flush(obj, ORIGIN_CPU);
+	intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
 }
 
 static void i915_clflush_work(struct work_struct *work)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 2e3ce2a69653..a1afc2690e9e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -551,13 +551,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	return 0;
 }
 
-static inline enum fb_op_origin
-fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
-{
-	return (domain == I915_GEM_DOMAIN_GTT ?
-		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
 /**
  * Called when user space prepares to use an object with the CPU, either
  * through the mmap ioctl's mapping or a GTT mapping.
@@ -661,9 +654,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 
 	i915_gem_object_unlock(obj);
 
-	if (write_domain != 0)
-		intel_fb_obj_invalidate(obj,
-					fb_write_origin(obj, write_domain));
+	if (write_domain)
+		intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
 
 out_unpin:
 	i915_gem_object_unpin_pages(obj);
@@ -783,7 +775,7 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 	}
 
 out:
-	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+	intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
 	obj->mm.dirty = true;
 	/* return with the pages pinned */
 	return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index a564c1e4231b..71d10ae90922 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -101,9 +101,6 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 		up_write(&mm->mmap_sem);
 		if (IS_ERR_VALUE(addr))
 			goto err;
-
-		/* This may race, but that's ok, it only gets set */
-		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
 	}
 	i915_gem_object_put(obj);
 
@@ -283,7 +280,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 		 * Userspace is now writing through an untracked VMA, abandon
 		 * all hope that the hardware is able to track future writes.
 		 */
-		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
 
 		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
 		if (IS_ERR(vma) && !view.type) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index eccd7f4768f8..cfed28cc6185 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -45,16 +45,6 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj)
 	return kmem_cache_free(global.slab_objects, obj);
 }
 
-static void
-frontbuffer_retire(struct i915_active_request *active,
-		   struct i915_request *request)
-{
-	struct drm_i915_gem_object *obj =
-		container_of(active, typeof(*obj), frontbuffer_write);
-
-	intel_fb_obj_flush(obj, ORIGIN_CS);
-}
-
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			  const struct drm_i915_gem_object_ops *ops)
 {
@@ -71,10 +61,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
 	obj->ops = ops;
 
-	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
-	i915_active_request_init(&obj->frontbuffer_write,
-				 NULL, frontbuffer_retire);
-
 	obj->mm.madv = I915_MADV_WILLNEED;
 	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
 	mutex_init(&obj->mm.get_page.lock);
@@ -186,7 +172,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 
 		GEM_BUG_ON(atomic_read(&obj->bind_count));
 		GEM_BUG_ON(obj->userfault_count);
-		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
 		GEM_BUG_ON(!list_empty(&obj->lut_list));
 
 		atomic_set(&obj->mm.pages_pin_count, 0);
@@ -259,6 +244,8 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 
+	GEM_BUG_ON(i915_gem_object_is_framebuffer(obj));
+
 	/*
 	 * Before we free the object, make sure any pure RCU-only
 	 * read-side critical sections are complete, e.g.
@@ -290,13 +277,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 		queue_work(i915->wq, &i915->mm.free_work);
 }
 
-static inline enum fb_op_origin
-fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
-{
-	return (domain == I915_GEM_DOMAIN_GTT ?
-		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
 	return !(obj->cache_level == I915_CACHE_NONE ||
@@ -319,8 +299,7 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
 		for_each_ggtt_vma(vma, obj)
 			intel_gt_flush_ggtt_writes(vma->vm->gt);
 
-		intel_fb_obj_flush(obj,
-				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+		intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
 
 		for_each_ggtt_vma(vma, obj) {
 			if (vma->iomap)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 3714cf234d64..abc23e7e13a7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -161,7 +161,7 @@ i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
 static inline bool
 i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
 {
-	return READ_ONCE(obj->framebuffer_references);
+	return READ_ONCE(obj->frontbuffer);
 }
 
 static inline unsigned int
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index d474c6ac4100..ede0eb4218a8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -13,6 +13,7 @@
 #include "i915_selftest.h"
 
 struct drm_i915_gem_object;
+struct intel_frontbuffer;
 
 /*
  * struct i915_lut_handle tracks the fast lookups from handle to vma used
@@ -141,9 +142,7 @@ struct drm_i915_gem_object {
 	 */
 	u16 write_domain;
 
-	atomic_t frontbuffer_bits;
-	unsigned int frontbuffer_ggtt_origin; /* write once */
-	struct i915_active_request frontbuffer_write;
+	struct intel_frontbuffer *frontbuffer;
 
 	/** Current tiling stride for the object, if it's tiled. */
 	unsigned int tiling_and_stride;
@@ -224,9 +223,6 @@ struct drm_i915_gem_object {
 		bool quirked:1;
 	} mm;
 
-	/** References from framebuffers, locks out tiling changes. */
-	unsigned int framebuffer_references;
-
 	/** Record of address bit 17 of each page at last unbind. */
 	unsigned long *bit_17;
 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index caedd865631d..86269de5550a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -136,7 +136,6 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	struct intel_engine_cs *engine;
 	struct i915_vma *vma;
-	unsigned int frontbuffer_bits;
 	int pin_count = 0;
 
 	seq_printf(m, "%pK: %c%c%c%c %8zdKiB %02x %02x %s%s%s",
@@ -226,10 +225,6 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	engine = i915_gem_object_last_write_engine(obj);
 	if (engine)
 		seq_printf(m, " (%s)", engine->name);
-
-	frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
-	if (frontbuffer_bits)
-		seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits);
 }
 
 struct file_stats {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c85e0d5adab8..0750c9303061 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2480,10 +2480,6 @@ int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
 		      u32 handle, u64 *offset);
 int i915_gem_mmap_gtt_version(void);
 
-void i915_gem_track_fb(struct drm_i915_gem_object *old,
-		       struct drm_i915_gem_object *new,
-		       unsigned frontbuffer_bits);
-
 int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno);
 
 static inline u32 i915_reset_count(struct i915_gpu_error *error)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 880b9af5f334..b9f61286f452 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -138,17 +138,19 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 	void *vaddr = obj->phys_handle->vaddr + args->offset;
 	char __user *user_data = u64_to_user_ptr(args->data_ptr);
 
-	/* We manually control the domain here and pretend that it
+	/*
+	 * We manually control the domain here and pretend that it
 	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
 	 */
-	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+	intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
+
 	if (copy_from_user(vaddr, user_data, args->size))
 		return -EFAULT;
 
 	drm_clflush_virt_range(vaddr, args->size);
 	intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);
 
-	intel_fb_obj_flush(obj, ORIGIN_CPU);
+	intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
 	return 0;
 }
 
@@ -593,7 +595,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		goto out_unpin;
 	}
 
-	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+	intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
 
 	user_data = u64_to_user_ptr(args->data_ptr);
 	offset = args->offset;
@@ -635,7 +637,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		user_data += page_length;
 		offset += page_length;
 	}
-	intel_fb_obj_flush(obj, ORIGIN_CPU);
+	intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
 
 	i915_gem_object_unlock_fence(obj, fence);
 out_unpin:
@@ -728,7 +730,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 		offset = 0;
 	}
 
-	intel_fb_obj_flush(obj, ORIGIN_CPU);
+	intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
 	i915_gem_object_unlock_fence(obj, fence);
 
 	return ret;
@@ -1774,39 +1776,6 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
 	return ret;
 }
 
-/**
- * i915_gem_track_fb - update frontbuffer tracking
- * @old: current GEM buffer for the frontbuffer slots
- * @new: new GEM buffer for the frontbuffer slots
- * @frontbuffer_bits: bitmask of frontbuffer slots
- *
- * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
- * from @old and setting them in @new. Both @old and @new can be NULL.
- */
-void i915_gem_track_fb(struct drm_i915_gem_object *old,
-		       struct drm_i915_gem_object *new,
-		       unsigned frontbuffer_bits)
-{
-	/* Control of individual bits within the mask are guarded by
-	 * the owning plane->mutex, i.e. we can never see concurrent
-	 * manipulation of individual bits. But since the bitfield as a whole
-	 * is updated using RMW, we need to use atomics in order to update
-	 * the bits.
-	 */
-	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
-		     BITS_PER_TYPE(atomic_t));
-
-	if (old) {
-		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
-		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
-	}
-
-	if (new) {
-		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
-		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
-	}
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_gem_device.c"
 #include "selftests/i915_gem.c"
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 85a8e6fd34d5..c387770c3764 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -927,8 +927,10 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	if (flags & EXEC_OBJECT_WRITE) {
 		obj->write_domain = I915_GEM_DOMAIN_RENDER;
 
-		if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
-			__i915_active_request_set(&obj->frontbuffer_write, rq);
+		if (intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CS))
+			i915_active_ref(&obj->frontbuffer->write,
+					rq->fence.context,
+					rq);
 
 		obj->read_domains = 0;
 	}
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index c4016164c34e..9ad9d1bde5f4 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -69,6 +69,7 @@ enum intel_output_type {
 
 struct intel_framebuffer {
 	struct drm_framebuffer base;
+	struct intel_frontbuffer *frontbuffer;
 	struct intel_rotation_info rot_info;
 
 	/* for each plane in the normal GTT view */
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 23/23] drm/i915: Markup expected timeline locks for i915_active
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (20 preceding siblings ...)
  2019-07-23 18:38 ` [PATCH 22/23] drm/i915: Extract intel_frontbuffer active tracking Chris Wilson
@ 2019-07-23 18:38 ` Chris Wilson
  2019-07-23 20:16 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Patchwork
                   ` (4 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-23 18:38 UTC (permalink / raw)
  To: intel-gfx

As every i915_active_request should be serialised by a dedicated lock,
i915_active consists of a tree of locks; one for each node. Mark up
the i915_active_request with what lock is supposed to be guarding it so
that we can verify that the serialised updates are indeed serialised.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/display/intel_overlay.c  |  2 +-
 .../gpu/drm/i915/gem/i915_gem_client_blt.c    |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
 drivers/gpu/drm/i915/gt/intel_context.c       |  2 +-
 drivers/gpu/drm/i915/gt/intel_engine_pool.h   |  2 +-
 drivers/gpu/drm/i915/gt/intel_timeline.c      |  7 +++----
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  4 ++++
 .../gpu/drm/i915/gt/selftests/mock_timeline.c |  2 +-
 drivers/gpu/drm/i915/i915_active.c            | 20 +++++++++++++------
 drivers/gpu/drm/i915/i915_active.h            | 12 +++++++++--
 drivers/gpu/drm/i915/i915_active_types.h      |  3 +++
 drivers/gpu/drm/i915/i915_vma.c               |  4 ++--
 drivers/gpu/drm/i915/selftests/i915_active.c  |  3 +--
 13 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 1a15fa34205c..d5e06b463567 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -230,7 +230,7 @@ alloc_request(struct intel_overlay *overlay, void (*fn)(struct intel_overlay *))
 	if (IS_ERR(rq))
 		return rq;
 
-	err = i915_active_ref(&overlay->last_flip, rq->fence.context, rq);
+	err = i915_active_ref(&overlay->last_flip, rq->timeline, rq);
 	if (err) {
 		i915_request_add(rq);
 		return ERR_PTR(err);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index 2312a0c6af89..f15b0b82e562 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -197,7 +197,7 @@ static void clear_pages_worker(struct work_struct *work)
 	 * keep track of the GPU activity within this vma/request, and
 	 * propagate the signal from the request to w->dma.
 	 */
-	err = i915_active_ref(&vma->active, rq->fence.context, rq);
+	err = i915_active_ref(&vma->active, rq->timeline, rq);
 	if (err)
 		goto out_request;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index e31431fa141e..e595fa239674 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -927,7 +927,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 		if (emit)
 			err = emit(rq, data);
 		if (err == 0)
-			err = i915_active_ref(&cb->base, rq->fence.context, rq);
+			err = i915_active_ref(&cb->base, rq->timeline, rq);
 
 		i915_request_add(rq);
 		if (err)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index bcd69c187313..b5a12b796a49 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -282,7 +282,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
 	 * words transfer the pinned ce object to tracked active request.
 	 */
 	GEM_BUG_ON(i915_active_is_idle(&ce->active));
-	err = i915_active_ref(&ce->active, rq->fence.context, rq);
+	err = i915_active_ref(&ce->active, rq->timeline, rq);
 
 unlock:
 	if (rq->timeline != tl)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
index f7a0a660c1c9..8d069efd9457 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
@@ -18,7 +18,7 @@ static inline int
 intel_engine_pool_mark_active(struct intel_engine_pool_node *node,
 			      struct i915_request *rq)
 {
-	return i915_active_ref(&node->active, rq->fence.context, rq);
+	return i915_active_ref(&node->active, rq->timeline, rq);
 }
 
 static inline void
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index eafd94d5e211..02fbe11b671b 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -254,7 +254,7 @@ int intel_timeline_init(struct intel_timeline *timeline,
 
 	mutex_init(&timeline->mutex);
 
-	INIT_ACTIVE_REQUEST(&timeline->last_request);
+	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
@@ -440,8 +440,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
 	 * free it after the current request is retired, which ensures that
 	 * all writes into the cacheline from previous requests are complete.
 	 */
-	err = i915_active_ref(&tl->hwsp_cacheline->active,
-			      tl->fence_context, rq);
+	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, rq);
 	if (err)
 		goto err_cacheline;
 
@@ -492,7 +491,7 @@ int intel_timeline_get_seqno(struct intel_timeline *tl,
 static int cacheline_ref(struct intel_timeline_cacheline *cl,
 			 struct i915_request *rq)
 {
-	return i915_active_ref(&cl->active, rq->fence.context, rq);
+	return i915_active_ref(&cl->active, rq->timeline, rq);
 }
 
 int intel_timeline_read_hwsp(struct i915_request *from,
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index d54113697745..321481403165 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -689,7 +689,9 @@ static int live_hwsp_wrap(void *arg)
 
 		tl->seqno = -4u;
 
+		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
 		err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
+		mutex_unlock(&tl->mutex);
 		if (err) {
 			i915_request_add(rq);
 			goto out;
@@ -704,7 +706,9 @@ static int live_hwsp_wrap(void *arg)
 		}
 		hwsp_seqno[0] = tl->hwsp_seqno;
 
+		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
 		err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
+		mutex_unlock(&tl->mutex);
 		if (err) {
 			i915_request_add(rq);
 			goto out;
diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
index 5c549205828a..598170efcaf6 100644
--- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
@@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context)
 
 	mutex_init(&timeline->mutex);
 
-	INIT_ACTIVE_REQUEST(&timeline->last_request);
+	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 13f304a29fc8..ab313c931abf 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -131,10 +131,11 @@ node_retire(struct i915_active_request *base, struct i915_request *rq)
 }
 
 static struct i915_active_request *
-active_instance(struct i915_active *ref, u64 idx)
+active_instance(struct i915_active *ref, struct intel_timeline *tl)
 {
 	struct active_node *node, *prealloc;
 	struct rb_node **p, *parent;
+	u64 idx = tl->fence_context;
 
 	/*
 	 * We track the most recently used timeline to skip a rbtree search
@@ -173,7 +174,7 @@ active_instance(struct i915_active *ref, u64 idx)
 	}
 
 	node = prealloc;
-	i915_active_request_init(&node->base, NULL, node_retire);
+	i915_active_request_init(&node->base, &tl->mutex, NULL, node_retire);
 	node->ref = ref;
 	node->timeline = idx;
 
@@ -206,18 +207,20 @@ void __i915_active_init(struct drm_i915_private *i915,
 }
 
 int i915_active_ref(struct i915_active *ref,
-		    u64 timeline,
+		    struct intel_timeline *tl,
 		    struct i915_request *rq)
 {
 	struct i915_active_request *active;
 	int err;
 
+	lockdep_assert_held(&tl->mutex);
+
 	/* Prevent reaping in case we malloc/wait while building the tree */
 	err = i915_active_acquire(ref);
 	if (err)
 		return err;
 
-	active = active_instance(ref, timeline);
+	active = active_instance(ref, tl);
 	if (!active) {
 		err = -ENOMEM;
 		goto out;
@@ -353,6 +356,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 	GEM_BUG_ON(!engine->mask);
 	for_each_engine_masked(engine, i915, engine->mask, tmp) {
 		struct intel_context *kctx = engine->kernel_context;
+		struct intel_timeline *tl = kctx->ring->timeline;
 		struct active_node *node;
 
 		node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
@@ -361,9 +365,9 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 			goto unwind;
 		}
 
-		i915_active_request_init(&node->base,
+		i915_active_request_init(&node->base, &tl->mutex,
 					 (void *)engine, node_retire);
-		node->timeline = kctx->ring->timeline->fence_context;
+		node->timeline = tl->fence_context;
 		node->ref = ref;
 		atomic_inc(&ref->count);
 
@@ -441,6 +445,10 @@ int i915_active_request_set(struct i915_active_request *active,
 {
 	int err;
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+	lockdep_assert_held(active->lock);
+#endif
+
 	/* Must maintain ordering wrt previous active requests */
 	err = i915_request_await_active_request(rq, active);
 	if (err)
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 911a8338007a..164c2b074cfc 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -58,15 +58,20 @@ void i915_active_retire_noop(struct i915_active_request *active,
  */
 static inline void
 i915_active_request_init(struct i915_active_request *active,
+			 struct mutex *lock,
 			 struct i915_request *rq,
 			 i915_active_retire_fn retire)
 {
 	RCU_INIT_POINTER(active->request, rq);
 	INIT_LIST_HEAD(&active->link);
 	active->retire = retire ?: i915_active_retire_noop;
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+	active->lock = lock;
+#endif
 }
 
-#define INIT_ACTIVE_REQUEST(name) i915_active_request_init((name), NULL, NULL)
+#define INIT_ACTIVE_REQUEST(name, lock) \
+	i915_active_request_init((name), (lock), NULL, NULL)
 
 /**
  * i915_active_request_set - updates the tracker to watch the current request
@@ -81,6 +86,9 @@ static inline void
 __i915_active_request_set(struct i915_active_request *active,
 			  struct i915_request *request)
 {
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+	lockdep_assert_held(active->lock);
+#endif
 	list_move(&active->link, &request->active_list);
 	rcu_assign_pointer(active->request, request);
 }
@@ -362,7 +370,7 @@ void __i915_active_init(struct drm_i915_private *i915,
 } while (0)
 
 int i915_active_ref(struct i915_active *ref,
-		    u64 timeline,
+		    struct intel_timeline *tl,
 		    struct i915_request *rq);
 
 int i915_active_wait(struct i915_active *ref);
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index 5b0a3024ce24..4311c2fab6e6 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -24,6 +24,9 @@ struct i915_active_request {
 	struct i915_request __rcu *request;
 	struct list_head link;
 	i915_active_retire_fn retire;
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+	struct mutex *lock;
+#endif
 };
 
 struct active_node;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index c387770c3764..f7f516a75c66 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -919,7 +919,7 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	err = i915_active_ref(&vma->active, rq->fence.context, rq);
+	err = i915_active_ref(&vma->active, rq->timeline, rq);
 	if (unlikely(err))
 		return err;
 
@@ -929,7 +929,7 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 
 		if (intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CS))
 			i915_active_ref(&obj->frontbuffer->write,
-					rq->fence.context,
+					rq->timeline,
 					rq);
 
 		obj->read_domains = 0;
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index e5cd5d47e380..77d844ac8b71 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -110,8 +110,7 @@ __live_active_setup(struct drm_i915_private *i915)
 						       submit,
 						       GFP_KERNEL);
 		if (err >= 0)
-			err = i915_active_ref(&active->base,
-					      rq->fence.context, rq);
+			err = i915_active_ref(&active->base, rq->timeline, rq);
 		i915_request_add(rq);
 		if (err) {
 			pr_err("Failed to track active ref!\n");
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (21 preceding siblings ...)
  2019-07-23 18:38 ` [PATCH 23/23] drm/i915: Markup expected timeline locks for i915_active Chris Wilson
@ 2019-07-23 20:16 ` Patchwork
  2019-07-23 20:27 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (3 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Patchwork @ 2019-07-23 20:16 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt
URL   : https://patchwork.freedesktop.org/series/64128/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
5142c7938082 drm/i915: Move aliasing_ppgtt underneath its i915_ggtt
c1c2cd5fa447 drm/i915/gt: Provide a local intel_context.vm
d33e99f4a915 drm/i915: Remove lrc default desc from GEM context
29da852e2c4d drm/i915: Push the ring creation flags to the backend
f0b041defb87 drm/i915: Flush extra hard after writing relocations through the GTT
425aa62d3f56 drm/i915: Hide unshrinkable context objects from the shrinker
770eaf4859b7 drm/i915/gt: Move the [class][inst] lookup for engines onto the GT
-:222: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#222: 
new file mode 100644

-:227: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#227: FILE: drivers/gpu/drm/i915/gt/intel_engine_user.c:1:
+/*

-:228: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#228: FILE: drivers/gpu/drm/i915/gt/intel_engine_user.c:2:
+ * SPDX-License-Identifier: MIT

-:299: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#299: FILE: drivers/gpu/drm/i915/gt/intel_engine_user.h:1:
+/*

-:300: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#300: FILE: drivers/gpu/drm/i915/gt/intel_engine_user.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 5 warnings, 0 checks, 415 lines checked
555f44871de4 drm/i915: Introduce for_each_user_engine()
-:215: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'engine__' - possible side-effects?
#215: FILE: drivers/gpu/drm/i915/i915_drv.h:1921:
+#define for_each_user_engine(engine__, i915__) \
+	for ((engine__) = rb_to_uabi_engine(rb_first(&(i915__)->uabi_engines));\
+	     (engine__); \
+	     (engine__) = rb_to_uabi_engine(rb_next(&(engine__)->uabi_node)))

total: 0 errors, 0 warnings, 1 checks, 187 lines checked
ef677c306ddc drm/i915: Use intel_engine_lookup_user for probing HAS_BSD etc
792a613c5337 drm/i915: Isolate i915_getparam_ioctl()
-:236: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#236: 
new file mode 100644

-:241: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#241: FILE: drivers/gpu/drm/i915/i915_getparam.c:1:
+/*

-:242: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#242: FILE: drivers/gpu/drm/i915/i915_getparam.c:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 3 warnings, 0 checks, 374 lines checked
07f880518d29 drm/i915: Only include active engines in the capture state
d6f963b6f644 drm/i915: Teach execbuffer to take the engine wakeref not GT
0cb1b4286d96 drm/i915/gt: Track timeline activeness in enter/exit
f8d56d1a25c2 drm/i915/gt: Convert timeline tracking to spinlock
850da2bb8f36 drm/i915/gt: Guard timeline pinning with its own mutex
a603363aef5e drm/i915/gt: Add to timeline requires the timeline mutex
f221052faf5e drm/i915: Protect request retirement with timeline->mutex
77c6fb9eab20 drm/i915: Replace struct_mutex for batch pool serialisation
-:305: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#305: 
new file mode 100644

-:310: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#310: FILE: drivers/gpu/drm/i915/gt/intel_engine_pool.c:1:
+/*

-:311: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#311: FILE: drivers/gpu/drm/i915/gt/intel_engine_pool.c:2:
+ * SPDX-License-Identifier: MIT

-:493: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#493: FILE: drivers/gpu/drm/i915/gt/intel_engine_pool.h:1:
+/*

-:494: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#494: FILE: drivers/gpu/drm/i915/gt/intel_engine_pool.h:2:
+ * SPDX-License-Identifier: MIT

-:533: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#533: FILE: drivers/gpu/drm/i915/gt/intel_engine_pool_types.h:1:
+/*

-:534: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#534: FILE: drivers/gpu/drm/i915/gt/intel_engine_pool_types.h:2:
+ * SPDX-License-Identifier: MIT

-:550: CHECK:UNCOMMENTED_DEFINITION: spinlock_t definition without comment
#550: FILE: drivers/gpu/drm/i915/gt/intel_engine_pool_types.h:18:
+	spinlock_t lock;

total: 0 errors, 7 warnings, 1 checks, 604 lines checked
31fa8535fe72 drm/i915/gt: Mark context->active_count as protected by timeline->mutex
7b475e35d161 drm/i915: Forgo last_fence active request tracking
afc26d30fb09 drm/i915/overlay: Switch to using i915_active tracking
d68803e8a458 drm/i915: Extract intel_frontbuffer active tracking
d2824ed0cc5d drm/i915: Markup expected timeline locks for i915_active
-:281: CHECK:UNCOMMENTED_DEFINITION: struct mutex definition without comment
#281: FILE: drivers/gpu/drm/i915/i915_active_types.h:28:
+	struct mutex *lock;

total: 0 errors, 0 warnings, 1 checks, 233 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (22 preceding siblings ...)
  2019-07-23 20:16 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Patchwork
@ 2019-07-23 20:27 ` Patchwork
  2019-07-23 20:38 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (2 subsequent siblings)
  26 siblings, 0 replies; 55+ messages in thread
From: Patchwork @ 2019-07-23 20:27 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt
URL   : https://patchwork.freedesktop.org/series/64128/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Move aliasing_ppgtt underneath its i915_ggtt
Okay!

Commit: drm/i915/gt: Provide a local intel_context.vm
Okay!

Commit: drm/i915: Remove lrc default desc from GEM context
Okay!

Commit: drm/i915: Push the ring creation flags to the backend
Okay!

Commit: drm/i915: Flush extra hard after writing relocations through the GTT
Okay!

Commit: drm/i915: Hide unshrinkable context objects from the shrinker
Okay!

Commit: drm/i915/gt: Move the [class][inst] lookup for engines onto the GT
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Introduce for_each_user_engine()
Okay!

Commit: drm/i915: Use intel_engine_lookup_user for probing HAS_BSD etc
Okay!

Commit: drm/i915: Isolate i915_getparam_ioctl()
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Only include active engines in the capture state
-drivers/gpu/drm/i915/i915_gpu_error.c:973:21: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gpu_error.c:973:21: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gpu_error.c:973:21: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gpu_error.c:973:21: warning: expression using sizeof(void)

Commit: drm/i915: Teach execbuffer to take the engine wakeref not GT
Okay!

Commit: drm/i915/gt: Track timeline activeness in enter/exit
Okay!

Commit: drm/i915/gt: Convert timeline tracking to spinlock
Okay!

Commit: drm/i915/gt: Guard timeline pinning with its own mutex
Okay!

Commit: drm/i915/gt: Add to timeline requires the timeline mutex
Okay!

Commit: drm/i915: Protect request retirement with timeline->mutex
Okay!

Commit: drm/i915: Replace struct_mutex for batch pool serialisation
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915/gt: Mark context->active_count as protected by timeline->mutex
Okay!

Commit: drm/i915: Forgo last_fence active request tracking
Okay!

Commit: drm/i915/overlay: Switch to using i915_active tracking
Okay!

Commit: drm/i915: Extract intel_frontbuffer active tracking
Okay!

Commit: drm/i915: Markup expected timeline locks for i915_active
Okay!

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (23 preceding siblings ...)
  2019-07-23 20:27 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2019-07-23 20:38 ` Patchwork
  2019-07-24  4:13 ` ✗ Fi.CI.IGT: failure " Patchwork
  2019-07-24  8:56 ` [PATCH 01/23] " Tvrtko Ursulin
  26 siblings, 0 replies; 55+ messages in thread
From: Patchwork @ 2019-07-23 20:38 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt
URL   : https://patchwork.freedesktop.org/series/64128/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_6543 -> Patchwork_13730
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/

Known issues
------------

  Here are the changes found in Patchwork_13730 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live_execlists:
    - fi-icl-guc:         [PASS][1] -> [INCOMPLETE][2] ([fdo#107713])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/fi-icl-guc/igt@i915_selftest@live_execlists.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/fi-icl-guc/igt@i915_selftest@live_execlists.html

  * igt@i915_selftest@live_hangcheck:
    - fi-kbl-guc:         [PASS][3] -> [INCOMPLETE][4] ([fdo#108744])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/fi-kbl-guc/igt@i915_selftest@live_hangcheck.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/fi-kbl-guc/igt@i915_selftest@live_hangcheck.html

  * igt@kms_frontbuffer_tracking@basic:
    - fi-icl-u2:          [PASS][5] -> [FAIL][6] ([fdo#103167])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/fi-icl-u2/igt@kms_frontbuffer_tracking@basic.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/fi-icl-u2/igt@kms_frontbuffer_tracking@basic.html

  
#### Possible fixes ####

  * igt@i915_module_load@reload:
    - fi-blb-e6850:       [INCOMPLETE][7] ([fdo#107718]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/fi-blb-e6850/igt@i915_module_load@reload.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/fi-blb-e6850/igt@i915_module_load@reload.html

  * igt@kms_busy@basic-flip-a:
    - fi-kbl-7567u:       [SKIP][9] ([fdo#109271] / [fdo#109278]) -> [PASS][10] +2 similar issues
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/fi-kbl-7567u/igt@kms_busy@basic-flip-a.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/fi-kbl-7567u/igt@kms_busy@basic-flip-a.html

  * igt@kms_busy@basic-flip-c:
    - fi-kbl-7500u:       [SKIP][11] ([fdo#109271] / [fdo#109278]) -> [PASS][12] +2 similar issues
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/fi-kbl-7500u/igt@kms_busy@basic-flip-c.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/fi-kbl-7500u/igt@kms_busy@basic-flip-c.html

  * igt@kms_frontbuffer_tracking@basic:
    - fi-icl-u3:          [FAIL][13] ([fdo#103167]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/fi-icl-u3/igt@kms_frontbuffer_tracking@basic.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/fi-icl-u3/igt@kms_frontbuffer_tracking@basic.html

  * igt@prime_vgem@basic-fence-read:
    - fi-icl-u3:          [DMESG-WARN][15] ([fdo#107724]) -> [PASS][16] +1 similar issue
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/fi-icl-u3/igt@prime_vgem@basic-fence-read.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/fi-icl-u3/igt@prime_vgem@basic-fence-read.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#108744]: https://bugs.freedesktop.org/show_bug.cgi?id=108744
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278


Participating hosts (56 -> 45)
------------------------------

  Missing    (11): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-skl-iommu fi-icl-y fi-icl-dsi fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_6543 -> Patchwork_13730

  CI-20190529: 20190529
  CI_DRM_6543: ef1bb6d271fab3750ce23b548954df7b28da8ce7 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5109: e5fd509e16ec649436be31f38eaa5b85cb7f72f1 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_13730: d2824ed0cc5d1efa4beb914e05578cc310e3951d @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

d2824ed0cc5d drm/i915: Markup expected timeline locks for i915_active
d68803e8a458 drm/i915: Extract intel_frontbuffer active tracking
afc26d30fb09 drm/i915/overlay: Switch to using i915_active tracking
7b475e35d161 drm/i915: Forgo last_fence active request tracking
31fa8535fe72 drm/i915/gt: Mark context->active_count as protected by timeline->mutex
77c6fb9eab20 drm/i915: Replace struct_mutex for batch pool serialisation
f221052faf5e drm/i915: Protect request retirement with timeline->mutex
a603363aef5e drm/i915/gt: Add to timeline requires the timeline mutex
850da2bb8f36 drm/i915/gt: Guard timeline pinning with its own mutex
f8d56d1a25c2 drm/i915/gt: Convert timeline tracking to spinlock
0cb1b4286d96 drm/i915/gt: Track timeline activeness in enter/exit
d6f963b6f644 drm/i915: Teach execbuffer to take the engine wakeref not GT
07f880518d29 drm/i915: Only include active engines in the capture state
792a613c5337 drm/i915: Isolate i915_getparam_ioctl()
ef677c306ddc drm/i915: Use intel_engine_lookup_user for probing HAS_BSD etc
555f44871de4 drm/i915: Introduce for_each_user_engine()
770eaf4859b7 drm/i915/gt: Move the [class][inst] lookup for engines onto the GT
425aa62d3f56 drm/i915: Hide unshrinkable context objects from the shrinker
f0b041defb87 drm/i915: Flush extra hard after writing relocations through the GTT
29da852e2c4d drm/i915: Push the ring creation flags to the backend
d33e99f4a915 drm/i915: Remove lrc default desc from GEM context
c1c2cd5fa447 drm/i915/gt: Provide a local intel_context.vm
5142c7938082 drm/i915: Move aliasing_ppgtt underneath its i915_ggtt

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* ✗ Fi.CI.IGT: failure for series starting with [01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (24 preceding siblings ...)
  2019-07-23 20:38 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-07-24  4:13 ` Patchwork
  2019-07-24  8:56 ` [PATCH 01/23] " Tvrtko Ursulin
  26 siblings, 0 replies; 55+ messages in thread
From: Patchwork @ 2019-07-24  4:13 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt
URL   : https://patchwork.freedesktop.org/series/64128/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_6543_full -> Patchwork_13730_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_13730_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_13730_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_13730_full:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@mock_timelines:
    - shard-skl:          [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-skl5/igt@i915_selftest@mock_timelines.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-skl2/igt@i915_selftest@mock_timelines.html

  * igt@runner@aborted:
    - shard-hsw:          NOTRUN -> [FAIL][3]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-hsw6/igt@runner@aborted.html
    - shard-kbl:          NOTRUN -> [FAIL][4]
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-kbl7/igt@runner@aborted.html
    - shard-apl:          NOTRUN -> [FAIL][5]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-apl6/igt@runner@aborted.html
    - shard-snb:          NOTRUN -> [FAIL][6]
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-snb1/igt@runner@aborted.html

  
Known issues
------------

  Here are the changes found in Patchwork_13730_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_balancer@busy:
    - shard-kbl:          [PASS][7] -> [FAIL][8] ([fdo#110946])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-kbl7/igt@gem_exec_balancer@busy.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-kbl7/igt@gem_exec_balancer@busy.html
    - shard-skl:          [PASS][9] -> [FAIL][10] ([fdo#110946])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-skl10/igt@gem_exec_balancer@busy.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-skl2/igt@gem_exec_balancer@busy.html
    - shard-glk:          [PASS][11] -> [FAIL][12] ([fdo#110946])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-glk9/igt@gem_exec_balancer@busy.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-glk5/igt@gem_exec_balancer@busy.html

  * igt@gem_sync@basic-many-each:
    - shard-iclb:         [PASS][13] -> [INCOMPLETE][14] ([fdo#107713] / [fdo#109100])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-iclb1/igt@gem_sync@basic-many-each.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-iclb7/igt@gem_sync@basic-many-each.html

  * igt@gem_workarounds@suspend-resume-context:
    - shard-apl:          [PASS][15] -> [DMESG-WARN][16] ([fdo#108566]) +3 similar issues
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-apl7/igt@gem_workarounds@suspend-resume-context.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-apl7/igt@gem_workarounds@suspend-resume-context.html

  * igt@i915_selftest@mock_timelines:
    - shard-glk:          [PASS][17] -> [INCOMPLETE][18] ([fdo#103359] / [k.org#198133])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-glk2/igt@i915_selftest@mock_timelines.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-glk9/igt@i915_selftest@mock_timelines.html
    - shard-hsw:          [PASS][19] -> [INCOMPLETE][20] ([fdo#103540])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-hsw1/igt@i915_selftest@mock_timelines.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-hsw6/igt@i915_selftest@mock_timelines.html
    - shard-kbl:          [PASS][21] -> [INCOMPLETE][22] ([fdo#103665])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-kbl1/igt@i915_selftest@mock_timelines.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-kbl7/igt@i915_selftest@mock_timelines.html
    - shard-iclb:         [PASS][23] -> [INCOMPLETE][24] ([fdo#107713])
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-iclb7/igt@i915_selftest@mock_timelines.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-iclb1/igt@i915_selftest@mock_timelines.html
    - shard-snb:          [PASS][25] -> [INCOMPLETE][26] ([fdo#105411])
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-snb2/igt@i915_selftest@mock_timelines.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-snb1/igt@i915_selftest@mock_timelines.html

  * igt@kms_cursor_legacy@cursor-vs-flip-toggle:
    - shard-hsw:          [PASS][27] -> [FAIL][28] ([fdo#103355])
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-hsw4/igt@kms_cursor_legacy@cursor-vs-flip-toggle.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-hsw6/igt@kms_cursor_legacy@cursor-vs-flip-toggle.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible:
    - shard-glk:          [PASS][29] -> [FAIL][30] ([fdo#105363])
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-glk7/igt@kms_flip@flip-vs-expired-vblank-interruptible.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-glk1/igt@kms_flip@flip-vs-expired-vblank-interruptible.html

  * igt@kms_flip@flip-vs-suspend:
    - shard-skl:          [PASS][31] -> [INCOMPLETE][32] ([fdo#109507])
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-skl3/igt@kms_flip@flip-vs-suspend.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-skl4/igt@kms_flip@flip-vs-suspend.html

  * igt@kms_flip@flip-vs-suspend-interruptible:
    - shard-kbl:          [PASS][33] -> [DMESG-WARN][34] ([fdo#108566]) +2 similar issues
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-kbl1/igt@kms_flip@flip-vs-suspend-interruptible.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-kbl4/igt@kms_flip@flip-vs-suspend-interruptible.html

  * igt@kms_flip@modeset-vs-vblank-race:
    - shard-glk:          [PASS][35] -> [FAIL][36] ([fdo#103060])
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-glk1/igt@kms_flip@modeset-vs-vblank-race.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-glk3/igt@kms_flip@modeset-vs-vblank-race.html

  * igt@kms_flip_tiling@flip-to-x-tiled:
    - shard-skl:          [PASS][37] -> [FAIL][38] ([fdo#108134])
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-skl5/igt@kms_flip_tiling@flip-to-x-tiled.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-skl4/igt@kms_flip_tiling@flip-to-x-tiled.html

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-shrfb-draw-pwrite:
    - shard-iclb:         [PASS][39] -> [FAIL][40] ([fdo#103167]) +3 similar issues
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-iclb2/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-shrfb-draw-pwrite.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-iclb2/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-shrfb-draw-pwrite.html

  * igt@kms_plane@plane-panning-top-left-pipe-a-planes:
    - shard-snb:          [PASS][41] -> [SKIP][42] ([fdo#109271])
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-snb5/igt@kms_plane@plane-panning-top-left-pipe-a-planes.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-snb7/igt@kms_plane@plane-panning-top-left-pipe-a-planes.html

  * igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min:
    - shard-skl:          [PASS][43] -> [FAIL][44] ([fdo#108145])
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-skl3/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-skl2/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min.html

  * igt@kms_psr@psr2_sprite_plane_move:
    - shard-iclb:         [PASS][45] -> [SKIP][46] ([fdo#109441]) +2 similar issues
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-iclb2/igt@kms_psr@psr2_sprite_plane_move.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-iclb8/igt@kms_psr@psr2_sprite_plane_move.html

  * igt@kms_sequence@get-forked-busy:
    - shard-apl:          [PASS][47] -> [INCOMPLETE][48] ([fdo#103927]) +1 similar issue
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-apl6/igt@kms_sequence@get-forked-busy.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-apl3/igt@kms_sequence@get-forked-busy.html

  * igt@kms_setmode@basic:
    - shard-kbl:          [PASS][49] -> [FAIL][50] ([fdo#99912])
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-kbl6/igt@kms_setmode@basic.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-kbl1/igt@kms_setmode@basic.html

  * igt@kms_vblank@pipe-b-ts-continuation-suspend:
    - shard-skl:          [PASS][51] -> [INCOMPLETE][52] ([fdo#104108]) +1 similar issue
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-skl1/igt@kms_vblank@pipe-b-ts-continuation-suspend.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-skl9/igt@kms_vblank@pipe-b-ts-continuation-suspend.html

  
#### Possible fixes ####

  * igt@gem_ctx_isolation@bcs0-s3:
    - shard-apl:          [DMESG-WARN][53] ([fdo#108566]) -> [PASS][54] +1 similar issue
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-apl8/igt@gem_ctx_isolation@bcs0-s3.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-apl8/igt@gem_ctx_isolation@bcs0-s3.html

  * igt@gem_exec_balancer@smoke:
    - shard-iclb:         [SKIP][55] ([fdo#110854]) -> [PASS][56]
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-iclb8/igt@gem_exec_balancer@smoke.html
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-iclb4/igt@gem_exec_balancer@smoke.html

  * igt@i915_pm_rpm@gem-evict-pwrite:
    - shard-iclb:         [INCOMPLETE][57] ([fdo#107713] / [fdo#108840]) -> [PASS][58]
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-iclb7/igt@i915_pm_rpm@gem-evict-pwrite.html
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-iclb7/igt@i915_pm_rpm@gem-evict-pwrite.html

  * igt@kms_cursor_crc@pipe-c-cursor-64x21-random:
    - shard-iclb:         [INCOMPLETE][59] ([fdo#107713]) -> [PASS][60] +1 similar issue
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-iclb7/igt@kms_cursor_crc@pipe-c-cursor-64x21-random.html
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-iclb8/igt@kms_cursor_crc@pipe-c-cursor-64x21-random.html

  * igt@kms_cursor_crc@pipe-c-cursor-suspend:
    - shard-skl:          [INCOMPLETE][61] ([fdo#110741]) -> [PASS][62]
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-skl10/igt@kms_cursor_crc@pipe-c-cursor-suspend.html
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-skl2/igt@kms_cursor_crc@pipe-c-cursor-suspend.html

  * igt@kms_flip@flip-vs-expired-vblank:
    - shard-glk:          [FAIL][63] ([fdo#105363]) -> [PASS][64]
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-glk5/igt@kms_flip@flip-vs-expired-vblank.html
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-glk4/igt@kms_flip@flip-vs-expired-vblank.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible:
    - shard-skl:          [FAIL][65] ([fdo#105363]) -> [PASS][66]
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-skl3/igt@kms_flip@flip-vs-expired-vblank-interruptible.html
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-skl2/igt@kms_flip@flip-vs-expired-vblank-interruptible.html

  * igt@kms_flip@flip-vs-suspend:
    - shard-hsw:          [INCOMPLETE][67] ([fdo#103540]) -> [PASS][68]
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-hsw4/igt@kms_flip@flip-vs-suspend.html
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-hsw8/igt@kms_flip@flip-vs-suspend.html

  * igt@kms_frontbuffer_tracking@fbc-suspend:
    - shard-iclb:         [FAIL][69] ([fdo#103167]) -> [PASS][70] +6 similar issues
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-iclb8/igt@kms_frontbuffer_tracking@fbc-suspend.html
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-iclb4/igt@kms_frontbuffer_tracking@fbc-suspend.html

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes:
    - shard-kbl:          [DMESG-WARN][71] ([fdo#108566]) -> [PASS][72] +3 similar issues
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-kbl3/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes.html
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-kbl4/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes.html

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes:
    - shard-kbl:          [INCOMPLETE][73] ([fdo#103665]) -> [PASS][74]
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-kbl1/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes.html
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-kbl2/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes.html

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes:
    - shard-skl:          [INCOMPLETE][75] ([fdo#104108]) -> [PASS][76] +1 similar issue
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-skl5/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes.html
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-skl5/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes.html

  * igt@kms_psr@psr2_cursor_blt:
    - shard-iclb:         [SKIP][77] ([fdo#109441]) -> [PASS][78] +1 similar issue
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-iclb8/igt@kms_psr@psr2_cursor_blt.html
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-iclb2/igt@kms_psr@psr2_cursor_blt.html

  * igt@perf@blocking:
    - shard-skl:          [FAIL][79] ([fdo#110728]) -> [PASS][80]
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-skl5/igt@perf@blocking.html
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-skl4/igt@perf@blocking.html

  * igt@perf_pmu@rc6:
    - shard-kbl:          [SKIP][81] ([fdo#109271]) -> [PASS][82]
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6543/shard-kbl2/igt@perf_pmu@rc6.html
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/shard-kbl3/igt@perf_pmu@rc6.html

  
  [fdo#103060]: https://bugs.freedesktop.org/show_bug.cgi?id=103060
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103355]: https://bugs.freedesktop.org/show_bug.cgi?id=103355
  [fdo#103359]: https://bugs.freedesktop.org/show_bug.cgi?id=103359
  [fdo#103540]: https://bugs.freedesktop.org/show_bug.cgi?id=103540
  [fdo#103665]: https://bugs.freedesktop.org/show_bug.cgi?id=103665
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#104108]: https://bugs.freedesktop.org/show_bug.cgi?id=104108
  [fdo#105363]: https://bugs.freedesktop.org/show_bug.cgi?id=105363
  [fdo#105411]: https://bugs.freedesktop.org/show_bug.cgi?id=105411
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#108134]: https://bugs.freedesktop.org/show_bug.cgi?id=108134
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108566]: https://bugs.freedesktop.org/show_bug.cgi?id=108566
  [fdo#108840]: https://bugs.freedesktop.org/show_bug.cgi?id=108840
  [fdo#109100]: https://bugs.freedesktop.org/show_bug.cgi?id=109100
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#109507]: https://bugs.freedesktop.org/show_bug.cgi?id=109507
  [fdo#110728]: https://bugs.freedesktop.org/show_bug.cgi?id=110728
  [fdo#110741]: https://bugs.freedesktop.org/show_bug.cgi?id=110741
  [fdo#110854]: https://bugs.freedesktop.org/show_bug.cgi?id=110854
  [fdo#110946]: https://bugs.freedesktop.org/show_bug.cgi?id=110946
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912
  [k.org#198133]: https://bugzilla.kernel.org/show_bug.cgi?id=198133


Participating hosts (9 -> 9)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_6543 -> Patchwork_13730

  CI-20190529: 20190529
  CI_DRM_6543: ef1bb6d271fab3750ce23b548954df7b28da8ce7 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5109: e5fd509e16ec649436be31f38eaa5b85cb7f72f1 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_13730: d2824ed0cc5d1efa4beb914e05578cc310e3951d @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13730/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt
  2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
                   ` (25 preceding siblings ...)
  2019-07-24  4:13 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2019-07-24  8:56 ` Tvrtko Ursulin
  2019-07-24  9:27   ` Chris Wilson
  26 siblings, 1 reply; 55+ messages in thread
From: Tvrtko Ursulin @ 2019-07-24  8:56 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 23/07/2019 19:38, Chris Wilson wrote:
> The aliasing_ppgtt provides a PIN_USER alias for the global gtt, so move
> it under the i915_ggtt to simplify later transformations to enable
> intel_context.vm.

Can you pinpoint what exactly it makes easier in the following patch? 
Just the __context_pin_ppgtt change? I have reservations about ggtt 
embedding aliasing ppgtt. But I guess it is handy for some usages.

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_context.c   |  7 +-
>   .../drm/i915/gem/selftests/i915_gem_context.c |  2 +-
>   drivers/gpu/drm/i915/gt/intel_ringbuffer.c    | 69 ++++++++++++-------
>   drivers/gpu/drm/i915/i915_drv.h               |  3 -
>   drivers/gpu/drm/i915/i915_gem_gtt.c           | 36 +++++-----
>   drivers/gpu/drm/i915/i915_gem_gtt.h           |  3 +
>   drivers/gpu/drm/i915/i915_vma.c               |  2 +-
>   7 files changed, 71 insertions(+), 51 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index ffb59d96d4d8..0f6b0678f548 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -459,8 +459,7 @@ __create_context(struct drm_i915_private *i915)
>   	i915_gem_context_set_recoverable(ctx);
>   
>   	ctx->ring_size = 4 * PAGE_SIZE;
> -	ctx->desc_template =
> -		default_desc_template(i915, &i915->mm.aliasing_ppgtt->vm);
> +	ctx->desc_template = default_desc_template(i915, NULL);
>   
>   	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
>   		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
> @@ -2258,8 +2257,8 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   		args->size = 0;
>   		if (ctx->vm)
>   			args->value = ctx->vm->total;
> -		else if (to_i915(dev)->mm.aliasing_ppgtt)
> -			args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total;
> +		else if (to_i915(dev)->ggtt.alias)
> +			args->value = to_i915(dev)->ggtt.alias->vm.total;
>   		else
>   			args->value = to_i915(dev)->ggtt.vm.total;
>   		break;
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> index db7856f0f31e..bbd17d4b8ffd 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> @@ -1190,7 +1190,7 @@ static int igt_ctx_readonly(void *arg)
>   		goto out_unlock;
>   	}
>   
> -	vm = ctx->vm ?: &i915->mm.aliasing_ppgtt->vm;
> +	vm = ctx->vm ?: &i915->ggtt.alias->vm;
>   	if (!vm || !vm->has_read_only) {
>   		err = 0;
>   		goto out_unlock;
> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> index 1de19dac4a14..b056f25c66f2 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> @@ -1392,30 +1392,41 @@ static void ring_context_destroy(struct kref *ref)
>   	intel_context_free(ce);
>   }
>   
> -static int __context_pin_ppgtt(struct i915_gem_context *ctx)
> +static struct i915_address_space *vm_alias(struct intel_context *ce)
> +{
> +	struct i915_address_space *vm;
> +
> +	vm = ce->gem_context->vm;
> +	if (!vm)
> +		vm = &ce->engine->gt->ggtt->alias->vm;
> +
> +	return vm;

vm_or_alias? Still not good.. get_vm might pass since it is local?

> +}
> +
> +static int __context_pin_ppgtt(struct intel_context *ce)
>   {
>   	struct i915_address_space *vm;
>   	int err = 0;
>   
> -	vm = ctx->vm ?: &ctx->i915->mm.aliasing_ppgtt->vm;
> +	vm = vm_alias(ce);
>   	if (vm)

Can't return NULL it seems. (Same below.)

>   		err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)));
>   
>   	return err;
>   }
>   
> -static void __context_unpin_ppgtt(struct i915_gem_context *ctx)
> +static void __context_unpin_ppgtt(struct intel_context *ce)
>   {
>   	struct i915_address_space *vm;
>   
> -	vm = ctx->vm ?: &ctx->i915->mm.aliasing_ppgtt->vm;
> +	vm = vm_alias(ce);
>   	if (vm)
>   		gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
>   }
>   
>   static void ring_context_unpin(struct intel_context *ce)
>   {
> -	__context_unpin_ppgtt(ce->gem_context);
> +	__context_unpin_ppgtt(ce);
>   }
>   
>   static struct i915_vma *
> @@ -1509,7 +1520,7 @@ static int ring_context_pin(struct intel_context *ce)
>   	if (err)
>   		return err;
>   
> -	err = __context_pin_ppgtt(ce->gem_context);
> +	err = __context_pin_ppgtt(ce);
>   	if (err)
>   		goto err_active;
>   
> @@ -1701,7 +1712,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
>   	return 0;
>   }
>   
> -static int remap_l3(struct i915_request *rq, int slice)
> +static int remap_l3_slice(struct i915_request *rq, int slice)
>   {
>   	u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
>   	int i;
> @@ -1729,15 +1740,34 @@ static int remap_l3(struct i915_request *rq, int slice)
>   	return 0;
>   }
>   
> +static int remap_l3(struct i915_request *rq)
> +{
> +	struct i915_gem_context *ctx = rq->gem_context;
> +	int i, err;
> +
> +	if (!ctx->remap_slice)
> +		return 0;
> +
> +	for (i = 0; i < MAX_L3_SLICES; i++) {

err declaration could go here but meh..

> +		if (!(ctx->remap_slice & BIT(i)))
> +			continue;
> +
> +		err = remap_l3_slice(rq, i);
> +		if (err)
> +			return err;

... or could stay at top and here you break and return err at the end. 
More meh. Depending on whether it is important or not to clear 
ctx->remap_slice on error.

> +	}
> +
> +	ctx->remap_slice = 0;
> +	return 0;
> +}
> +
>   static int switch_context(struct i915_request *rq)
>   {
>   	struct intel_engine_cs *engine = rq->engine;
> -	struct i915_gem_context *ctx = rq->gem_context;
> -	struct i915_address_space *vm =
> -		ctx->vm ?: &rq->i915->mm.aliasing_ppgtt->vm;
> +	struct i915_address_space *vm = vm_alias(rq->hw_context);
>   	unsigned int unwind_mm = 0;
>   	u32 hw_flags = 0;
> -	int ret, i;
> +	int ret;
>   
>   	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
>   
> @@ -1781,7 +1811,7 @@ static int switch_context(struct i915_request *rq)
>   		 * as nothing actually executes using the kernel context; it
>   		 * is purely used for flushing user contexts.
>   		 */
> -		if (i915_gem_context_is_kernel(ctx))
> +		if (i915_gem_context_is_kernel(rq->gem_context))
>   			hw_flags = MI_RESTORE_INHIBIT;
>   
>   		ret = mi_set_context(rq, hw_flags);
> @@ -1815,18 +1845,9 @@ static int switch_context(struct i915_request *rq)
>   			goto err_mm;
>   	}
>   
> -	if (ctx->remap_slice) {
> -		for (i = 0; i < MAX_L3_SLICES; i++) {
> -			if (!(ctx->remap_slice & BIT(i)))
> -				continue;
> -
> -			ret = remap_l3(rq, i);
> -			if (ret)
> -				goto err_mm;
> -		}
> -
> -		ctx->remap_slice = 0;
> -	}
> +	ret = remap_l3(rq);
> +	if (ret)
> +		goto err_mm;
>   
>   	return 0;
>   
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0e44cc4b2ca1..269a1b32b48b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -784,9 +784,6 @@ struct i915_gem_mm {
>   	 */
>   	struct vfsmount *gemfs;
>   
> -	/** PPGTT used for aliasing the PPGTT with the GTT */
> -	struct i915_ppgtt *aliasing_ppgtt;
> -
>   	struct notifier_block oom_notifier;
>   	struct notifier_block vmap_notifier;
>   	struct shrinker shrinker;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 4dd1fa956143..8304b98b0bf8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2446,18 +2446,18 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
>   		pte_flags |= PTE_READ_ONLY;
>   
>   	if (flags & I915_VMA_LOCAL_BIND) {
> -		struct i915_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
> +		struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;

Keeping the variable name would have reduced the churn.

>   
>   		if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
> -			ret = appgtt->vm.allocate_va_range(&appgtt->vm,
> -							   vma->node.start,
> -							   vma->size);
> +			ret = alias->vm.allocate_va_range(&alias->vm,
> +							  vma->node.start,
> +							  vma->size);
>   			if (ret)
>   				return ret;
>   		}
>   
> -		appgtt->vm.insert_entries(&appgtt->vm, vma, cache_level,
> -					  pte_flags);
> +		alias->vm.insert_entries(&alias->vm, vma,
> +					 cache_level, pte_flags);
>   	}
>   
>   	if (flags & I915_VMA_GLOBAL_BIND) {
> @@ -2485,7 +2485,8 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
>   	}
>   
>   	if (vma->flags & I915_VMA_LOCAL_BIND) {
> -		struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->vm;
> +		struct i915_address_space *vm =
> +			&i915_vm_to_ggtt(vma->vm)->alias->vm;
>   
>   		vm->clear_range(vm, vma->node.start, vma->size);
>   	}
> @@ -2542,13 +2543,12 @@ static void i915_gtt_color_adjust(const struct drm_mm_node *node,
>   		*end -= I915_GTT_PAGE_SIZE;
>   }
>   
> -static int init_aliasing_ppgtt(struct drm_i915_private *i915)
> +static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
>   {
> -	struct i915_ggtt *ggtt = &i915->ggtt;
>   	struct i915_ppgtt *ppgtt;
>   	int err;
>   
> -	ppgtt = i915_ppgtt_create(i915);
> +	ppgtt = i915_ppgtt_create(ggtt->vm.i915);
>   	if (IS_ERR(ppgtt))
>   		return PTR_ERR(ppgtt);
>   
> @@ -2567,7 +2567,7 @@ static int init_aliasing_ppgtt(struct drm_i915_private *i915)
>   	if (err)
>   		goto err_ppgtt;
>   
> -	i915->mm.aliasing_ppgtt = ppgtt;
> +	ggtt->alias = ppgtt;
>   
>   	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
>   	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
> @@ -2582,14 +2582,14 @@ static int init_aliasing_ppgtt(struct drm_i915_private *i915)
>   	return err;
>   }
>   
> -static void fini_aliasing_ppgtt(struct drm_i915_private *i915)
> +static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
>   {
> -	struct i915_ggtt *ggtt = &i915->ggtt;
> +	struct drm_i915_private *i915 = ggtt->vm.i915;
>   	struct i915_ppgtt *ppgtt;
>   
>   	mutex_lock(&i915->drm.struct_mutex);
>   
> -	ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt);
> +	ppgtt = fetch_and_zero(&ggtt->alias);
>   	if (!ppgtt)
>   		goto out;
>   
> @@ -2706,7 +2706,7 @@ int i915_init_ggtt(struct drm_i915_private *i915)
>   		return ret;
>   
>   	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
> -		ret = init_aliasing_ppgtt(i915);
> +		ret = init_aliasing_ppgtt(&i915->ggtt);
>   		if (ret)
>   			cleanup_init_ggtt(&i915->ggtt);
>   	}
> @@ -2752,7 +2752,7 @@ void i915_ggtt_driver_release(struct drm_i915_private *i915)
>   {
>   	struct pagevec *pvec;
>   
> -	fini_aliasing_ppgtt(i915);
> +	fini_aliasing_ppgtt(&i915->ggtt);
>   
>   	ggtt_cleanup_hw(&i915->ggtt);
>   
> @@ -3585,7 +3585,7 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm,
>   	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
>   	GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
>   	GEM_BUG_ON(range_overflows(offset, size, vm->total));
> -	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
> +	GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm);
>   	GEM_BUG_ON(drm_mm_node_allocated(node));
>   
>   	node->size = size;
> @@ -3682,7 +3682,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
>   	GEM_BUG_ON(start >= end);
>   	GEM_BUG_ON(start > 0  && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
>   	GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
> -	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
> +	GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm);
>   	GEM_BUG_ON(drm_mm_node_allocated(node));
>   
>   	if (unlikely(range_overflows(start, size, end)))
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index cea59ef1a365..51274483502e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -394,6 +394,9 @@ struct i915_ggtt {
>   	void __iomem *gsm;
>   	void (*invalidate)(struct i915_ggtt *ggtt);
>   
> +	/** PPGTT used for aliasing the PPGTT with the GTT */
> +	struct i915_ppgtt *alias;
> +
>   	bool do_idle_maps;
>   
>   	int mtrr;
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index ee73baf29415..eb16a1a93bbc 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -104,7 +104,7 @@ vma_create(struct drm_i915_gem_object *obj,
>   	struct rb_node *rb, **p;
>   
>   	/* The aliasing_ppgtt should never be used directly! */
> -	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
> +	GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm);
>   
>   	vma = i915_vma_alloc();
>   	if (vma == NULL)
> 

Nothing but nitpicks, looks okay in principle.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 03/23] drm/i915: Remove lrc default desc from GEM context
  2019-07-23 18:38 ` [PATCH 03/23] drm/i915: Remove lrc default desc from GEM context Chris Wilson
@ 2019-07-24  9:20   ` Tvrtko Ursulin
  2019-08-01  8:37     ` Tvrtko Ursulin
  0 siblings, 1 reply; 55+ messages in thread
From: Tvrtko Ursulin @ 2019-07-24  9:20 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 23/07/2019 19:38, Chris Wilson wrote:
> We only compute the lrc_descriptor() on pinning the context, i.e.
> infrequently, so we do not benefit from storing the template as the
> addressing mode is also fixed for the lifetime of the intel_context.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_context.c   | 28 ++-----------------
>   .../gpu/drm/i915/gem/i915_gem_context_types.h |  2 --
>   drivers/gpu/drm/i915/gt/intel_lrc.c           | 12 +++++---
>   drivers/gpu/drm/i915/gvt/scheduler.c          |  3 --
>   4 files changed, 10 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index b28c7ca681a8..1b3dc7258ef2 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -397,30 +397,6 @@ static void context_close(struct i915_gem_context *ctx)
>   	i915_gem_context_put(ctx);
>   }
>   
> -static u32 default_desc_template(const struct drm_i915_private *i915,
> -				 const struct i915_address_space *vm)
> -{
> -	u32 address_mode;
> -	u32 desc;
> -
> -	desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
> -
> -	address_mode = INTEL_LEGACY_32B_CONTEXT;
> -	if (vm && i915_vm_is_4lvl(vm))
> -		address_mode = INTEL_LEGACY_64B_CONTEXT;
> -	desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT;
> -
> -	if (IS_GEN(i915, 8))
> -		desc |= GEN8_CTX_L3LLC_COHERENT;
> -
> -	/* TODO: WaDisableLiteRestore when we start using semaphore
> -	 * signalling between Command Streamers
> -	 * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE;
> -	 */
> -
> -	return desc;
> -}
> -
>   static struct i915_gem_context *
>   __create_context(struct drm_i915_private *i915)
>   {
> @@ -459,7 +435,6 @@ __create_context(struct drm_i915_private *i915)
>   	i915_gem_context_set_recoverable(ctx);
>   
>   	ctx->ring_size = 4 * PAGE_SIZE;
> -	ctx->desc_template = default_desc_template(i915, NULL);
>   
>   	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
>   		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
> @@ -478,8 +453,9 @@ __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
>   	struct i915_gem_engines_iter it;
>   	struct intel_context *ce;
>   
> +	GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old));
> +
>   	ctx->vm = i915_vm_get(vm);
> -	ctx->desc_template = default_desc_template(ctx->i915, vm);
>   
>   	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
>   		i915_vm_put(ce->vm);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> index 0ee61482ef94..a02d98494078 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> @@ -171,8 +171,6 @@ struct i915_gem_context {
>   
>   	/** ring_size: size for allocating the per-engine ring buffer */
>   	u32 ring_size;
> -	/** desc_template: invariant fields for the HW context descriptor */
> -	u32 desc_template;
>   
>   	/** guilty_count: How many times this context has caused a GPU hang. */
>   	atomic_t guilty_count;
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 632344c163a8..5fdac40015cf 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -418,13 +418,17 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
>   	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
>   	BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
>   
> -	desc = ctx->desc_template;				/* bits  0-11 */
> -	GEM_BUG_ON(desc & GENMASK_ULL(63, 12));
> +	desc = INTEL_LEGACY_32B_CONTEXT;
> +	if (i915_vm_is_4lvl(ce->vm))
> +		desc = INTEL_LEGACY_64B_CONTEXT;

if-else now that the vm null check is gone.

> +	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
> +
> +	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
> +	if (IS_GEN(engine->i915, 8))
> +		desc |= GEN8_CTX_L3LLC_COHERENT;

Don't know.. it's nicer to keep it stored both for Gen and context 
state. What's the problem with it?

Regards,

Tvrtko

>   
>   	desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
>   								/* bits 12-31 */
> -	GEM_BUG_ON(desc & GENMASK_ULL(63, 32));
> -
>   	/*
>   	 * The following 32bits are copied into the OA reports (dword 2).
>   	 * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
> index f68798ab1e7c..4c018fb1359c 100644
> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
> @@ -291,9 +291,6 @@ shadow_context_descriptor_update(struct intel_context *ce,
>   	 * Update bits 0-11 of the context descriptor which includes flags
>   	 * like GEN8_CTX_* cached in desc_template
>   	 */
> -	desc &= U64_MAX << 12;
> -	desc |= ce->gem_context->desc_template & ((1ULL << 12) - 1);
> -
>   	desc &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
>   	desc |= workload->ctx_desc.addressing_mode <<
>   		GEN8_CTX_ADDRESSING_MODE_SHIFT;
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt
  2019-07-24  8:56 ` [PATCH 01/23] " Tvrtko Ursulin
@ 2019-07-24  9:27   ` Chris Wilson
  2019-07-24  9:37     ` Chris Wilson
  0 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2019-07-24  9:27 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-07-24 09:56:34)
> 
> On 23/07/2019 19:38, Chris Wilson wrote:
> > The aliasing_ppgtt provides a PIN_USER alias for the global gtt, so move
> > it under the i915_ggtt to simplify later transformations to enable
> > intel_context.vm.
> 
> Can you pin point what exactly it makes easier in the following patch? 
> Just the __context_pin_ppgtt change? I have reservations about ggtt 
> embedding aliasing ppgtt. But I guess it is handy for some usages.

The aliasing_ppgtt is an adjunct of the i915_ggtt. Conceptually, we replace
the ggtt with one that has both global and user tracking with a reserved
slice for the aliasing pd. It should be a function of gt, not mm, and it
is purely an extension of our ggtt.

For aliasing [gen6] user contexts, our address space refers to the
i915_ggtt, but we must write our entries (for the most part) into the
alias. Having ce->vm always pointing to the correct gtt has been high on
the wish list for about 6 years, it just never fell into place. This I
feel is the missing link.

> > +static struct i915_address_space *vm_alias(struct intel_context *ce)
> > +{
> > +     struct i915_address_space *vm;
> > +
> > +     vm = ce->gem_context->vm;
> > +     if (!vm)
> > +             vm = &ce->engine->gt->ggtt->alias->vm;
> > +
> > +     return vm;
> 
> vm_or_alias? Still not good.. get_vm might pass since it is local?

vm is a perfectly acceptable alias for itself. I prefer vm_alias() as I
think it makes it clearer that we are principally concerned with the
PIN_USER aspect of the gtt.

> > +}
> > +
> > +static int __context_pin_ppgtt(struct intel_context *ce)
> >   {
> >       struct i915_address_space *vm;
> >       int err = 0;
> >   
> > -     vm = ctx->vm ?: &ctx->i915->mm.aliasing_ppgtt->vm;
> > +     vm = vm_alias(ce);
> >       if (vm)
> 
> Can't return NULL it seems. (Same below.)

Are you so sure?

ce->gem_context->vm is only !NULL if there is a full-ppgtt
&ggtt->alias->vm is only !NULL if there is an aliasing-ppgtt

There may be contexts with neither (gen4, gen5).

> > @@ -1701,7 +1712,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
> >       return 0;
> >   }
> >   
> > -static int remap_l3(struct i915_request *rq, int slice)
> > +static int remap_l3_slice(struct i915_request *rq, int slice)
> >   {
> >       u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
> >       int i;
> > @@ -1729,15 +1740,34 @@ static int remap_l3(struct i915_request *rq, int slice)
> >       return 0;
> >   }
> >   
> > +static int remap_l3(struct i915_request *rq)
> > +{
> > +     struct i915_gem_context *ctx = rq->gem_context;
> > +     int i, err;
> > +
> > +     if (!ctx->remap_slice)
> > +             return 0;
> > +
> > +     for (i = 0; i < MAX_L3_SLICES; i++) {
> 
> err declaration could go here but meh..
> 
> > +             if (!(ctx->remap_slice & BIT(i)))
> > +                     continue;
> > +
> > +             err = remap_l3_slice(rq, i);
> > +             if (err)
> > +                     return err;
> 
> ... or could stay at top and here you break and return err at the end. 
> More meh. Depending whether it is important or not to clear 
> ctx->remap_slice on error.

We don't want to clear remap_slice on error as we haven't applied it and
should try again on the next attempted request.

> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index 4dd1fa956143..8304b98b0bf8 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -2446,18 +2446,18 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
> >               pte_flags |= PTE_READ_ONLY;
> >   
> >       if (flags & I915_VMA_LOCAL_BIND) {
> > -             struct i915_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
> > +             struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;
> 
> Keeping the variable name would have reduced the churn.

I went for consistency with the more succinct name.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt
  2019-07-24  9:27   ` Chris Wilson
@ 2019-07-24  9:37     ` Chris Wilson
  2019-07-24  9:47       ` Chris Wilson
  0 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2019-07-24  9:37 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Chris Wilson (2019-07-24 10:27:38)
> Quoting Tvrtko Ursulin (2019-07-24 09:56:34)
> > 
> > On 23/07/2019 19:38, Chris Wilson wrote:
> > > +static int __context_pin_ppgtt(struct intel_context *ce)
> > >   {
> > >       struct i915_address_space *vm;
> > >       int err = 0;
> > >   
> > > -     vm = ctx->vm ?: &ctx->i915->mm.aliasing_ppgtt->vm;
> > > +     vm = vm_alias(ce);
> > >       if (vm)
> > 
> > Can't return NULL it seems. (Same below.)
> 
> Are you so sure?
> 
> ce->gem_context->vm is only !NULL if there is a full-ppgtt
> &ggtt->alias->vm is only !NULL if there is an aliasing-ppgtt
> 
> There may be contexts with neither (gen4, gen5).

It's not until the next patch where we always set ce->vm will it be
non-NULL. Point taken.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt
  2019-07-24  9:37     ` Chris Wilson
@ 2019-07-24  9:47       ` Chris Wilson
  2019-07-24  9:54         ` Tvrtko Ursulin
  0 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2019-07-24  9:47 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Chris Wilson (2019-07-24 10:37:19)
> Quoting Chris Wilson (2019-07-24 10:27:38)
> > Quoting Tvrtko Ursulin (2019-07-24 09:56:34)
> > > 
> > > On 23/07/2019 19:38, Chris Wilson wrote:
> > > > +static int __context_pin_ppgtt(struct intel_context *ce)
> > > >   {
> > > >       struct i915_address_space *vm;
> > > >       int err = 0;
> > > >   
> > > > -     vm = ctx->vm ?: &ctx->i915->mm.aliasing_ppgtt->vm;
> > > > +     vm = vm_alias(ce);
> > > >       if (vm)
> > > 
> > > Can't return NULL it seems. (Same below.)
> > 
> > Are you so sure?
> > 
> > ce->gem_context->vm is only !NULL if there is a full-ppgtt
> > &ggtt->alias->vm is only !NULL if there is an aliasing-ppgtt
> > 
> > There may be contexts with neither (gen4, gen5).
> 
> It's not until the next patch where we always set ce->vm will it be
> non-NULL. Point taken.

Except, we purposely replace the i915_ggtt ce->vm with its alias, which
may still be NULL. (Which is fine as that is perma-pinned and we don't
need to fiddle.)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt
  2019-07-24  9:47       ` Chris Wilson
@ 2019-07-24  9:54         ` Tvrtko Ursulin
  0 siblings, 0 replies; 55+ messages in thread
From: Tvrtko Ursulin @ 2019-07-24  9:54 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 24/07/2019 10:47, Chris Wilson wrote:
> Quoting Chris Wilson (2019-07-24 10:37:19)
>> Quoting Chris Wilson (2019-07-24 10:27:38)
>>> Quoting Tvrtko Ursulin (2019-07-24 09:56:34)
>>>>
>>>> On 23/07/2019 19:38, Chris Wilson wrote:
>>>>> +static int __context_pin_ppgtt(struct intel_context *ce)
>>>>>    {
>>>>>        struct i915_address_space *vm;
>>>>>        int err = 0;
>>>>>    
>>>>> -     vm = ctx->vm ?: &ctx->i915->mm.aliasing_ppgtt->vm;
>>>>> +     vm = vm_alias(ce);
>>>>>        if (vm)
>>>>
>>>> Can't return NULL it seems. (Same below.)
>>>
>>> Are you so sure?
>>>
>>> ce->gem_context->vm is only !NULL if there is a full-ppgtt
>>> &ggtt->alias->vm is only !NULL if there is an aliasing-ppgtt
>>>
>>> There may be contexts with neither (gen4, gen5).
>>
>> It's not until the next patch where we always set ce->vm will it be
>> non-NULL. Point taken.
> 
> Except, we purposely replace the i915_ggtt ce->vm with its alias, which
> may still be NULL. (Which is fine as that is perma-pinned and we don't
> need to fiddle.)

I saw the address-of and thought "it can't be NULL", but forgot about tricks 
with the member at offset zero.. nvm.. Drop a comment if you feel like it. 
At least it is now consolidated so there is a place.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 04/23] drm/i915: Push the ring creation flags to the backend
  2019-07-23 18:38 ` [PATCH 04/23] drm/i915: Push the ring creation flags to the backend Chris Wilson
@ 2019-07-24 11:11   ` Tvrtko Ursulin
  2019-07-26  8:43     ` Chris Wilson
  0 siblings, 1 reply; 55+ messages in thread
From: Tvrtko Ursulin @ 2019-07-24 11:11 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 23/07/2019 19:38, Chris Wilson wrote:
> Push the ring creation flags from the outer GEM context to the inner
> intel_context to avoid an unsightly back-reference from inside the
> backend.

Sorry, I find this quite ugly. Passing in integers in a pointer field and 
casting back and forth.

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_context.c   | 25 +++++++++++++------
>   .../gpu/drm/i915/gem/i915_gem_context_types.h |  3 ---
>   drivers/gpu/drm/i915/gt/intel_context.c       |  1 +
>   drivers/gpu/drm/i915/gt/intel_context.h       |  5 ++++
>   drivers/gpu/drm/i915/gt/intel_lrc.c           |  5 ++--
>   drivers/gpu/drm/i915/i915_debugfs.c           | 23 +++++++++++------
>   6 files changed, 41 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index 1b3dc7258ef2..18b226bc5e3a 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -296,6 +296,8 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
>   			return ERR_CAST(ce);
>   		}
>   
> +		ce->ring = __intel_context_ring_size(SZ_16K);
> +
>   		e->engines[id] = ce;
>   	}
>   	e->num_engines = id;
> @@ -434,8 +436,6 @@ __create_context(struct drm_i915_private *i915)
>   	i915_gem_context_set_bannable(ctx);
>   	i915_gem_context_set_recoverable(ctx);
>   
> -	ctx->ring_size = 4 * PAGE_SIZE;
> -
>   	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
>   		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
>   
> @@ -565,8 +565,15 @@ i915_gem_context_create_gvt(struct drm_device *dev)
>   	i915_gem_context_set_closed(ctx); /* not user accessible */
>   	i915_gem_context_clear_bannable(ctx);
>   	i915_gem_context_set_force_single_submission(ctx);
> -	if (!USES_GUC_SUBMISSION(to_i915(dev)))
> -		ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */
> +	if (!USES_GUC_SUBMISSION(to_i915(dev))) {
> +		const unsigned long ring_size = 512 * SZ_4K; /* max */
> +		struct i915_gem_engines_iter it;
> +		struct intel_context *ce;
> +
> +		for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
> +			ce->ring = __intel_context_ring_size(ring_size);
> +		i915_gem_context_unlock_engines(ctx);
> +	}
>   
>   	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
>   out:
> @@ -605,7 +612,6 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
>   
>   	i915_gem_context_clear_bannable(ctx);
>   	ctx->sched.priority = I915_USER_PRIORITY(prio);
> -	ctx->ring_size = PAGE_SIZE;
>   
>   	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
>   
> @@ -1589,6 +1595,7 @@ set_engines(struct i915_gem_context *ctx,
>   	for (n = 0; n < num_engines; n++) {
>   		struct i915_engine_class_instance ci;
>   		struct intel_engine_cs *engine;
> +		struct intel_context *ce;
>   
>   		if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) {
>   			__free_engines(set.engines, n);
> @@ -1611,11 +1618,15 @@ set_engines(struct i915_gem_context *ctx,
>   			return -ENOENT;
>   		}
>   
> -		set.engines->engines[n] = intel_context_create(ctx, engine);
> -		if (!set.engines->engines[n]) {
> +		ce = intel_context_create(ctx, engine);
> +		if (!ce) {
>   			__free_engines(set.engines, n);
>   			return -ENOMEM;
>   		}
> +
> +		ce->ring = __intel_context_ring_size(SZ_16K);
> +
> +		set.engines->engines[n] = ce;
>   	}
>   	set.engines->num_engines = num_engines;
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> index a02d98494078..260d59cc3de8 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> @@ -169,9 +169,6 @@ struct i915_gem_context {
>   
>   	struct i915_sched_attr sched;
>   
> -	/** ring_size: size for allocating the per-engine ring buffer */
> -	u32 ring_size;
> -
>   	/** guilty_count: How many times this context has caused a GPU hang. */
>   	atomic_t guilty_count;
>   	/**
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 9e4f51ce52ff..295fa0ddbcac 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -196,6 +196,7 @@ intel_context_init(struct intel_context *ce,
>   	ce->engine = engine;
>   	ce->ops = engine->cops;
>   	ce->sseu = engine->sseu;
> +	ce->ring = __intel_context_ring_size(SZ_4K);
>   
>   	INIT_LIST_HEAD(&ce->signal_link);
>   	INIT_LIST_HEAD(&ce->signals);
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> index 23c7e4c0ce7c..3f54eb3d10ab 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> @@ -145,4 +145,9 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
>   
>   struct i915_request *intel_context_create_request(struct intel_context *ce);
>   
> +static inline struct intel_ring *__intel_context_ring_size(u64 sz)
> +{
> +	return u64_to_ptr(struct intel_ring, sz);
> +}
> +
>   #endif /* __INTEL_CONTEXT_H__ */
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 5fdac40015cf..3f1b20cc50c2 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -3140,9 +3140,8 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
>   		goto error_deref_obj;
>   	}
>   
> -	ring = intel_engine_create_ring(engine,
> -					timeline,
> -					ce->gem_context->ring_size);
> +	ring = intel_engine_create_ring(engine, timeline,
> +					(unsigned long)ce->ring);
>   	intel_timeline_put(timeline);
>   	if (IS_ERR(ring)) {
>   		ret = PTR_ERR(ring);
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 6d3911469801..e237bcecfa1f 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -328,10 +328,14 @@ static void print_context_stats(struct seq_file *m,
>   
>   		for_each_gem_engine(ce,
>   				    i915_gem_context_lock_engines(ctx), it) {
> -			if (ce->state)
> -				per_file_stats(0, ce->state->obj, &kstats);
> -			if (ce->ring)
> +			intel_context_lock_pinned(ce);
> +			if (intel_context_is_pinned(ce)) {

And these hunks do not seem to belong to this patch.

> +				if (ce->state)
> +					per_file_stats(0,
> +						       ce->state->obj, &kstats);
>   				per_file_stats(0, ce->ring->vma->obj, &kstats);
> +			}
> +			intel_context_unlock_pinned(ce);
>   		}
>   		i915_gem_context_unlock_engines(ctx);
>   
> @@ -1677,12 +1681,15 @@ static int i915_context_status(struct seq_file *m, void *unused)
>   
>   		for_each_gem_engine(ce,
>   				    i915_gem_context_lock_engines(ctx), it) {
> -			seq_printf(m, "%s: ", ce->engine->name);
> -			if (ce->state)
> -				describe_obj(m, ce->state->obj);
> -			if (ce->ring)
> +			intel_context_lock_pinned(ce);
> +			if (intel_context_is_pinned(ce)) {
> +				seq_printf(m, "%s: ", ce->engine->name);
> +				if (ce->state)
> +					describe_obj(m, ce->state->obj);
>   				describe_ctx_ring(m, ce->ring);
> -			seq_putc(m, '\n');
> +				seq_putc(m, '\n');
> +			}
> +			intel_context_unlock_pinned(ce);
>   		}
>   		i915_gem_context_unlock_engines(ctx);
>   
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 07/23] drm/i915/gt: Move the [class][inst] lookup for engines onto the GT
  2019-07-23 18:38 ` [PATCH 07/23] drm/i915/gt: Move the [class][inst] lookup for engines onto the GT Chris Wilson
@ 2019-07-25 21:21   ` Daniele Ceraolo Spurio
  2019-07-26  9:22   ` Tvrtko Ursulin
  1 sibling, 0 replies; 55+ messages in thread
From: Daniele Ceraolo Spurio @ 2019-07-25 21:21 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx



On 7/23/19 11:38 AM, Chris Wilson wrote:
> To maintain a fast lookup from a GT centric irq handler, we want the
> engine lookup tables on the intel_gt. To avoid having multiple copies of
> the same multi-dimension lookup table, move the generic user engine
> lookup into an rbtree (for fast and flexible indexing).
> 
> v2: Split uabi_instance cf uabi_class
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

But I'd like an ack from Tvrtko as well since I know he has some similar 
planned changes around this as well.

Daniele

> ---
>   drivers/gpu/drm/i915/Makefile                |  1 +
>   drivers/gpu/drm/i915/gem/i915_gem_context.c  |  3 +-
>   drivers/gpu/drm/i915/gt/intel_engine.h       |  3 -
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c    | 53 +++++-----------
>   drivers/gpu/drm/i915/gt/intel_engine_types.h |  9 ++-
>   drivers/gpu/drm/i915/gt/intel_engine_user.c  | 66 ++++++++++++++++++++
>   drivers/gpu/drm/i915/gt/intel_engine_user.h  | 20 ++++++
>   drivers/gpu/drm/i915/gt/intel_gt_types.h     |  4 ++
>   drivers/gpu/drm/i915/gt/selftest_lrc.c       | 15 +++--
>   drivers/gpu/drm/i915/i915_drv.h              |  7 ++-
>   drivers/gpu/drm/i915/i915_irq.c              |  2 +-
>   drivers/gpu/drm/i915/i915_pmu.c              |  3 +-
>   drivers/gpu/drm/i915/i915_query.c            |  2 +-
>   drivers/gpu/drm/i915/i915_trace.h            | 10 +--
>   14 files changed, 138 insertions(+), 60 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_user.c
>   create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_user.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 524516251a40..fafc3763dc2d 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -74,6 +74,7 @@ gt-y += \
>   	gt/intel_context.o \
>   	gt/intel_engine_cs.o \
>   	gt/intel_engine_pm.o \
> +	gt/intel_engine_user.o \
>   	gt/intel_gt.o \
>   	gt/intel_gt_pm.o \
>   	gt/intel_hangcheck.o \
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index 18b226bc5e3a..e31431fa141e 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -70,6 +70,7 @@
>   #include <drm/i915_drm.h>
>   
>   #include "gt/intel_lrc_reg.h"
> +#include "gt/intel_engine_user.h"
>   
>   #include "i915_gem_context.h"
>   #include "i915_globals.h"
> @@ -1740,7 +1741,7 @@ get_engines(struct i915_gem_context *ctx,
>   
>   		if (e->engines[n]) {
>   			ci.engine_class = e->engines[n]->engine->uabi_class;
> -			ci.engine_instance = e->engines[n]->engine->instance;
> +			ci.engine_instance = e->engines[n]->engine->uabi_instance;
>   		}
>   
>   		if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) {
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index db5c73ce86ee..30856383e4c5 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -432,9 +432,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   		       struct drm_printer *m,
>   		       const char *header, ...);
>   
> -struct intel_engine_cs *
> -intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
> -
>   static inline void intel_engine_context_in(struct intel_engine_cs *engine)
>   {
>   	unsigned long flags;
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 65cbf1d9118d..ed5c4e161e6e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -32,6 +32,7 @@
>   
>   #include "intel_engine.h"
>   #include "intel_engine_pm.h"
> +#include "intel_engine_user.h"
>   #include "intel_context.h"
>   #include "intel_lrc.h"
>   #include "intel_reset.h"
> @@ -285,9 +286,7 @@ static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
>   	intel_engine_set_hwsp_writemask(engine, ~0u);
>   }
>   
> -static int
> -intel_engine_setup(struct drm_i915_private *dev_priv,
> -		   enum intel_engine_id id)
> +static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
>   {
>   	const struct engine_info *info = &intel_engines[id];
>   	struct intel_engine_cs *engine;
> @@ -303,10 +302,9 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
>   	if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
>   		return -EINVAL;
>   
> -	if (GEM_DEBUG_WARN_ON(dev_priv->engine_class[info->class][info->instance]))
> +	if (GEM_DEBUG_WARN_ON(gt->engine_class[info->class][info->instance]))
>   		return -EINVAL;
>   
> -	GEM_BUG_ON(dev_priv->engine[id]);
>   	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
>   	if (!engine)
>   		return -ENOMEM;
> @@ -315,12 +313,12 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
>   
>   	engine->id = id;
>   	engine->mask = BIT(id);
> -	engine->i915 = dev_priv;
> -	engine->gt = &dev_priv->gt;
> -	engine->uncore = &dev_priv->uncore;
> +	engine->i915 = gt->i915;
> +	engine->gt = gt;
> +	engine->uncore = gt->uncore;
>   	__sprint_engine_name(engine->name, info);
>   	engine->hw_id = engine->guc_id = info->hw_id;
> -	engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases);
> +	engine->mmio_base = __engine_mmio_base(gt->i915, info->mmio_bases);
>   	engine->class = info->class;
>   	engine->instance = info->instance;
>   
> @@ -331,13 +329,14 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
>   	engine->destroy = (typeof(engine->destroy))kfree;
>   
>   	engine->uabi_class = intel_engine_classes[info->class].uabi_class;
> +	engine->uabi_instance = info->instance;
>   
> -	engine->context_size = intel_engine_context_size(dev_priv,
> +	engine->context_size = intel_engine_context_size(gt->i915,
>   							 engine->class);
>   	if (WARN_ON(engine->context_size > BIT(20)))
>   		engine->context_size = 0;
>   	if (engine->context_size)
> -		DRIVER_CAPS(dev_priv)->has_logical_contexts = true;
> +		DRIVER_CAPS(gt->i915)->has_logical_contexts = true;
>   
>   	/* Nothing to do here, execute in order of dependencies */
>   	engine->schedule = NULL;
> @@ -349,8 +348,11 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
>   	/* Scrub mmio state on takeover */
>   	intel_engine_sanitize_mmio(engine);
>   
> -	dev_priv->engine_class[info->class][info->instance] = engine;
> -	dev_priv->engine[id] = engine;
> +	engine->gt->engine_class[info->class][info->instance] = engine;
> +
> +	intel_engine_add_user(engine);
> +	gt->i915->engine[id] = engine;
> +
>   	return 0;
>   }
>   
> @@ -433,7 +435,7 @@ int intel_engines_init_mmio(struct drm_i915_private *i915)
>   		if (!HAS_ENGINE(i915, i))
>   			continue;
>   
> -		err = intel_engine_setup(i915, i);
> +		err = intel_engine_setup(&i915->gt, i);
>   		if (err)
>   			goto cleanup;
>   
> @@ -1501,29 +1503,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   	intel_engine_print_breadcrumbs(engine, m);
>   }
>   
> -static u8 user_class_map[] = {
> -	[I915_ENGINE_CLASS_RENDER] = RENDER_CLASS,
> -	[I915_ENGINE_CLASS_COPY] = COPY_ENGINE_CLASS,
> -	[I915_ENGINE_CLASS_VIDEO] = VIDEO_DECODE_CLASS,
> -	[I915_ENGINE_CLASS_VIDEO_ENHANCE] = VIDEO_ENHANCEMENT_CLASS,
> -};
> -
> -struct intel_engine_cs *
> -intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
> -{
> -	if (class >= ARRAY_SIZE(user_class_map))
> -		return NULL;
> -
> -	class = user_class_map[class];
> -
> -	GEM_BUG_ON(class > MAX_ENGINE_CLASS);
> -
> -	if (instance > MAX_ENGINE_INSTANCE)
> -		return NULL;
> -
> -	return i915->engine_class[class][instance];
> -}
> -
>   /**
>    * intel_enable_engine_stats() - Enable engine busy tracking on engine
>    * @engine: engine to enable stats collection
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 8be63019d707..9c927fa408aa 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -12,6 +12,7 @@
>   #include <linux/kref.h>
>   #include <linux/list.h>
>   #include <linux/llist.h>
> +#include <linux/rbtree.h>
>   #include <linux/timer.h>
>   #include <linux/types.h>
>   
> @@ -267,15 +268,19 @@ struct intel_engine_cs {
>   	unsigned int guc_id;
>   	intel_engine_mask_t mask;
>   
> -	u8 uabi_class;
> -
>   	u8 class;
>   	u8 instance;
> +
> +	u8 uabi_class;
> +	u8 uabi_instance;
> +
>   	u32 context_size;
>   	u32 mmio_base;
>   
>   	u32 uabi_capabilities;
>   
> +	struct rb_node uabi_node;
> +
>   	struct intel_sseu sseu;
>   
>   	struct intel_ring *buffer;
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> new file mode 100644
> index 000000000000..f74fb4d2fa0d
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> @@ -0,0 +1,66 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "i915_drv.h"
> +#include "intel_engine.h"
> +#include "intel_engine_user.h"
> +
> +struct intel_engine_cs *
> +intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
> +{
> +	struct rb_node *p = i915->uabi_engines.rb_node;
> +
> +	while (p) {
> +		struct intel_engine_cs *it =
> +			rb_entry(p, typeof(*it), uabi_node);
> +
> +		if (class < it->uabi_class)
> +			p = p->rb_left;
> +		else if (class > it->uabi_class ||
> +			 instance > it->uabi_instance)
> +			p = p->rb_right;
> +		else if (instance < it->uabi_instance)
> +			p = p->rb_left;
> +		else
> +			return it;
> +	}
> +
> +	return NULL;
> +}
> +
> +void intel_engine_add_user(struct intel_engine_cs *engine)
> +{
> +	struct rb_root *root = &engine->i915->uabi_engines;
> +	struct rb_node **p, *parent;
> +
> +	parent = NULL;
> +	p = &root->rb_node;
> +	while (*p) {
> +		struct intel_engine_cs *it;
> +
> +		parent = *p;
> +		it = rb_entry(parent, typeof(*it), uabi_node);
> +
> +		/* All user class:instance identifiers must be unique */
> +		GEM_BUG_ON(it->uabi_class == engine->uabi_class &&
> +			   it->uabi_instance == engine->uabi_instance);
> +
> +		if (engine->uabi_class < it->uabi_class)
> +			p = &parent->rb_left;
> +		else if (engine->uabi_class > it->uabi_class ||
> +			 engine->uabi_instance > it->uabi_instance)
> +			p = &parent->rb_right;
> +		else
> +			p = &parent->rb_left;
> +	}
> +
> +	rb_link_node(&engine->uabi_node, parent, p);
> +	rb_insert_color(&engine->uabi_node, root);
> +
> +	GEM_BUG_ON(intel_engine_lookup_user(engine->i915,
> +					    engine->uabi_class,
> +					    engine->uabi_instance) != engine);
> +}
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.h b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> new file mode 100644
> index 000000000000..091dc8a4a39f
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> @@ -0,0 +1,20 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef INTEL_ENGINE_USER_H
> +#define INTEL_ENGINE_USER_H
> +
> +#include <linux/types.h>
> +
> +struct drm_i915_private;
> +struct intel_engine_cs;
> +
> +struct intel_engine_cs *
> +intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
> +
> +void intel_engine_add_user(struct intel_engine_cs *engine);
> +
> +#endif /* INTEL_ENGINE_USER_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> index 34d4a868e4f1..5fd11e361d03 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> @@ -21,6 +21,7 @@
>   
>   struct drm_i915_private;
>   struct i915_ggtt;
> +struct intel_engine_cs;
>   struct intel_uncore;
>   
>   struct intel_hangcheck {
> @@ -76,6 +77,9 @@ struct intel_gt {
>   	u32 pm_ier;
>   
>   	u32 pm_guc_events;
> +
> +	struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1]
> +					    [MAX_ENGINE_INSTANCE + 1];
>   };
>   
>   enum intel_gt_scratch_field {
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 60f27e52d267..eb40a58665be 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -1773,6 +1773,7 @@ static int live_virtual_engine(void *arg)
>   	struct drm_i915_private *i915 = arg;
>   	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
>   	struct intel_engine_cs *engine;
> +	struct intel_gt *gt = &i915->gt;
>   	enum intel_engine_id id;
>   	unsigned int class, inst;
>   	int err = -ENODEV;
> @@ -1796,10 +1797,10 @@ static int live_virtual_engine(void *arg)
>   
>   		nsibling = 0;
>   		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
> -			if (!i915->engine_class[class][inst])
> +			if (!gt->engine_class[class][inst])
>   				continue;
>   
> -			siblings[nsibling++] = i915->engine_class[class][inst];
> +			siblings[nsibling++] = gt->engine_class[class][inst];
>   		}
>   		if (nsibling < 2)
>   			continue;
> @@ -1920,6 +1921,7 @@ static int live_virtual_mask(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
>   	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
> +	struct intel_gt *gt = &i915->gt;
>   	unsigned int class, inst;
>   	int err = 0;
>   
> @@ -1933,10 +1935,10 @@ static int live_virtual_mask(void *arg)
>   
>   		nsibling = 0;
>   		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
> -			if (!i915->engine_class[class][inst])
> +			if (!gt->engine_class[class][inst])
>   				break;
>   
> -			siblings[nsibling++] = i915->engine_class[class][inst];
> +			siblings[nsibling++] = gt->engine_class[class][inst];
>   		}
>   		if (nsibling < 2)
>   			continue;
> @@ -2097,6 +2099,7 @@ static int live_virtual_bond(void *arg)
>   	};
>   	struct drm_i915_private *i915 = arg;
>   	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
> +	struct intel_gt *gt = &i915->gt;
>   	unsigned int class, inst;
>   	int err = 0;
>   
> @@ -2111,11 +2114,11 @@ static int live_virtual_bond(void *arg)
>   
>   		nsibling = 0;
>   		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
> -			if (!i915->engine_class[class][inst])
> +			if (!gt->engine_class[class][inst])
>   				break;
>   
>   			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
> -			siblings[nsibling++] = i915->engine_class[class][inst];
> +			siblings[nsibling++] = gt->engine_class[class][inst];
>   		}
>   		if (nsibling < 2)
>   			continue;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 269a1b32b48b..12a7fdabc2f0 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1371,11 +1371,12 @@ struct drm_i915_private {
>   	wait_queue_head_t gmbus_wait_queue;
>   
>   	struct pci_dev *bridge_dev;
> -	struct intel_engine_cs *engine[I915_NUM_ENGINES];
> +
>   	/* Context used internally to idle the GPU and setup initial state */
>   	struct i915_gem_context *kernel_context;
> -	struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1]
> -					    [MAX_ENGINE_INSTANCE + 1];
> +
> +	struct intel_engine_cs *engine[I915_NUM_ENGINES];
> +	struct rb_root uabi_engines;
>   
>   	struct resource mch_res;
>   
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 11c73af92597..4d98e8597637 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -3109,7 +3109,7 @@ gen11_engine_irq_handler(struct intel_gt *gt, const u8 class,
>   	struct intel_engine_cs *engine;
>   
>   	if (instance <= MAX_ENGINE_INSTANCE)
> -		engine = gt->i915->engine_class[class][instance];
> +		engine = gt->engine_class[class][instance];
>   	else
>   		engine = NULL;
>   
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index eff86483bec0..bdf7963a043b 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -8,6 +8,7 @@
>   #include <linux/pm_runtime.h>
>   
>   #include "gt/intel_engine.h"
> +#include "gt/intel_engine_user.h"
>   
>   #include "i915_drv.h"
>   #include "i915_pmu.h"
> @@ -926,7 +927,7 @@ create_event_attributes(struct drm_i915_private *i915)
>   			i915_iter =
>   				add_i915_attr(i915_iter, str,
>   					      __I915_PMU_ENGINE(engine->uabi_class,
> -								engine->instance,
> +								engine->uabi_instance,
>   								engine_events[i].sample));
>   
>   			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
> diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
> index 7b7016171057..70b1ad38e615 100644
> --- a/drivers/gpu/drm/i915/i915_query.c
> +++ b/drivers/gpu/drm/i915/i915_query.c
> @@ -127,7 +127,7 @@ query_engine_info(struct drm_i915_private *i915,
>   
>   	for_each_engine(engine, i915, id) {
>   		info.engine.engine_class = engine->uabi_class;
> -		info.engine.engine_instance = engine->instance;
> +		info.engine.engine_instance = engine->uabi_instance;
>   		info.capabilities = engine->uabi_capabilities;
>   
>   		if (__copy_to_user(info_ptr, &info, sizeof(info)))
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index da18b8d6b80c..1d11245c4c87 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -677,7 +677,7 @@ TRACE_EVENT(i915_request_queue,
>   			   __entry->dev = rq->i915->drm.primary->index;
>   			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
> -			   __entry->instance = rq->engine->instance;
> +			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
>   			   __entry->seqno = rq->fence.seqno;
>   			   __entry->flags = flags;
> @@ -706,7 +706,7 @@ DECLARE_EVENT_CLASS(i915_request,
>   			   __entry->dev = rq->i915->drm.primary->index;
>   			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
> -			   __entry->instance = rq->engine->instance;
> +			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
>   			   __entry->seqno = rq->fence.seqno;
>   			   ),
> @@ -751,7 +751,7 @@ TRACE_EVENT(i915_request_in,
>   			   __entry->dev = rq->i915->drm.primary->index;
>   			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
> -			   __entry->instance = rq->engine->instance;
> +			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
>   			   __entry->seqno = rq->fence.seqno;
>   			   __entry->prio = rq->sched.attr.priority;
> @@ -782,7 +782,7 @@ TRACE_EVENT(i915_request_out,
>   			   __entry->dev = rq->i915->drm.primary->index;
>   			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
> -			   __entry->instance = rq->engine->instance;
> +			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
>   			   __entry->seqno = rq->fence.seqno;
>   			   __entry->completed = i915_request_completed(rq);
> @@ -847,7 +847,7 @@ TRACE_EVENT(i915_request_wait_begin,
>   			   __entry->dev = rq->i915->drm.primary->index;
>   			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
> -			   __entry->instance = rq->engine->instance;
> +			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
>   			   __entry->seqno = rq->fence.seqno;
>   			   __entry->flags = flags;
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 04/23] drm/i915: Push the ring creation flags to the backend
  2019-07-24 11:11   ` Tvrtko Ursulin
@ 2019-07-26  8:43     ` Chris Wilson
  2019-07-29 12:59       ` Tvrtko Ursulin
  0 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2019-07-26  8:43 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-07-24 12:11:54)
> 
> On 23/07/2019 19:38, Chris Wilson wrote:
> > Push the ring creation flags from the outer GEM context to the inner
> > intel_cotnext to avoid an unsightly back-reference from inside the
> > backend.
> 
> Sorry I find this quite ugly. Passing in integers in a pointer field and 
> casting back and forth.

But better than a union, since once the intel_context is active, the
ring is always a ring.

> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 6d3911469801..e237bcecfa1f 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -328,10 +328,14 @@ static void print_context_stats(struct seq_file *m,
> >   
> >               for_each_gem_engine(ce,
> >                                   i915_gem_context_lock_engines(ctx), it) {
> > -                     if (ce->state)
> > -                             per_file_stats(0, ce->state->obj, &kstats);
> > -                     if (ce->ring)
> > +                     intel_context_lock_pinned(ce);
> > +                     if (intel_context_is_pinned(ce)) {
> 
> And these hunks do not seem to belong to this patch.

Then you are mistaken. The bug is older, but it gets triggered by this
patch.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 07/23] drm/i915/gt: Move the [class][inst] lookup for engines onto the GT
  2019-07-23 18:38 ` [PATCH 07/23] drm/i915/gt: Move the [class][inst] lookup for engines onto the GT Chris Wilson
  2019-07-25 21:21   ` Daniele Ceraolo Spurio
@ 2019-07-26  9:22   ` Tvrtko Ursulin
  2019-07-26  9:33     ` Chris Wilson
  1 sibling, 1 reply; 55+ messages in thread
From: Tvrtko Ursulin @ 2019-07-26  9:22 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 23/07/2019 19:38, Chris Wilson wrote:
> To maintain a fast lookup from a GT centric irq handler, we want the
> engine lookup tables on the intel_gt. To avoid having multiple copies of
> the same multi-dimension lookup table, move the generic user engine
> lookup into an rbtree (for fast and flexible indexing).
> 
> v2: Split uabi_instance cf uabi_class
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> ---
>   drivers/gpu/drm/i915/Makefile                |  1 +
>   drivers/gpu/drm/i915/gem/i915_gem_context.c  |  3 +-
>   drivers/gpu/drm/i915/gt/intel_engine.h       |  3 -
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c    | 53 +++++-----------
>   drivers/gpu/drm/i915/gt/intel_engine_types.h |  9 ++-
>   drivers/gpu/drm/i915/gt/intel_engine_user.c  | 66 ++++++++++++++++++++
>   drivers/gpu/drm/i915/gt/intel_engine_user.h  | 20 ++++++
>   drivers/gpu/drm/i915/gt/intel_gt_types.h     |  4 ++
>   drivers/gpu/drm/i915/gt/selftest_lrc.c       | 15 +++--
>   drivers/gpu/drm/i915/i915_drv.h              |  7 ++-
>   drivers/gpu/drm/i915/i915_irq.c              |  2 +-
>   drivers/gpu/drm/i915/i915_pmu.c              |  3 +-
>   drivers/gpu/drm/i915/i915_query.c            |  2 +-
>   drivers/gpu/drm/i915/i915_trace.h            | 10 +--
>   14 files changed, 138 insertions(+), 60 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_user.c
>   create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_user.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 524516251a40..fafc3763dc2d 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -74,6 +74,7 @@ gt-y += \
>   	gt/intel_context.o \
>   	gt/intel_engine_cs.o \
>   	gt/intel_engine_pm.o \
> +	gt/intel_engine_user.o \
>   	gt/intel_gt.o \
>   	gt/intel_gt_pm.o \
>   	gt/intel_hangcheck.o \
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index 18b226bc5e3a..e31431fa141e 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -70,6 +70,7 @@
>   #include <drm/i915_drm.h>
>   
>   #include "gt/intel_lrc_reg.h"
> +#include "gt/intel_engine_user.h"
>   
>   #include "i915_gem_context.h"
>   #include "i915_globals.h"
> @@ -1740,7 +1741,7 @@ get_engines(struct i915_gem_context *ctx,
>   
>   		if (e->engines[n]) {
>   			ci.engine_class = e->engines[n]->engine->uabi_class;
> -			ci.engine_instance = e->engines[n]->engine->instance;
> +			ci.engine_instance = e->engines[n]->engine->uabi_instance;
>   		}
>   
>   		if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) {
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index db5c73ce86ee..30856383e4c5 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -432,9 +432,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   		       struct drm_printer *m,
>   		       const char *header, ...);
>   
> -struct intel_engine_cs *
> -intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
> -
>   static inline void intel_engine_context_in(struct intel_engine_cs *engine)
>   {
>   	unsigned long flags;
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 65cbf1d9118d..ed5c4e161e6e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -32,6 +32,7 @@
>   
>   #include "intel_engine.h"
>   #include "intel_engine_pm.h"
> +#include "intel_engine_user.h"
>   #include "intel_context.h"
>   #include "intel_lrc.h"
>   #include "intel_reset.h"
> @@ -285,9 +286,7 @@ static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
>   	intel_engine_set_hwsp_writemask(engine, ~0u);
>   }
>   
> -static int
> -intel_engine_setup(struct drm_i915_private *dev_priv,
> -		   enum intel_engine_id id)
> +static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
>   {
>   	const struct engine_info *info = &intel_engines[id];
>   	struct intel_engine_cs *engine;
> @@ -303,10 +302,9 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
>   	if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
>   		return -EINVAL;
>   
> -	if (GEM_DEBUG_WARN_ON(dev_priv->engine_class[info->class][info->instance]))
> +	if (GEM_DEBUG_WARN_ON(gt->engine_class[info->class][info->instance]))
>   		return -EINVAL;
>   
> -	GEM_BUG_ON(dev_priv->engine[id]);
>   	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
>   	if (!engine)
>   		return -ENOMEM;
> @@ -315,12 +313,12 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
>   
>   	engine->id = id;
>   	engine->mask = BIT(id);
> -	engine->i915 = dev_priv;
> -	engine->gt = &dev_priv->gt;
> -	engine->uncore = &dev_priv->uncore;
> +	engine->i915 = gt->i915;
> +	engine->gt = gt;
> +	engine->uncore = gt->uncore;
>   	__sprint_engine_name(engine->name, info);
>   	engine->hw_id = engine->guc_id = info->hw_id;
> -	engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases);
> +	engine->mmio_base = __engine_mmio_base(gt->i915, info->mmio_bases);
>   	engine->class = info->class;
>   	engine->instance = info->instance;
>   
> @@ -331,13 +329,14 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
>   	engine->destroy = (typeof(engine->destroy))kfree;
>   
>   	engine->uabi_class = intel_engine_classes[info->class].uabi_class;
> +	engine->uabi_instance = info->instance;
>   
> -	engine->context_size = intel_engine_context_size(dev_priv,
> +	engine->context_size = intel_engine_context_size(gt->i915,
>   							 engine->class);
>   	if (WARN_ON(engine->context_size > BIT(20)))
>   		engine->context_size = 0;
>   	if (engine->context_size)
> -		DRIVER_CAPS(dev_priv)->has_logical_contexts = true;
> +		DRIVER_CAPS(gt->i915)->has_logical_contexts = true;
>   
>   	/* Nothing to do here, execute in order of dependencies */
>   	engine->schedule = NULL;
> @@ -349,8 +348,11 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
>   	/* Scrub mmio state on takeover */
>   	intel_engine_sanitize_mmio(engine);
>   
> -	dev_priv->engine_class[info->class][info->instance] = engine;
> -	dev_priv->engine[id] = engine;
> +	engine->gt->engine_class[info->class][info->instance] = engine;

No need to go through engine to gt here.

> +
> +	intel_engine_add_user(engine);
> +	gt->i915->engine[id] = engine;
> +
>   	return 0;
>   }
>   
> @@ -433,7 +435,7 @@ int intel_engines_init_mmio(struct drm_i915_private *i915)
>   		if (!HAS_ENGINE(i915, i))
>   			continue;
>   
> -		err = intel_engine_setup(i915, i);
> +		err = intel_engine_setup(&i915->gt, i);
>   		if (err)
>   			goto cleanup;
>   
> @@ -1501,29 +1503,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   	intel_engine_print_breadcrumbs(engine, m);
>   }
>   
> -static u8 user_class_map[] = {
> -	[I915_ENGINE_CLASS_RENDER] = RENDER_CLASS,
> -	[I915_ENGINE_CLASS_COPY] = COPY_ENGINE_CLASS,
> -	[I915_ENGINE_CLASS_VIDEO] = VIDEO_DECODE_CLASS,
> -	[I915_ENGINE_CLASS_VIDEO_ENHANCE] = VIDEO_ENHANCEMENT_CLASS,
> -};
> -
> -struct intel_engine_cs *
> -intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
> -{
> -	if (class >= ARRAY_SIZE(user_class_map))
> -		return NULL;
> -
> -	class = user_class_map[class];
> -
> -	GEM_BUG_ON(class > MAX_ENGINE_CLASS);
> -
> -	if (instance > MAX_ENGINE_INSTANCE)
> -		return NULL;
> -
> -	return i915->engine_class[class][instance];
> -}
> -
>   /**
>    * intel_enable_engine_stats() - Enable engine busy tracking on engine
>    * @engine: engine to enable stats collection
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 8be63019d707..9c927fa408aa 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -12,6 +12,7 @@
>   #include <linux/kref.h>
>   #include <linux/list.h>
>   #include <linux/llist.h>
> +#include <linux/rbtree.h>
>   #include <linux/timer.h>
>   #include <linux/types.h>
>   
> @@ -267,15 +268,19 @@ struct intel_engine_cs {
>   	unsigned int guc_id;
>   	intel_engine_mask_t mask;
>   
> -	u8 uabi_class;
> -
>   	u8 class;
>   	u8 instance;
> +
> +	u8 uabi_class;
> +	u8 uabi_instance;
> +
>   	u32 context_size;
>   	u32 mmio_base;
>   
>   	u32 uabi_capabilities;
>   
> +	struct rb_node uabi_node;
> +
>   	struct intel_sseu sseu;
>   
>   	struct intel_ring *buffer;
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> new file mode 100644
> index 000000000000..f74fb4d2fa0d
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> @@ -0,0 +1,66 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "i915_drv.h"
> +#include "intel_engine.h"
> +#include "intel_engine_user.h"
> +
> +struct intel_engine_cs *
> +intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
> +{
> +	struct rb_node *p = i915->uabi_engines.rb_node;
> +
> +	while (p) {
> +		struct intel_engine_cs *it =
> +			rb_entry(p, typeof(*it), uabi_node);
> +
> +		if (class < it->uabi_class)
> +			p = p->rb_left;
> +		else if (class > it->uabi_class ||
> +			 instance > it->uabi_instance)
> +			p = p->rb_right;
> +		else if (instance < it->uabi_instance)
> +			p = p->rb_left;
> +		else
> +			return it;
> +	}
> +
> +	return NULL;
> +}
> +
> +void intel_engine_add_user(struct intel_engine_cs *engine)
> +{
> +	struct rb_root *root = &engine->i915->uabi_engines;
> +	struct rb_node **p, *parent;
> +
> +	parent = NULL;
> +	p = &root->rb_node;
> +	while (*p) {
> +		struct intel_engine_cs *it;
> +
> +		parent = *p;
> +		it = rb_entry(parent, typeof(*it), uabi_node);
> +
> +		/* All user class:instance identifiers must be unique */
> +		GEM_BUG_ON(it->uabi_class == engine->uabi_class &&
> +			   it->uabi_instance == engine->uabi_instance);
> +
> +		if (engine->uabi_class < it->uabi_class)
> +			p = &parent->rb_left;
> +		else if (engine->uabi_class > it->uabi_class ||
> +			 engine->uabi_instance > it->uabi_instance)
> +			p = &parent->rb_right;
> +		else
> +			p = &parent->rb_left;
> +	}
> +
> +	rb_link_node(&engine->uabi_node, parent, p);
> +	rb_insert_color(&engine->uabi_node, root);
> +
> +	GEM_BUG_ON(intel_engine_lookup_user(engine->i915,
> +					    engine->uabi_class,
> +					    engine->uabi_instance) != engine);
> +}
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.h b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> new file mode 100644
> index 000000000000..091dc8a4a39f
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.h
> @@ -0,0 +1,20 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef INTEL_ENGINE_USER_H
> +#define INTEL_ENGINE_USER_H
> +
> +#include <linux/types.h>
> +
> +struct drm_i915_private;
> +struct intel_engine_cs;
> +
> +struct intel_engine_cs *
> +intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
> +
> +void intel_engine_add_user(struct intel_engine_cs *engine);
> +
> +#endif /* INTEL_ENGINE_USER_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> index 34d4a868e4f1..5fd11e361d03 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> @@ -21,6 +21,7 @@
>   
>   struct drm_i915_private;
>   struct i915_ggtt;
> +struct intel_engine_cs;
>   struct intel_uncore;
>   
>   struct intel_hangcheck {
> @@ -76,6 +77,9 @@ struct intel_gt {
>   	u32 pm_ier;
>   
>   	u32 pm_guc_events;
> +
> +	struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1]
> +					    [MAX_ENGINE_INSTANCE + 1];
>   };
>   
>   enum intel_gt_scratch_field {
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 60f27e52d267..eb40a58665be 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -1773,6 +1773,7 @@ static int live_virtual_engine(void *arg)
>   	struct drm_i915_private *i915 = arg;
>   	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
>   	struct intel_engine_cs *engine;
> +	struct intel_gt *gt = &i915->gt;
>   	enum intel_engine_id id;
>   	unsigned int class, inst;
>   	int err = -ENODEV;
> @@ -1796,10 +1797,10 @@ static int live_virtual_engine(void *arg)
>   
>   		nsibling = 0;
>   		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
> -			if (!i915->engine_class[class][inst])
> +			if (!gt->engine_class[class][inst])
>   				continue;
>   
> -			siblings[nsibling++] = i915->engine_class[class][inst];
> +			siblings[nsibling++] = gt->engine_class[class][inst];
>   		}
>   		if (nsibling < 2)
>   			continue;
> @@ -1920,6 +1921,7 @@ static int live_virtual_mask(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
>   	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
> +	struct intel_gt *gt = &i915->gt;
>   	unsigned int class, inst;
>   	int err = 0;
>   
> @@ -1933,10 +1935,10 @@ static int live_virtual_mask(void *arg)
>   
>   		nsibling = 0;
>   		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
> -			if (!i915->engine_class[class][inst])
> +			if (!gt->engine_class[class][inst])
>   				break;
>   
> -			siblings[nsibling++] = i915->engine_class[class][inst];
> +			siblings[nsibling++] = gt->engine_class[class][inst];
>   		}
>   		if (nsibling < 2)
>   			continue;
> @@ -2097,6 +2099,7 @@ static int live_virtual_bond(void *arg)
>   	};
>   	struct drm_i915_private *i915 = arg;
>   	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
> +	struct intel_gt *gt = &i915->gt;
>   	unsigned int class, inst;
>   	int err = 0;
>   
> @@ -2111,11 +2114,11 @@ static int live_virtual_bond(void *arg)
>   
>   		nsibling = 0;
>   		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
> -			if (!i915->engine_class[class][inst])
> +			if (!gt->engine_class[class][inst])
>   				break;
>   
>   			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
> -			siblings[nsibling++] = i915->engine_class[class][inst];
> +			siblings[nsibling++] = gt->engine_class[class][inst];
>   		}
>   		if (nsibling < 2)
>   			continue;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 269a1b32b48b..12a7fdabc2f0 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1371,11 +1371,12 @@ struct drm_i915_private {
>   	wait_queue_head_t gmbus_wait_queue;
>   
>   	struct pci_dev *bridge_dev;
> -	struct intel_engine_cs *engine[I915_NUM_ENGINES];
> +
>   	/* Context used internally to idle the GPU and setup initial state */
>   	struct i915_gem_context *kernel_context;
> -	struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1]
> -					    [MAX_ENGINE_INSTANCE + 1];
> +
> +	struct intel_engine_cs *engine[I915_NUM_ENGINES];
> +	struct rb_root uabi_engines;
>   
>   	struct resource mch_res;
>   
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 11c73af92597..4d98e8597637 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -3109,7 +3109,7 @@ gen11_engine_irq_handler(struct intel_gt *gt, const u8 class,
>   	struct intel_engine_cs *engine;
>   
>   	if (instance <= MAX_ENGINE_INSTANCE)
> -		engine = gt->i915->engine_class[class][instance];
> +		engine = gt->engine_class[class][instance];
>   	else
>   		engine = NULL;
>   
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index eff86483bec0..bdf7963a043b 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -8,6 +8,7 @@
>   #include <linux/pm_runtime.h>
>   
>   #include "gt/intel_engine.h"
> +#include "gt/intel_engine_user.h"
>   
>   #include "i915_drv.h"
>   #include "i915_pmu.h"
> @@ -926,7 +927,7 @@ create_event_attributes(struct drm_i915_private *i915)
>   			i915_iter =
>   				add_i915_attr(i915_iter, str,
>   					      __I915_PMU_ENGINE(engine->uabi_class,
> -								engine->instance,
> +								engine->uabi_instance,
>   								engine_events[i].sample));
>   
>   			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
> diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
> index 7b7016171057..70b1ad38e615 100644
> --- a/drivers/gpu/drm/i915/i915_query.c
> +++ b/drivers/gpu/drm/i915/i915_query.c
> @@ -127,7 +127,7 @@ query_engine_info(struct drm_i915_private *i915,
>   
>   	for_each_engine(engine, i915, id) {
>   		info.engine.engine_class = engine->uabi_class;
> -		info.engine.engine_instance = engine->instance;
> +		info.engine.engine_instance = engine->uabi_instance;
>   		info.capabilities = engine->uabi_capabilities;
>   
>   		if (__copy_to_user(info_ptr, &info, sizeof(info)))
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index da18b8d6b80c..1d11245c4c87 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -677,7 +677,7 @@ TRACE_EVENT(i915_request_queue,
>   			   __entry->dev = rq->i915->drm.primary->index;
>   			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
> -			   __entry->instance = rq->engine->instance;
> +			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
>   			   __entry->seqno = rq->fence.seqno;
>   			   __entry->flags = flags;
> @@ -706,7 +706,7 @@ DECLARE_EVENT_CLASS(i915_request,
>   			   __entry->dev = rq->i915->drm.primary->index;
>   			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
> -			   __entry->instance = rq->engine->instance;
> +			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
>   			   __entry->seqno = rq->fence.seqno;
>   			   ),
> @@ -751,7 +751,7 @@ TRACE_EVENT(i915_request_in,
>   			   __entry->dev = rq->i915->drm.primary->index;
>   			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
> -			   __entry->instance = rq->engine->instance;
> +			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
>   			   __entry->seqno = rq->fence.seqno;
>   			   __entry->prio = rq->sched.attr.priority;
> @@ -782,7 +782,7 @@ TRACE_EVENT(i915_request_out,
>   			   __entry->dev = rq->i915->drm.primary->index;
>   			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
> -			   __entry->instance = rq->engine->instance;
> +			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
>   			   __entry->seqno = rq->fence.seqno;
>   			   __entry->completed = i915_request_completed(rq);
> @@ -847,7 +847,7 @@ TRACE_EVENT(i915_request_wait_begin,
>   			   __entry->dev = rq->i915->drm.primary->index;
>   			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
> -			   __entry->instance = rq->engine->instance;
> +			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
>   			   __entry->seqno = rq->fence.seqno;
>   			   __entry->flags = flags;
> 

I read it, relatively rushed, since pressure keeps getting applied! :/

There are some good parts and implementation looks okay, but I am not 
sure we need a tree. Nodes are bigger than pointers, management code is 
bigger, lookup is slower.. is it a win all things considered?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 07/23] drm/i915/gt: Move the [class][inst] lookup for engines onto the GT
  2019-07-26  9:22   ` Tvrtko Ursulin
@ 2019-07-26  9:33     ` Chris Wilson
  2019-07-26  9:51       ` Tvrtko Ursulin
  0 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2019-07-26  9:33 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-07-26 10:22:08)
> 
> On 23/07/2019 19:38, Chris Wilson wrote:
> I read it, relatively rushed, since pressure keeps getting applied! :/
> 
> There are some good parts and implementation looks okay, but I am not 
> sure we need a tree. Nodes are bigger than pointers, management code is 
> bigger, lookup is slower.. is it a win all things considered?

A big win imo. Consider that this interface is purely debug, the primary
interface runtime will be via gt->engines, the nodes are much smaller
than the sparse array.

I am adamant that we are not adding more sparse arrays. A 2D lookup
table since that matches the HW, but even then we may just end up with
LUT (1 extra pointer load to replace the sparse array with a compact?)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 07/23] drm/i915/gt: Move the [class][inst] lookup for engines onto the GT
  2019-07-26  9:33     ` Chris Wilson
@ 2019-07-26  9:51       ` Tvrtko Ursulin
  2019-07-26  9:57         ` Chris Wilson
  0 siblings, 1 reply; 55+ messages in thread
From: Tvrtko Ursulin @ 2019-07-26  9:51 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 26/07/2019 10:33, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-07-26 10:22:08)
>>
>> On 23/07/2019 19:38, Chris Wilson wrote:
>> I read it, relatively rushed, since pressure keeps getting applied! :/
>>
>> There are some good parts and implementation looks okay, but I am not
>> sure we need a tree. Nodes are bigger than pointers, management code is
>> bigger, lookup is slower.. is it a win all things considered?
> 
> A big win imo. Consider that this interface is purely debug, the primary
> interface runtime will be via gt->engines, the nodes are much smaller
> than the sparse array.

I guess it depends. One rb_node is three pointers and can only be used 
from a single tree. Nor does the patch replace all sparse arrays.

> I am adamant that we are not adding more sparse arrays. A 2D lookup
> table since that matches the HW, but even then we may just end up with
> LUT (1 extra pointer load to replace the sparse array with a compact?)

I feel it's too early for this specific patch.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 07/23] drm/i915/gt: Move the [class][inst] lookup for engines onto the GT
  2019-07-26  9:51       ` Tvrtko Ursulin
@ 2019-07-26  9:57         ` Chris Wilson
  0 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-07-26  9:57 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-07-26 10:51:01)
> 
> On 26/07/2019 10:33, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-07-26 10:22:08)
> >>
> >> On 23/07/2019 19:38, Chris Wilson wrote:
> >> I read it, relatively rushed, since pressure keeps getting applied! :/
> >>
> >> There are some good parts and implementation looks okay, but I am not
> >> sure we need a tree. Nodes are bigger than pointers, management code is
> >> bigger, lookup is slower.. is it a win all things considered?
> > 
> > A big win imo. Consider that this interface is purely debug, the primary
> > interface runtime will be via gt->engines, the nodes are much smaller
> > than the sparse array.
> 
> I guess it depends. One rb_node is three pointers and can only be used 
> from a single tree. Nor does the patch replaces all sparse arrays.

There would be reasonable objection if I removed all the arrays in one
go :-p

> > I am adamant that we are not adding more sparse arrays. A 2D lookup
> > table since that matches the HW, but even then we may just end up with
> > LUT (1 extra pointer load to replace the sparse array with a compact?)
> 
> I feel it's too early for this specific patch.

It's too early? The whole point is to enable gt-centrification for the
later patches and lift gt initialisation out of GEM.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 04/23] drm/i915: Push the ring creation flags to the backend
  2019-07-26  8:43     ` Chris Wilson
@ 2019-07-29 12:59       ` Tvrtko Ursulin
  2019-07-30  9:38         ` Chris Wilson
  0 siblings, 1 reply; 55+ messages in thread
From: Tvrtko Ursulin @ 2019-07-29 12:59 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 26/07/2019 09:43, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-07-24 12:11:54)
>>
>> On 23/07/2019 19:38, Chris Wilson wrote:
>>> Push the ring creation flags from the outer GEM context to the inner
>>> intel_cotnext to avoid an unsightly back-reference from inside the
>>> backend.
>>
>> Sorry I find this quite ugly. Passing in integers in pointers filed and
>> casting back and forth.
> 
> But better than a union, since once the intel_context is active, the
> ring is always a ring.

Unless it is u64 size. I am not buying it. :)

>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>>> index 6d3911469801..e237bcecfa1f 100644
>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>> @@ -328,10 +328,14 @@ static void print_context_stats(struct seq_file *m,
>>>    
>>>                for_each_gem_engine(ce,
>>>                                    i915_gem_context_lock_engines(ctx), it) {
>>> -                     if (ce->state)
>>> -                             per_file_stats(0, ce->state->obj, &kstats);
>>> -                     if (ce->ring)
>>> +                     intel_context_lock_pinned(ce);
>>> +                     if (intel_context_is_pinned(ce)) {
>>
>> And these hunks do not seem to belong to this patch.
> 
> Then you are mistaken. The bug is older, but it gets triggered by this
> patch.

Gets triggered or gets fixed? Perhaps commit message needs improving 
since it talks about avoiding an unsightly back-reference (and I argue 
ce->ring = u64 size is at least equally unsightly), and not fixing any bugs.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 04/23] drm/i915: Push the ring creation flags to the backend
  2019-07-29 12:59       ` Tvrtko Ursulin
@ 2019-07-30  9:38         ` Chris Wilson
  2019-08-01  8:42           ` Tvrtko Ursulin
  0 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2019-07-30  9:38 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-07-29 13:59:22)
> 
> On 26/07/2019 09:43, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-07-24 12:11:54)
> >>
> >> On 23/07/2019 19:38, Chris Wilson wrote:
> >>> Push the ring creation flags from the outer GEM context to the inner
> >>> intel_cotnext to avoid an unsightly back-reference from inside the
> >>> backend.
> >>
> >> Sorry I find this quite ugly. Passing in integers in pointers filed and
> >> casting back and forth.
> > 
> > But better than a union, since once the intel_context is active, the
> > ring is always a ring.
> 
> Unless it is u64 size. I am not buying it. :)

We don't need u64 size? I don't understand what you mean.

> >>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> >>> index 6d3911469801..e237bcecfa1f 100644
> >>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> >>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> >>> @@ -328,10 +328,14 @@ static void print_context_stats(struct seq_file *m,
> >>>    
> >>>                for_each_gem_engine(ce,
> >>>                                    i915_gem_context_lock_engines(ctx), it) {
> >>> -                     if (ce->state)
> >>> -                             per_file_stats(0, ce->state->obj, &kstats);
> >>> -                     if (ce->ring)
> >>> +                     intel_context_lock_pinned(ce);
> >>> +                     if (intel_context_is_pinned(ce)) {
> >>
> >> And these hunks do not seem to belong to this patch.
> > 
> > Then you are mistaken. The bug is older, but it gets triggered by this
> > patch.
> 
> Gets triggered or gets fixed? Perhaps commit message needs improving 
> since it talks about avoiding an unsightly back-reference (and I argue 
> ce->ring = u64 size is at least equally unsightly), and not fixing any bugs.

The bug is a potential race condition inside the debug. What is hit here
is that without the state of the pin known, the meaning of ce->ring is
unknown (whereas the other bug is that condition can change during
evaluation).
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 03/23] drm/i915: Remove lrc default desc from GEM context
  2019-07-24  9:20   ` Tvrtko Ursulin
@ 2019-08-01  8:37     ` Tvrtko Ursulin
  2019-08-01  8:41       ` Chris Wilson
  0 siblings, 1 reply; 55+ messages in thread
From: Tvrtko Ursulin @ 2019-08-01  8:37 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 24/07/2019 10:20, Tvrtko Ursulin wrote:
> 
> On 23/07/2019 19:38, Chris Wilson wrote:
>> We only compute the lrc_descriptor() on pinning the context, i.e.
>> infrequently, so we do not benefit from storing the template as the
>> addressing mode is also fixed for the lifetime of the intel_context.
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> ---
>>   drivers/gpu/drm/i915/gem/i915_gem_context.c   | 28 ++-----------------
>>   .../gpu/drm/i915/gem/i915_gem_context_types.h |  2 --
>>   drivers/gpu/drm/i915/gt/intel_lrc.c           | 12 +++++---
>>   drivers/gpu/drm/i915/gvt/scheduler.c          |  3 --
>>   4 files changed, 10 insertions(+), 35 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
>> b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> index b28c7ca681a8..1b3dc7258ef2 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> @@ -397,30 +397,6 @@ static void context_close(struct i915_gem_context 
>> *ctx)
>>       i915_gem_context_put(ctx);
>>   }
>> -static u32 default_desc_template(const struct drm_i915_private *i915,
>> -                 const struct i915_address_space *vm)
>> -{
>> -    u32 address_mode;
>> -    u32 desc;
>> -
>> -    desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
>> -
>> -    address_mode = INTEL_LEGACY_32B_CONTEXT;
>> -    if (vm && i915_vm_is_4lvl(vm))
>> -        address_mode = INTEL_LEGACY_64B_CONTEXT;
>> -    desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT;
>> -
>> -    if (IS_GEN(i915, 8))
>> -        desc |= GEN8_CTX_L3LLC_COHERENT;
>> -
>> -    /* TODO: WaDisableLiteRestore when we start using semaphore
>> -     * signalling between Command Streamers
>> -     * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE;
>> -     */
>> -
>> -    return desc;
>> -}
>> -
>>   static struct i915_gem_context *
>>   __create_context(struct drm_i915_private *i915)
>>   {
>> @@ -459,7 +435,6 @@ __create_context(struct drm_i915_private *i915)
>>       i915_gem_context_set_recoverable(ctx);
>>       ctx->ring_size = 4 * PAGE_SIZE;
>> -    ctx->desc_template = default_desc_template(i915, NULL);
>>       for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
>>           ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
>> @@ -478,8 +453,9 @@ __set_ppgtt(struct i915_gem_context *ctx, struct 
>> i915_address_space *vm)
>>       struct i915_gem_engines_iter it;
>>       struct intel_context *ce;
>> +    GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old));
>> +
>>       ctx->vm = i915_vm_get(vm);
>> -    ctx->desc_template = default_desc_template(ctx->i915, vm);
>>       for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
>>           i915_vm_put(ce->vm);
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
>> b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
>> index 0ee61482ef94..a02d98494078 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
>> @@ -171,8 +171,6 @@ struct i915_gem_context {
>>       /** ring_size: size for allocating the per-engine ring buffer */
>>       u32 ring_size;
>> -    /** desc_template: invariant fields for the HW context descriptor */
>> -    u32 desc_template;
>>       /** guilty_count: How many times this context has caused a GPU 
>> hang. */
>>       atomic_t guilty_count;
>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
>> b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> index 632344c163a8..5fdac40015cf 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> @@ -418,13 +418,17 @@ lrc_descriptor(struct intel_context *ce, struct 
>> intel_engine_cs *engine)
>>       BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
>>       BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > 
>> (BIT(GEN11_SW_CTX_ID_WIDTH)));
>> -    desc = ctx->desc_template;                /* bits  0-11 */
>> -    GEM_BUG_ON(desc & GENMASK_ULL(63, 12));
>> +    desc = INTEL_LEGACY_32B_CONTEXT;
>> +    if (i915_vm_is_4lvl(ce->vm))
>> +        desc = INTEL_LEGACY_64B_CONTEXT;
> 
> if-else now that the vm null check is gone.
> 
>> +    desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
>> +
>> +    desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
>> +    if (IS_GEN(engine->i915, 8))
>> +        desc |= GEN8_CTX_L3LLC_COHERENT;
> 
> Don't know.. it's nicer to keep it stored it both for Gen and context 
> state. What's the problem with it?

Ping.

> 
> Regards,
> 
> Tvrtko
> 
>>       desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
>>                                   /* bits 12-31 */
>> -    GEM_BUG_ON(desc & GENMASK_ULL(63, 32));
>> -
>>       /*
>>        * The following 32bits are copied into the OA reports (dword 2).
>>        * Consider updating oa_get_render_ctx_id in i915_perf.c when 
>> changing
>> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c 
>> b/drivers/gpu/drm/i915/gvt/scheduler.c
>> index f68798ab1e7c..4c018fb1359c 100644
>> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
>> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
>> @@ -291,9 +291,6 @@ shadow_context_descriptor_update(struct 
>> intel_context *ce,
>>        * Update bits 0-11 of the context descriptor which includes flags
>>        * like GEN8_CTX_* cached in desc_template
>>        */
>> -    desc &= U64_MAX << 12;
>> -    desc |= ce->gem_context->desc_template & ((1ULL << 12) - 1);
>> -
>>       desc &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
>>       desc |= workload->ctx_desc.addressing_mode <<
>>           GEN8_CTX_ADDRESSING_MODE_SHIFT;
>>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 03/23] drm/i915: Remove lrc default desc from GEM context
  2019-08-01  8:37     ` Tvrtko Ursulin
@ 2019-08-01  8:41       ` Chris Wilson
  2019-08-01  8:53         ` Tvrtko Ursulin
  0 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2019-08-01  8:41 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-08-01 09:37:38)
> 
> On 24/07/2019 10:20, Tvrtko Ursulin wrote:
> > 
> > On 23/07/2019 19:38, Chris Wilson wrote:
> >> We only compute the lrc_descriptor() on pinning the context, i.e.
> >> infrequently, so we do not benefit from storing the template as the
> >> addressing mode is also fixed for the lifetime of the intel_context.
> >>
> >> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >> ---
> >>   drivers/gpu/drm/i915/gem/i915_gem_context.c   | 28 ++-----------------
> >>   .../gpu/drm/i915/gem/i915_gem_context_types.h |  2 --
> >>   drivers/gpu/drm/i915/gt/intel_lrc.c           | 12 +++++---
> >>   drivers/gpu/drm/i915/gvt/scheduler.c          |  3 --
> >>   4 files changed, 10 insertions(+), 35 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
> >> b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> >> index b28c7ca681a8..1b3dc7258ef2 100644
> >> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> >> @@ -397,30 +397,6 @@ static void context_close(struct i915_gem_context 
> >> *ctx)
> >>       i915_gem_context_put(ctx);
> >>   }
> >> -static u32 default_desc_template(const struct drm_i915_private *i915,
> >> -                 const struct i915_address_space *vm)
> >> -{
> >> -    u32 address_mode;
> >> -    u32 desc;
> >> -
> >> -    desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
> >> -
> >> -    address_mode = INTEL_LEGACY_32B_CONTEXT;
> >> -    if (vm && i915_vm_is_4lvl(vm))
> >> -        address_mode = INTEL_LEGACY_64B_CONTEXT;
> >> -    desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT;
> >> -
> >> -    if (IS_GEN(i915, 8))
> >> -        desc |= GEN8_CTX_L3LLC_COHERENT;
> >> -
> >> -    /* TODO: WaDisableLiteRestore when we start using semaphore
> >> -     * signalling between Command Streamers
> >> -     * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE;
> >> -     */
> >> -
> >> -    return desc;
> >> -}
> >> -
> >>   static struct i915_gem_context *
> >>   __create_context(struct drm_i915_private *i915)
> >>   {
> >> @@ -459,7 +435,6 @@ __create_context(struct drm_i915_private *i915)
> >>       i915_gem_context_set_recoverable(ctx);
> >>       ctx->ring_size = 4 * PAGE_SIZE;
> >> -    ctx->desc_template = default_desc_template(i915, NULL);
> >>       for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
> >>           ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
> >> @@ -478,8 +453,9 @@ __set_ppgtt(struct i915_gem_context *ctx, struct 
> >> i915_address_space *vm)
> >>       struct i915_gem_engines_iter it;
> >>       struct intel_context *ce;
> >> +    GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old));
> >> +
> >>       ctx->vm = i915_vm_get(vm);
> >> -    ctx->desc_template = default_desc_template(ctx->i915, vm);
> >>       for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
> >>           i915_vm_put(ce->vm);
> >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
> >> b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> >> index 0ee61482ef94..a02d98494078 100644
> >> --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> >> @@ -171,8 +171,6 @@ struct i915_gem_context {
> >>       /** ring_size: size for allocating the per-engine ring buffer */
> >>       u32 ring_size;
> >> -    /** desc_template: invariant fields for the HW context descriptor */
> >> -    u32 desc_template;
> >>       /** guilty_count: How many times this context has caused a GPU 
> >> hang. */
> >>       atomic_t guilty_count;
> >> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> >> b/drivers/gpu/drm/i915/gt/intel_lrc.c
> >> index 632344c163a8..5fdac40015cf 100644
> >> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> >> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> >> @@ -418,13 +418,17 @@ lrc_descriptor(struct intel_context *ce, struct 
> >> intel_engine_cs *engine)
> >>       BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
> >>       BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > 
> >> (BIT(GEN11_SW_CTX_ID_WIDTH)));
> >> -    desc = ctx->desc_template;                /* bits  0-11 */
> >> -    GEM_BUG_ON(desc & GENMASK_ULL(63, 12));
> >> +    desc = INTEL_LEGACY_32B_CONTEXT;
> >> +    if (i915_vm_is_4lvl(ce->vm))
> >> +        desc = INTEL_LEGACY_64B_CONTEXT;
> > 
> > if-else now that the vm null check is gone.
> > 
> >> +    desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
> >> +
> >> +    desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
> >> +    if (IS_GEN(engine->i915, 8))
> >> +        desc |= GEN8_CTX_L3LLC_COHERENT;
> > 
> > Don't know.. it's nicer to keep it stored it both for Gen and context 
> > state. What's the problem with it?
> 
> Ping.

There's no gem_context.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 04/23] drm/i915: Push the ring creation flags to the backend
  2019-07-30  9:38         ` Chris Wilson
@ 2019-08-01  8:42           ` Tvrtko Ursulin
  2019-08-01  8:45             ` Chris Wilson
  2019-08-01  8:46             ` Chris Wilson
  0 siblings, 2 replies; 55+ messages in thread
From: Tvrtko Ursulin @ 2019-08-01  8:42 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 30/07/2019 10:38, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-07-29 13:59:22)
>>
>> On 26/07/2019 09:43, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2019-07-24 12:11:54)
>>>>
>>>> On 23/07/2019 19:38, Chris Wilson wrote:
>>>>> Push the ring creation flags from the outer GEM context to the inner
>>>>> intel_cotnext to avoid an unsightly back-reference from inside the
>>>>> backend.
>>>>
>>>> Sorry I find this quite ugly. Passing in integers in pointers filed and
>>>> casting back and forth.
>>>
>>> But better than a union, since once the intel_context is active, the
>>> ring is always a ring.
>>
>> Unless it is u64 size. I am not buying it. :)
> 
> We don't need u64 size? I don't understand what you mean.

I complained about the very unobvious and questionable hack of passing the 
size in the pointer field and you said it is better than a union. For 
me a union certainly rates way higher than the casting hack with a macro.

>>>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>>>>> index 6d3911469801..e237bcecfa1f 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>>>> @@ -328,10 +328,14 @@ static void print_context_stats(struct seq_file *m,
>>>>>     
>>>>>                 for_each_gem_engine(ce,
>>>>>                                     i915_gem_context_lock_engines(ctx), it) {
>>>>> -                     if (ce->state)
>>>>> -                             per_file_stats(0, ce->state->obj, &kstats);
>>>>> -                     if (ce->ring)
>>>>> +                     intel_context_lock_pinned(ce);
>>>>> +                     if (intel_context_is_pinned(ce)) {
>>>>
>>>> And these hunks do not seem to belong to this patch.
>>>
>>> Then you are mistaken. The bug is older, but it gets triggered by this
>>> patch.
>>
>> Gets triggered or gets fixed? Perhaps commit message needs improving
>> since it talks about avoiding an unsightly back-reference (and I argue
>> ce->ring = u64 size is at least equally unsightly), and not fixing any bugs.
> 
> The bug is a potential race condition inside the debug. What is hit here
> is that without the state of the pin known, the meaning of ce->ring is
> unknown (whereas the other bug is that condition can change during
> evaluation).

Commit doesn't say anything about fixing bugs. It talks about making the 
code prettier.

If here we need a pin, then it should be a separate patch which says so 
and does only one thing.

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 04/23] drm/i915: Push the ring creation flags to the backend
  2019-08-01  8:42           ` Tvrtko Ursulin
@ 2019-08-01  8:45             ` Chris Wilson
  2019-08-01  8:46             ` Chris Wilson
  1 sibling, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-08-01  8:45 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-08-01 09:42:15)
> 
> On 30/07/2019 10:38, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-07-29 13:59:22)
> >>
> >> On 26/07/2019 09:43, Chris Wilson wrote:
> >>> Quoting Tvrtko Ursulin (2019-07-24 12:11:54)
> >>>>
> >>>> On 23/07/2019 19:38, Chris Wilson wrote:
> >>>>> Push the ring creation flags from the outer GEM context to the inner
> >>>>> intel_cotnext to avoid an unsightly back-reference from inside the
> >>>>> backend.
> >>>>
> >>>> Sorry I find this quite ugly. Passing in integers in pointers filed and
> >>>> casting back and forth.
> >>>
> >>> But better than a union, since once the intel_context is active, the
> >>> ring is always a ring.
> >>
> >> Unless it is u64 size. I am not buying it. :)
> > 
> > We don't need u64 size? I don't understand what you mean.
> 
> I complained about very unobvious and questionable hack of passing the 
> size in the pointer field and you said it is better than an union. For 
> me union certainly rates way higher than the casing hack with a macro.
> 
> >>>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> >>>>> index 6d3911469801..e237bcecfa1f 100644
> >>>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> >>>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> >>>>> @@ -328,10 +328,14 @@ static void print_context_stats(struct seq_file *m,
> >>>>>     
> >>>>>                 for_each_gem_engine(ce,
> >>>>>                                     i915_gem_context_lock_engines(ctx), it) {
> >>>>> -                     if (ce->state)
> >>>>> -                             per_file_stats(0, ce->state->obj, &kstats);
> >>>>> -                     if (ce->ring)
> >>>>> +                     intel_context_lock_pinned(ce);
> >>>>> +                     if (intel_context_is_pinned(ce)) {
> >>>>
> >>>> And these hunks do not seem to belong to this patch.
> >>>
> >>> Then you are mistaken. The bug is older, but it gets triggered by this
> >>> patch.
> >>
> >> Gets triggered or gets fixed? Perhaps commit message needs improving
> >> since it talks about avoiding an unsightly back-reference (and I argue
> >> ce->ring = u64 size is at least equally unsightly), and not fixing any bugs.
> > 
> > The bug is a potential race condition inside the debug. What is hit here
> > is that without the state of the pin known, the meaning of ce->ring is
> > unknown (whereas the other bug is that condition can change during
> > evaluation).
> 
> Commit doesn't say anything about fixing bugs. It talks about making the 
> code prettier.
> 
> If here we need a pin, then it should be a separate patch which says so 
> and does only one thing.

It's not as if we are going to backport it... The bug is trivial to hit
when ring is an alias; to hit it going the other way requires more mutex
evasion.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 04/23] drm/i915: Push the ring creation flags to the backend
  2019-08-01  8:42           ` Tvrtko Ursulin
  2019-08-01  8:45             ` Chris Wilson
@ 2019-08-01  8:46             ` Chris Wilson
  1 sibling, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-08-01  8:46 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-08-01 09:42:15)
> 
> On 30/07/2019 10:38, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-07-29 13:59:22)
> >>
> >> On 26/07/2019 09:43, Chris Wilson wrote:
> >>> Quoting Tvrtko Ursulin (2019-07-24 12:11:54)
> >>>>
> >>>> On 23/07/2019 19:38, Chris Wilson wrote:
> >>>>> Push the ring creation flags from the outer GEM context to the inner
> >>>>> intel_cotnext to avoid an unsightly back-reference from inside the
> >>>>> backend.
> >>>>
> >>>> Sorry I find this quite ugly. Passing in integers in pointers filed and
> >>>> casting back and forth.
> >>>
> >>> But better than a union, since once the intel_context is active, the
> >>> ring is always a ring.
> >>
> >> Unless it is u64 size. I am not buying it. :)
> > 
> > We don't need u64 size? I don't understand what you mean.
> 
> I complained about very unobvious and questionable hack of passing the 
> size in the pointer field and you said it is better than an union. For 
> me union certainly rates way higher than the casing hack with a macro.

We store values inside pointer slots all the time, sometimes even memory
addresses.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 03/23] drm/i915: Remove lrc default desc from GEM context
  2019-08-01  8:41       ` Chris Wilson
@ 2019-08-01  8:53         ` Tvrtko Ursulin
  2019-08-01 10:57           ` Chris Wilson
  0 siblings, 1 reply; 55+ messages in thread
From: Tvrtko Ursulin @ 2019-08-01  8:53 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 01/08/2019 09:41, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-08-01 09:37:38)
>>
>> On 24/07/2019 10:20, Tvrtko Ursulin wrote:
>>>
>>> On 23/07/2019 19:38, Chris Wilson wrote:
>>>> We only compute the lrc_descriptor() on pinning the context, i.e.
>>>> infrequently, so we do not benefit from storing the template as the
>>>> addressing mode is also fixed for the lifetime of the intel_context.
>>>>
>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>> ---
>>>>    drivers/gpu/drm/i915/gem/i915_gem_context.c   | 28 ++-----------------
>>>>    .../gpu/drm/i915/gem/i915_gem_context_types.h |  2 --
>>>>    drivers/gpu/drm/i915/gt/intel_lrc.c           | 12 +++++---
>>>>    drivers/gpu/drm/i915/gvt/scheduler.c          |  3 --
>>>>    4 files changed, 10 insertions(+), 35 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c
>>>> b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>>>> index b28c7ca681a8..1b3dc7258ef2 100644
>>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
>>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>>>> @@ -397,30 +397,6 @@ static void context_close(struct i915_gem_context
>>>> *ctx)
>>>>        i915_gem_context_put(ctx);
>>>>    }
>>>> -static u32 default_desc_template(const struct drm_i915_private *i915,
>>>> -                 const struct i915_address_space *vm)
>>>> -{
>>>> -    u32 address_mode;
>>>> -    u32 desc;
>>>> -
>>>> -    desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
>>>> -
>>>> -    address_mode = INTEL_LEGACY_32B_CONTEXT;
>>>> -    if (vm && i915_vm_is_4lvl(vm))
>>>> -        address_mode = INTEL_LEGACY_64B_CONTEXT;
>>>> -    desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT;
>>>> -
>>>> -    if (IS_GEN(i915, 8))
>>>> -        desc |= GEN8_CTX_L3LLC_COHERENT;
>>>> -
>>>> -    /* TODO: WaDisableLiteRestore when we start using semaphore
>>>> -     * signalling between Command Streamers
>>>> -     * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE;
>>>> -     */
>>>> -
>>>> -    return desc;
>>>> -}
>>>> -
>>>>    static struct i915_gem_context *
>>>>    __create_context(struct drm_i915_private *i915)
>>>>    {
>>>> @@ -459,7 +435,6 @@ __create_context(struct drm_i915_private *i915)
>>>>        i915_gem_context_set_recoverable(ctx);
>>>>        ctx->ring_size = 4 * PAGE_SIZE;
>>>> -    ctx->desc_template = default_desc_template(i915, NULL);
>>>>        for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
>>>>            ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
>>>> @@ -478,8 +453,9 @@ __set_ppgtt(struct i915_gem_context *ctx, struct
>>>> i915_address_space *vm)
>>>>        struct i915_gem_engines_iter it;
>>>>        struct intel_context *ce;
>>>> +    GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old));
>>>> +
>>>>        ctx->vm = i915_vm_get(vm);
>>>> -    ctx->desc_template = default_desc_template(ctx->i915, vm);
>>>>        for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
>>>>            i915_vm_put(ce->vm);
>>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
>>>> b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
>>>> index 0ee61482ef94..a02d98494078 100644
>>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
>>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
>>>> @@ -171,8 +171,6 @@ struct i915_gem_context {
>>>>        /** ring_size: size for allocating the per-engine ring buffer */
>>>>        u32 ring_size;
>>>> -    /** desc_template: invariant fields for the HW context descriptor */
>>>> -    u32 desc_template;
>>>>        /** guilty_count: How many times this context has caused a GPU
>>>> hang. */
>>>>        atomic_t guilty_count;
>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c
>>>> b/drivers/gpu/drm/i915/gt/intel_lrc.c
>>>> index 632344c163a8..5fdac40015cf 100644
>>>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>>>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>>>> @@ -418,13 +418,17 @@ lrc_descriptor(struct intel_context *ce, struct
>>>> intel_engine_cs *engine)
>>>>        BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
>>>>        BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID >
>>>> (BIT(GEN11_SW_CTX_ID_WIDTH)));
>>>> -    desc = ctx->desc_template;                /* bits  0-11 */
>>>> -    GEM_BUG_ON(desc & GENMASK_ULL(63, 12));
>>>> +    desc = INTEL_LEGACY_32B_CONTEXT;
>>>> +    if (i915_vm_is_4lvl(ce->vm))
>>>> +        desc = INTEL_LEGACY_64B_CONTEXT;
>>>
>>> if-else now that the vm null check is gone.
>>>
>>>> +    desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
>>>> +
>>>> +    desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
>>>> +    if (IS_GEN(engine->i915, 8))
>>>> +        desc |= GEN8_CTX_L3LLC_COHERENT;
>>>
>>> Don't know.. it's nicer to keep it stored it both for Gen and context
>>> state. What's the problem with it?
>>
>> Ping.
> 
> There's no gem_context.

We could store it in ce then. We already have well defined control 
points for when vm changes when all are updated.

If done like this then it looks like assigning ctx->hw_id could also do 
the default_desc update, so that we can avoid even more work done at pin 
time.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 03/23] drm/i915: Remove lrc default desc from GEM context
  2019-08-01  8:53         ` Tvrtko Ursulin
@ 2019-08-01 10:57           ` Chris Wilson
  2019-08-01 11:13             ` Chris Wilson
  0 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2019-08-01 10:57 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-08-01 09:53:15)
> 
> On 01/08/2019 09:41, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-08-01 09:37:38)
> >>
> >> On 24/07/2019 10:20, Tvrtko Ursulin wrote:
> >>>
> >>> On 23/07/2019 19:38, Chris Wilson wrote:
> >>>> We only compute the lrc_descriptor() on pinning the context, i.e.
> >>>> infrequently, so we do not benefit from storing the template as the
> >>>> addressing mode is also fixed for the lifetime of the intel_context.
> >>>>
> >>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>>> ---
> >>>>    drivers/gpu/drm/i915/gem/i915_gem_context.c   | 28 ++-----------------
> >>>>    .../gpu/drm/i915/gem/i915_gem_context_types.h |  2 --
> >>>>    drivers/gpu/drm/i915/gt/intel_lrc.c           | 12 +++++---
> >>>>    drivers/gpu/drm/i915/gvt/scheduler.c          |  3 --
> >>>>    4 files changed, 10 insertions(+), 35 deletions(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> >>>> b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> >>>> index b28c7ca681a8..1b3dc7258ef2 100644
> >>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> >>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> >>>> @@ -397,30 +397,6 @@ static void context_close(struct i915_gem_context
> >>>> *ctx)
> >>>>        i915_gem_context_put(ctx);
> >>>>    }
> >>>> -static u32 default_desc_template(const struct drm_i915_private *i915,
> >>>> -                 const struct i915_address_space *vm)
> >>>> -{
> >>>> -    u32 address_mode;
> >>>> -    u32 desc;
> >>>> -
> >>>> -    desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
> >>>> -
> >>>> -    address_mode = INTEL_LEGACY_32B_CONTEXT;
> >>>> -    if (vm && i915_vm_is_4lvl(vm))
> >>>> -        address_mode = INTEL_LEGACY_64B_CONTEXT;
> >>>> -    desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT;
> >>>> -
> >>>> -    if (IS_GEN(i915, 8))
> >>>> -        desc |= GEN8_CTX_L3LLC_COHERENT;
> >>>> -
> >>>> -    /* TODO: WaDisableLiteRestore when we start using semaphore
> >>>> -     * signalling between Command Streamers
> >>>> -     * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE;
> >>>> -     */
> >>>> -
> >>>> -    return desc;
> >>>> -}
> >>>> -
> >>>>    static struct i915_gem_context *
> >>>>    __create_context(struct drm_i915_private *i915)
> >>>>    {
> >>>> @@ -459,7 +435,6 @@ __create_context(struct drm_i915_private *i915)
> >>>>        i915_gem_context_set_recoverable(ctx);
> >>>>        ctx->ring_size = 4 * PAGE_SIZE;
> >>>> -    ctx->desc_template = default_desc_template(i915, NULL);
> >>>>        for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
> >>>>            ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
> >>>> @@ -478,8 +453,9 @@ __set_ppgtt(struct i915_gem_context *ctx, struct
> >>>> i915_address_space *vm)
> >>>>        struct i915_gem_engines_iter it;
> >>>>        struct intel_context *ce;
> >>>> +    GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old));
> >>>> +
> >>>>        ctx->vm = i915_vm_get(vm);
> >>>> -    ctx->desc_template = default_desc_template(ctx->i915, vm);
> >>>>        for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
> >>>>            i915_vm_put(ce->vm);
> >>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> >>>> b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> >>>> index 0ee61482ef94..a02d98494078 100644
> >>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> >>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> >>>> @@ -171,8 +171,6 @@ struct i915_gem_context {
> >>>>        /** ring_size: size for allocating the per-engine ring buffer */
> >>>>        u32 ring_size;
> >>>> -    /** desc_template: invariant fields for the HW context descriptor */
> >>>> -    u32 desc_template;
> >>>>        /** guilty_count: How many times this context has caused a GPU
> >>>> hang. */
> >>>>        atomic_t guilty_count;
> >>>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c
> >>>> b/drivers/gpu/drm/i915/gt/intel_lrc.c
> >>>> index 632344c163a8..5fdac40015cf 100644
> >>>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> >>>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> >>>> @@ -418,13 +418,17 @@ lrc_descriptor(struct intel_context *ce, struct
> >>>> intel_engine_cs *engine)
> >>>>        BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
> >>>>        BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID >
> >>>> (BIT(GEN11_SW_CTX_ID_WIDTH)));
> >>>> -    desc = ctx->desc_template;                /* bits  0-11 */
> >>>> -    GEM_BUG_ON(desc & GENMASK_ULL(63, 12));
> >>>> +    desc = INTEL_LEGACY_32B_CONTEXT;
> >>>> +    if (i915_vm_is_4lvl(ce->vm))
> >>>> +        desc = INTEL_LEGACY_64B_CONTEXT;
> >>>
> >>> if-else now that the vm null check is gone.
> >>>
> >>>> +    desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
> >>>> +
> >>>> +    desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
> >>>> +    if (IS_GEN(engine->i915, 8))
> >>>> +        desc |= GEN8_CTX_L3LLC_COHERENT;
> >>>
> >>> Don't know.. it's nicer to keep it stored it both for Gen and context
> >>> state. What's the problem with it?
> >>
> >> Ping.
> > 
> > There's no gem_context.
> 
> We could store it in ce then. We already have well defined control 
> points for when vm changes when all are updated.

We are storing it in ce; it's not like we recompute it all that often,
and when we do it's because we have rebound the vma.

> If done like this then it looks like assigning ctx->hw_id could also do 
> the default_desc update, so that we can avoid even more work done at pin 
> time.

What ctx->hw_id? You are imagining things again :-p

Remember that we only do this on first pin from idle, not every pin.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 03/23] drm/i915: Remove lrc default desc from GEM context
  2019-08-01 10:57           ` Chris Wilson
@ 2019-08-01 11:13             ` Chris Wilson
  2019-08-01 15:29               ` Tvrtko Ursulin
  0 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2019-08-01 11:13 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Chris Wilson (2019-08-01 11:57:06)
> Quoting Tvrtko Ursulin (2019-08-01 09:53:15)
> > We could store it in ce then. We already have well defined control 
> > points for when vm changes when all are updated.
> 
> We are storing it in ce; it's not like we recompute it all that often,
> and when we do it's because we have rebound the vma.
> 
> > If done like this then it looks like assigning ctx->hw_id could also do 
> > the default_desc update, so that we can avoid even more work done at pin 
> > time.
> 
> What ctx->hw_id? You are imagining things again :-p
> 
> Remember that we only do this on first pin from idle, not every pin.

Fwiw, I quickly looked at only doing it if the vma is rebound, but
that's more branches just to save a couple. The low frequency at which
we have to actually compute this (walk a few more branches inside an
already branchy context_pin) doesn't seem to justify the extra storage for
me. It's not like we are recomputing lrc_desc on every submit as it once
was.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 03/23] drm/i915: Remove lrc default desc from GEM context
  2019-08-01 11:13             ` Chris Wilson
@ 2019-08-01 15:29               ` Tvrtko Ursulin
  2019-08-01 15:48                 ` Chris Wilson
  0 siblings, 1 reply; 55+ messages in thread
From: Tvrtko Ursulin @ 2019-08-01 15:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 01/08/2019 12:13, Chris Wilson wrote:
> Quoting Chris Wilson (2019-08-01 11:57:06)
>> Quoting Tvrtko Ursulin (2019-08-01 09:53:15)
>>> We could store it in ce then. We already have well defined control
>>> points for when vm changes when all are updated.
>>
>> We are storing it in ce; it's not like we recompute it all that often,
>> and when we do it's because we have rebound the vma.
>>
>>> If done like this then it looks like assigning ctx->hw_id could also do
>>> the default_desc update, so that we can avoid even more work done at pin
>>> time.
>>
>> What ctx->hw_id? You are imagining things again :-p
>>
>> Remember that we only do this on first pin from idle, not every pin.
> 
> Fwiw, I quickly looked at only doing it if the vma is rebound, but
> that's more branches just to save a couple. The low frequency at which
> we have to actually compute this (walk a few more branches inside an
> already branchy context_pin) doesn't seem to justify the extra storage for
> me. It's not like we are recomputing lrc_desc on every submit as it once
> was.

On every submit if last request got retired in the meantime, no, for 
instance bursty loads? Yeah it is very inconsequential but at some point 
we made an effort to cache as much as possible what is invariant so it 
saddens me a bit to remove that.

For instance Icelake engine dependent stuff sneaked into 
intel_lrc.c/lrc_descriptors at some point, which is also against the 
spirit of caching. If we were to move the cached value in ce then we 
would be able to remove that and have it once again minimal in there.

Not only just minimal, but not separated in two separate places. I guess 
this patch improves things in that respect - consolidates the lrc_desc 
computation once again.

I did not get the part about VMA re-binding. I did not suggest to move 
the lrca offset into cache as well. I was just thinking about the gen, 
engine and vm dependent bits could naturally go into 
i915_gem_context.c/default_desc_template. Just need to take (engine, 
hw_id, vm).

And virtual engine would have to re-compute it when moving engines. Hm.. 
we don't seem to do that? Only when pinning we set it up based on 
sibling[0] so how it all works? We don't re-pin when moving engine I 
thought.

Aside that, if you are still not convinced my argument makes sense, you 
can have my ack.

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 03/23] drm/i915: Remove lrc default desc from GEM context
  2019-08-01 15:29               ` Tvrtko Ursulin
@ 2019-08-01 15:48                 ` Chris Wilson
  2019-08-01 16:00                   ` Chris Wilson
  0 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2019-08-01 15:48 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-08-01 16:29:53)
> 
> On 01/08/2019 12:13, Chris Wilson wrote:
> > Quoting Chris Wilson (2019-08-01 11:57:06)
> >> Quoting Tvrtko Ursulin (2019-08-01 09:53:15)
> >>> We could store it in ce then. We already have well defined control
> >>> points for when vm changes when all are updated.
> >>
> >> We are storing it in ce; it's not like we recompute it all that often,
> >> and when we do it's because we have rebound the vma.
> >>
> >>> If done like this then it looks like assigning ctx->hw_id could also do
> >>> the default_desc update, so that we can avoid even more work done at pin
> >>> time.
> >>
> >> What ctx->hw_id? You are imagining things again :-p
> >>
> >> Remember that we only do this on first pin from idle, not every pin.
> > 
> > Fwiw, I quickly looked at only doing it if the vma is rebound, but
> > that's more branches just to save a couple. The low frequency at which
> > we have to actually compute this (walk a few more branches inside an
> > already branchy context_pin) doesn't seem to justify the extra storage for
> > me. It's not like we are recomputing lrc_desc on every submit as it once
> > was.
> 
> On every submit if last request got retired in the meantime, no, for 
> instance bursty loads? Yeah it is very inconsequential but at some point 
> we made an effort to cache as much as possible what is invariant so it 
> saddens me a bit to remove that.

Once we have hw_id out of the way, we only need to set the bottom 32b
here.
 
> For instance Icelake engine dependent stuff sneaked into 
> intel_lrc.c/lrc_descriptors at some point, which is also against the 
> spirit of caching. If we were to move the cached value in ce then we 
> would be able to remove that and have it once again minimal in there.

Well we can set all bits but hw_id/lrca at init time. How about if I run
that past you?

> Not only just minimal, but not separated in two separate places. I guess 
> this patch improves things in that respect - consolidates the lrc_desc 
> computation once again.
> 
> I did not get the part about VMA re-binding. I did not suggest to move 
> the lrca offset into cache as well. I was just thinking about the gen, 
> engine and vm dependent bits could naturally go into 
> i915_gem_context.c/default_desc_template. Just need to take (engine, 
> hw_id, vm).

I'm just thinking about the bit that changes inside ce->lrc_desc.
 
> And virtual engine would have to re-compute it when moving engines. Hm.. 
> we don't seem to do that? Only when pinning we set it up based on 
> sibling[0] so how it all works? We don't re-pin when moving engine I 
> thought.

No. We don't. Whoops. Good job clearly nothing uses that then.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 03/23] drm/i915: Remove lrc default desc from GEM context
  2019-08-01 15:48                 ` Chris Wilson
@ 2019-08-01 16:00                   ` Chris Wilson
  2019-08-01 16:22                     ` Tvrtko Ursulin
  0 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2019-08-01 16:00 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Chris Wilson (2019-08-01 16:48:33)
> Quoting Tvrtko Ursulin (2019-08-01 16:29:53)
> > For instance Icelake engine dependent stuff sneaked into 
> > intel_lrc.c/lrc_descriptors at some point, which is also against the 
> > spirit of caching. If we were to move the cached value in ce then we 
> > would be able to remove that and have it once again minimal in there.
> 
> Well we can set all bits but hw_id/lrca at init time. How about if I run
> that past you?

 static u64
-lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
+base_lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 {
-       struct i915_gem_context *ctx = ce->gem_context;
        u64 desc;

        BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
@@ -426,18 +425,12 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
        if (IS_GEN(engine->i915, 8))
                desc |= GEN8_CTX_L3LLC_COHERENT;

-       desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
-                                                               /* bits 12-31 */
        /*
         * The following 32bits are copied into the OA reports (dword 2).
         * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
         * anything below.
         */
        if (INTEL_GEN(engine->i915) >= 11) {
-               GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
-               desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
-                                                               /* bits 37-47 */
-
                desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
                                                                /* bits 48-53 */

@@ -445,8 +438,29 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)

                desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
                                                                /* bits 61-63 */
+       }
+
+       return desc;
+}
+
+static u64
+update_lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
+{
+       struct i915_gem_context *ctx = ce->gem_context;
+       u64 desc = ce->lrc_desc;
+
+       desc &= ~GENMASK_ULL(31, 12);
+       desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
+
+       if (INTEL_GEN(engine->i915) >= 11) {
+               GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
+
+               desc &= ~GENMASK_ULL(47, 37);
+               desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
        } else {
                GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
+
+               desc &= ~GENMASK_ULL(52, 32);
                desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT;   /* bits 32-52 */
        }

@@ -1631,7 +1645,7 @@ __execlists_context_pin(struct intel_context *ce,
        if (ret)
                goto unpin_map;

-       ce->lrc_desc = lrc_descriptor(ce, engine);
+       ce->lrc_desc = update_lrc_descriptor(ce, engine);
        ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
        __execlists_update_reg_state(ce, engine);

@@ -3126,6 +3140,8 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
        ce->ring = ring;
        ce->state = vma;

+       ce->lrc_desc = base_lrc_descriptor(ce, engine);
+
        return 0;

 error_ring_free:

That's pretty much the same amount of work in context_pin. I'm not
convinced that caching between pins achieves very much.

Concur?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 03/23] drm/i915: Remove lrc default desc from GEM context
  2019-08-01 16:00                   ` Chris Wilson
@ 2019-08-01 16:22                     ` Tvrtko Ursulin
  2019-08-01 16:36                       ` Chris Wilson
  0 siblings, 1 reply; 55+ messages in thread
From: Tvrtko Ursulin @ 2019-08-01 16:22 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 01/08/2019 17:00, Chris Wilson wrote:
> Quoting Chris Wilson (2019-08-01 16:48:33)
>> Quoting Tvrtko Ursulin (2019-08-01 16:29:53)
>>> For instance Icelake engine dependent stuff sneaked into
>>> intel_lrc.c/lrc_descriptors at some point, which is also against the
>>> spirit of caching. If we were to move the cached value in ce then we
>>> would be able to remove that and have it once again minimal in there.
>>
>> Well we can set all bits but hw_id/lrca at init time. How about if I run
>> that past you?
> 
>   static u64
> -lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
> +base_lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
>   {
> -       struct i915_gem_context *ctx = ce->gem_context;
>          u64 desc;
> 
>          BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
> @@ -426,18 +425,12 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
>          if (IS_GEN(engine->i915, 8))
>                  desc |= GEN8_CTX_L3LLC_COHERENT;
> 
> -       desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
> -                                                               /* bits 12-31 */
>          /*
>           * The following 32bits are copied into the OA reports (dword 2).
>           * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
>           * anything below.
>           */
>          if (INTEL_GEN(engine->i915) >= 11) {
> -               GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
> -               desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
> -                                                               /* bits 37-47 */
> -
>                  desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
>                                                                  /* bits 48-53 */
> 
> @@ -445,8 +438,29 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
> 
>                  desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
>                                                                  /* bits 61-63 */
> +       }
> +
> +       return desc;
> +}
> +
> +static u64
> +update_lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
> +{
> +       struct i915_gem_context *ctx = ce->gem_context;
> +       u64 desc = ce->lrc_desc;
> +
> +       desc &= ~GENMASK_ULL(31, 12);
> +       desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
> +
> +       if (INTEL_GEN(engine->i915) >= 11) {
> +               GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
> +
> +               desc &= ~GENMASK_ULL(47, 37);
> +               desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
>          } else {
>                  GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
> +
> +               desc &= ~GENMASK_ULL(52, 32);
>                  desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT;   /* bits 32-52 */
>          }
> 
> @@ -1631,7 +1645,7 @@ __execlists_context_pin(struct intel_context *ce,
>          if (ret)
>                  goto unpin_map;
> 
> -       ce->lrc_desc = lrc_descriptor(ce, engine);
> +       ce->lrc_desc = update_lrc_descriptor(ce, engine);
>          ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
>          __execlists_update_reg_state(ce, engine);
> 
> @@ -3126,6 +3140,8 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
>          ce->ring = ring;
>          ce->state = vma;
> 
> +       ce->lrc_desc = base_lrc_descriptor(ce, engine);
> +
>          return 0;
> 
>   error_ring_free:
> 
> That's pretty much the same amount of work in context_pin. I'm not
> convinced that caching between pins achieves very much.
> 
> Concur?

Who kidnapped real Chris? :D We could merge the mask clearing and reduce 
pin to one conditional and one and, shift, or. :)

Okay, have it your way.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 03/23] drm/i915: Remove lrc default desc from GEM context
  2019-08-01 16:22                     ` Tvrtko Ursulin
@ 2019-08-01 16:36                       ` Chris Wilson
  0 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2019-08-01 16:36 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-08-01 17:22:27)
> 
> On 01/08/2019 17:00, Chris Wilson wrote:
> Who kidnapped real Chris? :D We could merge the mask clearing and reduce 
> pin to one conditional and one and, shift, or. :)

Don't worry — in about 24 patches' time, we can remove the branches.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 55+ messages in thread

end of thread, other threads:[~2019-08-01 16:36 UTC | newest]

Thread overview: 55+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-23 18:38 [PATCH 01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Chris Wilson
2019-07-23 18:38 ` [PATCH 02/23] drm/i915/gt: Provide a local intel_context.vm Chris Wilson
2019-07-23 18:38 ` [PATCH 03/23] drm/i915: Remove lrc default desc from GEM context Chris Wilson
2019-07-24  9:20   ` Tvrtko Ursulin
2019-08-01  8:37     ` Tvrtko Ursulin
2019-08-01  8:41       ` Chris Wilson
2019-08-01  8:53         ` Tvrtko Ursulin
2019-08-01 10:57           ` Chris Wilson
2019-08-01 11:13             ` Chris Wilson
2019-08-01 15:29               ` Tvrtko Ursulin
2019-08-01 15:48                 ` Chris Wilson
2019-08-01 16:00                   ` Chris Wilson
2019-08-01 16:22                     ` Tvrtko Ursulin
2019-08-01 16:36                       ` Chris Wilson
2019-07-23 18:38 ` [PATCH 04/23] drm/i915: Push the ring creation flags to the backend Chris Wilson
2019-07-24 11:11   ` Tvrtko Ursulin
2019-07-26  8:43     ` Chris Wilson
2019-07-29 12:59       ` Tvrtko Ursulin
2019-07-30  9:38         ` Chris Wilson
2019-08-01  8:42           ` Tvrtko Ursulin
2019-08-01  8:45             ` Chris Wilson
2019-08-01  8:46             ` Chris Wilson
2019-07-23 18:38 ` [PATCH 05/23] drm/i915: Flush extra hard after writing relocations through the GTT Chris Wilson
2019-07-23 18:38 ` [PATCH 06/23] drm/i915: Hide unshrinkable context objects from the shrinker Chris Wilson
2019-07-23 18:38 ` [PATCH 07/23] drm/i915/gt: Move the [class][inst] lookup for engines onto the GT Chris Wilson
2019-07-25 21:21   ` Daniele Ceraolo Spurio
2019-07-26  9:22   ` Tvrtko Ursulin
2019-07-26  9:33     ` Chris Wilson
2019-07-26  9:51       ` Tvrtko Ursulin
2019-07-26  9:57         ` Chris Wilson
2019-07-23 18:38 ` [PATCH 08/23] drm/i915: Introduce for_each_user_engine() Chris Wilson
2019-07-23 18:38 ` [PATCH 09/23] drm/i915: Use intel_engine_lookup_user for probing HAS_BSD etc Chris Wilson
2019-07-23 18:38 ` [PATCH 10/23] drm/i915: Isolate i915_getparam_ioctl() Chris Wilson
2019-07-23 18:38 ` [PATCH 11/23] drm/i915: Only include active engines in the capture state Chris Wilson
2019-07-23 18:38 ` [PATCH 12/23] drm/i915: Teach execbuffer to take the engine wakeref not GT Chris Wilson
2019-07-23 18:38 ` [PATCH 13/23] drm/i915/gt: Track timeline activeness in enter/exit Chris Wilson
2019-07-23 18:38 ` [PATCH 14/23] drm/i915/gt: Convert timeline tracking to spinlock Chris Wilson
2019-07-23 18:38 ` [PATCH 15/23] drm/i915/gt: Guard timeline pinning with its own mutex Chris Wilson
2019-07-23 18:38 ` [PATCH 16/23] drm/i915/gt: Add to timeline requires the timeline mutex Chris Wilson
2019-07-23 18:38 ` [PATCH 17/23] drm/i915: Protect request retirement with timeline->mutex Chris Wilson
2019-07-23 18:38 ` [PATCH 18/23] drm/i915: Replace struct_mutex for batch pool serialisation Chris Wilson
2019-07-23 18:38 ` [PATCH 19/23] drm/i915/gt: Mark context->active_count as protected by timeline->mutex Chris Wilson
2019-07-23 18:38 ` [PATCH 20/23] drm/i915: Forgo last_fence active request tracking Chris Wilson
2019-07-23 18:38 ` [PATCH 21/23] drm/i915/overlay: Switch to using i915_active tracking Chris Wilson
2019-07-23 18:38 ` [PATCH 22/23] drm/i915: Extract intel_frontbuffer active tracking Chris Wilson
2019-07-23 18:38 ` [PATCH 23/23] drm/i915: Markup expected timeline locks for i915_active Chris Wilson
2019-07-23 20:16 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/23] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt Patchwork
2019-07-23 20:27 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-07-23 20:38 ` ✓ Fi.CI.BAT: success " Patchwork
2019-07-24  4:13 ` ✗ Fi.CI.IGT: failure " Patchwork
2019-07-24  8:56 ` [PATCH 01/23] " Tvrtko Ursulin
2019-07-24  9:27   ` Chris Wilson
2019-07-24  9:37     ` Chris Wilson
2019-07-24  9:47       ` Chris Wilson
2019-07-24  9:54         ` Tvrtko Ursulin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.