All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/8] drm/i915/guc: Move shared data allocation away from submission path
@ 2017-12-13 12:50 Michał Winiarski
  2017-12-13 12:50 ` [PATCH v2 2/8] drm/i915/guc: Move GuC workqueue allocations outside of the mutex Michał Winiarski
                   ` (9 more replies)
  0 siblings, 10 replies; 19+ messages in thread
From: Michał Winiarski @ 2017-12-13 12:50 UTC (permalink / raw)
  To: intel-gfx

We need shared data for actions (e.g. guc suspend/resume), and we're
using those with GuC submission disabled.
Let's introduce intel_guc_init and move shared data alloc there.

This fixes GPF during module unload with HuC, but without GuC submission:

BUG: unable to handle kernel NULL pointer dereference at 000000005aee7809
IP: intel_guc_suspend+0x34/0x140 [i915]
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP
Modules linked in: i915(O-) netconsole x86_pkg_temp_thermal
intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel
mei_me i2c_i801 mei prime_numbers [last unloaded: i915]
CPU: 2 PID: 2794 Comm: rmmod Tainted: G     U  W  O 4.15.0-rc2+ #297
Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0054.2016.0930.1102 09/30/2016
task: 0000000055945c61 task.stack: 00000000264ccb43
RIP: 0010:intel_guc_suspend+0x34/0x140 [i915]
RSP: 0018:ffffc90000483df8 EFLAGS: 00010286
RAX: 0000000000000000 RBX: ffff880829180000 RCX: 0000000000000000
RDX: 0000000000000006 RSI: ffff880844c2c938 RDI: ffff880844c2c000
RBP: ffff880829180000 R08: 00000000a29c58c1 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffa040ba40
R13: ffffffffa040bab0 R14: ffff88084a195060 R15: 000055df3ef357a0
FS:  00007ff43c043740(0000) GS:ffff88084e200000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000000000f9 CR3: 000000083f179005 CR4: 00000000003606e0
Call Trace:
 i915_gem_suspend+0x9d/0x130 [i915]
 ? i915_driver_unload+0x68/0x180 [i915]
 i915_driver_unload+0x70/0x180 [i915]
 i915_pci_remove+0x15/0x20 [i915]
 pci_device_remove+0x36/0xb0
 device_release_driver_internal+0x15f/0x220
 driver_detach+0x3a/0x80
 bus_remove_driver+0x58/0xd0
 pci_unregister_driver+0x29/0x90
 SyS_delete_module+0x150/0x1e0
 entry_SYSCALL_64_fastpath+0x23/0x9a
RIP: 0033:0x7ff43b51b5c7
RSP: 002b:00007ffe6825a758 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007ff43b51b5c7
RDX: 000000000000000a RSI: 0000000000000800 RDI: 000055df3ef35808
RBP: 0000000000000000 R08: 00007ffe682596d1 R09: 0000000000000000
R10: 00007ff43b594880 R11: 0000000000000206 R12: 000055df3ef357a0
R13: 00007ffe68259740 R14: 000055df3ef35260 R15: 000055df3ef357a0
Code: 00 00 02 74 03 31 c0 c3 53 48 89 fb 48 83 ec 10 e8 52 0f
f8 ff 48 b8 01 05 00 00 02 00 00 00 48 89 44 24 04 48 8b 83 00 12 00 00 <f6> 80
f9 00 00 00 01 0f 84 a7 00 00 00 f6 80 98 00 00 00 01 0f
RIP: intel_guc_suspend+0x34/0x140 [i915] RSP: ffffc90000483df8
CR2: 00000000000000f9
---[ end trace 23a192a61d937a3e ]---

Fixes: b8e5eb960b28 ("drm/i915/guc: Allocate separate shared data object for GuC communication")
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
---
 drivers/gpu/drm/i915/intel_guc.c            | 51 +++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_guc.h            |  2 ++
 drivers/gpu/drm/i915/intel_guc_submission.c | 37 +--------------------
 drivers/gpu/drm/i915/intel_uc.c             | 10 +++---
 4 files changed, 60 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c
index 177ee69ca9b1..92ed22f38fc4 100644
--- a/drivers/gpu/drm/i915/intel_guc.c
+++ b/drivers/gpu/drm/i915/intel_guc.c
@@ -69,6 +69,57 @@ void intel_guc_init_early(struct intel_guc *guc)
 	guc->notify = gen8_guc_raise_irq;
 }
 
+static int guc_shared_data_create(struct intel_guc *guc)
+{
+	struct i915_vma *vma;
+	void *vaddr;
+
+	vma = intel_guc_allocate_vma(guc, PAGE_SIZE);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
+	if (IS_ERR(vaddr)) {
+		i915_vma_unpin_and_release(&vma);
+		return PTR_ERR(vaddr);
+	}
+
+	guc->shared_data = vma;
+	guc->shared_data_vaddr = vaddr;
+
+	return 0;
+}
+
+static void guc_shared_data_destroy(struct intel_guc *guc)
+{
+	i915_gem_object_unpin_map(guc->shared_data->obj);
+	i915_vma_unpin_and_release(&guc->shared_data);
+}
+
+int intel_guc_init(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	int ret;
+
+	ret = guc_shared_data_create(guc);
+	if (ret)
+		return ret;
+	GEM_BUG_ON(!guc->shared_data);
+
+	/* We need to notify the guc whenever we change the GGTT */
+	i915_ggtt_enable_guc(dev_priv);
+
+	return 0;
+}
+
+void intel_guc_fini(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+
+	i915_ggtt_disable_guc(dev_priv);
+	guc_shared_data_destroy(guc);
+}
+
 static u32 get_gt_type(struct drm_i915_private *dev_priv)
 {
 	/* XXX: GT type based on PCI device ID? field seems unused by fw */
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 59856726d2bc..81659e223e11 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -119,6 +119,8 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma)
 void intel_guc_init_early(struct intel_guc *guc);
 void intel_guc_init_send_regs(struct intel_guc *guc);
 void intel_guc_init_params(struct intel_guc *guc);
+int intel_guc_init(struct intel_guc *guc);
+void intel_guc_fini(struct intel_guc *guc);
 int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len);
 int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len);
 int intel_guc_sample_forcewake(struct intel_guc *guc);
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 912ff143d531..c020560c395e 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -447,33 +447,6 @@ static void guc_stage_desc_fini(struct intel_guc *guc,
 	memset(desc, 0, sizeof(*desc));
 }
 
-static int guc_shared_data_create(struct intel_guc *guc)
-{
-	struct i915_vma *vma;
-	void *vaddr;
-
-	vma = intel_guc_allocate_vma(guc, PAGE_SIZE);
-	if (IS_ERR(vma))
-		return PTR_ERR(vma);
-
-	vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
-	if (IS_ERR(vaddr)) {
-		i915_vma_unpin_and_release(&vma);
-		return PTR_ERR(vaddr);
-	}
-
-	guc->shared_data = vma;
-	guc->shared_data_vaddr = vaddr;
-
-	return 0;
-}
-
-static void guc_shared_data_destroy(struct intel_guc *guc)
-{
-	i915_gem_object_unpin_map(guc->shared_data->obj);
-	i915_vma_unpin_and_release(&guc->shared_data);
-}
-
 /* Construct a Work Item and append it to the GuC's Work Queue */
 static void guc_wq_item_append(struct intel_guc_client *client,
 			       u32 target_engine, u32 context_desc,
@@ -1279,14 +1252,9 @@ int intel_guc_submission_init(struct intel_guc *guc)
 	 */
 	GEM_BUG_ON(!guc->stage_desc_pool);
 
-	ret = guc_shared_data_create(guc);
-	if (ret)
-		goto err_stage_desc_pool;
-	GEM_BUG_ON(!guc->shared_data);
-
 	ret = intel_guc_log_create(guc);
 	if (ret < 0)
-		goto err_shared_data;
+		goto err_stage_desc_pool;
 
 	ret = guc_preempt_work_create(guc);
 	if (ret)
@@ -1304,8 +1272,6 @@ int intel_guc_submission_init(struct intel_guc *guc)
 	guc_preempt_work_destroy(guc);
 err_log:
 	intel_guc_log_destroy(guc);
-err_shared_data:
-	guc_shared_data_destroy(guc);
 err_stage_desc_pool:
 	guc_stage_desc_pool_destroy(guc);
 	return ret;
@@ -1316,7 +1282,6 @@ void intel_guc_submission_fini(struct intel_guc *guc)
 	guc_ads_destroy(guc);
 	guc_preempt_work_destroy(guc);
 	intel_guc_log_destroy(guc);
-	guc_shared_data_destroy(guc);
 	guc_stage_desc_pool_destroy(guc);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 461047c86e0d..3040a0e00142 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -24,6 +24,7 @@
 
 #include "intel_uc.h"
 #include "intel_guc_submission.h"
+#include "intel_guc.h"
 #include "i915_drv.h"
 
 /* Reset GuC providing us with fresh state for both GuC and HuC.
@@ -204,8 +205,9 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 	guc_disable_communication(guc);
 	gen9_reset_guc_interrupts(dev_priv);
 
-	/* We need to notify the guc whenever we change the GGTT */
-	i915_ggtt_enable_guc(dev_priv);
+	ret = intel_guc_init(guc);
+	if (ret)
+		goto err_out;
 
 	if (USES_GUC_SUBMISSION(dev_priv)) {
 		/*
@@ -298,7 +300,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 	if (USES_GUC_SUBMISSION(dev_priv))
 		intel_guc_submission_fini(guc);
 err_guc:
-	i915_ggtt_disable_guc(dev_priv);
+	intel_guc_fini(guc);
 err_out:
 	/*
 	 * Note that there is no fallback as either user explicitly asked for
@@ -330,5 +332,5 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
 		intel_guc_submission_fini(guc);
 	}
 
-	i915_ggtt_disable_guc(dev_priv);
+	intel_guc_fini(guc);
 }
-- 
2.14.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH v2 2/8] drm/i915/guc: Move GuC workqueue allocations outside of the mutex
  2017-12-13 12:50 [PATCH 1/8] drm/i915/guc: Move shared data allocation away from submission path Michał Winiarski
@ 2017-12-13 12:50 ` Michał Winiarski
  2017-12-13 15:23   ` Chris Wilson
  2017-12-13 12:50 ` [PATCH v2 3/8] drm/i915/guc: Extract guc_init from guc_init_hw Michał Winiarski
                   ` (8 subsequent siblings)
  9 siblings, 1 reply; 19+ messages in thread
From: Michał Winiarski @ 2017-12-13 12:50 UTC (permalink / raw)
  To: intel-gfx

This gets rid of the following lockdep splat:

======================================================
WARNING: possible circular locking dependency detected
4.15.0-rc2-CI-Patchwork_7428+ #1 Not tainted
------------------------------------------------------
debugfs_test/1351 is trying to acquire lock:
 (&dev->struct_mutex){+.+.}, at: [<000000009d90d1a3>] i915_mutex_lock_interruptible+0x47/0x130 [i915]

but task is already holding lock:
 (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #6 (&mm->mmap_sem){++++}:
       __might_fault+0x63/0x90
       _copy_to_user+0x1e/0x70
       filldir+0x8c/0xf0
       dcache_readdir+0xeb/0x160
       iterate_dir+0xe6/0x150
       SyS_getdents+0xa0/0x130
       entry_SYSCALL_64_fastpath+0x1c/0x89

-> #5 (&sb->s_type->i_mutex_key#5){++++}:
       lockref_get+0x9/0x20

-> #4 ((completion)&req.done){+.+.}:
       wait_for_common+0x54/0x210
       devtmpfs_create_node+0x130/0x150
       device_add+0x5ad/0x5e0
       device_create_groups_vargs+0xd4/0xe0
       device_create+0x35/0x40
       msr_device_create+0x22/0x40
       cpuhp_invoke_callback+0xc5/0xbf0
       cpuhp_thread_fun+0x167/0x210
       smpboot_thread_fn+0x17f/0x270
       kthread+0x173/0x1b0
       ret_from_fork+0x24/0x30

-> #3 (cpuhp_state-up){+.+.}:
       cpuhp_issue_call+0x132/0x1c0
       __cpuhp_setup_state_cpuslocked+0x12f/0x2a0
       __cpuhp_setup_state+0x3a/0x50
       page_writeback_init+0x3a/0x5c
       start_kernel+0x393/0x3e2
       secondary_startup_64+0xa5/0xb0

-> #2 (cpuhp_state_mutex){+.+.}:
       __mutex_lock+0x81/0x9b0
       __cpuhp_setup_state_cpuslocked+0x4b/0x2a0
       __cpuhp_setup_state+0x3a/0x50
       page_alloc_init+0x1f/0x26
       start_kernel+0x139/0x3e2
       secondary_startup_64+0xa5/0xb0

-> #1 (cpu_hotplug_lock.rw_sem){++++}:
       cpus_read_lock+0x34/0xa0
       apply_workqueue_attrs+0xd/0x40
       __alloc_workqueue_key+0x2c7/0x4e1
       intel_guc_submission_init+0x10c/0x650 [i915]
       intel_uc_init_hw+0x29e/0x460 [i915]
       i915_gem_init_hw+0xca/0x290 [i915]
       i915_gem_init+0x115/0x3a0 [i915]
       i915_driver_load+0x9a8/0x16c0 [i915]
       i915_pci_probe+0x2e/0x90 [i915]
       pci_device_probe+0x9c/0x120
       driver_probe_device+0x2a3/0x480
       __driver_attach+0xd9/0xe0
       bus_for_each_dev+0x57/0x90
       bus_add_driver+0x168/0x260
       driver_register+0x52/0xc0
       do_one_initcall+0x39/0x150
       do_init_module+0x56/0x1ef
       load_module+0x231c/0x2d70
       SyS_finit_module+0xa5/0xe0
       entry_SYSCALL_64_fastpath+0x1c/0x89

-> #0 (&dev->struct_mutex){+.+.}:
       lock_acquire+0xaf/0x200
       __mutex_lock+0x81/0x9b0
       i915_mutex_lock_interruptible+0x47/0x130 [i915]
       i915_gem_fault+0x201/0x760 [i915]
       __do_fault+0x15/0x70
       __handle_mm_fault+0x85b/0xe40
       handle_mm_fault+0x14f/0x2f0
       __do_page_fault+0x2d1/0x560
       page_fault+0x22/0x30

other info that might help us debug this:

Chain exists of:
  &dev->struct_mutex --> &sb->s_type->i_mutex_key#5 --> &mm->mmap_sem

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&mm->mmap_sem);
                               lock(&sb->s_type->i_mutex_key#5);
                               lock(&mm->mmap_sem);
  lock(&dev->struct_mutex);

 *** DEADLOCK ***

1 lock held by debugfs_test/1351:
 #0:  (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560

stack backtrace:
CPU: 2 PID: 1351 Comm: debugfs_test Not tainted 4.15.0-rc2-CI-Patchwork_7428+ #1
Hardware name:                  /NUC6i5SYB, BIOS SYSKLi35.86A.0057.2017.0119.1758 01/19/2017
Call Trace:
 dump_stack+0x5f/0x86
 print_circular_bug+0x230/0x3b0
 check_prev_add+0x439/0x7b0
 ? lockdep_init_map_crosslock+0x20/0x20
 ? unwind_get_return_address+0x16/0x30
 ? __lock_acquire+0x1385/0x15a0
 __lock_acquire+0x1385/0x15a0
 lock_acquire+0xaf/0x200
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 __mutex_lock+0x81/0x9b0
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? __pm_runtime_resume+0x4f/0x80
 i915_gem_fault+0x201/0x760 [i915]
 __do_fault+0x15/0x70
 __handle_mm_fault+0x85b/0xe40
 handle_mm_fault+0x14f/0x2f0
 __do_page_fault+0x2d1/0x560
 page_fault+0x22/0x30
RIP: 0033:0x7f98d6f49116
RSP: 002b:00007ffd6ffc3278 EFLAGS: 00010283
RAX: 00007f98d39a2bc0 RBX: 0000000000000000 RCX: 0000000000001680
RDX: 0000000000001680 RSI: 00007ffd6ffc3400 RDI: 00007f98d39a2bc0
RBP: 00007ffd6ffc33a0 R08: 0000000000000000 R09: 00000000000005a0
R10: 000055e847c2a830 R11: 0000000000000002 R12: 0000000000000001
R13: 000055e847c1d040 R14: 00007ffd6ffc3400 R15: 00007f98d6752ba0

v2: Init preempt_work unconditionally (Chris)

Testcase: igt/debugfs_test/read_all_entries
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c             |  1 +
 drivers/gpu/drm/i915/i915_gem.c             |  4 ++
 drivers/gpu/drm/i915/intel_guc.c            | 57 +++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_guc.h            |  2 +
 drivers/gpu/drm/i915/intel_guc_log.c        | 23 ----------
 drivers/gpu/drm/i915/intel_guc_submission.c | 70 +++++++----------------------
 drivers/gpu/drm/i915/intel_guc_submission.h |  2 +
 drivers/gpu/drm/i915/intel_uc.c             | 26 +++++++++++
 drivers/gpu/drm/i915/intel_uc.h             |  2 +
 9 files changed, 110 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 721ccce1832f..285c8b238bff 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -621,6 +621,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv)
 	i915_gem_contexts_fini(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
+	intel_uc_fini_wq(dev_priv);
 	i915_gem_cleanup_userptr(dev_priv);
 
 	i915_gem_drain_freed_objects(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8c3d801696b7..4b2ca43a610f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5160,6 +5160,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	if (ret)
 		return ret;
 
+	ret = intel_uc_init_wq(dev_priv);
+	if (ret)
+		return ret;
+
 	/* This is just a security blanket to placate dragons.
 	 * On some systems, we very sporadically observe that the first TLBs
 	 * used by the CS may be stale, despite us poking the TLB reset. If
diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c
index 92ed22f38fc4..3c6bf5a34c3c 100644
--- a/drivers/gpu/drm/i915/intel_guc.c
+++ b/drivers/gpu/drm/i915/intel_guc.c
@@ -69,6 +69,63 @@ void intel_guc_init_early(struct intel_guc *guc)
 	guc->notify = gen8_guc_raise_irq;
 }
 
+int intel_guc_init_wq(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+
+	/*
+	 * GuC log buffer flush work item has to do register access to
+	 * send the ack to GuC and this work item, if not synced before
+	 * suspend, can potentially get executed after the GFX device is
+	 * suspended.
+	 * By marking the WQ as freezable, we don't have to bother about
+	 * flushing of this work item from the suspend hooks, the pending
+	 * work item if any will be either executed before the suspend
+	 * or scheduled later on resume. This way the handling of work
+	 * item can be kept same between system suspend & rpm suspend.
+	 */
+	guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log",
+						WQ_HIGHPRI | WQ_FREEZABLE);
+	if (!guc->log.runtime.flush_wq)
+		return -ENOMEM;
+
+	/*
+	 * Even though both sending GuC action, and adding a new workitem to
+	 * GuC workqueue are serialized (each with its own locking), since
+	 * we're using mutliple engines, it's possible that we're going to
+	 * issue a preempt request with two (or more - each for different
+	 * engine) workitems in GuC queue. In this situation, GuC may submit
+	 * all of them, which will make us very confused.
+	 * Our preemption contexts may even already be complete - before we
+	 * even had the chance to sent the preempt action to GuC!. Rather
+	 * than introducing yet another lock, we can just use ordered workqueue
+	 * to make sure we're always sending a single preemption request with a
+	 * single workitem.
+	 */
+	if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) &&
+	    USES_GUC_SUBMISSION(dev_priv)) {
+		guc->preempt_wq = alloc_ordered_workqueue("i915-guc_preempt",
+							  WQ_HIGHPRI);
+		if (!guc->preempt_wq) {
+			destroy_workqueue(guc->log.runtime.flush_wq);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+void intel_guc_fini_wq(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+
+	if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) &&
+	    USES_GUC_SUBMISSION(dev_priv))
+		destroy_workqueue(guc->preempt_wq);
+
+	destroy_workqueue(guc->log.runtime.flush_wq);
+}
+
 static int guc_shared_data_create(struct intel_guc *guc)
 {
 	struct i915_vma *vma;
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 81659e223e11..52856a97477d 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -119,6 +119,8 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma)
 void intel_guc_init_early(struct intel_guc *guc);
 void intel_guc_init_send_regs(struct intel_guc *guc);
 void intel_guc_init_params(struct intel_guc *guc);
+int intel_guc_init_wq(struct intel_guc *guc);
+void intel_guc_fini_wq(struct intel_guc *guc);
 int intel_guc_init(struct intel_guc *guc);
 void intel_guc_fini(struct intel_guc *guc);
 int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len);
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index 1a2c5eed9929..eaedd63e3819 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -411,30 +411,8 @@ static int guc_log_runtime_create(struct intel_guc *guc)
 	guc->log.runtime.relay_chan = guc_log_relay_chan;
 
 	INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);
-
-	/*
-	 * GuC log buffer flush work item has to do register access to
-	 * send the ack to GuC and this work item, if not synced before
-	 * suspend, can potentially get executed after the GFX device is
-	 * suspended.
-	 * By marking the WQ as freezable, we don't have to bother about
-	 * flushing of this work item from the suspend hooks, the pending
-	 * work item if any will be either executed before the suspend
-	 * or scheduled later on resume. This way the handling of work
-	 * item can be kept same between system suspend & rpm suspend.
-	 */
-	guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log",
-						WQ_HIGHPRI | WQ_FREEZABLE);
-	if (!guc->log.runtime.flush_wq) {
-		DRM_ERROR("Couldn't allocate the wq for GuC logging\n");
-		ret = -ENOMEM;
-		goto err_relaychan;
-	}
-
 	return 0;
 
-err_relaychan:
-	relay_close(guc->log.runtime.relay_chan);
 err_vaddr:
 	i915_gem_object_unpin_map(guc->log.vma->obj);
 	guc->log.runtime.buf_addr = NULL;
@@ -450,7 +428,6 @@ static void guc_log_runtime_destroy(struct intel_guc *guc)
 	if (!guc_log_has_runtime(guc))
 		return;
 
-	destroy_workqueue(guc->log.runtime.flush_wq);
 	relay_close(guc->log.runtime.relay_chan);
 	i915_gem_object_unpin_map(guc->log.vma->obj);
 	guc->log.runtime.buf_addr = NULL;
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index c020560c395e..8f4b274d66a7 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -1187,57 +1187,15 @@ static void guc_ads_destroy(struct intel_guc *guc)
 	i915_vma_unpin_and_release(&guc->ads_vma);
 }
 
-static int guc_preempt_work_create(struct intel_guc *guc)
-{
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	/*
-	 * Even though both sending GuC action, and adding a new workitem to
-	 * GuC workqueue are serialized (each with its own locking), since
-	 * we're using mutliple engines, it's possible that we're going to
-	 * issue a preempt request with two (or more - each for different
-	 * engine) workitems in GuC queue. In this situation, GuC may submit
-	 * all of them, which will make us very confused.
-	 * Our preemption contexts may even already be complete - before we
-	 * even had the chance to sent the preempt action to GuC!. Rather
-	 * than introducing yet another lock, we can just use ordered workqueue
-	 * to make sure we're always sending a single preemption request with a
-	 * single workitem.
-	 */
-	guc->preempt_wq = alloc_ordered_workqueue("i915-guc_preempt",
-						  WQ_HIGHPRI);
-	if (!guc->preempt_wq)
-		return -ENOMEM;
-
-	for_each_engine(engine, dev_priv, id) {
-		guc->preempt_work[id].engine = engine;
-		INIT_WORK(&guc->preempt_work[id].work, inject_preempt_context);
-	}
-
-	return 0;
-}
-
-static void guc_preempt_work_destroy(struct intel_guc *guc)
-{
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	for_each_engine(engine, dev_priv, id)
-		cancel_work_sync(&guc->preempt_work[id].work);
-
-	destroy_workqueue(guc->preempt_wq);
-	guc->preempt_wq = NULL;
-}
-
 /*
  * Set up the memory resources to be shared with the GuC (via the GGTT)
  * at firmware loading time.
  */
 int intel_guc_submission_init(struct intel_guc *guc)
 {
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 	int ret;
 
 	if (guc->stage_desc_pool)
@@ -1256,20 +1214,18 @@ int intel_guc_submission_init(struct intel_guc *guc)
 	if (ret < 0)
 		goto err_stage_desc_pool;
 
-	ret = guc_preempt_work_create(guc);
-	if (ret)
-		goto err_log;
-	GEM_BUG_ON(!guc->preempt_wq);
-
 	ret = guc_ads_create(guc);
 	if (ret < 0)
-		goto err_wq;
+		goto err_log;
 	GEM_BUG_ON(!guc->ads_vma);
 
+	for_each_engine(engine, dev_priv, id) {
+		guc->preempt_work[id].engine = engine;
+		INIT_WORK(&guc->preempt_work[id].work, inject_preempt_context);
+	}
+
 	return 0;
 
-err_wq:
-	guc_preempt_work_destroy(guc);
 err_log:
 	intel_guc_log_destroy(guc);
 err_stage_desc_pool:
@@ -1279,8 +1235,14 @@ int intel_guc_submission_init(struct intel_guc *guc)
 
 void intel_guc_submission_fini(struct intel_guc *guc)
 {
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, dev_priv, id)
+		cancel_work_sync(&guc->preempt_work[id].work);
+
 	guc_ads_destroy(guc);
-	guc_preempt_work_destroy(guc);
 	intel_guc_log_destroy(guc);
 	guc_stage_desc_pool_destroy(guc);
 }
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.h b/drivers/gpu/drm/i915/intel_guc_submission.h
index 021fe85c8f71..fb081cefef93 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/intel_guc_submission.h
@@ -77,5 +77,7 @@ int intel_guc_submission_init(struct intel_guc *guc);
 int intel_guc_submission_enable(struct intel_guc *guc);
 void intel_guc_submission_disable(struct intel_guc *guc);
 void intel_guc_submission_fini(struct intel_guc *guc);
+int intel_guc_preempt_work_create(struct intel_guc *guc);
+void intel_guc_preempt_work_destroy(struct intel_guc *guc);
 
 #endif
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 3040a0e00142..785850838a44 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -188,6 +188,32 @@ static void guc_disable_communication(struct intel_guc *guc)
 	guc->send = intel_guc_send_nop;
 }
 
+int intel_uc_init_wq(struct drm_i915_private *dev_priv)
+{
+	int ret;
+
+	if (!USES_GUC(dev_priv))
+		return 0;
+
+	ret = intel_guc_init_wq(&dev_priv->guc);
+	if (ret) {
+		DRM_ERROR("Couldn't allocate workqueues for GuC\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+void intel_uc_fini_wq(struct drm_i915_private *dev_priv)
+{
+	if (!USES_GUC(dev_priv))
+		return;
+
+	GEM_BUG_ON(!HAS_GUC(dev_priv));
+
+	intel_guc_fini_wq(&dev_priv->guc);
+}
+
 int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 {
 	struct intel_guc *guc = &dev_priv->guc;
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index 7a59e2486e9e..53edfeaf56b0 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -33,6 +33,8 @@ void intel_uc_init_early(struct drm_i915_private *dev_priv);
 void intel_uc_init_mmio(struct drm_i915_private *dev_priv);
 void intel_uc_init_fw(struct drm_i915_private *dev_priv);
 void intel_uc_fini_fw(struct drm_i915_private *dev_priv);
+int intel_uc_init_wq(struct drm_i915_private *dev_priv);
+void intel_uc_fini_wq(struct drm_i915_private *dev_priv);
 int intel_uc_init_hw(struct drm_i915_private *dev_priv);
 void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
 
-- 
2.14.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH v2 3/8] drm/i915/guc: Extract guc_init from guc_init_hw
  2017-12-13 12:50 [PATCH 1/8] drm/i915/guc: Move shared data allocation away from submission path Michał Winiarski
  2017-12-13 12:50 ` [PATCH v2 2/8] drm/i915/guc: Move GuC workqueue allocations outside of the mutex Michał Winiarski
@ 2017-12-13 12:50 ` Michał Winiarski
  2017-12-13 12:50 ` [PATCH 4/8] drm/i915/guc: Call invalidate after changing the vfunc Michał Winiarski
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 19+ messages in thread
From: Michał Winiarski @ 2017-12-13 12:50 UTC (permalink / raw)
  To: intel-gfx

After GPU reset, GuC HW needs to be reinitialized (with FW reload).
Unfortunately, we're doing some extra work there (mostly allocating stuff),
work that can be moved to guc_init and called once at driver load time.

As a side effect we're no longer hitting an assert in
i915_ggtt_enable_guc on suspend/resume.

v2: Do not duplicate disable_communication / reset_guc_interrupts

References: 04f7b24eccdf ("drm/i915/guc: Assert that we switch between known ggtt->invalidate functions")
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c |  1 +
 drivers/gpu/drm/i915/i915_gem.c |  4 +++
 drivers/gpu/drm/i915/intel_uc.c | 71 ++++++++++++++++++++++++++---------------
 drivers/gpu/drm/i915/intel_uc.h |  2 ++
 4 files changed, 53 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 285c8b238bff..ca9f4b2862eb 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -617,6 +617,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv)
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
 	intel_uc_fini_hw(dev_priv);
+	intel_uc_fini(dev_priv);
 	i915_gem_cleanup_engines(dev_priv);
 	i915_gem_contexts_fini(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4b2ca43a610f..77e4091a7c71 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5187,6 +5187,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 
 	intel_init_gt_powersave(dev_priv);
 
+	ret = intel_uc_init(dev_priv);
+	if (ret)
+		goto out_unlock;
+
 	ret = i915_gem_init_hw(dev_priv);
 	if (ret)
 		goto out_unlock;
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 785850838a44..907deac6e3fa 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -214,26 +214,20 @@ void intel_uc_fini_wq(struct drm_i915_private *dev_priv)
 	intel_guc_fini_wq(&dev_priv->guc);
 }
 
-int intel_uc_init_hw(struct drm_i915_private *dev_priv)
+int intel_uc_init(struct drm_i915_private *dev_priv)
 {
 	struct intel_guc *guc = &dev_priv->guc;
-	struct intel_huc *huc = &dev_priv->huc;
-	int ret, attempts;
+	int ret;
 
 	if (!USES_GUC(dev_priv))
 		return 0;
 
-	if (!HAS_GUC(dev_priv)) {
-		ret = -ENODEV;
-		goto err_out;
-	}
-
-	guc_disable_communication(guc);
-	gen9_reset_guc_interrupts(dev_priv);
+	if (!HAS_GUC(dev_priv))
+		return -ENODEV;
 
 	ret = intel_guc_init(guc);
 	if (ret)
-		goto err_out;
+		return ret;
 
 	if (USES_GUC_SUBMISSION(dev_priv)) {
 		/*
@@ -241,10 +235,44 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 		 * if we are planning to enable submission later
 		 */
 		ret = intel_guc_submission_init(guc);
-		if (ret)
-			goto err_guc;
+		if (ret) {
+			intel_guc_fini(guc);
+			return ret;
+		}
 	}
 
+	return 0;
+}
+
+void intel_uc_fini(struct drm_i915_private *dev_priv)
+{
+	struct intel_guc *guc = &dev_priv->guc;
+
+	if (!USES_GUC(dev_priv))
+		return;
+
+	GEM_BUG_ON(!HAS_GUC(dev_priv));
+
+	if (USES_GUC_SUBMISSION(dev_priv))
+		intel_guc_submission_fini(guc);
+
+	intel_guc_fini(guc);
+}
+
+int intel_uc_init_hw(struct drm_i915_private *dev_priv)
+{
+	struct intel_guc *guc = &dev_priv->guc;
+	struct intel_huc *huc = &dev_priv->huc;
+	int ret, attempts;
+
+	if (!USES_GUC(dev_priv))
+		return 0;
+
+	GEM_BUG_ON(!HAS_GUC(dev_priv));
+
+	guc_disable_communication(guc);
+	gen9_reset_guc_interrupts(dev_priv);
+
 	/* init WOPCM */
 	I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv));
 	I915_WRITE(DMA_GUC_WOPCM_OFFSET,
@@ -264,12 +292,12 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 		 */
 		ret = __intel_uc_reset_hw(dev_priv);
 		if (ret)
-			goto err_submission;
+			goto err_out;
 
 		if (USES_HUC(dev_priv)) {
 			ret = intel_huc_init_hw(huc);
 			if (ret)
-				goto err_submission;
+				goto err_out;
 		}
 
 		intel_guc_init_params(guc);
@@ -322,11 +350,6 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 	guc_disable_communication(guc);
 err_log_capture:
 	guc_capture_load_err_log(guc);
-err_submission:
-	if (USES_GUC_SUBMISSION(dev_priv))
-		intel_guc_submission_fini(guc);
-err_guc:
-	intel_guc_fini(guc);
 err_out:
 	/*
 	 * Note that there is no fallback as either user explicitly asked for
@@ -348,15 +371,13 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
 	if (!USES_GUC(dev_priv))
 		return;
 
+	GEM_BUG_ON(!HAS_GUC(dev_priv));
+
 	if (USES_GUC_SUBMISSION(dev_priv))
 		intel_guc_submission_disable(guc);
 
 	guc_disable_communication(guc);
 
-	if (USES_GUC_SUBMISSION(dev_priv)) {
+	if (USES_GUC_SUBMISSION(dev_priv))
 		gen9_disable_guc_interrupts(dev_priv);
-		intel_guc_submission_fini(guc);
-	}
-
-	intel_guc_fini(guc);
 }
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index 53edfeaf56b0..8a7249722ef1 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -37,6 +37,8 @@ int intel_uc_init_wq(struct drm_i915_private *dev_priv);
 void intel_uc_fini_wq(struct drm_i915_private *dev_priv);
 int intel_uc_init_hw(struct drm_i915_private *dev_priv);
 void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
+int intel_uc_init(struct drm_i915_private *dev_priv);
+void intel_uc_fini(struct drm_i915_private *dev_priv);
 
 static inline bool intel_uc_is_using_guc(void)
 {
-- 
2.14.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 4/8] drm/i915/guc: Call invalidate after changing the vfunc
  2017-12-13 12:50 [PATCH 1/8] drm/i915/guc: Move shared data allocation away from submission path Michał Winiarski
  2017-12-13 12:50 ` [PATCH v2 2/8] drm/i915/guc: Move GuC workqueue allocations outside of the mutex Michał Winiarski
  2017-12-13 12:50 ` [PATCH v2 3/8] drm/i915/guc: Extract guc_init from guc_init_hw Michał Winiarski
@ 2017-12-13 12:50 ` Michał Winiarski
  2017-12-13 15:24   ` Chris Wilson
  2017-12-13 12:50 ` [PATCH 5/8] drm/i915/guc: Extract doorbell creation from client allocation Michał Winiarski
                   ` (6 subsequent siblings)
  9 siblings, 1 reply; 19+ messages in thread
From: Michał Winiarski @ 2017-12-13 12:50 UTC (permalink / raw)
  To: intel-gfx

To make this operation a bit cleaner, we should make sure that the HW
can catch up by calling the new implementation right away.
Note that currently we're only touching the vfunc at module load time
(before GuC is even loaded), so this shouldn't cause any functional
changes.

Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index a0579b0c8581..c5f393870532 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3552,6 +3552,8 @@ void i915_ggtt_enable_guc(struct drm_i915_private *i915)
 	GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate);
 
 	i915->ggtt.invalidate = guc_ggtt_invalidate;
+
+	i915_ggtt_invalidate(i915);
 }
 
 void i915_ggtt_disable_guc(struct drm_i915_private *i915)
@@ -3560,6 +3562,8 @@ void i915_ggtt_disable_guc(struct drm_i915_private *i915)
 	GEM_BUG_ON(i915->ggtt.invalidate != guc_ggtt_invalidate);
 
 	i915->ggtt.invalidate = gen6_ggtt_invalidate;
+
+	i915_ggtt_invalidate(i915);
 }
 
 void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
-- 
2.14.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 5/8] drm/i915/guc: Extract doorbell creation from client allocation
  2017-12-13 12:50 [PATCH 1/8] drm/i915/guc: Move shared data allocation away from submission path Michał Winiarski
                   ` (2 preceding siblings ...)
  2017-12-13 12:50 ` [PATCH 4/8] drm/i915/guc: Call invalidate after changing the vfunc Michał Winiarski
@ 2017-12-13 12:50 ` Michał Winiarski
  2017-12-13 19:03   ` Michel Thierry
  2017-12-13 12:50 ` [PATCH 6/8] drm/i915/guc: Extract clients allocation to submission_init Michał Winiarski
                   ` (5 subsequent siblings)
  9 siblings, 1 reply; 19+ messages in thread
From: Michał Winiarski @ 2017-12-13 12:50 UTC (permalink / raw)
  To: intel-gfx

Full GPU reset causes GuC to be reset. This means that every time we're
doing a reset, we need to talk to GuC and tell it about doorbells.
Let's separate the communication part (create_doorbell) from our
internal bookkeeping (reserve_doorbell) so that we can cleanly separate
the initialization done at module load from reinitialization done at
reset in the following patch.
While I'm here, let's also add a proper (although slightly asymetric)
cleanup that doesn't try to communicate with GuC after it's already
gone, getting rid of "expected" warnings caused by GuC action failures
on module unload.

Note that I've also removed one of the tests (bitmap out of sync), since
it doesn't make much sense anymore - bitmaps are now not expected to
change during the lifetime of a client.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
---
 drivers/gpu/drm/i915/intel_guc_submission.c | 151 ++++++++--------------------
 drivers/gpu/drm/i915/selftests/intel_guc.c  | 110 +++++++++-----------
 2 files changed, 88 insertions(+), 173 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 8f4b274d66a7..c74e78b6ba41 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -88,7 +88,7 @@ static inline bool is_high_priority(struct intel_guc_client *client)
 		client->priority == GUC_CLIENT_PRIORITY_HIGH);
 }
 
-static int __reserve_doorbell(struct intel_guc_client *client)
+static int reserve_doorbell(struct intel_guc_client *client)
 {
 	unsigned long offset;
 	unsigned long end;
@@ -120,7 +120,7 @@ static int __reserve_doorbell(struct intel_guc_client *client)
 	return 0;
 }
 
-static void __unreserve_doorbell(struct intel_guc_client *client)
+static void unreserve_doorbell(struct intel_guc_client *client)
 {
 	GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID);
 
@@ -188,32 +188,21 @@ static bool has_doorbell(struct intel_guc_client *client)
 	return test_bit(client->doorbell_id, client->guc->doorbell_bitmap);
 }
 
-static int __create_doorbell(struct intel_guc_client *client)
+static void __create_doorbell(struct intel_guc_client *client)
 {
 	struct guc_doorbell_info *doorbell;
-	int err;
 
 	doorbell = __get_doorbell(client);
 	doorbell->db_status = GUC_DOORBELL_ENABLED;
 	doorbell->cookie = 0;
-
-	err = __guc_allocate_doorbell(client->guc, client->stage_id);
-	if (err) {
-		doorbell->db_status = GUC_DOORBELL_DISABLED;
-		DRM_ERROR("Couldn't create client %u doorbell: %d\n",
-			  client->stage_id, err);
-	}
-
-	return err;
 }
 
-static int __destroy_doorbell(struct intel_guc_client *client)
+static void __destroy_doorbell(struct intel_guc_client *client)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(client->guc);
 	struct guc_doorbell_info *doorbell;
 	u16 db_id = client->doorbell_id;
 
-	GEM_BUG_ON(db_id >= GUC_DOORBELL_INVALID);
 
 	doorbell = __get_doorbell(client);
 	doorbell->db_status = GUC_DOORBELL_DISABLED;
@@ -225,50 +214,42 @@ static int __destroy_doorbell(struct intel_guc_client *client)
 	 */
 	if (wait_for_us(!(I915_READ(GEN8_DRBREGL(db_id)) & GEN8_DRB_VALID), 10))
 		WARN_ONCE(true, "Doorbell never became invalid after disable\n");
-
-	return __guc_deallocate_doorbell(client->guc, client->stage_id);
 }
 
 static int create_doorbell(struct intel_guc_client *client)
 {
 	int ret;
 
-	ret = __reserve_doorbell(client);
-	if (ret)
-		return ret;
-
 	__update_doorbell_desc(client, client->doorbell_id);
+	__create_doorbell(client);
 
-	ret = __create_doorbell(client);
-	if (ret)
-		goto err;
+	ret = __guc_allocate_doorbell(client->guc, client->stage_id);
+	if (ret) {
+		__destroy_doorbell(client);
+		__update_doorbell_desc(client, GUC_DOORBELL_INVALID);
+		DRM_ERROR("Couldn't create client %u doorbell: %d\n",
+			  client->stage_id, ret);
+		return ret;
+	}
 
 	return 0;
-
-err:
-	__update_doorbell_desc(client, GUC_DOORBELL_INVALID);
-	__unreserve_doorbell(client);
-	return ret;
 }
 
 static int destroy_doorbell(struct intel_guc_client *client)
 {
-	int err;
+	int ret;
 
 	GEM_BUG_ON(!has_doorbell(client));
 
-	/* XXX: wait for any interrupts */
-	/* XXX: wait for workqueue to drain */
-
-	err = __destroy_doorbell(client);
-	if (err)
-		return err;
+	__destroy_doorbell(client);
+	ret = __guc_deallocate_doorbell(client->guc, client->stage_id);
+	if (ret)
+		DRM_ERROR("Couldn't destroy client %u doorbell: %d\n",
+			  client->stage_id, ret);
 
 	__update_doorbell_desc(client, GUC_DOORBELL_INVALID);
 
-	__unreserve_doorbell(client);
-
-	return 0;
+	return ret;
 }
 
 static unsigned long __select_cacheline(struct intel_guc *guc)
@@ -839,73 +820,18 @@ static bool doorbell_ok(struct intel_guc *guc, u16 db_id)
 	return false;
 }
 
-/*
- * If the GuC thinks that the doorbell is unassigned (e.g. because we reset and
- * reloaded the GuC FW) we can use this function to tell the GuC to reassign the
- * doorbell to the rightful owner.
- */
-static int __reset_doorbell(struct intel_guc_client *client, u16 db_id)
-{
-	int err;
-
-	__update_doorbell_desc(client, db_id);
-	err = __create_doorbell(client);
-	if (!err)
-		err = __destroy_doorbell(client);
-
-	return err;
-}
-
-/*
- * Set up & tear down each unused doorbell in turn, to ensure that all doorbell
- * HW is (re)initialised. For that end, we might have to borrow the first
- * client. Also, tell GuC about all the doorbells in use by all clients.
- * We do this because the KMD, the GuC and the doorbell HW can easily go out of
- * sync (e.g. we can reset the GuC, but not the doorbel HW).
- */
-static int guc_init_doorbell_hw(struct intel_guc *guc)
+static int guc_clients_doorbell_init(struct intel_guc *guc)
 {
-	struct intel_guc_client *client = guc->execbuf_client;
-	bool recreate_first_client = false;
 	u16 db_id;
 	int ret;
 
-	/* For unused doorbells, make sure they are disabled */
-	for_each_clear_bit(db_id, guc->doorbell_bitmap, GUC_NUM_DOORBELLS) {
-		if (doorbell_ok(guc, db_id))
-			continue;
-
-		if (has_doorbell(client)) {
-			/* Borrow execbuf_client (we will recreate it later) */
-			destroy_doorbell(client);
-			recreate_first_client = true;
-		}
-
-		ret = __reset_doorbell(client, db_id);
-		WARN(ret, "Doorbell %u reset failed, err %d\n", db_id, ret);
-	}
-
-	if (recreate_first_client) {
-		ret = __reserve_doorbell(client);
-		if (unlikely(ret)) {
-			DRM_ERROR("Couldn't re-reserve first client db: %d\n",
-				  ret);
-			return ret;
-		}
-
-		__update_doorbell_desc(client, client->doorbell_id);
-	}
-
-	/* Now for every client (and not only execbuf_client) make sure their
-	 * doorbells are known by the GuC
-	 */
-	ret = __create_doorbell(guc->execbuf_client);
+	ret = create_doorbell(guc->execbuf_client);
 	if (ret)
 		return ret;
 
-	ret = __create_doorbell(guc->preempt_client);
+	ret = create_doorbell(guc->preempt_client);
 	if (ret) {
-		__destroy_doorbell(guc->execbuf_client);
+		destroy_doorbell(guc->execbuf_client);
 		return ret;
 	}
 
@@ -916,6 +842,19 @@ static int guc_init_doorbell_hw(struct intel_guc *guc)
 	return 0;
 }
 
+static void guc_clients_doorbell_fini(struct intel_guc *guc)
+{
+	/*
+	 * By the time we're here, GuC has already been reset.
+	 * Instead of trying (in vain) to communicate with it, let's just
+	 * cleanup the doorbell HW and our internal state.
+	 */
+	__destroy_doorbell(guc->preempt_client);
+	__update_doorbell_desc(guc->preempt_client, GUC_DOORBELL_INVALID);
+	__destroy_doorbell(guc->execbuf_client);
+	__update_doorbell_desc(guc->execbuf_client, GUC_DOORBELL_INVALID);
+}
+
 /**
  * guc_client_alloc() - Allocate an intel_guc_client
  * @dev_priv:	driver private data structure
@@ -991,7 +930,7 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
 	guc_proc_desc_init(guc, client);
 	guc_stage_desc_init(guc, client);
 
-	ret = create_doorbell(client);
+	ret = reserve_doorbell(client);
 	if (ret)
 		goto err_vaddr;
 
@@ -1015,16 +954,7 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
 
 static void guc_client_free(struct intel_guc_client *client)
 {
-	/*
-	 * XXX: wait for any outstanding submissions before freeing memory.
-	 * Be sure to drop any locks
-	 */
-
-	/* FIXME: in many cases, by the time we get here the GuC has been
-	 * reset, so we cannot destroy the doorbell properly. Ignore the
-	 * error message for now
-	 */
-	destroy_doorbell(client);
+	unreserve_doorbell(client);
 	guc_stage_desc_fini(client->guc, client);
 	i915_gem_object_unpin_map(client->vma->obj);
 	i915_vma_unpin_and_release(&client->vma);
@@ -1366,7 +1296,7 @@ int intel_guc_submission_enable(struct intel_guc *guc)
 	if (err)
 		goto err_free_clients;
 
-	err = guc_init_doorbell_hw(guc);
+	err = guc_clients_doorbell_init(guc);
 	if (err)
 		goto err_free_clients;
 
@@ -1398,6 +1328,7 @@ void intel_guc_submission_disable(struct intel_guc *guc)
 	GEM_BUG_ON(dev_priv->gt.awake); /* GT should be parked first */
 
 	guc_interrupts_release(dev_priv);
+	guc_clients_doorbell_fini(guc);
 
 	/* Revert back to manual ELSP submission */
 	intel_engines_reset_default_submission(dev_priv);
diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c
index 68d6a69c738f..3f9016466dea 100644
--- a/drivers/gpu/drm/i915/selftests/intel_guc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_guc.c
@@ -85,21 +85,26 @@ static int validate_client(struct intel_guc_client *client,
 		return 0;
 }
 
+static bool client_doorbell_in_sync(struct intel_guc_client *client)
+{
+	return doorbell_ok(client->guc, client->doorbell_id);
+}
+
 /*
- * Check that guc_init_doorbell_hw is doing what it should.
+ * Check that we're able to synchronize guc_clients with their doorbells
  *
- * During GuC submission enable, we create GuC clients and their doorbells,
- * but after resetting the microcontroller (resume & gpu reset), these
- * GuC clients are still around, but the status of their doorbells may be
- * incorrect. This is the reason behind validating that the doorbells status
- * expected by the driver matches what the GuC/HW have.
+ * We're creating clients and reserving doorbells once, at module load. During
+ * module lifetime, GuC, doorbell HW, and i915 state may go out of sync due to
+ * GuC being reset. In other words - GuC clients are still around, but the
+ * status of their doorbells may be incorrect. This is the reason behind
+ * validating that the doorbells status expected by the driver matches what the
+ * GuC/HW have.
  */
-static int igt_guc_init_doorbell_hw(void *args)
+static int igt_guc_clients(void *args)
 {
 	struct drm_i915_private *dev_priv = args;
 	struct intel_guc *guc;
-	DECLARE_BITMAP(db_bitmap_bk, GUC_NUM_DOORBELLS);
-	int i, err = 0;
+	int err = 0;
 
 	GEM_BUG_ON(!HAS_GUC(dev_priv));
 	mutex_lock(&dev_priv->drm.struct_mutex);
@@ -148,10 +153,21 @@ static int igt_guc_init_doorbell_hw(void *args)
 		goto out;
 	}
 
-	/* each client should have received a doorbell during alloc */
+	/* each client should now have reserved a doorbell */
 	if (!has_doorbell(guc->execbuf_client) ||
 	    !has_doorbell(guc->preempt_client)) {
-		pr_err("guc_clients_create didn't create doorbells\n");
+		pr_err("guc_clients_create didn't reserve doorbells\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Now create the doorbells */
+	guc_clients_doorbell_init(guc);
+
+	/* each client should now have received a doorbell */
+	if (!client_doorbell_in_sync(guc->execbuf_client) ||
+	    !client_doorbell_in_sync(guc->preempt_client)) {
+		pr_err("failed to initialize the doorbells\n");
 		err = -EINVAL;
 		goto out;
 	}
@@ -160,25 +176,26 @@ static int igt_guc_init_doorbell_hw(void *args)
 	 * Basic test - an attempt to reallocate a valid doorbell to the
 	 * client it is currently assigned should not cause a failure.
 	 */
-	err = guc_init_doorbell_hw(guc);
+	err = guc_clients_doorbell_init(guc);
 	if (err)
 		goto out;
 
 	/*
 	 * Negative test - a client with no doorbell (invalid db id).
-	 * Each client gets a doorbell when it is created, after destroying
-	 * the doorbell, the db id is changed to GUC_DOORBELL_INVALID and the
-	 * firmware will reject any attempt to allocate a doorbell with an
-	 * invalid id (db has to be reserved before allocation).
+	 * After destroying the doorbell, the db id is changed to
+	 * GUC_DOORBELL_INVALID and the firmware will reject any attempt to
+	 * allocate a doorbell with an invalid id (db has to be reserved before
+	 * allocation).
 	 */
 	destroy_doorbell(guc->execbuf_client);
-	if (has_doorbell(guc->execbuf_client)) {
+	if (client_doorbell_in_sync(guc->execbuf_client)) {
 		pr_err("destroy db did not work\n");
 		err = -EINVAL;
 		goto out;
 	}
 
-	err = guc_init_doorbell_hw(guc);
+	unreserve_doorbell(guc->execbuf_client);
+	err = guc_clients_doorbell_init(guc);
 	if (err != -EIO) {
 		pr_err("unexpected (err = %d)", err);
 		goto out;
@@ -191,33 +208,13 @@ static int igt_guc_init_doorbell_hw(void *args)
 	}
 
 	/* clean after test */
-	err = create_doorbell(guc->execbuf_client);
-	if (err) {
-		pr_err("recreate doorbell failed\n");
-		goto out;
-	}
-
-	/*
-	 * Negative test - doorbell_bitmap out of sync, will trigger a few of
-	 * WARN_ON(!doorbell_ok(guc, db_id)) but that's ok as long as the
-	 * doorbells from our clients don't fail.
-	 */
-	bitmap_copy(db_bitmap_bk, guc->doorbell_bitmap, GUC_NUM_DOORBELLS);
-	for (i = 0; i < GUC_NUM_DOORBELLS; i++)
-		if (i % 2)
-			test_and_change_bit(i, guc->doorbell_bitmap);
-
-	err = guc_init_doorbell_hw(guc);
+	err = reserve_doorbell(guc->execbuf_client);
 	if (err) {
-		pr_err("out of sync doorbell caused an error\n");
-		goto out;
+		pr_err("failed to reserve back the doorbell back\n");
 	}
-
-	/* restore 'correct' db bitmap */
-	bitmap_copy(guc->doorbell_bitmap, db_bitmap_bk, GUC_NUM_DOORBELLS);
-	err = guc_init_doorbell_hw(guc);
+	err = create_doorbell(guc->execbuf_client);
 	if (err) {
-		pr_err("restored doorbell caused an error\n");
+		pr_err("recreate doorbell failed\n");
 		goto out;
 	}
 
@@ -226,8 +223,11 @@ static int igt_guc_init_doorbell_hw(void *args)
 	 * Leave clean state for other test, plus the driver always destroy the
 	 * clients during unload.
 	 */
+	destroy_doorbell(guc->execbuf_client);
+	destroy_doorbell(guc->preempt_client);
 	guc_clients_destroy(guc);
 	guc_clients_create(guc);
+	guc_clients_doorbell_init(guc);
 unlock:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return err;
@@ -309,25 +309,7 @@ static int igt_guc_doorbells(void *arg)
 
 		db_id = clients[i]->doorbell_id;
 
-		/*
-		 * Client alloc gives us a doorbell, but we want to exercise
-		 * this ourselves (this resembles guc_init_doorbell_hw)
-		 */
-		destroy_doorbell(clients[i]);
-		if (clients[i]->doorbell_id != GUC_DOORBELL_INVALID) {
-			pr_err("[%d] destroy db did not work!\n", i);
-			err = -EINVAL;
-			goto out;
-		}
-
-		err = __reserve_doorbell(clients[i]);
-		if (err) {
-			pr_err("[%d] Failed to reserve a doorbell\n", i);
-			goto out;
-		}
-
-		__update_doorbell_desc(clients[i], clients[i]->doorbell_id);
-		err = __create_doorbell(clients[i]);
+		err = create_doorbell(clients[i]);
 		if (err) {
 			pr_err("[%d] Failed to create a doorbell\n", i);
 			goto out;
@@ -348,8 +330,10 @@ static int igt_guc_doorbells(void *arg)
 
 out:
 	for (i = 0; i < ATTEMPTS; i++)
-		if (!IS_ERR_OR_NULL(clients[i]))
+		if (!IS_ERR_OR_NULL(clients[i])) {
+			destroy_doorbell(clients[i]);
 			guc_client_free(clients[i]);
+		}
 unlock:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return err;
@@ -358,7 +342,7 @@ static int igt_guc_doorbells(void *arg)
 int intel_guc_live_selftest(struct drm_i915_private *dev_priv)
 {
 	static const struct i915_subtest tests[] = {
-		SUBTEST(igt_guc_init_doorbell_hw),
+		SUBTEST(igt_guc_clients),
 		SUBTEST(igt_guc_doorbells),
 	};
 
-- 
2.14.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 6/8] drm/i915/guc: Extract clients allocation to submission_init
  2017-12-13 12:50 [PATCH 1/8] drm/i915/guc: Move shared data allocation away from submission path Michał Winiarski
                   ` (3 preceding siblings ...)
  2017-12-13 12:50 ` [PATCH 5/8] drm/i915/guc: Extract doorbell creation from client allocation Michał Winiarski
@ 2017-12-13 12:50 ` Michał Winiarski
  2017-12-13 19:10   ` Michel Thierry
  2017-12-13 12:50 ` [PATCH 7/8] drm/i915/guc: Extract doorbell verification into a function Michał Winiarski
                   ` (4 subsequent siblings)
  9 siblings, 1 reply; 19+ messages in thread
From: Michał Winiarski @ 2017-12-13 12:50 UTC (permalink / raw)
  To: intel-gfx

We can now move the clients allocation to submission_init path, rather
than keeping the condition inside submission_enable called on every
reset.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
---
 drivers/gpu/drm/i915/intel_guc_submission.c | 33 ++++++++++-------------------
 1 file changed, 11 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index c74e78b6ba41..488110602e7e 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -1149,6 +1149,10 @@ int intel_guc_submission_init(struct intel_guc *guc)
 		goto err_log;
 	GEM_BUG_ON(!guc->ads_vma);
 
+	ret = guc_clients_create(guc);
+	if (ret)
+		return ret;
+
 	for_each_engine(engine, dev_priv, id) {
 		guc->preempt_work[id].engine = engine;
 		INIT_WORK(&guc->preempt_work[id].work, inject_preempt_context);
@@ -1172,6 +1176,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
 	for_each_engine(engine, dev_priv, id)
 		cancel_work_sync(&guc->preempt_work[id].work);
 
+	guc_clients_destroy(guc);
 	guc_ads_destroy(guc);
 	intel_guc_log_destroy(guc);
 	guc_stage_desc_pool_destroy(guc);
@@ -1277,28 +1282,18 @@ int intel_guc_submission_enable(struct intel_guc *guc)
 		     sizeof(struct guc_wq_item) *
 		     I915_NUM_ENGINES > GUC_WQ_SIZE);
 
-	/*
-	 * We're being called on both module initialization and on reset,
-	 * until this flow is changed, we're using regular client presence to
-	 * determine which case are we in, and whether we should allocate new
-	 * clients or just reset their workqueues.
-	 */
-	if (!guc->execbuf_client) {
-		err = guc_clients_create(guc);
-		if (err)
-			return err;
-	} else {
-		guc_reset_wq(guc->execbuf_client);
-		guc_reset_wq(guc->preempt_client);
-	}
+	GEM_BUG_ON(!guc->execbuf_client);
+
+	guc_reset_wq(guc->execbuf_client);
+	guc_reset_wq(guc->preempt_client);
 
 	err = intel_guc_sample_forcewake(guc);
 	if (err)
-		goto err_free_clients;
+		return err;
 
 	err = guc_clients_doorbell_init(guc);
 	if (err)
-		goto err_free_clients;
+		return err;
 
 	/* Take over from manual control of ELSP (execlists) */
 	guc_interrupts_capture(dev_priv);
@@ -1315,10 +1310,6 @@ int intel_guc_submission_enable(struct intel_guc *guc)
 	}
 
 	return 0;
-
-err_free_clients:
-	guc_clients_destroy(guc);
-	return err;
 }
 
 void intel_guc_submission_disable(struct intel_guc *guc)
@@ -1332,8 +1323,6 @@ void intel_guc_submission_disable(struct intel_guc *guc)
 
 	/* Revert back to manual ELSP submission */
 	intel_engines_reset_default_submission(dev_priv);
-
-	guc_clients_destroy(guc);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-- 
2.14.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 7/8] drm/i915/guc: Extract doorbell verification into a function
  2017-12-13 12:50 [PATCH 1/8] drm/i915/guc: Move shared data allocation away from submission path Michał Winiarski
                   ` (4 preceding siblings ...)
  2017-12-13 12:50 ` [PATCH 6/8] drm/i915/guc: Extract clients allocation to submission_init Michał Winiarski
@ 2017-12-13 12:50 ` Michał Winiarski
  2017-12-13 15:23   ` Michal Wajdeczko
  2017-12-13 12:50 ` [PATCH 8/8] HAX Enable GuC Submission for CI Michał Winiarski
                   ` (3 subsequent siblings)
  9 siblings, 1 reply; 19+ messages in thread
From: Michał Winiarski @ 2017-12-13 12:50 UTC (permalink / raw)
  To: intel-gfx

We have the selftest that's checking doorbell create/destroy, so there's
no need to check all doorbells delaying the reset every time.
We do want to have that extra sanity check at module load/unload though.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
---
 drivers/gpu/drm/i915/intel_guc_submission.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 488110602e7e..4d2409466a3a 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -820,9 +820,19 @@ static bool doorbell_ok(struct intel_guc *guc, u16 db_id)
 	return false;
 }
 
-static int guc_clients_doorbell_init(struct intel_guc *guc)
+static bool guc_verify_doorbells(struct intel_guc *guc)
 {
 	u16 db_id;
+
+	for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id)
+		if (!doorbell_ok(guc, db_id))
+			return false;
+
+	return true;
+}
+
+static int guc_clients_doorbell_init(struct intel_guc *guc)
+{
 	int ret;
 
 	ret = create_doorbell(guc->execbuf_client);
@@ -835,10 +845,6 @@ static int guc_clients_doorbell_init(struct intel_guc *guc)
 		return ret;
 	}
 
-	/* Read back & verify all (used & unused) doorbell registers */
-	for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id)
-		WARN_ON(!doorbell_ok(guc, db_id));
-
 	return 0;
 }
 
@@ -1149,6 +1155,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
 		goto err_log;
 	GEM_BUG_ON(!guc->ads_vma);
 
+	WARN_ON(!guc_verify_doorbells(guc));
 	ret = guc_clients_create(guc);
 	if (ret)
 		return ret;
@@ -1177,6 +1184,8 @@ void intel_guc_submission_fini(struct intel_guc *guc)
 		cancel_work_sync(&guc->preempt_work[id].work);
 
 	guc_clients_destroy(guc);
+	WARN_ON(!guc_verify_doorbells(guc));
+
 	guc_ads_destroy(guc);
 	intel_guc_log_destroy(guc);
 	guc_stage_desc_pool_destroy(guc);
-- 
2.14.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 8/8] HAX Enable GuC Submission for CI
  2017-12-13 12:50 [PATCH 1/8] drm/i915/guc: Move shared data allocation away from submission path Michał Winiarski
                   ` (5 preceding siblings ...)
  2017-12-13 12:50 ` [PATCH 7/8] drm/i915/guc: Extract doorbell verification into a function Michał Winiarski
@ 2017-12-13 12:50 ` Michał Winiarski
  2017-12-13 18:15   ` Chris Wilson
  2017-12-13 15:08 ` ✓ Fi.CI.BAT: success for series starting with [1/8] drm/i915/guc: Move shared data allocation away from submission path Patchwork
                   ` (2 subsequent siblings)
  9 siblings, 1 reply; 19+ messages in thread
From: Michał Winiarski @ 2017-12-13 12:50 UTC (permalink / raw)
  To: intel-gfx

Also:

Revert "drm/i915/GuC/GLK: Load GuC on GLK"

Now that we no longer have fallback to execlists submission available,
if the firmware is selected by the driver but is not available on the
system (like in this case - where the FW is not yet released), we're
unable to get a clean CI run.

This reverts commit 90f192c8241e431d2c3076e4f2cb99ac25bfb1c5.
---
 drivers/gpu/drm/i915/i915_params.h  | 2 +-
 drivers/gpu/drm/i915/intel_guc_fw.c | 9 ---------
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 792ce26d7449..9725c5ad8ac6 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -45,7 +45,7 @@
 	param(int, disable_power_well, -1) \
 	param(int, enable_ips, 1) \
 	param(int, invert_brightness, 0) \
-	param(int, enable_guc, 0) \
+	param(int, enable_guc, -1) \
 	param(int, guc_log_level, -1) \
 	param(char *, guc_firmware_path, NULL) \
 	param(char *, huc_firmware_path, NULL) \
diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c
index cbc51c960425..3b0932942857 100644
--- a/drivers/gpu/drm/i915/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/intel_guc_fw.c
@@ -39,9 +39,6 @@
 #define KBL_FW_MAJOR 9
 #define KBL_FW_MINOR 39
 
-#define GLK_FW_MAJOR 10
-#define GLK_FW_MINOR 56
-
 #define GUC_FW_PATH(platform, major, minor) \
        "i915/" __stringify(platform) "_guc_ver" __stringify(major) "_" __stringify(minor) ".bin"
 
@@ -54,8 +51,6 @@ MODULE_FIRMWARE(I915_BXT_GUC_UCODE);
 #define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR)
 MODULE_FIRMWARE(I915_KBL_GUC_UCODE);
 
-#define I915_GLK_GUC_UCODE GUC_FW_PATH(glk, GLK_FW_MAJOR, GLK_FW_MINOR)
-
 static void guc_fw_select(struct intel_uc_fw *guc_fw)
 {
 	struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw);
@@ -82,10 +77,6 @@ static void guc_fw_select(struct intel_uc_fw *guc_fw)
 		guc_fw->path = I915_KBL_GUC_UCODE;
 		guc_fw->major_ver_wanted = KBL_FW_MAJOR;
 		guc_fw->minor_ver_wanted = KBL_FW_MINOR;
-	} else if (IS_GEMINILAKE(dev_priv)) {
-		guc_fw->path = I915_GLK_GUC_UCODE;
-		guc_fw->major_ver_wanted = GLK_FW_MAJOR;
-		guc_fw->minor_ver_wanted = GLK_FW_MINOR;
 	} else {
 		DRM_WARN("%s: No firmware known for this platform!\n",
 			 intel_uc_fw_type_repr(guc_fw->type));
-- 
2.14.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/8] drm/i915/guc: Move shared data allocation away from submission path
  2017-12-13 12:50 [PATCH 1/8] drm/i915/guc: Move shared data allocation away from submission path Michał Winiarski
                   ` (6 preceding siblings ...)
  2017-12-13 12:50 ` [PATCH 8/8] HAX Enable GuC Submission for CI Michał Winiarski
@ 2017-12-13 15:08 ` Patchwork
  2017-12-13 15:29 ` [PATCH 1/8] " Chris Wilson
  2017-12-13 17:33 ` ✓ Fi.CI.IGT: success for series starting with [1/8] " Patchwork
  9 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2017-12-13 15:08 UTC (permalink / raw)
  To: Michał Winiarski; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/8] drm/i915/guc: Move shared data allocation away from submission path
URL   : https://patchwork.freedesktop.org/series/35287/
State : success

== Summary ==

Series 35287v1 series starting with [1/8] drm/i915/guc: Move shared data allocation away from submission path
https://patchwork.freedesktop.org/api/1.0/series/35287/revisions/1/mbox/

Test debugfs_test:
        Subgroup read_all_entries:
                pass       -> DMESG-FAIL (fi-elk-e7500) fdo#103989 +1
Test gem_exec_suspend:
        Subgroup basic-s4-devices:
                dmesg-warn -> PASS       (fi-kbl-7500u) fdo#104062
Test kms_chamelium:
        Subgroup dp-crc-fast:
                pass       -> DMESG-FAIL (fi-kbl-7500u) fdo#103841
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-a:
                pass       -> DMESG-WARN (fi-kbl-r) fdo#104172
        Subgroup suspend-read-crc-pipe-b:
                pass       -> INCOMPLETE (fi-snb-2520m) fdo#103713

fdo#103989 https://bugs.freedesktop.org/show_bug.cgi?id=103989
fdo#104062 https://bugs.freedesktop.org/show_bug.cgi?id=104062
fdo#103841 https://bugs.freedesktop.org/show_bug.cgi?id=103841
fdo#104172 https://bugs.freedesktop.org/show_bug.cgi?id=104172
fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:437s
fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:439s
fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:383s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:511s
fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:279s
fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:511s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:512s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:487s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:476s
fi-elk-e7500     total:224  pass:163  dwarn:14  dfail:1   fail:0   skip:45 
fi-gdg-551       total:288  pass:178  dwarn:1   dfail:0   fail:1   skip:108 time:266s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:405s
fi-hsw-4770r     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:419s
fi-ilk-650       total:288  pass:228  dwarn:0   dfail:0   fail:0   skip:60  time:391s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:479s
fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:430s
fi-kbl-7500u     total:288  pass:262  dwarn:1   dfail:1   fail:0   skip:24  time:484s
fi-kbl-7560u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:528s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:464s
fi-kbl-r         total:288  pass:260  dwarn:1   dfail:0   fail:0   skip:27  time:531s
fi-pnv-d510      total:288  pass:222  dwarn:1   dfail:0   fail:0   skip:65  time:587s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:451s
fi-skl-6600u     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:534s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:557s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:493s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:447s
fi-snb-2520m     total:245  pass:211  dwarn:0   dfail:0   fail:0   skip:33 
fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:412s
Blacklisted hosts:
fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:610s
fi-cnl-y         total:217  pass:196  dwarn:0   dfail:0   fail:0   skip:20 
fi-glk-dsi       total:288  pass:10   dwarn:0   dfail:3   fail:0   skip:275 time:33s

f97f7de6c725c07feea8de61aadc1d0574301bdb drm-tip: 2017y-12m-13d-13h-46m-37s UTC integration manifest
df75d6018dc1 HAX Enable GuC Submission for CI
13b8a1ea018f drm/i915/guc: Extract doorbell verification into a function
7b68452943ac drm/i915/guc: Extract clients allocation to submission_init
89a4804778f8 drm/i915/guc: Extract doorbell creation from client allocation
1249f6d1c89e drm/i915/guc: Call invalidate after changing the vfunc
16d145bb5562 drm/i915/guc: Extract guc_init from guc_init_hw
4f2afebe9e23 drm/i915/guc: Move GuC workqueue allocations outside of the mutex
ec31e0a92956 drm/i915/guc: Move shared data allocation away from submission path

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7484/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 2/8] drm/i915/guc: Move GuC workqueue allocations outside of the mutex
  2017-12-13 12:50 ` [PATCH v2 2/8] drm/i915/guc: Move GuC workqueue allocations outside of the mutex Michał Winiarski
@ 2017-12-13 15:23   ` Chris Wilson
  2017-12-13 16:00     ` Chris Wilson
  0 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2017-12-13 15:23 UTC (permalink / raw)
  To: Michał Winiarski, intel-gfx

Quoting Michał Winiarski (2017-12-13 12:50:40)
> This gets rid of the following lockdep splat:
> 
> ======================================================
> WARNING: possible circular locking dependency detected
> 4.15.0-rc2-CI-Patchwork_7428+ #1 Not tainted
> ------------------------------------------------------
> debugfs_test/1351 is trying to acquire lock:
>  (&dev->struct_mutex){+.+.}, at: [<000000009d90d1a3>] i915_mutex_lock_interruptible+0x47/0x130 [i915]
> 
> but task is already holding lock:
>  (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560
> 
> which lock already depends on the new lock.
> 
> the existing dependency chain (in reverse order) is:
> 
> -> #6 (&mm->mmap_sem){++++}:
>        __might_fault+0x63/0x90
>        _copy_to_user+0x1e/0x70
>        filldir+0x8c/0xf0
>        dcache_readdir+0xeb/0x160
>        iterate_dir+0xe6/0x150
>        SyS_getdents+0xa0/0x130
>        entry_SYSCALL_64_fastpath+0x1c/0x89
> 
> -> #5 (&sb->s_type->i_mutex_key#5){++++}:
>        lockref_get+0x9/0x20
> 
> -> #4 ((completion)&req.done){+.+.}:
>        wait_for_common+0x54/0x210
>        devtmpfs_create_node+0x130/0x150
>        device_add+0x5ad/0x5e0
>        device_create_groups_vargs+0xd4/0xe0
>        device_create+0x35/0x40
>        msr_device_create+0x22/0x40
>        cpuhp_invoke_callback+0xc5/0xbf0
>        cpuhp_thread_fun+0x167/0x210
>        smpboot_thread_fn+0x17f/0x270
>        kthread+0x173/0x1b0
>        ret_from_fork+0x24/0x30
> 
> -> #3 (cpuhp_state-up){+.+.}:
>        cpuhp_issue_call+0x132/0x1c0
>        __cpuhp_setup_state_cpuslocked+0x12f/0x2a0
>        __cpuhp_setup_state+0x3a/0x50
>        page_writeback_init+0x3a/0x5c
>        start_kernel+0x393/0x3e2
>        secondary_startup_64+0xa5/0xb0
> 
> -> #2 (cpuhp_state_mutex){+.+.}:
>        __mutex_lock+0x81/0x9b0
>        __cpuhp_setup_state_cpuslocked+0x4b/0x2a0
>        __cpuhp_setup_state+0x3a/0x50
>        page_alloc_init+0x1f/0x26
>        start_kernel+0x139/0x3e2
>        secondary_startup_64+0xa5/0xb0
> 
> -> #1 (cpu_hotplug_lock.rw_sem){++++}:
>        cpus_read_lock+0x34/0xa0
>        apply_workqueue_attrs+0xd/0x40
>        __alloc_workqueue_key+0x2c7/0x4e1
>        intel_guc_submission_init+0x10c/0x650 [i915]
>        intel_uc_init_hw+0x29e/0x460 [i915]
>        i915_gem_init_hw+0xca/0x290 [i915]
>        i915_gem_init+0x115/0x3a0 [i915]
>        i915_driver_load+0x9a8/0x16c0 [i915]
>        i915_pci_probe+0x2e/0x90 [i915]
>        pci_device_probe+0x9c/0x120
>        driver_probe_device+0x2a3/0x480
>        __driver_attach+0xd9/0xe0
>        bus_for_each_dev+0x57/0x90
>        bus_add_driver+0x168/0x260
>        driver_register+0x52/0xc0
>        do_one_initcall+0x39/0x150
>        do_init_module+0x56/0x1ef
>        load_module+0x231c/0x2d70
>        SyS_finit_module+0xa5/0xe0
>        entry_SYSCALL_64_fastpath+0x1c/0x89
> 
> -> #0 (&dev->struct_mutex){+.+.}:
>        lock_acquire+0xaf/0x200
>        __mutex_lock+0x81/0x9b0
>        i915_mutex_lock_interruptible+0x47/0x130 [i915]
>        i915_gem_fault+0x201/0x760 [i915]
>        __do_fault+0x15/0x70
>        __handle_mm_fault+0x85b/0xe40
>        handle_mm_fault+0x14f/0x2f0
>        __do_page_fault+0x2d1/0x560
>        page_fault+0x22/0x30
> 
> other info that might help us debug this:
> 
> Chain exists of:
>   &dev->struct_mutex --> &sb->s_type->i_mutex_key#5 --> &mm->mmap_sem
> 
>  Possible unsafe locking scenario:
> 
>        CPU0                    CPU1
>        ----                    ----
>   lock(&mm->mmap_sem);
>                                lock(&sb->s_type->i_mutex_key#5);
>                                lock(&mm->mmap_sem);
>   lock(&dev->struct_mutex);
> 
>  *** DEADLOCK ***
> 
> 1 lock held by debugfs_test/1351:
>  #0:  (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560
> 
> stack backtrace:
> CPU: 2 PID: 1351 Comm: debugfs_test Not tainted 4.15.0-rc2-CI-Patchwork_7428+ #1
> Hardware name:                  /NUC6i5SYB, BIOS SYSKLi35.86A.0057.2017.0119.1758 01/19/2017
> Call Trace:
>  dump_stack+0x5f/0x86
>  print_circular_bug+0x230/0x3b0
>  check_prev_add+0x439/0x7b0
>  ? lockdep_init_map_crosslock+0x20/0x20
>  ? unwind_get_return_address+0x16/0x30
>  ? __lock_acquire+0x1385/0x15a0
>  __lock_acquire+0x1385/0x15a0
>  lock_acquire+0xaf/0x200
>  ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
>  __mutex_lock+0x81/0x9b0
>  ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
>  ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
>  ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
>  i915_mutex_lock_interruptible+0x47/0x130 [i915]
>  ? __pm_runtime_resume+0x4f/0x80
>  i915_gem_fault+0x201/0x760 [i915]
>  __do_fault+0x15/0x70
>  __handle_mm_fault+0x85b/0xe40
>  handle_mm_fault+0x14f/0x2f0
>  __do_page_fault+0x2d1/0x560
>  page_fault+0x22/0x30
> RIP: 0033:0x7f98d6f49116
> RSP: 002b:00007ffd6ffc3278 EFLAGS: 00010283
> RAX: 00007f98d39a2bc0 RBX: 0000000000000000 RCX: 0000000000001680
> RDX: 0000000000001680 RSI: 00007ffd6ffc3400 RDI: 00007f98d39a2bc0
> RBP: 00007ffd6ffc33a0 R08: 0000000000000000 R09: 00000000000005a0
> R10: 000055e847c2a830 R11: 0000000000000002 R12: 0000000000000001
> R13: 000055e847c1d040 R14: 00007ffd6ffc3400 R15: 00007f98d6752ba0
> 
> v2: Init preempt_work unconditionally (Chris)
> 
> Testcase: igt/debugfs_test/read_all_entries

With a #i915.enable_guc=1 addendum (or something)

> Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.c             |  1 +
>  drivers/gpu/drm/i915/i915_gem.c             |  4 ++
>  drivers/gpu/drm/i915/intel_guc.c            | 57 +++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_guc.h            |  2 +
>  drivers/gpu/drm/i915/intel_guc_log.c        | 23 ----------
>  drivers/gpu/drm/i915/intel_guc_submission.c | 70 +++++++----------------------
>  drivers/gpu/drm/i915/intel_guc_submission.h |  2 +
>  drivers/gpu/drm/i915/intel_uc.c             | 26 +++++++++++
>  drivers/gpu/drm/i915/intel_uc.h             |  2 +
>  9 files changed, 110 insertions(+), 77 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 721ccce1832f..285c8b238bff 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -621,6 +621,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv)
>         i915_gem_contexts_fini(dev_priv);
>         mutex_unlock(&dev_priv->drm.struct_mutex);
>  
> +       intel_uc_fini_wq(dev_priv);
>         i915_gem_cleanup_userptr(dev_priv);
>  
>         i915_gem_drain_freed_objects(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 8c3d801696b7..4b2ca43a610f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -5160,6 +5160,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>         if (ret)
>                 return ret;
>  
> +       ret = intel_uc_init_wq(dev_priv);
> +       if (ret)
> +               return ret;
> +
>         /* This is just a security blanket to placate dragons.
>          * On some systems, we very sporadically observe that the first TLBs
>          * used by the CS may be stale, despite us poking the TLB reset. If
> diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c
> index 92ed22f38fc4..3c6bf5a34c3c 100644
> --- a/drivers/gpu/drm/i915/intel_guc.c
> +++ b/drivers/gpu/drm/i915/intel_guc.c
> @@ -69,6 +69,63 @@ void intel_guc_init_early(struct intel_guc *guc)
>         guc->notify = gen8_guc_raise_irq;
>  }
>  
> +int intel_guc_init_wq(struct intel_guc *guc)
> +{
> +       struct drm_i915_private *dev_priv = guc_to_i915(guc);

if (!HAS_GUC()) or if (!USES_GUC()) ?

I think you want at least the former.

But other than the extra allocation,
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 7/8] drm/i915/guc: Extract doorbell verification into a function
  2017-12-13 12:50 ` [PATCH 7/8] drm/i915/guc: Extract doorbell verification into a function Michał Winiarski
@ 2017-12-13 15:23   ` Michal Wajdeczko
  2017-12-13 19:10     ` Michel Thierry
  0 siblings, 1 reply; 19+ messages in thread
From: Michal Wajdeczko @ 2017-12-13 15:23 UTC (permalink / raw)
  To: intel-gfx, Michał Winiarski

On Wed, 13 Dec 2017 13:50:45 +0100, Michał Winiarski  
<michal.winiarski@intel.com> wrote:

> We have the selftest that's checking doorbell create/destroy, so there's
> no need to check all doorbells delaying the reset every time.
> We do want to have that extra sanity check at module load/unload though.
>
> Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> ---

Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 4/8] drm/i915/guc: Call invalidate after changing the vfunc
  2017-12-13 12:50 ` [PATCH 4/8] drm/i915/guc: Call invalidate after changing the vfunc Michał Winiarski
@ 2017-12-13 15:24   ` Chris Wilson
  0 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2017-12-13 15:24 UTC (permalink / raw)
  To: Michał Winiarski, intel-gfx

Quoting Michał Winiarski (2017-12-13 12:50:42)
> To make this operation a bit cleaner, we should make sure that the HW
> can catch up by calling the new implementation right away.
> Note that currently we're only touching the vfunc at module load time
> (before GuC is even loaded), so this shouldn't cause any functional
> changes.
> 
> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/8] drm/i915/guc: Move shared data allocation away from submission path
  2017-12-13 12:50 [PATCH 1/8] drm/i915/guc: Move shared data allocation away from submission path Michał Winiarski
                   ` (7 preceding siblings ...)
  2017-12-13 15:08 ` ✓ Fi.CI.BAT: success for series starting with [1/8] drm/i915/guc: Move shared data allocation away from submission path Patchwork
@ 2017-12-13 15:29 ` Chris Wilson
  2017-12-13 17:33 ` ✓ Fi.CI.IGT: success for series starting with [1/8] " Patchwork
  9 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2017-12-13 15:29 UTC (permalink / raw)
  To: Michał Winiarski, intel-gfx

Quoting Michał Winiarski (2017-12-13 12:50:39)
> We need shared data for actions (e.g. guc suspend/resume), and we're
> using those with GuC submission disabled.
> Let's introduce intel_guc_init and move shared data alloc there.
> 
> This fixes GPF during module unload with HuC, but without GuC submission:
> 
> BUG: unable to handle kernel NULL pointer dereference at 000000005aee7809
> IP: intel_guc_suspend+0x34/0x140 [i915]
> PGD 0 P4D 0
> Oops: 0000 [#1] PREEMPT SMP
> Modules linked in: i915(O-) netconsole x86_pkg_temp_thermal
> intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel
> mei_me i2c_i801 mei prime_numbers [last unloaded: i915]
> CPU: 2 PID: 2794 Comm: rmmod Tainted: G     U  W  O 4.15.0-rc2+ #297
> Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0054.2016.0930.1102 09/30/2016
> task: 0000000055945c61 task.stack: 00000000264ccb43
> RIP: 0010:intel_guc_suspend+0x34/0x140 [i915]
> RSP: 0018:ffffc90000483df8 EFLAGS: 00010286
> RAX: 0000000000000000 RBX: ffff880829180000 RCX: 0000000000000000
> RDX: 0000000000000006 RSI: ffff880844c2c938 RDI: ffff880844c2c000
> RBP: ffff880829180000 R08: 00000000a29c58c1 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffa040ba40
> R13: ffffffffa040bab0 R14: ffff88084a195060 R15: 000055df3ef357a0
> FS:  00007ff43c043740(0000) GS:ffff88084e200000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00000000000000f9 CR3: 000000083f179005 CR4: 00000000003606e0
> Call Trace:
>  i915_gem_suspend+0x9d/0x130 [i915]
>  ? i915_driver_unload+0x68/0x180 [i915]
>  i915_driver_unload+0x70/0x180 [i915]
>  i915_pci_remove+0x15/0x20 [i915]
>  pci_device_remove+0x36/0xb0
>  device_release_driver_internal+0x15f/0x220
>  driver_detach+0x3a/0x80
>  bus_remove_driver+0x58/0xd0
>  pci_unregister_driver+0x29/0x90
>  SyS_delete_module+0x150/0x1e0
>  entry_SYSCALL_64_fastpath+0x23/0x9a
> RIP: 0033:0x7ff43b51b5c7
> RSP: 002b:00007ffe6825a758 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
> RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007ff43b51b5c7
> RDX: 000000000000000a RSI: 0000000000000800 RDI: 000055df3ef35808
> RBP: 0000000000000000 R08: 00007ffe682596d1 R09: 0000000000000000
> R10: 00007ff43b594880 R11: 0000000000000206 R12: 000055df3ef357a0
> R13: 00007ffe68259740 R14: 000055df3ef35260 R15: 000055df3ef357a0
> Code: 00 00 02 74 03 31 c0 c3 53 48 89 fb 48 83 ec 10 e8 52 0f
> f8 ff 48 b8 01 05 00 00 02 00 00 00 48 89 44 24 04 48 8b 83 00 12 00 00 <f6> 80
> f9 00 00 00 01 0f 84 a7 00 00 00 f6 80 98 00 00 00 01 0f
> RIP: intel_guc_suspend+0x34/0x140 [i915] RSP: ffffc90000483df8
> CR2: 00000000000000f9
> ---[ end trace 23a192a61d937a3e ]---
> 
> Fixes: b8e5eb960b28 ("drm/i915/guc: Allocate separate shared data object for GuC communication")
> Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>

The movement looks ok, I'll let others chime in if they think the
ordering is wrong,
Reviewed: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2 2/8] drm/i915/guc: Move GuC workqueue allocations outside of the mutex
  2017-12-13 15:23   ` Chris Wilson
@ 2017-12-13 16:00     ` Chris Wilson
  0 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2017-12-13 16:00 UTC (permalink / raw)
  To: Michał Winiarski, intel-gfx

Quoting Chris Wilson (2017-12-13 15:23:31)
> Quoting Michał Winiarski (2017-12-13 12:50:40)
> > This gets rid of the following lockdep splat:
> > 
> > ======================================================
> > WARNING: possible circular locking dependency detected
> > 4.15.0-rc2-CI-Patchwork_7428+ #1 Not tainted
> > ------------------------------------------------------
> > debugfs_test/1351 is trying to acquire lock:
> >  (&dev->struct_mutex){+.+.}, at: [<000000009d90d1a3>] i915_mutex_lock_interruptible+0x47/0x130 [i915]
> > 
> > but task is already holding lock:
> >  (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560
> > 
> > which lock already depends on the new lock.
> > 
> > the existing dependency chain (in reverse order) is:
> > 
> > -> #6 (&mm->mmap_sem){++++}:
> >        __might_fault+0x63/0x90
> >        _copy_to_user+0x1e/0x70
> >        filldir+0x8c/0xf0
> >        dcache_readdir+0xeb/0x160
> >        iterate_dir+0xe6/0x150
> >        SyS_getdents+0xa0/0x130
> >        entry_SYSCALL_64_fastpath+0x1c/0x89
> > 
> > -> #5 (&sb->s_type->i_mutex_key#5){++++}:
> >        lockref_get+0x9/0x20
> > 
> > -> #4 ((completion)&req.done){+.+.}:
> >        wait_for_common+0x54/0x210
> >        devtmpfs_create_node+0x130/0x150
> >        device_add+0x5ad/0x5e0
> >        device_create_groups_vargs+0xd4/0xe0
> >        device_create+0x35/0x40
> >        msr_device_create+0x22/0x40
> >        cpuhp_invoke_callback+0xc5/0xbf0
> >        cpuhp_thread_fun+0x167/0x210
> >        smpboot_thread_fn+0x17f/0x270
> >        kthread+0x173/0x1b0
> >        ret_from_fork+0x24/0x30
> > 
> > -> #3 (cpuhp_state-up){+.+.}:
> >        cpuhp_issue_call+0x132/0x1c0
> >        __cpuhp_setup_state_cpuslocked+0x12f/0x2a0
> >        __cpuhp_setup_state+0x3a/0x50
> >        page_writeback_init+0x3a/0x5c
> >        start_kernel+0x393/0x3e2
> >        secondary_startup_64+0xa5/0xb0
> > 
> > -> #2 (cpuhp_state_mutex){+.+.}:
> >        __mutex_lock+0x81/0x9b0
> >        __cpuhp_setup_state_cpuslocked+0x4b/0x2a0
> >        __cpuhp_setup_state+0x3a/0x50
> >        page_alloc_init+0x1f/0x26
> >        start_kernel+0x139/0x3e2
> >        secondary_startup_64+0xa5/0xb0
> > 
> > -> #1 (cpu_hotplug_lock.rw_sem){++++}:
> >        cpus_read_lock+0x34/0xa0
> >        apply_workqueue_attrs+0xd/0x40
> >        __alloc_workqueue_key+0x2c7/0x4e1
> >        intel_guc_submission_init+0x10c/0x650 [i915]
> >        intel_uc_init_hw+0x29e/0x460 [i915]
> >        i915_gem_init_hw+0xca/0x290 [i915]
> >        i915_gem_init+0x115/0x3a0 [i915]
> >        i915_driver_load+0x9a8/0x16c0 [i915]
> >        i915_pci_probe+0x2e/0x90 [i915]
> >        pci_device_probe+0x9c/0x120
> >        driver_probe_device+0x2a3/0x480
> >        __driver_attach+0xd9/0xe0
> >        bus_for_each_dev+0x57/0x90
> >        bus_add_driver+0x168/0x260
> >        driver_register+0x52/0xc0
> >        do_one_initcall+0x39/0x150
> >        do_init_module+0x56/0x1ef
> >        load_module+0x231c/0x2d70
> >        SyS_finit_module+0xa5/0xe0
> >        entry_SYSCALL_64_fastpath+0x1c/0x89
> > 
> > -> #0 (&dev->struct_mutex){+.+.}:
> >        lock_acquire+0xaf/0x200
> >        __mutex_lock+0x81/0x9b0
> >        i915_mutex_lock_interruptible+0x47/0x130 [i915]
> >        i915_gem_fault+0x201/0x760 [i915]
> >        __do_fault+0x15/0x70
> >        __handle_mm_fault+0x85b/0xe40
> >        handle_mm_fault+0x14f/0x2f0
> >        __do_page_fault+0x2d1/0x560
> >        page_fault+0x22/0x30
> > 
> > other info that might help us debug this:
> > 
> > Chain exists of:
> >   &dev->struct_mutex --> &sb->s_type->i_mutex_key#5 --> &mm->mmap_sem
> > 
> >  Possible unsafe locking scenario:
> > 
> >        CPU0                    CPU1
> >        ----                    ----
> >   lock(&mm->mmap_sem);
> >                                lock(&sb->s_type->i_mutex_key#5);
> >                                lock(&mm->mmap_sem);
> >   lock(&dev->struct_mutex);
> > 
> >  *** DEADLOCK ***
> > 
> > 1 lock held by debugfs_test/1351:
> >  #0:  (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560
> > 
> > stack backtrace:
> > CPU: 2 PID: 1351 Comm: debugfs_test Not tainted 4.15.0-rc2-CI-Patchwork_7428+ #1
> > Hardware name:                  /NUC6i5SYB, BIOS SYSKLi35.86A.0057.2017.0119.1758 01/19/2017
> > Call Trace:
> >  dump_stack+0x5f/0x86
> >  print_circular_bug+0x230/0x3b0
> >  check_prev_add+0x439/0x7b0
> >  ? lockdep_init_map_crosslock+0x20/0x20
> >  ? unwind_get_return_address+0x16/0x30
> >  ? __lock_acquire+0x1385/0x15a0
> >  __lock_acquire+0x1385/0x15a0
> >  lock_acquire+0xaf/0x200
> >  ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
> >  __mutex_lock+0x81/0x9b0
> >  ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
> >  ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
> >  ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
> >  i915_mutex_lock_interruptible+0x47/0x130 [i915]
> >  ? __pm_runtime_resume+0x4f/0x80
> >  i915_gem_fault+0x201/0x760 [i915]
> >  __do_fault+0x15/0x70
> >  __handle_mm_fault+0x85b/0xe40
> >  handle_mm_fault+0x14f/0x2f0
> >  __do_page_fault+0x2d1/0x560
> >  page_fault+0x22/0x30
> > RIP: 0033:0x7f98d6f49116
> > RSP: 002b:00007ffd6ffc3278 EFLAGS: 00010283
> > RAX: 00007f98d39a2bc0 RBX: 0000000000000000 RCX: 0000000000001680
> > RDX: 0000000000001680 RSI: 00007ffd6ffc3400 RDI: 00007f98d39a2bc0
> > RBP: 00007ffd6ffc33a0 R08: 0000000000000000 R09: 00000000000005a0
> > R10: 000055e847c2a830 R11: 0000000000000002 R12: 0000000000000001
> > R13: 000055e847c1d040 R14: 00007ffd6ffc3400 R15: 00007f98d6752ba0
> > 
> > v2: Init preempt_work unconditionally (Chris)
> > 
> > Testcase: igt/debugfs_test/read_all_entries
> 
> With a #i915.enable_guc=1 addendum (or something)
> 
> > Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.c             |  1 +
> >  drivers/gpu/drm/i915/i915_gem.c             |  4 ++
> >  drivers/gpu/drm/i915/intel_guc.c            | 57 +++++++++++++++++++++++
> >  drivers/gpu/drm/i915/intel_guc.h            |  2 +
> >  drivers/gpu/drm/i915/intel_guc_log.c        | 23 ----------
> >  drivers/gpu/drm/i915/intel_guc_submission.c | 70 +++++++----------------------
> >  drivers/gpu/drm/i915/intel_guc_submission.h |  2 +
> >  drivers/gpu/drm/i915/intel_uc.c             | 26 +++++++++++
> >  drivers/gpu/drm/i915/intel_uc.h             |  2 +
> >  9 files changed, 110 insertions(+), 77 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > index 721ccce1832f..285c8b238bff 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -621,6 +621,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv)
> >         i915_gem_contexts_fini(dev_priv);
> >         mutex_unlock(&dev_priv->drm.struct_mutex);
> >  
> > +       intel_uc_fini_wq(dev_priv);
> >         i915_gem_cleanup_userptr(dev_priv);
> >  
> >         i915_gem_drain_freed_objects(dev_priv);
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 8c3d801696b7..4b2ca43a610f 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -5160,6 +5160,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
> >         if (ret)
> >                 return ret;
> >  
> > +       ret = intel_uc_init_wq(dev_priv);
> > +       if (ret)
> > +               return ret;
> > +
> >         /* This is just a security blanket to placate dragons.
> >          * On some systems, we very sporadically observe that the first TLBs
> >          * used by the CS may be stale, despite us poking the TLB reset. If
> > diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c
> > index 92ed22f38fc4..3c6bf5a34c3c 100644
> > --- a/drivers/gpu/drm/i915/intel_guc.c
> > +++ b/drivers/gpu/drm/i915/intel_guc.c
> > @@ -69,6 +69,63 @@ void intel_guc_init_early(struct intel_guc *guc)
> >         guc->notify = gen8_guc_raise_irq;
> >  }
> >  
> > +int intel_guc_init_wq(struct intel_guc *guc)
> > +{
> > +       struct drm_i915_private *dev_priv = guc_to_i915(guc);
> 
> if (!HAS_GUC()) or if (!USES_GUC()) ?
> 
> I think you want at least the former.
> 
> But other than the extra allocation,
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Michał pointed out that intel_uc_init_wq contains the HAS_GUC, and I
just confused the similar function names.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* ✓ Fi.CI.IGT: success for series starting with [1/8] drm/i915/guc: Move shared data allocation away from submission path
  2017-12-13 12:50 [PATCH 1/8] drm/i915/guc: Move shared data allocation away from submission path Michał Winiarski
                   ` (8 preceding siblings ...)
  2017-12-13 15:29 ` [PATCH 1/8] " Chris Wilson
@ 2017-12-13 17:33 ` Patchwork
  9 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2017-12-13 17:33 UTC (permalink / raw)
  To: Michał Winiarski; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/8] drm/i915/guc: Move shared data allocation away from submission path
URL   : https://patchwork.freedesktop.org/series/35287/
State : success

== Summary ==

Warning: bzip CI_DRM_3507/shard-glkb6/results33.json.bz2 wasn't in correct JSON format
Test gem_tiled_swapping:
        Subgroup non-threaded:
                pass       -> INCOMPLETE (shard-snb) fdo#104009
Test kms_setmode:
        Subgroup basic:
                pass       -> FAIL       (shard-hsw) fdo#99912
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-a:
                skip       -> PASS       (shard-hsw) fdo#103375
Test gem_eio:
        Subgroup in-flight-contexts:
                pass       -> DMESG-WARN (shard-snb) fdo#104058
Test kms_frontbuffer_tracking:
        Subgroup fbc-1p-offscren-pri-shrfb-draw-render:
                pass       -> FAIL       (shard-snb) fdo#101623

fdo#104009 https://bugs.freedesktop.org/show_bug.cgi?id=104009
fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912
fdo#103375 https://bugs.freedesktop.org/show_bug.cgi?id=103375
fdo#104058 https://bugs.freedesktop.org/show_bug.cgi?id=104058
fdo#101623 https://bugs.freedesktop.org/show_bug.cgi?id=101623

shard-hsw        total:2712 pass:1537 dwarn:1   dfail:0   fail:10  skip:1164 time:9469s
shard-snb        total:2694 pass:1301 dwarn:2   dfail:0   fail:12  skip:1378 time:7914s
Blacklisted hosts:
shard-apl        total:2690 pass:1664 dwarn:1   dfail:0   fail:22  skip:1002 time:13540s
shard-kbl        total:2540 pass:1626 dwarn:18  dfail:2   fail:23  skip:867 time:9796s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7484/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 8/8] HAX Enable GuC Submission for CI
  2017-12-13 12:50 ` [PATCH 8/8] HAX Enable GuC Submission for CI Michał Winiarski
@ 2017-12-13 18:15   ` Chris Wilson
  0 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2017-12-13 18:15 UTC (permalink / raw)
  To: Michał Winiarski, intel-gfx

Quoting Michał Winiarski (2017-12-13 12:50:46)
> Also:
> 
> Revert "drm/i915/GuC/GLK: Load GuC on GLK"
> 
> Now that we no longer have fallback to execlists submission available,
> if the firmware is selected by the driver but is not available on the
> system (like in this case - where the FW is not yet released), we're
> unable to get a clean CI run.

If the FW hasn't yet been released, should we even be listing it in the
driver? We just had the issue with being strict on not changing dmc
before it was available from linux-firmware to avoid imposing infeasible
demands on the user. In this case it is not so bad since it's not yet
enabled by default, but doesn't it generate a warning about missing
firmware at install time?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 5/8] drm/i915/guc: Extract doorbell creation from client allocation
  2017-12-13 12:50 ` [PATCH 5/8] drm/i915/guc: Extract doorbell creation from client allocation Michał Winiarski
@ 2017-12-13 19:03   ` Michel Thierry
  0 siblings, 0 replies; 19+ messages in thread
From: Michel Thierry @ 2017-12-13 19:03 UTC (permalink / raw)
  To: Winiarski, Michal, intel-gfx

On 13/12/17 04:50, Winiarski, Michal wrote:
> Full GPU reset causes GuC to be reset. This means that every time we're
> doing a reset, we need to talk to GuC and tell it about doorbells.
> Let's separate the communication part (create_doorbell) from our
> internal bookkeeping (reserve_doorbell) so that we can cleanly separate
> the initialization done at module load from reinitialization done at
> reset in the following patch.
> While I'm here, let's also add a proper (although slightly asymetric)
> cleanup that doesn't try to communicate with GuC after it's already
> gone, getting rid of "expected" warnings caused by GuC action failures
> on module unload.
> 
> Note that I've also removed one of the tests (bitmap out of sync), since
> it doesn't make much sense anymore - bitmaps are now not expected to
> change during the lifetime of a client.
> 
> Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_guc_submission.c | 151 ++++++++--------------------
>   drivers/gpu/drm/i915/selftests/intel_guc.c  | 110 +++++++++-----------
>   2 files changed, 88 insertions(+), 173 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 8f4b274d66a7..c74e78b6ba41 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -88,7 +88,7 @@ static inline bool is_high_priority(struct intel_guc_client *client)
>                  client->priority == GUC_CLIENT_PRIORITY_HIGH);
>   }
> 
> -static int __reserve_doorbell(struct intel_guc_client *client)
> +static int reserve_doorbell(struct intel_guc_client *client)
>   {
>          unsigned long offset;
>          unsigned long end;
> @@ -120,7 +120,7 @@ static int __reserve_doorbell(struct intel_guc_client *client)
>          return 0;
>   }
> 
> -static void __unreserve_doorbell(struct intel_guc_client *client)
> +static void unreserve_doorbell(struct intel_guc_client *client)
>   {
>          GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID);
> 
> @@ -188,32 +188,21 @@ static bool has_doorbell(struct intel_guc_client *client)
>          return test_bit(client->doorbell_id, client->guc->doorbell_bitmap);
>   }
> 
> -static int __create_doorbell(struct intel_guc_client *client)
> +static void __create_doorbell(struct intel_guc_client *client)
>   {
>          struct guc_doorbell_info *doorbell;
> -       int err;
> 
>          doorbell = __get_doorbell(client);
>          doorbell->db_status = GUC_DOORBELL_ENABLED;
>          doorbell->cookie = 0;
> -
> -       err = __guc_allocate_doorbell(client->guc, client->stage_id);
> -       if (err) {
> -               doorbell->db_status = GUC_DOORBELL_DISABLED;
> -               DRM_ERROR("Couldn't create client %u doorbell: %d\n",
> -                         client->stage_id, err);
> -       }
> -
> -       return err;
>   }

__create_doorbell isn't creating anything, and now it is just changing 
the db status, but that has nothing to do with this patch.

Reviewed-by: Michel Thierry <michel.thierry@intel.com>

> 
> -static int __destroy_doorbell(struct intel_guc_client *client)
> +static void __destroy_doorbell(struct intel_guc_client *client)
>   {
>          struct drm_i915_private *dev_priv = guc_to_i915(client->guc);
>          struct guc_doorbell_info *doorbell;
>          u16 db_id = client->doorbell_id;
> 
> -       GEM_BUG_ON(db_id >= GUC_DOORBELL_INVALID);
> 
>          doorbell = __get_doorbell(client);
>          doorbell->db_status = GUC_DOORBELL_DISABLED;
> @@ -225,50 +214,42 @@ static int __destroy_doorbell(struct intel_guc_client *client)
>           */
>          if (wait_for_us(!(I915_READ(GEN8_DRBREGL(db_id)) & GEN8_DRB_VALID), 10))
>                  WARN_ONCE(true, "Doorbell never became invalid after disable\n");
> -
> -       return __guc_deallocate_doorbell(client->guc, client->stage_id);
>   }
> 
>   static int create_doorbell(struct intel_guc_client *client)
>   {
>          int ret;
> 
> -       ret = __reserve_doorbell(client);
> -       if (ret)
> -               return ret;
> -
>          __update_doorbell_desc(client, client->doorbell_id);
> +       __create_doorbell(client);
> 
> -       ret = __create_doorbell(client);
> -       if (ret)
> -               goto err;
> +       ret = __guc_allocate_doorbell(client->guc, client->stage_id);
> +       if (ret) {
> +               __destroy_doorbell(client);
> +               __update_doorbell_desc(client, GUC_DOORBELL_INVALID);
> +               DRM_ERROR("Couldn't create client %u doorbell: %d\n",
> +                         client->stage_id, ret);
> +               return ret;
> +       }
> 
>          return 0;
> -
> -err:
> -       __update_doorbell_desc(client, GUC_DOORBELL_INVALID);
> -       __unreserve_doorbell(client);
> -       return ret;
>   }
> 
>   static int destroy_doorbell(struct intel_guc_client *client)
>   {
> -       int err;
> +       int ret;
> 
>          GEM_BUG_ON(!has_doorbell(client));
> 
> -       /* XXX: wait for any interrupts */
> -       /* XXX: wait for workqueue to drain */
> -
> -       err = __destroy_doorbell(client);
> -       if (err)
> -               return err;
> +       __destroy_doorbell(client);
> +       ret = __guc_deallocate_doorbell(client->guc, client->stage_id);
> +       if (ret)
> +               DRM_ERROR("Couldn't destroy client %u doorbell: %d\n",
> +                         client->stage_id, ret);
> 
>          __update_doorbell_desc(client, GUC_DOORBELL_INVALID);
> 
> -       __unreserve_doorbell(client);
> -
> -       return 0;
> +       return ret;
>   }
> 
>   static unsigned long __select_cacheline(struct intel_guc *guc)
> @@ -839,73 +820,18 @@ static bool doorbell_ok(struct intel_guc *guc, u16 db_id)
>          return false;
>   }
> 
> -/*
> - * If the GuC thinks that the doorbell is unassigned (e.g. because we reset and
> - * reloaded the GuC FW) we can use this function to tell the GuC to reassign the
> - * doorbell to the rightful owner.
> - */
> -static int __reset_doorbell(struct intel_guc_client *client, u16 db_id)
> -{
> -       int err;
> -
> -       __update_doorbell_desc(client, db_id);
> -       err = __create_doorbell(client);
> -       if (!err)
> -               err = __destroy_doorbell(client);
> -
> -       return err;
> -}
> -
> -/*
> - * Set up & tear down each unused doorbell in turn, to ensure that all doorbell
> - * HW is (re)initialised. For that end, we might have to borrow the first
> - * client. Also, tell GuC about all the doorbells in use by all clients.
> - * We do this because the KMD, the GuC and the doorbell HW can easily go out of
> - * sync (e.g. we can reset the GuC, but not the doorbel HW).
> - */
> -static int guc_init_doorbell_hw(struct intel_guc *guc)
> +static int guc_clients_doorbell_init(struct intel_guc *guc)
>   {
> -       struct intel_guc_client *client = guc->execbuf_client;
> -       bool recreate_first_client = false;
>          u16 db_id;
>          int ret;
> 
> -       /* For unused doorbells, make sure they are disabled */
> -       for_each_clear_bit(db_id, guc->doorbell_bitmap, GUC_NUM_DOORBELLS) {
> -               if (doorbell_ok(guc, db_id))
> -                       continue;
> -
> -               if (has_doorbell(client)) {
> -                       /* Borrow execbuf_client (we will recreate it later) */
> -                       destroy_doorbell(client);
> -                       recreate_first_client = true;
> -               }
> -
> -               ret = __reset_doorbell(client, db_id);
> -               WARN(ret, "Doorbell %u reset failed, err %d\n", db_id, ret);
> -       }
> -
> -       if (recreate_first_client) {
> -               ret = __reserve_doorbell(client);
> -               if (unlikely(ret)) {
> -                       DRM_ERROR("Couldn't re-reserve first client db: %d\n",
> -                                 ret);
> -                       return ret;
> -               }
> -
> -               __update_doorbell_desc(client, client->doorbell_id);
> -       }
> -
> -       /* Now for every client (and not only execbuf_client) make sure their
> -        * doorbells are known by the GuC
> -        */
> -       ret = __create_doorbell(guc->execbuf_client);
> +       ret = create_doorbell(guc->execbuf_client);
>          if (ret)
>                  return ret;
> 
> -       ret = __create_doorbell(guc->preempt_client);
> +       ret = create_doorbell(guc->preempt_client);
>          if (ret) {
> -               __destroy_doorbell(guc->execbuf_client);
> +               destroy_doorbell(guc->execbuf_client);
>                  return ret;
>          }
> 
> @@ -916,6 +842,19 @@ static int guc_init_doorbell_hw(struct intel_guc *guc)
>          return 0;
>   }
> 
> +static void guc_clients_doorbell_fini(struct intel_guc *guc)
> +{
> +       /*
> +        * By the time we're here, GuC has already been reset.
> +        * Instead of trying (in vain) to communicate with it, let's just
> +        * cleanup the doorbell HW and our internal state.
> +        */
> +       __destroy_doorbell(guc->preempt_client);
> +       __update_doorbell_desc(guc->preempt_client, GUC_DOORBELL_INVALID);
> +       __destroy_doorbell(guc->execbuf_client);
> +       __update_doorbell_desc(guc->execbuf_client, GUC_DOORBELL_INVALID);
> +}
> +
>   /**
>    * guc_client_alloc() - Allocate an intel_guc_client
>    * @dev_priv:  driver private data structure
> @@ -991,7 +930,7 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
>          guc_proc_desc_init(guc, client);
>          guc_stage_desc_init(guc, client);
> 
> -       ret = create_doorbell(client);
> +       ret = reserve_doorbell(client);
>          if (ret)
>                  goto err_vaddr;
> 
> @@ -1015,16 +954,7 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
> 
>   static void guc_client_free(struct intel_guc_client *client)
>   {
> -       /*
> -        * XXX: wait for any outstanding submissions before freeing memory.
> -        * Be sure to drop any locks
> -        */
> -
> -       /* FIXME: in many cases, by the time we get here the GuC has been
> -        * reset, so we cannot destroy the doorbell properly. Ignore the
> -        * error message for now
> -        */
> -       destroy_doorbell(client);
> +       unreserve_doorbell(client);
>          guc_stage_desc_fini(client->guc, client);
>          i915_gem_object_unpin_map(client->vma->obj);
>          i915_vma_unpin_and_release(&client->vma);
> @@ -1366,7 +1296,7 @@ int intel_guc_submission_enable(struct intel_guc *guc)
>          if (err)
>                  goto err_free_clients;
> 
> -       err = guc_init_doorbell_hw(guc);
> +       err = guc_clients_doorbell_init(guc);
>          if (err)
>                  goto err_free_clients;
> 
> @@ -1398,6 +1328,7 @@ void intel_guc_submission_disable(struct intel_guc *guc)
>          GEM_BUG_ON(dev_priv->gt.awake); /* GT should be parked first */
> 
>          guc_interrupts_release(dev_priv);
> +       guc_clients_doorbell_fini(guc);
> 
>          /* Revert back to manual ELSP submission */
>          intel_engines_reset_default_submission(dev_priv);
> diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c
> index 68d6a69c738f..3f9016466dea 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_guc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_guc.c
> @@ -85,21 +85,26 @@ static int validate_client(struct intel_guc_client *client,
>                  return 0;
>   }
> 
> +static bool client_doorbell_in_sync(struct intel_guc_client *client)
> +{
> +       return doorbell_ok(client->guc, client->doorbell_id);
> +}
> +
>   /*
> - * Check that guc_init_doorbell_hw is doing what it should.
> + * Check that we're able to synchronize guc_clients with their doorbells
>    *
> - * During GuC submission enable, we create GuC clients and their doorbells,
> - * but after resetting the microcontroller (resume & gpu reset), these
> - * GuC clients are still around, but the status of their doorbells may be
> - * incorrect. This is the reason behind validating that the doorbells status
> - * expected by the driver matches what the GuC/HW have.
> + * We're creating clients and reserving doorbells once, at module load. During
> + * module lifetime, GuC, doorbell HW, and i915 state may go out of sync due to
> + * GuC being reset. In other words - GuC clients are still around, but the
> + * status of their doorbells may be incorrect. This is the reason behind
> + * validating that the doorbells status expected by the driver matches what the
> + * GuC/HW have.
>    */
> -static int igt_guc_init_doorbell_hw(void *args)
> +static int igt_guc_clients(void *args)
>   {
>          struct drm_i915_private *dev_priv = args;
>          struct intel_guc *guc;
> -       DECLARE_BITMAP(db_bitmap_bk, GUC_NUM_DOORBELLS);
> -       int i, err = 0;
> +       int err = 0;
> 
>          GEM_BUG_ON(!HAS_GUC(dev_priv));
>          mutex_lock(&dev_priv->drm.struct_mutex);
> @@ -148,10 +153,21 @@ static int igt_guc_init_doorbell_hw(void *args)
>                  goto out;
>          }
> 
> -       /* each client should have received a doorbell during alloc */
> +       /* each client should now have reserved a doorbell */
>          if (!has_doorbell(guc->execbuf_client) ||
>              !has_doorbell(guc->preempt_client)) {
> -               pr_err("guc_clients_create didn't create doorbells\n");
> +               pr_err("guc_clients_create didn't reserve doorbells\n");
> +               err = -EINVAL;
> +               goto out;
> +       }
> +
> +       /* Now create the doorbells */
> +       guc_clients_doorbell_init(guc);
> +
> +       /* each client should now have received a doorbell */
> +       if (!client_doorbell_in_sync(guc->execbuf_client) ||
> +           !client_doorbell_in_sync(guc->preempt_client)) {
> +               pr_err("failed to initialize the doorbells\n");
>                  err = -EINVAL;
>                  goto out;
>          }
> @@ -160,25 +176,26 @@ static int igt_guc_init_doorbell_hw(void *args)
>           * Basic test - an attempt to reallocate a valid doorbell to the
>           * client it is currently assigned should not cause a failure.
>           */
> -       err = guc_init_doorbell_hw(guc);
> +       err = guc_clients_doorbell_init(guc);
>          if (err)
>                  goto out;
> 
>          /*
>           * Negative test - a client with no doorbell (invalid db id).
> -        * Each client gets a doorbell when it is created, after destroying
> -        * the doorbell, the db id is changed to GUC_DOORBELL_INVALID and the
> -        * firmware will reject any attempt to allocate a doorbell with an
> -        * invalid id (db has to be reserved before allocation).
> +        * After destroying the doorbell, the db id is changed to
> +        * GUC_DOORBELL_INVALID and the firmware will reject any attempt to
> +        * allocate a doorbell with an invalid id (db has to be reserved before
> +        * allocation).
>           */
>          destroy_doorbell(guc->execbuf_client);
> -       if (has_doorbell(guc->execbuf_client)) {
> +       if (client_doorbell_in_sync(guc->execbuf_client)) {
>                  pr_err("destroy db did not work\n");
>                  err = -EINVAL;
>                  goto out;
>          }
> 
> -       err = guc_init_doorbell_hw(guc);
> +       unreserve_doorbell(guc->execbuf_client);
> +       err = guc_clients_doorbell_init(guc);
>          if (err != -EIO) {
>                  pr_err("unexpected (err = %d)", err);
>                  goto out;
> @@ -191,33 +208,13 @@ static int igt_guc_init_doorbell_hw(void *args)
>          }
> 
>          /* clean after test */
> -       err = create_doorbell(guc->execbuf_client);
> -       if (err) {
> -               pr_err("recreate doorbell failed\n");
> -               goto out;
> -       }
> -
> -       /*
> -        * Negative test - doorbell_bitmap out of sync, will trigger a few of
> -        * WARN_ON(!doorbell_ok(guc, db_id)) but that's ok as long as the
> -        * doorbells from our clients don't fail.
> -        */
> -       bitmap_copy(db_bitmap_bk, guc->doorbell_bitmap, GUC_NUM_DOORBELLS);
> -       for (i = 0; i < GUC_NUM_DOORBELLS; i++)
> -               if (i % 2)
> -                       test_and_change_bit(i, guc->doorbell_bitmap);
> -
> -       err = guc_init_doorbell_hw(guc);
> +       err = reserve_doorbell(guc->execbuf_client);
>          if (err) {
> -               pr_err("out of sync doorbell caused an error\n");
> -               goto out;
> +               pr_err("failed to reserve back the doorbell back\n");
>          }
> -
> -       /* restore 'correct' db bitmap */
> -       bitmap_copy(guc->doorbell_bitmap, db_bitmap_bk, GUC_NUM_DOORBELLS);
> -       err = guc_init_doorbell_hw(guc);
> +       err = create_doorbell(guc->execbuf_client);
>          if (err) {
> -               pr_err("restored doorbell caused an error\n");
> +               pr_err("recreate doorbell failed\n");
>                  goto out;
>          }
> 
> @@ -226,8 +223,11 @@ static int igt_guc_init_doorbell_hw(void *args)
>           * Leave clean state for other test, plus the driver always destroy the
>           * clients during unload.
>           */
> +       destroy_doorbell(guc->execbuf_client);
> +       destroy_doorbell(guc->preempt_client);
>          guc_clients_destroy(guc);
>          guc_clients_create(guc);
> +       guc_clients_doorbell_init(guc);
>   unlock:
>          mutex_unlock(&dev_priv->drm.struct_mutex);
>          return err;
> @@ -309,25 +309,7 @@ static int igt_guc_doorbells(void *arg)
> 
>                  db_id = clients[i]->doorbell_id;
> 
> -               /*
> -                * Client alloc gives us a doorbell, but we want to exercise
> -                * this ourselves (this resembles guc_init_doorbell_hw)
> -                */
> -               destroy_doorbell(clients[i]);
> -               if (clients[i]->doorbell_id != GUC_DOORBELL_INVALID) {
> -                       pr_err("[%d] destroy db did not work!\n", i);
> -                       err = -EINVAL;
> -                       goto out;
> -               }
> -
> -               err = __reserve_doorbell(clients[i]);
> -               if (err) {
> -                       pr_err("[%d] Failed to reserve a doorbell\n", i);
> -                       goto out;
> -               }
> -
> -               __update_doorbell_desc(clients[i], clients[i]->doorbell_id);
> -               err = __create_doorbell(clients[i]);
> +               err = create_doorbell(clients[i]);
>                  if (err) {
>                          pr_err("[%d] Failed to create a doorbell\n", i);
>                          goto out;
> @@ -348,8 +330,10 @@ static int igt_guc_doorbells(void *arg)
> 
>   out:
>          for (i = 0; i < ATTEMPTS; i++)
> -               if (!IS_ERR_OR_NULL(clients[i]))
> +               if (!IS_ERR_OR_NULL(clients[i])) {
> +                       destroy_doorbell(clients[i]);
>                          guc_client_free(clients[i]);
> +               }
>   unlock:
>          mutex_unlock(&dev_priv->drm.struct_mutex);
>          return err;
> @@ -358,7 +342,7 @@ static int igt_guc_doorbells(void *arg)
>   int intel_guc_live_selftest(struct drm_i915_private *dev_priv)
>   {
>          static const struct i915_subtest tests[] = {
> -               SUBTEST(igt_guc_init_doorbell_hw),
> +               SUBTEST(igt_guc_clients),
>                  SUBTEST(igt_guc_doorbells),
>          };
> 
> --
> 2.14.3
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 6/8] drm/i915/guc: Extract clients allocation to submission_init
  2017-12-13 12:50 ` [PATCH 6/8] drm/i915/guc: Extract clients allocation to submission_init Michał Winiarski
@ 2017-12-13 19:10   ` Michel Thierry
  0 siblings, 0 replies; 19+ messages in thread
From: Michel Thierry @ 2017-12-13 19:10 UTC (permalink / raw)
  To: Michał Winiarski, intel-gfx

On 13/12/17 04:50, Michał Winiarski wrote:
> We can now move the clients allocation to submission_init path, rather
> than keeping the condition inside submission_enable called on every
> reset.
> 
> Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> ---

Reviewed-by: Michel Thierry <michel.thierry@intel.com>

>   drivers/gpu/drm/i915/intel_guc_submission.c | 33 ++++++++++-------------------
>   1 file changed, 11 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index c74e78b6ba41..488110602e7e 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -1149,6 +1149,10 @@ int intel_guc_submission_init(struct intel_guc *guc)
>                  goto err_log;
>          GEM_BUG_ON(!guc->ads_vma);
> 
> +       ret = guc_clients_create(guc);
> +       if (ret)
> +               return ret;
> +
>          for_each_engine(engine, dev_priv, id) {
>                  guc->preempt_work[id].engine = engine;
>                  INIT_WORK(&guc->preempt_work[id].work, inject_preempt_context);
> @@ -1172,6 +1176,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
>          for_each_engine(engine, dev_priv, id)
>                  cancel_work_sync(&guc->preempt_work[id].work);
> 
> +       guc_clients_destroy(guc);
>          guc_ads_destroy(guc);
>          intel_guc_log_destroy(guc);
>          guc_stage_desc_pool_destroy(guc);
> @@ -1277,28 +1282,18 @@ int intel_guc_submission_enable(struct intel_guc *guc)
>                       sizeof(struct guc_wq_item) *
>                       I915_NUM_ENGINES > GUC_WQ_SIZE);
> 
> -       /*
> -        * We're being called on both module initialization and on reset,
> -        * until this flow is changed, we're using regular client presence to
> -        * determine which case are we in, and whether we should allocate new
> -        * clients or just reset their workqueues.
> -        */
> -       if (!guc->execbuf_client) {
> -               err = guc_clients_create(guc);
> -               if (err)
> -                       return err;
> -       } else {
> -               guc_reset_wq(guc->execbuf_client);
> -               guc_reset_wq(guc->preempt_client);
> -       }
> +       GEM_BUG_ON(!guc->execbuf_client);
> +
> +       guc_reset_wq(guc->execbuf_client);
> +       guc_reset_wq(guc->preempt_client);
> 
>          err = intel_guc_sample_forcewake(guc);
>          if (err)
> -               goto err_free_clients;
> +               return err;
> 
>          err = guc_clients_doorbell_init(guc);
>          if (err)
> -               goto err_free_clients;
> +               return err;
> 
>          /* Take over from manual control of ELSP (execlists) */
>          guc_interrupts_capture(dev_priv);
> @@ -1315,10 +1310,6 @@ int intel_guc_submission_enable(struct intel_guc *guc)
>          }
> 
>          return 0;
> -
> -err_free_clients:
> -       guc_clients_destroy(guc);
> -       return err;
>   }
> 
>   void intel_guc_submission_disable(struct intel_guc *guc)
> @@ -1332,8 +1323,6 @@ void intel_guc_submission_disable(struct intel_guc *guc)
> 
>          /* Revert back to manual ELSP submission */
>          intel_engines_reset_default_submission(dev_priv);
> -
> -       guc_clients_destroy(guc);
>   }
> 
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> --
> 2.14.3
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 7/8] drm/i915/guc: Extract doorbell verification into a function
  2017-12-13 15:23   ` Michal Wajdeczko
@ 2017-12-13 19:10     ` Michel Thierry
  0 siblings, 0 replies; 19+ messages in thread
From: Michel Thierry @ 2017-12-13 19:10 UTC (permalink / raw)
  To: Michal Wajdeczko, intel-gfx, Winiarski, Michal

On 13/12/17 07:23, Michal Wajdeczko wrote:
> On Wed, 13 Dec 2017 13:50:45 +0100, Michał Winiarski
> <michal.winiarski@intel.com> wrote:
> 
>> We have the selftest that's checking doorbell create/destroy, so there's
>> no need to check all doorbells delaying the reset every time.
>> We do want to have that extra sanity check at module load/unload though.
>>
>> Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
>> ---
> 
> Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>


Reviewed-by: Michel Thierry <michel.thierry@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2017-12-13 19:10 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-13 12:50 [PATCH 1/8] drm/i915/guc: Move shared data allocation away from submission path Michał Winiarski
2017-12-13 12:50 ` [PATCH v2 2/8] drm/i915/guc: Move GuC workqueue allocations outside of the mutex Michał Winiarski
2017-12-13 15:23   ` Chris Wilson
2017-12-13 16:00     ` Chris Wilson
2017-12-13 12:50 ` [PATCH v2 3/8] drm/i915/guc: Extract guc_init from guc_init_hw Michał Winiarski
2017-12-13 12:50 ` [PATCH 4/8] drm/i915/guc: Call invalidate after changing the vfunc Michał Winiarski
2017-12-13 15:24   ` Chris Wilson
2017-12-13 12:50 ` [PATCH 5/8] drm/i915/guc: Extract doorbell creation from client allocation Michał Winiarski
2017-12-13 19:03   ` Michel Thierry
2017-12-13 12:50 ` [PATCH 6/8] drm/i915/guc: Extract clients allocation to submission_init Michał Winiarski
2017-12-13 19:10   ` Michel Thierry
2017-12-13 12:50 ` [PATCH 7/8] drm/i915/guc: Extract doorbell verification into a function Michał Winiarski
2017-12-13 15:23   ` Michal Wajdeczko
2017-12-13 19:10     ` Michel Thierry
2017-12-13 12:50 ` [PATCH 8/8] HAX Enable GuC Submission for CI Michał Winiarski
2017-12-13 18:15   ` Chris Wilson
2017-12-13 15:08 ` ✓ Fi.CI.BAT: success for series starting with [1/8] drm/i915/guc: Move shared data allocation away from submission path Patchwork
2017-12-13 15:29 ` [PATCH 1/8] " Chris Wilson
2017-12-13 17:33 ` ✓ Fi.CI.IGT: success for series starting with [1/8] " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.