All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Preallocate mmu notifier to unbreak cpu hotplug deadlock
@ 2017-10-05 13:22 ` Daniel Vetter
  0 siblings, 0 replies; 14+ messages in thread
From: Daniel Vetter @ 2017-10-05 13:22 UTC (permalink / raw)
  To: Intel Graphics Development
  Cc: LKML, Daniel Vetter, Chris Wilson, Tvrtko Ursulin,
	Joonas Lahtinen, Peter Zijlstra, Thomas Gleixner, Sasha Levin,
	Daniel Vetter

4.14-rc1 gained the fancy new cross-release support in lockdep, which
seems to have uncovered a few more rules about what is allowed and
isn't.

This one here seems to indicate that allocating a work-queue while
holding mmap_sem is a no-go, so let's try to preallocate it.

Of course another way to break this chain would be somewhere in the
cpu hotplug code, since this isn't the only trace we're finding now
which goes through msr_create_device.

Full lockdep splat:

======================================================
WARNING: possible circular locking dependency detected
4.14.0-rc3-CI-CI_DRM_3179+ #1 Tainted: G     U
------------------------------------------------------
kworker/3:4/562 is trying to acquire lock:
 (cpu_hotplug_lock.rw_sem){++++}, at: [<ffffffff8113d4bc>] stop_machine+0x1c/0x40

but task is already holding lock:
 (&dev->struct_mutex){+.+.}, at: [<ffffffffa0136588>] i915_reset_device+0x1e8/0x260 [i915]

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #6 (&dev->struct_mutex){+.+.}:
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       __mutex_lock+0x86/0x9b0
       mutex_lock_interruptible_nested+0x1b/0x20
       i915_mutex_lock_interruptible+0x51/0x130 [i915]
       i915_gem_fault+0x209/0x650 [i915]
       __do_fault+0x1e/0x80
       __handle_mm_fault+0xa08/0xed0
       handle_mm_fault+0x156/0x300
       __do_page_fault+0x2c5/0x570
       do_page_fault+0x28/0x250
       page_fault+0x22/0x30

-> #5 (&mm->mmap_sem){++++}:
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       __might_fault+0x68/0x90
       _copy_to_user+0x23/0x70
       filldir+0xa5/0x120
       dcache_readdir+0xf9/0x170
       iterate_dir+0x69/0x1a0
       SyS_getdents+0xa5/0x140
       entry_SYSCALL_64_fastpath+0x1c/0xb1

-> #4 (&sb->s_type->i_mutex_key#5){++++}:
       down_write+0x3b/0x70
       handle_create+0xcb/0x1e0
       devtmpfsd+0x139/0x180
       kthread+0x152/0x190
       ret_from_fork+0x27/0x40

-> #3 ((complete)&req.done){+.+.}:
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       wait_for_common+0x58/0x210
       wait_for_completion+0x1d/0x20
       devtmpfs_create_node+0x13d/0x160
       device_add+0x5eb/0x620
       device_create_groups_vargs+0xe0/0xf0
       device_create+0x3a/0x40
       msr_device_create+0x2b/0x40
       cpuhp_invoke_callback+0xc9/0xbf0
       cpuhp_thread_fun+0x17b/0x240
       smpboot_thread_fn+0x18a/0x280
       kthread+0x152/0x190
       ret_from_fork+0x27/0x40

-> #2 (cpuhp_state-up){+.+.}:
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       cpuhp_issue_call+0x133/0x1c0
       __cpuhp_setup_state_cpuslocked+0x139/0x2a0
       __cpuhp_setup_state+0x46/0x60
       page_writeback_init+0x43/0x67
       pagecache_init+0x3d/0x42
       start_kernel+0x3a8/0x3fc
       x86_64_start_reservations+0x2a/0x2c
       x86_64_start_kernel+0x6d/0x70
       verify_cpu+0x0/0xfb

-> #1 (cpuhp_state_mutex){+.+.}:
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       __mutex_lock+0x86/0x9b0
       mutex_lock_nested+0x1b/0x20
       __cpuhp_setup_state_cpuslocked+0x53/0x2a0
       __cpuhp_setup_state+0x46/0x60
       page_alloc_init+0x28/0x30
       start_kernel+0x145/0x3fc
       x86_64_start_reservations+0x2a/0x2c
       x86_64_start_kernel+0x6d/0x70
       verify_cpu+0x0/0xfb

-> #0 (cpu_hotplug_lock.rw_sem){++++}:
       check_prev_add+0x430/0x840
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       cpus_read_lock+0x3d/0xb0
       stop_machine+0x1c/0x40
       i915_gem_set_wedged+0x1a/0x20 [i915]
       i915_reset+0xb9/0x230 [i915]
       i915_reset_device+0x1f6/0x260 [i915]
       i915_handle_error+0x2d8/0x430 [i915]
       hangcheck_declare_hang+0xd3/0xf0 [i915]
       i915_hangcheck_elapsed+0x262/0x2d0 [i915]
       process_one_work+0x233/0x660
       worker_thread+0x4e/0x3b0
       kthread+0x152/0x190
       ret_from_fork+0x27/0x40

other info that might help us debug this:

Chain exists of:
  cpu_hotplug_lock.rw_sem --> &mm->mmap_sem --> &dev->struct_mutex

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&dev->struct_mutex);
                               lock(&mm->mmap_sem);
                               lock(&dev->struct_mutex);
  lock(cpu_hotplug_lock.rw_sem);

 *** DEADLOCK ***

3 locks held by kworker/3:4/562:
 #0:  ("events_long"){+.+.}, at: [<ffffffff8109c64a>] process_one_work+0x1aa/0x660
 #1:  ((&(&i915->gpu_error.hangcheck_work)->work)){+.+.}, at: [<ffffffff8109c64a>] process_one_work+0x1aa/0x660
 #2:  (&dev->struct_mutex){+.+.}, at: [<ffffffffa0136588>] i915_reset_device+0x1e8/0x260 [i915]

stack backtrace:
CPU: 3 PID: 562 Comm: kworker/3:4 Tainted: G     U          4.14.0-rc3-CI-CI_DRM_3179+ #1
Hardware name:                  /NUC7i5BNB, BIOS BNKBL357.86A.0048.2017.0704.1415 07/04/2017
Workqueue: events_long i915_hangcheck_elapsed [i915]
Call Trace:
 dump_stack+0x68/0x9f
 print_circular_bug+0x235/0x3c0
 ? lockdep_init_map_crosslock+0x20/0x20
 check_prev_add+0x430/0x840
 ? irq_work_queue+0x86/0xe0
 ? wake_up_klogd+0x53/0x70
 __lock_acquire+0x1420/0x15e0
 ? __lock_acquire+0x1420/0x15e0
 ? lockdep_init_map_crosslock+0x20/0x20
 lock_acquire+0xb0/0x200
 ? stop_machine+0x1c/0x40
 ? i915_gem_object_truncate+0x50/0x50 [i915]
 cpus_read_lock+0x3d/0xb0
 ? stop_machine+0x1c/0x40
 stop_machine+0x1c/0x40
 i915_gem_set_wedged+0x1a/0x20 [i915]
 i915_reset+0xb9/0x230 [i915]
 i915_reset_device+0x1f6/0x260 [i915]
 ? gen8_gt_irq_ack+0x170/0x170 [i915]
 ? work_on_cpu_safe+0x60/0x60
 i915_handle_error+0x2d8/0x430 [i915]
 ? vsnprintf+0xd1/0x4b0
 ? scnprintf+0x3a/0x70
 hangcheck_declare_hang+0xd3/0xf0 [i915]
 ? intel_runtime_pm_put+0x56/0xa0 [i915]
 i915_hangcheck_elapsed+0x262/0x2d0 [i915]
 process_one_work+0x233/0x660
 worker_thread+0x4e/0x3b0
 kthread+0x152/0x190
 ? process_one_work+0x660/0x660
 ? kthread_create_on_node+0x40/0x40
 ret_from_fork+0x27/0x40
Setting dangerous option reset - tainting kernel
i915 0000:00:02.0: Resetting chip after gpu hang
Setting dangerous option reset - tainting kernel
i915 0000:00:02.0: Resetting chip after gpu hang

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Sasha Levin <alexander.levin@verizon.com>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>

--

Patch itself pretty much untested, I just want to figure out whether
we should fix this (and similar backtraces going through
msr_create_device) in i915, or whether the cpu hotplug folks will take
care of them all.

Afaict this is not because of changes in 4.14-rc1, but really only due
to the new cross-release checks.

Note that the above trace is on top of -rc3 (plus plenty of drm/i915
stuff), so should have Peter's recent lockdep fixes for cpu up vs.
down already.
-Daniel
---
 drivers/gpu/drm/i915/i915_gem_userptr.c | 65 +++++++++++++++------------------
 1 file changed, 29 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 2d4996de7331..afe166921572 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -160,36 +160,6 @@ static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
 	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
 };
 
-static struct i915_mmu_notifier *
-i915_mmu_notifier_create(struct mm_struct *mm)
-{
-	struct i915_mmu_notifier *mn;
-	int ret;
-
-	mn = kmalloc(sizeof(*mn), GFP_KERNEL);
-	if (mn == NULL)
-		return ERR_PTR(-ENOMEM);
-
-	spin_lock_init(&mn->lock);
-	mn->mn.ops = &i915_gem_userptr_notifier;
-	mn->objects = RB_ROOT_CACHED;
-	mn->wq = alloc_workqueue("i915-userptr-release", WQ_UNBOUND, 0);
-	if (mn->wq == NULL) {
-		kfree(mn);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	 /* Protected by mmap_sem (write-lock) */
-	ret = __mmu_notifier_register(&mn->mn, mm);
-	if (ret) {
-		destroy_workqueue(mn->wq);
-		kfree(mn);
-		return ERR_PTR(ret);
-	}
-
-	return mn;
-}
-
 static void
 i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
 {
@@ -210,23 +180,46 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
 static struct i915_mmu_notifier *
 i915_mmu_notifier_find(struct i915_mm_struct *mm)
 {
-	struct i915_mmu_notifier *mn = mm->mn;
+	struct i915_mmu_notifier *mn = mm->mn, *free_mn;
+	int ret;
 
 	mn = mm->mn;
 	if (mn)
 		return mn;
 
+	free_mn = kmalloc(sizeof(*free_mn), GFP_KERNEL);
+	if (free_mn == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&free_mn->lock);
+	free_mn->mn.ops = &i915_gem_userptr_notifier;
+	free_mn->objects = RB_ROOT_CACHED;
+	/* must allocate wq outside of mm->mmap_sem */
+	free_mn->wq = alloc_workqueue("i915-userptr-release", WQ_UNBOUND, 0);
+	if (free_mn->wq == NULL) {
+		ret = -ENOMEM;
+		goto err_kfree;
+	}
+
 	down_write(&mm->mm->mmap_sem);
 	mutex_lock(&mm->i915->mm_lock);
-	if ((mn = mm->mn) == NULL) {
-		mn = i915_mmu_notifier_create(mm->mm);
-		if (!IS_ERR(mn))
-			mm->mn = mn;
+	if (mm->mn == NULL) {
+		 /* Protected by mmap_sem (write-lock) */
+		ret = __mmu_notifier_register(&free_mn->mn, mm->mm);
+		if (ret == 0)
+			mm->mn = mn =free_mn;
+	} else {
+		mn = mm->mn;
 	}
 	mutex_unlock(&mm->i915->mm_lock);
 	up_write(&mm->mm->mmap_sem);
 
-	return mn;
+err_wq:
+	destroy_workqueue(free_mn->wq);
+err_kfree:
+	kfree(free_mn);
+
+	return ret == 0 ? mn : ERR_PTR(ret);
 }
 
 static int
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH] drm/i915: Preallocate mmu notifier to unbreak cpu hotplug deadlock
@ 2017-10-05 13:22 ` Daniel Vetter
  0 siblings, 0 replies; 14+ messages in thread
From: Daniel Vetter @ 2017-10-05 13:22 UTC (permalink / raw)
  To: Intel Graphics Development
  Cc: Peter Zijlstra, Daniel Vetter, LKML, Daniel Vetter,
	Thomas Gleixner, Sasha Levin

4.14-rc1 gained the fancy new cross-release support in lockdep, which
seems to have uncovered a few more rules about what is allowed and
isn't.

This one here seems to indicate that allocating a work-queue while
holding mmap_sem is a no-go, so let's try to preallocate it.

Of course another way to break this chain would be somewhere in the
cpu hotplug code, since this isn't the only trace we're finding now
which goes through msr_create_device.

Full lockdep splat:

======================================================
WARNING: possible circular locking dependency detected
4.14.0-rc3-CI-CI_DRM_3179+ #1 Tainted: G     U
------------------------------------------------------
kworker/3:4/562 is trying to acquire lock:
 (cpu_hotplug_lock.rw_sem){++++}, at: [<ffffffff8113d4bc>] stop_machine+0x1c/0x40

but task is already holding lock:
 (&dev->struct_mutex){+.+.}, at: [<ffffffffa0136588>] i915_reset_device+0x1e8/0x260 [i915]

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #6 (&dev->struct_mutex){+.+.}:
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       __mutex_lock+0x86/0x9b0
       mutex_lock_interruptible_nested+0x1b/0x20
       i915_mutex_lock_interruptible+0x51/0x130 [i915]
       i915_gem_fault+0x209/0x650 [i915]
       __do_fault+0x1e/0x80
       __handle_mm_fault+0xa08/0xed0
       handle_mm_fault+0x156/0x300
       __do_page_fault+0x2c5/0x570
       do_page_fault+0x28/0x250
       page_fault+0x22/0x30

-> #5 (&mm->mmap_sem){++++}:
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       __might_fault+0x68/0x90
       _copy_to_user+0x23/0x70
       filldir+0xa5/0x120
       dcache_readdir+0xf9/0x170
       iterate_dir+0x69/0x1a0
       SyS_getdents+0xa5/0x140
       entry_SYSCALL_64_fastpath+0x1c/0xb1

-> #4 (&sb->s_type->i_mutex_key#5){++++}:
       down_write+0x3b/0x70
       handle_create+0xcb/0x1e0
       devtmpfsd+0x139/0x180
       kthread+0x152/0x190
       ret_from_fork+0x27/0x40

-> #3 ((complete)&req.done){+.+.}:
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       wait_for_common+0x58/0x210
       wait_for_completion+0x1d/0x20
       devtmpfs_create_node+0x13d/0x160
       device_add+0x5eb/0x620
       device_create_groups_vargs+0xe0/0xf0
       device_create+0x3a/0x40
       msr_device_create+0x2b/0x40
       cpuhp_invoke_callback+0xc9/0xbf0
       cpuhp_thread_fun+0x17b/0x240
       smpboot_thread_fn+0x18a/0x280
       kthread+0x152/0x190
       ret_from_fork+0x27/0x40

-> #2 (cpuhp_state-up){+.+.}:
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       cpuhp_issue_call+0x133/0x1c0
       __cpuhp_setup_state_cpuslocked+0x139/0x2a0
       __cpuhp_setup_state+0x46/0x60
       page_writeback_init+0x43/0x67
       pagecache_init+0x3d/0x42
       start_kernel+0x3a8/0x3fc
       x86_64_start_reservations+0x2a/0x2c
       x86_64_start_kernel+0x6d/0x70
       verify_cpu+0x0/0xfb

-> #1 (cpuhp_state_mutex){+.+.}:
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       __mutex_lock+0x86/0x9b0
       mutex_lock_nested+0x1b/0x20
       __cpuhp_setup_state_cpuslocked+0x53/0x2a0
       __cpuhp_setup_state+0x46/0x60
       page_alloc_init+0x28/0x30
       start_kernel+0x145/0x3fc
       x86_64_start_reservations+0x2a/0x2c
       x86_64_start_kernel+0x6d/0x70
       verify_cpu+0x0/0xfb

-> #0 (cpu_hotplug_lock.rw_sem){++++}:
       check_prev_add+0x430/0x840
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       cpus_read_lock+0x3d/0xb0
       stop_machine+0x1c/0x40
       i915_gem_set_wedged+0x1a/0x20 [i915]
       i915_reset+0xb9/0x230 [i915]
       i915_reset_device+0x1f6/0x260 [i915]
       i915_handle_error+0x2d8/0x430 [i915]
       hangcheck_declare_hang+0xd3/0xf0 [i915]
       i915_hangcheck_elapsed+0x262/0x2d0 [i915]
       process_one_work+0x233/0x660
       worker_thread+0x4e/0x3b0
       kthread+0x152/0x190
       ret_from_fork+0x27/0x40

other info that might help us debug this:

Chain exists of:
  cpu_hotplug_lock.rw_sem --> &mm->mmap_sem --> &dev->struct_mutex

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&dev->struct_mutex);
                               lock(&mm->mmap_sem);
                               lock(&dev->struct_mutex);
  lock(cpu_hotplug_lock.rw_sem);

 *** DEADLOCK ***

3 locks held by kworker/3:4/562:
 #0:  ("events_long"){+.+.}, at: [<ffffffff8109c64a>] process_one_work+0x1aa/0x660
 #1:  ((&(&i915->gpu_error.hangcheck_work)->work)){+.+.}, at: [<ffffffff8109c64a>] process_one_work+0x1aa/0x660
 #2:  (&dev->struct_mutex){+.+.}, at: [<ffffffffa0136588>] i915_reset_device+0x1e8/0x260 [i915]

stack backtrace:
CPU: 3 PID: 562 Comm: kworker/3:4 Tainted: G     U          4.14.0-rc3-CI-CI_DRM_3179+ #1
Hardware name:                  /NUC7i5BNB, BIOS BNKBL357.86A.0048.2017.0704.1415 07/04/2017
Workqueue: events_long i915_hangcheck_elapsed [i915]
Call Trace:
 dump_stack+0x68/0x9f
 print_circular_bug+0x235/0x3c0
 ? lockdep_init_map_crosslock+0x20/0x20
 check_prev_add+0x430/0x840
 ? irq_work_queue+0x86/0xe0
 ? wake_up_klogd+0x53/0x70
 __lock_acquire+0x1420/0x15e0
 ? __lock_acquire+0x1420/0x15e0
 ? lockdep_init_map_crosslock+0x20/0x20
 lock_acquire+0xb0/0x200
 ? stop_machine+0x1c/0x40
 ? i915_gem_object_truncate+0x50/0x50 [i915]
 cpus_read_lock+0x3d/0xb0
 ? stop_machine+0x1c/0x40
 stop_machine+0x1c/0x40
 i915_gem_set_wedged+0x1a/0x20 [i915]
 i915_reset+0xb9/0x230 [i915]
 i915_reset_device+0x1f6/0x260 [i915]
 ? gen8_gt_irq_ack+0x170/0x170 [i915]
 ? work_on_cpu_safe+0x60/0x60
 i915_handle_error+0x2d8/0x430 [i915]
 ? vsnprintf+0xd1/0x4b0
 ? scnprintf+0x3a/0x70
 hangcheck_declare_hang+0xd3/0xf0 [i915]
 ? intel_runtime_pm_put+0x56/0xa0 [i915]
 i915_hangcheck_elapsed+0x262/0x2d0 [i915]
 process_one_work+0x233/0x660
 worker_thread+0x4e/0x3b0
 kthread+0x152/0x190
 ? process_one_work+0x660/0x660
 ? kthread_create_on_node+0x40/0x40
 ret_from_fork+0x27/0x40
Setting dangerous option reset - tainting kernel
i915 0000:00:02.0: Resetting chip after gpu hang
Setting dangerous option reset - tainting kernel
i915 0000:00:02.0: Resetting chip after gpu hang

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Sasha Levin <alexander.levin@verizon.com>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>

--

Patch itself pretty much untested, I just want to figure out whether
we should fix this (and similar backtraces going through
msr_create_device) in i915, or whether the cpu hotplug folks will take
care of them all.

Afaict this is not because of changes in 4.14-rc1, but really only due
to the new cross-release checks.

Note that the above trace is on top of -rc3 (plus plenty of drm/i915
stuff), so should have Peter's recent lockdep fixes for cpu up vs.
down already.
-Daniel
---
 drivers/gpu/drm/i915/i915_gem_userptr.c | 65 +++++++++++++++------------------
 1 file changed, 29 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 2d4996de7331..afe166921572 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -160,36 +160,6 @@ static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
 	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
 };
 
-static struct i915_mmu_notifier *
-i915_mmu_notifier_create(struct mm_struct *mm)
-{
-	struct i915_mmu_notifier *mn;
-	int ret;
-
-	mn = kmalloc(sizeof(*mn), GFP_KERNEL);
-	if (mn == NULL)
-		return ERR_PTR(-ENOMEM);
-
-	spin_lock_init(&mn->lock);
-	mn->mn.ops = &i915_gem_userptr_notifier;
-	mn->objects = RB_ROOT_CACHED;
-	mn->wq = alloc_workqueue("i915-userptr-release", WQ_UNBOUND, 0);
-	if (mn->wq == NULL) {
-		kfree(mn);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	 /* Protected by mmap_sem (write-lock) */
-	ret = __mmu_notifier_register(&mn->mn, mm);
-	if (ret) {
-		destroy_workqueue(mn->wq);
-		kfree(mn);
-		return ERR_PTR(ret);
-	}
-
-	return mn;
-}
-
 static void
 i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
 {
@@ -210,23 +180,46 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
 static struct i915_mmu_notifier *
 i915_mmu_notifier_find(struct i915_mm_struct *mm)
 {
-	struct i915_mmu_notifier *mn = mm->mn;
+	struct i915_mmu_notifier *mn = mm->mn, *free_mn;
+	int ret;
 
 	mn = mm->mn;
 	if (mn)
 		return mn;
 
+	free_mn = kmalloc(sizeof(*free_mn), GFP_KERNEL);
+	if (free_mn == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&free_mn->lock);
+	free_mn->mn.ops = &i915_gem_userptr_notifier;
+	free_mn->objects = RB_ROOT_CACHED;
+	/* must allocate wq outside of mm->mmap_sem */
+	free_mn->wq = alloc_workqueue("i915-userptr-release", WQ_UNBOUND, 0);
+	if (free_mn->wq == NULL) {
+		ret = -ENOMEM;
+		goto err_kfree;
+	}
+
 	down_write(&mm->mm->mmap_sem);
 	mutex_lock(&mm->i915->mm_lock);
-	if ((mn = mm->mn) == NULL) {
-		mn = i915_mmu_notifier_create(mm->mm);
-		if (!IS_ERR(mn))
-			mm->mn = mn;
+	if (mm->mn == NULL) {
+		 /* Protected by mmap_sem (write-lock) */
+		ret = __mmu_notifier_register(&free_mn->mn, mm->mm);
+		if (ret == 0)
+			mm->mn = mn =free_mn;
+	} else {
+		mn = mm->mn;
 	}
 	mutex_unlock(&mm->i915->mm_lock);
 	up_write(&mm->mm->mmap_sem);
 
-	return mn;
+err_wq:
+	destroy_workqueue(free_mn->wq);
+err_kfree:
+	kfree(free_mn);
+
+	return ret == 0 ? mn : ERR_PTR(ret);
 }
 
 static int
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Preallocate mmu notifier to unbreak cpu hotplug deadlock
  2017-10-05 13:22 ` Daniel Vetter
@ 2017-10-05 13:51   ` Chris Wilson
  -1 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-10-05 13:51 UTC (permalink / raw)
  To: Daniel Vetter, Intel Graphics Development
  Cc: LKML, Daniel Vetter, Tvrtko Ursulin, Joonas Lahtinen,
	Peter Zijlstra, Thomas Gleixner, Sasha Levin, Daniel Vetter

Quoting Daniel Vetter (2017-10-05 14:22:06)
> 4.14-rc1 gained the fancy new cross-release support in lockdep, which
> seems to have uncovered a few more rules about what is allowed and
> isn't.
> 
> This one here seems to indicate that allocating a work-queue while
> holding mmap_sem is a no-go, so let's try to preallocate it.
> 
> Of course another way to break this chain would be somewhere in the
> cpu hotplug code, since this isn't the only trace we're finding now
> which goes through msr_create_device.
> 
> Full lockdep splat:
> 
> ======================================================
> WARNING: possible circular locking dependency detected
> 4.14.0-rc3-CI-CI_DRM_3179+ #1 Tainted: G     U
> ------------------------------------------------------
> kworker/3:4/562 is trying to acquire lock:
>  (cpu_hotplug_lock.rw_sem){++++}, at: [<ffffffff8113d4bc>] stop_machine+0x1c/0x40
> 
> but task is already holding lock:
>  (&dev->struct_mutex){+.+.}, at: [<ffffffffa0136588>] i915_reset_device+0x1e8/0x260 [i915]
> 
> which lock already depends on the new lock.
> 
> the existing dependency chain (in reverse order) is:
> 
> -> #6 (&dev->struct_mutex){+.+.}:
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        __mutex_lock+0x86/0x9b0
>        mutex_lock_interruptible_nested+0x1b/0x20
>        i915_mutex_lock_interruptible+0x51/0x130 [i915]
>        i915_gem_fault+0x209/0x650 [i915]
>        __do_fault+0x1e/0x80
>        __handle_mm_fault+0xa08/0xed0
>        handle_mm_fault+0x156/0x300
>        __do_page_fault+0x2c5/0x570
>        do_page_fault+0x28/0x250
>        page_fault+0x22/0x30
> 
> -> #5 (&mm->mmap_sem){++++}:
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        __might_fault+0x68/0x90
>        _copy_to_user+0x23/0x70
>        filldir+0xa5/0x120
>        dcache_readdir+0xf9/0x170
>        iterate_dir+0x69/0x1a0
>        SyS_getdents+0xa5/0x140
>        entry_SYSCALL_64_fastpath+0x1c/0xb1
> 
> -> #4 (&sb->s_type->i_mutex_key#5){++++}:
>        down_write+0x3b/0x70
>        handle_create+0xcb/0x1e0
>        devtmpfsd+0x139/0x180
>        kthread+0x152/0x190
>        ret_from_fork+0x27/0x40
> 
> -> #3 ((complete)&req.done){+.+.}:
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        wait_for_common+0x58/0x210
>        wait_for_completion+0x1d/0x20
>        devtmpfs_create_node+0x13d/0x160
>        device_add+0x5eb/0x620
>        device_create_groups_vargs+0xe0/0xf0
>        device_create+0x3a/0x40
>        msr_device_create+0x2b/0x40
>        cpuhp_invoke_callback+0xc9/0xbf0
>        cpuhp_thread_fun+0x17b/0x240
>        smpboot_thread_fn+0x18a/0x280
>        kthread+0x152/0x190
>        ret_from_fork+0x27/0x40
> 
> -> #2 (cpuhp_state-up){+.+.}:
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        cpuhp_issue_call+0x133/0x1c0
>        __cpuhp_setup_state_cpuslocked+0x139/0x2a0
>        __cpuhp_setup_state+0x46/0x60
>        page_writeback_init+0x43/0x67
>        pagecache_init+0x3d/0x42
>        start_kernel+0x3a8/0x3fc
>        x86_64_start_reservations+0x2a/0x2c
>        x86_64_start_kernel+0x6d/0x70
>        verify_cpu+0x0/0xfb
> 
> -> #1 (cpuhp_state_mutex){+.+.}:
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        __mutex_lock+0x86/0x9b0
>        mutex_lock_nested+0x1b/0x20
>        __cpuhp_setup_state_cpuslocked+0x53/0x2a0
>        __cpuhp_setup_state+0x46/0x60
>        page_alloc_init+0x28/0x30
>        start_kernel+0x145/0x3fc
>        x86_64_start_reservations+0x2a/0x2c
>        x86_64_start_kernel+0x6d/0x70
>        verify_cpu+0x0/0xfb
> 
> -> #0 (cpu_hotplug_lock.rw_sem){++++}:
>        check_prev_add+0x430/0x840
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        cpus_read_lock+0x3d/0xb0
>        stop_machine+0x1c/0x40
>        i915_gem_set_wedged+0x1a/0x20 [i915]
>        i915_reset+0xb9/0x230 [i915]
>        i915_reset_device+0x1f6/0x260 [i915]
>        i915_handle_error+0x2d8/0x430 [i915]
>        hangcheck_declare_hang+0xd3/0xf0 [i915]
>        i915_hangcheck_elapsed+0x262/0x2d0 [i915]
>        process_one_work+0x233/0x660
>        worker_thread+0x4e/0x3b0
>        kthread+0x152/0x190
>        ret_from_fork+0x27/0x40
> 
> other info that might help us debug this:
> 
> Chain exists of:
>   cpu_hotplug_lock.rw_sem --> &mm->mmap_sem --> &dev->struct_mutex
> 
>  Possible unsafe locking scenario:
> 
>        CPU0                    CPU1
>        ----                    ----
>   lock(&dev->struct_mutex);
>                                lock(&mm->mmap_sem);
>                                lock(&dev->struct_mutex);
>   lock(cpu_hotplug_lock.rw_sem);
> 
>  *** DEADLOCK ***
> 
> 3 locks held by kworker/3:4/562:
>  #0:  ("events_long"){+.+.}, at: [<ffffffff8109c64a>] process_one_work+0x1aa/0x660
>  #1:  ((&(&i915->gpu_error.hangcheck_work)->work)){+.+.}, at: [<ffffffff8109c64a>] process_one_work+0x1aa/0x660
>  #2:  (&dev->struct_mutex){+.+.}, at: [<ffffffffa0136588>] i915_reset_device+0x1e8/0x260 [i915]
> 
> stack backtrace:
> CPU: 3 PID: 562 Comm: kworker/3:4 Tainted: G     U          4.14.0-rc3-CI-CI_DRM_3179+ #1
> Hardware name:                  /NUC7i5BNB, BIOS BNKBL357.86A.0048.2017.0704.1415 07/04/2017
> Workqueue: events_long i915_hangcheck_elapsed [i915]
> Call Trace:
>  dump_stack+0x68/0x9f
>  print_circular_bug+0x235/0x3c0
>  ? lockdep_init_map_crosslock+0x20/0x20
>  check_prev_add+0x430/0x840
>  ? irq_work_queue+0x86/0xe0
>  ? wake_up_klogd+0x53/0x70
>  __lock_acquire+0x1420/0x15e0
>  ? __lock_acquire+0x1420/0x15e0
>  ? lockdep_init_map_crosslock+0x20/0x20
>  lock_acquire+0xb0/0x200
>  ? stop_machine+0x1c/0x40
>  ? i915_gem_object_truncate+0x50/0x50 [i915]
>  cpus_read_lock+0x3d/0xb0
>  ? stop_machine+0x1c/0x40
>  stop_machine+0x1c/0x40
>  i915_gem_set_wedged+0x1a/0x20 [i915]
>  i915_reset+0xb9/0x230 [i915]
>  i915_reset_device+0x1f6/0x260 [i915]
>  ? gen8_gt_irq_ack+0x170/0x170 [i915]
>  ? work_on_cpu_safe+0x60/0x60
>  i915_handle_error+0x2d8/0x430 [i915]
>  ? vsnprintf+0xd1/0x4b0
>  ? scnprintf+0x3a/0x70
>  hangcheck_declare_hang+0xd3/0xf0 [i915]
>  ? intel_runtime_pm_put+0x56/0xa0 [i915]
>  i915_hangcheck_elapsed+0x262/0x2d0 [i915]
>  process_one_work+0x233/0x660
>  worker_thread+0x4e/0x3b0
>  kthread+0x152/0x190
>  ? process_one_work+0x660/0x660
>  ? kthread_create_on_node+0x40/0x40
>  ret_from_fork+0x27/0x40
> Setting dangerous option reset - tainting kernel
> i915 0000:00:02.0: Resetting chip after gpu hang
> Setting dangerous option reset - tainting kernel
> i915 0000:00:02.0: Resetting chip after gpu hang
> 
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Sasha Levin <alexander.levin@verizon.com>
> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> 
> --
> 
> Patch itself pretty much untested, I just want to figure out whether
> we should fix this (and similar backtraces going through
> msr_create_device) in i915, or whether the cpu hotplug folks will take
> care of them all.

Looking at the patch, we shrink the time under mmap_sem/mm_lock so seems
sensible (just from that pov).

> Afaict this is not because of changes in 4.14-rc1, but really only due
> to the new cross-release checks.
> 
> Note that the above trace is on top of -rc3 (plus plenty of drm/i915
> stuff), so should have Peter's recent lockdep fixes for cpu up vs.
> down already.
> -Daniel
> ---
>  drivers/gpu/drm/i915/i915_gem_userptr.c | 65 +++++++++++++++------------------
>  1 file changed, 29 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 2d4996de7331..afe166921572 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -160,36 +160,6 @@ static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
>         .invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
>  };
>  
> -static struct i915_mmu_notifier *
> -i915_mmu_notifier_create(struct mm_struct *mm)
> -{
> -       struct i915_mmu_notifier *mn;
> -       int ret;
> -
> -       mn = kmalloc(sizeof(*mn), GFP_KERNEL);
> -       if (mn == NULL)
> -               return ERR_PTR(-ENOMEM);
> -
> -       spin_lock_init(&mn->lock);
> -       mn->mn.ops = &i915_gem_userptr_notifier;
> -       mn->objects = RB_ROOT_CACHED;
> -       mn->wq = alloc_workqueue("i915-userptr-release", WQ_UNBOUND, 0);
> -       if (mn->wq == NULL) {
> -               kfree(mn);
> -               return ERR_PTR(-ENOMEM);
> -       }
> -
> -        /* Protected by mmap_sem (write-lock) */
> -       ret = __mmu_notifier_register(&mn->mn, mm);
> -       if (ret) {
> -               destroy_workqueue(mn->wq);
> -               kfree(mn);
> -               return ERR_PTR(ret);
> -       }
> -
> -       return mn;
> -}
> -
>  static void
>  i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
>  {
> @@ -210,23 +180,46 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
>  static struct i915_mmu_notifier *
>  i915_mmu_notifier_find(struct i915_mm_struct *mm)
>  {
> -       struct i915_mmu_notifier *mn = mm->mn;
> +       struct i915_mmu_notifier *mn = mm->mn, *free_mn;
> +       int ret;
>  
>         mn = mm->mn;
>         if (mn)
>                 return mn;
>  
> +       free_mn = kmalloc(sizeof(*free_mn), GFP_KERNEL);
> +       if (free_mn == NULL)
> +               return ERR_PTR(-ENOMEM);
> +
> +       spin_lock_init(&free_mn->lock);
> +       free_mn->mn.ops = &i915_gem_userptr_notifier;
> +       free_mn->objects = RB_ROOT_CACHED;
> +       /* must allocate wq outside of mm->mmap_sem */
> +       free_mn->wq = alloc_workqueue("i915-userptr-release", WQ_UNBOUND, 0);
> +       if (free_mn->wq == NULL) {
> +               ret = -ENOMEM;
> +               goto err_kfree;
> +       }
> +
>         down_write(&mm->mm->mmap_sem);
>         mutex_lock(&mm->i915->mm_lock);
> -       if ((mn = mm->mn) == NULL) {
> -               mn = i915_mmu_notifier_create(mm->mm);
> -               if (!IS_ERR(mn))
> -                       mm->mn = mn;
> +       if (mm->mn == NULL) {
> +                /* Protected by mmap_sem (write-lock) */
> +               ret = __mmu_notifier_register(&free_mn->mn, mm->mm);
> +               if (ret == 0)
> +                       mm->mn = mn =free_mn;
> +       } else {
> +               mn = mm->mn;
>         }
>         mutex_unlock(&mm->i915->mm_lock);
>         up_write(&mm->mm->mmap_sem);
>  
> -       return mn;
> +err_wq:
> +       destroy_workqueue(free_mn->wq);
> +err_kfree:
> +       kfree(free_mn);
> +
> +       return ret == 0 ? mn : ERR_PTR(ret);
>  }

I had a go,

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 2d4996de7331..f9b3406401af 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -164,7 +164,6 @@ static struct i915_mmu_notifier *
 i915_mmu_notifier_create(struct mm_struct *mm)
 {
        struct i915_mmu_notifier *mn;
-       int ret;
 
        mn = kmalloc(sizeof(*mn), GFP_KERNEL);
        if (mn == NULL)
@@ -179,14 +178,6 @@ i915_mmu_notifier_create(struct mm_struct *mm)
                return ERR_PTR(-ENOMEM);
        }
 
-        /* Protected by mmap_sem (write-lock) */
-       ret = __mmu_notifier_register(&mn->mn, mm);
-       if (ret) {
-               destroy_workqueue(mn->wq);
-               kfree(mn);
-               return ERR_PTR(ret);
-       }
-
        return mn;
 }
 
@@ -210,23 +201,37 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
 static struct i915_mmu_notifier *
 i915_mmu_notifier_find(struct i915_mm_struct *mm)
 {
-       struct i915_mmu_notifier *mn = mm->mn;
+       struct i915_mmu_notifier *mn;
+       int err;
 
        mn = mm->mn;
        if (mn)
                return mn;
 
+       mn = i915_mmu_notifier_create(mm->mm);
+       if (IS_ERR(mn))
+               return mn;
+
+       err = 0;
        down_write(&mm->mm->mmap_sem);
        mutex_lock(&mm->i915->mm_lock);
-       if ((mn = mm->mn) == NULL) {
-               mn = i915_mmu_notifier_create(mm->mm);
-               if (!IS_ERR(mn))
-                       mm->mn = mn;
+       if (mm->mn == NULL) {
+               /* Protected by mmap_sem (write-lock) */
+               err = __mmu_notifier_register(&mn->mn, mm->mm);
+               if (!err) {
+                       /* Protected by mm_lock */
+                       mm->mn = fetch_and_zero(&mn);
+               }
        }
        mutex_unlock(&mm->i915->mm_lock);
        up_write(&mm->mm->mmap_sem);
 
-       return mn;
+       if (mn) {
+               destroy_workqueue(mn->wq);
+               kfree(mn);
+       }
+
+       return err ? ERR_PTR(err) : mm->mn;
 }

So not sold that we want to keep the separate single use
i915_mmu_notifier_create() (the onion unwind is pretty solid).

But it's safe enough to return mm->mn (it's stable once we've proved it's
set) and that allows us to avoid to a second free_mn. I'm prefer the
register above as I think it's that touch clearer.

In particular, I believe your use of free_mn is buggy as you still free
it after assigning it to mm->mn.
-Chris

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Preallocate mmu notifier to unbreak cpu hotplug deadlock
@ 2017-10-05 13:51   ` Chris Wilson
  0 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-10-05 13:51 UTC (permalink / raw)
  To: Intel Graphics Development
  Cc: Peter Zijlstra, Daniel Vetter, LKML, Sasha Levin, Daniel Vetter,
	Thomas Gleixner

Quoting Daniel Vetter (2017-10-05 14:22:06)
> 4.14-rc1 gained the fancy new cross-release support in lockdep, which
> seems to have uncovered a few more rules about what is allowed and
> isn't.
> 
> This one here seems to indicate that allocating a work-queue while
> holding mmap_sem is a no-go, so let's try to preallocate it.
> 
> Of course another way to break this chain would be somewhere in the
> cpu hotplug code, since this isn't the only trace we're finding now
> which goes through msr_create_device.
> 
> Full lockdep splat:
> 
> ======================================================
> WARNING: possible circular locking dependency detected
> 4.14.0-rc3-CI-CI_DRM_3179+ #1 Tainted: G     U
> ------------------------------------------------------
> kworker/3:4/562 is trying to acquire lock:
>  (cpu_hotplug_lock.rw_sem){++++}, at: [<ffffffff8113d4bc>] stop_machine+0x1c/0x40
> 
> but task is already holding lock:
>  (&dev->struct_mutex){+.+.}, at: [<ffffffffa0136588>] i915_reset_device+0x1e8/0x260 [i915]
> 
> which lock already depends on the new lock.
> 
> the existing dependency chain (in reverse order) is:
> 
> -> #6 (&dev->struct_mutex){+.+.}:
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        __mutex_lock+0x86/0x9b0
>        mutex_lock_interruptible_nested+0x1b/0x20
>        i915_mutex_lock_interruptible+0x51/0x130 [i915]
>        i915_gem_fault+0x209/0x650 [i915]
>        __do_fault+0x1e/0x80
>        __handle_mm_fault+0xa08/0xed0
>        handle_mm_fault+0x156/0x300
>        __do_page_fault+0x2c5/0x570
>        do_page_fault+0x28/0x250
>        page_fault+0x22/0x30
> 
> -> #5 (&mm->mmap_sem){++++}:
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        __might_fault+0x68/0x90
>        _copy_to_user+0x23/0x70
>        filldir+0xa5/0x120
>        dcache_readdir+0xf9/0x170
>        iterate_dir+0x69/0x1a0
>        SyS_getdents+0xa5/0x140
>        entry_SYSCALL_64_fastpath+0x1c/0xb1
> 
> -> #4 (&sb->s_type->i_mutex_key#5){++++}:
>        down_write+0x3b/0x70
>        handle_create+0xcb/0x1e0
>        devtmpfsd+0x139/0x180
>        kthread+0x152/0x190
>        ret_from_fork+0x27/0x40
> 
> -> #3 ((complete)&req.done){+.+.}:
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        wait_for_common+0x58/0x210
>        wait_for_completion+0x1d/0x20
>        devtmpfs_create_node+0x13d/0x160
>        device_add+0x5eb/0x620
>        device_create_groups_vargs+0xe0/0xf0
>        device_create+0x3a/0x40
>        msr_device_create+0x2b/0x40
>        cpuhp_invoke_callback+0xc9/0xbf0
>        cpuhp_thread_fun+0x17b/0x240
>        smpboot_thread_fn+0x18a/0x280
>        kthread+0x152/0x190
>        ret_from_fork+0x27/0x40
> 
> -> #2 (cpuhp_state-up){+.+.}:
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        cpuhp_issue_call+0x133/0x1c0
>        __cpuhp_setup_state_cpuslocked+0x139/0x2a0
>        __cpuhp_setup_state+0x46/0x60
>        page_writeback_init+0x43/0x67
>        pagecache_init+0x3d/0x42
>        start_kernel+0x3a8/0x3fc
>        x86_64_start_reservations+0x2a/0x2c
>        x86_64_start_kernel+0x6d/0x70
>        verify_cpu+0x0/0xfb
> 
> -> #1 (cpuhp_state_mutex){+.+.}:
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        __mutex_lock+0x86/0x9b0
>        mutex_lock_nested+0x1b/0x20
>        __cpuhp_setup_state_cpuslocked+0x53/0x2a0
>        __cpuhp_setup_state+0x46/0x60
>        page_alloc_init+0x28/0x30
>        start_kernel+0x145/0x3fc
>        x86_64_start_reservations+0x2a/0x2c
>        x86_64_start_kernel+0x6d/0x70
>        verify_cpu+0x0/0xfb
> 
> -> #0 (cpu_hotplug_lock.rw_sem){++++}:
>        check_prev_add+0x430/0x840
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        cpus_read_lock+0x3d/0xb0
>        stop_machine+0x1c/0x40
>        i915_gem_set_wedged+0x1a/0x20 [i915]
>        i915_reset+0xb9/0x230 [i915]
>        i915_reset_device+0x1f6/0x260 [i915]
>        i915_handle_error+0x2d8/0x430 [i915]
>        hangcheck_declare_hang+0xd3/0xf0 [i915]
>        i915_hangcheck_elapsed+0x262/0x2d0 [i915]
>        process_one_work+0x233/0x660
>        worker_thread+0x4e/0x3b0
>        kthread+0x152/0x190
>        ret_from_fork+0x27/0x40
> 
> other info that might help us debug this:
> 
> Chain exists of:
>   cpu_hotplug_lock.rw_sem --> &mm->mmap_sem --> &dev->struct_mutex
> 
>  Possible unsafe locking scenario:
> 
>        CPU0                    CPU1
>        ----                    ----
>   lock(&dev->struct_mutex);
>                                lock(&mm->mmap_sem);
>                                lock(&dev->struct_mutex);
>   lock(cpu_hotplug_lock.rw_sem);
> 
>  *** DEADLOCK ***
> 
> 3 locks held by kworker/3:4/562:
>  #0:  ("events_long"){+.+.}, at: [<ffffffff8109c64a>] process_one_work+0x1aa/0x660
>  #1:  ((&(&i915->gpu_error.hangcheck_work)->work)){+.+.}, at: [<ffffffff8109c64a>] process_one_work+0x1aa/0x660
>  #2:  (&dev->struct_mutex){+.+.}, at: [<ffffffffa0136588>] i915_reset_device+0x1e8/0x260 [i915]
> 
> stack backtrace:
> CPU: 3 PID: 562 Comm: kworker/3:4 Tainted: G     U          4.14.0-rc3-CI-CI_DRM_3179+ #1
> Hardware name:                  /NUC7i5BNB, BIOS BNKBL357.86A.0048.2017.0704.1415 07/04/2017
> Workqueue: events_long i915_hangcheck_elapsed [i915]
> Call Trace:
>  dump_stack+0x68/0x9f
>  print_circular_bug+0x235/0x3c0
>  ? lockdep_init_map_crosslock+0x20/0x20
>  check_prev_add+0x430/0x840
>  ? irq_work_queue+0x86/0xe0
>  ? wake_up_klogd+0x53/0x70
>  __lock_acquire+0x1420/0x15e0
>  ? __lock_acquire+0x1420/0x15e0
>  ? lockdep_init_map_crosslock+0x20/0x20
>  lock_acquire+0xb0/0x200
>  ? stop_machine+0x1c/0x40
>  ? i915_gem_object_truncate+0x50/0x50 [i915]
>  cpus_read_lock+0x3d/0xb0
>  ? stop_machine+0x1c/0x40
>  stop_machine+0x1c/0x40
>  i915_gem_set_wedged+0x1a/0x20 [i915]
>  i915_reset+0xb9/0x230 [i915]
>  i915_reset_device+0x1f6/0x260 [i915]
>  ? gen8_gt_irq_ack+0x170/0x170 [i915]
>  ? work_on_cpu_safe+0x60/0x60
>  i915_handle_error+0x2d8/0x430 [i915]
>  ? vsnprintf+0xd1/0x4b0
>  ? scnprintf+0x3a/0x70
>  hangcheck_declare_hang+0xd3/0xf0 [i915]
>  ? intel_runtime_pm_put+0x56/0xa0 [i915]
>  i915_hangcheck_elapsed+0x262/0x2d0 [i915]
>  process_one_work+0x233/0x660
>  worker_thread+0x4e/0x3b0
>  kthread+0x152/0x190
>  ? process_one_work+0x660/0x660
>  ? kthread_create_on_node+0x40/0x40
>  ret_from_fork+0x27/0x40
> Setting dangerous option reset - tainting kernel
> i915 0000:00:02.0: Resetting chip after gpu hang
> Setting dangerous option reset - tainting kernel
> i915 0000:00:02.0: Resetting chip after gpu hang
> 
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Sasha Levin <alexander.levin@verizon.com>
> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> 
> --
> 
> Patch itself pretty much untested, I just want to figure out whether
> we should fix this (and similar backtraces going through
> msr_create_device) in i915, or whether the cpu hotplug folks will take
> care of them all.

Looking at the patch, we shrink the time under mmap_sem/mm_lock so seems
sensible (just from that pov).

> Afaict this is not because of changes in 4.14-rc1, but really only due
> to the new cross-release checks.
> 
> Note that the above trace is on top of -rc3 (plus plenty of drm/i915
> stuff), so should have Peter's recent lockdep fixes for cpu up vs.
> down already.
> -Daniel
> ---
>  drivers/gpu/drm/i915/i915_gem_userptr.c | 65 +++++++++++++++------------------
>  1 file changed, 29 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 2d4996de7331..afe166921572 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -160,36 +160,6 @@ static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
>         .invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
>  };
>  
> -static struct i915_mmu_notifier *
> -i915_mmu_notifier_create(struct mm_struct *mm)
> -{
> -       struct i915_mmu_notifier *mn;
> -       int ret;
> -
> -       mn = kmalloc(sizeof(*mn), GFP_KERNEL);
> -       if (mn == NULL)
> -               return ERR_PTR(-ENOMEM);
> -
> -       spin_lock_init(&mn->lock);
> -       mn->mn.ops = &i915_gem_userptr_notifier;
> -       mn->objects = RB_ROOT_CACHED;
> -       mn->wq = alloc_workqueue("i915-userptr-release", WQ_UNBOUND, 0);
> -       if (mn->wq == NULL) {
> -               kfree(mn);
> -               return ERR_PTR(-ENOMEM);
> -       }
> -
> -        /* Protected by mmap_sem (write-lock) */
> -       ret = __mmu_notifier_register(&mn->mn, mm);
> -       if (ret) {
> -               destroy_workqueue(mn->wq);
> -               kfree(mn);
> -               return ERR_PTR(ret);
> -       }
> -
> -       return mn;
> -}
> -
>  static void
>  i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
>  {
> @@ -210,23 +180,46 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
>  static struct i915_mmu_notifier *
>  i915_mmu_notifier_find(struct i915_mm_struct *mm)
>  {
> -       struct i915_mmu_notifier *mn = mm->mn;
> +       struct i915_mmu_notifier *mn = mm->mn, *free_mn;
> +       int ret;
>  
>         mn = mm->mn;
>         if (mn)
>                 return mn;
>  
> +       free_mn = kmalloc(sizeof(*free_mn), GFP_KERNEL);
> +       if (free_mn == NULL)
> +               return ERR_PTR(-ENOMEM);
> +
> +       spin_lock_init(&free_mn->lock);
> +       free_mn->mn.ops = &i915_gem_userptr_notifier;
> +       free_mn->objects = RB_ROOT_CACHED;
> +       /* must allocate wq outside of mm->mmap_sem */
> +       free_mn->wq = alloc_workqueue("i915-userptr-release", WQ_UNBOUND, 0);
> +       if (free_mn->wq == NULL) {
> +               ret = -ENOMEM;
> +               goto err_kfree;
> +       }
> +
>         down_write(&mm->mm->mmap_sem);
>         mutex_lock(&mm->i915->mm_lock);
> -       if ((mn = mm->mn) == NULL) {
> -               mn = i915_mmu_notifier_create(mm->mm);
> -               if (!IS_ERR(mn))
> -                       mm->mn = mn;
> +       if (mm->mn == NULL) {
> +                /* Protected by mmap_sem (write-lock) */
> +               ret = __mmu_notifier_register(&free_mn->mn, mm->mm);
> +               if (ret == 0)
> +                       mm->mn = mn =free_mn;
> +       } else {
> +               mn = mm->mn;
>         }
>         mutex_unlock(&mm->i915->mm_lock);
>         up_write(&mm->mm->mmap_sem);
>  
> -       return mn;
> +err_wq:
> +       destroy_workqueue(free_mn->wq);
> +err_kfree:
> +       kfree(free_mn);
> +
> +       return ret == 0 ? mn : ERR_PTR(ret);
>  }

I had a go,

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 2d4996de7331..f9b3406401af 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -164,7 +164,6 @@ static struct i915_mmu_notifier *
 i915_mmu_notifier_create(struct mm_struct *mm)
 {
        struct i915_mmu_notifier *mn;
-       int ret;
 
        mn = kmalloc(sizeof(*mn), GFP_KERNEL);
        if (mn == NULL)
@@ -179,14 +178,6 @@ i915_mmu_notifier_create(struct mm_struct *mm)
                return ERR_PTR(-ENOMEM);
        }
 
-        /* Protected by mmap_sem (write-lock) */
-       ret = __mmu_notifier_register(&mn->mn, mm);
-       if (ret) {
-               destroy_workqueue(mn->wq);
-               kfree(mn);
-               return ERR_PTR(ret);
-       }
-
        return mn;
 }
 
@@ -210,23 +201,37 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
 static struct i915_mmu_notifier *
 i915_mmu_notifier_find(struct i915_mm_struct *mm)
 {
-       struct i915_mmu_notifier *mn = mm->mn;
+       struct i915_mmu_notifier *mn;
+       int err;
 
        mn = mm->mn;
        if (mn)
                return mn;
 
+       mn = i915_mmu_notifier_create(mm->mm);
+       if (IS_ERR(mn))
+               return mn;
+
+       err = 0;
        down_write(&mm->mm->mmap_sem);
        mutex_lock(&mm->i915->mm_lock);
-       if ((mn = mm->mn) == NULL) {
-               mn = i915_mmu_notifier_create(mm->mm);
-               if (!IS_ERR(mn))
-                       mm->mn = mn;
+       if (mm->mn == NULL) {
+               /* Protected by mmap_sem (write-lock) */
+               err = __mmu_notifier_register(&mn->mn, mm->mm);
+               if (!err) {
+                       /* Protected by mm_lock */
+                       mm->mn = fetch_and_zero(&mn);
+               }
        }
        mutex_unlock(&mm->i915->mm_lock);
        up_write(&mm->mm->mmap_sem);
 
-       return mn;
+       if (mn) {
+               destroy_workqueue(mn->wq);
+               kfree(mn);
+       }
+
+       return err ? ERR_PTR(err) : mm->mn;
 }

So not sold that we want to keep the separate single use
i915_mmu_notifier_create() (the onion unwind is pretty solid).

But it's safe enough to return mm->mn (it's stable once we've proved it's
set) and that allows us to avoid to a second free_mn. I'm prefer the
register above as I think it's that touch clearer.

In particular, I believe your use of free_mn is buggy as you still free
it after assigning it to mm->mn.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Preallocate mmu notifier to unbreak cpu hotplug deadlock
  2017-10-05 13:22 ` Daniel Vetter
@ 2017-10-05 13:57   ` Chris Wilson
  -1 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-10-05 13:57 UTC (permalink / raw)
  To: Daniel Vetter, Intel Graphics Development
  Cc: LKML, Daniel Vetter, Tvrtko Ursulin, Joonas Lahtinen,
	Peter Zijlstra, Thomas Gleixner, Sasha Levin, Daniel Vetter

Quoting Daniel Vetter (2017-10-05 14:22:06)
> 4.14-rc1 gained the fancy new cross-release support in lockdep, which
> seems to have uncovered a few more rules about what is allowed and
> isn't.
> 
> This one here seems to indicate that allocating a work-queue while
> holding mmap_sem is a no-go, so let's try to preallocate it.
> 
> Of course another way to break this chain would be somewhere in the
> cpu hotplug code, since this isn't the only trace we're finding now
> which goes through msr_create_device.
> 
> Full lockdep splat:
> 
> ======================================================
> WARNING: possible circular locking dependency detected
> 4.14.0-rc3-CI-CI_DRM_3179+ #1 Tainted: G     U
> ------------------------------------------------------
> kworker/3:4/562 is trying to acquire lock:
>  (cpu_hotplug_lock.rw_sem){++++}, at: [<ffffffff8113d4bc>] stop_machine+0x1c/0x40
> 
> but task is already holding lock:
>  (&dev->struct_mutex){+.+.}, at: [<ffffffffa0136588>] i915_reset_device+0x1e8/0x260 [i915]
> 
> which lock already depends on the new lock.
> 
> the existing dependency chain (in reverse order) is:
> 
> -> #6 (&dev->struct_mutex){+.+.}:
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        __mutex_lock+0x86/0x9b0
>        mutex_lock_interruptible_nested+0x1b/0x20
>        i915_mutex_lock_interruptible+0x51/0x130 [i915]
>        i915_gem_fault+0x209/0x650 [i915]
>        __do_fault+0x1e/0x80
>        __handle_mm_fault+0xa08/0xed0
>        handle_mm_fault+0x156/0x300
>        __do_page_fault+0x2c5/0x570
>        do_page_fault+0x28/0x250
>        page_fault+0x22/0x30
> 
> -> #5 (&mm->mmap_sem){++++}:
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        __might_fault+0x68/0x90
>        _copy_to_user+0x23/0x70
>        filldir+0xa5/0x120
>        dcache_readdir+0xf9/0x170
>        iterate_dir+0x69/0x1a0
>        SyS_getdents+0xa5/0x140
>        entry_SYSCALL_64_fastpath+0x1c/0xb1
> 
> -> #4 (&sb->s_type->i_mutex_key#5){++++}:
>        down_write+0x3b/0x70
>        handle_create+0xcb/0x1e0
>        devtmpfsd+0x139/0x180
>        kthread+0x152/0x190
>        ret_from_fork+0x27/0x40
> 
> -> #3 ((complete)&req.done){+.+.}:
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        wait_for_common+0x58/0x210
>        wait_for_completion+0x1d/0x20
>        devtmpfs_create_node+0x13d/0x160
>        device_add+0x5eb/0x620
>        device_create_groups_vargs+0xe0/0xf0
>        device_create+0x3a/0x40
>        msr_device_create+0x2b/0x40
>        cpuhp_invoke_callback+0xc9/0xbf0
>        cpuhp_thread_fun+0x17b/0x240
>        smpboot_thread_fn+0x18a/0x280
>        kthread+0x152/0x190
>        ret_from_fork+0x27/0x40
> 
> -> #2 (cpuhp_state-up){+.+.}:
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        cpuhp_issue_call+0x133/0x1c0
>        __cpuhp_setup_state_cpuslocked+0x139/0x2a0
>        __cpuhp_setup_state+0x46/0x60
>        page_writeback_init+0x43/0x67
>        pagecache_init+0x3d/0x42
>        start_kernel+0x3a8/0x3fc
>        x86_64_start_reservations+0x2a/0x2c
>        x86_64_start_kernel+0x6d/0x70
>        verify_cpu+0x0/0xfb
> 
> -> #1 (cpuhp_state_mutex){+.+.}:
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        __mutex_lock+0x86/0x9b0
>        mutex_lock_nested+0x1b/0x20
>        __cpuhp_setup_state_cpuslocked+0x53/0x2a0
>        __cpuhp_setup_state+0x46/0x60
>        page_alloc_init+0x28/0x30
>        start_kernel+0x145/0x3fc
>        x86_64_start_reservations+0x2a/0x2c
>        x86_64_start_kernel+0x6d/0x70
>        verify_cpu+0x0/0xfb
> 
> -> #0 (cpu_hotplug_lock.rw_sem){++++}:
>        check_prev_add+0x430/0x840
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        cpus_read_lock+0x3d/0xb0
>        stop_machine+0x1c/0x40
>        i915_gem_set_wedged+0x1a/0x20 [i915]
>        i915_reset+0xb9/0x230 [i915]
>        i915_reset_device+0x1f6/0x260 [i915]
>        i915_handle_error+0x2d8/0x430 [i915]
>        hangcheck_declare_hang+0xd3/0xf0 [i915]
>        i915_hangcheck_elapsed+0x262/0x2d0 [i915]
>        process_one_work+0x233/0x660
>        worker_thread+0x4e/0x3b0
>        kthread+0x152/0x190
>        ret_from_fork+0x27/0x40
> 
> other info that might help us debug this:
> 
> Chain exists of:
>   cpu_hotplug_lock.rw_sem --> &mm->mmap_sem --> &dev->struct_mutex
> 
>  Possible unsafe locking scenario:
> 
>        CPU0                    CPU1
>        ----                    ----
>   lock(&dev->struct_mutex);
>                                lock(&mm->mmap_sem);
>                                lock(&dev->struct_mutex);
>   lock(cpu_hotplug_lock.rw_sem);
> 
>  *** DEADLOCK ***
> 
> 3 locks held by kworker/3:4/562:
>  #0:  ("events_long"){+.+.}, at: [<ffffffff8109c64a>] process_one_work+0x1aa/0x660
>  #1:  ((&(&i915->gpu_error.hangcheck_work)->work)){+.+.}, at: [<ffffffff8109c64a>] process_one_work+0x1aa/0x660
>  #2:  (&dev->struct_mutex){+.+.}, at: [<ffffffffa0136588>] i915_reset_device+0x1e8/0x260 [i915]
> 
> stack backtrace:
> CPU: 3 PID: 562 Comm: kworker/3:4 Tainted: G     U          4.14.0-rc3-CI-CI_DRM_3179+ #1
> Hardware name:                  /NUC7i5BNB, BIOS BNKBL357.86A.0048.2017.0704.1415 07/04/2017
> Workqueue: events_long i915_hangcheck_elapsed [i915]
> Call Trace:
>  dump_stack+0x68/0x9f
>  print_circular_bug+0x235/0x3c0
>  ? lockdep_init_map_crosslock+0x20/0x20
>  check_prev_add+0x430/0x840
>  ? irq_work_queue+0x86/0xe0
>  ? wake_up_klogd+0x53/0x70
>  __lock_acquire+0x1420/0x15e0
>  ? __lock_acquire+0x1420/0x15e0
>  ? lockdep_init_map_crosslock+0x20/0x20
>  lock_acquire+0xb0/0x200
>  ? stop_machine+0x1c/0x40
>  ? i915_gem_object_truncate+0x50/0x50 [i915]
>  cpus_read_lock+0x3d/0xb0
>  ? stop_machine+0x1c/0x40
>  stop_machine+0x1c/0x40
>  i915_gem_set_wedged+0x1a/0x20 [i915]
>  i915_reset+0xb9/0x230 [i915]
>  i915_reset_device+0x1f6/0x260 [i915]
>  ? gen8_gt_irq_ack+0x170/0x170 [i915]
>  ? work_on_cpu_safe+0x60/0x60
>  i915_handle_error+0x2d8/0x430 [i915]
>  ? vsnprintf+0xd1/0x4b0
>  ? scnprintf+0x3a/0x70
>  hangcheck_declare_hang+0xd3/0xf0 [i915]
>  ? intel_runtime_pm_put+0x56/0xa0 [i915]
>  i915_hangcheck_elapsed+0x262/0x2d0 [i915]
>  process_one_work+0x233/0x660
>  worker_thread+0x4e/0x3b0
>  kthread+0x152/0x190
>  ? process_one_work+0x660/0x660
>  ? kthread_create_on_node+0x40/0x40
>  ret_from_fork+0x27/0x40
> Setting dangerous option reset - tainting kernel
> i915 0000:00:02.0: Resetting chip after gpu hang
> Setting dangerous option reset - tainting kernel
> i915 0000:00:02.0: Resetting chip after gpu hang

Fwiw, this does not occur on machines with
# CONFI_X86_MSR is not set
-Chris

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Preallocate mmu notifier to unbreak cpu hotplug deadlock
@ 2017-10-05 13:57   ` Chris Wilson
  0 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-10-05 13:57 UTC (permalink / raw)
  To: Intel Graphics Development
  Cc: Peter Zijlstra, Daniel Vetter, LKML, Sasha Levin, Daniel Vetter,
	Thomas Gleixner

Quoting Daniel Vetter (2017-10-05 14:22:06)
> 4.14-rc1 gained the fancy new cross-release support in lockdep, which
> seems to have uncovered a few more rules about what is allowed and
> isn't.
> 
> This one here seems to indicate that allocating a work-queue while
> holding mmap_sem is a no-go, so let's try to preallocate it.
> 
> Of course another way to break this chain would be somewhere in the
> cpu hotplug code, since this isn't the only trace we're finding now
> which goes through msr_create_device.
> 
> Full lockdep splat:
> 
> ======================================================
> WARNING: possible circular locking dependency detected
> 4.14.0-rc3-CI-CI_DRM_3179+ #1 Tainted: G     U
> ------------------------------------------------------
> kworker/3:4/562 is trying to acquire lock:
>  (cpu_hotplug_lock.rw_sem){++++}, at: [<ffffffff8113d4bc>] stop_machine+0x1c/0x40
> 
> but task is already holding lock:
>  (&dev->struct_mutex){+.+.}, at: [<ffffffffa0136588>] i915_reset_device+0x1e8/0x260 [i915]
> 
> which lock already depends on the new lock.
> 
> the existing dependency chain (in reverse order) is:
> 
> -> #6 (&dev->struct_mutex){+.+.}:
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        __mutex_lock+0x86/0x9b0
>        mutex_lock_interruptible_nested+0x1b/0x20
>        i915_mutex_lock_interruptible+0x51/0x130 [i915]
>        i915_gem_fault+0x209/0x650 [i915]
>        __do_fault+0x1e/0x80
>        __handle_mm_fault+0xa08/0xed0
>        handle_mm_fault+0x156/0x300
>        __do_page_fault+0x2c5/0x570
>        do_page_fault+0x28/0x250
>        page_fault+0x22/0x30
> 
> -> #5 (&mm->mmap_sem){++++}:
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        __might_fault+0x68/0x90
>        _copy_to_user+0x23/0x70
>        filldir+0xa5/0x120
>        dcache_readdir+0xf9/0x170
>        iterate_dir+0x69/0x1a0
>        SyS_getdents+0xa5/0x140
>        entry_SYSCALL_64_fastpath+0x1c/0xb1
> 
> -> #4 (&sb->s_type->i_mutex_key#5){++++}:
>        down_write+0x3b/0x70
>        handle_create+0xcb/0x1e0
>        devtmpfsd+0x139/0x180
>        kthread+0x152/0x190
>        ret_from_fork+0x27/0x40
> 
> -> #3 ((complete)&req.done){+.+.}:
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        wait_for_common+0x58/0x210
>        wait_for_completion+0x1d/0x20
>        devtmpfs_create_node+0x13d/0x160
>        device_add+0x5eb/0x620
>        device_create_groups_vargs+0xe0/0xf0
>        device_create+0x3a/0x40
>        msr_device_create+0x2b/0x40
>        cpuhp_invoke_callback+0xc9/0xbf0
>        cpuhp_thread_fun+0x17b/0x240
>        smpboot_thread_fn+0x18a/0x280
>        kthread+0x152/0x190
>        ret_from_fork+0x27/0x40
> 
> -> #2 (cpuhp_state-up){+.+.}:
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        cpuhp_issue_call+0x133/0x1c0
>        __cpuhp_setup_state_cpuslocked+0x139/0x2a0
>        __cpuhp_setup_state+0x46/0x60
>        page_writeback_init+0x43/0x67
>        pagecache_init+0x3d/0x42
>        start_kernel+0x3a8/0x3fc
>        x86_64_start_reservations+0x2a/0x2c
>        x86_64_start_kernel+0x6d/0x70
>        verify_cpu+0x0/0xfb
> 
> -> #1 (cpuhp_state_mutex){+.+.}:
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        __mutex_lock+0x86/0x9b0
>        mutex_lock_nested+0x1b/0x20
>        __cpuhp_setup_state_cpuslocked+0x53/0x2a0
>        __cpuhp_setup_state+0x46/0x60
>        page_alloc_init+0x28/0x30
>        start_kernel+0x145/0x3fc
>        x86_64_start_reservations+0x2a/0x2c
>        x86_64_start_kernel+0x6d/0x70
>        verify_cpu+0x0/0xfb
> 
> -> #0 (cpu_hotplug_lock.rw_sem){++++}:
>        check_prev_add+0x430/0x840
>        __lock_acquire+0x1420/0x15e0
>        lock_acquire+0xb0/0x200
>        cpus_read_lock+0x3d/0xb0
>        stop_machine+0x1c/0x40
>        i915_gem_set_wedged+0x1a/0x20 [i915]
>        i915_reset+0xb9/0x230 [i915]
>        i915_reset_device+0x1f6/0x260 [i915]
>        i915_handle_error+0x2d8/0x430 [i915]
>        hangcheck_declare_hang+0xd3/0xf0 [i915]
>        i915_hangcheck_elapsed+0x262/0x2d0 [i915]
>        process_one_work+0x233/0x660
>        worker_thread+0x4e/0x3b0
>        kthread+0x152/0x190
>        ret_from_fork+0x27/0x40
> 
> other info that might help us debug this:
> 
> Chain exists of:
>   cpu_hotplug_lock.rw_sem --> &mm->mmap_sem --> &dev->struct_mutex
> 
>  Possible unsafe locking scenario:
> 
>        CPU0                    CPU1
>        ----                    ----
>   lock(&dev->struct_mutex);
>                                lock(&mm->mmap_sem);
>                                lock(&dev->struct_mutex);
>   lock(cpu_hotplug_lock.rw_sem);
> 
>  *** DEADLOCK ***
> 
> 3 locks held by kworker/3:4/562:
>  #0:  ("events_long"){+.+.}, at: [<ffffffff8109c64a>] process_one_work+0x1aa/0x660
>  #1:  ((&(&i915->gpu_error.hangcheck_work)->work)){+.+.}, at: [<ffffffff8109c64a>] process_one_work+0x1aa/0x660
>  #2:  (&dev->struct_mutex){+.+.}, at: [<ffffffffa0136588>] i915_reset_device+0x1e8/0x260 [i915]
> 
> stack backtrace:
> CPU: 3 PID: 562 Comm: kworker/3:4 Tainted: G     U          4.14.0-rc3-CI-CI_DRM_3179+ #1
> Hardware name:                  /NUC7i5BNB, BIOS BNKBL357.86A.0048.2017.0704.1415 07/04/2017
> Workqueue: events_long i915_hangcheck_elapsed [i915]
> Call Trace:
>  dump_stack+0x68/0x9f
>  print_circular_bug+0x235/0x3c0
>  ? lockdep_init_map_crosslock+0x20/0x20
>  check_prev_add+0x430/0x840
>  ? irq_work_queue+0x86/0xe0
>  ? wake_up_klogd+0x53/0x70
>  __lock_acquire+0x1420/0x15e0
>  ? __lock_acquire+0x1420/0x15e0
>  ? lockdep_init_map_crosslock+0x20/0x20
>  lock_acquire+0xb0/0x200
>  ? stop_machine+0x1c/0x40
>  ? i915_gem_object_truncate+0x50/0x50 [i915]
>  cpus_read_lock+0x3d/0xb0
>  ? stop_machine+0x1c/0x40
>  stop_machine+0x1c/0x40
>  i915_gem_set_wedged+0x1a/0x20 [i915]
>  i915_reset+0xb9/0x230 [i915]
>  i915_reset_device+0x1f6/0x260 [i915]
>  ? gen8_gt_irq_ack+0x170/0x170 [i915]
>  ? work_on_cpu_safe+0x60/0x60
>  i915_handle_error+0x2d8/0x430 [i915]
>  ? vsnprintf+0xd1/0x4b0
>  ? scnprintf+0x3a/0x70
>  hangcheck_declare_hang+0xd3/0xf0 [i915]
>  ? intel_runtime_pm_put+0x56/0xa0 [i915]
>  i915_hangcheck_elapsed+0x262/0x2d0 [i915]
>  process_one_work+0x233/0x660
>  worker_thread+0x4e/0x3b0
>  kthread+0x152/0x190
>  ? process_one_work+0x660/0x660
>  ? kthread_create_on_node+0x40/0x40
>  ret_from_fork+0x27/0x40
> Setting dangerous option reset - tainting kernel
> i915 0000:00:02.0: Resetting chip after gpu hang
> Setting dangerous option reset - tainting kernel
> i915 0000:00:02.0: Resetting chip after gpu hang

Fwiw, this does not occur on machines with
# CONFI_X86_MSR is not set
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Preallocate mmu notifier to unbreak cpu hotplug deadlock
  2017-10-05 13:22 ` Daniel Vetter
                   ` (2 preceding siblings ...)
  (?)
@ 2017-10-05 15:23 ` Thomas Gleixner
  2017-10-05 15:58     ` Daniel Vetter
  -1 siblings, 1 reply; 14+ messages in thread
From: Thomas Gleixner @ 2017-10-05 15:23 UTC (permalink / raw)
  To: Daniel Vetter
  Cc: Intel Graphics Development, LKML, Chris Wilson, Tvrtko Ursulin,
	Joonas Lahtinen, Peter Zijlstra, Sasha Levin, Daniel Vetter

On Thu, 5 Oct 2017, Daniel Vetter wrote:

> 4.14-rc1 gained the fancy new cross-release support in lockdep, which
> seems to have uncovered a few more rules about what is allowed and
> isn't.
> 
> This one here seems to indicate that allocating a work-queue while
> holding mmap_sem is a no-go, so let's try to preallocate it.
> 
> Of course another way to break this chain would be somewhere in the
> cpu hotplug code, since this isn't the only trace we're finding now
> which goes through msr_create_device.

That's an interesting multi chain circular dependency which is related to
devfs.

Now the MSR device is not the only one which is creating that
dependency. We have CPUID and MCE as well. That's what a quick search in
x86 revealed. No idea whether there are more of those interesting bits and
pieces.

To fix it on the hotplug side we'd have to introduce extra state space
which is handled outside the cpuhotplug_rwsem region, but inside of the
cpu_maps_update_begin()/end() region, which has a nasty pile of
implications vs. the state registration/deregistration as this stuff can be
built as modules. So we'd need a complete set of new interfaces and
handling routines with some explicit restrictions on those state callbacks.

I rather prefer not to go there unless its unavoidable, which brings me to
the obvious question about the stop_machine() usage in the graphics code.

void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
{
        stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL);
}

The function name is telling. The machine is wedged and stop_machine()
might make it even more wedged when looking at this splat :)

The called function name is interesting as well. Is that _BKL postfix a
leftover of the BKL removal a couple of years ago?

Aside of that, is it really required to use stomp_machine() for this
synchronization? We certainly have less intrusive mechansisms than that.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Preallocate mmu notifier to unbreak cpu hotplug deadlock
  2017-10-05 15:23 ` Thomas Gleixner
@ 2017-10-05 15:58     ` Daniel Vetter
  0 siblings, 0 replies; 14+ messages in thread
From: Daniel Vetter @ 2017-10-05 15:58 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Daniel Vetter, Intel Graphics Development, LKML, Chris Wilson,
	Tvrtko Ursulin, Joonas Lahtinen, Peter Zijlstra, Sasha Levin,
	Daniel Vetter

On Thu, Oct 05, 2017 at 05:23:20PM +0200, Thomas Gleixner wrote:
> On Thu, 5 Oct 2017, Daniel Vetter wrote:
> 
> > 4.14-rc1 gained the fancy new cross-release support in lockdep, which
> > seems to have uncovered a few more rules about what is allowed and
> > isn't.
> > 
> > This one here seems to indicate that allocating a work-queue while
> > holding mmap_sem is a no-go, so let's try to preallocate it.
> > 
> > Of course another way to break this chain would be somewhere in the
> > cpu hotplug code, since this isn't the only trace we're finding now
> > which goes through msr_create_device.
> 
> That's an interesting multi chain circular dependency which is related to
> devfs.
> 
> Now the MSR device is not the only one which is creating that
> dependency. We have CPUID and MCE as well. That's what a quick search in
> x86 revealed. No idea whether there are more of those interesting bits and
> pieces.
> 
> To fix it on the hotplug side we'd have to introduce extra state space
> which is handled outside the cpuhotplug_rwsem region, but inside of the
> cpu_maps_update_begin()/end() region, which has a nasty pile of
> implications vs. the state registration/deregistration as this stuff can be
> built as modules. So we'd need a complete set of new interfaces and
> handling routines with some explicit restrictions on those state callbacks.
> 
> I rather prefer not to go there unless its unavoidable, which brings me to
> the obvious question about the stop_machine() usage in the graphics code.
> 
> void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
> {
>         stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL);
> }
> 
> The function name is telling. The machine is wedged and stop_machine()
> might make it even more wedged when looking at this splat :)
> 
> The called function name is interesting as well. Is that _BKL postfix a
> leftover of the BKL removal a couple of years ago?
> 
> Aside of that, is it really required to use stomp_machine() for this
> synchronization? We certainly have less intrusive mechansisms than that.

Yeah, the stop_machine needs to go, I'm working on something that uses
rcu_read_lock+synchronize_rcu for this case. Probably shouldn't have
merged even.

Now this one isn't the one I wanted to fix with this patch since there's
clearly something dubious going on on the i915 side too. The proper trace,
with the same part on the cpu hotplug side, highlights that you can't
allocate a workqueue while hodling mmap_sem. That one matches patch
description&diff a bit better :-)

Sorry for misleading you, should have checked to attach the right one. No
stop_machine()/i915_gem_set_wedged() in the below one.
-Daniel

======================================================
WARNING: possible circular locking dependency detected
4.14.0-rc3-CI-CI_DRM_3172+ #1 Tainted: G     U         
------------------------------------------------------
prime_mmap/1588 is trying to acquire lock:
 (cpu_hotplug_lock.rw_sem){++++}, at: [<ffffffff8109e5a7>] apply_workqueue_attrs+0x17/0x50

but task is already holding lock:
 (&dev_priv->mm_lock){+.+.}, at: [<ffffffffa01b2dfa>] i915_gem_userptr_init__mmu_notifier+0x14a/0x270 [i915]

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #6 (&dev_priv->mm_lock){+.+.}:
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       __mutex_lock+0x86/0x9b0
       mutex_lock_nested+0x1b/0x20
       i915_gem_userptr_init__mmu_notifier+0x14a/0x270 [i915]
       i915_gem_userptr_ioctl+0x222/0x2c0 [i915]
       drm_ioctl_kernel+0x69/0xb0
       drm_ioctl+0x2f9/0x3d0
       do_vfs_ioctl+0x94/0x670
       SyS_ioctl+0x41/0x70
       entry_SYSCALL_64_fastpath+0x1c/0xb1

-> #5 (&mm->mmap_sem){++++}:
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       __might_fault+0x68/0x90
       _copy_to_user+0x23/0x70
       filldir+0xa5/0x120
       dcache_readdir+0xf9/0x170
       iterate_dir+0x69/0x1a0
       SyS_getdents+0xa5/0x140
       entry_SYSCALL_64_fastpath+0x1c/0xb1

-> #4 (&sb->s_type->i_mutex_key#5){++++}:
       down_write+0x3b/0x70
       handle_create+0xcb/0x1e0
       devtmpfsd+0x139/0x180
       kthread+0x152/0x190
       ret_from_fork+0x27/0x40

-> #3 ((complete)&req.done){+.+.}:
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       wait_for_common+0x58/0x210
       wait_for_completion+0x1d/0x20
       devtmpfs_create_node+0x13d/0x160
       device_add+0x5eb/0x620
       device_create_groups_vargs+0xe0/0xf0
       device_create+0x3a/0x40
       msr_device_create+0x2b/0x40
       cpuhp_invoke_callback+0xc9/0xbf0
       cpuhp_thread_fun+0x17b/0x240
       smpboot_thread_fn+0x18a/0x280
       kthread+0x152/0x190
       ret_from_fork+0x27/0x40

-> #2 (cpuhp_state-up){+.+.}:
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       cpuhp_issue_call+0x133/0x1c0
       __cpuhp_setup_state_cpuslocked+0x139/0x2a0
       __cpuhp_setup_state+0x46/0x60
       page_writeback_init+0x43/0x67
       pagecache_init+0x3d/0x42
       start_kernel+0x3a8/0x3fc
       x86_64_start_reservations+0x2a/0x2c
       x86_64_start_kernel+0x6d/0x70
       verify_cpu+0x0/0xfb

-> #1 (cpuhp_state_mutex){+.+.}:
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       __mutex_lock+0x86/0x9b0
       mutex_lock_nested+0x1b/0x20
       __cpuhp_setup_state_cpuslocked+0x53/0x2a0
       __cpuhp_setup_state+0x46/0x60
       page_alloc_init+0x28/0x30
       start_kernel+0x145/0x3fc
       x86_64_start_reservations+0x2a/0x2c
       x86_64_start_kernel+0x6d/0x70
       verify_cpu+0x0/0xfb

-> #0 (cpu_hotplug_lock.rw_sem){++++}:
       check_prev_add+0x430/0x840
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       cpus_read_lock+0x3d/0xb0
       apply_workqueue_attrs+0x17/0x50
       __alloc_workqueue_key+0x1d8/0x4d9
       i915_gem_userptr_init__mmu_notifier+0x1fb/0x270 [i915]
       i915_gem_userptr_ioctl+0x222/0x2c0 [i915]
       drm_ioctl_kernel+0x69/0xb0
       drm_ioctl+0x2f9/0x3d0
       do_vfs_ioctl+0x94/0x670
       SyS_ioctl+0x41/0x70
       entry_SYSCALL_64_fastpath+0x1c/0xb1

other info that might help us debug this:

Chain exists of:
  cpu_hotplug_lock.rw_sem --> &mm->mmap_sem --> &dev_priv->mm_lock

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&dev_priv->mm_lock);
                               lock(&mm->mmap_sem);
                               lock(&dev_priv->mm_lock);
  lock(cpu_hotplug_lock.rw_sem);

 *** DEADLOCK ***

2 locks held by prime_mmap/1588:
 #0:  (&mm->mmap_sem){++++}, at: [<ffffffffa01b2de8>] i915_gem_userptr_init__mmu_notifier+0x138/0x270 [i915]
 #1:  (&dev_priv->mm_lock){+.+.}, at: [<ffffffffa01b2dfa>] i915_gem_userptr_init__mmu_notifier+0x14a/0x270 [i915]

stack backtrace:
CPU: 6 PID: 1588 Comm: prime_mmap Tainted: G     U          4.14.0-rc3-CI-CI_DRM_3172+ #1
Hardware name: Dell Inc. XPS 8300  /0Y2MRG, BIOS A06 10/17/2011
Call Trace:
 dump_stack+0x68/0x9f
 print_circular_bug+0x235/0x3c0
 ? lockdep_init_map_crosslock+0x20/0x20
 check_prev_add+0x430/0x840
 __lock_acquire+0x1420/0x15e0
 ? __lock_acquire+0x1420/0x15e0
 ? lockdep_init_map_crosslock+0x20/0x20
 lock_acquire+0xb0/0x200
 ? apply_workqueue_attrs+0x17/0x50
 cpus_read_lock+0x3d/0xb0
 ? apply_workqueue_attrs+0x17/0x50
 apply_workqueue_attrs+0x17/0x50
 __alloc_workqueue_key+0x1d8/0x4d9
 ? __lockdep_init_map+0x57/0x1c0
 i915_gem_userptr_init__mmu_notifier+0x1fb/0x270 [i915]
 i915_gem_userptr_ioctl+0x222/0x2c0 [i915]
 ? i915_gem_userptr_release+0x140/0x140 [i915]
 drm_ioctl_kernel+0x69/0xb0
 drm_ioctl+0x2f9/0x3d0
 ? i915_gem_userptr_release+0x140/0x140 [i915]
 ? __do_page_fault+0x2f3/0x570
 do_vfs_ioctl+0x94/0x670
 ? entry_SYSCALL_64_fastpath+0x5/0xb1
 ? __this_cpu_preempt_check+0x13/0x20
 ? trace_hardirqs_on_caller+0xe3/0x1b0
 SyS_ioctl+0x41/0x70
 entry_SYSCALL_64_fastpath+0x1c/0xb1
RIP: 0033:0x7fdf3d529587
RSP: 002b:00007ffccbbedd78 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
RAX: ffffffffffffffda RBX: ffffffff81493a03 RCX: 00007fdf3d529587
RDX: 00007ffccbbeddb0 RSI: 00000000c0186473 RDI: 0000000000000003
RBP: ffffc90000ad7f88 R08: 0000000000000000 R09: 00007ffccbbeddfc
R10: 00007fdf3d7ecb58 R11: 0000000000000246 R12: 0000000000000000
R13: 0000000000000003 R14: 00000000c0186473 R15: 00007ffccbbeddfc
 ? __this_cpu_preempt_check+0x13/0x20
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Preallocate mmu notifier to unbreak cpu hotplug deadlock
@ 2017-10-05 15:58     ` Daniel Vetter
  0 siblings, 0 replies; 14+ messages in thread
From: Daniel Vetter @ 2017-10-05 15:58 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, Daniel Vetter, Intel Graphics Development, LKML,
	Daniel Vetter, Sasha Levin

On Thu, Oct 05, 2017 at 05:23:20PM +0200, Thomas Gleixner wrote:
> On Thu, 5 Oct 2017, Daniel Vetter wrote:
> 
> > 4.14-rc1 gained the fancy new cross-release support in lockdep, which
> > seems to have uncovered a few more rules about what is allowed and
> > isn't.
> > 
> > This one here seems to indicate that allocating a work-queue while
> > holding mmap_sem is a no-go, so let's try to preallocate it.
> > 
> > Of course another way to break this chain would be somewhere in the
> > cpu hotplug code, since this isn't the only trace we're finding now
> > which goes through msr_create_device.
> 
> That's an interesting multi chain circular dependency which is related to
> devfs.
> 
> Now the MSR device is not the only one which is creating that
> dependency. We have CPUID and MCE as well. That's what a quick search in
> x86 revealed. No idea whether there are more of those interesting bits and
> pieces.
> 
> To fix it on the hotplug side we'd have to introduce extra state space
> which is handled outside the cpuhotplug_rwsem region, but inside of the
> cpu_maps_update_begin()/end() region, which has a nasty pile of
> implications vs. the state registration/deregistration as this stuff can be
> built as modules. So we'd need a complete set of new interfaces and
> handling routines with some explicit restrictions on those state callbacks.
> 
> I rather prefer not to go there unless its unavoidable, which brings me to
> the obvious question about the stop_machine() usage in the graphics code.
> 
> void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
> {
>         stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL);
> }
> 
> The function name is telling. The machine is wedged and stop_machine()
> might make it even more wedged when looking at this splat :)
> 
> The called function name is interesting as well. Is that _BKL postfix a
> leftover of the BKL removal a couple of years ago?
> 
> Aside of that, is it really required to use stomp_machine() for this
> synchronization? We certainly have less intrusive mechansisms than that.

Yeah, the stop_machine needs to go, I'm working on something that uses
rcu_read_lock+synchronize_rcu for this case. Probably shouldn't have
merged even.

Now this one isn't the one I wanted to fix with this patch since there's
clearly something dubious going on on the i915 side too. The proper trace,
with the same part on the cpu hotplug side, highlights that you can't
allocate a workqueue while hodling mmap_sem. That one matches patch
description&diff a bit better :-)

Sorry for misleading you, should have checked to attach the right one. No
stop_machine()/i915_gem_set_wedged() in the below one.
-Daniel

======================================================
WARNING: possible circular locking dependency detected
4.14.0-rc3-CI-CI_DRM_3172+ #1 Tainted: G     U         
------------------------------------------------------
prime_mmap/1588 is trying to acquire lock:
 (cpu_hotplug_lock.rw_sem){++++}, at: [<ffffffff8109e5a7>] apply_workqueue_attrs+0x17/0x50

but task is already holding lock:
 (&dev_priv->mm_lock){+.+.}, at: [<ffffffffa01b2dfa>] i915_gem_userptr_init__mmu_notifier+0x14a/0x270 [i915]

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #6 (&dev_priv->mm_lock){+.+.}:
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       __mutex_lock+0x86/0x9b0
       mutex_lock_nested+0x1b/0x20
       i915_gem_userptr_init__mmu_notifier+0x14a/0x270 [i915]
       i915_gem_userptr_ioctl+0x222/0x2c0 [i915]
       drm_ioctl_kernel+0x69/0xb0
       drm_ioctl+0x2f9/0x3d0
       do_vfs_ioctl+0x94/0x670
       SyS_ioctl+0x41/0x70
       entry_SYSCALL_64_fastpath+0x1c/0xb1

-> #5 (&mm->mmap_sem){++++}:
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       __might_fault+0x68/0x90
       _copy_to_user+0x23/0x70
       filldir+0xa5/0x120
       dcache_readdir+0xf9/0x170
       iterate_dir+0x69/0x1a0
       SyS_getdents+0xa5/0x140
       entry_SYSCALL_64_fastpath+0x1c/0xb1

-> #4 (&sb->s_type->i_mutex_key#5){++++}:
       down_write+0x3b/0x70
       handle_create+0xcb/0x1e0
       devtmpfsd+0x139/0x180
       kthread+0x152/0x190
       ret_from_fork+0x27/0x40

-> #3 ((complete)&req.done){+.+.}:
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       wait_for_common+0x58/0x210
       wait_for_completion+0x1d/0x20
       devtmpfs_create_node+0x13d/0x160
       device_add+0x5eb/0x620
       device_create_groups_vargs+0xe0/0xf0
       device_create+0x3a/0x40
       msr_device_create+0x2b/0x40
       cpuhp_invoke_callback+0xc9/0xbf0
       cpuhp_thread_fun+0x17b/0x240
       smpboot_thread_fn+0x18a/0x280
       kthread+0x152/0x190
       ret_from_fork+0x27/0x40

-> #2 (cpuhp_state-up){+.+.}:
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       cpuhp_issue_call+0x133/0x1c0
       __cpuhp_setup_state_cpuslocked+0x139/0x2a0
       __cpuhp_setup_state+0x46/0x60
       page_writeback_init+0x43/0x67
       pagecache_init+0x3d/0x42
       start_kernel+0x3a8/0x3fc
       x86_64_start_reservations+0x2a/0x2c
       x86_64_start_kernel+0x6d/0x70
       verify_cpu+0x0/0xfb

-> #1 (cpuhp_state_mutex){+.+.}:
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       __mutex_lock+0x86/0x9b0
       mutex_lock_nested+0x1b/0x20
       __cpuhp_setup_state_cpuslocked+0x53/0x2a0
       __cpuhp_setup_state+0x46/0x60
       page_alloc_init+0x28/0x30
       start_kernel+0x145/0x3fc
       x86_64_start_reservations+0x2a/0x2c
       x86_64_start_kernel+0x6d/0x70
       verify_cpu+0x0/0xfb

-> #0 (cpu_hotplug_lock.rw_sem){++++}:
       check_prev_add+0x430/0x840
       __lock_acquire+0x1420/0x15e0
       lock_acquire+0xb0/0x200
       cpus_read_lock+0x3d/0xb0
       apply_workqueue_attrs+0x17/0x50
       __alloc_workqueue_key+0x1d8/0x4d9
       i915_gem_userptr_init__mmu_notifier+0x1fb/0x270 [i915]
       i915_gem_userptr_ioctl+0x222/0x2c0 [i915]
       drm_ioctl_kernel+0x69/0xb0
       drm_ioctl+0x2f9/0x3d0
       do_vfs_ioctl+0x94/0x670
       SyS_ioctl+0x41/0x70
       entry_SYSCALL_64_fastpath+0x1c/0xb1

other info that might help us debug this:

Chain exists of:
  cpu_hotplug_lock.rw_sem --> &mm->mmap_sem --> &dev_priv->mm_lock

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&dev_priv->mm_lock);
                               lock(&mm->mmap_sem);
                               lock(&dev_priv->mm_lock);
  lock(cpu_hotplug_lock.rw_sem);

 *** DEADLOCK ***

2 locks held by prime_mmap/1588:
 #0:  (&mm->mmap_sem){++++}, at: [<ffffffffa01b2de8>] i915_gem_userptr_init__mmu_notifier+0x138/0x270 [i915]
 #1:  (&dev_priv->mm_lock){+.+.}, at: [<ffffffffa01b2dfa>] i915_gem_userptr_init__mmu_notifier+0x14a/0x270 [i915]

stack backtrace:
CPU: 6 PID: 1588 Comm: prime_mmap Tainted: G     U          4.14.0-rc3-CI-CI_DRM_3172+ #1
Hardware name: Dell Inc. XPS 8300  /0Y2MRG, BIOS A06 10/17/2011
Call Trace:
 dump_stack+0x68/0x9f
 print_circular_bug+0x235/0x3c0
 ? lockdep_init_map_crosslock+0x20/0x20
 check_prev_add+0x430/0x840
 __lock_acquire+0x1420/0x15e0
 ? __lock_acquire+0x1420/0x15e0
 ? lockdep_init_map_crosslock+0x20/0x20
 lock_acquire+0xb0/0x200
 ? apply_workqueue_attrs+0x17/0x50
 cpus_read_lock+0x3d/0xb0
 ? apply_workqueue_attrs+0x17/0x50
 apply_workqueue_attrs+0x17/0x50
 __alloc_workqueue_key+0x1d8/0x4d9
 ? __lockdep_init_map+0x57/0x1c0
 i915_gem_userptr_init__mmu_notifier+0x1fb/0x270 [i915]
 i915_gem_userptr_ioctl+0x222/0x2c0 [i915]
 ? i915_gem_userptr_release+0x140/0x140 [i915]
 drm_ioctl_kernel+0x69/0xb0
 drm_ioctl+0x2f9/0x3d0
 ? i915_gem_userptr_release+0x140/0x140 [i915]
 ? __do_page_fault+0x2f3/0x570
 do_vfs_ioctl+0x94/0x670
 ? entry_SYSCALL_64_fastpath+0x5/0xb1
 ? __this_cpu_preempt_check+0x13/0x20
 ? trace_hardirqs_on_caller+0xe3/0x1b0
 SyS_ioctl+0x41/0x70
 entry_SYSCALL_64_fastpath+0x1c/0xb1
RIP: 0033:0x7fdf3d529587
RSP: 002b:00007ffccbbedd78 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
RAX: ffffffffffffffda RBX: ffffffff81493a03 RCX: 00007fdf3d529587
RDX: 00007ffccbbeddb0 RSI: 00000000c0186473 RDI: 0000000000000003
RBP: ffffc90000ad7f88 R08: 0000000000000000 R09: 00007ffccbbeddfc
R10: 00007fdf3d7ecb58 R11: 0000000000000246 R12: 0000000000000000
R13: 0000000000000003 R14: 00000000c0186473 R15: 00007ffccbbeddfc
 ? __this_cpu_preempt_check+0x13/0x20
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Preallocate mmu notifier to unbreak cpu hotplug deadlock
  2017-10-05 15:58     ` Daniel Vetter
  (?)
@ 2017-10-05 16:19     ` Thomas Gleixner
  2017-10-05 16:28       ` Daniel Vetter
  -1 siblings, 1 reply; 14+ messages in thread
From: Thomas Gleixner @ 2017-10-05 16:19 UTC (permalink / raw)
  To: Daniel Vetter
  Cc: Daniel Vetter, Intel Graphics Development, LKML, Chris Wilson,
	Tvrtko Ursulin, Joonas Lahtinen, Peter Zijlstra, Sasha Levin,
	Daniel Vetter

On Thu, 5 Oct 2017, Daniel Vetter wrote:
> On Thu, Oct 05, 2017 at 05:23:20PM +0200, Thomas Gleixner wrote:
> > Aside of that, is it really required to use stomp_machine() for this
> > synchronization? We certainly have less intrusive mechansisms than that.
> 
> Yeah, the stop_machine needs to go, I'm working on something that uses
> rcu_read_lock+synchronize_rcu for this case. Probably shouldn't have
> merged even.
> 
> Now this one isn't the one I wanted to fix with this patch since there's
> clearly something dubious going on on the i915 side too.

I already wondered :)

> The proper trace, with the same part on the cpu hotplug side, highlights
> that you can't allocate a workqueue while hodling mmap_sem. That one
> matches patch description&diff a bit better :-)

> Sorry for misleading you, should have checked to attach the right one. No
> stop_machine()/i915_gem_set_wedged() in the below one.

Well the problem is more or less the same and what I said about solving it
in a different place is still valid. I think about it some more, but don't
expect wonders :)

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Preallocate mmu notifier to unbreak cpu hotplug deadlock
  2017-10-05 16:19     ` Thomas Gleixner
@ 2017-10-05 16:28       ` Daniel Vetter
  2017-10-05 18:26           ` Thomas Gleixner
  0 siblings, 1 reply; 14+ messages in thread
From: Daniel Vetter @ 2017-10-05 16:28 UTC (permalink / raw)
  To: Thomas Gleixner, Tejun Heo, Lai Jiangshan
  Cc: Daniel Vetter, Daniel Vetter, Intel Graphics Development, LKML,
	Chris Wilson, Tvrtko Ursulin, Joonas Lahtinen, Peter Zijlstra,
	Sasha Levin, Daniel Vetter

On Thu, Oct 05, 2017 at 06:19:30PM +0200, Thomas Gleixner wrote:
> On Thu, 5 Oct 2017, Daniel Vetter wrote:
> > On Thu, Oct 05, 2017 at 05:23:20PM +0200, Thomas Gleixner wrote:
> > > Aside of that, is it really required to use stomp_machine() for this
> > > synchronization? We certainly have less intrusive mechansisms than that.
> > 
> > Yeah, the stop_machine needs to go, I'm working on something that uses
> > rcu_read_lock+synchronize_rcu for this case. Probably shouldn't have
> > merged even.
> > 
> > Now this one isn't the one I wanted to fix with this patch since there's
> > clearly something dubious going on on the i915 side too.
> 
> I already wondered :)
> 
> > The proper trace, with the same part on the cpu hotplug side, highlights
> > that you can't allocate a workqueue while hodling mmap_sem. That one
> > matches patch description&diff a bit better :-)
> 
> > Sorry for misleading you, should have checked to attach the right one. No
> > stop_machine()/i915_gem_set_wedged() in the below one.
> 
> Well the problem is more or less the same and what I said about solving it
> in a different place is still valid. I think about it some more, but don't
> expect wonders :)

Yeah just want to make you aware there's now new implications in the
locking maze and that we overall decide to break the loop in the right
place. Also adding Tejun, since this is about workqueues, I forgot him.

tldr for Tejun: The new cross-release stuff in lockdep seems to indicate
that we cannot allocate a new workqueue while holding mmap_sem. Full
details in the thread.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Preallocate mmu notifier to unbreak cpu hotplug deadlock
  2017-10-05 16:28       ` Daniel Vetter
@ 2017-10-05 18:26           ` Thomas Gleixner
  0 siblings, 0 replies; 14+ messages in thread
From: Thomas Gleixner @ 2017-10-05 18:26 UTC (permalink / raw)
  To: Daniel Vetter
  Cc: Tejun Heo, Lai Jiangshan, Daniel Vetter,
	Intel Graphics Development, LKML, Chris Wilson, Tvrtko Ursulin,
	Joonas Lahtinen, Peter Zijlstra, Sasha Levin, Daniel Vetter

On Thu, 5 Oct 2017, Daniel Vetter wrote:
> On Thu, Oct 05, 2017 at 06:19:30PM +0200, Thomas Gleixner wrote:
> > On Thu, 5 Oct 2017, Daniel Vetter wrote:
> > > On Thu, Oct 05, 2017 at 05:23:20PM +0200, Thomas Gleixner wrote:
> > > > Aside of that, is it really required to use stomp_machine() for this
> > > > synchronization? We certainly have less intrusive mechansisms than that.
> > > 
> > > Yeah, the stop_machine needs to go, I'm working on something that uses
> > > rcu_read_lock+synchronize_rcu for this case. Probably shouldn't have
> > > merged even.
> > > 
> > > Now this one isn't the one I wanted to fix with this patch since there's
> > > clearly something dubious going on on the i915 side too.
> > 
> > I already wondered :)
> > 
> > > The proper trace, with the same part on the cpu hotplug side, highlights
> > > that you can't allocate a workqueue while hodling mmap_sem. That one
> > > matches patch description&diff a bit better :-)
> > 
> > > Sorry for misleading you, should have checked to attach the right one. No
> > > stop_machine()/i915_gem_set_wedged() in the below one.
> > 
> > Well the problem is more or less the same and what I said about solving it
> > in a different place is still valid. I think about it some more, but don't
> > expect wonders :)
> 
> Yeah just want to make you aware there's now new implications in the
> locking maze and that we overall decide to break the loop in the right
> place. Also adding Tejun, since this is about workqueues, I forgot him.
> 
> tldr for Tejun: The new cross-release stuff in lockdep seems to indicate
> that we cannot allocate a new workqueue while holding mmap_sem. Full
> details in the thread.

The issue is not restricted to work queues and mmap_sem. There is the
general problem of:

 cpuhotplug -> cpu_up/down -> callback -> device_create/destroy()

which creates a dependency between

      cpuhotplug_rwsem and devfs locking

So now any chain which either holds a devfs lock or has a separate
dependecy chain on the devfs locks and then calls some function which tries
to take cpuhotplug_rwsem will trigger a splat. Rightfully so ....

So in your case mmap_sem is involved in that, but that's not a
prerequisite. There are a gazillion other ways to achieve that.

The pattern which causes that is device creation in a hotplug callback and
then some other device access (read/write/ioctl) which ends up to acquire
cpuhotplug_rwsem plus a connection of both chains through arbitrary locks.

I'm trying to move out that decice_create/remove stuff from the regular
hotplug states, but I haven't found a solution for that yet which is
neither butt ugly nor creates other hard to solve problems.

Maybe a glas of wine or some sleep or both will help to get over that :)

Surely anyone is welcome to beat me to it.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Preallocate mmu notifier to unbreak cpu hotplug deadlock
@ 2017-10-05 18:26           ` Thomas Gleixner
  0 siblings, 0 replies; 14+ messages in thread
From: Thomas Gleixner @ 2017-10-05 18:26 UTC (permalink / raw)
  To: Daniel Vetter
  Cc: Peter Zijlstra, Daniel Vetter, Intel Graphics Development,
	Lai Jiangshan, LKML, Tejun Heo, Daniel Vetter, Sasha Levin

On Thu, 5 Oct 2017, Daniel Vetter wrote:
> On Thu, Oct 05, 2017 at 06:19:30PM +0200, Thomas Gleixner wrote:
> > On Thu, 5 Oct 2017, Daniel Vetter wrote:
> > > On Thu, Oct 05, 2017 at 05:23:20PM +0200, Thomas Gleixner wrote:
> > > > Aside of that, is it really required to use stomp_machine() for this
> > > > synchronization? We certainly have less intrusive mechansisms than that.
> > > 
> > > Yeah, the stop_machine needs to go, I'm working on something that uses
> > > rcu_read_lock+synchronize_rcu for this case. Probably shouldn't have
> > > merged even.
> > > 
> > > Now this one isn't the one I wanted to fix with this patch since there's
> > > clearly something dubious going on on the i915 side too.
> > 
> > I already wondered :)
> > 
> > > The proper trace, with the same part on the cpu hotplug side, highlights
> > > that you can't allocate a workqueue while hodling mmap_sem. That one
> > > matches patch description&diff a bit better :-)
> > 
> > > Sorry for misleading you, should have checked to attach the right one. No
> > > stop_machine()/i915_gem_set_wedged() in the below one.
> > 
> > Well the problem is more or less the same and what I said about solving it
> > in a different place is still valid. I think about it some more, but don't
> > expect wonders :)
> 
> Yeah just want to make you aware there's now new implications in the
> locking maze and that we overall decide to break the loop in the right
> place. Also adding Tejun, since this is about workqueues, I forgot him.
> 
> tldr for Tejun: The new cross-release stuff in lockdep seems to indicate
> that we cannot allocate a new workqueue while holding mmap_sem. Full
> details in the thread.

The issue is not restricted to work queues and mmap_sem. There is the
general problem of:

 cpuhotplug -> cpu_up/down -> callback -> device_create/destroy()

which creates a dependency between

      cpuhotplug_rwsem and devfs locking

So now any chain which either holds a devfs lock or has a separate
dependecy chain on the devfs locks and then calls some function which tries
to take cpuhotplug_rwsem will trigger a splat. Rightfully so ....

So in your case mmap_sem is involved in that, but that's not a
prerequisite. There are a gazillion other ways to achieve that.

The pattern which causes that is device creation in a hotplug callback and
then some other device access (read/write/ioctl) which ends up to acquire
cpuhotplug_rwsem plus a connection of both chains through arbitrary locks.

I'm trying to move out that decice_create/remove stuff from the regular
hotplug states, but I haven't found a solution for that yet which is
neither butt ugly nor creates other hard to solve problems.

Maybe a glas of wine or some sleep or both will help to get over that :)

Surely anyone is welcome to beat me to it.

Thanks,

	tglx



_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* ✗ Fi.CI.BAT: failure for drm/i915: Preallocate mmu notifier to unbreak cpu hotplug deadlock
  2017-10-05 13:22 ` Daniel Vetter
                   ` (3 preceding siblings ...)
  (?)
@ 2017-10-06  8:25 ` Patchwork
  -1 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2017-10-06  8:25 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Preallocate mmu notifier to unbreak cpu hotplug deadlock
URL   : https://patchwork.freedesktop.org/series/31430/
State : failure

== Summary ==

  CHK     include/config/kernel.release
  CHK     include/generated/uapi/linux/version.h
  CHK     include/generated/utsrelease.h
  CHK     include/generated/bounds.h
  CHK     include/generated/timeconst.h
  CHK     include/generated/asm-offsets.h
  CALL    scripts/checksyscalls.sh
  CHK     scripts/mod/devicetable-offsets.h
  CHK     include/generated/compile.h
  CHK     kernel/config_data.h
  CC [M]  drivers/gpu/drm/i915/i915_gem_userptr.o
drivers/gpu/drm/i915/i915_gem_userptr.c: In function ‘i915_mmu_notifier_find’:
drivers/gpu/drm/i915/i915_gem_userptr.c:217:1: error: label ‘err_wq’ defined but not used [-Werror=unused-label]
 err_wq:
 ^~~~~~
drivers/gpu/drm/i915/i915_gem_userptr.c: In function ‘i915_gem_userptr_init__mmu_notifier’:
drivers/gpu/drm/i915/i915_gem_userptr.c:222:23: error: ‘ret’ may be used uninitialized in this function [-Werror=maybe-uninitialized]
  return ret == 0 ? mn : ERR_PTR(ret);
         ~~~~~~~~~~~~~~^~~~~~~~~~~~~~
drivers/gpu/drm/i915/i915_gem_userptr.c:184:6: note: ‘ret’ was declared here
  int ret;
      ^~~
cc1: all warnings being treated as errors
scripts/Makefile.build:313: recipe for target 'drivers/gpu/drm/i915/i915_gem_userptr.o' failed
make[4]: *** [drivers/gpu/drm/i915/i915_gem_userptr.o] Error 1
scripts/Makefile.build:572: recipe for target 'drivers/gpu/drm/i915' failed
make[3]: *** [drivers/gpu/drm/i915] Error 2
scripts/Makefile.build:572: recipe for target 'drivers/gpu/drm' failed
make[2]: *** [drivers/gpu/drm] Error 2
scripts/Makefile.build:572: recipe for target 'drivers/gpu' failed
make[1]: *** [drivers/gpu] Error 2
Makefile:1019: recipe for target 'drivers' failed
make: *** [drivers] Error 2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-10-06  8:25 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-05 13:22 [PATCH] drm/i915: Preallocate mmu notifier to unbreak cpu hotplug deadlock Daniel Vetter
2017-10-05 13:22 ` Daniel Vetter
2017-10-05 13:51 ` Chris Wilson
2017-10-05 13:51   ` Chris Wilson
2017-10-05 13:57 ` Chris Wilson
2017-10-05 13:57   ` Chris Wilson
2017-10-05 15:23 ` Thomas Gleixner
2017-10-05 15:58   ` Daniel Vetter
2017-10-05 15:58     ` Daniel Vetter
2017-10-05 16:19     ` Thomas Gleixner
2017-10-05 16:28       ` Daniel Vetter
2017-10-05 18:26         ` Thomas Gleixner
2017-10-05 18:26           ` Thomas Gleixner
2017-10-06  8:25 ` ✗ Fi.CI.BAT: failure for " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.