All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915/userptr: Disallow wrapping GTT into a userptr
@ 2017-03-06 15:36 Chris Wilson
  2017-03-06 18:17 ` ✓ Fi.CI.BAT: success for " Patchwork
  2017-03-07  8:42 ` [PATCH] " Tvrtko Ursulin
  0 siblings, 2 replies; 7+ messages in thread
From: Chris Wilson @ 2017-03-06 15:36 UTC (permalink / raw)
  To: intel-gfx

If we allow the user to convert a GTT mmap address into a userptr, we
may end up in recursion hell, where currently we hit a mutex deadlock
but other possibilities include use-after-free during the
unbind/cancel_userptr.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_userptr.c | 70 +++++++++++++++++++++------------
 1 file changed, 45 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 22b46398831e..6fbccc9c83d5 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -145,7 +145,8 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 		del_object(mo);
 	spin_unlock(&mn->lock);
 
-	flush_workqueue(mn->wq);
+	if (!list_empty(&cancelled))
+		flush_workqueue(mn->wq);
 }
 
 static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
@@ -487,6 +488,24 @@ __i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
 	return ret;
 }
 
+static bool has_internal_vma(struct drm_i915_private *i915,
+			     struct mm_struct *mm,
+			     unsigned long addr,
+			     unsigned long size)
+{
+	const unsigned long end = addr + size;
+	struct vm_area_struct *vma;
+
+	for (vma = find_vma (mm, addr);
+	     vma && vma->vm_start < end;
+	     vma = vma->vm_next) {
+		if (vma->vm_ops == i915->drm.driver->gem_vm_ops)
+			return true;
+	}
+
+	return false;
+}
+
 static void
 __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 {
@@ -553,8 +572,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 }
 
 static struct sg_table *
-__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
-				      bool *active)
+__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
 {
 	struct get_pages_work *work;
 
@@ -591,7 +609,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
 	INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
 	schedule_work(&work->work);
 
-	*active = true;
+	__i915_gem_userptr_set_active(obj, true);
 	return ERR_PTR(-EAGAIN);
 }
 
@@ -599,10 +617,11 @@ static struct sg_table *
 i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 {
 	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct mm_struct *mm = obj->userptr.mm->mm;
 	struct page **pvec;
 	struct sg_table *pages;
-	int pinned, ret;
-	bool active;
+	bool internal;
+	int pinned;
 
 	/* If userspace should engineer that these pages are replaced in
 	 * the vma between us binding this page into the GTT and completion
@@ -629,37 +648,38 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 			return ERR_PTR(-EAGAIN);
 	}
 
-	/* Let the mmu-notifier know that we have begun and need cancellation */
-	ret = __i915_gem_userptr_set_active(obj, true);
-	if (ret)
-		return ERR_PTR(ret);
-
 	pvec = NULL;
 	pinned = 0;
-	if (obj->userptr.mm->mm == current->mm) {
-		pvec = drm_malloc_gfp(num_pages, sizeof(struct page *),
-				      GFP_TEMPORARY);
-		if (pvec == NULL) {
-			__i915_gem_userptr_set_active(obj, false);
-			return ERR_PTR(-ENOMEM);
-		}
 
-		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
-					       !obj->userptr.read_only, pvec);
+	down_read(&mm->mmap_sem);
+	internal = has_internal_vma(to_i915(obj->base.dev), mm,
+					    obj->userptr.ptr, obj->base.size);
+	if (internal) {
+		pinned = -EFAULT;
+	} else if (mm == current->mm) {
+		pvec = drm_malloc_gfp(num_pages, sizeof(struct page *),
+				      GFP_TEMPORARY |
+				      __GFP_NORETRY |
+				      __GFP_NOWARN);
+		if (pvec)
+			pinned = __get_user_pages_fast(obj->userptr.ptr,
+						       num_pages,
+						       !obj->userptr.read_only,
+						       pvec);
 	}
 
-	active = false;
 	if (pinned < 0)
 		pages = ERR_PTR(pinned), pinned = 0;
 	else if (pinned < num_pages)
-		pages = __i915_gem_userptr_get_pages_schedule(obj, &active);
+		pages = __i915_gem_userptr_get_pages_schedule(obj);
 	else
 		pages = __i915_gem_userptr_set_pages(obj, pvec, num_pages);
-	if (IS_ERR(pages)) {
-		__i915_gem_userptr_set_active(obj, active);
+	up_read(&mm->mmap_sem);
+
+	if (IS_ERR(pages))
 		release_pages(pvec, pinned, 0);
-	}
 	drm_free_large(pvec);
+
 	return pages;
 }
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915/userptr: Disallow wrapping GTT into a userptr
  2017-03-06 15:36 [PATCH] drm/i915/userptr: Disallow wrapping GTT into a userptr Chris Wilson
@ 2017-03-06 18:17 ` Patchwork
  2017-03-07  8:42 ` [PATCH] " Tvrtko Ursulin
  1 sibling, 0 replies; 7+ messages in thread
From: Patchwork @ 2017-03-06 18:17 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/userptr: Disallow wrapping GTT into a userptr
URL   : https://patchwork.freedesktop.org/series/20771/
State : success

== Summary ==

Series 20771v1 drm/i915/userptr: Disallow wrapping GTT into a userptr
https://patchwork.freedesktop.org/api/1.0/series/20771/revisions/1/mbox/

Test kms_flip:
        Subgroup basic-flip-vs-wf_vblank:
                fail       -> PASS       (fi-skl-6770hq) fdo#99739
Test kms_pipe_crc_basic:
        Subgroup nonblocking-crc-pipe-c:
                fail       -> PASS       (fi-skl-6770hq)

fdo#99739 https://bugs.freedesktop.org/show_bug.cgi?id=99739

fi-bdw-5557u     total:278  pass:267  dwarn:0   dfail:0   fail:0   skip:11  time: 465s
fi-bsw-n3050     total:278  pass:239  dwarn:0   dfail:0   fail:0   skip:39  time: 610s
fi-bxt-j4205     total:278  pass:259  dwarn:0   dfail:0   fail:0   skip:19  time: 540s
fi-bxt-t5700     total:278  pass:258  dwarn:0   dfail:0   fail:0   skip:20  time: 624s
fi-byt-j1900     total:278  pass:251  dwarn:0   dfail:0   fail:0   skip:27  time: 511s
fi-byt-n2820     total:278  pass:247  dwarn:0   dfail:0   fail:0   skip:31  time: 505s
fi-hsw-4770      total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time: 442s
fi-hsw-4770r     total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time: 433s
fi-ilk-650       total:278  pass:228  dwarn:0   dfail:0   fail:0   skip:50  time: 436s
fi-ivb-3520m     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time: 510s
fi-ivb-3770      total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time: 490s
fi-kbl-7500u     total:278  pass:259  dwarn:1   dfail:0   fail:0   skip:18  time: 478s
fi-skl-6260u     total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time: 512s
fi-skl-6700hq    total:278  pass:261  dwarn:0   dfail:0   fail:0   skip:17  time: 600s
fi-skl-6700k     total:278  pass:256  dwarn:4   dfail:0   fail:0   skip:18  time: 505s
fi-skl-6770hq    total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time: 552s
fi-snb-2520m     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time: 553s
fi-snb-2600      total:278  pass:249  dwarn:0   dfail:0   fail:0   skip:29  time: 425s

fefda0d99ee9245f0d4032eedb3b18cc690d8194 drm-tip: 2017y-03m-06d-17h-18m-39s UTC integration manifest
8487242 drm/i915/userptr: Disallow wrapping GTT into a userptr

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_4076/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/i915/userptr: Disallow wrapping GTT into a userptr
  2017-03-06 15:36 [PATCH] drm/i915/userptr: Disallow wrapping GTT into a userptr Chris Wilson
  2017-03-06 18:17 ` ✓ Fi.CI.BAT: success for " Patchwork
@ 2017-03-07  8:42 ` Tvrtko Ursulin
  2017-03-07  9:13   ` Chris Wilson
  1 sibling, 1 reply; 7+ messages in thread
From: Tvrtko Ursulin @ 2017-03-07  8:42 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 06/03/2017 15:36, Chris Wilson wrote:
> If we allow the user to convert a GTT mmap address into a userptr, we
> may end up in recursion hell, where currently we hit a mutex deadlock
> but other possibilities include use-after-free during the
> unbind/cancel_userptr.

I thought we already disallowed that and indeed 
igt/gem_userptr_blits/invalid-gtt-mapping succeeds here - what am I missing?

Regards,

Tvrtko


>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem_userptr.c | 70 +++++++++++++++++++++------------
>  1 file changed, 45 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 22b46398831e..6fbccc9c83d5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -145,7 +145,8 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
>  		del_object(mo);
>  	spin_unlock(&mn->lock);
>
> -	flush_workqueue(mn->wq);
> +	if (!list_empty(&cancelled))
> +		flush_workqueue(mn->wq);
>  }
>
>  static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
> @@ -487,6 +488,24 @@ __i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
>  	return ret;
>  }
>
> +static bool has_internal_vma(struct drm_i915_private *i915,
> +			     struct mm_struct *mm,
> +			     unsigned long addr,
> +			     unsigned long size)
> +{
> +	const unsigned long end = addr + size;
> +	struct vm_area_struct *vma;
> +
> +	for (vma = find_vma (mm, addr);
> +	     vma && vma->vm_start < end;
> +	     vma = vma->vm_next) {
> +		if (vma->vm_ops == i915->drm.driver->gem_vm_ops)
> +			return true;
> +	}
> +
> +	return false;
> +}
> +
>  static void
>  __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
>  {
> @@ -553,8 +572,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
>  }
>
>  static struct sg_table *
> -__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
> -				      bool *active)
> +__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
>  {
>  	struct get_pages_work *work;
>
> @@ -591,7 +609,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
>  	INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
>  	schedule_work(&work->work);
>
> -	*active = true;
> +	__i915_gem_userptr_set_active(obj, true);
>  	return ERR_PTR(-EAGAIN);
>  }
>
> @@ -599,10 +617,11 @@ static struct sg_table *
>  i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
>  {
>  	const int num_pages = obj->base.size >> PAGE_SHIFT;
> +	struct mm_struct *mm = obj->userptr.mm->mm;
>  	struct page **pvec;
>  	struct sg_table *pages;
> -	int pinned, ret;
> -	bool active;
> +	bool internal;
> +	int pinned;
>
>  	/* If userspace should engineer that these pages are replaced in
>  	 * the vma between us binding this page into the GTT and completion
> @@ -629,37 +648,38 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
>  			return ERR_PTR(-EAGAIN);
>  	}
>
> -	/* Let the mmu-notifier know that we have begun and need cancellation */
> -	ret = __i915_gem_userptr_set_active(obj, true);
> -	if (ret)
> -		return ERR_PTR(ret);
> -
>  	pvec = NULL;
>  	pinned = 0;
> -	if (obj->userptr.mm->mm == current->mm) {
> -		pvec = drm_malloc_gfp(num_pages, sizeof(struct page *),
> -				      GFP_TEMPORARY);
> -		if (pvec == NULL) {
> -			__i915_gem_userptr_set_active(obj, false);
> -			return ERR_PTR(-ENOMEM);
> -		}
>
> -		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
> -					       !obj->userptr.read_only, pvec);
> +	down_read(&mm->mmap_sem);
> +	internal = has_internal_vma(to_i915(obj->base.dev), mm,
> +					    obj->userptr.ptr, obj->base.size);
> +	if (internal) {
> +		pinned = -EFAULT;
> +	} else if (mm == current->mm) {
> +		pvec = drm_malloc_gfp(num_pages, sizeof(struct page *),
> +				      GFP_TEMPORARY |
> +				      __GFP_NORETRY |
> +				      __GFP_NOWARN);
> +		if (pvec)
> +			pinned = __get_user_pages_fast(obj->userptr.ptr,
> +						       num_pages,
> +						       !obj->userptr.read_only,
> +						       pvec);
>  	}
>
> -	active = false;
>  	if (pinned < 0)
>  		pages = ERR_PTR(pinned), pinned = 0;
>  	else if (pinned < num_pages)
> -		pages = __i915_gem_userptr_get_pages_schedule(obj, &active);
> +		pages = __i915_gem_userptr_get_pages_schedule(obj);
>  	else
>  		pages = __i915_gem_userptr_set_pages(obj, pvec, num_pages);
> -	if (IS_ERR(pages)) {
> -		__i915_gem_userptr_set_active(obj, active);
> +	up_read(&mm->mmap_sem);
> +
> +	if (IS_ERR(pages))
>  		release_pages(pvec, pinned, 0);
> -	}
>  	drm_free_large(pvec);
> +
>  	return pages;
>  }
>
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/i915/userptr: Disallow wrapping GTT into a userptr
  2017-03-07  8:42 ` [PATCH] " Tvrtko Ursulin
@ 2017-03-07  9:13   ` Chris Wilson
  2017-03-07 11:03     ` Tvrtko Ursulin
  0 siblings, 1 reply; 7+ messages in thread
From: Chris Wilson @ 2017-03-07  9:13 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Tue, Mar 07, 2017 at 08:42:26AM +0000, Tvrtko Ursulin wrote:
> 
> On 06/03/2017 15:36, Chris Wilson wrote:
> >If we allow the user to convert a GTT mmap address into a userptr, we
> >may end up in recursion hell, where currently we hit a mutex deadlock
> >but other possibilities include use-after-free during the
> >unbind/cancel_userptr.
> 
> I thought we already disallowed that and indeed
> igt/gem_userptr_blits/invalid-gtt-mapping succeeds here - what am I
> missing?

Michał presented this deadlock:

[  143.203989] gem_userptr_bli D    0   902    898 0x00000000
[  143.204054] Call Trace:
[  143.204137]  __schedule+0x511/0x1180
[  143.204195]  ? pci_mmcfg_check_reserved+0xc0/0xc0
[  143.204274]  schedule+0x57/0xe0
[  143.204327]  schedule_timeout+0x383/0x670
[  143.204374]  ? trace_hardirqs_on_caller+0x187/0x280
[  143.204457]  ? trace_hardirqs_on_thunk+0x1a/0x1c
[  143.204507]  ? usleep_range+0x110/0x110
[  143.204657]  ? irq_exit+0x89/0x100
[  143.204710]  ? retint_kernel+0x2d/0x2d
[  143.204794]  ? trace_hardirqs_on_caller+0x187/0x280
[  143.204857]  ? _raw_spin_unlock_irq+0x33/0x60
[  143.204944]  wait_for_common+0x1f0/0x2f0
[  143.205006]  ? out_of_line_wait_on_atomic_t+0x170/0x170
[  143.205103]  ? wake_up_q+0xa0/0xa0
[  143.205159]  ? flush_workqueue_prep_pwqs+0x15a/0x2c0
[  143.205237]  wait_for_completion+0x1d/0x20
[  143.205292]  flush_workqueue+0x2e9/0xbb0
[  143.205339]  ? flush_workqueue+0x163/0xbb0
[  143.205418]  ? __schedule+0x533/0x1180
[  143.205498]  ? check_flush_dependency+0x1a0/0x1a0
[  143.205681]  i915_gem_userptr_mn_invalidate_range_start+0x1c7/0x270 [i915]
[  143.205865]  ? i915_gem_userptr_dmabuf_export+0x40/0x40 [i915]
[  143.205955]  __mmu_notifier_invalidate_range_start+0xc6/0x120
[  143.206044]  ? __mmu_notifier_invalidate_range_start+0x51/0x120
[  143.206123]  zap_page_range_single+0x1c7/0x1f0
[  143.206171]  ? unmap_single_vma+0x160/0x160
[  143.206260]  ? unmap_mapping_range+0xa9/0x1b0
[  143.206308]  ? vma_interval_tree_subtree_search+0x75/0xd0
[  143.206397]  unmap_mapping_range+0x18f/0x1b0
[  143.206444]  ? zap_vma_ptes+0x70/0x70
[  143.206524]  ? __pm_runtime_resume+0x67/0xa0
[  143.206723]  i915_gem_release_mmap+0x1ba/0x1c0 [i915]
[  143.206846]  i915_vma_unbind+0x5c2/0x690 [i915]
[  143.206925]  ? __lock_is_held+0x52/0x100
[  143.207076]  i915_gem_object_set_tiling+0x1db/0x650 [i915]
[  143.207236]  i915_gem_set_tiling_ioctl+0x1d3/0x3b0 [i915]
[  143.207377]  ? i915_gem_set_tiling_ioctl+0x5/0x3b0 [i915]
[  143.207457]  drm_ioctl+0x36c/0x670
[  143.207535]  ? debug_lockdep_rcu_enabled.part.0+0x1a/0x30
[  143.207730]  ? i915_gem_object_set_tiling+0x650/0x650 [i915]
[  143.207793]  ? drm_getunique+0x120/0x120
[  143.207875]  ? __handle_mm_fault+0x996/0x14a0
[  143.207939]  ? vm_insert_page+0x340/0x340
[  143.208028]  ? up_write+0x28/0x50
[  143.208086]  ? vm_mmap_pgoff+0x160/0x190
[  143.208163]  do_vfs_ioctl+0x12c/0xa60
[  143.208218]  ? debug_lockdep_rcu_enabled+0x35/0x40
[  143.208267]  ? ioctl_preallocate+0x150/0x150
[  143.208353]  ? __do_page_fault+0x36a/0x6e0
[  143.208400]  ? mark_held_locks+0x23/0xc0
[  143.208479]  ? up_read+0x1f/0x40
[  143.208526]  ? entry_SYSCALL_64_fastpath+0x5/0xc6
[  143.208669]  ? __fget_light+0xa7/0xc0
[  143.208747]  SyS_ioctl+0x41/0x70
[  143.208808]  entry_SYSCALL_64_fastpath+0x23/0xc6

Which he created by overwriting a bunch of userptr with a GTT mmap and
doing a set-tiling to trigger an unbind. The issue is that we have a
bunch of active userptr objects pending the fault-tolerant lookup in the
worker. We must have unbound all of them when the mmap(MAP_FIXED) caused
the first invalidation, and so get_pages must have completed to rebind
them... Ah. No.

https://github.com/t3hknr/intel-gpu-tools/commit/9cde9fbc3017136dd31e0b79e94c3e19bad4ec96

Looking at that test, there is no action that would rebind the userptr
after the mmap(MAP_FIXED) so we must have not seen the unbind earlier.

The only improvement offered by this patch will be the outright
rejection of an unresolvable gup before we kick off the worker.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/i915/userptr: Disallow wrapping GTT into a userptr
  2017-03-07  9:13   ` Chris Wilson
@ 2017-03-07 11:03     ` Tvrtko Ursulin
  2017-03-07 11:18       ` Chris Wilson
  0 siblings, 1 reply; 7+ messages in thread
From: Tvrtko Ursulin @ 2017-03-07 11:03 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx, Michał Winiarski


On 07/03/2017 09:13, Chris Wilson wrote:
> On Tue, Mar 07, 2017 at 08:42:26AM +0000, Tvrtko Ursulin wrote:
>>
>> On 06/03/2017 15:36, Chris Wilson wrote:
>>> If we allow the user to convert a GTT mmap address into a userptr, we
>>> may end up in recursion hell, where currently we hit a mutex deadlock
>>> but other possibilities include use-after-free during the
>>> unbind/cancel_userptr.
>>
>> I thought we already disallowed that and indeed
>> igt/gem_userptr_blits/invalid-gtt-mapping succeeds here - what am I
>> missing?
>
> Michał presented this deadlock:
>
> [  143.203989] gem_userptr_bli D    0   902    898 0x00000000
> [  143.204054] Call Trace:
> [  143.204137]  __schedule+0x511/0x1180
> [  143.204195]  ? pci_mmcfg_check_reserved+0xc0/0xc0
> [  143.204274]  schedule+0x57/0xe0
> [  143.204327]  schedule_timeout+0x383/0x670
> [  143.204374]  ? trace_hardirqs_on_caller+0x187/0x280
> [  143.204457]  ? trace_hardirqs_on_thunk+0x1a/0x1c
> [  143.204507]  ? usleep_range+0x110/0x110
> [  143.204657]  ? irq_exit+0x89/0x100
> [  143.204710]  ? retint_kernel+0x2d/0x2d
> [  143.204794]  ? trace_hardirqs_on_caller+0x187/0x280
> [  143.204857]  ? _raw_spin_unlock_irq+0x33/0x60
> [  143.204944]  wait_for_common+0x1f0/0x2f0
> [  143.205006]  ? out_of_line_wait_on_atomic_t+0x170/0x170
> [  143.205103]  ? wake_up_q+0xa0/0xa0
> [  143.205159]  ? flush_workqueue_prep_pwqs+0x15a/0x2c0
> [  143.205237]  wait_for_completion+0x1d/0x20
> [  143.205292]  flush_workqueue+0x2e9/0xbb0
> [  143.205339]  ? flush_workqueue+0x163/0xbb0
> [  143.205418]  ? __schedule+0x533/0x1180
> [  143.205498]  ? check_flush_dependency+0x1a0/0x1a0
> [  143.205681]  i915_gem_userptr_mn_invalidate_range_start+0x1c7/0x270 [i915]
> [  143.205865]  ? i915_gem_userptr_dmabuf_export+0x40/0x40 [i915]
> [  143.205955]  __mmu_notifier_invalidate_range_start+0xc6/0x120
> [  143.206044]  ? __mmu_notifier_invalidate_range_start+0x51/0x120
> [  143.206123]  zap_page_range_single+0x1c7/0x1f0
> [  143.206171]  ? unmap_single_vma+0x160/0x160
> [  143.206260]  ? unmap_mapping_range+0xa9/0x1b0
> [  143.206308]  ? vma_interval_tree_subtree_search+0x75/0xd0
> [  143.206397]  unmap_mapping_range+0x18f/0x1b0
> [  143.206444]  ? zap_vma_ptes+0x70/0x70
> [  143.206524]  ? __pm_runtime_resume+0x67/0xa0
> [  143.206723]  i915_gem_release_mmap+0x1ba/0x1c0 [i915]
> [  143.206846]  i915_vma_unbind+0x5c2/0x690 [i915]
> [  143.206925]  ? __lock_is_held+0x52/0x100
> [  143.207076]  i915_gem_object_set_tiling+0x1db/0x650 [i915]
> [  143.207236]  i915_gem_set_tiling_ioctl+0x1d3/0x3b0 [i915]
> [  143.207377]  ? i915_gem_set_tiling_ioctl+0x5/0x3b0 [i915]
> [  143.207457]  drm_ioctl+0x36c/0x670
> [  143.207535]  ? debug_lockdep_rcu_enabled.part.0+0x1a/0x30
> [  143.207730]  ? i915_gem_object_set_tiling+0x650/0x650 [i915]
> [  143.207793]  ? drm_getunique+0x120/0x120
> [  143.207875]  ? __handle_mm_fault+0x996/0x14a0
> [  143.207939]  ? vm_insert_page+0x340/0x340
> [  143.208028]  ? up_write+0x28/0x50
> [  143.208086]  ? vm_mmap_pgoff+0x160/0x190
> [  143.208163]  do_vfs_ioctl+0x12c/0xa60
> [  143.208218]  ? debug_lockdep_rcu_enabled+0x35/0x40
> [  143.208267]  ? ioctl_preallocate+0x150/0x150
> [  143.208353]  ? __do_page_fault+0x36a/0x6e0
> [  143.208400]  ? mark_held_locks+0x23/0xc0
> [  143.208479]  ? up_read+0x1f/0x40
> [  143.208526]  ? entry_SYSCALL_64_fastpath+0x5/0xc6
> [  143.208669]  ? __fget_light+0xa7/0xc0
> [  143.208747]  SyS_ioctl+0x41/0x70
> [  143.208808]  entry_SYSCALL_64_fastpath+0x23/0xc6

That would mean another process never exits cancel_userptr, correct? Do 
we have a trace of the other end?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/i915/userptr: Disallow wrapping GTT into a userptr
  2017-03-07 11:03     ` Tvrtko Ursulin
@ 2017-03-07 11:18       ` Chris Wilson
  2017-03-07 11:30         ` Chris Wilson
  0 siblings, 1 reply; 7+ messages in thread
From: Chris Wilson @ 2017-03-07 11:18 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Tue, Mar 07, 2017 at 11:03:03AM +0000, Tvrtko Ursulin wrote:
> 
> On 07/03/2017 09:13, Chris Wilson wrote:
> >On Tue, Mar 07, 2017 at 08:42:26AM +0000, Tvrtko Ursulin wrote:
> >>
> >>On 06/03/2017 15:36, Chris Wilson wrote:
> >>>If we allow the user to convert a GTT mmap address into a userptr, we
> >>>may end up in recursion hell, where currently we hit a mutex deadlock
> >>>but other possibilities include use-after-free during the
> >>>unbind/cancel_userptr.
> >>
> >>I thought we already disallowed that and indeed
> >>igt/gem_userptr_blits/invalid-gtt-mapping succeeds here - what am I
> >>missing?
> >
> >Michał presented this deadlock:
> >
> >[  143.203989] gem_userptr_bli D    0   902    898 0x00000000
> >[  143.204054] Call Trace:
> >[  143.204137]  __schedule+0x511/0x1180
> >[  143.204195]  ? pci_mmcfg_check_reserved+0xc0/0xc0
> >[  143.204274]  schedule+0x57/0xe0
> >[  143.204327]  schedule_timeout+0x383/0x670
> >[  143.204374]  ? trace_hardirqs_on_caller+0x187/0x280
> >[  143.204457]  ? trace_hardirqs_on_thunk+0x1a/0x1c
> >[  143.204507]  ? usleep_range+0x110/0x110
> >[  143.204657]  ? irq_exit+0x89/0x100
> >[  143.204710]  ? retint_kernel+0x2d/0x2d
> >[  143.204794]  ? trace_hardirqs_on_caller+0x187/0x280
> >[  143.204857]  ? _raw_spin_unlock_irq+0x33/0x60
> >[  143.204944]  wait_for_common+0x1f0/0x2f0
> >[  143.205006]  ? out_of_line_wait_on_atomic_t+0x170/0x170
> >[  143.205103]  ? wake_up_q+0xa0/0xa0
> >[  143.205159]  ? flush_workqueue_prep_pwqs+0x15a/0x2c0
> >[  143.205237]  wait_for_completion+0x1d/0x20
> >[  143.205292]  flush_workqueue+0x2e9/0xbb0
> >[  143.205339]  ? flush_workqueue+0x163/0xbb0
> >[  143.205418]  ? __schedule+0x533/0x1180
> >[  143.205498]  ? check_flush_dependency+0x1a0/0x1a0
> >[  143.205681]  i915_gem_userptr_mn_invalidate_range_start+0x1c7/0x270 [i915]
> >[  143.205865]  ? i915_gem_userptr_dmabuf_export+0x40/0x40 [i915]
> >[  143.205955]  __mmu_notifier_invalidate_range_start+0xc6/0x120
> >[  143.206044]  ? __mmu_notifier_invalidate_range_start+0x51/0x120
> >[  143.206123]  zap_page_range_single+0x1c7/0x1f0
> >[  143.206171]  ? unmap_single_vma+0x160/0x160
> >[  143.206260]  ? unmap_mapping_range+0xa9/0x1b0
> >[  143.206308]  ? vma_interval_tree_subtree_search+0x75/0xd0
> >[  143.206397]  unmap_mapping_range+0x18f/0x1b0
> >[  143.206444]  ? zap_vma_ptes+0x70/0x70
> >[  143.206524]  ? __pm_runtime_resume+0x67/0xa0
> >[  143.206723]  i915_gem_release_mmap+0x1ba/0x1c0 [i915]
> >[  143.206846]  i915_vma_unbind+0x5c2/0x690 [i915]
> >[  143.206925]  ? __lock_is_held+0x52/0x100
> >[  143.207076]  i915_gem_object_set_tiling+0x1db/0x650 [i915]
> >[  143.207236]  i915_gem_set_tiling_ioctl+0x1d3/0x3b0 [i915]
> >[  143.207377]  ? i915_gem_set_tiling_ioctl+0x5/0x3b0 [i915]
> >[  143.207457]  drm_ioctl+0x36c/0x670
> >[  143.207535]  ? debug_lockdep_rcu_enabled.part.0+0x1a/0x30
> >[  143.207730]  ? i915_gem_object_set_tiling+0x650/0x650 [i915]
> >[  143.207793]  ? drm_getunique+0x120/0x120
> >[  143.207875]  ? __handle_mm_fault+0x996/0x14a0
> >[  143.207939]  ? vm_insert_page+0x340/0x340
> >[  143.208028]  ? up_write+0x28/0x50
> >[  143.208086]  ? vm_mmap_pgoff+0x160/0x190
> >[  143.208163]  do_vfs_ioctl+0x12c/0xa60
> >[  143.208218]  ? debug_lockdep_rcu_enabled+0x35/0x40
> >[  143.208267]  ? ioctl_preallocate+0x150/0x150
> >[  143.208353]  ? __do_page_fault+0x36a/0x6e0
> >[  143.208400]  ? mark_held_locks+0x23/0xc0
> >[  143.208479]  ? up_read+0x1f/0x40
> >[  143.208526]  ? entry_SYSCALL_64_fastpath+0x5/0xc6
> >[  143.208669]  ? __fget_light+0xa7/0xc0
> >[  143.208747]  SyS_ioctl+0x41/0x70
> >[  143.208808]  entry_SYSCALL_64_fastpath+0x23/0xc6
> 
> That would mean another process never exits cancel_userptr, correct?
> Do we have a trace of the other end?

Our worker, who is waiting on struct_mutex (in order to unbind) which we
are holding ourselves as we are in the middle of an unbind. The nasty
part here is that we can recurse into unbind on the same object (let
alone the mutex deadlock).

That's impossible if we prevent the object from activating itself on a
GTT mmap.

ARGH. The deadlock is between a non-userptr and a set of userptr (that
explains how we get to the unbind). It is just that the other userptr
are still in the process of running their workers when the time comes to
cancel the work. (Avoids the dilemma of how we ended up with bound
userptr of the GTT).

Simplest patch is then fun with obj->userptr.work, i.e. something like
	if (xchg(&obj->userptr.work, NULL)) return 0;
-Chris


-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/i915/userptr: Disallow wrapping GTT into a userptr
  2017-03-07 11:18       ` Chris Wilson
@ 2017-03-07 11:30         ` Chris Wilson
  0 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2017-03-07 11:30 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx, Michał Winiarski

On Tue, Mar 07, 2017 at 11:18:53AM +0000, Chris Wilson wrote:
> Simplest patch is then fun with obj->userptr.work, i.e. something like
> 	if (xchg(&obj->userptr.work, NULL)) return 0;

Overly simplistic. Too many holes to plug between potential users of the
pages and the current cancel_userptr.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2017-03-07 11:30 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-06 15:36 [PATCH] drm/i915/userptr: Disallow wrapping GTT into a userptr Chris Wilson
2017-03-06 18:17 ` ✓ Fi.CI.BAT: success for " Patchwork
2017-03-07  8:42 ` [PATCH] " Tvrtko Ursulin
2017-03-07  9:13   ` Chris Wilson
2017-03-07 11:03     ` Tvrtko Ursulin
2017-03-07 11:18       ` Chris Wilson
2017-03-07 11:30         ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.