* [PATCH] drm/i915/userptr: Wrap mmu_notifier inside its own rw_semaphore
@ 2018-03-26 14:59 Chris Wilson
  2018-03-26 15:33 ` ✓ Fi.CI.BAT: success for " Patchwork
                   ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Chris Wilson @ 2018-03-26 14:59 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

We've always been blatantly ignoring mmu_notifier.h:

 * Invalidation of multiple concurrent ranges may be
 * optionally permitted by the driver. Either way the
 * establishment of sptes is forbidden in the range passed to
 * invalidate_range_begin/end for the whole duration of the
 * invalidate_range_begin/end critical section.

by not preventing concurrent calls to gup while an invalidate_range is
being processed. Wrap gup and invalidate_range in a paired rw_semaphore
to allow concurrent lookups, which are then interrupted and disabled
across the invalidate_range. Further refinement can be applied by
tracking the invalidate_range versus individual gup, which should be
done using a common set of helpers, as all mmu_notifier subsystems
share the same need. I hear HMM is one such toolbox...

For the time being, assume concurrent invalidate and lookup are rare,
but not rare enough to completely ignore.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_userptr.c | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index d596a8302ca3..938107dffd37 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -47,6 +47,7 @@ struct i915_mm_struct {
 
 struct i915_mmu_notifier {
 	spinlock_t lock;
+	struct rw_semaphore sem;
 	struct hlist_node node;
 	struct mmu_notifier mn;
 	struct rb_root_cached objects;
@@ -123,6 +124,8 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 	struct interval_tree_node *it;
 	LIST_HEAD(cancelled);
 
+	down_write(&mn->sem);
+
 	if (RB_EMPTY_ROOT(&mn->objects.rb_root))
 		return;
 
@@ -156,8 +159,20 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 		flush_workqueue(mn->wq);
 }
 
+static void i915_gem_userptr_mn_invalidate_range_end(struct mmu_notifier *_mn,
+						     struct mm_struct *mm,
+						     unsigned long start,
+						     unsigned long end)
+{
+	struct i915_mmu_notifier *mn =
+		container_of(_mn, struct i915_mmu_notifier, mn);
+
+	up_write(&mn->sem);
+}
+
 static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
 	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
+	.invalidate_range_end = i915_gem_userptr_mn_invalidate_range_end,
 };
 
 static struct i915_mmu_notifier *
@@ -170,6 +185,7 @@ i915_mmu_notifier_create(struct mm_struct *mm)
 		return ERR_PTR(-ENOMEM);
 
 	spin_lock_init(&mn->lock);
+	init_rwsem(&mn->sem);
 	mn->mn.ops = &i915_gem_userptr_notifier;
 	mn->objects = RB_ROOT_CACHED;
 	mn->wq = alloc_workqueue("i915-userptr-release",
@@ -504,12 +520,15 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 
 	pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
 	if (pvec != NULL) {
+		struct i915_mmu_notifier *mn = obj->userptr.mm->mn;
 		struct mm_struct *mm = obj->userptr.mm->mm;
 		unsigned int flags = 0;
 
 		if (!obj->userptr.read_only)
 			flags |= FOLL_WRITE;
 
+		down_read(&mn->sem);
+
 		ret = -EFAULT;
 		if (mmget_not_zero(mm)) {
 			down_read(&mm->mmap_sem);
@@ -528,6 +547,8 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 			up_read(&mm->mmap_sem);
 			mmput(mm);
 		}
+
+		up_read(&mn->sem);
 	}
 
 	mutex_lock(&obj->mm.lock);
@@ -636,15 +657,21 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 	pinned = 0;
 
 	if (mm == current->mm) {
+		struct i915_mmu_notifier *mn = obj->userptr.mm->mn;
+
 		pvec = kvmalloc_array(num_pages, sizeof(struct page *),
 				      GFP_KERNEL |
 				      __GFP_NORETRY |
 				      __GFP_NOWARN);
-		if (pvec) /* defer to worker if malloc fails */
+
+		/* defer to worker if malloc fails */
+		if (pvec && down_read_trylock(&mn->sem)) {
 			pinned = __get_user_pages_fast(obj->userptr.ptr,
 						       num_pages,
 						       !obj->userptr.read_only,
 						       pvec);
+			up_read(&mn->sem);
+		}
 	}
 
 	active = false;
-- 
2.16.3


* ✓ Fi.CI.BAT: success for drm/i915/userptr: Wrap mmu_notifier inside its own rw_semaphore
  2018-03-26 14:59 [PATCH] drm/i915/userptr: Wrap mmu_notifier inside its own rw_semaphore Chris Wilson
@ 2018-03-26 15:33 ` Patchwork
  2018-03-26 15:59 ` [PATCH] " Tvrtko Ursulin
  2018-03-26 16:53 ` ✗ Fi.CI.IGT: failure for " Patchwork
  2 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2018-03-26 15:33 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/userptr: Wrap mmu_notifier inside its own rw_semaphore
URL   : https://patchwork.freedesktop.org/series/40676/
State : success

== Summary ==

Series 40676v1 drm/i915/userptr: Wrap mmu_notifier inside its own rw_semaphore
https://patchwork.freedesktop.org/api/1.0/series/40676/revisions/1/mbox/

---- Known issues:

Test gem_mmap_gtt:
        Subgroup basic-small-bo-tiledx:
                pass       -> FAIL       (fi-gdg-551) fdo#102575
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-c:
                incomplete -> PASS       (fi-bxt-dsi) fdo#103927
                incomplete -> PASS       (fi-hsw-4770) fdo#104944

fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
fdo#103927 https://bugs.freedesktop.org/show_bug.cgi?id=103927
fdo#104944 https://bugs.freedesktop.org/show_bug.cgi?id=104944

fi-bdw-5557u     total:285  pass:264  dwarn:0   dfail:0   fail:0   skip:21  time:434s
fi-bdw-gvtdvm    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:443s
fi-blb-e6850     total:285  pass:220  dwarn:1   dfail:0   fail:0   skip:64  time:381s
fi-bsw-n3050     total:285  pass:239  dwarn:0   dfail:0   fail:0   skip:46  time:535s
fi-bwr-2160      total:285  pass:180  dwarn:0   dfail:0   fail:0   skip:105 time:296s
fi-bxt-dsi       total:285  pass:255  dwarn:0   dfail:0   fail:0   skip:30  time:515s
fi-bxt-j4205     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:516s
fi-byt-j1900     total:285  pass:250  dwarn:0   dfail:0   fail:0   skip:35  time:528s
fi-byt-n2820     total:285  pass:246  dwarn:0   dfail:0   fail:0   skip:39  time:509s
fi-cfl-8700k     total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:414s
fi-cfl-u         total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:513s
fi-elk-e7500     total:285  pass:225  dwarn:1   dfail:0   fail:0   skip:59  time:424s
fi-gdg-551       total:285  pass:176  dwarn:0   dfail:0   fail:1   skip:108 time:318s
fi-glk-1         total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:540s
fi-hsw-4770      total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:403s
fi-ilk-650       total:285  pass:225  dwarn:0   dfail:0   fail:0   skip:60  time:427s
fi-ivb-3520m     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:476s
fi-ivb-3770      total:285  pass:252  dwarn:0   dfail:0   fail:0   skip:33  time:433s
fi-kbl-7500u     total:285  pass:260  dwarn:1   dfail:0   fail:0   skip:24  time:474s
fi-kbl-7567u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:473s
fi-kbl-r         total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:519s
fi-pnv-d510      total:285  pass:219  dwarn:1   dfail:0   fail:0   skip:65  time:651s
fi-skl-6260u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:444s
fi-skl-6600u     total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:534s
fi-skl-6700k2    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:510s
fi-skl-6770hq    total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:499s
fi-skl-guc       total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:431s
fi-skl-gvtdvm    total:285  pass:262  dwarn:0   dfail:0   fail:0   skip:23  time:442s
fi-snb-2520m     total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:591s
fi-snb-2600      total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:407s
Blacklisted hosts:
fi-cfl-s3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:572s
fi-cnl-psr       total:224  pass:198  dwarn:0   dfail:0   fail:1   skip:24 
fi-glk-j4005     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:491s

94f5d9189e61055e246c31106b3810dc17ddee9c drm-tip: 2018y-03m-23d-23h-41m-40s UTC integration manifest
42235f2436b0 drm/i915/userptr: Wrap mmu_notifier inside its own rw_semaphore

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8492/issues.html

* Re: [PATCH] drm/i915/userptr: Wrap mmu_notifier inside its own rw_semaphore
  2018-03-26 14:59 [PATCH] drm/i915/userptr: Wrap mmu_notifier inside its own rw_semaphore Chris Wilson
  2018-03-26 15:33 ` ✓ Fi.CI.BAT: success for " Patchwork
@ 2018-03-26 15:59 ` Tvrtko Ursulin
  2018-03-26 16:28   ` Chris Wilson
  2018-03-26 16:53 ` ✗ Fi.CI.IGT: failure for " Patchwork
  2 siblings, 1 reply; 14+ messages in thread
From: Tvrtko Ursulin @ 2018-03-26 15:59 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Daniel Vetter


On 26/03/2018 15:59, Chris Wilson wrote:
> We've always been blatantly ignoring mmu_notifier.h:
> 
>   * Invalidation of multiple concurrent ranges may be
>   * optionally permitted by the driver. Either way the
>   * establishment of sptes is forbidden in the range passed to
>   * invalidate_range_begin/end for the whole duration of the
>   * invalidate_range_begin/end critical section.
> 
> by not preventing concurrent calls to gup while an invalidate_range is
> being processed. Wrap gup and invalidate_range in a paired rw_semaphore
> to allow concurrent lookups, that are then interrupted and disabled
> across the invalidate_range. Further refinement can be applied by
> tracking the invalidate_range versus individual gup, which should be
> done using a common set of helpers for all mmu_notifier subsystems share
> the same need. I hear HMM is one such toolbox...
> 
> For the time being, assume concurrent invalidate and lookup are rare,
> but not rare enough to completely ignore.

I think I suggested a few times we should just "ban" the object on first 
invalidate and never ever for its lifetime allow it to obtain backing 
store again. I just don't remember why we decided not to go with that 
approach. :( Thinking about it now I still don't see that this
restriction would be a problem, and it would simplify things.
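
(Purely as an illustration, and not part of the posted patch, such a ban
could boil down to a made-up flag that every get_pages path checks:)

/* Hypothetical sketch of "ban on first invalidate"; the userptr.banned
 * field does not exist in the patch above. */
static void i915_gem_userptr_ban(struct drm_i915_gem_object *obj)
{
	/* Once set, the object never reacquires backing store. */
	WRITE_ONCE(obj->userptr.banned, true);
}

static int i915_gem_userptr_check_banned(struct drm_i915_gem_object *obj)
{
	/* Each get_pages path would bail out early for a banned object. */
	return READ_ONCE(obj->userptr.banned) ? -EFAULT : 0;
}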

With more locks I am quite fearful of what lockdep will say, but let's see...

> [Signed-off-by block and diff quoted in full above; snipped]

Simple enough but I don't dare say anything until results from shards 
arrive.

Regards,

Tvrtko

* Re: [PATCH] drm/i915/userptr: Wrap mmu_notifier inside its own rw_semaphore
  2018-03-26 15:59 ` [PATCH] " Tvrtko Ursulin
@ 2018-03-26 16:28   ` Chris Wilson
  2018-03-26 19:45     ` Daniel Vetter
  0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2018-03-26 16:28 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: Daniel Vetter

Quoting Tvrtko Ursulin (2018-03-26 16:59:20)
> 
> On 26/03/2018 15:59, Chris Wilson wrote:
> > We've always been blatantly ignoring mmu_notifier.h:
> > 
> >   * Invalidation of multiple concurrent ranges may be
> >   * optionally permitted by the driver. Either way the
> >   * establishment of sptes is forbidden in the range passed to
> >   * invalidate_range_begin/end for the whole duration of the
> >   * invalidate_range_begin/end critical section.
> > 
> > by not preventing concurrent calls to gup while an invalidate_range is
> > being processed. Wrap gup and invalidate_range in a paired rw_semaphore
> > to allow concurrent lookups, that are then interrupted and disabled
> > across the invalidate_range. Further refinement can be applied by
> > tracking the invalidate_range versus individual gup, which should be
> > done using a common set of helpers for all mmu_notifier subsystems share
> > the same need. I hear HMM is one such toolbox...
> > 
> > For the time being, assume concurrent invalidate and lookup are rare,
> > but not rare enough to completely ignore.
> 
> I think I suggested a few times we should just "ban" the object on first 
> invalidate and never ever for its lifetime allow it to obtain backing 
> store again. I just don't remember why we decided not to go with that 
> approach. :( Thinking about it now I still don't see that this 
> restriction would be a problem and would simplify things.

You still have the problem where it is being banned as we are trying to
instantiate it the first time. Imo, we are re-implementing mmap_sem
crudely. (Even more so when every mmu notifier must implement the same
code, and more than one will be called every time the mm is touched.)

And we can get perfectly innocent invalidates, e.g. mprotect.
 
> With more locks I am quite fearful what lockdep will say, but lets see...

Same here.
-Chris

* ✗ Fi.CI.IGT: failure for drm/i915/userptr: Wrap mmu_notifier inside its own rw_semaphore
  2018-03-26 14:59 [PATCH] drm/i915/userptr: Wrap mmu_notifier inside its own rw_semaphore Chris Wilson
  2018-03-26 15:33 ` ✓ Fi.CI.BAT: success for " Patchwork
  2018-03-26 15:59 ` [PATCH] " Tvrtko Ursulin
@ 2018-03-26 16:53 ` Patchwork
  2018-03-26 20:08   ` Chris Wilson
  2 siblings, 1 reply; 14+ messages in thread
From: Patchwork @ 2018-03-26 16:53 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/userptr: Wrap mmu_notifier inside its own rw_semaphore
URL   : https://patchwork.freedesktop.org/series/40676/
State : failure

== Summary ==

---- Possible new issues:

Test drv_module_reload:
        Subgroup basic-no-display:
                pass       -> DMESG-WARN (shard-snb)
Test gem_tiled_swapping:
        Subgroup non-threaded:
                pass       -> DMESG-WARN (shard-hsw)
Test gem_userptr_blits:
        Subgroup coherency-unsync:
                pass       -> INCOMPLETE (shard-hsw)
                pass       -> INCOMPLETE (shard-snb)
        Subgroup dmabuf-sync:
                pass       -> DMESG-WARN (shard-hsw)
        Subgroup dmabuf-unsync:
                pass       -> DMESG-WARN (shard-snb)
        Subgroup forbidden-operations:
                pass       -> DMESG-FAIL (shard-apl)
                pass       -> DMESG-FAIL (shard-hsw)
                pass       -> DMESG-FAIL (shard-snb)
        Subgroup invalid-gtt-mapping:
                pass       -> INCOMPLETE (shard-apl)
                pass       -> INCOMPLETE (shard-hsw)
                pass       -> INCOMPLETE (shard-snb)
        Subgroup invalid-null-pointer:
                pass       -> INCOMPLETE (shard-apl)
                pass       -> INCOMPLETE (shard-hsw)
                pass       -> INCOMPLETE (shard-snb)
        Subgroup map-fixed-invalidate-busy:
                pass       -> DMESG-WARN (shard-hsw)
                pass       -> DMESG-WARN (shard-snb)
        Subgroup map-fixed-invalidate-busy-gup:
                pass       -> DMESG-WARN (shard-apl)
                pass       -> DMESG-WARN (shard-hsw)
                pass       -> DMESG-WARN (shard-snb)
        Subgroup map-fixed-invalidate-gup:
                pass       -> DMESG-WARN (shard-snb)
        Subgroup map-fixed-invalidate-overlap-busy:
                pass       -> DMESG-WARN (shard-apl)
        Subgroup map-fixed-invalidate-overlap-gup:
                pass       -> DMESG-WARN (shard-apl)
                pass       -> DMESG-WARN (shard-hsw)
                pass       -> DMESG-WARN (shard-snb)
        Subgroup process-exit-busy:
                pass       -> DMESG-WARN (shard-hsw)
                pass       -> DMESG-WARN (shard-snb)
        Subgroup process-exit-gtt-busy:
                pass       -> DMESG-WARN (shard-hsw)
                pass       -> DMESG-WARN (shard-snb)
        Subgroup sync-unmap-cycles:
                pass       -> DMESG-WARN (shard-apl)
        Subgroup unsync-unmap:
                pass       -> INCOMPLETE (shard-apl)
                pass       -> INCOMPLETE (shard-hsw)
                pass       -> INCOMPLETE (shard-snb)
        Subgroup unsync-unmap-after-close:
                pass       -> INCOMPLETE (shard-apl)
                pass       -> INCOMPLETE (shard-hsw)
                pass       -> INCOMPLETE (shard-snb)
        Subgroup unsync-unmap-cycles:
                pass       -> INCOMPLETE (shard-apl)
                pass       -> INCOMPLETE (shard-hsw)
                pass       -> INCOMPLETE (shard-snb)
Test kms_addfb_basic:
        Subgroup unused-handle:
                pass       -> INCOMPLETE (shard-apl)

---- Known issues:

Test kms_flip:
        Subgroup 2x-dpms-vs-vblank-race:
                fail       -> PASS       (shard-hsw) fdo#103060 +2
        Subgroup 2x-flip-vs-expired-vblank-interruptible:
                fail       -> PASS       (shard-hsw) fdo#102887
        Subgroup 2x-flip-vs-wf_vblank-interruptible:
                pass       -> FAIL       (shard-hsw) fdo#100368 +1
Test kms_rotation_crc:
        Subgroup sprite-rotation-180:
                fail       -> PASS       (shard-snb) fdo#103925
        Subgroup sprite-rotation-270:
                pass       -> FAIL       (shard-apl) fdo#103356

fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#103925 https://bugs.freedesktop.org/show_bug.cgi?id=103925
fdo#103356 https://bugs.freedesktop.org/show_bug.cgi?id=103356

shard-apl        total:3262 pass:1693 dwarn:5   dfail:1   fail:7   skip:1549 time:10290s
shard-hsw        total:3235 pass:1638 dwarn:8   dfail:1   fail:2   skip:1579 time:9745s
shard-snb        total:3235 pass:1254 dwarn:9   dfail:1   fail:3   skip:1962 time:5613s
Blacklisted hosts:
shard-kbl        total:3274 pass:1806 dwarn:13  dfail:2   fail:9   skip:1438 time:7933s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8492/shards.html

* Re: [PATCH] drm/i915/userptr: Wrap mmu_notifier inside its own rw_semaphore
  2018-03-26 16:28   ` Chris Wilson
@ 2018-03-26 19:45     ` Daniel Vetter
  0 siblings, 0 replies; 14+ messages in thread
From: Daniel Vetter @ 2018-03-26 19:45 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Daniel Vetter

On Mon, Mar 26, 2018 at 05:28:58PM +0100, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-03-26 16:59:20)
> > 
> > On 26/03/2018 15:59, Chris Wilson wrote:
> > > We've always been blatantly ignoring mmu_notifier.h:
> > > 
> > >   * Invalidation of multiple concurrent ranges may be
> > >   * optionally permitted by the driver. Either way the
> > >   * establishment of sptes is forbidden in the range passed to
> > >   * invalidate_range_begin/end for the whole duration of the
> > >   * invalidate_range_begin/end critical section.
> > > 
> > > by not preventing concurrent calls to gup while an invalidate_range is
> > > being processed. Wrap gup and invalidate_range in a paired rw_semaphore
> > > to allow concurrent lookups, that are then interrupted and disabled
> > > across the invalidate_range. Further refinement can be applied by
> > > tracking the invalidate_range versus individual gup, which should be
> > > done using a common set of helpers for all mmu_notifier subsystems share
> > > the same need. I hear HMM is one such toolbox...
> > > 
> > > For the time being, assume concurrent invalidate and lookup are rare,
> > > but not rare enough to completely ignore.
> > 
> > I think I suggested a few times we should just "ban" the object on first 
> > invalidate and never ever for its lifetime allow it to obtain backing 
> > store again. I just don't remember why we decided not to go with that 
> > approach. :( Thinking about it now I still don't see that this 
> > restriction would be a problem and would simplify things.
> 
> You still have the problem where it is being banned as we are trying to
> instantiate it the first time. Imo, we are re-implementing mmap_sem
> crudely. (Even more so when every mmu notifier must implement the same
> code, and more than one will be called every time the mm is touched.)

Jerome Glisse is promising to get that fixed and provide neater
primitives. Apparently we can reuse parts of the HMM stuff since that's
built as a helper library (even though the docs don't make it clear).

> And we can get perfectly innocent invalidates, e.g. mprotect.

Also hugepage consolidation, memory migration, swapout and everything else
which really should work.

> > With more locks I am quite fearful what lockdep will say, but lets see...
> 
> Same here.

Cross-release enabled lockdep would be even better, because of our various
worker tricks which do hide lock deps from current lockdep. We should
still have the required fixups hanging around in topic/core-for-CI.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* Re: ✗ Fi.CI.IGT: failure for drm/i915/userptr: Wrap mmu_notifier inside its own rw_semaphore
  2018-03-26 16:53 ` ✗ Fi.CI.IGT: failure for " Patchwork
@ 2018-03-26 20:08   ` Chris Wilson
  2018-03-26 22:38     ` Chris Wilson
  2018-03-27  7:01     ` Daniel Vetter
  0 siblings, 2 replies; 14+ messages in thread
From: Chris Wilson @ 2018-03-26 20:08 UTC (permalink / raw)
  To: Patchwork; +Cc: intel-gfx

Quoting Patchwork (2018-03-26 17:53:44)
> Test gem_userptr_blits:
>         Subgroup coherency-unsync:
>                 pass       -> INCOMPLETE (shard-hsw)

Forgot that obj->userptr.mn may not exist.

>         Subgroup dmabuf-sync:
>                 pass       -> DMESG-WARN (shard-hsw)

But this is the tricky lockdep one, warning of the recursion from gup
into mmu_invalidate_range, i.e.

down_read(&i915_mmu_notifier->sem);
down_read(&mm_struct->mmap_sem);
	gup();
		down_write(&i915_mmu_notifier->sem);

That seems a genuine deadlock... So I wonder how we managed to get a
lockdep splat and not a dead machine. Maybe gup never triggers the
recursion for our set of flags? Hmm.
-Chris


* Re: ✗ Fi.CI.IGT: failure for drm/i915/userptr: Wrap mmu_notifier inside its own rw_semaphore
  2018-03-26 20:08   ` Chris Wilson
@ 2018-03-26 22:38     ` Chris Wilson
  2018-03-27  6:48       ` Daniel Vetter
  2018-03-27  7:01     ` Daniel Vetter
  1 sibling, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2018-03-26 22:38 UTC (permalink / raw)
  To: Patchwork; +Cc: intel-gfx

Quoting Chris Wilson (2018-03-26 21:08:33)
> Quoting Patchwork (2018-03-26 17:53:44)
> > Test gem_userptr_blits:
> >         Subgroup coherency-unsync:
> >                 pass       -> INCOMPLETE (shard-hsw)
> 
> Forgot that obj->userptr.mn may not exist.
> 
> >         Subgroup dmabuf-sync:
> >                 pass       -> DMESG-WARN (shard-hsw)
> 
> But this is the tricky lockdep one, warning of the recursion from gup
> into mmu_invalidate_range, i.e.
> 
> down_read(&i915_mmu_notifier->sem);
> down_read(&mm_struct->mmap_sem);
>         gup();
>                 down_write(&i915_mmut_notifier->sem);
> 
> That seems a genuine deadlock... So I wonder how we managed to get a
> lockdep splat and not a dead machine. Maybe gup never triggers the
> recursion for our set of flags? Hmm.

In another universe, CI found

[  255.666496] ======================================================
[  255.666498] WARNING: possible circular locking dependency detected
[  255.666500] 4.16.0-rc6-CI-Trybot_1944+ #1 Tainted: G     U  W       
[  255.666502] ------------------------------------------------------
[  255.666503] gem_userptr_bli/4794 is trying to acquire lock:
[  255.666505]  (fs_reclaim){+.+.}, at: [<00000000e1b95c73>] fs_reclaim_acquire.part.12+0x0/0x30
[  255.666510] 
               but task is already holding lock:
[  255.666512]  (&mn->sem){+.+.}, at: [<000000007c59ba79>] i915_gem_userptr_mn_invalidate_range_start+0x3e/0x1a0 [i915]
[  255.666553] 
               which lock already depends on the new lock.

[  255.666555] 
               the existing dependency chain (in reverse order) is:
[  255.666557] 
               -> #2 (&mn->sem){+.+.}:
[  255.666578]        i915_gem_userptr_mn_invalidate_range_start+0x3e/0x1a0 [i915]
[  255.666581]        __mmu_notifier_invalidate_range_start+0x73/0xb0
[  255.666584]        zap_page_range_single+0xcc/0xe0
[  255.666586]        unmap_mapping_pages+0xd4/0x110
[  255.666606]        i915_vma_revoke_mmap+0x7e/0x1c0 [i915]
[  255.666625]        i915_vma_unbind+0x60a/0xa10 [i915]
[  255.666644]        i915_gem_object_set_tiling+0xf6/0x5b0 [i915]
[  255.666662]        i915_gem_set_tiling_ioctl+0x262/0x2f0 [i915]
[  255.666665]        drm_ioctl_kernel+0x60/0xa0
[  255.666667]        drm_ioctl+0x27e/0x320
[  255.666669]        do_vfs_ioctl+0x8a/0x670
[  255.666670]        SyS_ioctl+0x36/0x70
[  255.666672]        do_syscall_64+0x65/0x1a0
[  255.666675]        entry_SYSCALL_64_after_hwframe+0x42/0xb7
[  255.666676] 
               -> #1 (&mapping->i_mmap_rwsem){++++}:
[  255.666680]        unmap_mapping_pages+0x3d/0x110
[  255.666698]        i915_vma_revoke_mmap+0x7e/0x1c0 [i915]
[  255.666716]        i915_vma_unbind+0x60a/0xa10 [i915]
[  255.666734]        i915_gem_object_unbind+0xa0/0x130 [i915]
[  255.666751]        i915_gem_shrink+0x2d1/0x5d0 [i915]
[  255.666767]        i915_drop_caches_set+0x92/0x190 [i915]
[  255.666770]        simple_attr_write+0xab/0xc0
[  255.666772]        full_proxy_write+0x4b/0x70
[  255.666774]        __vfs_write+0x1e/0x130
[  255.666776]        vfs_write+0xbd/0x1b0
[  255.666778]        SyS_write+0x40/0xa0
[  255.666779]        do_syscall_64+0x65/0x1a0
[  255.666781]        entry_SYSCALL_64_after_hwframe+0x42/0xb7
[  255.666783] 
               -> #0 (fs_reclaim){+.+.}:
[  255.666786]        fs_reclaim_acquire.part.12+0x24/0x30
[  255.666788]        __alloc_pages_nodemask+0x1f1/0x11d0
[  255.666790]        __get_free_pages+0x9/0x40
[  255.666792]        __pud_alloc+0x25/0xb0
[  255.666794]        copy_page_range+0xa75/0xaf0
[  255.666796]        copy_process.part.7+0x1267/0x1d90
[  255.666798]        _do_fork+0xc0/0x6b0
[  255.666800]        do_syscall_64+0x65/0x1a0
[  255.666801]        entry_SYSCALL_64_after_hwframe+0x42/0xb7
[  255.666803] 
               other info that might help us debug this:

[  255.666805] Chain exists of:
                 fs_reclaim --> &mapping->i_mmap_rwsem --> &mn->sem

[  255.666809]  Possible unsafe locking scenario:

[  255.666811]        CPU0                    CPU1
[  255.666812]        ----                    ----
[  255.666814]   lock(&mn->sem);
[  255.666815]                                lock(&mapping->i_mmap_rwsem);
[  255.666817]                                lock(&mn->sem);
[  255.666819]   lock(fs_reclaim);
[  255.666821] 

So a shrinker deadlock. That doesn't look easy to wriggle out of, as we
have a random chunk of code that's between invalidate_range_start and
invalidate_range_end.
-Chris

* Re: ✗ Fi.CI.IGT: failure for drm/i915/userptr: Wrap mmu_notifier inside its own rw_semaphore
  2018-03-26 22:38     ` Chris Wilson
@ 2018-03-27  6:48       ` Daniel Vetter
  2018-03-27  7:19         ` Chris Wilson
  0 siblings, 1 reply; 14+ messages in thread
From: Daniel Vetter @ 2018-03-27  6:48 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Mon, Mar 26, 2018 at 11:38:55PM +0100, Chris Wilson wrote:
> Quoting Chris Wilson (2018-03-26 21:08:33)
> > Quoting Patchwork (2018-03-26 17:53:44)
> > > Test gem_userptr_blits:
> > >         Subgroup coherency-unsync:
> > >                 pass       -> INCOMPLETE (shard-hsw)
> > 
> > Forgot that obj->userptr.mn may not exist.
> > 
> > >         Subgroup dmabuf-sync:
> > >                 pass       -> DMESG-WARN (shard-hsw)
> > 
> > But this is the tricky lockdep one, warning of the recursion from gup
> > into mmu_invalidate_range, i.e.
> > 
> > down_read(&i915_mmu_notifier->sem);
> > down_read(&mm_struct->mmap_sem);
> >         gup();
> >                 down_write(&i915_mmut_notifier->sem);
> > 
> > That seems a genuine deadlock... So I wonder how we managed to get a
> > lockdep splat and not a dead machine. Maybe gup never triggers the
> > recursion for our set of flags? Hmm.
> 
> In another universe, CI found
> 
> [lockdep splat snipped; full trace quoted earlier in the thread. Chain:
>  fs_reclaim --> &mapping->i_mmap_rwsem --> &mn->sem]
> 
> So a shrinker deadlock. That doesn't look easy to wriggle out of, as we
> have a random chunk of code that's between invalidate_range_start and
> invalidate_range_end.

Christian König said something like "with this design you can't allocate
anything while holding locks you might need from the mmu notifier".
Because reclaim eats into the mmu notifiers.
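
(Illustration of that rule with made-up names; an allocation made while
holding a lock that our own notifier callback needs can recurse into it
via direct reclaim:)

/* Not real driver code: the pattern being warned against. */
static void forbidden_pattern(struct mutex *driver_lock)
{
	void *p;

	mutex_lock(driver_lock);
	p = kmalloc(64, GFP_KERNEL);	/* may enter direct reclaim, which may
					 * unmap pages and call our
					 * invalidate_range_start(), which in
					 * turn wants driver_lock: deadlock */
	kfree(p);
	mutex_unlock(driver_lock);
}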

But hey it's before coffee, so probably best you just ignore me :-)
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* Re: ✗ Fi.CI.IGT: failure for drm/i915/userptr: Wrap mmu_notifier inside its own rw_semaphore
  2018-03-26 20:08   ` Chris Wilson
  2018-03-26 22:38     ` Chris Wilson
@ 2018-03-27  7:01     ` Daniel Vetter
  2018-03-27  7:21       ` Chris Wilson
  1 sibling, 1 reply; 14+ messages in thread
From: Daniel Vetter @ 2018-03-27  7:01 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Mon, Mar 26, 2018 at 09:08:33PM +0100, Chris Wilson wrote:
> Quoting Patchwork (2018-03-26 17:53:44)
> > Test gem_userptr_blits:
> >         Subgroup coherency-unsync:
> >                 pass       -> INCOMPLETE (shard-hsw)
> 
> Forgot that obj->userptr.mn may not exist.
> 
> >         Subgroup dmabuf-sync:
> >                 pass       -> DMESG-WARN (shard-hsw)
> 
> But this is the tricky lockdep one, warning of the recursion from gup
> into mmu_invalidate_range, i.e.
> 
> down_read(&i915_mmu_notifier->sem);
> down_read(&mm_struct->mmap_sem);
> 	gup();
> 		down_write(&i915_mmut_notifier->sem);
> 
> That seems a genuine deadlock... So I wonder how we managed to get a
> lockdep splat and not a dead machine. Maybe gup never triggers the
> recursion for our set of flags? Hmm.

Coffee starting to kick in. If we gup a range it's likely the mm won't
kick out the same range, but something else. I guess we'd need a really
huge userptr bo which can't fit into core completely to actually have a
reliable chance at triggering this. Would probably deadlock the box :-/

I think Jerome's recommendation is the sequence counter stuff from kvm,
plus retrying forever on the gup side. That would convert the same
deadlock into a livelock, but well can't have it all :-) And I think once
you've killed the task the gup worker hopefully realizes it's wasting time
and gives up.
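
(Very roughly, and with made-up names rather than the actual kvm or i915
code, that pattern is a counter bumped around each invalidate, along the
lines of kvm's mmu_notifier_seq/mmu_notifier_count, plus a check-and-retry
after gup:)

struct userptr_notifier {		/* illustrative only */
	spinlock_t lock;
	unsigned long seq;		/* bumped by invalidate_range_start/end */
	int invalidate_count;		/* >0 while an invalidate is in flight */
};

static bool userptr_gup_retry_needed(struct userptr_notifier *n,
				     unsigned long seq_before_gup)
{
	bool retry;

	spin_lock(&n->lock);
	retry = n->invalidate_count || n->seq != seq_before_gup;
	spin_unlock(&n->lock);

	return retry;	/* caller drops the pinned pages and retries gup */
}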

For the kvm stuff: Look at #intel-gfx scrollback, we discussed all the
necessary bits. Plus Jerome showed some new helpers that would avoid the
hand-rolling.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* Re: ✗ Fi.CI.IGT: failure for drm/i915/userptr: Wrap  mmu_notifier inside its own rw_semaphore
  2018-03-27  6:48       ` Daniel Vetter
@ 2018-03-27  7:19         ` Chris Wilson
  2018-03-27 10:01           ` Daniel Vetter
  0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2018-03-27  7:19 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

Quoting Daniel Vetter (2018-03-27 07:48:00)
> On Mon, Mar 26, 2018 at 11:38:55PM +0100, Chris Wilson wrote:
> > Quoting Chris Wilson (2018-03-26 21:08:33)
> > > Quoting Patchwork (2018-03-26 17:53:44)
> > > > Test gem_userptr_blits:
> > > >         Subgroup coherency-unsync:
> > > >                 pass       -> INCOMPLETE (shard-hsw)
> > > 
> > > Forgot that obj->userptr.mn may not exist.
> > > 
> > > >         Subgroup dmabuf-sync:
> > > >                 pass       -> DMESG-WARN (shard-hsw)
> > > 
> > > But this is the tricky lockdep one, warning of the recursion from gup
> > > into mmu_invalidate_range, i.e.
> > > 
> > > down_read(&i915_mmu_notifier->sem);
> > > down_read(&mm_struct->mmap_sem);
> > >         gup();
> > >                 down_write(&i915_mmut_notifier->sem);
> > > 
> > > That seems a genuine deadlock... So I wonder how we managed to get a
> > > lockdep splat and not a dead machine. Maybe gup never triggers the
> > > recursion for our set of flags? Hmm.
> > 
> > In another universe, CI found
> > 
> > [lockdep splat snipped; full trace quoted earlier in the thread. Chain:
> >  fs_reclaim --> &mapping->i_mmap_rwsem --> &mn->sem]
> > 
> > So a shrinker deadlock. That doesn't look easy to wriggle out of, as we
> > have a random chunk of code that's between invalidate_range_start and
> > invalidate_range_end.
> 
> Christian König said something like "with this design you can't allocate
> anything while holding locks you might need from the mmu notifier".
> Because reclaim eats into the mmu notifiers.

Oh, we aren't allocating from under the locks. That's the first thing I
double checked. Afaict, the only window is the code in the caller that's
between range_start/range_end. If that also can't touch fs_reclaim, then
this is just a red herring...
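
(Roughly, that window is everything the core mm runs between the two
callbacks; the function name here is made up and the signatures match the
start/end style used in the patch above:)

static void zap_range_example(struct mm_struct *mm,
			      unsigned long start, unsigned long end)
{
	mmu_notifier_invalidate_range_start(mm, start, end); /* -> down_write(&mn->sem) */
	/* unmap/zap of the range runs here with mn->sem write-held;
	 * whether anything in this window can touch fs_reclaim is
	 * exactly the open question */
	mmu_notifier_invalidate_range_end(mm, start, end);   /* -> up_write(&mn->sem) */
}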
-Chris

* Re: ✗ Fi.CI.IGT: failure for drm/i915/userptr: Wrap  mmu_notifier inside its own rw_semaphore
  2018-03-27  7:01     ` Daniel Vetter
@ 2018-03-27  7:21       ` Chris Wilson
  0 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2018-03-27  7:21 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

Quoting Daniel Vetter (2018-03-27 08:01:17)
> On Mon, Mar 26, 2018 at 09:08:33PM +0100, Chris Wilson wrote:
> > Quoting Patchwork (2018-03-26 17:53:44)
> > > Test gem_userptr_blits:
> > >         Subgroup coherency-unsync:
> > >                 pass       -> INCOMPLETE (shard-hsw)
> > 
> > Forgot that obj->userptr.mn may not exist.
> > 
> > >         Subgroup dmabuf-sync:
> > >                 pass       -> DMESG-WARN (shard-hsw)
> > 
> > But this is the tricky lockdep one, warning of the recursion from gup
> > into mmu_invalidate_range, i.e.
> > 
> > down_read(&i915_mmu_notifier->sem);
> > down_read(&mm_struct->mmap_sem);
> >       gup();
> >               down_write(&i915_mmut_notifier->sem);
> > 
> > That seems a genuine deadlock... So I wonder how we managed to get a
> > lockdep splat and not a dead machine. Maybe gup never triggers the
> > recursion for our set of flags? Hmm.
> 
> Coffee starting to kick in. If we gup a range it's likely the mm won't
> kick out the same range, but something else. I guess we'd need a really
> huge userptr bo which can't fit into core completely to actually have a
> reliably chance at triggering this. Would probably deadlock the box :-/
> 
> I think Jerome's recommendation is the sequence counter stuff from kvm,
> plus retrying forever on the gup side. That would convert the same
> deadlock into a livelock, but well can't have it all :-)

Pre-coffee state also thinks it would trigger the second fs_reclaim
lockdep if it was sufficiently annotated.
-Chris

* Re: ✗ Fi.CI.IGT: failure for drm/i915/userptr: Wrap  mmu_notifier inside its own rw_semaphore
  2018-03-27  7:19         ` Chris Wilson
@ 2018-03-27 10:01           ` Daniel Vetter
  2018-03-27 10:10             ` Chris Wilson
  0 siblings, 1 reply; 14+ messages in thread
From: Daniel Vetter @ 2018-03-27 10:01 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, Mar 27, 2018 at 08:19:33AM +0100, Chris Wilson wrote:
> Quoting Daniel Vetter (2018-03-27 07:48:00)
> > On Mon, Mar 26, 2018 at 11:38:55PM +0100, Chris Wilson wrote:
> > > Quoting Chris Wilson (2018-03-26 21:08:33)
> > > > Quoting Patchwork (2018-03-26 17:53:44)
> > > > > Test gem_userptr_blits:
> > > > >         Subgroup coherency-unsync:
> > > > >                 pass       -> INCOMPLETE (shard-hsw)
> > > > 
> > > > Forgot that obj->userptr.mn may not exist.
> > > > 
> > > > >         Subgroup dmabuf-sync:
> > > > >                 pass       -> DMESG-WARN (shard-hsw)
> > > > 
> > > > But this is the tricky lockdep one, warning of the recursion from gup
> > > > into mmu_invalidate_range, i.e.
> > > > 
> > > > down_read(&i915_mmu_notifier->sem);
> > > > down_read(&mm_struct->mmap_sem);
> > > >         gup();
> > > >                 down_write(&i915_mmut_notifier->sem);
> > > > 
> > > > That seems a genuine deadlock... So I wonder how we managed to get a
> > > > lockdep splat and not a dead machine. Maybe gup never triggers the
> > > > recursion for our set of flags? Hmm.
> > > 
> > > In another universe, CI found
> > > 
> > > [lockdep splat snipped; full trace quoted earlier in the thread. Chain:
> > >  fs_reclaim --> &mapping->i_mmap_rwsem --> &mn->sem]
> > > 
> > > So a shrinker deadlock. That doesn't look easy to wriggle out of, as we
> > > have a random chunk of code that's between invalidate_range_start and
> > > invalidate_range_end.
> > 
> > Christian König said something like "with this design you can't allocate
> > anything while holding locks you might need from the mmu notifier".
> > Because reclaim eats into the mmu notifiers.
> 
> Oh, we aren't allocating from under the locks. That's the first thing I
> double checked. Afaict, the only window is the code in the caller that's
> between range_start/range_end. If that also can't touch fs_reclaim, then
> this is just a red herring...

Where is that happening? You left out the backtrace for the fs_reclaim hit
that closed the loop. Is it our code, or just something else going on?
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* Re: ✗ Fi.CI.IGT: failure for drm/i915/userptr: Wrap   mmu_notifier inside its own rw_semaphore
  2018-03-27 10:01           ` Daniel Vetter
@ 2018-03-27 10:10             ` Chris Wilson
  0 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2018-03-27 10:10 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

Quoting Daniel Vetter (2018-03-27 11:01:09)
> On Tue, Mar 27, 2018 at 08:19:33AM +0100, Chris Wilson wrote:
> > Quoting Daniel Vetter (2018-03-27 07:48:00)
> > > On Mon, Mar 26, 2018 at 11:38:55PM +0100, Chris Wilson wrote:
> > > > Quoting Chris Wilson (2018-03-26 21:08:33)
> > > > > Quoting Patchwork (2018-03-26 17:53:44)
> > > > > > Test gem_userptr_blits:
> > > > > >         Subgroup coherency-unsync:
> > > > > >                 pass       -> INCOMPLETE (shard-hsw)
> > > > > 
> > > > > Forgot that obj->userptr.mn may not exist.
> > > > > 
> > > > > >         Subgroup dmabuf-sync:
> > > > > >                 pass       -> DMESG-WARN (shard-hsw)
> > > > > 
> > > > > But this is the tricky lockdep one, warning of the recursion from gup
> > > > > into mmu_invalidate_range, i.e.
> > > > > 
> > > > > down_read(&i915_mmu_notifier->sem);
> > > > > down_read(&mm_struct->mmap_sem);
> > > > >         gup();
> > > > >                 down_write(&i915_mmut_notifier->sem);
> > > > > 
> > > > > That seems a genuine deadlock... So I wonder how we managed to get a
> > > > > lockdep splat and not a dead machine. Maybe gup never triggers the
> > > > > recursion for our set of flags? Hmm.
> > > > 
> > > > In another universe, CI found
> > > > 
> > > > [  255.666496] ======================================================
> > > > [  255.666498] WARNING: possible circular locking dependency detected
> > > > [  255.666500] 4.16.0-rc6-CI-Trybot_1944+ #1 Tainted: G     U  W       
> > > > [  255.666502] ------------------------------------------------------
> > > > [  255.666503] gem_userptr_bli/4794 is trying to acquire lock:
> > > > [  255.666505]  (fs_reclaim){+.+.}, at: [<00000000e1b95c73>] fs_reclaim_acquire.part.12+0x0/0x30
> > > > [  255.666510] 
> > > >                but task is already holding lock:
> > > > [  255.666512]  (&mn->sem){+.+.}, at: [<000000007c59ba79>] i915_gem_userptr_mn_invalidate_range_start+0x3e/0x1a0 [i915]
> > > > [  255.666553] 
> > > >                which lock already depends on the new lock.
> > > > 
> > > > [  255.666555] 
> > > >                the existing dependency chain (in reverse order) is:
> > > > [  255.666557] 
> > > >                -> #2 (&mn->sem){+.+.}:
> > > > [  255.666578]        i915_gem_userptr_mn_invalidate_range_start+0x3e/0x1a0 [i915]
> > > > [  255.666581]        __mmu_notifier_invalidate_range_start+0x73/0xb0
> > > > [  255.666584]        zap_page_range_single+0xcc/0xe0
> > > > [  255.666586]        unmap_mapping_pages+0xd4/0x110
> > > > [  255.666606]        i915_vma_revoke_mmap+0x7e/0x1c0 [i915]
> > > > [  255.666625]        i915_vma_unbind+0x60a/0xa10 [i915]
> > > > [  255.666644]        i915_gem_object_set_tiling+0xf6/0x5b0 [i915]
> > > > [  255.666662]        i915_gem_set_tiling_ioctl+0x262/0x2f0 [i915]
> > > > [  255.666665]        drm_ioctl_kernel+0x60/0xa0
> > > > [  255.666667]        drm_ioctl+0x27e/0x320
> > > > [  255.666669]        do_vfs_ioctl+0x8a/0x670
> > > > [  255.666670]        SyS_ioctl+0x36/0x70
> > > > [  255.666672]        do_syscall_64+0x65/0x1a0
> > > > [  255.666675]        entry_SYSCALL_64_after_hwframe+0x42/0xb7
> > > > [  255.666676] 
> > > >                -> #1 (&mapping->i_mmap_rwsem){++++}:
> > > > [  255.666680]        unmap_mapping_pages+0x3d/0x110
> > > > [  255.666698]        i915_vma_revoke_mmap+0x7e/0x1c0 [i915]
> > > > [  255.666716]        i915_vma_unbind+0x60a/0xa10 [i915]
> > > > [  255.666734]        i915_gem_object_unbind+0xa0/0x130 [i915]
> > > > [  255.666751]        i915_gem_shrink+0x2d1/0x5d0 [i915]
> > > > [  255.666767]        i915_drop_caches_set+0x92/0x190 [i915]
> > > > [  255.666770]        simple_attr_write+0xab/0xc0
> > > > [  255.666772]        full_proxy_write+0x4b/0x70
> > > > [  255.666774]        __vfs_write+0x1e/0x130
> > > > [  255.666776]        vfs_write+0xbd/0x1b0
> > > > [  255.666778]        SyS_write+0x40/0xa0
> > > > [  255.666779]        do_syscall_64+0x65/0x1a0
> > > > [  255.666781]        entry_SYSCALL_64_after_hwframe+0x42/0xb7
> > > > [  255.666783] 
> > > >                -> #0 (fs_reclaim){+.+.}:
> > > > [  255.666786]        fs_reclaim_acquire.part.12+0x24/0x30
> > > > [  255.666788]        __alloc_pages_nodemask+0x1f1/0x11d0
> > > > [  255.666790]        __get_free_pages+0x9/0x40
> > > > [  255.666792]        __pud_alloc+0x25/0xb0
> > > > [  255.666794]        copy_page_range+0xa75/0xaf0
> > > > [  255.666796]        copy_process.part.7+0x1267/0x1d90
> > > > [  255.666798]        _do_fork+0xc0/0x6b0
> > > > [  255.666800]        do_syscall_64+0x65/0x1a0
> > > > [  255.666801]        entry_SYSCALL_64_after_hwframe+0x42/0xb7
> > > > [  255.666803] 
> > > >                other info that might help us debug this:
> > > > 
> > > > [  255.666805] Chain exists of:
> > > >                  fs_reclaim --> &mapping->i_mmap_rwsem --> &mn->sem
> > > > 
> > > > [  255.666809]  Possible unsafe locking scenario:
> > > > 
> > > > [  255.666811]        CPU0                    CPU1
> > > > [  255.666812]        ----                    ----
> > > > [  255.666814]   lock(&mn->sem);
> > > > [  255.666815]                                lock(&mapping->i_mmap_rwsem);
> > > > [  255.666817]                                lock(&mn->sem);
> > > > [  255.666819]   lock(fs_reclaim);
> > > > [  255.666821] 
> > > > 
> > > > So a shrinker deadlock. That doesn't look easy to wriggle out of, as we
> > > > have a random chunk of code that's between invalidate_range_start and
> > > > invalidate_range_end.
> > > 
> > > Christian König said something like "with this design you can't allocate
> > > anything while holding locks you might need from the mmu notifier".
> > > Because reclaim eats into the mmu notifiers.
> > 
> > Oh, we aren't allocating from under the locks. That's the first thing I
> > double checked. Afaict, the only window is the code in the caller that's
> > between range_start/range_end. If that also can't touch fs_reclaim, then
> > this is just a red herring...
> 
> Where is that happening? You left out the backtrace for the fs_reclaim hit
> that closed the loop. Is it our code, or just something else going on?

It was from fork:

[   48.013723] 3 locks held by gem_userptr_bli/1336:
[   48.013725]  #0:  (&mm->mmap_sem){++++}, at: [<000000007282305d>] copy_process.part.7+0xe29/0x1d90
[   48.013730]  #1:  (&mm->mmap_sem/1){+.+.}, at: [<000000008e133750>] copy_process.part.7+0xe4d/0x1d90
[   48.013735]  #2:  (&mn->sem){+.+.}, at: [<0000000076ac255d>] i915_gem_userptr_mn_invalidate_range_start+0x3e/0x1a0 [i915]
[   48.013759] 
               stack backtrace:
[   48.013762] CPU: 1 PID: 1336 Comm: gem_userptr_bli Tainted: G     U           4.16.0-rc6-CI-Trybot_1944+ #1
[   48.013765] Hardware name:  /NUC7i5BNB, BIOS BNKBL357.86A.0054.2017.1025.1822 10/25/2017
[   48.013768] Call Trace:
[   48.013771]  dump_stack+0x5f/0x86
[   48.013774]  print_circular_bug.isra.18+0x1d0/0x2c0
[   48.013777]  __lock_acquire+0x14ae/0x1b60
[   48.013781]  ? lock_acquire+0xaf/0x200
[   48.013783]  lock_acquire+0xaf/0x200
[   48.013785]  ? page_frag_free+0x60/0x60
[   48.013788]  fs_reclaim_acquire.part.12+0x24/0x30
[   48.013790]  ? page_frag_free+0x60/0x60
[   48.013792]  __alloc_pages_nodemask+0x1f1/0x11d0
[   48.013814]  ? i915_gem_userptr_mn_invalidate_range_start+0x3e/0x1a0 [i915]
[   48.013817]  ? reacquire_held_locks+0xa2/0x170
[   48.013819]  ? reacquire_held_locks+0xa2/0x170
[   48.013840]  ? i915_gem_userptr_mn_invalidate_range_start+0x3e/0x1a0 [i915]
[   48.013844]  ? __mmu_notifier_invalidate_range_start+0x7b/0xb0
[   48.013847]  __get_free_pages+0x9/0x40
[   48.013849]  __pud_alloc+0x25/0xb0
[   48.013851]  copy_page_range+0xa75/0xaf0
[   48.013854]  ? lock_acquire+0xaf/0x200
[   48.013857]  copy_process.part.7+0x1267/0x1d90
[   48.013861]  _do_fork+0xc0/0x6b0
[   48.013864]  ? entry_SYSCALL_64_after_hwframe+0x52/0xb7
[   48.013866]  ? do_syscall_64+0x19/0x1a0
[   48.013868]  do_syscall_64+0x65/0x1a0
[   48.013871]  entry_SYSCALL_64_after_hwframe+0x42/0xb7

The "? i915_gem" stuff looked incriminating but have to be a mirage given
the caller, but we are inside a range_start/range_end when it does the
alloc_pages.
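
(Purely for illustration, not the posted patch: one way out of that bind is to
replace the rw_semaphore with an invalidation counter that range_start/range_end
bump, and to retry the lookup if the counter moved, so the gup side never holds
a sleeping lock that the reclaim path might need. All names below, i.e.
sketch_notifier, invalidate_seq and friends, are made up for the sketch and are
not taken from the i915 code.)

#include <linux/compiler.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

struct sketch_notifier {
	spinlock_t lock;
	unsigned long invalidate_seq;	/* odd while an invalidation is in flight */
	wait_queue_head_t wait;
};

static void sketch_init(struct sketch_notifier *sn)
{
	spin_lock_init(&sn->lock);
	sn->invalidate_seq = 0;
	init_waitqueue_head(&sn->wait);
}

static void sketch_range_start(struct sketch_notifier *sn)
{
	spin_lock(&sn->lock);
	sn->invalidate_seq++;		/* now odd: lookups must wait and retry */
	spin_unlock(&sn->lock);
}

static void sketch_range_end(struct sketch_notifier *sn)
{
	spin_lock(&sn->lock);
	sn->invalidate_seq++;		/* even again: invalidation finished */
	spin_unlock(&sn->lock);
	wake_up_all(&sn->wait);
}

/* lookup side: sample the counter before gup, retry if it moved */
static unsigned long sketch_read_begin(struct sketch_notifier *sn)
{
	unsigned long seq;

	do {
		wait_event(sn->wait, !(READ_ONCE(sn->invalidate_seq) & 1));
		seq = READ_ONCE(sn->invalidate_seq);
	} while (seq & 1);

	return seq;
}

static bool sketch_read_retry(struct sketch_notifier *sn, unsigned long seq)
{
	return READ_ONCE(sn->invalidate_seq) != seq;
}

/*
 * The get-pages worker would then loop along the lines of:
 *
 *	do {
 *		seq = sketch_read_begin(sn);
 *		err = get_user_pages(...);
 *	} while (!err && sketch_read_retry(sn, seq));
 *
 * A real implementation has to do the final retry check while publishing
 * the pages under sn->lock (elided here), so a concurrent invalidation
 * cannot slip in between the check and the publish.
 */

That is roughly the shape that later grew into the mmu_interval_notifier
helpers; the relevant point for this thread is that the lookup side then
holds no sleeping lock, so it cannot take part in the
fs_reclaim -> i_mmap_rwsem -> mn->sem chain above.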
-Chris


Thread overview: 14+ messages
2018-03-26 14:59 [PATCH] drm/i915/userptr: Wrap mmu_notifier inside its own rw_semaphore Chris Wilson
2018-03-26 15:33 ` ✓ Fi.CI.BAT: success for " Patchwork
2018-03-26 15:59 ` [PATCH] " Tvrtko Ursulin
2018-03-26 16:28   ` Chris Wilson
2018-03-26 19:45     ` Daniel Vetter
2018-03-26 16:53 ` ✗ Fi.CI.IGT: failure for " Patchwork
2018-03-26 20:08   ` Chris Wilson
2018-03-26 22:38     ` Chris Wilson
2018-03-27  6:48       ` Daniel Vetter
2018-03-27  7:19         ` Chris Wilson
2018-03-27 10:01           ` Daniel Vetter
2018-03-27 10:10             ` Chris Wilson
2018-03-27  7:01     ` Daniel Vetter
2018-03-27  7:21       ` Chris Wilson
