All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/i915: Revoke mmaps and prevent access to fence registers across reset
@ 2019-01-30 14:28 Chris Wilson
  2019-01-30 14:28 ` [PATCH 2/2] drm/i915: Force the GPU reset upon wedging Chris Wilson
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: Chris Wilson @ 2019-01-30 14:28 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

Previously, we were able to rely on the recursive properties of
struct_mutex to allow us to serialise revoking mmaps and reacquiring the
FENCE registers with their being clobbered across a global device reset.
I then proceeded to throw out the baby with the bath water in order to
pursue a struct_mutex-less reset.

Perusing LWN for alternative strategies, the dilemma on how to serialise
access to a global resource on one side was answered by
https://lwn.net/Articles/202847/ -- Sleepable RCU:

    1  int readside(void) {
    2      int idx;
    3      rcu_read_lock();
    4	   if (nomoresrcu) {
    5          rcu_read_unlock();
    6	       return -EINVAL;
    7      }
    8	   idx = srcu_read_lock(&ss);
    9	   rcu_read_unlock();
    10	   /* SRCU read-side critical section. */
    11	   srcu_read_unlock(&ss, idx);
    12	   return 0;
    13 }
    14
    15 void cleanup(void)
    16 {
    17     nomoresrcu = 1;
    18     synchronize_rcu();
    19     synchronize_srcu(&ss);
    20     cleanup_srcu_struct(&ss);
    21 }

No more worrying about stop_machine, just an uber-complex mutex,
optimised for reads, with the overhead pushed to the rare reset path.

However, we do run the risk of a deadlock as we allocate underneath the
SRCU read lock, and the allocation may require a GPU reset, causing a
dependency cycle via the in-flight requests. We resolve that by declaring
the driver wedged and cancelling all in-flight rendering.

v2: Use expedited rcu barriers to match our earlier timing
characteristics.

Testcase: igt/gem_mmap_gtt/hang
Fixes: eb8d0f5af4ec ("drm/i915: Remove GPU reset dependence on struct_mutex")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c           |  12 +-
 drivers/gpu/drm/i915/i915_drv.h               |  18 +--
 drivers/gpu/drm/i915/i915_gem.c               |  56 +++-------
 drivers/gpu/drm/i915/i915_gem_fence_reg.c     |  26 -----
 drivers/gpu/drm/i915/i915_gpu_error.h         |  12 +-
 drivers/gpu/drm/i915/i915_reset.c             | 103 +++++++++++-------
 drivers/gpu/drm/i915/i915_reset.h             |   4 +
 .../gpu/drm/i915/selftests/mock_gem_device.c  |   1 +
 8 files changed, 98 insertions(+), 134 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index fa2c226fc779..2cea263b4d79 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1281,14 +1281,11 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	intel_wakeref_t wakeref;
 	enum intel_engine_id id;
 
+	seq_printf(m, "Reset flags: %lx\n", dev_priv->gpu_error.flags);
 	if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
-		seq_puts(m, "Wedged\n");
+		seq_puts(m, "\tWedged\n");
 	if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags))
-		seq_puts(m, "Reset in progress: struct_mutex backoff\n");
-	if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
-		seq_puts(m, "Waiter holding struct mutex\n");
-	if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
-		seq_puts(m, "struct_mutex blocked for reset\n");
+		seq_puts(m, "\tDevice (global) reset in progress\n");
 
 	if (!i915_modparams.enable_hangcheck) {
 		seq_puts(m, "Hangcheck disabled\n");
@@ -3885,9 +3882,6 @@ i915_wedged_set(void *data, u64 val)
 	 * while it is writing to 'i915_wedged'
 	 */
 
-	if (i915_reset_backoff(&i915->gpu_error))
-		return -EAGAIN;
-
 	i915_handle_error(i915, val, I915_ERROR_CAPTURE,
 			  "Manually set wedged engine mask = %llx", val);
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d1cecc588fbb..7946c271ab24 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2987,7 +2987,12 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
 	i915_gem_object_unpin_pages(obj);
 }
 
-int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
+static inline int __must_check
+i915_mutex_lock_interruptible(struct drm_device *dev)
+{
+	return mutex_lock_interruptible(&dev->struct_mutex);
+}
+
 int i915_gem_dumb_create(struct drm_file *file_priv,
 			 struct drm_device *dev,
 			 struct drm_mode_create_dumb *args);
@@ -3004,21 +3009,11 @@ int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno);
 struct i915_request *
 i915_gem_find_active_request(struct intel_engine_cs *engine);
 
-static inline bool i915_reset_backoff(struct i915_gpu_error *error)
-{
-	return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags));
-}
-
 static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
 {
 	return unlikely(test_bit(I915_WEDGED, &error->flags));
 }
 
-static inline bool i915_reset_backoff_or_wedged(struct i915_gpu_error *error)
-{
-	return i915_reset_backoff(error) | i915_terminally_wedged(error);
-}
-
 static inline u32 i915_reset_count(struct i915_gpu_error *error)
 {
 	return READ_ONCE(error->reset_count);
@@ -3091,7 +3086,6 @@ struct drm_i915_fence_reg *
 i915_reserve_fence(struct drm_i915_private *dev_priv);
 void i915_unreserve_fence(struct drm_i915_fence_reg *fence);
 
-void i915_gem_revoke_fences(struct drm_i915_private *dev_priv);
 void i915_gem_restore_fences(struct drm_i915_private *dev_priv);
 
 void i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e802af64d628..caccff87a2a1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -100,47 +100,6 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
 	spin_unlock(&dev_priv->mm.object_stat_lock);
 }
 
-static int
-i915_gem_wait_for_error(struct i915_gpu_error *error)
-{
-	int ret;
-
-	might_sleep();
-
-	/*
-	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
-	 * userspace. If it takes that long something really bad is going on and
-	 * we should simply try to bail out and fail as gracefully as possible.
-	 */
-	ret = wait_event_interruptible_timeout(error->reset_queue,
-					       !i915_reset_backoff(error),
-					       I915_RESET_TIMEOUT);
-	if (ret == 0) {
-		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
-		return -EIO;
-	} else if (ret < 0) {
-		return ret;
-	} else {
-		return 0;
-	}
-}
-
-int i915_mutex_lock_interruptible(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	int ret;
-
-	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
-	if (ret)
-		return ret;
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
 static u32 __i915_gem_park(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
@@ -1869,6 +1828,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	pgoff_t page_offset;
+	int srcu;
 	int ret;
 
 	/* Sanity check that we allow writing into this object */
@@ -1908,7 +1868,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 		goto err_unlock;
 	}
 
-
 	/* Now pin it into the GTT as needed */
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 				       PIN_MAPPABLE |
@@ -1946,9 +1905,15 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	if (ret)
 		goto err_unpin;
 
+	srcu = i915_reset_lock(dev_priv);
+	if (srcu < 0) {
+		ret = srcu;
+		goto err_unpin;
+	}
+
 	ret = i915_vma_pin_fence(vma);
 	if (ret)
-		goto err_unpin;
+		goto err_reset;
 
 	/* Finally, remap it using the new GTT offset */
 	ret = remap_io_mapping(area,
@@ -1969,6 +1934,8 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 
 err_fence:
 	i915_vma_unpin_fence(vma);
+err_reset:
+	i915_reset_unlock(dev_priv, srcu);
 err_unpin:
 	__i915_vma_unpin(vma);
 err_unlock:
@@ -5324,6 +5291,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
 	mutex_init(&dev_priv->gpu_error.wedge_mutex);
+	init_srcu_struct(&dev_priv->gpu_error.srcu);
 
 	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
 
@@ -5356,6 +5324,8 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
 	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
 	WARN_ON(dev_priv->mm.object_count);
 
+	cleanup_srcu_struct(&dev_priv->gpu_error.srcu);
+
 	kmem_cache_destroy(dev_priv->priorities);
 	kmem_cache_destroy(dev_priv->dependencies);
 	kmem_cache_destroy(dev_priv->requests);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 46e259661294..bdb745d5747f 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -435,32 +435,6 @@ void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
 	list_add(&fence->link, &fence->i915->mm.fence_list);
 }
 
-/**
- * i915_gem_revoke_fences - revoke fence state
- * @dev_priv: i915 device private
- *
- * Removes all GTT mmappings via the fence registers. This forces any user
- * of the fence to reacquire that fence before continuing with their access.
- * One use is during GPU reset where the fence register is lost and we need to
- * revoke concurrent userspace access via GTT mmaps until the hardware has been
- * reset and the fence registers have been restored.
- */
-void i915_gem_revoke_fences(struct drm_i915_private *dev_priv)
-{
-	int i;
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
-
-		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
-
-		if (fence->vma)
-			i915_vma_revoke_mmap(fence->vma);
-	}
-}
-
 /**
  * i915_gem_restore_fences - restore fence state
  * @dev_priv: i915 device private
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 53b1f22dd365..4e797c552b96 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -231,12 +231,10 @@ struct i915_gpu_error {
 	/**
 	 * flags: Control various stages of the GPU reset
 	 *
-	 * #I915_RESET_BACKOFF - When we start a reset, we want to stop any
-	 * other users acquiring the struct_mutex. To do this we set the
-	 * #I915_RESET_BACKOFF bit in the error flags when we detect a reset
-	 * and then check for that bit before acquiring the struct_mutex (in
-	 * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a
-	 * secondary role in preventing two concurrent global reset attempts.
+	 * #I915_RESET_BACKOFF - When we start a global reset, we need to
+	 * serialise with any other users attempting to do the same, and
+	 * any global resources that may be clobbered by the reset (such as
+	 * FENCE registers).
 	 *
 	 * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to
 	 * acquire the struct_mutex to reset an engine, we need an explicit
@@ -272,6 +270,8 @@ struct i915_gpu_error {
 	 */
 	wait_queue_head_t reset_queue;
 
+	struct srcu_struct srcu;
+
 	struct i915_gpu_restart *restart;
 };
 
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 4462007a681c..0a6784cec4fd 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -639,6 +639,31 @@ static void reset_prepare_engine(struct intel_engine_cs *engine)
 	engine->reset.prepare(engine);
 }
 
+static void revoke_mmaps(struct drm_i915_private *i915)
+{
+	int i;
+
+	for (i = 0; i < i915->num_fence_regs; i++) {
+		struct i915_vma *vma = i915->fence_regs[i].vma;
+		struct drm_vma_offset_node *node;
+		u64 vma_offset;
+
+		if (!vma)
+			continue;
+
+		GEM_BUG_ON(vma->fence != &i915->fence_regs[i]);
+		if (!i915_vma_has_userfault(vma))
+			continue;
+
+		node = &vma->obj->base.vma_node;
+		vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;
+		unmap_mapping_range(i915->drm.anon_inode->i_mapping,
+				    drm_vma_node_offset_addr(node) + vma_offset,
+				    vma->size,
+				    1);
+	}
+}
+
 static void reset_prepare(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
@@ -648,6 +673,7 @@ static void reset_prepare(struct drm_i915_private *i915)
 		reset_prepare_engine(engine);
 
 	intel_uc_sanitize(i915);
+	revoke_mmaps(i915);
 }
 
 static int gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
@@ -911,50 +937,22 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 	return ret;
 }
 
-struct __i915_reset {
-	struct drm_i915_private *i915;
-	unsigned int stalled_mask;
-};
-
-static int __i915_reset__BKL(void *data)
-{
-	struct __i915_reset *arg = data;
-	int err;
-
-	err = intel_gpu_reset(arg->i915, ALL_ENGINES);
-	if (err)
-		return err;
-
-	return gt_reset(arg->i915, arg->stalled_mask);
-}
-
-#if RESET_UNDER_STOP_MACHINE
-/*
- * XXX An alternative to using stop_machine would be to park only the
- * processes that have a GGTT mmap. By remote parking the threads (SIGSTOP)
- * we should be able to prevent their memmory accesses via the lost fence
- * registers over the course of the reset without the potential recursive
- * of mutexes between the pagefault handler and reset.
- *
- * See igt/gem_mmap_gtt/hang
- */
-#define __do_reset(fn, arg) stop_machine(fn, arg, NULL)
-#else
-#define __do_reset(fn, arg) fn(arg)
-#endif
-
 static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
 {
-	struct __i915_reset arg = { i915, stalled_mask };
 	int err, i;
 
-	err = __do_reset(__i915_reset__BKL, &arg);
+	/* Flush everyone currently using a resource about to be clobbered */
+	synchronize_srcu(&i915->gpu_error.srcu);
+
+	err = intel_gpu_reset(i915, ALL_ENGINES);
 	for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
-		msleep(100);
-		err = __do_reset(__i915_reset__BKL, &arg);
+		msleep(10 * (i + 1));
+		err = intel_gpu_reset(i915, ALL_ENGINES);
 	}
+	if (err)
+		return err;
 
-	return err;
+	return gt_reset(i915, stalled_mask);
 }
 
 /**
@@ -1274,9 +1272,12 @@ void i915_handle_error(struct drm_i915_private *i915,
 		wait_event(i915->gpu_error.reset_queue,
 			   !test_bit(I915_RESET_BACKOFF,
 				     &i915->gpu_error.flags));
-		goto out;
+		goto out; /* piggy-back on the other reset */
 	}
 
+	/* Make sure i915_reset_lock() sees the I915_RESET_BACKOFF */
+	synchronize_rcu_expedited();
+
 	/* Prevent any other reset-engine attempt. */
 	for_each_engine(engine, i915, tmp) {
 		while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
@@ -1300,6 +1301,32 @@ void i915_handle_error(struct drm_i915_private *i915,
 	intel_runtime_pm_put(i915, wakeref);
 }
 
+int i915_reset_lock(struct drm_i915_private *i915)
+{
+	int srcu;
+
+	rcu_read_lock();
+	while (test_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags)) {
+		rcu_read_unlock();
+
+		if (wait_event_interruptible(i915->gpu_error.reset_queue,
+					     !test_bit(I915_RESET_BACKOFF,
+						       &i915->gpu_error.flags)))
+			return -EINTR;
+
+		rcu_read_lock();
+	}
+	srcu = srcu_read_lock(&i915->gpu_error.srcu);
+	rcu_read_unlock();
+
+	return srcu;
+}
+
+void i915_reset_unlock(struct drm_i915_private *i915, int tag)
+{
+	srcu_read_unlock(&i915->gpu_error.srcu, tag);
+}
+
 bool i915_reset_flush(struct drm_i915_private *i915)
 {
 	int err;
diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h
index f2d347f319df..eb412e0158da 100644
--- a/drivers/gpu/drm/i915/i915_reset.h
+++ b/drivers/gpu/drm/i915/i915_reset.h
@@ -9,6 +9,7 @@
 
 #include <linux/compiler.h>
 #include <linux/types.h>
+#include <linux/srcu.h>
 
 struct drm_i915_private;
 struct intel_engine_cs;
@@ -32,6 +33,9 @@ int i915_reset_engine(struct intel_engine_cs *engine,
 void i915_reset_request(struct i915_request *rq, bool guilty);
 bool i915_reset_flush(struct drm_i915_private *i915);
 
+int i915_reset_lock(struct drm_i915_private *i915);
+void i915_reset_unlock(struct drm_i915_private *i915, int tag);
+
 bool intel_has_gpu_reset(struct drm_i915_private *i915);
 bool intel_has_reset_engine(struct drm_i915_private *i915);
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 14ae46fda49f..074a0d9cbf26 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -189,6 +189,7 @@ struct drm_i915_private *mock_gem_device(void)
 
 	init_waitqueue_head(&i915->gpu_error.wait_queue);
 	init_waitqueue_head(&i915->gpu_error.reset_queue);
+	init_srcu_struct(&i915->gpu_error.srcu);
 	mutex_init(&i915->gpu_error.wedge_mutex);
 
 	i915->wq = alloc_ordered_workqueue("mock", 0);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] drm/i915: Force the GPU reset upon wedging
  2019-01-30 14:28 [PATCH 1/2] drm/i915: Revoke mmaps and prevent access to fence registers across reset Chris Wilson
@ 2019-01-30 14:28 ` Chris Wilson
  2019-01-30 16:01 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/2] drm/i915: Revoke mmaps and prevent access to fence registers across reset Patchwork
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 6+ messages in thread
From: Chris Wilson @ 2019-01-30 14:28 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

When declaring the GPU wedged, we do need to hit the GPU with the reset
hammer so that its state matches our presumed state during cleanup. If
the reset fails, it fails, and we may be unhappy but wedged. However, if
we are testing our wedge/unwedge handling, the desync carries over into
the next test and promptly explodes.

References: https://bugs.freedesktop.org/show_bug.cgi?id=106702
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_reset.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 0a6784cec4fd..db37fe819504 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -532,9 +532,6 @@ typedef int (*reset_func)(struct drm_i915_private *,
 
 static reset_func intel_get_gpu_reset(struct drm_i915_private *i915)
 {
-	if (!i915_modparams.reset)
-		return NULL;
-
 	if (INTEL_GEN(i915) >= 8)
 		return gen8_reset_engines;
 	else if (INTEL_GEN(i915) >= 6)
@@ -599,6 +596,9 @@ bool intel_has_gpu_reset(struct drm_i915_private *i915)
 	if (USES_GUC(i915))
 		return false;
 
+	if (!i915_modparams.reset)
+		return NULL;
+
 	return intel_get_gpu_reset(i915);
 }
 
@@ -823,7 +823,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 		reset_prepare_engine(engine);
 
 	/* Even if the GPU reset fails, it should still stop the engines */
-	if (INTEL_GEN(i915) >= 5)
+	if (!INTEL_INFO(i915)->gpu_reset_clobbers_display)
 		intel_gpu_reset(i915, ALL_ENGINES);
 
 	for_each_engine(engine, i915, id) {
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [1/2] drm/i915: Revoke mmaps and prevent access to fence registers across reset
  2019-01-30 14:28 [PATCH 1/2] drm/i915: Revoke mmaps and prevent access to fence registers across reset Chris Wilson
  2019-01-30 14:28 ` [PATCH 2/2] drm/i915: Force the GPU reset upon wedging Chris Wilson
@ 2019-01-30 16:01 ` Patchwork
  2019-01-30 16:28 ` ✓ Fi.CI.BAT: success " Patchwork
  2019-01-30 20:31 ` ✗ Fi.CI.IGT: failure " Patchwork
  3 siblings, 0 replies; 6+ messages in thread
From: Patchwork @ 2019-01-30 16:01 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] drm/i915: Revoke mmaps and prevent access to fence registers across reset
URL   : https://patchwork.freedesktop.org/series/55988/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Revoke mmaps and prevent access to fence registers across reset
-drivers/gpu/drm/i915/i915_gem.c:986:39: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gem.c:986:39: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem.c:986:39: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem.c:986:39: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_reset.c:1304:5: warning: context imbalance in 'i915_reset_lock' - different lock contexts for basic block
+drivers/gpu/drm/i915/i915_reset.c:1325:6: warning: context imbalance in 'i915_reset_unlock' - unexpected unlock
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3549:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3543:16: warning: expression using sizeof(void)

Commit: drm/i915: Force the GPU reset upon wedging
Okay!

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Revoke mmaps and prevent access to fence registers across reset
  2019-01-30 14:28 [PATCH 1/2] drm/i915: Revoke mmaps and prevent access to fence registers across reset Chris Wilson
  2019-01-30 14:28 ` [PATCH 2/2] drm/i915: Force the GPU reset upon wedging Chris Wilson
  2019-01-30 16:01 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/2] drm/i915: Revoke mmaps and prevent access to fence registers across reset Patchwork
@ 2019-01-30 16:28 ` Patchwork
  2019-01-30 20:31 ` ✗ Fi.CI.IGT: failure " Patchwork
  3 siblings, 0 replies; 6+ messages in thread
From: Patchwork @ 2019-01-30 16:28 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] drm/i915: Revoke mmaps and prevent access to fence registers across reset
URL   : https://patchwork.freedesktop.org/series/55988/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5509 -> Patchwork_12090
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/55988/revisions/1/mbox/

Known issues
------------

  Here are the changes found in Patchwork_12090 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_suspend@basic-s3:
    - fi-blb-e6850:       PASS -> INCOMPLETE [fdo#107718]

  
#### Possible fixes ####

  * igt@kms_pipe_crc_basic@read-crc-pipe-b-frame-sequence:
    - fi-byt-clapper:     FAIL [fdo#103191] / [fdo#107362] -> PASS +1

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#107362]: https://bugs.freedesktop.org/show_bug.cgi?id=107362
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271


Participating hosts (44 -> 41)
------------------------------

  Additional (1): fi-skl-6700hq 
  Missing    (4): fi-kbl-soraka fi-ilk-m540 fi-byt-squawks fi-bsw-cyan 


Build changes
-------------

    * Linux: CI_DRM_5509 -> Patchwork_12090

  CI_DRM_5509: 3e925dcc414b68e4a5def008a2d15097b7be7db4 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4800: 79cf575d5ac2b2275a4813a7bfb0cf35641ab296 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12090: b2e0fba4f2f76cd67d13fdf1fd021e686ca7f7a7 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

b2e0fba4f2f7 drm/i915: Force the GPU reset upon wedging
fcd1a2fa4e67 drm/i915: Revoke mmaps and prevent access to fence registers across reset

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12090/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* ✗ Fi.CI.IGT: failure for series starting with [1/2] drm/i915: Revoke mmaps and prevent access to fence registers across reset
  2019-01-30 14:28 [PATCH 1/2] drm/i915: Revoke mmaps and prevent access to fence registers across reset Chris Wilson
                   ` (2 preceding siblings ...)
  2019-01-30 16:28 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-01-30 20:31 ` Patchwork
  2019-01-30 20:34   ` Chris Wilson
  3 siblings, 1 reply; 6+ messages in thread
From: Patchwork @ 2019-01-30 20:31 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] drm/i915: Revoke mmaps and prevent access to fence registers across reset
URL   : https://patchwork.freedesktop.org/series/55988/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_5509_full -> Patchwork_12090_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_12090_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_12090_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_12090_full:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_eio@unwedge-stress:
    - shard-glk:          PASS -> FAIL +8
    - shard-apl:          PASS -> FAIL +2
    - shard-kbl:          PASS -> FAIL

  
Known issues
------------

  Here are the changes found in Patchwork_12090_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_softpin@noreloc-s3:
    - shard-hsw:          PASS -> INCOMPLETE [fdo#103540]
    - shard-apl:          PASS -> INCOMPLETE [fdo#103927]

  * igt@kms_busy@extended-modeset-hang-newfb-with-reset-render-c:
    - shard-glk:          NOTRUN -> DMESG-WARN [fdo#107956]
    - shard-kbl:          PASS -> DMESG-WARN [fdo#107956]

  * igt@kms_cursor_crc@cursor-128x128-onscreen:
    - shard-apl:          PASS -> FAIL [fdo#103232] +4

  * igt@kms_cursor_crc@cursor-256x256-dpms:
    - shard-glk:          PASS -> FAIL [fdo#103232] +1

  * igt@kms_cursor_legacy@2x-long-cursor-vs-flip-legacy:
    - shard-hsw:          PASS -> FAIL [fdo#105767]

  * igt@kms_plane@pixel-format-pipe-c-planes-source-clamping:
    - shard-glk:          PASS -> FAIL [fdo#108948]

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes:
    - shard-apl:          PASS -> DMESG-WARN [fdo#108566]

  * igt@kms_plane_multiple@atomic-pipe-c-tiling-yf:
    - shard-apl:          PASS -> FAIL [fdo#103166] +2
    - shard-glk:          PASS -> FAIL [fdo#103166] +1

  * igt@kms_setmode@basic:
    - shard-apl:          PASS -> FAIL [fdo#99912]
    - shard-hsw:          PASS -> FAIL [fdo#99912]
    - shard-kbl:          PASS -> FAIL [fdo#99912]

  
#### Possible fixes ####

  * igt@gem_mmap_gtt@hang:
    - shard-kbl:          FAIL [fdo#109469] -> PASS
    - shard-hsw:          FAIL [fdo#109469] -> PASS
    - shard-snb:          FAIL [fdo#109469] -> PASS
    - shard-glk:          FAIL [fdo#109469] -> PASS
    - shard-apl:          FAIL [fdo#109469] -> PASS

  * igt@kms_atomic_interruptible@legacy-setmode:
    - shard-glk:          INCOMPLETE [fdo#103359] / [k.org#198133] -> PASS

  * igt@kms_busy@extended-pageflip-modeset-hang-oldfb-render-c:
    - shard-glk:          DMESG-WARN [fdo#107956] -> PASS +1

  * igt@kms_cursor_crc@cursor-128x128-suspend:
    - shard-hsw:          INCOMPLETE [fdo#103540] -> PASS

  * igt@kms_cursor_crc@cursor-64x21-random:
    - shard-apl:          FAIL [fdo#103232] -> PASS +1

  * igt@kms_plane@pixel-format-pipe-c-planes:
    - shard-glk:          FAIL [fdo#103166] -> PASS

  * igt@kms_plane@plane-position-covered-pipe-c-planes:
    - shard-apl:          FAIL [fdo#103166] -> PASS +2

  * igt@kms_plane_alpha_blend@pipe-b-constant-alpha-max:
    - shard-glk:          FAIL [fdo#108145] -> PASS +2

  * igt@kms_vblank@pipe-a-query-busy-hang:
    - shard-snb:          {SKIP} [fdo#109271] -> PASS

  * igt@perf_pmu@rc6:
    - shard-kbl:          {SKIP} [fdo#109271] -> PASS

  * igt@syncobj_wait@wait-all-for-submit-snapshot:
    - shard-snb:          DMESG-WARN [fdo#107469] -> PASS

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103359]: https://bugs.freedesktop.org/show_bug.cgi?id=103359
  [fdo#103540]: https://bugs.freedesktop.org/show_bug.cgi?id=103540
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#105767]: https://bugs.freedesktop.org/show_bug.cgi?id=105767
  [fdo#107469]: https://bugs.freedesktop.org/show_bug.cgi?id=107469
  [fdo#107956]: https://bugs.freedesktop.org/show_bug.cgi?id=107956
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108566]: https://bugs.freedesktop.org/show_bug.cgi?id=108566
  [fdo#108948]: https://bugs.freedesktop.org/show_bug.cgi?id=108948
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#109469]: https://bugs.freedesktop.org/show_bug.cgi?id=109469
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912
  [k.org#198133]: https://bugzilla.kernel.org/show_bug.cgi?id=198133


Participating hosts (7 -> 5)
------------------------------

  Missing    (2): shard-skl shard-iclb 


Build changes
-------------

    * Linux: CI_DRM_5509 -> Patchwork_12090

  CI_DRM_5509: 3e925dcc414b68e4a5def008a2d15097b7be7db4 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4800: 79cf575d5ac2b2275a4813a7bfb0cf35641ab296 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12090: b2e0fba4f2f76cd67d13fdf1fd021e686ca7f7a7 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12090/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: ✗ Fi.CI.IGT: failure for series starting with [1/2] drm/i915: Revoke mmaps and prevent access to fence registers across reset
  2019-01-30 20:31 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2019-01-30 20:34   ` Chris Wilson
  0 siblings, 0 replies; 6+ messages in thread
From: Chris Wilson @ 2019-01-30 20:34 UTC (permalink / raw)
  To: Patchwork; +Cc: intel-gfx

Quoting Patchwork (2019-01-30 20:31:57)
> #### Possible regressions ####
> 
>   * igt@gem_eio@unwedge-stress:
>     - shard-glk:          PASS -> FAIL +8
>     - shard-apl:          PASS -> FAIL +2
>     - shard-kbl:          PASS -> FAIL

At least now they are not timing issues, just that we fail to unwedge
the device. Still a mighty huh.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-01-30 20:34 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-30 14:28 [PATCH 1/2] drm/i915: Revoke mmaps and prevent access to fence registers across reset Chris Wilson
2019-01-30 14:28 ` [PATCH 2/2] drm/i915: Force the GPU reset upon wedging Chris Wilson
2019-01-30 16:01 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/2] drm/i915: Revoke mmaps and prevent access to fence registers across reset Patchwork
2019-01-30 16:28 ` ✓ Fi.CI.BAT: success " Patchwork
2019-01-30 20:31 ` ✗ Fi.CI.IGT: failure " Patchwork
2019-01-30 20:34   ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.