All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] drm/i915: Convert hangcheck from a timer into a delayed work item
@ 2015-01-26 16:03 Mika Kuoppala
  2015-01-26 16:03 ` [PATCH 2/4] drm/i915: Display current hangcheck status in debugfs Mika Kuoppala
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Mika Kuoppala @ 2015-01-26 16:03 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula, Daniel Vetter

From: Chris Wilson <chris@chris-wilson.co.uk>

When run as a timer, i915_hangcheck_elapsed() must adhere to all the
rules of running in a softirq context. This is advantageous to us as we
want to minimise the risk that a driver bug will prevent us from
detecting a hung GPU. However, that is irrelevant if the driver bug
prevents us from resetting and recovering. Still it is prudent not to
rely on mutexes inside the checker, but given the coarseness of
dev->struct_mutex doing so is extremely hard.

Give in and run from a work queue, i.e. outside of softirq.

v2: Use own workqueue to avoid deadlocks (Daniel)
    Cleanup commit msg and add comment to i915_queue_hangcheck() (Chris)

Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c | 13 ++++++++++++-
 drivers/gpu/drm/i915/i915_drv.c |  2 +-
 drivers/gpu/drm/i915/i915_drv.h |  3 ++-
 drivers/gpu/drm/i915/i915_gem.c |  2 +-
 drivers/gpu/drm/i915/i915_irq.c | 28 +++++++++++++++-------------
 5 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 51e8fe5..6eaf795 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -790,6 +790,14 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 		goto out_freewq;
 	}
 
+	dev_priv->gpu_error.hangcheck_wq =
+		alloc_ordered_workqueue("i915-hangcheck", 0);
+	if (dev_priv->gpu_error.hangcheck_wq == NULL) {
+		DRM_ERROR("Failed to create our hangcheck workqueue.\n");
+		ret = -ENOMEM;
+		goto out_freedpwq;
+	}
+
 	intel_irq_init(dev_priv);
 	intel_uncore_sanitize(dev);
 
@@ -864,6 +872,8 @@ out_gem_unload:
 	intel_teardown_gmbus(dev);
 	intel_teardown_mchbar(dev);
 	pm_qos_remove_request(&dev_priv->pm_qos);
+	destroy_workqueue(dev_priv->gpu_error.hangcheck_wq);
+out_freedpwq:
 	destroy_workqueue(dev_priv->dp_wq);
 out_freewq:
 	destroy_workqueue(dev_priv->wq);
@@ -934,7 +944,7 @@ int i915_driver_unload(struct drm_device *dev)
 	}
 
 	/* Free error state after interrupts are fully disabled. */
-	del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
+	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
 	cancel_work_sync(&dev_priv->gpu_error.work);
 	i915_destroy_error_state(dev);
 
@@ -960,6 +970,7 @@ int i915_driver_unload(struct drm_device *dev)
 
 	destroy_workqueue(dev_priv->dp_wq);
 	destroy_workqueue(dev_priv->wq);
+	destroy_workqueue(dev_priv->gpu_error.hangcheck_wq);
 	pm_qos_remove_request(&dev_priv->pm_qos);
 
 	i915_global_gtt_cleanup(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 66c72bd..cb1468d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1396,7 +1396,7 @@ static int intel_runtime_suspend(struct device *device)
 		return ret;
 	}
 
-	del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
+	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
 	intel_uncore_forcewake_reset(dev, false);
 	dev_priv->pm.suspended = true;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0d67b17..a4eb023 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1345,7 +1345,8 @@ struct i915_gpu_error {
 	/* Hang gpu twice in this window and your context gets banned */
 #define DRM_I915_CTX_BAN_PERIOD DIV_ROUND_UP(8*DRM_I915_HANGCHECK_PERIOD, 1000)
 
-	struct timer_list hangcheck_timer;
+	struct workqueue_struct *hangcheck_wq;
+	struct delayed_work hangcheck_work;
 
 	/* For reset and error_state handling. */
 	spinlock_t lock;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fc81889..8a178cd 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4615,7 +4615,7 @@ i915_gem_suspend(struct drm_device *dev)
 	i915_gem_stop_ringbuffers(dev);
 	mutex_unlock(&dev->struct_mutex);
 
-	del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
+	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
 	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
 	flush_delayed_work(&dev_priv->mm.idle_work);
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 2399eae..234b1f7 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2982,10 +2982,12 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd)
  * we kick the ring. If we see no progress on three subsequent calls
  * we assume chip is wedged and try to fix it by resetting the chip.
  */
-static void i915_hangcheck_elapsed(unsigned long data)
+static void i915_hangcheck_elapsed(struct work_struct *work)
 {
-	struct drm_device *dev = (struct drm_device *)data;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv =
+		container_of(work, typeof(*dev_priv),
+			     gpu_error.hangcheck_work.work);
+	struct drm_device *dev = dev_priv->dev;
 	struct intel_engine_cs *ring;
 	int i;
 	int busy_count = 0, rings_hung = 0;
@@ -3099,17 +3101,18 @@ static void i915_hangcheck_elapsed(unsigned long data)
 
 void i915_queue_hangcheck(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct timer_list *timer = &dev_priv->gpu_error.hangcheck_timer;
+	struct i915_gpu_error *e = &to_i915(dev)->gpu_error;
 
 	if (!i915.enable_hangcheck)
 		return;
 
-	/* Don't continually defer the hangcheck, but make sure it is active */
-	if (timer_pending(timer))
-		return;
-	mod_timer(timer,
-		  round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
+	/* Don't continually defer the hangcheck so that it is always run at
+	 * least once after work has been scheduled on any ring. Otherwise,
+	 * we will ignore a hung ring if a second ring is kept busy.
+	 */
+
+	queue_delayed_work(e->hangcheck_wq, &e->hangcheck_work,
+			   round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES));
 }
 
 static void ibx_irq_reset(struct drm_device *dev)
@@ -4353,9 +4356,8 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 	else
 		dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
 
-	setup_timer(&dev_priv->gpu_error.hangcheck_timer,
-		    i915_hangcheck_elapsed,
-		    (unsigned long) dev);
+	INIT_DELAYED_WORK(&dev_priv->gpu_error.hangcheck_work,
+			  i915_hangcheck_elapsed);
 	INIT_DELAYED_WORK(&dev_priv->hotplug_reenable_work,
 			  intel_hpd_irq_reenable_work);
 
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/4] drm/i915: Display current hangcheck status in debugfs
  2015-01-26 16:03 [PATCH 1/4] drm/i915: Convert hangcheck from a timer into a delayed work item Mika Kuoppala
@ 2015-01-26 16:03 ` Mika Kuoppala
  2015-01-26 16:03 ` [PATCH 3/4] drm/i915: Remove nested work in gpu error handling Mika Kuoppala
  2015-01-26 16:03 ` [PATCH 4/4] drm/i915: Be consistent on printing seqnos Mika Kuoppala
  2 siblings, 0 replies; 16+ messages in thread
From: Mika Kuoppala @ 2015-01-26 16:03 UTC (permalink / raw)
  To: intel-gfx

From: Chris Wilson <chris@chris-wilson.co.uk>

For example,

/sys/kernel/debug/dri/0/i915_hangcheck_info:

Hangcheck active, fires in 15887800ms
render ring:
        seqno = -4059 [current -583]
        action = 2
        score = 0
        ACTHD = 1ee8 [current 21f980]
        max ACTHD = 0

v2: Include expiration ETA. Can anyone spot a problem?
v3: Convert for workqueued hangcheck (Mika)
v4: Print seqnos as unsigned ints (Ville)
v5: Print seqnos as hex (Chris)

Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com) (v2)
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> (v2)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> (v2)
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 2ad4c48..f865cfd 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1219,6 +1219,41 @@ out:
 	return ret;
 }
 
+static int i915_hangcheck_info(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = m->private;
+	struct drm_i915_private *dev_priv = to_i915(node->minor->dev);
+	struct intel_engine_cs *ring;
+	int i;
+
+	if (!i915.enable_hangcheck) {
+		seq_printf(m, "Hangcheck disabled\n");
+		return 0;
+	}
+
+	if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) {
+		seq_printf(m, "Hangcheck active, fires in %dms\n",
+			   jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires -
+					    jiffies));
+	} else
+		seq_printf(m, "Hangcheck inactive\n");
+
+	for_each_ring(ring, dev_priv, i) {
+		seq_printf(m, "%s:\n", ring->name);
+		seq_printf(m, "\tseqno = %x [current %x]\n",
+			   ring->hangcheck.seqno, ring->get_seqno(ring, false));
+		seq_printf(m, "\taction = %d\n", ring->hangcheck.action);
+		seq_printf(m, "\tscore = %d\n", ring->hangcheck.score);
+		seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
+			   (long long)ring->hangcheck.acthd,
+			   (long long)intel_ring_get_active_head(ring));
+		seq_printf(m, "\tmax ACTHD = 0x%08llx\n",
+			   (long long)ring->hangcheck.max_acthd);
+	}
+
+	return 0;
+}
+
 static int ironlake_drpc_info(struct seq_file *m)
 {
 	struct drm_info_node *node = m->private;
@@ -4407,6 +4442,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_gem_hws_vebox", i915_hws_info, 0, (void *)VECS},
 	{"i915_gem_batch_pool", i915_gem_batch_pool_info, 0},
 	{"i915_frequency_info", i915_frequency_info, 0},
+	{"i915_hangcheck_info", i915_hangcheck_info, 0},
 	{"i915_drpc_info", i915_drpc_info, 0},
 	{"i915_emon_status", i915_emon_status, 0},
 	{"i915_ring_freq_table", i915_ring_freq_table, 0},
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 3/4] drm/i915: Remove nested work in gpu error handling
  2015-01-26 16:03 [PATCH 1/4] drm/i915: Convert hangcheck from a timer into a delayed work item Mika Kuoppala
  2015-01-26 16:03 ` [PATCH 2/4] drm/i915: Display current hangcheck status in debugfs Mika Kuoppala
@ 2015-01-26 16:03 ` Mika Kuoppala
  2015-01-27  9:53   ` Chris Wilson
  2015-01-26 16:03 ` [PATCH 4/4] drm/i915: Be consistent on printing seqnos Mika Kuoppala
  2 siblings, 1 reply; 16+ messages in thread
From: Mika Kuoppala @ 2015-01-26 16:03 UTC (permalink / raw)
  To: intel-gfx

Now when we declare gpu errors only through our own dedicated
hangcheck workqueue there is no need to have a separate workqueue
for handling the resetting and waking up the clients as the deadlock
concerns are no more.

The only exception is i915_debugfs::i915_set_wedged, which triggers
error handling through process context. However as this is only used through
test harness it is responsibility for test harness not to introduce hangs
through both debug interface and through hangcheck mechanism at the same time.

Remove gpu_error.work and let the hangcheck work do the tasks it used to.

Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c |  1 -
 drivers/gpu/drm/i915/i915_drv.h |  2 --
 drivers/gpu/drm/i915/i915_irq.c | 34 +++++++++++++---------------------
 3 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 6eaf795..1a46787 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -945,7 +945,6 @@ int i915_driver_unload(struct drm_device *dev)
 
 	/* Free error state after interrupts are fully disabled. */
 	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
-	cancel_work_sync(&dev_priv->gpu_error.work);
 	i915_destroy_error_state(dev);
 
 	if (dev->pdev->msi_enabled)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a4eb023..cfb2641 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1352,8 +1352,6 @@ struct i915_gpu_error {
 	spinlock_t lock;
 	/* Protected by the above dev->gpu_error.lock. */
 	struct drm_i915_error_state *first_error;
-	struct work_struct work;
-
 
 	unsigned long missed_irq_rings;
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 234b1f7..44dbf78 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2421,19 +2421,15 @@ static void i915_error_wake_up(struct drm_i915_private *dev_priv,
 }
 
 /**
- * i915_error_work_func - do process context error handling work
- * @work: work struct
+ * i915_reset_and_wakeup - do process context error handling work
  *
  * Fire an error uevent so userspace can see that a hang or error
  * was detected.
  */
-static void i915_error_work_func(struct work_struct *work)
+static void i915_reset_and_wakeup(struct drm_device *dev)
 {
-	struct i915_gpu_error *error = container_of(work, struct i915_gpu_error,
-						    work);
-	struct drm_i915_private *dev_priv =
-		container_of(error, struct drm_i915_private, gpu_error);
-	struct drm_device *dev = dev_priv->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_gpu_error *error = &dev_priv->gpu_error;
 	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
 	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
 	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
@@ -2600,10 +2596,10 @@ static void i915_report_and_clear_eir(struct drm_device *dev)
 }
 
 /**
- * i915_handle_error - handle an error interrupt
+ * i915_handle_error - handle a gpu error
  * @dev: drm device
  *
- * Do some basic checking of regsiter state at error interrupt time and
+ * Do some basic checking of regsiter state at error time and
  * dump it to the syslog.  Also call i915_capture_error_state() to make
  * sure we get a record and make it available in debugfs.  Fire a uevent
  * so userspace knows something bad happened (should trigger collection
@@ -2616,6 +2612,9 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
 	va_list args;
 	char error_msg[80];
 
+	if (WARN_ON(mutex_is_locked(&dev_priv->dev->struct_mutex)))
+		return;
+
 	va_start(args, fmt);
 	vscnprintf(error_msg, sizeof(error_msg), fmt, args);
 	va_end(args);
@@ -2628,9 +2627,9 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
 				&dev_priv->gpu_error.reset_counter);
 
 		/*
-		 * Wakeup waiting processes so that the reset work function
-		 * i915_error_work_func doesn't deadlock trying to grab various
-		 * locks. By bumping the reset counter first, the woken
+		 * Wakeup waiting processes so that the reset function
+		 * i915_reset_and_wakeup doesn't deadlock trying to grab
+		 * various locks. By bumping the reset counter first, the woken
 		 * processes will see a reset in progress and back off,
 		 * releasing their locks and then wait for the reset completion.
 		 * We must do this for _all_ gpu waiters that might hold locks
@@ -2643,13 +2642,7 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
 		i915_error_wake_up(dev_priv, false);
 	}
 
-	/*
-	 * Our reset work can grab modeset locks (since it needs to reset the
-	 * state of outstanding pagelips). Hence it must not be run on our own
-	 * dev-priv->wq work queue for otherwise the flush_work in the pageflip
-	 * code will deadlock.
-	 */
-	schedule_work(&dev_priv->gpu_error.work);
+	i915_reset_and_wakeup(dev);
 }
 
 /* Called from drm generic code, passed 'crtc' which
@@ -4345,7 +4338,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 
 	INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func);
 	INIT_WORK(&dev_priv->dig_port_work, i915_digport_work_func);
-	INIT_WORK(&dev_priv->gpu_error.work, i915_error_work_func);
 	INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work);
 	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
 
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 4/4] drm/i915: Be consistent on printing seqnos
  2015-01-26 16:03 [PATCH 1/4] drm/i915: Convert hangcheck from a timer into a delayed work item Mika Kuoppala
  2015-01-26 16:03 ` [PATCH 2/4] drm/i915: Display current hangcheck status in debugfs Mika Kuoppala
  2015-01-26 16:03 ` [PATCH 3/4] drm/i915: Remove nested work in gpu error handling Mika Kuoppala
@ 2015-01-26 16:03 ` Mika Kuoppala
  2015-01-27  9:52   ` Chris Wilson
  2015-01-28 11:59   ` shuang.he
  2 siblings, 2 replies; 16+ messages in thread
From: Mika Kuoppala @ 2015-01-26 16:03 UTC (permalink / raw)
  To: intel-gfx

We have had %x and %u intermixed. Bring everything in line and
use %x

Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index f865cfd..28316c7 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -123,7 +123,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	struct i915_vma *vma;
 	int pin_count = 0;
 
-	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %u %u %u%s%s%s",
+	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
 		   &obj->base,
 		   get_pin_flag(obj),
 		   get_tiling_flag(obj),
@@ -569,7 +569,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
 				struct intel_engine_cs *ring =
 					i915_gem_request_get_ring(work->flip_queued_req);
 
-				seq_printf(m, "Flip queued on %s at seqno %u, next seqno %u [current breadcrumb %u], completed? %d\n",
+				seq_printf(m, "Flip queued on %s at seqno %x, next seqno %x [current breadcrumb %x], completed? %d\n",
 					   ring->name,
 					   i915_gem_request_get_seqno(work->flip_queued_req),
 					   dev_priv->next_seqno,
@@ -658,7 +658,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
 		list_for_each_entry(gem_request,
 				    &ring->request_list,
 				    list) {
-			seq_printf(m, "    %d @ %d\n",
+			seq_printf(m, "    %x @ %d\n",
 				   gem_request->seqno,
 				   (int) (jiffies - gem_request->emitted_jiffies));
 		}
@@ -676,7 +676,7 @@ static void i915_ring_seqno_info(struct seq_file *m,
 				 struct intel_engine_cs *ring)
 {
 	if (ring->get_seqno) {
-		seq_printf(m, "Current sequence (%s): %u\n",
+		seq_printf(m, "Current sequence (%s): %x\n",
 			   ring->name, ring->get_seqno(ring, false));
 	}
 }
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/4] drm/i915: Be consistent on printing seqnos
  2015-01-26 16:03 ` [PATCH 4/4] drm/i915: Be consistent on printing seqnos Mika Kuoppala
@ 2015-01-27  9:52   ` Chris Wilson
  2015-01-28 11:59   ` shuang.he
  1 sibling, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2015-01-27  9:52 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

On Mon, Jan 26, 2015 at 06:03:06PM +0200, Mika Kuoppala wrote:
> We have had %x and %u intermixed. Bring everything in line and
> use %x
> 
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>

Thanks, these had been annoying me for ages.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/4] drm/i915: Remove nested work in gpu error handling
  2015-01-26 16:03 ` [PATCH 3/4] drm/i915: Remove nested work in gpu error handling Mika Kuoppala
@ 2015-01-27  9:53   ` Chris Wilson
  2015-01-28  9:37     ` Daniel Vetter
  2015-01-28 15:03     ` [PATCH] " Mika Kuoppala
  0 siblings, 2 replies; 16+ messages in thread
From: Chris Wilson @ 2015-01-27  9:53 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

On Mon, Jan 26, 2015 at 06:03:05PM +0200, Mika Kuoppala wrote:
> Now when we declare gpu errors only through our own dedicated
> hangcheck workqueue there is no need to have a separate workqueue
> for handling the resetting and waking up the clients as the deadlock
> concerns are no more.
> 
> The only exception is i915_debugfs::i915_set_wedged, which triggers
> error handling through process context. However as this is only used through
> test harness it is responsibility for test harness not to introduce hangs
> through both debug interface and through hangcheck mechanism at the same time.
> 
> Remove gpu_error.work and let the hangcheck work do the tasks it used to.
> 
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>

For our own sanity, we need to stick some form of that comment in
i915_set_wedged(), so that when we do inevitably blow up, we can laugh
at ourselves.

Otherwise, lgtm.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/4] drm/i915: Remove nested work in gpu error handling
  2015-01-27  9:53   ` Chris Wilson
@ 2015-01-28  9:37     ` Daniel Vetter
  2015-01-28 15:03     ` [PATCH] " Mika Kuoppala
  1 sibling, 0 replies; 16+ messages in thread
From: Daniel Vetter @ 2015-01-28  9:37 UTC (permalink / raw)
  To: Chris Wilson, Mika Kuoppala, intel-gfx

On Tue, Jan 27, 2015 at 09:53:20AM +0000, Chris Wilson wrote:
> On Mon, Jan 26, 2015 at 06:03:05PM +0200, Mika Kuoppala wrote:
> > Now when we declare gpu errors only through our own dedicated
> > hangcheck workqueue there is no need to have a separate workqueue
> > for handling the resetting and waking up the clients as the deadlock
> > concerns are no more.
> > 
> > The only exception is i915_debugfs::i915_set_wedged, which triggers
> > error handling through process context. However as this is only used through
> > test harness it is responsibility for test harness not to introduce hangs
> > through both debug interface and through hangcheck mechanism at the same time.
> > 
> > Remove gpu_error.work and let the hangcheck work do the tasks it used to.
> > 
> > Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> 
> For our own sanity, we need to stick some form of that comment in
> i915_set_wedged(), so that when we do inevitably blow up, we can laugh
> at ourselves.

Yeah that has the potential for some self-inflicted pain. I've merged all
the other patches meanwhile, thanks.
-Daniel
> 
> Otherwise, lgtm.
> -Chris
> 
> -- 
> Chris Wilson, Intel Open Source Technology Centre
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/4] drm/i915: Be consistent on printing seqnos
  2015-01-26 16:03 ` [PATCH 4/4] drm/i915: Be consistent on printing seqnos Mika Kuoppala
  2015-01-27  9:52   ` Chris Wilson
@ 2015-01-28 11:59   ` shuang.he
  2015-02-03 12:23     ` Jani Nikula
  1 sibling, 1 reply; 16+ messages in thread
From: shuang.he @ 2015-01-28 11:59 UTC (permalink / raw)
  To: shuang.he, ethan.gao, intel-gfx, mika.kuoppala

Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 5645
-------------------------------------Summary-------------------------------------
Platform          Delta          drm-intel-nightly          Series Applied
PNV                 -1              353/353              352/353
ILK                                  353/353              353/353
SNB                 -1              400/422              399/422
IVB              +2-2              485/487              485/487
BYT                                  296/296              296/296
HSW              +1-1              507/508              507/508
BDW                                  401/402              401/402
-------------------------------------Detailed-------------------------------------
Platform  Test                                drm-intel-nightly          Series Applied
*PNV  igt_gen3_render_linear_blits      PASS(3, M25M23)      CRASH(1, M23)
*SNB  igt_gem_concurrent_blit_gpu-bcs-gpu-read-after-write      PASS(2, M35)      NO_RESULT(1, M35)
*IVB  igt_gem_persistent_relocs_forked-thrashing      PASS(2, M34M4)      NO_RESULT(1, M4)
 IVB  igt_gem_pwrite_pread_snooped-pwrite-blt-cpu_mmap-performance      DMESG_WARN(2, M34)PASS(3, M4)      PASS(1, M4)
 IVB  igt_gem_storedw_batches_loop_normal      DMESG_WARN(2, M34M4)PASS(5, M34M4M21)      PASS(1, M4)
*IVB  igt_gem_storedw_loop_blt      PASS(2, M34M4)      DMESG_WARN(1, M4)
 HSW  igt_gem_pwrite_pread_snooped-pwrite-blt-cpu_mmap-performance      DMESG_WARN(1, M40)PASS(7, M40M20)      PASS(1, M40)
*HSW  igt_pm_rpm_debugfs-read      PASS(2, M40)      DMESG_WARN(1, M40)
Note: You need to pay more attention to line start with '*'
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH] drm/i915: Remove nested work in gpu error handling
  2015-01-27  9:53   ` Chris Wilson
  2015-01-28  9:37     ` Daniel Vetter
@ 2015-01-28 15:03     ` Mika Kuoppala
  2015-01-28 15:30       ` Chris Wilson
  2015-02-02  9:17       ` Chris Wilson
  1 sibling, 2 replies; 16+ messages in thread
From: Mika Kuoppala @ 2015-01-28 15:03 UTC (permalink / raw)
  To: intel-gfx

Now when we declare gpu errors only through our own dedicated
hangcheck workqueue there is no need to have a separate workqueue
for handling the resetting and waking up the clients as the deadlock
concerns are no more.

The only exception is i915_debugfs::i915_set_wedged, which triggers
error handling through process context. However as this is only used through
test harness it is responsibility for test harness not to introduce hangs
through both debug interface and through hangcheck mechanism at the same time.

Remove gpu_error.work and let the hangcheck work do the tasks it used to.

v2: Add a big warning sign into i915_debugfs::i915_set_wedged (Chris)

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 11 +++++++++++
 drivers/gpu/drm/i915/i915_dma.c     |  1 -
 drivers/gpu/drm/i915/i915_drv.h     |  2 --
 drivers/gpu/drm/i915/i915_irq.c     | 34 +++++++++++++---------------------
 4 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3b332a4..211d494 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3969,6 +3969,17 @@ i915_wedged_set(void *data, u64 val)
 	struct drm_device *dev = data;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	/*
+	 * There is no safeguard against this debugfs entry colliding
+	 * with the hangcheck calling same i915_handle_error() in
+	 * parallel, causing an explosion. For now we assume that the
+	 * test harness is responsible enough not to inject gpu hangs
+	 * while it is writing to 'i915_wedged'
+	 */
+
+	if (i915_reset_in_progress(&dev_priv->gpu_error))
+		return -EAGAIN;
+
 	intel_runtime_pm_get(dev_priv);
 
 	i915_handle_error(dev, val,
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 6eaf795..1a46787 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -945,7 +945,6 @@ int i915_driver_unload(struct drm_device *dev)
 
 	/* Free error state after interrupts are fully disabled. */
 	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
-	cancel_work_sync(&dev_priv->gpu_error.work);
 	i915_destroy_error_state(dev);
 
 	if (dev->pdev->msi_enabled)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b09173f..07f99ca 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1352,8 +1352,6 @@ struct i915_gpu_error {
 	spinlock_t lock;
 	/* Protected by the above dev->gpu_error.lock. */
 	struct drm_i915_error_state *first_error;
-	struct work_struct work;
-
 
 	unsigned long missed_irq_rings;
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 234b1f7..44dbf78 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2421,19 +2421,15 @@ static void i915_error_wake_up(struct drm_i915_private *dev_priv,
 }
 
 /**
- * i915_error_work_func - do process context error handling work
- * @work: work struct
+ * i915_reset_and_wakeup - do process context error handling work
  *
  * Fire an error uevent so userspace can see that a hang or error
  * was detected.
  */
-static void i915_error_work_func(struct work_struct *work)
+static void i915_reset_and_wakeup(struct drm_device *dev)
 {
-	struct i915_gpu_error *error = container_of(work, struct i915_gpu_error,
-						    work);
-	struct drm_i915_private *dev_priv =
-		container_of(error, struct drm_i915_private, gpu_error);
-	struct drm_device *dev = dev_priv->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_gpu_error *error = &dev_priv->gpu_error;
 	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
 	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
 	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
@@ -2600,10 +2596,10 @@ static void i915_report_and_clear_eir(struct drm_device *dev)
 }
 
 /**
- * i915_handle_error - handle an error interrupt
+ * i915_handle_error - handle a gpu error
  * @dev: drm device
  *
- * Do some basic checking of regsiter state at error interrupt time and
+ * Do some basic checking of regsiter state at error time and
  * dump it to the syslog.  Also call i915_capture_error_state() to make
  * sure we get a record and make it available in debugfs.  Fire a uevent
  * so userspace knows something bad happened (should trigger collection
@@ -2616,6 +2612,9 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
 	va_list args;
 	char error_msg[80];
 
+	if (WARN_ON(mutex_is_locked(&dev_priv->dev->struct_mutex)))
+		return;
+
 	va_start(args, fmt);
 	vscnprintf(error_msg, sizeof(error_msg), fmt, args);
 	va_end(args);
@@ -2628,9 +2627,9 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
 				&dev_priv->gpu_error.reset_counter);
 
 		/*
-		 * Wakeup waiting processes so that the reset work function
-		 * i915_error_work_func doesn't deadlock trying to grab various
-		 * locks. By bumping the reset counter first, the woken
+		 * Wakeup waiting processes so that the reset function
+		 * i915_reset_and_wakeup doesn't deadlock trying to grab
+		 * various locks. By bumping the reset counter first, the woken
 		 * processes will see a reset in progress and back off,
 		 * releasing their locks and then wait for the reset completion.
 		 * We must do this for _all_ gpu waiters that might hold locks
@@ -2643,13 +2642,7 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
 		i915_error_wake_up(dev_priv, false);
 	}
 
-	/*
-	 * Our reset work can grab modeset locks (since it needs to reset the
-	 * state of outstanding pagelips). Hence it must not be run on our own
-	 * dev-priv->wq work queue for otherwise the flush_work in the pageflip
-	 * code will deadlock.
-	 */
-	schedule_work(&dev_priv->gpu_error.work);
+	i915_reset_and_wakeup(dev);
 }
 
 /* Called from drm generic code, passed 'crtc' which
@@ -4345,7 +4338,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 
 	INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func);
 	INIT_WORK(&dev_priv->dig_port_work, i915_digport_work_func);
-	INIT_WORK(&dev_priv->gpu_error.work, i915_error_work_func);
 	INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work);
 	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
 
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/i915: Remove nested work in gpu error handling
  2015-01-28 15:03     ` [PATCH] " Mika Kuoppala
@ 2015-01-28 15:30       ` Chris Wilson
  2015-01-29 17:03         ` Daniel Vetter
  2015-02-02  9:17       ` Chris Wilson
  1 sibling, 1 reply; 16+ messages in thread
From: Chris Wilson @ 2015-01-28 15:30 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

On Wed, Jan 28, 2015 at 05:03:14PM +0200, Mika Kuoppala wrote:
> Now when we declare gpu errors only through our own dedicated
> hangcheck workqueue there is no need to have a separate workqueue
> for handling the resetting and waking up the clients as the deadlock
> concerns are no more.
> 
> The only exception is i915_debugfs::i915_set_wedged, which triggers
> error handling through process context. However as this is only used through
> test harness it is responsibility for test harness not to introduce hangs
> through both debug interface and through hangcheck mechanism at the same time.
> 
> Remove gpu_error.work and let the hangcheck work do the tasks it used to.
> 
> v2: Add a big warning sign into i915_debugfs::i915_set_wedged (Chris)
> 
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/i915: Remove nested work in gpu error handling
  2015-01-28 15:30       ` Chris Wilson
@ 2015-01-29 17:03         ` Daniel Vetter
  0 siblings, 0 replies; 16+ messages in thread
From: Daniel Vetter @ 2015-01-29 17:03 UTC (permalink / raw)
  To: Chris Wilson, Mika Kuoppala, intel-gfx

On Wed, Jan 28, 2015 at 03:30:35PM +0000, Chris Wilson wrote:
> On Wed, Jan 28, 2015 at 05:03:14PM +0200, Mika Kuoppala wrote:
> > Now when we declare gpu errors only through our own dedicated
> > hangcheck workqueue there is no need to have a separate workqueue
> > for handling the resetting and waking up the clients as the deadlock
> > concerns are no more.
> > 
> > The only exception is i915_debugfs::i915_set_wedged, which triggers
> > error handling through process context. However as this is only used through
> > test harness it is responsibility for test harness not to introduce hangs
> > through both debug interface and through hangcheck mechanism at the same time.
> > 
> > Remove gpu_error.work and let the hangcheck work do the tasks it used to.
> > 
> > v2: Add a big warning sign into i915_debugfs::i915_set_wedged (Chris)
> > 
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Queued for -next, thanks for the patch.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/i915: Remove nested work in gpu error handling
  2015-01-28 15:03     ` [PATCH] " Mika Kuoppala
  2015-01-28 15:30       ` Chris Wilson
@ 2015-02-02  9:17       ` Chris Wilson
  2015-02-02  9:38         ` Daniel Vetter
  1 sibling, 1 reply; 16+ messages in thread
From: Chris Wilson @ 2015-02-02  9:17 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

On Wed, Jan 28, 2015 at 05:03:14PM +0200, Mika Kuoppala wrote:
> @@ -2616,6 +2612,9 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
>  	va_list args;
>  	char error_msg[80];
>  
> +	if (WARN_ON(mutex_is_locked(&dev_priv->dev->struct_mutex)))
> +		return;
> +

Oops, sorry, I should have realised this was wrong earlier. The mutex
breaking occurs later in i915_handle_error.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/i915: Remove nested work in gpu error handling
  2015-02-02  9:17       ` Chris Wilson
@ 2015-02-02  9:38         ` Daniel Vetter
  2015-02-02 10:08           ` Chris Wilson
  0 siblings, 1 reply; 16+ messages in thread
From: Daniel Vetter @ 2015-02-02  9:38 UTC (permalink / raw)
  To: Chris Wilson, Mika Kuoppala, intel-gfx

On Mon, Feb 02, 2015 at 09:17:14AM +0000, Chris Wilson wrote:
> On Wed, Jan 28, 2015 at 05:03:14PM +0200, Mika Kuoppala wrote:
> > @@ -2616,6 +2612,9 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
> >  	va_list args;
> >  	char error_msg[80];
> >  
> > +	if (WARN_ON(mutex_is_locked(&dev_priv->dev->struct_mutex)))
> > +		return;
> > +
> 
> Oops, sorry, I should have realised this was wrong earlier. The mutex
> breaking occurs later in i915_handle_error.

Oh well, already merged. Also, prts seems to complain that a bunch of
hang stress-tests changed from fail to timeout because of this one here.
Did this patch accidentally fix a bug and we just need to tune the tests,
or is there some new deadlock now? prts results are really thin, per usual
:(

Thanks, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/i915: Remove nested work in gpu error handling
  2015-02-02  9:38         ` Daniel Vetter
@ 2015-02-02 10:08           ` Chris Wilson
  2015-02-03 14:05             ` Mika Kuoppala
  0 siblings, 1 reply; 16+ messages in thread
From: Chris Wilson @ 2015-02-02 10:08 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Mon, Feb 02, 2015 at 10:38:19AM +0100, Daniel Vetter wrote:
> On Mon, Feb 02, 2015 at 09:17:14AM +0000, Chris Wilson wrote:
> > On Wed, Jan 28, 2015 at 05:03:14PM +0200, Mika Kuoppala wrote:
> > > @@ -2616,6 +2612,9 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
> > >  	va_list args;
> > >  	char error_msg[80];
> > >  
> > > +	if (WARN_ON(mutex_is_locked(&dev_priv->dev->struct_mutex)))
> > > +		return;
> > > +
> > 
> > Oops, sorry, I should have realised this was wrong earlier. The mutex
> > breaking occurs later in i915_handle_error.
> 
> Oh well, already merged. Also, prts seems to complain that a bunch of
> hang stress-tests changed from fail to timeout because of this one here.
> Did this patch accidentally fix a bug and we just need to tune the tests,
> or is there some new deadlock now? prts results are really thin, per usual
> :(

Yes. It will also prevent the gpu reset which those tests depend upon.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/4] drm/i915: Be consistent on printing seqnos
  2015-01-28 11:59   ` shuang.he
@ 2015-02-03 12:23     ` Jani Nikula
  0 siblings, 0 replies; 16+ messages in thread
From: Jani Nikula @ 2015-02-03 12:23 UTC (permalink / raw)
  To: shuang.he

On Wed, 28 Jan 2015, shuang.he@intel.com wrote:
> Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
> Task id: 5645
> -------------------------------------Summary-------------------------------------
> Platform          Delta          drm-intel-nightly          Series Applied
> PNV                 -1              353/353              352/353
> ILK                                  353/353              353/353
> SNB                 -1              400/422              399/422
> IVB              +2-2              485/487              485/487
> BYT                                  296/296              296/296
> HSW              +1-1              507/508              507/508
> BDW                                  401/402              401/402
> -------------------------------------Detailed-------------------------------------
> Platform  Test                                drm-intel-nightly          Series Applied
> *PNV  igt_gen3_render_linear_blits      PASS(3, M25M23)      CRASH(1, M23)
> *SNB  igt_gem_concurrent_blit_gpu-bcs-gpu-read-after-write      PASS(2, M35)      NO_RESULT(1, M35)
> *IVB  igt_gem_persistent_relocs_forked-thrashing      PASS(2, M34M4)      NO_RESULT(1, M4)
>  IVB  igt_gem_pwrite_pread_snooped-pwrite-blt-cpu_mmap-performance      DMESG_WARN(2, M34)PASS(3, M4)      PASS(1, M4)
>  IVB  igt_gem_storedw_batches_loop_normal      DMESG_WARN(2, M34M4)PASS(5, M34M4M21)      PASS(1, M4)
> *IVB  igt_gem_storedw_loop_blt      PASS(2, M34M4)      DMESG_WARN(1, M4)
>  HSW  igt_gem_pwrite_pread_snooped-pwrite-blt-cpu_mmap-performance      DMESG_WARN(1, M40)PASS(7, M40M20)      PASS(1, M40)
> *HSW  igt_pm_rpm_debugfs-read      PASS(2, M40)      DMESG_WARN(1, M40)
> Note: You need to pay more attention to line start with '*'

This bug was not found by prts:
https://bugs.freedesktop.org/show_bug.cgi?id=88908

Jani.

-- 
Jani Nikula, Intel Open Source Technology Center
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/i915: Remove nested work in gpu error handling
  2015-02-02 10:08           ` Chris Wilson
@ 2015-02-03 14:05             ` Mika Kuoppala
  0 siblings, 0 replies; 16+ messages in thread
From: Mika Kuoppala @ 2015-02-03 14:05 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter; +Cc: intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> On Mon, Feb 02, 2015 at 10:38:19AM +0100, Daniel Vetter wrote:
>> On Mon, Feb 02, 2015 at 09:17:14AM +0000, Chris Wilson wrote:
>> > On Wed, Jan 28, 2015 at 05:03:14PM +0200, Mika Kuoppala wrote:
>> > > @@ -2616,6 +2612,9 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
>> > >  	va_list args;
>> > >  	char error_msg[80];
>> > >  
>> > > +	if (WARN_ON(mutex_is_locked(&dev_priv->dev->struct_mutex)))
>> > > +		return;
>> > > +
>> > 
>> > Oops, sorry, I should have realised this was wrong earlier. The mutex
>> > breaking occurs later in i915_handle_error.
>> 
>> Oh well, already merged. Also, prts seems to complain that a bunch of
>> hang stress-tests changed from fail to timeout because of this one here.
>> Did this patch accidentally fix a bug and we just need to tune the tests,
>> or is there some new deadlock now? prts results are really thin, per usual
>> :(
>
> Yes. It will also prevent the gpu reset which those tests depend upon.

Jani pointed me to the dups. There will be more, I think. The test
should have been deeper in the reset handling, and instead of
bailing out we should have requeued ourselves at the least.

Sorry.
-Mika

> -Chris
>
> -- 
> Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2015-02-03 14:05 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-26 16:03 [PATCH 1/4] drm/i915: Convert hangcheck from a timer into a delayed work item Mika Kuoppala
2015-01-26 16:03 ` [PATCH 2/4] drm/i915: Display current hangcheck status in debugfs Mika Kuoppala
2015-01-26 16:03 ` [PATCH 3/4] drm/i915: Remove nested work in gpu error handling Mika Kuoppala
2015-01-27  9:53   ` Chris Wilson
2015-01-28  9:37     ` Daniel Vetter
2015-01-28 15:03     ` [PATCH] " Mika Kuoppala
2015-01-28 15:30       ` Chris Wilson
2015-01-29 17:03         ` Daniel Vetter
2015-02-02  9:17       ` Chris Wilson
2015-02-02  9:38         ` Daniel Vetter
2015-02-02 10:08           ` Chris Wilson
2015-02-03 14:05             ` Mika Kuoppala
2015-01-26 16:03 ` [PATCH 4/4] drm/i915: Be consistent on printing seqnos Mika Kuoppala
2015-01-27  9:52   ` Chris Wilson
2015-01-28 11:59   ` shuang.he
2015-02-03 12:23     ` Jani Nikula

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.