All of lore.kernel.org
 help / color / mirror / Atom feed
* [CI 1/5] drm/i915: Allow disabling error capture
@ 2016-10-11  9:27 Chris Wilson
  2016-10-11  9:27 ` [CI 2/5] drm/i915: Stop the machine whilst capturing the GPU crash dump Chris Wilson
                   ` (7 more replies)
  0 siblings, 8 replies; 19+ messages in thread
From: Chris Wilson @ 2016-10-11  9:27 UTC (permalink / raw)
  To: intel-gfx

We currently capture the GPU state after we detect a hang. This is vital
for us to both triage and debug hangs in the wild (post-mortem
debugging). However, it comes at the cost of running some potentially
dangerous code (since it has to make very few assumption about the state
of the driver) that is quite resource intensive.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/Kconfig          | 10 ++++++++++
 drivers/gpu/drm/i915/i915_debugfs.c   |  6 ++++++
 drivers/gpu/drm/i915/i915_drv.h       | 16 ++++++++++++++++
 drivers/gpu/drm/i915/i915_gpu_error.c |  7 +++++++
 drivers/gpu/drm/i915/i915_params.c    |  9 +++++++++
 drivers/gpu/drm/i915/i915_params.h    |  1 +
 drivers/gpu/drm/i915/i915_sysfs.c     |  8 ++++++++
 drivers/gpu/drm/i915/intel_display.c  |  4 ++++
 drivers/gpu/drm/i915/intel_overlay.c  |  4 ++++
 9 files changed, 65 insertions(+)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 7769e469118f..10a6ac11b6a9 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -46,6 +46,16 @@ config DRM_I915_PRELIMINARY_HW_SUPPORT
 
 	  If in doubt, say "N".
 
+config DRM_I915_CAPTURE_ERROR
+	bool "Enable capturing GPU state following a hang"
+	depends on DRM_I915
+	default y
+	help
+	  This option enables capturing the GPU state when a hang is detected.
+	  This information is vital for triaging hangs and assists in debugging.
+
+	  If in doubt, say "Y".
+
 config DRM_I915_USERPTR
 	bool "Always enable userptr support"
 	depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 20689f1cd719..e4b5ba771bea 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -960,6 +960,8 @@ static int i915_hws_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
+
 static ssize_t
 i915_error_state_write(struct file *filp,
 		       const char __user *ubuf,
@@ -1042,6 +1044,8 @@ static const struct file_operations i915_error_state_fops = {
 	.release = i915_error_state_release,
 };
 
+#endif
+
 static int
 i915_next_seqno_get(void *data, u64 *val)
 {
@@ -5398,7 +5402,9 @@ static const struct i915_debugfs_files {
 	{"i915_ring_missed_irq", &i915_ring_missed_irq_fops},
 	{"i915_ring_test_irq", &i915_ring_test_irq_fops},
 	{"i915_gem_drop_caches", &i915_drop_caches_fops},
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
 	{"i915_error_state", &i915_error_state_fops},
+#endif
 	{"i915_next_seqno", &i915_next_seqno_fops},
 	{"i915_display_crc_ctl", &i915_display_crc_ctl_fops},
 	{"i915_pri_wm_latency", &i915_pri_wm_latency_fops},
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 54d860e1c0fc..4570c4fa0287 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3544,6 +3544,8 @@ static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {}
 #endif
 
 /* i915_gpu_error.c */
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
+
 __printf(2, 3)
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
 int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
@@ -3564,6 +3566,20 @@ void i915_error_state_get(struct drm_device *dev,
 void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
 void i915_destroy_error_state(struct drm_device *dev);
 
+#else
+
+static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
+					    u32 engine_mask,
+					    const char *error_msg)
+{
+}
+
+static inline void i915_destroy_error_state(struct drm_device *dev)
+{
+}
+
+#endif
+
 void i915_get_engine_instdone(struct drm_i915_private *dev_priv,
 			      enum intel_engine_id engine_id,
 			      struct intel_instdone *instdone);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index b5b58692ac5a..9b395ffa3b6a 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -30,6 +30,8 @@
 #include <generated/utsrelease.h>
 #include "i915_drv.h"
 
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
+
 static const char *engine_str(int engine)
 {
 	switch (engine) {
@@ -1464,6 +1466,9 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
 	struct drm_i915_error_state *error;
 	unsigned long flags;
 
+	if (!i915.error_capture)
+		return;
+
 	if (READ_ONCE(dev_priv->gpu_error.first_error))
 		return;
 
@@ -1549,6 +1554,8 @@ void i915_destroy_error_state(struct drm_device *dev)
 		kref_put(&error->ref, i915_error_state_free);
 }
 
+#endif
+
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
 {
 	switch (type) {
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 768ad89d9cd4..e72a41223535 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -47,6 +47,7 @@ struct i915_params i915 __read_mostly = {
 	.load_detect_test = 0,
 	.force_reset_modeset_test = 0,
 	.reset = true,
+	.error_capture = true,
 	.invert_brightness = 0,
 	.disable_display = 0,
 	.enable_cmd_parser = 1,
@@ -115,6 +116,14 @@ MODULE_PARM_DESC(vbt_sdvo_panel_type,
 module_param_named_unsafe(reset, i915.reset, bool, 0600);
 MODULE_PARM_DESC(reset, "Attempt GPU resets (default: true)");
 
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
+module_param_named(error_capture, i915.error_capture, bool, 0600);
+MODULE_PARM_DESC(error_capture,
+	"Record the GPU state following a hang. "
+	"This information in /sys/class/drm/card<N>/error is vital for "
+	"triaging and debugging hangs.");
+#endif
+
 module_param_named_unsafe(enable_hangcheck, i915.enable_hangcheck, bool, 0644);
 MODULE_PARM_DESC(enable_hangcheck,
 	"Periodically check GPU activity for detecting hangs. "
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 3a0dd78ddb38..94efc899c1ef 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -59,6 +59,7 @@ struct i915_params {
 	bool load_detect_test;
 	bool force_reset_modeset_test;
 	bool reset;
+	bool error_capture;
 	bool disable_display;
 	bool verbose_state_checks;
 	bool nuclear_pageflip;
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 1012eeea1324..c9b71f676a57 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -514,6 +514,8 @@ static const struct attribute *vlv_attrs[] = {
 	NULL,
 };
 
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
+
 static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
 				struct bin_attribute *attr, char *buf,
 				loff_t off, size_t count)
@@ -571,6 +573,8 @@ static struct bin_attribute error_state_attr = {
 	.write = error_state_write,
 };
 
+#endif
+
 void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 {
 	struct device *kdev = dev_priv->drm.primary->kdev;
@@ -617,17 +621,21 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 	if (ret)
 		DRM_ERROR("RPS sysfs setup failed\n");
 
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
 	ret = sysfs_create_bin_file(&kdev->kobj,
 				    &error_state_attr);
 	if (ret)
 		DRM_ERROR("error_state sysfs setup failed\n");
+#endif
 }
 
 void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
 {
 	struct device *kdev = dev_priv->drm.primary->kdev;
 
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
 	sysfs_remove_bin_file(&kdev->kobj, &error_state_attr);
+#endif
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 		sysfs_remove_files(&kdev->kobj, vlv_attrs);
 	else
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 23a6c7213eca..202812916b67 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -17099,6 +17099,8 @@ int intel_modeset_vga_set_state(struct drm_device *dev, bool state)
 	return 0;
 }
 
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
+
 struct intel_display_error_state {
 
 	u32 power_well_driver;
@@ -17281,3 +17283,5 @@ intel_display_print_error_state(struct drm_i915_error_state_buf *m,
 		err_printf(m, "  VSYNC: %08x\n", error->transcoder[i].vsync);
 	}
 }
+
+#endif
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index a24bc8c7889f..7c392547711f 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -1470,6 +1470,8 @@ void intel_cleanup_overlay(struct drm_i915_private *dev_priv)
 	kfree(dev_priv->overlay);
 }
 
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
+
 struct intel_overlay_error_state {
 	struct overlay_registers regs;
 	unsigned long base;
@@ -1587,3 +1589,5 @@ intel_overlay_print_error_state(struct drm_i915_error_state_buf *m,
 	P(UVSCALEV);
 #undef P
 }
+
+#endif
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [CI 2/5] drm/i915: Stop the machine whilst capturing the GPU crash dump
  2016-10-11  9:27 [CI 1/5] drm/i915: Allow disabling error capture Chris Wilson
@ 2016-10-11  9:27 ` Chris Wilson
  2016-10-11  9:27 ` [CI 3/5] drm/i915: Always use the GTT for error capture Chris Wilson
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2016-10-11  9:27 UTC (permalink / raw)
  To: intel-gfx

The error state is purposefully racy as we expect it to be called at any
time and so have avoided any locking whilst capturing the crash dump.
However, with multi-engine GPUs and multiple CPUs, those races can
manifest into OOPSes as we attempt to chase dangling pointers freed on
other CPUs. Under discussion are lots of ways to slow down normal
operation in order to protect the post-mortem error capture, but what it
we take the opposite approach and freeze the machine whilst the error
capture runs (note the GPU may still running, but as long as we don't
process any of the results the driver's bookkeeping will be static).

Note that by of itself, this is not a complete fix. It also depends on
the compiler barriers in list_add/list_del to prevent traversing the
lists into the void. We also depend that we only require state from
carefully controlled sources - i.e. all the state we require for
post-mortem debugging should be reachable from the request itself so
that we only have to worry about retrieving the request carefully. Once
we have the request, we know that all pointers from it are intact.

v2: Avoid drm_clflush_pages() inside stop_machine() as it may use
stop_machine() itself for its wbinvd fallback.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/Kconfig          |  1 +
 drivers/gpu/drm/i915/i915_drv.h       |  2 ++
 drivers/gpu/drm/i915/i915_gpu_error.c | 46 +++++++++++++++++++++--------------
 3 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 10a6ac11b6a9..0f46a9c04c0e 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -4,6 +4,7 @@ config DRM_I915
 	depends on X86 && PCI
 	select INTEL_GTT
 	select INTERVAL_TREE
+	select STOP_MACHINE
 	# we need shmfs for the swappable backing store, and in particular
 	# the shmem_readpage() which depends upon tmpfs
 	select SHMEM
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4570c4fa0287..97203da3a08d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -746,6 +746,8 @@ struct drm_i915_error_state {
 	struct kref ref;
 	struct timeval time;
 
+	struct drm_i915_private *i915;
+
 	char error_msg[128];
 	bool simulated;
 	int iommu;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 9b395ffa3b6a..69d4cffe4a32 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -28,6 +28,7 @@
  */
 
 #include <generated/utsrelease.h>
+#include <linux/stop_machine.h>
 #include "i915_drv.h"
 
 #ifdef CONFIG_DRM_I915_CAPTURE_ERROR
@@ -746,14 +747,12 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 
 	dst->page_count = num_pages;
 	while (num_pages--) {
-		unsigned long flags;
 		void *d;
 
 		d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
 		if (d == NULL)
 			goto unwind;
 
-		local_irq_save(flags);
 		if (use_ggtt) {
 			void __iomem *s;
 
@@ -772,15 +771,10 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 
 			page = i915_gem_object_get_page(src, i);
 
-			drm_clflush_pages(&page, 1);
-
 			s = kmap_atomic(page);
 			memcpy(d, s, PAGE_SIZE);
 			kunmap_atomic(s);
-
-			drm_clflush_pages(&page, 1);
 		}
-		local_irq_restore(flags);
 
 		dst->pages[i++] = d;
 		reloc_offset += PAGE_SIZE;
@@ -1449,6 +1443,31 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
 	       sizeof(error->device_info));
 }
 
+static int capture(void *data)
+{
+	struct drm_i915_error_state *error = data;
+
+	/* Ensure that what we readback from memory matches what the GPU sees */
+	wbinvd();
+
+	i915_capture_gen_state(error->i915, error);
+	i915_capture_reg_state(error->i915, error);
+	i915_gem_record_fences(error->i915, error);
+	i915_gem_record_rings(error->i915, error);
+	i915_capture_active_buffers(error->i915, error);
+	i915_capture_pinned_buffers(error->i915, error);
+
+	do_gettimeofday(&error->time);
+
+	error->overlay = intel_overlay_capture_error_state(error->i915);
+	error->display = intel_display_capture_error_state(error->i915);
+
+	/* And make sure we don't leave trash in the CPU cache */
+	wbinvd();
+
+	return 0;
+}
+
 /**
  * i915_capture_error_state - capture an error record for later analysis
  * @dev: drm device
@@ -1480,18 +1499,9 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
 	}
 
 	kref_init(&error->ref);
+	error->i915 = dev_priv;
 
-	i915_capture_gen_state(dev_priv, error);
-	i915_capture_reg_state(dev_priv, error);
-	i915_gem_record_fences(dev_priv, error);
-	i915_gem_record_rings(dev_priv, error);
-	i915_capture_active_buffers(dev_priv, error);
-	i915_capture_pinned_buffers(dev_priv, error);
-
-	do_gettimeofday(&error->time);
-
-	error->overlay = intel_overlay_capture_error_state(dev_priv);
-	error->display = intel_display_capture_error_state(dev_priv);
+	stop_machine(capture, error, NULL);
 
 	i915_error_capture_msg(dev_priv, error, engine_mask, error_msg);
 	DRM_INFO("%s\n", error->error_msg);
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [CI 3/5] drm/i915: Always use the GTT for error capture
  2016-10-11  9:27 [CI 1/5] drm/i915: Allow disabling error capture Chris Wilson
  2016-10-11  9:27 ` [CI 2/5] drm/i915: Stop the machine whilst capturing the GPU crash dump Chris Wilson
@ 2016-10-11  9:27 ` Chris Wilson
  2016-10-11  9:27 ` [CI 4/5] drm/i915: Consolidate error object printing Chris Wilson
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2016-10-11  9:27 UTC (permalink / raw)
  To: intel-gfx

Since the GTT provides universal access to any GPU page, we can use it
to reduce our plethora of read methods to just one. It also has the
important characteristic of being exactly what the GPU sees - if there
are incoherency problems, seeing the batch as executed (rather than as
trapped inside the cpu cache) is important.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c   |  43 ++++++++----
 drivers/gpu/drm/i915/i915_gem_gtt.h   |   2 +
 drivers/gpu/drm/i915/i915_gpu_error.c | 120 ++++++++++++----------------------
 3 files changed, 74 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0bb4232f66bc..2d846aa39ca5 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2717,6 +2717,7 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 	 */
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	unsigned long hole_start, hole_end;
+	struct i915_hw_ppgtt *ppgtt;
 	struct drm_mm_node *entry;
 	int ret;
 
@@ -2724,6 +2725,15 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 	if (ret)
 		return ret;
 
+	/* Reserve a mappable slot for our lockless error capture */
+	ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
+						  &ggtt->error_capture,
+						  4096, 0, -1,
+						  0, ggtt->mappable_end,
+						  0, 0);
+	if (ret)
+		return ret;
+
 	/* Clear any non-preallocated blocks */
 	drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
@@ -2738,25 +2748,21 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 			       true);
 
 	if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
-		struct i915_hw_ppgtt *ppgtt;
-
 		ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
-		if (!ppgtt)
-			return -ENOMEM;
+		if (!ppgtt) {
+			ret = -ENOMEM;
+			goto err;
+		}
 
 		ret = __hw_ppgtt_init(ppgtt, dev_priv);
-		if (ret) {
-			kfree(ppgtt);
-			return ret;
-		}
+		if (ret)
+			goto err_ppgtt;
 
-		if (ppgtt->base.allocate_va_range)
+		if (ppgtt->base.allocate_va_range) {
 			ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
 							    ppgtt->base.total);
-		if (ret) {
-			ppgtt->base.cleanup(&ppgtt->base);
-			kfree(ppgtt);
-			return ret;
+			if (ret)
+				goto err_ppgtt_cleanup;
 		}
 
 		ppgtt->base.clear_range(&ppgtt->base,
@@ -2770,6 +2776,14 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 	}
 
 	return 0;
+
+err_ppgtt_cleanup:
+	ppgtt->base.cleanup(&ppgtt->base);
+err_ppgtt:
+	kfree(ppgtt);
+err:
+	drm_mm_remove_node(&ggtt->error_capture);
+	return ret;
 }
 
 /**
@@ -2788,6 +2802,9 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
 
 	i915_gem_cleanup_stolen(&dev_priv->drm);
 
+	if (drm_mm_node_allocated(&ggtt->error_capture))
+		drm_mm_remove_node(&ggtt->error_capture);
+
 	if (drm_mm_initialized(&ggtt->base.mm)) {
 		intel_vgt_deballoon(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index ec78be2f8c77..bd93fb8f99d2 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -450,6 +450,8 @@ struct i915_ggtt {
 	bool do_idle_maps;
 
 	int mtrr;
+
+	struct drm_mm_node error_capture;
 };
 
 struct i915_hw_ppgtt {
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 69d4cffe4a32..be82eadac9bb 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -658,7 +658,7 @@ static void i915_error_object_free(struct drm_i915_error_object *obj)
 		return;
 
 	for (page = 0; page < obj->page_count; page++)
-		kfree(obj->pages[page]);
+		free_page((unsigned long)obj->pages[page]);
 
 	kfree(obj);
 }
@@ -695,98 +695,69 @@ static void i915_error_state_free(struct kref *error_ref)
 	kfree(error);
 }
 
+static int compress_page(void *src, struct drm_i915_error_object *dst)
+{
+	unsigned long page;
+
+	page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
+	if (!page)
+		return -ENOMEM;
+
+	dst->pages[dst->page_count++] = (void *)page;
+
+	memcpy((void *)page, src, PAGE_SIZE);
+	return 0;
+}
+
 static struct drm_i915_error_object *
-i915_error_object_create(struct drm_i915_private *dev_priv,
+i915_error_object_create(struct drm_i915_private *i915,
 			 struct i915_vma *vma)
 {
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	struct drm_i915_gem_object *src;
+	struct i915_ggtt *ggtt = &i915->ggtt;
+	const u64 slot = ggtt->error_capture.start;
 	struct drm_i915_error_object *dst;
-	int num_pages;
-	bool use_ggtt;
-	int i = 0;
-	u64 reloc_offset;
+	unsigned long num_pages;
+	struct sgt_iter iter;
+	dma_addr_t dma;
 
 	if (!vma)
 		return NULL;
 
-	src = vma->obj;
-	if (!src->pages)
-		return NULL;
-
-	num_pages = src->base.size >> PAGE_SHIFT;
-
-	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC);
+	num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
+	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *),
+		      GFP_ATOMIC | __GFP_NOWARN);
 	if (!dst)
 		return NULL;
 
 	dst->gtt_offset = vma->node.start;
 	dst->gtt_size = vma->node.size;
+	dst->page_count = 0;
 
-	reloc_offset = dst->gtt_offset;
-	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
-		   (vma->flags & I915_VMA_GLOBAL_BIND) &&
-		   reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end);
-
-	/* Cannot access stolen address directly, try to use the aperture */
-	if (src->stolen) {
-		use_ggtt = true;
-
-		if (!(vma->flags & I915_VMA_GLOBAL_BIND))
-			goto unwind;
-
-		reloc_offset = vma->node.start;
-		if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end)
-			goto unwind;
-	}
+	for_each_sgt_dma(dma, iter, vma->pages) {
+		void __iomem *s;
+		int ret;
 
-	/* Cannot access snooped pages through the aperture */
-	if (use_ggtt && src->cache_level != I915_CACHE_NONE &&
-	    !HAS_LLC(dev_priv))
-		goto unwind;
+		ggtt->base.insert_page(&ggtt->base, dma, slot,
+				       I915_CACHE_NONE, 0);
 
-	dst->page_count = num_pages;
-	while (num_pages--) {
-		void *d;
+		s = io_mapping_map_atomic_wc(&ggtt->mappable, slot);
+		ret = compress_page((void * __force)s, dst);
+		io_mapping_unmap_atomic(s);
 
-		d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
-		if (d == NULL)
+		if (ret)
 			goto unwind;
-
-		if (use_ggtt) {
-			void __iomem *s;
-
-			/* Simply ignore tiling or any overlapping fence.
-			 * It's part of the error state, and this hopefully
-			 * captures what the GPU read.
-			 */
-
-			s = io_mapping_map_atomic_wc(&ggtt->mappable,
-						     reloc_offset);
-			memcpy_fromio(d, s, PAGE_SIZE);
-			io_mapping_unmap_atomic(s);
-		} else {
-			struct page *page;
-			void *s;
-
-			page = i915_gem_object_get_page(src, i);
-
-			s = kmap_atomic(page);
-			memcpy(d, s, PAGE_SIZE);
-			kunmap_atomic(s);
-		}
-
-		dst->pages[i++] = d;
-		reloc_offset += PAGE_SIZE;
 	}
-
-	return dst;
+	goto out;
 
 unwind:
-	while (i--)
-		kfree(dst->pages[i]);
+	while (dst->page_count--)
+		free_page((unsigned long)dst->pages[dst->page_count]);
 	kfree(dst);
-	return NULL;
+	dst = NULL;
+
+out:
+	ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE, true);
+	return dst;
 }
 
 /* The error capture is special as tries to run underneath the normal
@@ -1447,9 +1418,6 @@ static int capture(void *data)
 {
 	struct drm_i915_error_state *error = data;
 
-	/* Ensure that what we readback from memory matches what the GPU sees */
-	wbinvd();
-
 	i915_capture_gen_state(error->i915, error);
 	i915_capture_reg_state(error->i915, error);
 	i915_gem_record_fences(error->i915, error);
@@ -1462,9 +1430,6 @@ static int capture(void *data)
 	error->overlay = intel_overlay_capture_error_state(error->i915);
 	error->display = intel_display_capture_error_state(error->i915);
 
-	/* And make sure we don't leave trash in the CPU cache */
-	wbinvd();
-
 	return 0;
 }
 
@@ -1541,7 +1506,6 @@ void i915_error_state_get(struct drm_device *dev,
 	if (error_priv->error)
 		kref_get(&error_priv->error->ref);
 	spin_unlock_irq(&dev_priv->gpu_error.lock);
-
 }
 
 void i915_error_state_put(struct i915_error_state_file_priv *error_priv)
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [CI 4/5] drm/i915: Consolidate error object printing
  2016-10-11  9:27 [CI 1/5] drm/i915: Allow disabling error capture Chris Wilson
  2016-10-11  9:27 ` [CI 2/5] drm/i915: Stop the machine whilst capturing the GPU crash dump Chris Wilson
  2016-10-11  9:27 ` [CI 3/5] drm/i915: Always use the GTT for error capture Chris Wilson
@ 2016-10-11  9:27 ` Chris Wilson
  2016-10-11  9:27 ` [CI 5/5] drm/i915: Compress GPU objects in error state Chris Wilson
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2016-10-11  9:27 UTC (permalink / raw)
  To: intel-gfx

Leave all the pretty printing to userspace and simplify the error
capture to only have a single common object printer. It makes the kernel
code more compact, and the refactoring allows us to apply more complex
transformations like compressing the output.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 100 +++++++++-------------------------
 1 file changed, 25 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index be82eadac9bb..bf2498297341 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -343,10 +343,22 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
 }
 
 static void print_error_obj(struct drm_i915_error_state_buf *m,
+			    struct intel_engine_cs *engine,
+			    const char *name,
 			    struct drm_i915_error_object *obj)
 {
 	int page, offset, elt;
 
+	if (!obj)
+		return;
+
+	if (name) {
+		err_printf(m, "%s --- %s = 0x%08x %08x\n",
+			   engine ? engine->name : "global", name,
+			   upper_32_bits(obj->gtt_offset),
+			   lower_32_bits(obj->gtt_offset));
+	}
+
 	for (page = offset = 0; page < obj->page_count; page++) {
 		for (elt = 0; elt < PAGE_SIZE/4; elt++) {
 			err_printf(m, "%08x :  %08x\n", offset,
@@ -372,8 +384,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 	struct drm_i915_error_state *error = error_priv->error;
 	struct drm_i915_error_object *obj;
-	int i, j, offset, elt;
 	int max_hangcheck_score;
+	int i, j;
 
 	if (!error) {
 		err_printf(m, "no error state collected\n");
@@ -493,15 +505,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 			err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
 				   upper_32_bits(obj->gtt_offset),
 				   lower_32_bits(obj->gtt_offset));
-			print_error_obj(m, obj);
-		}
-
-		obj = ee->wa_batchbuffer;
-		if (obj) {
-			err_printf(m, "%s (w/a) --- gtt_offset = 0x%08x\n",
-				   dev_priv->engine[i].name,
-				   lower_32_bits(obj->gtt_offset));
-			print_error_obj(m, obj);
+			print_error_obj(m, &dev_priv->engine[i], NULL, obj);
 		}
 
 		if (ee->num_requests) {
@@ -533,77 +537,23 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 			}
 		}
 
-		if ((obj = ee->ringbuffer)) {
-			err_printf(m, "%s --- ringbuffer = 0x%08x\n",
-				   dev_priv->engine[i].name,
-				   lower_32_bits(obj->gtt_offset));
-			print_error_obj(m, obj);
-		}
+		print_error_obj(m, &dev_priv->engine[i],
+				"ringbuffer", ee->ringbuffer);
 
-		if ((obj = ee->hws_page)) {
-			u64 hws_offset = obj->gtt_offset;
-			u32 *hws_page = &obj->pages[0][0];
+		print_error_obj(m, &dev_priv->engine[i],
+				"HW Status", ee->hws_page);
 
-			if (i915.enable_execlists) {
-				hws_offset += LRC_PPHWSP_PN * PAGE_SIZE;
-				hws_page = &obj->pages[LRC_PPHWSP_PN][0];
-			}
-			err_printf(m, "%s --- HW Status = 0x%08llx\n",
-				   dev_priv->engine[i].name, hws_offset);
-			offset = 0;
-			for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
-				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
-					   offset,
-					   hws_page[elt],
-					   hws_page[elt+1],
-					   hws_page[elt+2],
-					   hws_page[elt+3]);
-				offset += 16;
-			}
-		}
+		print_error_obj(m, &dev_priv->engine[i],
+				"HW context", ee->ctx);
 
-		obj = ee->wa_ctx;
-		if (obj) {
-			u64 wa_ctx_offset = obj->gtt_offset;
-			u32 *wa_ctx_page = &obj->pages[0][0];
-			struct intel_engine_cs *engine = &dev_priv->engine[RCS];
-			u32 wa_ctx_size = (engine->wa_ctx.indirect_ctx.size +
-					   engine->wa_ctx.per_ctx.size);
-
-			err_printf(m, "%s --- WA ctx batch buffer = 0x%08llx\n",
-				   dev_priv->engine[i].name, wa_ctx_offset);
-			offset = 0;
-			for (elt = 0; elt < wa_ctx_size; elt += 4) {
-				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
-					   offset,
-					   wa_ctx_page[elt + 0],
-					   wa_ctx_page[elt + 1],
-					   wa_ctx_page[elt + 2],
-					   wa_ctx_page[elt + 3]);
-				offset += 16;
-			}
-		}
+		print_error_obj(m, &dev_priv->engine[i],
+				"WA context", ee->wa_ctx);
 
-		if ((obj = ee->ctx)) {
-			err_printf(m, "%s --- HW Context = 0x%08x\n",
-				   dev_priv->engine[i].name,
-				   lower_32_bits(obj->gtt_offset));
-			print_error_obj(m, obj);
-		}
+		print_error_obj(m, &dev_priv->engine[i],
+				"WA batchbuffer", ee->wa_batchbuffer);
 	}
 
-	if ((obj = error->semaphore)) {
-		err_printf(m, "Semaphore page = 0x%08x\n",
-			   lower_32_bits(obj->gtt_offset));
-		for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
-			err_printf(m, "[%04x] %08x %08x %08x %08x\n",
-				   elt * 4,
-				   obj->pages[0][elt],
-				   obj->pages[0][elt+1],
-				   obj->pages[0][elt+2],
-				   obj->pages[0][elt+3]);
-		}
-	}
+	print_error_obj(m, NULL, "Semaphores", error->semaphore);
 
 	if (error->overlay)
 		intel_overlay_print_error_state(m, error->overlay);
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [CI 5/5] drm/i915: Compress GPU objects in error state
  2016-10-11  9:27 [CI 1/5] drm/i915: Allow disabling error capture Chris Wilson
                   ` (2 preceding siblings ...)
  2016-10-11  9:27 ` [CI 4/5] drm/i915: Consolidate error object printing Chris Wilson
@ 2016-10-11  9:27 ` Chris Wilson
  2016-10-11  9:52 ` [CI 1/5] drm/i915: Allow disabling error capture Jani Nikula
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2016-10-11  9:27 UTC (permalink / raw)
  To: intel-gfx

Our error states are quickly growing, pinning kernel memory with them.
The majority of the space is taken up by the error objects. These
compress well using zlib and without decode are mostly meaningless, so
encoding them does not hinder quickly parsing the error state for
familiarity.

v2: Make the zlib dependency optional

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/Kconfig          |  12 +++
 drivers/gpu/drm/i915/i915_drv.h       |   3 +-
 drivers/gpu/drm/i915/i915_gpu_error.c | 176 ++++++++++++++++++++++++++++++----
 3 files changed, 169 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 0f46a9c04c0e..92ecced1bc8f 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -57,6 +57,18 @@ config DRM_I915_CAPTURE_ERROR
 
 	  If in doubt, say "Y".
 
+config DRM_I915_COMPRESS_ERROR
+	bool "Compress GPU error state"
+	depends on DRM_I915_CAPTURE_ERROR
+	select ZLIB_DEFLATE
+	default y
+	help
+	  This option selects ZLIB_DEFLATE if it isn't already
+	  selected and causes any error state captured upon a GPU hang
+	  to be compressed using zlib.
+
+	  If in doubt, say "Y".
+
 config DRM_I915_USERPTR
 	bool "Always enable userptr support"
 	depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 97203da3a08d..3d46f57eed3f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -820,9 +820,10 @@ struct drm_i915_error_state {
 		struct intel_instdone instdone;
 
 		struct drm_i915_error_object {
-			int page_count;
 			u64 gtt_offset;
 			u64 gtt_size;
+			int page_count;
+			int unused;
 			u32 *pages[0];
 		} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index bf2498297341..6c22be28ba01 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -29,6 +29,7 @@
 
 #include <generated/utsrelease.h>
 #include <linux/stop_machine.h>
+#include <linux/zlib.h>
 #include "i915_drv.h"
 
 #ifdef CONFIG_DRM_I915_CAPTURE_ERROR
@@ -175,6 +176,110 @@ static void i915_error_puts(struct drm_i915_error_state_buf *e,
 #define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
 #define err_puts(e, s) i915_error_puts(e, s)
 
+#ifdef CONFIG_DRM_I915_COMPRESS_ERROR
+
+static bool compress_init(struct z_stream_s *zstream)
+{
+	memset(zstream, 0, sizeof(*zstream));
+
+	zstream->workspace =
+		kmalloc(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
+			GFP_ATOMIC | __GFP_NOWARN);
+	if (!zstream->workspace)
+		return false;
+
+	if (zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) != Z_OK) {
+		kfree(zstream->workspace);
+		return false;
+	}
+
+	return true;
+}
+
+static int compress_page(struct z_stream_s *zstream,
+			 void *src,
+			 struct drm_i915_error_object *dst)
+{
+	zstream->next_in = src;
+	zstream->avail_in = PAGE_SIZE;
+
+	do {
+		if (zstream->avail_out == 0) {
+			unsigned long page;
+
+			page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
+			if (!page)
+				return -ENOMEM;
+
+			dst->pages[dst->page_count++] = (void *)page;
+
+			zstream->next_out = (void *)page;
+			zstream->avail_out = PAGE_SIZE;
+		}
+
+		if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK)
+			return -EIO;
+	} while (zstream->avail_in);
+
+	/* Fallback to uncompressed if we increase size? */
+	if (0 && zstream->total_out > zstream->total_in)
+		return -E2BIG;
+
+	return 0;
+}
+
+static void compress_fini(struct z_stream_s *zstream,
+			  struct drm_i915_error_object *dst)
+{
+	if (dst) {
+		zlib_deflate(zstream, Z_FINISH);
+		dst->unused = zstream->avail_out;
+	}
+
+	zlib_deflateEnd(zstream);
+	kfree(zstream->workspace);
+}
+
+static void err_compression_marker(struct drm_i915_error_state_buf *m)
+{
+	err_puts(m, ":");
+}
+
+#else
+
+static bool compress_init(struct z_stream_s *zstream)
+{
+	return true;
+}
+
+static int compress_page(struct z_stream_s *zstream,
+			 void *src,
+			 struct drm_i915_error_object *dst)
+{
+	unsigned long page;
+
+	page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
+	if (!page)
+		return -ENOMEM;
+
+	dst->pages[dst->page_count++] =
+		memcpy((void *)page, src, PAGE_SIZE);
+
+	return 0;
+}
+
+static void compress_fini(struct z_stream_s *zstream,
+			  struct drm_i915_error_object *dst)
+{
+}
+
+static void err_compression_marker(struct drm_i915_error_state_buf *m)
+{
+	err_puts(m, "~");
+}
+
+#endif
+
 static void print_error_buffers(struct drm_i915_error_state_buf *m,
 				const char *name,
 				struct drm_i915_error_buffer *err,
@@ -342,12 +447,36 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
 	va_end(args);
 }
 
+static int
+ascii85_encode_len(int len)
+{
+	return DIV_ROUND_UP(len, 4);
+}
+
+static bool
+ascii85_encode(u32 in, char *out)
+{
+	int i;
+
+	if (in == 0)
+		return false;
+
+	out[5] = '\0';
+	for (i = 5; i--; ) {
+		out[i] = '!' + in % 85;
+		in /= 85;
+	}
+
+	return true;
+}
+
 static void print_error_obj(struct drm_i915_error_state_buf *m,
 			    struct intel_engine_cs *engine,
 			    const char *name,
 			    struct drm_i915_error_object *obj)
 {
-	int page, offset, elt;
+	char out[6];
+	int page;
 
 	if (!obj)
 		return;
@@ -359,13 +488,23 @@ static void print_error_obj(struct drm_i915_error_state_buf *m,
 			   lower_32_bits(obj->gtt_offset));
 	}
 
-	for (page = offset = 0; page < obj->page_count; page++) {
-		for (elt = 0; elt < PAGE_SIZE/4; elt++) {
-			err_printf(m, "%08x :  %08x\n", offset,
-				   obj->pages[page][elt]);
-			offset += 4;
+	err_compression_marker(m);
+	for (page = 0; page < obj->page_count; page++) {
+		int i, len;
+
+		len = PAGE_SIZE;
+		if (page == obj->page_count - 1)
+			len -= obj->unused;
+		len = ascii85_encode_len(len);
+
+		for (i = 0; i < len; i++) {
+			if (ascii85_encode(obj->pages[page][i], out))
+				err_puts(m, out);
+			else
+				err_puts(m, "z");
 		}
 	}
+	err_puts(m, "\n");
 }
 
 static void err_print_capabilities(struct drm_i915_error_state_buf *m,
@@ -645,20 +784,6 @@ static void i915_error_state_free(struct kref *error_ref)
 	kfree(error);
 }
 
-static int compress_page(void *src, struct drm_i915_error_object *dst)
-{
-	unsigned long page;
-
-	page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
-	if (!page)
-		return -ENOMEM;
-
-	dst->pages[dst->page_count++] = (void *)page;
-
-	memcpy((void *)page, src, PAGE_SIZE);
-	return 0;
-}
-
 static struct drm_i915_error_object *
 i915_error_object_create(struct drm_i915_private *i915,
 			 struct i915_vma *vma)
@@ -666,6 +791,7 @@ i915_error_object_create(struct drm_i915_private *i915,
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	const u64 slot = ggtt->error_capture.start;
 	struct drm_i915_error_object *dst;
+	struct z_stream_s zstream;
 	unsigned long num_pages;
 	struct sgt_iter iter;
 	dma_addr_t dma;
@@ -674,6 +800,7 @@ i915_error_object_create(struct drm_i915_private *i915,
 		return NULL;
 
 	num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
+	num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */
 	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *),
 		      GFP_ATOMIC | __GFP_NOWARN);
 	if (!dst)
@@ -682,6 +809,12 @@ i915_error_object_create(struct drm_i915_private *i915,
 	dst->gtt_offset = vma->node.start;
 	dst->gtt_size = vma->node.size;
 	dst->page_count = 0;
+	dst->unused = 0;
+
+	if (!compress_init(&zstream)) {
+		kfree(dst);
+		return NULL;
+	}
 
 	for_each_sgt_dma(dma, iter, vma->pages) {
 		void __iomem *s;
@@ -691,7 +824,7 @@ i915_error_object_create(struct drm_i915_private *i915,
 				       I915_CACHE_NONE, 0);
 
 		s = io_mapping_map_atomic_wc(&ggtt->mappable, slot);
-		ret = compress_page((void * __force)s, dst);
+		ret = compress_page(&zstream, (void  __force *)s, dst);
 		io_mapping_unmap_atomic(s);
 
 		if (ret)
@@ -706,6 +839,7 @@ unwind:
 	dst = NULL;
 
 out:
+	compress_fini(&zstream, dst);
 	ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE, true);
 	return dst;
 }
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [CI 1/5] drm/i915: Allow disabling error capture
  2016-10-11  9:27 [CI 1/5] drm/i915: Allow disabling error capture Chris Wilson
                   ` (3 preceding siblings ...)
  2016-10-11  9:27 ` [CI 5/5] drm/i915: Compress GPU objects in error state Chris Wilson
@ 2016-10-11  9:52 ` Jani Nikula
  2016-10-11  9:57   ` Chris Wilson
  2016-10-11 11:20 ` ✗ Fi.CI.BAT: warning for series starting with [CI,1/5] drm/i915: Allow disabling error capture Patchwork
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 19+ messages in thread
From: Jani Nikula @ 2016-10-11  9:52 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Tue, 11 Oct 2016, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> We currently capture the GPU state after we detect a hang. This is vital
> for us to both triage and debug hangs in the wild (post-mortem
> debugging). However, it comes at the cost of running some potentially
> dangerous code (since it has to make very few assumption about the state
> of the driver) that is quite resource intensive.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/Kconfig          | 10 ++++++++++
>  drivers/gpu/drm/i915/i915_debugfs.c   |  6 ++++++
>  drivers/gpu/drm/i915/i915_drv.h       | 16 ++++++++++++++++
>  drivers/gpu/drm/i915/i915_gpu_error.c |  7 +++++++
>  drivers/gpu/drm/i915/i915_params.c    |  9 +++++++++
>  drivers/gpu/drm/i915/i915_params.h    |  1 +
>  drivers/gpu/drm/i915/i915_sysfs.c     |  8 ++++++++
>  drivers/gpu/drm/i915/intel_display.c  |  4 ++++
>  drivers/gpu/drm/i915/intel_overlay.c  |  4 ++++
>  9 files changed, 65 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> index 7769e469118f..10a6ac11b6a9 100644
> --- a/drivers/gpu/drm/i915/Kconfig
> +++ b/drivers/gpu/drm/i915/Kconfig
> @@ -46,6 +46,16 @@ config DRM_I915_PRELIMINARY_HW_SUPPORT
>  
>  	  If in doubt, say "N".
>  
> +config DRM_I915_CAPTURE_ERROR
> +	bool "Enable capturing GPU state following a hang"
> +	depends on DRM_I915
> +	default y
> +	help
> +	  This option enables capturing the GPU state when a hang is detected.
> +	  This information is vital for triaging hangs and assists in debugging.
> +
> +	  If in doubt, say "Y".
> +
>  config DRM_I915_USERPTR
>  	bool "Always enable userptr support"
>  	depends on DRM_I915
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 20689f1cd719..e4b5ba771bea 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -960,6 +960,8 @@ static int i915_hws_info(struct seq_file *m, void *data)
>  	return 0;
>  }
>  
> +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> +
>  static ssize_t
>  i915_error_state_write(struct file *filp,
>  		       const char __user *ubuf,
> @@ -1042,6 +1044,8 @@ static const struct file_operations i915_error_state_fops = {
>  	.release = i915_error_state_release,
>  };
>  
> +#endif
> +
>  static int
>  i915_next_seqno_get(void *data, u64 *val)
>  {
> @@ -5398,7 +5402,9 @@ static const struct i915_debugfs_files {
>  	{"i915_ring_missed_irq", &i915_ring_missed_irq_fops},
>  	{"i915_ring_test_irq", &i915_ring_test_irq_fops},
>  	{"i915_gem_drop_caches", &i915_drop_caches_fops},
> +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
>  	{"i915_error_state", &i915_error_state_fops},
> +#endif
>  	{"i915_next_seqno", &i915_next_seqno_fops},
>  	{"i915_display_crc_ctl", &i915_display_crc_ctl_fops},
>  	{"i915_pri_wm_latency", &i915_pri_wm_latency_fops},
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 54d860e1c0fc..4570c4fa0287 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3544,6 +3544,8 @@ static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {}
>  #endif
>  
>  /* i915_gpu_error.c */
> +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> +
>  __printf(2, 3)
>  void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
>  int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
> @@ -3564,6 +3566,20 @@ void i915_error_state_get(struct drm_device *dev,
>  void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
>  void i915_destroy_error_state(struct drm_device *dev);
>  
> +#else
> +
> +static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
> +					    u32 engine_mask,
> +					    const char *error_msg)
> +{
> +}
> +
> +static inline void i915_destroy_error_state(struct drm_device *dev)
> +{
> +}
> +
> +#endif
> +
>  void i915_get_engine_instdone(struct drm_i915_private *dev_priv,
>  			      enum intel_engine_id engine_id,
>  			      struct intel_instdone *instdone);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index b5b58692ac5a..9b395ffa3b6a 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -30,6 +30,8 @@
>  #include <generated/utsrelease.h>
>  #include "i915_drv.h"
>  
> +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> +
>  static const char *engine_str(int engine)
>  {
>  	switch (engine) {
> @@ -1464,6 +1466,9 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
>  	struct drm_i915_error_state *error;
>  	unsigned long flags;
>  
> +	if (!i915.error_capture)
> +		return;
> +
>  	if (READ_ONCE(dev_priv->gpu_error.first_error))
>  		return;
>  
> @@ -1549,6 +1554,8 @@ void i915_destroy_error_state(struct drm_device *dev)
>  		kref_put(&error->ref, i915_error_state_free);
>  }
>  
> +#endif
> +
>  const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
>  {
>  	switch (type) {
> diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
> index 768ad89d9cd4..e72a41223535 100644
> --- a/drivers/gpu/drm/i915/i915_params.c
> +++ b/drivers/gpu/drm/i915/i915_params.c
> @@ -47,6 +47,7 @@ struct i915_params i915 __read_mostly = {
>  	.load_detect_test = 0,
>  	.force_reset_modeset_test = 0,
>  	.reset = true,
> +	.error_capture = true,
>  	.invert_brightness = 0,
>  	.disable_display = 0,
>  	.enable_cmd_parser = 1,
> @@ -115,6 +116,14 @@ MODULE_PARM_DESC(vbt_sdvo_panel_type,
>  module_param_named_unsafe(reset, i915.reset, bool, 0600);
>  MODULE_PARM_DESC(reset, "Attempt GPU resets (default: true)");
>  
> +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> +module_param_named(error_capture, i915.error_capture, bool, 0600);
> +MODULE_PARM_DESC(error_capture,
> +	"Record the GPU state following a hang. "
> +	"This information in /sys/class/drm/card<N>/error is vital for "
> +	"triaging and debugging hangs.");
> +#endif

The addition of the module parameter is really orthogonal to the config
option, and should be added separately.

The question is, what is the target audience of
CONFIG_DRM_I915_CAPTURE_ERROR=n? Is the added config #ifdeffery worth
the trouble? It *is* a maintainability burden, because we will only ever
be building with CONFIG_DRM_I915_CAPTURE_ERROR=y ourselves, and we'll
screw it up anyway. Does i915.error_capture=0 accomplish the same thing
(apart from not reducing code size)? Do we really need to be able to
prevent the root from enabling error capture on the fly?

What would be wrong with just adding the module parameter, and making
CONFIG_DRM_I915_CAPTURE_ERROR the default value for it? I.e.

	.error_capture = IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR),

in the initialization. No #ifdefs anywhere.

BR,
Jani.


> +
>  module_param_named_unsafe(enable_hangcheck, i915.enable_hangcheck, bool, 0644);
>  MODULE_PARM_DESC(enable_hangcheck,
>  	"Periodically check GPU activity for detecting hangs. "
> diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
> index 3a0dd78ddb38..94efc899c1ef 100644
> --- a/drivers/gpu/drm/i915/i915_params.h
> +++ b/drivers/gpu/drm/i915/i915_params.h
> @@ -59,6 +59,7 @@ struct i915_params {
>  	bool load_detect_test;
>  	bool force_reset_modeset_test;
>  	bool reset;
> +	bool error_capture;
>  	bool disable_display;
>  	bool verbose_state_checks;
>  	bool nuclear_pageflip;
> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> index 1012eeea1324..c9b71f676a57 100644
> --- a/drivers/gpu/drm/i915/i915_sysfs.c
> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> @@ -514,6 +514,8 @@ static const struct attribute *vlv_attrs[] = {
>  	NULL,
>  };
>  
> +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> +
>  static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
>  				struct bin_attribute *attr, char *buf,
>  				loff_t off, size_t count)
> @@ -571,6 +573,8 @@ static struct bin_attribute error_state_attr = {
>  	.write = error_state_write,
>  };
>  
> +#endif
> +
>  void i915_setup_sysfs(struct drm_i915_private *dev_priv)
>  {
>  	struct device *kdev = dev_priv->drm.primary->kdev;
> @@ -617,17 +621,21 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
>  	if (ret)
>  		DRM_ERROR("RPS sysfs setup failed\n");
>  
> +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
>  	ret = sysfs_create_bin_file(&kdev->kobj,
>  				    &error_state_attr);
>  	if (ret)
>  		DRM_ERROR("error_state sysfs setup failed\n");
> +#endif
>  }
>  
>  void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
>  {
>  	struct device *kdev = dev_priv->drm.primary->kdev;
>  
> +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
>  	sysfs_remove_bin_file(&kdev->kobj, &error_state_attr);
> +#endif
>  	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
>  		sysfs_remove_files(&kdev->kobj, vlv_attrs);
>  	else
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 23a6c7213eca..202812916b67 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -17099,6 +17099,8 @@ int intel_modeset_vga_set_state(struct drm_device *dev, bool state)
>  	return 0;
>  }
>  
> +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> +
>  struct intel_display_error_state {
>  
>  	u32 power_well_driver;
> @@ -17281,3 +17283,5 @@ intel_display_print_error_state(struct drm_i915_error_state_buf *m,
>  		err_printf(m, "  VSYNC: %08x\n", error->transcoder[i].vsync);
>  	}
>  }
> +
> +#endif
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index a24bc8c7889f..7c392547711f 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -1470,6 +1470,8 @@ void intel_cleanup_overlay(struct drm_i915_private *dev_priv)
>  	kfree(dev_priv->overlay);
>  }
>  
> +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> +
>  struct intel_overlay_error_state {
>  	struct overlay_registers regs;
>  	unsigned long base;
> @@ -1587,3 +1589,5 @@ intel_overlay_print_error_state(struct drm_i915_error_state_buf *m,
>  	P(UVSCALEV);
>  #undef P
>  }
> +
> +#endif

-- 
Jani Nikula, Intel Open Source Technology Center
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [CI 1/5] drm/i915: Allow disabling error capture
  2016-10-11  9:52 ` [CI 1/5] drm/i915: Allow disabling error capture Jani Nikula
@ 2016-10-11  9:57   ` Chris Wilson
  2016-10-11 10:16     ` Jani Nikula
  0 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2016-10-11  9:57 UTC (permalink / raw)
  To: Jani Nikula; +Cc: intel-gfx

On Tue, Oct 11, 2016 at 12:52:00PM +0300, Jani Nikula wrote:
> On Tue, 11 Oct 2016, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > We currently capture the GPU state after we detect a hang. This is vital
> > for us to both triage and debug hangs in the wild (post-mortem
> > debugging). However, it comes at the cost of running some potentially
> > dangerous code (since it has to make very few assumption about the state
> > of the driver) that is quite resource intensive.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/Kconfig          | 10 ++++++++++
> >  drivers/gpu/drm/i915/i915_debugfs.c   |  6 ++++++
> >  drivers/gpu/drm/i915/i915_drv.h       | 16 ++++++++++++++++
> >  drivers/gpu/drm/i915/i915_gpu_error.c |  7 +++++++
> >  drivers/gpu/drm/i915/i915_params.c    |  9 +++++++++
> >  drivers/gpu/drm/i915/i915_params.h    |  1 +
> >  drivers/gpu/drm/i915/i915_sysfs.c     |  8 ++++++++
> >  drivers/gpu/drm/i915/intel_display.c  |  4 ++++
> >  drivers/gpu/drm/i915/intel_overlay.c  |  4 ++++
> >  9 files changed, 65 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> > index 7769e469118f..10a6ac11b6a9 100644
> > --- a/drivers/gpu/drm/i915/Kconfig
> > +++ b/drivers/gpu/drm/i915/Kconfig
> > @@ -46,6 +46,16 @@ config DRM_I915_PRELIMINARY_HW_SUPPORT
> >  
> >  	  If in doubt, say "N".
> >  
> > +config DRM_I915_CAPTURE_ERROR
> > +	bool "Enable capturing GPU state following a hang"
> > +	depends on DRM_I915
> > +	default y
> > +	help
> > +	  This option enables capturing the GPU state when a hang is detected.
> > +	  This information is vital for triaging hangs and assists in debugging.
> > +
> > +	  If in doubt, say "Y".
> > +
> >  config DRM_I915_USERPTR
> >  	bool "Always enable userptr support"
> >  	depends on DRM_I915
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 20689f1cd719..e4b5ba771bea 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -960,6 +960,8 @@ static int i915_hws_info(struct seq_file *m, void *data)
> >  	return 0;
> >  }
> >  
> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> > +
> >  static ssize_t
> >  i915_error_state_write(struct file *filp,
> >  		       const char __user *ubuf,
> > @@ -1042,6 +1044,8 @@ static const struct file_operations i915_error_state_fops = {
> >  	.release = i915_error_state_release,
> >  };
> >  
> > +#endif
> > +
> >  static int
> >  i915_next_seqno_get(void *data, u64 *val)
> >  {
> > @@ -5398,7 +5402,9 @@ static const struct i915_debugfs_files {
> >  	{"i915_ring_missed_irq", &i915_ring_missed_irq_fops},
> >  	{"i915_ring_test_irq", &i915_ring_test_irq_fops},
> >  	{"i915_gem_drop_caches", &i915_drop_caches_fops},
> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> >  	{"i915_error_state", &i915_error_state_fops},
> > +#endif
> >  	{"i915_next_seqno", &i915_next_seqno_fops},
> >  	{"i915_display_crc_ctl", &i915_display_crc_ctl_fops},
> >  	{"i915_pri_wm_latency", &i915_pri_wm_latency_fops},
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 54d860e1c0fc..4570c4fa0287 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -3544,6 +3544,8 @@ static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {}
> >  #endif
> >  
> >  /* i915_gpu_error.c */
> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> > +
> >  __printf(2, 3)
> >  void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
> >  int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
> > @@ -3564,6 +3566,20 @@ void i915_error_state_get(struct drm_device *dev,
> >  void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
> >  void i915_destroy_error_state(struct drm_device *dev);
> >  
> > +#else
> > +
> > +static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
> > +					    u32 engine_mask,
> > +					    const char *error_msg)
> > +{
> > +}
> > +
> > +static inline void i915_destroy_error_state(struct drm_device *dev)
> > +{
> > +}
> > +
> > +#endif
> > +
> >  void i915_get_engine_instdone(struct drm_i915_private *dev_priv,
> >  			      enum intel_engine_id engine_id,
> >  			      struct intel_instdone *instdone);
> > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> > index b5b58692ac5a..9b395ffa3b6a 100644
> > --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> > @@ -30,6 +30,8 @@
> >  #include <generated/utsrelease.h>
> >  #include "i915_drv.h"
> >  
> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> > +
> >  static const char *engine_str(int engine)
> >  {
> >  	switch (engine) {
> > @@ -1464,6 +1466,9 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
> >  	struct drm_i915_error_state *error;
> >  	unsigned long flags;
> >  
> > +	if (!i915.error_capture)
> > +		return;
> > +
> >  	if (READ_ONCE(dev_priv->gpu_error.first_error))
> >  		return;
> >  
> > @@ -1549,6 +1554,8 @@ void i915_destroy_error_state(struct drm_device *dev)
> >  		kref_put(&error->ref, i915_error_state_free);
> >  }
> >  
> > +#endif
> > +
> >  const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
> >  {
> >  	switch (type) {
> > diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
> > index 768ad89d9cd4..e72a41223535 100644
> > --- a/drivers/gpu/drm/i915/i915_params.c
> > +++ b/drivers/gpu/drm/i915/i915_params.c
> > @@ -47,6 +47,7 @@ struct i915_params i915 __read_mostly = {
> >  	.load_detect_test = 0,
> >  	.force_reset_modeset_test = 0,
> >  	.reset = true,
> > +	.error_capture = true,
> >  	.invert_brightness = 0,
> >  	.disable_display = 0,
> >  	.enable_cmd_parser = 1,
> > @@ -115,6 +116,14 @@ MODULE_PARM_DESC(vbt_sdvo_panel_type,
> >  module_param_named_unsafe(reset, i915.reset, bool, 0600);
> >  MODULE_PARM_DESC(reset, "Attempt GPU resets (default: true)");
> >  
> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> > +module_param_named(error_capture, i915.error_capture, bool, 0600);
> > +MODULE_PARM_DESC(error_capture,
> > +	"Record the GPU state following a hang. "
> > +	"This information in /sys/class/drm/card<N>/error is vital for "
> > +	"triaging and debugging hangs.");
> > +#endif
> 
> The addition of the module parameter is really orthogonal to the config
> option, and should be added separately.
> 
> The question is, what is the target audience of
> CONFIG_DRM_I915_CAPTURE_ERROR=n? Is the added config #ifdeffery worth
> the trouble? It *is* a maintainability burden, because we will only ever
> be building with CONFIG_DRM_I915_CAPTURE_ERROR=y ourselves, and we'll
> screw it up anyway. Does i915.error_capture=0 accomplish the same thing
> (apart from not reducing code size)? Do we really need to be able to
> prevent the root from enabling error capture on the fly?

What I had in mind was that RT kernels will probably default to turning
off this facility (since it will cause latencies in the hundreds of ms
range).
 
> What would be wrong with just adding the module parameter, and making
> CONFIG_DRM_I915_CAPTURE_ERROR the default value for it? I.e.
> 
> 	.error_capture = IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR),
> 
> in the initialization. No #ifdefs anywhere.

I wanted to turn it off by default at compilation time.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [CI 1/5] drm/i915: Allow disabling error capture
  2016-10-11  9:57   ` Chris Wilson
@ 2016-10-11 10:16     ` Jani Nikula
  2016-10-11 10:28       ` Chris Wilson
  0 siblings, 1 reply; 19+ messages in thread
From: Jani Nikula @ 2016-10-11 10:16 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, 11 Oct 2016, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Tue, Oct 11, 2016 at 12:52:00PM +0300, Jani Nikula wrote:
>> On Tue, 11 Oct 2016, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>> > We currently capture the GPU state after we detect a hang. This is vital
>> > for us to both triage and debug hangs in the wild (post-mortem
>> > debugging). However, it comes at the cost of running some potentially
>> > dangerous code (since it has to make very few assumption about the state
>> > of the driver) that is quite resource intensive.
>> >
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> > ---
>> >  drivers/gpu/drm/i915/Kconfig          | 10 ++++++++++
>> >  drivers/gpu/drm/i915/i915_debugfs.c   |  6 ++++++
>> >  drivers/gpu/drm/i915/i915_drv.h       | 16 ++++++++++++++++
>> >  drivers/gpu/drm/i915/i915_gpu_error.c |  7 +++++++
>> >  drivers/gpu/drm/i915/i915_params.c    |  9 +++++++++
>> >  drivers/gpu/drm/i915/i915_params.h    |  1 +
>> >  drivers/gpu/drm/i915/i915_sysfs.c     |  8 ++++++++
>> >  drivers/gpu/drm/i915/intel_display.c  |  4 ++++
>> >  drivers/gpu/drm/i915/intel_overlay.c  |  4 ++++
>> >  9 files changed, 65 insertions(+)
>> >
>> > diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
>> > index 7769e469118f..10a6ac11b6a9 100644
>> > --- a/drivers/gpu/drm/i915/Kconfig
>> > +++ b/drivers/gpu/drm/i915/Kconfig
>> > @@ -46,6 +46,16 @@ config DRM_I915_PRELIMINARY_HW_SUPPORT
>> >  
>> >  	  If in doubt, say "N".
>> >  
>> > +config DRM_I915_CAPTURE_ERROR
>> > +	bool "Enable capturing GPU state following a hang"
>> > +	depends on DRM_I915
>> > +	default y
>> > +	help
>> > +	  This option enables capturing the GPU state when a hang is detected.
>> > +	  This information is vital for triaging hangs and assists in debugging.
>> > +
>> > +	  If in doubt, say "Y".
>> > +
>> >  config DRM_I915_USERPTR
>> >  	bool "Always enable userptr support"
>> >  	depends on DRM_I915
>> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>> > index 20689f1cd719..e4b5ba771bea 100644
>> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> > @@ -960,6 +960,8 @@ static int i915_hws_info(struct seq_file *m, void *data)
>> >  	return 0;
>> >  }
>> >  
>> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
>> > +
>> >  static ssize_t
>> >  i915_error_state_write(struct file *filp,
>> >  		       const char __user *ubuf,
>> > @@ -1042,6 +1044,8 @@ static const struct file_operations i915_error_state_fops = {
>> >  	.release = i915_error_state_release,
>> >  };
>> >  
>> > +#endif
>> > +
>> >  static int
>> >  i915_next_seqno_get(void *data, u64 *val)
>> >  {
>> > @@ -5398,7 +5402,9 @@ static const struct i915_debugfs_files {
>> >  	{"i915_ring_missed_irq", &i915_ring_missed_irq_fops},
>> >  	{"i915_ring_test_irq", &i915_ring_test_irq_fops},
>> >  	{"i915_gem_drop_caches", &i915_drop_caches_fops},
>> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
>> >  	{"i915_error_state", &i915_error_state_fops},
>> > +#endif
>> >  	{"i915_next_seqno", &i915_next_seqno_fops},
>> >  	{"i915_display_crc_ctl", &i915_display_crc_ctl_fops},
>> >  	{"i915_pri_wm_latency", &i915_pri_wm_latency_fops},
>> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> > index 54d860e1c0fc..4570c4fa0287 100644
>> > --- a/drivers/gpu/drm/i915/i915_drv.h
>> > +++ b/drivers/gpu/drm/i915/i915_drv.h
>> > @@ -3544,6 +3544,8 @@ static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {}
>> >  #endif
>> >  
>> >  /* i915_gpu_error.c */
>> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
>> > +
>> >  __printf(2, 3)
>> >  void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
>> >  int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
>> > @@ -3564,6 +3566,20 @@ void i915_error_state_get(struct drm_device *dev,
>> >  void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
>> >  void i915_destroy_error_state(struct drm_device *dev);
>> >  
>> > +#else
>> > +
>> > +static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
>> > +					    u32 engine_mask,
>> > +					    const char *error_msg)
>> > +{
>> > +}
>> > +
>> > +static inline void i915_destroy_error_state(struct drm_device *dev)
>> > +{
>> > +}
>> > +
>> > +#endif
>> > +
>> >  void i915_get_engine_instdone(struct drm_i915_private *dev_priv,
>> >  			      enum intel_engine_id engine_id,
>> >  			      struct intel_instdone *instdone);
>> > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
>> > index b5b58692ac5a..9b395ffa3b6a 100644
>> > --- a/drivers/gpu/drm/i915/i915_gpu_error.c
>> > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
>> > @@ -30,6 +30,8 @@
>> >  #include <generated/utsrelease.h>
>> >  #include "i915_drv.h"
>> >  
>> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
>> > +
>> >  static const char *engine_str(int engine)
>> >  {
>> >  	switch (engine) {
>> > @@ -1464,6 +1466,9 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
>> >  	struct drm_i915_error_state *error;
>> >  	unsigned long flags;
>> >  
>> > +	if (!i915.error_capture)
>> > +		return;
>> > +
>> >  	if (READ_ONCE(dev_priv->gpu_error.first_error))
>> >  		return;
>> >  
>> > @@ -1549,6 +1554,8 @@ void i915_destroy_error_state(struct drm_device *dev)
>> >  		kref_put(&error->ref, i915_error_state_free);
>> >  }
>> >  
>> > +#endif
>> > +
>> >  const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
>> >  {
>> >  	switch (type) {
>> > diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
>> > index 768ad89d9cd4..e72a41223535 100644
>> > --- a/drivers/gpu/drm/i915/i915_params.c
>> > +++ b/drivers/gpu/drm/i915/i915_params.c
>> > @@ -47,6 +47,7 @@ struct i915_params i915 __read_mostly = {
>> >  	.load_detect_test = 0,
>> >  	.force_reset_modeset_test = 0,
>> >  	.reset = true,
>> > +	.error_capture = true,
>> >  	.invert_brightness = 0,
>> >  	.disable_display = 0,
>> >  	.enable_cmd_parser = 1,
>> > @@ -115,6 +116,14 @@ MODULE_PARM_DESC(vbt_sdvo_panel_type,
>> >  module_param_named_unsafe(reset, i915.reset, bool, 0600);
>> >  MODULE_PARM_DESC(reset, "Attempt GPU resets (default: true)");
>> >  
>> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
>> > +module_param_named(error_capture, i915.error_capture, bool, 0600);
>> > +MODULE_PARM_DESC(error_capture,
>> > +	"Record the GPU state following a hang. "
>> > +	"This information in /sys/class/drm/card<N>/error is vital for "
>> > +	"triaging and debugging hangs.");
>> > +#endif
>> 
>> The addition of the module parameter is really orthogonal to the config
>> option, and should be added separately.
>> 
>> The question is, what is the target audience of
>> CONFIG_DRM_I915_CAPTURE_ERROR=n? Is the added config #ifdeffery worth
>> the trouble? It *is* a maintainability burden, because we will only ever
>> be building with CONFIG_DRM_I915_CAPTURE_ERROR=y ourselves, and we'll
>> screw it up anyway. Does i915.error_capture=0 accomplish the same thing
>> (apart from not reducing code size)? Do we really need to be able to
>> prevent the root from enabling error capture on the fly?
>
> What I had in mind was that RT kernels will probably default to turning
> off this facility (since it will cause latencies in the hundreds of ms
> range).
>  
>> What would be wrong with just adding the module parameter, and making
>> CONFIG_DRM_I915_CAPTURE_ERROR the default value for it? I.e.
>> 
>> 	.error_capture = IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR),
>> 
>> in the initialization. No #ifdefs anywhere.
>
> I wanted to turn it off by default at compilation time.

What I suggest above does that. ;)

J.



-- 
Jani Nikula, Intel Open Source Technology Center
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [CI 1/5] drm/i915: Allow disabling error capture
  2016-10-11 10:16     ` Jani Nikula
@ 2016-10-11 10:28       ` Chris Wilson
  2016-10-11 10:56         ` Jani Nikula
  0 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2016-10-11 10:28 UTC (permalink / raw)
  To: Jani Nikula; +Cc: intel-gfx

On Tue, Oct 11, 2016 at 01:16:42PM +0300, Jani Nikula wrote:
> On Tue, 11 Oct 2016, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > On Tue, Oct 11, 2016 at 12:52:00PM +0300, Jani Nikula wrote:
> >> On Tue, 11 Oct 2016, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >> > We currently capture the GPU state after we detect a hang. This is vital
> >> > for us to both triage and debug hangs in the wild (post-mortem
> >> > debugging). However, it comes at the cost of running some potentially
> >> > dangerous code (since it has to make very few assumption about the state
> >> > of the driver) that is quite resource intensive.
> >> >
> >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >> > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >> > ---
> >> >  drivers/gpu/drm/i915/Kconfig          | 10 ++++++++++
> >> >  drivers/gpu/drm/i915/i915_debugfs.c   |  6 ++++++
> >> >  drivers/gpu/drm/i915/i915_drv.h       | 16 ++++++++++++++++
> >> >  drivers/gpu/drm/i915/i915_gpu_error.c |  7 +++++++
> >> >  drivers/gpu/drm/i915/i915_params.c    |  9 +++++++++
> >> >  drivers/gpu/drm/i915/i915_params.h    |  1 +
> >> >  drivers/gpu/drm/i915/i915_sysfs.c     |  8 ++++++++
> >> >  drivers/gpu/drm/i915/intel_display.c  |  4 ++++
> >> >  drivers/gpu/drm/i915/intel_overlay.c  |  4 ++++
> >> >  9 files changed, 65 insertions(+)
> >> >
> >> > diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> >> > index 7769e469118f..10a6ac11b6a9 100644
> >> > --- a/drivers/gpu/drm/i915/Kconfig
> >> > +++ b/drivers/gpu/drm/i915/Kconfig
> >> > @@ -46,6 +46,16 @@ config DRM_I915_PRELIMINARY_HW_SUPPORT
> >> >  
> >> >  	  If in doubt, say "N".
> >> >  
> >> > +config DRM_I915_CAPTURE_ERROR
> >> > +	bool "Enable capturing GPU state following a hang"
> >> > +	depends on DRM_I915
> >> > +	default y
> >> > +	help
> >> > +	  This option enables capturing the GPU state when a hang is detected.
> >> > +	  This information is vital for triaging hangs and assists in debugging.
> >> > +
> >> > +	  If in doubt, say "Y".
> >> > +
> >> >  config DRM_I915_USERPTR
> >> >  	bool "Always enable userptr support"
> >> >  	depends on DRM_I915
> >> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> >> > index 20689f1cd719..e4b5ba771bea 100644
> >> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> >> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> >> > @@ -960,6 +960,8 @@ static int i915_hws_info(struct seq_file *m, void *data)
> >> >  	return 0;
> >> >  }
> >> >  
> >> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> >> > +
> >> >  static ssize_t
> >> >  i915_error_state_write(struct file *filp,
> >> >  		       const char __user *ubuf,
> >> > @@ -1042,6 +1044,8 @@ static const struct file_operations i915_error_state_fops = {
> >> >  	.release = i915_error_state_release,
> >> >  };
> >> >  
> >> > +#endif
> >> > +
> >> >  static int
> >> >  i915_next_seqno_get(void *data, u64 *val)
> >> >  {
> >> > @@ -5398,7 +5402,9 @@ static const struct i915_debugfs_files {
> >> >  	{"i915_ring_missed_irq", &i915_ring_missed_irq_fops},
> >> >  	{"i915_ring_test_irq", &i915_ring_test_irq_fops},
> >> >  	{"i915_gem_drop_caches", &i915_drop_caches_fops},
> >> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> >> >  	{"i915_error_state", &i915_error_state_fops},
> >> > +#endif
> >> >  	{"i915_next_seqno", &i915_next_seqno_fops},
> >> >  	{"i915_display_crc_ctl", &i915_display_crc_ctl_fops},
> >> >  	{"i915_pri_wm_latency", &i915_pri_wm_latency_fops},
> >> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >> > index 54d860e1c0fc..4570c4fa0287 100644
> >> > --- a/drivers/gpu/drm/i915/i915_drv.h
> >> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> >> > @@ -3544,6 +3544,8 @@ static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {}
> >> >  #endif
> >> >  
> >> >  /* i915_gpu_error.c */
> >> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> >> > +
> >> >  __printf(2, 3)
> >> >  void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
> >> >  int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
> >> > @@ -3564,6 +3566,20 @@ void i915_error_state_get(struct drm_device *dev,
> >> >  void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
> >> >  void i915_destroy_error_state(struct drm_device *dev);
> >> >  
> >> > +#else
> >> > +
> >> > +static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
> >> > +					    u32 engine_mask,
> >> > +					    const char *error_msg)
> >> > +{
> >> > +}
> >> > +
> >> > +static inline void i915_destroy_error_state(struct drm_device *dev)
> >> > +{
> >> > +}
> >> > +
> >> > +#endif
> >> > +
> >> >  void i915_get_engine_instdone(struct drm_i915_private *dev_priv,
> >> >  			      enum intel_engine_id engine_id,
> >> >  			      struct intel_instdone *instdone);
> >> > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> >> > index b5b58692ac5a..9b395ffa3b6a 100644
> >> > --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> >> > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> >> > @@ -30,6 +30,8 @@
> >> >  #include <generated/utsrelease.h>
> >> >  #include "i915_drv.h"
> >> >  
> >> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> >> > +
> >> >  static const char *engine_str(int engine)
> >> >  {
> >> >  	switch (engine) {
> >> > @@ -1464,6 +1466,9 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
> >> >  	struct drm_i915_error_state *error;
> >> >  	unsigned long flags;
> >> >  
> >> > +	if (!i915.error_capture)
> >> > +		return;
> >> > +
> >> >  	if (READ_ONCE(dev_priv->gpu_error.first_error))
> >> >  		return;
> >> >  
> >> > @@ -1549,6 +1554,8 @@ void i915_destroy_error_state(struct drm_device *dev)
> >> >  		kref_put(&error->ref, i915_error_state_free);
> >> >  }
> >> >  
> >> > +#endif
> >> > +
> >> >  const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
> >> >  {
> >> >  	switch (type) {
> >> > diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
> >> > index 768ad89d9cd4..e72a41223535 100644
> >> > --- a/drivers/gpu/drm/i915/i915_params.c
> >> > +++ b/drivers/gpu/drm/i915/i915_params.c
> >> > @@ -47,6 +47,7 @@ struct i915_params i915 __read_mostly = {
> >> >  	.load_detect_test = 0,
> >> >  	.force_reset_modeset_test = 0,
> >> >  	.reset = true,
> >> > +	.error_capture = true,
> >> >  	.invert_brightness = 0,
> >> >  	.disable_display = 0,
> >> >  	.enable_cmd_parser = 1,
> >> > @@ -115,6 +116,14 @@ MODULE_PARM_DESC(vbt_sdvo_panel_type,
> >> >  module_param_named_unsafe(reset, i915.reset, bool, 0600);
> >> >  MODULE_PARM_DESC(reset, "Attempt GPU resets (default: true)");
> >> >  
> >> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
> >> > +module_param_named(error_capture, i915.error_capture, bool, 0600);
> >> > +MODULE_PARM_DESC(error_capture,
> >> > +	"Record the GPU state following a hang. "
> >> > +	"This information in /sys/class/drm/card<N>/error is vital for "
> >> > +	"triaging and debugging hangs.");
> >> > +#endif
> >> 
> >> The addition of the module parameter is really orthogonal to the config
> >> option, and should be added separately.
> >> 
> >> The question is, what is the target audience of
> >> CONFIG_DRM_I915_CAPTURE_ERROR=n? Is the added config #ifdeffery worth
> >> the trouble? It *is* a maintainability burden, because we will only ever
> >> be building with CONFIG_DRM_I915_CAPTURE_ERROR=y ourselves, and we'll
> >> screw it up anyway. Does i915.error_capture=0 accomplish the same thing
> >> (apart from not reducing code size)? Do we really need to be able to
> >> prevent the root from enabling error capture on the fly?
> >
> > What I had in mind was that RT kernels will probably default to turning
> > off this facility (since it will cause latencies in the hundreds of ms
> > range).
> >  
> >> What would be wrong with just adding the module parameter, and making
> >> CONFIG_DRM_I915_CAPTURE_ERROR the default value for it? I.e.
> >> 
> >> 	.error_capture = IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR),
> >> 
> >> in the initialization. No #ifdefs anywhere.
> >
> > I wanted to turn it off by default at compilation time.
> 
> What I suggest above does that. ;)

It's 30KiB of code that they don't want. I know that's only about 3% of
the size of the driver, but it's a help.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [CI 1/5] drm/i915: Allow disabling error capture
  2016-10-11 10:28       ` Chris Wilson
@ 2016-10-11 10:56         ` Jani Nikula
  2016-10-11 12:13           ` Daniel Vetter
  0 siblings, 1 reply; 19+ messages in thread
From: Jani Nikula @ 2016-10-11 10:56 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Daniel Vetter, intel-gfx

On Tue, 11 Oct 2016, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Tue, Oct 11, 2016 at 01:16:42PM +0300, Jani Nikula wrote:
>> On Tue, 11 Oct 2016, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>> > On Tue, Oct 11, 2016 at 12:52:00PM +0300, Jani Nikula wrote:
>> >> On Tue, 11 Oct 2016, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>> >> > We currently capture the GPU state after we detect a hang. This is vital
>> >> > for us to both triage and debug hangs in the wild (post-mortem
>> >> > debugging). However, it comes at the cost of running some potentially
>> >> > dangerous code (since it has to make very few assumption about the state
>> >> > of the driver) that is quite resource intensive.
>> >> >
>> >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> >> > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> >> > ---
>> >> >  drivers/gpu/drm/i915/Kconfig          | 10 ++++++++++
>> >> >  drivers/gpu/drm/i915/i915_debugfs.c   |  6 ++++++
>> >> >  drivers/gpu/drm/i915/i915_drv.h       | 16 ++++++++++++++++
>> >> >  drivers/gpu/drm/i915/i915_gpu_error.c |  7 +++++++
>> >> >  drivers/gpu/drm/i915/i915_params.c    |  9 +++++++++
>> >> >  drivers/gpu/drm/i915/i915_params.h    |  1 +
>> >> >  drivers/gpu/drm/i915/i915_sysfs.c     |  8 ++++++++
>> >> >  drivers/gpu/drm/i915/intel_display.c  |  4 ++++
>> >> >  drivers/gpu/drm/i915/intel_overlay.c  |  4 ++++
>> >> >  9 files changed, 65 insertions(+)
>> >> >
>> >> > diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
>> >> > index 7769e469118f..10a6ac11b6a9 100644
>> >> > --- a/drivers/gpu/drm/i915/Kconfig
>> >> > +++ b/drivers/gpu/drm/i915/Kconfig
>> >> > @@ -46,6 +46,16 @@ config DRM_I915_PRELIMINARY_HW_SUPPORT
>> >> >  
>> >> >  	  If in doubt, say "N".
>> >> >  
>> >> > +config DRM_I915_CAPTURE_ERROR
>> >> > +	bool "Enable capturing GPU state following a hang"
>> >> > +	depends on DRM_I915
>> >> > +	default y
>> >> > +	help
>> >> > +	  This option enables capturing the GPU state when a hang is detected.
>> >> > +	  This information is vital for triaging hangs and assists in debugging.
>> >> > +
>> >> > +	  If in doubt, say "Y".
>> >> > +
>> >> >  config DRM_I915_USERPTR
>> >> >  	bool "Always enable userptr support"
>> >> >  	depends on DRM_I915
>> >> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>> >> > index 20689f1cd719..e4b5ba771bea 100644
>> >> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> >> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> >> > @@ -960,6 +960,8 @@ static int i915_hws_info(struct seq_file *m, void *data)
>> >> >  	return 0;
>> >> >  }
>> >> >  
>> >> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
>> >> > +
>> >> >  static ssize_t
>> >> >  i915_error_state_write(struct file *filp,
>> >> >  		       const char __user *ubuf,
>> >> > @@ -1042,6 +1044,8 @@ static const struct file_operations i915_error_state_fops = {
>> >> >  	.release = i915_error_state_release,
>> >> >  };
>> >> >  
>> >> > +#endif
>> >> > +
>> >> >  static int
>> >> >  i915_next_seqno_get(void *data, u64 *val)
>> >> >  {
>> >> > @@ -5398,7 +5402,9 @@ static const struct i915_debugfs_files {
>> >> >  	{"i915_ring_missed_irq", &i915_ring_missed_irq_fops},
>> >> >  	{"i915_ring_test_irq", &i915_ring_test_irq_fops},
>> >> >  	{"i915_gem_drop_caches", &i915_drop_caches_fops},
>> >> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
>> >> >  	{"i915_error_state", &i915_error_state_fops},
>> >> > +#endif
>> >> >  	{"i915_next_seqno", &i915_next_seqno_fops},
>> >> >  	{"i915_display_crc_ctl", &i915_display_crc_ctl_fops},
>> >> >  	{"i915_pri_wm_latency", &i915_pri_wm_latency_fops},
>> >> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> >> > index 54d860e1c0fc..4570c4fa0287 100644
>> >> > --- a/drivers/gpu/drm/i915/i915_drv.h
>> >> > +++ b/drivers/gpu/drm/i915/i915_drv.h
>> >> > @@ -3544,6 +3544,8 @@ static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {}
>> >> >  #endif
>> >> >  
>> >> >  /* i915_gpu_error.c */
>> >> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
>> >> > +
>> >> >  __printf(2, 3)
>> >> >  void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
>> >> >  int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
>> >> > @@ -3564,6 +3566,20 @@ void i915_error_state_get(struct drm_device *dev,
>> >> >  void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
>> >> >  void i915_destroy_error_state(struct drm_device *dev);
>> >> >  
>> >> > +#else
>> >> > +
>> >> > +static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
>> >> > +					    u32 engine_mask,
>> >> > +					    const char *error_msg)
>> >> > +{
>> >> > +}
>> >> > +
>> >> > +static inline void i915_destroy_error_state(struct drm_device *dev)
>> >> > +{
>> >> > +}
>> >> > +
>> >> > +#endif
>> >> > +
>> >> >  void i915_get_engine_instdone(struct drm_i915_private *dev_priv,
>> >> >  			      enum intel_engine_id engine_id,
>> >> >  			      struct intel_instdone *instdone);
>> >> > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
>> >> > index b5b58692ac5a..9b395ffa3b6a 100644
>> >> > --- a/drivers/gpu/drm/i915/i915_gpu_error.c
>> >> > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
>> >> > @@ -30,6 +30,8 @@
>> >> >  #include <generated/utsrelease.h>
>> >> >  #include "i915_drv.h"
>> >> >  
>> >> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
>> >> > +
>> >> >  static const char *engine_str(int engine)
>> >> >  {
>> >> >  	switch (engine) {
>> >> > @@ -1464,6 +1466,9 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
>> >> >  	struct drm_i915_error_state *error;
>> >> >  	unsigned long flags;
>> >> >  
>> >> > +	if (!i915.error_capture)
>> >> > +		return;
>> >> > +
>> >> >  	if (READ_ONCE(dev_priv->gpu_error.first_error))
>> >> >  		return;
>> >> >  
>> >> > @@ -1549,6 +1554,8 @@ void i915_destroy_error_state(struct drm_device *dev)
>> >> >  		kref_put(&error->ref, i915_error_state_free);
>> >> >  }
>> >> >  
>> >> > +#endif
>> >> > +
>> >> >  const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
>> >> >  {
>> >> >  	switch (type) {
>> >> > diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
>> >> > index 768ad89d9cd4..e72a41223535 100644
>> >> > --- a/drivers/gpu/drm/i915/i915_params.c
>> >> > +++ b/drivers/gpu/drm/i915/i915_params.c
>> >> > @@ -47,6 +47,7 @@ struct i915_params i915 __read_mostly = {
>> >> >  	.load_detect_test = 0,
>> >> >  	.force_reset_modeset_test = 0,
>> >> >  	.reset = true,
>> >> > +	.error_capture = true,
>> >> >  	.invert_brightness = 0,
>> >> >  	.disable_display = 0,
>> >> >  	.enable_cmd_parser = 1,
>> >> > @@ -115,6 +116,14 @@ MODULE_PARM_DESC(vbt_sdvo_panel_type,
>> >> >  module_param_named_unsafe(reset, i915.reset, bool, 0600);
>> >> >  MODULE_PARM_DESC(reset, "Attempt GPU resets (default: true)");
>> >> >  
>> >> > +#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
>> >> > +module_param_named(error_capture, i915.error_capture, bool, 0600);
>> >> > +MODULE_PARM_DESC(error_capture,
>> >> > +	"Record the GPU state following a hang. "
>> >> > +	"This information in /sys/class/drm/card<N>/error is vital for "
>> >> > +	"triaging and debugging hangs.");
>> >> > +#endif
>> >> 
>> >> The addition of the module parameter is really orthogonal to the config
>> >> option, and should be added separately.
>> >> 
>> >> The question is, what is the target audience of
>> >> CONFIG_DRM_I915_CAPTURE_ERROR=n? Is the added config #ifdeffery worth
>> >> the trouble? It *is* a maintainability burden, because we will only ever
>> >> be building with CONFIG_DRM_I915_CAPTURE_ERROR=y ourselves, and we'll
>> >> screw it up anyway. Does i915.error_capture=0 accomplish the same thing
>> >> (apart from not reducing code size)? Do we really need to be able to
>> >> prevent the root from enabling error capture on the fly?
>> >
>> > What I had in mind was that RT kernels will probably default to turning
>> > off this facility (since it will cause latencies in the hundreds of ms
>> > range).
>> >  
>> >> What would be wrong with just adding the module parameter, and making
>> >> CONFIG_DRM_I915_CAPTURE_ERROR the default value for it? I.e.
>> >> 
>> >> 	.error_capture = IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR),
>> >> 
>> >> in the initialization. No #ifdefs anywhere.
>> >
>> > I wanted to turn it off by default at compilation time.
>> 
>> What I suggest above does that. ;)
>
> It's 30KiB of code that they don't want. I know that's only about 3% of
> the size of the driver, but it's a help.

Fair enough. Please copy-paste some of the elaboration to the commit
message. Ack from me, but it wouldn't hurt to get an ack from Daniel as
well.

BR,
Jani.

-- 
Jani Nikula, Intel Open Source Technology Center
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* ✗ Fi.CI.BAT: warning for series starting with [CI,1/5] drm/i915: Allow disabling error capture
  2016-10-11  9:27 [CI 1/5] drm/i915: Allow disabling error capture Chris Wilson
                   ` (4 preceding siblings ...)
  2016-10-11  9:52 ` [CI 1/5] drm/i915: Allow disabling error capture Jani Nikula
@ 2016-10-11 11:20 ` Patchwork
  2018-10-26 13:02 ` ✗ Fi.CI.BAT: failure for series starting with [2/2] drm/i915: Allow disabling error capture (rev3) Patchwork
  2018-10-26 13:03 ` Patchwork
  7 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2016-10-11 11:20 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [CI,1/5] drm/i915: Allow disabling error capture
URL   : https://patchwork.freedesktop.org/series/13576/
State : warning

== Summary ==

Series 13576v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/13576/revisions/1/mbox/

Test drv_module_reload_basic:
                skip       -> PASS       (fi-skl-6260u)
Test kms_pipe_crc_basic:
        Subgroup read-crc-pipe-a-frame-sequence:
                pass       -> DMESG-WARN (fi-ivb-3770)
Test kms_psr_sink_crc:
        Subgroup psr_basic:
                dmesg-warn -> PASS       (fi-skl-6700hq)
Test vgem_basic:
        Subgroup unload:
                skip       -> PASS       (fi-skl-6700k)
                skip       -> PASS       (fi-bxt-t5700)
                skip       -> PASS       (fi-skl-6700hq)
                pass       -> SKIP       (fi-ivb-3770)
                pass       -> SKIP       (fi-bsw-n3050)

fi-bdw-5557u     total:248  pass:231  dwarn:0   dfail:0   fail:0   skip:17 
fi-bsw-n3050     total:248  pass:204  dwarn:0   dfail:0   fail:0   skip:44 
fi-bxt-t5700     total:248  pass:217  dwarn:0   dfail:0   fail:0   skip:31 
fi-byt-j1900     total:248  pass:214  dwarn:1   dfail:0   fail:1   skip:32 
fi-byt-n2820     total:248  pass:211  dwarn:0   dfail:0   fail:1   skip:36 
fi-hsw-4770      total:248  pass:224  dwarn:0   dfail:0   fail:0   skip:24 
fi-hsw-4770r     total:248  pass:224  dwarn:0   dfail:0   fail:0   skip:24 
fi-ilk-650       total:248  pass:184  dwarn:0   dfail:0   fail:2   skip:62 
fi-ivb-3520m     total:248  pass:221  dwarn:0   dfail:0   fail:0   skip:27 
fi-ivb-3770      total:248  pass:220  dwarn:1   dfail:0   fail:0   skip:27 
fi-kbl-7200u     total:248  pass:222  dwarn:0   dfail:0   fail:0   skip:26 
fi-skl-6260u     total:248  pass:232  dwarn:0   dfail:0   fail:0   skip:16 
fi-skl-6700hq    total:248  pass:225  dwarn:0   dfail:0   fail:0   skip:23 
fi-skl-6700k     total:248  pass:222  dwarn:1   dfail:0   fail:0   skip:25 
fi-skl-6770hq    total:248  pass:231  dwarn:1   dfail:0   fail:1   skip:15 
fi-snb-2520m     total:248  pass:211  dwarn:0   dfail:0   fail:0   skip:37 
fi-snb-2600      total:248  pass:209  dwarn:0   dfail:0   fail:0   skip:39 

Results at /archive/results/CI_IGT_test/Patchwork_2671/

f79322df75bf6193458ec601a345ba6bd86befca drm-intel-nightly: 2016y-10m-11d-09h-16m-03s UTC integration manifest
d6d2da1 drm/i915: Compress GPU objects in error state
ab626fb drm/i915: Consolidate error object printing
57cb589 drm/i915: Always use the GTT for error capture
1083247 drm/i915: Stop the machine whilst capturing the GPU crash dump
8a5c841 drm/i915: Allow disabling error capture

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [CI 1/5] drm/i915: Allow disabling error capture
  2016-10-11 10:56         ` Jani Nikula
@ 2016-10-11 12:13           ` Daniel Vetter
  2016-10-11 13:32             ` [PATCH 1/2] drm/i915: Move common code out of i915_gpu_error.c Chris Wilson
  0 siblings, 1 reply; 19+ messages in thread
From: Daniel Vetter @ 2016-10-11 12:13 UTC (permalink / raw)
  To: Jani Nikula; +Cc: intel-gfx

On Tue, Oct 11, 2016 at 12:56 PM, Jani Nikula
<jani.nikula@linux.intel.com> wrote:
> Fair enough. Please copy-paste some of the elaboration to the commit
> message. Ack from me, but it wouldn't hurt to get an ack from Daniel as
> well.

Would be nice if we can trade in some of the #ifdefry with a
conditional compiliation of i915_gpu_error.c (and shuffle some parts
around, or maybe intel_display_error.c extraction to make that
feasible). Besides that wishlist item, ack.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 1/2] drm/i915: Move common code out of i915_gpu_error.c
  2016-10-11 12:13           ` Daniel Vetter
@ 2016-10-11 13:32             ` Chris Wilson
  2016-10-11 13:32               ` [PATCH 2/2] drm/i915: Allow disabling error capture Chris Wilson
  2016-10-12  8:51               ` [PATCH 1/2] drm/i915: Move common code out of i915_gpu_error.c Joonas Lahtinen
  0 siblings, 2 replies; 19+ messages in thread
From: Chris Wilson @ 2016-10-11 13:32 UTC (permalink / raw)
  To: intel-gfx

In the next patch, I want to conditionally compile i915_gpu_error.c and
that requires moving the functions used by debug out of
i915_gpu_error.c!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |   2 +-
 drivers/gpu/drm/i915/i915_drv.h         |   3 -
 drivers/gpu/drm/i915/i915_gpu_error.c   | 106 +-------------------------------
 drivers/gpu/drm/i915/i915_irq.c         |   4 +-
 drivers/gpu/drm/i915/intel_engine_cs.c  | 104 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h |   3 +
 6 files changed, 111 insertions(+), 111 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 20689f1cd719..f6762e00f872 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1339,7 +1339,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 		seqno[id] = intel_engine_get_seqno(engine);
 	}
 
-	i915_get_engine_instdone(dev_priv, RCS, &instdone);
+	intel_engine_get_instdone(&dev_priv->engine[RCS], &instdone);
 
 	intel_runtime_pm_put(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 54d860e1c0fc..4553a5372008 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3564,9 +3564,6 @@ void i915_error_state_get(struct drm_device *dev,
 void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
 void i915_destroy_error_state(struct drm_device *dev);
 
-void i915_get_engine_instdone(struct drm_i915_private *dev_priv,
-			      enum intel_engine_id engine_id,
-			      struct intel_instdone *instdone);
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
 
 /* i915_cmd_parser.c */
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index b5b58692ac5a..04205c82f0c9 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1038,7 +1038,7 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
 		ee->ipehr = I915_READ(IPEHR);
 	}
 
-	i915_get_engine_instdone(dev_priv, engine->id, &ee->instdone);
+	intel_engine_get_instdone(engine, &ee->instdone);
 
 	ee->waiting = intel_engine_has_waiter(engine);
 	ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
@@ -1548,107 +1548,3 @@ void i915_destroy_error_state(struct drm_device *dev)
 	if (error)
 		kref_put(&error->ref, i915_error_state_free);
 }
-
-const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
-{
-	switch (type) {
-	case I915_CACHE_NONE: return " uncached";
-	case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
-	case I915_CACHE_L3_LLC: return " L3+LLC";
-	case I915_CACHE_WT: return " WT";
-	default: return "";
-	}
-}
-
-static inline uint32_t
-read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
-		  int subslice, i915_reg_t reg)
-{
-	uint32_t mcr;
-	uint32_t ret;
-	enum forcewake_domains fw_domains;
-
-	fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg,
-						    FW_REG_READ);
-	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
-						     GEN8_MCR_SELECTOR,
-						     FW_REG_READ | FW_REG_WRITE);
-
-	spin_lock_irq(&dev_priv->uncore.lock);
-	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
-
-	mcr = I915_READ_FW(GEN8_MCR_SELECTOR);
-	/*
-	 * The HW expects the slice and sublice selectors to be reset to 0
-	 * after reading out the registers.
-	 */
-	WARN_ON_ONCE(mcr & (GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK));
-	mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
-	mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
-	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
-
-	ret = I915_READ_FW(reg);
-
-	mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
-	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
-
-	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
-	spin_unlock_irq(&dev_priv->uncore.lock);
-
-	return ret;
-}
-
-/* NB: please notice the memset */
-void i915_get_engine_instdone(struct drm_i915_private *dev_priv,
-			      enum intel_engine_id engine_id,
-			      struct intel_instdone *instdone)
-{
-	u32 mmio_base = dev_priv->engine[engine_id].mmio_base;
-	int slice;
-	int subslice;
-
-	memset(instdone, 0, sizeof(*instdone));
-
-	switch (INTEL_GEN(dev_priv)) {
-	default:
-		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));
-
-		if (engine_id != RCS)
-			break;
-
-		instdone->slice_common = I915_READ(GEN7_SC_INSTDONE);
-		for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
-			instdone->sampler[slice][subslice] =
-				read_subslice_reg(dev_priv, slice, subslice,
-						  GEN7_SAMPLER_INSTDONE);
-			instdone->row[slice][subslice] =
-				read_subslice_reg(dev_priv, slice, subslice,
-						  GEN7_ROW_INSTDONE);
-		}
-		break;
-	case 7:
-		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));
-
-		if (engine_id != RCS)
-			break;
-
-		instdone->slice_common = I915_READ(GEN7_SC_INSTDONE);
-		instdone->sampler[0][0] = I915_READ(GEN7_SAMPLER_INSTDONE);
-		instdone->row[0][0] = I915_READ(GEN7_ROW_INSTDONE);
-
-		break;
-	case 6:
-	case 5:
-	case 4:
-		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));
-
-		if (engine_id == RCS)
-			/* HACK: Using the wrong struct member */
-			instdone->slice_common = I915_READ(GEN4_INSTDONE1);
-		break;
-	case 3:
-	case 2:
-		instdone->instdone = I915_READ(GEN2_INSTDONE);
-		break;
-	}
-}
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index bd6c8b0eeaef..ddff6f9b869c 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2588,7 +2588,7 @@ static void i915_report_and_clear_eir(struct drm_i915_private *dev_priv)
 
 	pr_err("render error detected, EIR: 0x%08x\n", eir);
 
-	i915_get_engine_instdone(dev_priv, RCS, &instdone);
+	intel_engine_get_instdone(&dev_priv->engine[RCS], &instdone);
 
 	if (IS_G4X(dev_priv)) {
 		if (eir & (GM45_ERROR_MEM_PRIV | GM45_ERROR_CP_PRIV)) {
@@ -3001,7 +3001,7 @@ static bool subunits_stuck(struct intel_engine_cs *engine)
 	if (engine->id != RCS)
 		return true;
 
-	i915_get_engine_instdone(dev_priv, RCS, &instdone);
+	intel_engine_get_instdone(engine, &instdone);
 
 	/* There might be unstable subunit states even when
 	 * actual head is not moving. Filter out the unstable ones by
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 480584c09306..1d597feba97f 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -349,3 +349,107 @@ u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine)
 
 	return bbaddr;
 }
+
+const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
+{
+	switch (type) {
+	case I915_CACHE_NONE: return " uncached";
+	case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
+	case I915_CACHE_L3_LLC: return " L3+LLC";
+	case I915_CACHE_WT: return " WT";
+	default: return "";
+	}
+}
+
+static inline uint32_t
+read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
+		  int subslice, i915_reg_t reg)
+{
+	uint32_t mcr;
+	uint32_t ret;
+	enum forcewake_domains fw_domains;
+
+	fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg,
+						    FW_REG_READ);
+	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
+						     GEN8_MCR_SELECTOR,
+						     FW_REG_READ | FW_REG_WRITE);
+
+	spin_lock_irq(&dev_priv->uncore.lock);
+	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
+
+	mcr = I915_READ_FW(GEN8_MCR_SELECTOR);
+	/*
+	 * The HW expects the slice and sublice selectors to be reset to 0
+	 * after reading out the registers.
+	 */
+	WARN_ON_ONCE(mcr & (GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK));
+	mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
+	mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
+	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
+
+	ret = I915_READ_FW(reg);
+
+	mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
+	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
+
+	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
+	spin_unlock_irq(&dev_priv->uncore.lock);
+
+	return ret;
+}
+
+/* NB: please notice the memset */
+void intel_engine_get_instdone(struct intel_engine_cs *engine,
+			       struct intel_instdone *instdone)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	u32 mmio_base = engine->mmio_base;
+	int slice;
+	int subslice;
+
+	memset(instdone, 0, sizeof(*instdone));
+
+	switch (INTEL_GEN(dev_priv)) {
+	default:
+		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));
+
+		if (engine->id != RCS)
+			break;
+
+		instdone->slice_common = I915_READ(GEN7_SC_INSTDONE);
+		for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
+			instdone->sampler[slice][subslice] =
+				read_subslice_reg(dev_priv, slice, subslice,
+						  GEN7_SAMPLER_INSTDONE);
+			instdone->row[slice][subslice] =
+				read_subslice_reg(dev_priv, slice, subslice,
+						  GEN7_ROW_INSTDONE);
+		}
+		break;
+	case 7:
+		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));
+
+		if (engine->id != RCS)
+			break;
+
+		instdone->slice_common = I915_READ(GEN7_SC_INSTDONE);
+		instdone->sampler[0][0] = I915_READ(GEN7_SAMPLER_INSTDONE);
+		instdone->row[0][0] = I915_READ(GEN7_ROW_INSTDONE);
+
+		break;
+	case 6:
+	case 5:
+	case 4:
+		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));
+
+		if (engine->id == RCS)
+			/* HACK: Using the wrong struct member */
+			instdone->slice_common = I915_READ(GEN4_INSTDONE1);
+		break;
+	case 3:
+	case 2:
+		instdone->instdone = I915_READ(GEN2_INSTDONE);
+		break;
+	}
+}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 124f4646958d..36eff9765cc2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -550,6 +550,9 @@ static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
 
 int init_workarounds_ring(struct intel_engine_cs *engine);
 
+void intel_engine_get_instdone(struct intel_engine_cs *engine,
+			       struct intel_instdone *instdone);
+
 /*
  * Arbitrary size for largest possible 'add request' sequence. The code paths
  * are complex and variable. Empirical measurement shows that the worst case
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 2/2] drm/i915: Allow disabling error capture
  2016-10-11 13:32             ` [PATCH 1/2] drm/i915: Move common code out of i915_gpu_error.c Chris Wilson
@ 2016-10-11 13:32               ` Chris Wilson
  2016-10-13  9:16                 ` Jani Nikula
  2016-10-12  8:51               ` [PATCH 1/2] drm/i915: Move common code out of i915_gpu_error.c Joonas Lahtinen
  1 sibling, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2016-10-11 13:32 UTC (permalink / raw)
  To: intel-gfx

We currently capture the GPU state after we detect a hang. This is vital
for us to both triage and debug hangs in the wild (post-mortem
debugging). However, it comes at the cost of running some potentially
dangerous code (since it has to make very few assumption about the state
of the driver) that is quite resource intensive.

This patch introduces both a method to disable error capture at runtime
(for users who hit bugs at runtime and need a workaround) and to disable
error capture at compiletime (for realtime users who want to minimise
any possible latency, and never require error capture, saving ~30k of
code). The cost is that we know have to be wary of (and test!) a kconfig
flag and a module parameter. The effect of the module parameter is easy
to verify through code inspection and runtime testing, but a kconfig flag
needs regular compile checking.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Acked-by: Jani Nikula <jani.nikula@linux.intel.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch
---
 drivers/gpu/drm/i915/Kconfig          | 13 +++++++++++++
 drivers/gpu/drm/i915/Makefile         |  4 +++-
 drivers/gpu/drm/i915/i915_debugfs.c   |  6 ++++++
 drivers/gpu/drm/i915/i915_drv.h       | 16 ++++++++++++++++
 drivers/gpu/drm/i915/i915_gpu_error.c |  3 +++
 drivers/gpu/drm/i915/i915_params.c    |  9 +++++++++
 drivers/gpu/drm/i915/i915_params.h    |  1 +
 drivers/gpu/drm/i915/i915_sysfs.c     | 25 ++++++++++++++++++++-----
 drivers/gpu/drm/i915/intel_display.c  |  4 ++++
 drivers/gpu/drm/i915/intel_overlay.c  |  4 ++++
 10 files changed, 79 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 7769e469118f..8844b99bd760 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -46,6 +46,19 @@ config DRM_I915_PRELIMINARY_HW_SUPPORT
 
 	  If in doubt, say "N".
 
+config DRM_I915_CAPTURE_ERROR
+	bool "Enable capturing GPU state following a hang"
+	depends on DRM_I915
+	default y
+	help
+	  This option enables capturing the GPU state when a hang is detected.
+	  This information is vital for triaging hangs and assists in debugging.
+	  Please report any hang to
+            https://bugs.freedesktop.org/enter_bug.cgi?product=DRI
+	  for triaging.
+
+	  If in doubt, say "Y".
+
 config DRM_I915_USERPTR
 	bool "Always enable userptr support"
 	depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index a998c2bce70a..8790ae4fb171 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -42,7 +42,6 @@ i915-y += i915_cmd_parser.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
 	  i915_gem_userptr.o \
-	  i915_gpu_error.o \
 	  i915_trace_points.o \
 	  intel_breadcrumbs.o \
 	  intel_engine_cs.o \
@@ -107,6 +106,9 @@ i915-y += dvo_ch7017.o \
 	  intel_sdvo.o \
 	  intel_tv.o
 
+# Post-mortem debug and GPU hang state capture
+i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
+
 # virtual gpu code
 i915-y += i915_vgpu.o
 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index f6762e00f872..358663e833d6 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -960,6 +960,8 @@ static int i915_hws_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+
 static ssize_t
 i915_error_state_write(struct file *filp,
 		       const char __user *ubuf,
@@ -1042,6 +1044,8 @@ static const struct file_operations i915_error_state_fops = {
 	.release = i915_error_state_release,
 };
 
+#endif
+
 static int
 i915_next_seqno_get(void *data, u64 *val)
 {
@@ -5398,7 +5402,9 @@ static const struct i915_debugfs_files {
 	{"i915_ring_missed_irq", &i915_ring_missed_irq_fops},
 	{"i915_ring_test_irq", &i915_ring_test_irq_fops},
 	{"i915_gem_drop_caches", &i915_drop_caches_fops},
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
 	{"i915_error_state", &i915_error_state_fops},
+#endif
 	{"i915_next_seqno", &i915_next_seqno_fops},
 	{"i915_display_crc_ctl", &i915_display_crc_ctl_fops},
 	{"i915_pri_wm_latency", &i915_pri_wm_latency_fops},
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4553a5372008..380590b30bbf 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3544,6 +3544,8 @@ static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {}
 #endif
 
 /* i915_gpu_error.c */
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+
 __printf(2, 3)
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
 int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
@@ -3564,6 +3566,20 @@ void i915_error_state_get(struct drm_device *dev,
 void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
 void i915_destroy_error_state(struct drm_device *dev);
 
+#else
+
+static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
+					    u32 engine_mask,
+					    const char *error_msg)
+{
+}
+
+static inline void i915_destroy_error_state(struct drm_device *dev)
+{
+}
+
+#endif
+
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
 
 /* i915_cmd_parser.c */
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 04205c82f0c9..c88c0d192a60 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1464,6 +1464,9 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
 	struct drm_i915_error_state *error;
 	unsigned long flags;
 
+	if (!i915.error_capture)
+		return;
+
 	if (READ_ONCE(dev_priv->gpu_error.first_error))
 		return;
 
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 768ad89d9cd4..629e4334719c 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -47,6 +47,7 @@ struct i915_params i915 __read_mostly = {
 	.load_detect_test = 0,
 	.force_reset_modeset_test = 0,
 	.reset = true,
+	.error_capture = true,
 	.invert_brightness = 0,
 	.disable_display = 0,
 	.enable_cmd_parser = 1,
@@ -115,6 +116,14 @@ MODULE_PARM_DESC(vbt_sdvo_panel_type,
 module_param_named_unsafe(reset, i915.reset, bool, 0600);
 MODULE_PARM_DESC(reset, "Attempt GPU resets (default: true)");
 
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+module_param_named(error_capture, i915.error_capture, bool, 0600);
+MODULE_PARM_DESC(error_capture,
+	"Record the GPU state following a hang. "
+	"This information in /sys/class/drm/card<N>/error is vital for "
+	"triaging and debugging hangs.");
+#endif
+
 module_param_named_unsafe(enable_hangcheck, i915.enable_hangcheck, bool, 0644);
 MODULE_PARM_DESC(enable_hangcheck,
 	"Periodically check GPU activity for detecting hangs. "
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 3a0dd78ddb38..94efc899c1ef 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -59,6 +59,7 @@ struct i915_params {
 	bool load_detect_test;
 	bool force_reset_modeset_test;
 	bool reset;
+	bool error_capture;
 	bool disable_display;
 	bool verbose_state_checks;
 	bool nuclear_pageflip;
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 1012eeea1324..b626c58db113 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -514,6 +514,8 @@ static const struct attribute *vlv_attrs[] = {
 	NULL,
 };
 
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+
 static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
 				struct bin_attribute *attr, char *buf,
 				loff_t off, size_t count)
@@ -571,6 +573,21 @@ static struct bin_attribute error_state_attr = {
 	.write = error_state_write,
 };
 
+static void i915_setup_error_capture(struct device *kdev)
+{
+	if (sysfs_create_bin_file(&kdev->kobj, &error_state_attr))
+		DRM_ERROR("error_state sysfs setup failed\n");
+}
+
+static void i915_remove_error_capture(struct device *kdev)
+{
+	sysfs_remove_bin_file(&kdev->kobj, &error_state_attr);
+}
+#else
+static void i915_setup_error_capture(struct device *kdev) {}
+static void i915_teardown_error_capture(struct device *kdev) {}
+#endif
+
 void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 {
 	struct device *kdev = dev_priv->drm.primary->kdev;
@@ -617,17 +634,15 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 	if (ret)
 		DRM_ERROR("RPS sysfs setup failed\n");
 
-	ret = sysfs_create_bin_file(&kdev->kobj,
-				    &error_state_attr);
-	if (ret)
-		DRM_ERROR("error_state sysfs setup failed\n");
+	i915_setup_error_capture(kdev);
 }
 
 void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
 {
 	struct device *kdev = dev_priv->drm.primary->kdev;
 
-	sysfs_remove_bin_file(&kdev->kobj, &error_state_attr);
+	i915_teardown_error_capture(kdev);
+
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 		sysfs_remove_files(&kdev->kobj, vlv_attrs);
 	else
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 23a6c7213eca..70234caacf39 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -17099,6 +17099,8 @@ int intel_modeset_vga_set_state(struct drm_device *dev, bool state)
 	return 0;
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+
 struct intel_display_error_state {
 
 	u32 power_well_driver;
@@ -17281,3 +17283,5 @@ intel_display_print_error_state(struct drm_i915_error_state_buf *m,
 		err_printf(m, "  VSYNC: %08x\n", error->transcoder[i].vsync);
 	}
 }
+
+#endif
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index a24bc8c7889f..8c411bfc3b3f 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -1470,6 +1470,8 @@ void intel_cleanup_overlay(struct drm_i915_private *dev_priv)
 	kfree(dev_priv->overlay);
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+
 struct intel_overlay_error_state {
 	struct overlay_registers regs;
 	unsigned long base;
@@ -1587,3 +1589,5 @@ intel_overlay_print_error_state(struct drm_i915_error_state_buf *m,
 	P(UVSCALEV);
 #undef P
 }
+
+#endif
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/2] drm/i915: Move common code out of i915_gpu_error.c
  2016-10-11 13:32             ` [PATCH 1/2] drm/i915: Move common code out of i915_gpu_error.c Chris Wilson
  2016-10-11 13:32               ` [PATCH 2/2] drm/i915: Allow disabling error capture Chris Wilson
@ 2016-10-12  8:51               ` Joonas Lahtinen
  1 sibling, 0 replies; 19+ messages in thread
From: Joonas Lahtinen @ 2016-10-12  8:51 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ti, 2016-10-11 at 14:32 +0100, Chris Wilson wrote:
> In the next patch, I want to conditionally compile i915_gpu_error.c and
> that requires moving the functions used by debug out of
> i915_gpu_error.c!
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/2] drm/i915: Allow disabling error capture
  2016-10-11 13:32               ` [PATCH 2/2] drm/i915: Allow disabling error capture Chris Wilson
@ 2016-10-13  9:16                 ` Jani Nikula
  2016-10-13 10:01                   ` Chris Wilson
  0 siblings, 1 reply; 19+ messages in thread
From: Jani Nikula @ 2016-10-13  9:16 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Tue, 11 Oct 2016, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
>  	{"i915_error_state", &i915_error_state_fops},
> +#endif

IGT tests/drm_lib.sh tests for existence of i915_error_state to find the
debugfs path for i915. Perhaps not the cleverest thing to do, but I
wonder if there are others who depend on the existence of that file.

BR,
Jani.


-- 
Jani Nikula, Intel Open Source Technology Center
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/2] drm/i915: Allow disabling error capture
  2016-10-13  9:16                 ` Jani Nikula
@ 2016-10-13 10:01                   ` Chris Wilson
  0 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2016-10-13 10:01 UTC (permalink / raw)
  To: Jani Nikula; +Cc: intel-gfx

On Thu, Oct 13, 2016 at 12:16:03PM +0300, Jani Nikula wrote:
> On Tue, 11 Oct 2016, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
> >  	{"i915_error_state", &i915_error_state_fops},
> > +#endif
> 
> IGT tests/drm_lib.sh tests for existence of i915_error_state to find the
> debugfs path for i915. Perhaps not the cleverest thing to do, but I
> wonder if there are others who depend on the existence of that file.

Ugh. The general non-singleton code forms the path from the stat.st_rdev.
Something to work towards.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [2/2] drm/i915: Allow disabling error capture (rev3)
  2016-10-11  9:27 [CI 1/5] drm/i915: Allow disabling error capture Chris Wilson
                   ` (5 preceding siblings ...)
  2016-10-11 11:20 ` ✗ Fi.CI.BAT: warning for series starting with [CI,1/5] drm/i915: Allow disabling error capture Patchwork
@ 2018-10-26 13:02 ` Patchwork
  2018-10-26 13:03 ` Patchwork
  7 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2018-10-26 13:02 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [2/2] drm/i915: Allow disabling error capture (rev3)
URL   : https://patchwork.freedesktop.org/series/13576/
State : failure

== Summary ==

Applying: drm/i915: Allow disabling error capture
Using index info to reconstruct a base tree...
M	drivers/gpu/drm/i915/Kconfig
M	drivers/gpu/drm/i915/Makefile
M	drivers/gpu/drm/i915/i915_debugfs.c
M	drivers/gpu/drm/i915/i915_drv.h
M	drivers/gpu/drm/i915/i915_gpu_error.c
M	drivers/gpu/drm/i915/i915_params.c
M	drivers/gpu/drm/i915/i915_params.h
M	drivers/gpu/drm/i915/i915_sysfs.c
M	drivers/gpu/drm/i915/intel_display.c
M	drivers/gpu/drm/i915/intel_overlay.c
Falling back to patching base and 3-way merge...
Auto-merging drivers/gpu/drm/i915/i915_sysfs.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/i915_sysfs.c
Auto-merging drivers/gpu/drm/i915/i915_params.h
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/i915_params.h
Auto-merging drivers/gpu/drm/i915/i915_params.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/i915_params.c
Auto-merging drivers/gpu/drm/i915/i915_gpu_error.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/i915_gpu_error.c
Auto-merging drivers/gpu/drm/i915/i915_drv.h
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/i915_drv.h
Auto-merging drivers/gpu/drm/i915/i915_debugfs.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/i915_debugfs.c
Auto-merging drivers/gpu/drm/i915/Makefile
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/Makefile
Auto-merging drivers/gpu/drm/i915/Kconfig
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/Kconfig
error: Failed to merge in the changes.
Patch failed at 0001 drm/i915: Allow disabling error capture
Use 'git am --show-current-patch' to see the failed patch
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [2/2] drm/i915: Allow disabling error capture (rev3)
  2016-10-11  9:27 [CI 1/5] drm/i915: Allow disabling error capture Chris Wilson
                   ` (6 preceding siblings ...)
  2018-10-26 13:02 ` ✗ Fi.CI.BAT: failure for series starting with [2/2] drm/i915: Allow disabling error capture (rev3) Patchwork
@ 2018-10-26 13:03 ` Patchwork
  7 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2018-10-26 13:03 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [2/2] drm/i915: Allow disabling error capture (rev3)
URL   : https://patchwork.freedesktop.org/series/13576/
State : failure

== Summary ==

Applying: drm/i915: Allow disabling error capture
Using index info to reconstruct a base tree...
M	drivers/gpu/drm/i915/Kconfig
M	drivers/gpu/drm/i915/Makefile
M	drivers/gpu/drm/i915/i915_debugfs.c
M	drivers/gpu/drm/i915/i915_drv.h
M	drivers/gpu/drm/i915/i915_gpu_error.c
M	drivers/gpu/drm/i915/i915_params.c
M	drivers/gpu/drm/i915/i915_params.h
M	drivers/gpu/drm/i915/i915_sysfs.c
M	drivers/gpu/drm/i915/intel_display.c
M	drivers/gpu/drm/i915/intel_overlay.c
Falling back to patching base and 3-way merge...
Auto-merging drivers/gpu/drm/i915/i915_sysfs.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/i915_sysfs.c
Auto-merging drivers/gpu/drm/i915/i915_params.h
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/i915_params.h
Auto-merging drivers/gpu/drm/i915/i915_params.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/i915_params.c
Auto-merging drivers/gpu/drm/i915/i915_gpu_error.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/i915_gpu_error.c
Auto-merging drivers/gpu/drm/i915/i915_drv.h
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/i915_drv.h
Auto-merging drivers/gpu/drm/i915/i915_debugfs.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/i915_debugfs.c
Auto-merging drivers/gpu/drm/i915/Makefile
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/Makefile
Auto-merging drivers/gpu/drm/i915/Kconfig
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/Kconfig
error: Failed to merge in the changes.
Patch failed at 0001 drm/i915: Allow disabling error capture
Use 'git am --show-current-patch' to see the failed patch
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2018-10-26 13:03 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-10-11  9:27 [CI 1/5] drm/i915: Allow disabling error capture Chris Wilson
2016-10-11  9:27 ` [CI 2/5] drm/i915: Stop the machine whilst capturing the GPU crash dump Chris Wilson
2016-10-11  9:27 ` [CI 3/5] drm/i915: Always use the GTT for error capture Chris Wilson
2016-10-11  9:27 ` [CI 4/5] drm/i915: Consolidate error object printing Chris Wilson
2016-10-11  9:27 ` [CI 5/5] drm/i915: Compress GPU objects in error state Chris Wilson
2016-10-11  9:52 ` [CI 1/5] drm/i915: Allow disabling error capture Jani Nikula
2016-10-11  9:57   ` Chris Wilson
2016-10-11 10:16     ` Jani Nikula
2016-10-11 10:28       ` Chris Wilson
2016-10-11 10:56         ` Jani Nikula
2016-10-11 12:13           ` Daniel Vetter
2016-10-11 13:32             ` [PATCH 1/2] drm/i915: Move common code out of i915_gpu_error.c Chris Wilson
2016-10-11 13:32               ` [PATCH 2/2] drm/i915: Allow disabling error capture Chris Wilson
2016-10-13  9:16                 ` Jani Nikula
2016-10-13 10:01                   ` Chris Wilson
2016-10-12  8:51               ` [PATCH 1/2] drm/i915: Move common code out of i915_gpu_error.c Joonas Lahtinen
2016-10-11 11:20 ` ✗ Fi.CI.BAT: warning for series starting with [CI,1/5] drm/i915: Allow disabling error capture Patchwork
2018-10-26 13:02 ` ✗ Fi.CI.BAT: failure for series starting with [2/2] drm/i915: Allow disabling error capture (rev3) Patchwork
2018-10-26 13:03 ` Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.