intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH 0/2] Add support for dumping error captures via kernel logging
@ 2023-04-10 19:25 John.C.Harrison
  2023-04-10 19:25 ` [Intel-gfx] [PATCH 1/2] drm/i915: Dump error capture to kernel log John.C.Harrison
                   ` (6 more replies)
  0 siblings, 7 replies; 11+ messages in thread
From: John.C.Harrison @ 2023-04-10 19:25 UTC (permalink / raw)
  To: Intel-GFX; +Cc: DRI-Devel

From: John Harrison <John.C.Harrison@Intel.com>

Sometimes, the only effective way to debug an issue is to dump all the
interesting information at the point of failure. So add support for
doing that.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>


John Harrison (2):
  drm/i915: Dump error capture to kernel log
  drm/i915/guc: Dump error capture to dmesg on CTB error

 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |  53 +++++++++
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |   6 +
 drivers/gpu/drm/i915/i915_gpu_error.c     | 130 ++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gpu_error.h     |   8 ++
 4 files changed, 197 insertions(+)

-- 
2.39.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Intel-gfx] [PATCH 1/2] drm/i915: Dump error capture to kernel log
  2023-04-10 19:25 [Intel-gfx] [PATCH 0/2] Add support for dumping error captures via kernel logging John.C.Harrison
@ 2023-04-10 19:25 ` John.C.Harrison
  2023-04-10 19:25 ` [Intel-gfx] [PATCH 2/2] drm/i915/guc: Dump error capture to dmesg on CTB error John.C.Harrison
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 11+ messages in thread
From: John.C.Harrison @ 2023-04-10 19:25 UTC (permalink / raw)
  To: Intel-GFX; +Cc: DRI-Devel

From: John Harrison <John.C.Harrison@Intel.com>

This is useful for getting debug information out in certain
situations, such as failing kernel selftests and CI runs that don't
log error captures. It is especially useful for things like retrieving
GuC logs as GuC operation can't be tracked by adding printk or ftrace
entries.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 130 ++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gpu_error.h |   8 ++
 2 files changed, 138 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f020c0086fbcd..500fec34188a0 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -2219,3 +2219,133 @@ void i915_disable_error_state(struct drm_i915_private *i915, int err)
 		i915->gpu_error.first_error = ERR_PTR(err);
 	spin_unlock_irq(&i915->gpu_error.lock);
 }
+/* Take a fresh error capture for @gt/@engine_mask and print its text to the kernel log. */
+void intel_klog_error_capture(struct intel_gt *gt,
+			      intel_engine_mask_t engine_mask)
+{
+	static int g_count;	/* call counter; plain int, so racing callers may end up sharing an id */
+	struct drm_i915_private *i915 = gt->i915;
+	struct i915_gpu_coredump *error;
+	intel_wakeref_t wakeref;
+	size_t buf_size = PAGE_SIZE * 128;	/* size of the staging buffer filled on each pass */
+	size_t pos_err;
+	char *buf, *ptr, *next;
+	int l_count = g_count++;	/* id of this dump: first number in each [Capture/N.M] prefix */
+	int line = 0;	/* message index within this dump: second number in the prefix */
+
+	/* Can't allocate memory during a reset */
+	if (test_bit(I915_RESET_BACKOFF, &gt->reset.flags)) {
+		drm_err(&gt->i915->drm, "[Capture/%d.%d] Inside GT reset, skipping error capture :(\n",
+			l_count, line++);
+		return;
+	}
+	/* Clear any capture already stored in first_error before taking a new one */
+	error = READ_ONCE(i915->gpu_error.first_error);
+	if (error) {	/* NOTE(review): an ERR_PTR stored in first_error is non-NULL too - confirm intended */
+		drm_err(&i915->drm, "[Capture/%d.%d] Clearing existing error capture first...\n",
+			l_count, line++);
+		i915_reset_error_state(i915);
+	}
+	/* The capture runs inside with_intel_runtime_pm(), presumably to keep the device awake */
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+		error = i915_gpu_coredump(gt, engine_mask, CORE_DUMP_FLAG_NONE);
+
+	if (IS_ERR(error)) {
+		drm_err(&i915->drm, "[Capture/%d.%d] Failed to capture error capture: %ld!\n",
+			l_count, line++, PTR_ERR(error));
+		return;
+	}
+
+	buf = kvmalloc(buf_size, GFP_KERNEL);
+	if (!buf) {
+		drm_err(&i915->drm, "[Capture/%d.%d] Failed to allocate buffer for error capture!\n",
+			l_count, line++);
+		i915_gpu_coredump_put(error);	/* drop the capture taken above */
+		return;
+	}
+	/* The return address printed with %ps identifies the caller that requested the dump */
+	drm_info(&i915->drm, "[Capture/%d.%d] Dumping i915 error capture for %ps...\n",
+		 l_count, line++, __builtin_return_address(0));
+
+	/* Largest string length safe to print via dmesg */
+#	define MAX_CHUNK	800
+
+	pos_err = 0;	/* offset into the capture text consumed so far */
+	while (1) {	/* one iteration per buffer-full of capture text */
+		ssize_t got = i915_gpu_coredump_copy_to_buffer(error, buf, pos_err, buf_size - 1);
+
+		if (got <= 0)
+			break;
+		/* buf_size - 1 was requested, leaving room for this terminator (assuming got <= request) */
+		buf[got] = 0;
+		pos_err += got;
+		/* Print the buffer one newline-terminated line at a time */
+		ptr = buf;
+		while (got > 0) {
+			size_t count;
+			char tag[2];
+
+			next = strnchr(ptr, got, '\n');
+			if (next) {	/* whole line found: printed between > and < */
+				count = next - ptr;
+				*next = 0;	/* cut the string at the newline so %s stops there */
+				tag[0] = '>';
+				tag[1] = '<';
+			} else {	/* no newline in the rest of the buffer: printed between reversed curly braces */
+				count = got;
+				tag[0] = '}';
+				tag[1] = '{';
+			}
+
+			if (count > MAX_CHUNK) {	/* too long for one message: emit MAX_CHUNK-sized pieces */
+				size_t pos;
+				char *ptr2 = ptr;
+
+				for (pos = MAX_CHUNK; pos < count; pos += MAX_CHUNK) {
+					char chr = ptr[pos];	/* saved so it can be put back after printing */
+
+					ptr[pos] = 0;
+					drm_info(&i915->drm, "[Capture/%d.%d] }%s{\n",
+						 l_count, line++, ptr2);
+					ptr[pos] = chr;
+					ptr2 = ptr + pos;	/* start of the next piece */
+
+					/*
+					 * If spewing large amounts of data via a serial console,
+					 * this can be a very slow process. So be friendly and try
+					 * not to cause 'softlockup on CPU' problems.
+					 */
+					cond_resched();
+				}
+				/* NOTE(review): ptr2 < ptr + count always holds after the loop, so the else arm looks dead */
+				if (ptr2 < (ptr + count))
+					drm_info(&i915->drm, "[Capture/%d.%d] %c%s%c\n",
+						 l_count, line++, tag[0], ptr2, tag[1]);
+				else if (tag[0] == '>')
+					drm_info(&i915->drm, "[Capture/%d.%d] ><\n",
+						 l_count, line++);
+			} else {
+				drm_info(&i915->drm, "[Capture/%d.%d] %c%s%c\n",
+					 l_count, line++, tag[0], ptr, tag[1]);
+			}
+			/* Step past the text just printed, plus its newline when there was one */
+			ptr = next;
+			got -= count;
+			if (next) {
+				ptr++;
+				got--;
+			}
+
+			/* As above. */
+			cond_resched();
+		}
+		/* Sanity check only: the inner loop is expected to consume the buffer exactly */
+		if (got)
+			drm_info(&i915->drm, "[Capture/%d.%d] Got %zd bytes remaining!\n",
+				 l_count, line++, got);
+	}
+	/* NOTE(review): error is put on the kvmalloc() failure path but not here - possible leak, confirm */
+	kvfree(buf);
+	/* NOTE(review): pos_err is a size_t, so %zu would be the matching conversion */
+	drm_info(&i915->drm, "[Capture/%d.%d] Dumped %zd bytes\n", l_count, line++, pos_err);
+}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index a91932cc65317..b14c2c9915141 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -260,6 +260,9 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
 
 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
 
+void intel_klog_error_capture(struct intel_gt *gt,
+			      intel_engine_mask_t engine_mask);
+
 __printf(2, 3)
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
 void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
@@ -323,6 +326,11 @@ void i915_disable_error_state(struct drm_i915_private *i915, int err);
 
 #else
 
+static inline void intel_klog_error_capture(struct intel_gt *gt,
+					    intel_engine_mask_t engine_mask)
+{	/* empty stub in the #else branch, presumably for builds without CONFIG_DRM_I915_CAPTURE_ERROR */
+}
+
 __printf(2, 3)
 static inline void
 i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
-- 
2.39.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Intel-gfx] [PATCH 2/2] drm/i915/guc: Dump error capture to dmesg on CTB error
  2023-04-10 19:25 [Intel-gfx] [PATCH 0/2] Add support for dumping error captures via kernel logging John.C.Harrison
  2023-04-10 19:25 ` [Intel-gfx] [PATCH 1/2] drm/i915: Dump error capture to kernel log John.C.Harrison
@ 2023-04-10 19:25 ` John.C.Harrison
  2023-04-10 19:50 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Add support for dumping error captures via kernel logging Patchwork
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 11+ messages in thread
From: John.C.Harrison @ 2023-04-10 19:25 UTC (permalink / raw)
  To: Intel-GFX; +Cc: DRI-Devel

From: John Harrison <John.C.Harrison@Intel.com>

In the past, there have been sporadic CTB failures which proved hard
to reproduce manually. The most effective solution was to dump the GuC
log at the point of failure and let the CI system do the repro. It is
preferable not to dump the GuC log via dmesg for all issues as it is
not always necessary and is not helpful for end users. But rather than
trying to re-invent the code to do this each time it is wanted, commit
the code but for DEBUG_GUC builds only.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 53 +++++++++++++++++++++++
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |  6 +++
 2 files changed, 59 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 1803a633ed648..66a1818a3485f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -13,6 +13,30 @@
 #include "intel_guc_ct.h"
 #include "intel_guc_print.h"
 
+#if defined(CONFIG_DRM_I915_DEBUG_GUC)
+enum {	/* failure reasons; CT_DEAD() uses each value as a bit position in dead_ct_reason */
+	CT_DEAD_ALIVE = 0,	/* also stored directly as the no-failure value by intel_guc_ct_enable() */
+	CT_DEAD_SETUP,		/* intel_guc_ct_enable() error path */
+	CT_DEAD_WRITE,		/* corrupted descriptor seen in ct_write() */
+	CT_DEAD_DEADLOCK,	/* reported from ct_deadlocked() */
+	CT_DEAD_H2G_HAS_ROOM,	/* reported from h2g_has_room() */
+	CT_DEAD_READ,		/* corrupted descriptor seen in ct_read() */
+	CT_DEAD_PROCESS_FAILED,	/* ct_process_incoming_requests() failed to process a message */
+};
+/* NOTE(review): CT_DEAD() evaluates ct several times and the macro itself takes no lock. */
+static void ct_dead_ct_worker_func(struct work_struct *w);
+/* Record @reason in (ct)->dead_ct_reason and queue the dump worker unless already reported. */
+#define CT_DEAD(ct, reason)	\
+	do { \
+		if (!(ct)->dead_ct_reported) { \
+			(ct)->dead_ct_reason |= 1 << CT_DEAD_##reason; \
+			queue_work(system_unbound_wq, &(ct)->dead_ct_worker); \
+		} \
+	} while (0)
+#else
+#define CT_DEAD(ct, reason)	do { } while (0)	/* no-op without CONFIG_DRM_I915_DEBUG_GUC */
+#endif
+
 static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct)
 {
 	return container_of(ct, struct intel_guc, ct);
@@ -93,6 +117,9 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct)
 	spin_lock_init(&ct->requests.lock);
 	INIT_LIST_HEAD(&ct->requests.pending);
 	INIT_LIST_HEAD(&ct->requests.incoming);
+#if defined(CONFIG_DRM_I915_DEBUG_GUC)
+	INIT_WORK(&ct->dead_ct_worker, ct_dead_ct_worker_func);
+#endif
 	INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func);
 	tasklet_setup(&ct->receive_tasklet, ct_receive_tasklet_func);
 	init_waitqueue_head(&ct->wq);
@@ -319,11 +346,16 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct)
 
 	ct->enabled = true;
 	ct->stall_time = KTIME_MAX;
+#if defined(CONFIG_DRM_I915_DEBUG_GUC)
+	ct->dead_ct_reported = false;
+	ct->dead_ct_reason = CT_DEAD_ALIVE;
+#endif
 
 	return 0;
 
 err_out:
 	CT_PROBE_ERROR(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err));
+	CT_DEAD(ct, SETUP);
 	return err;
 }
 
@@ -434,6 +466,7 @@ static int ct_write(struct intel_guc_ct *ct,
 corrupted:
 	CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
 		 desc->head, desc->tail, desc->status);
+	CT_DEAD(ct, WRITE);
 	ctb->broken = true;
 	return -EPIPE;
 }
@@ -504,6 +537,7 @@ static inline bool ct_deadlocked(struct intel_guc_ct *ct)
 		CT_ERROR(ct, "Head: %u\n (Dwords)", ct->ctbs.recv.desc->head);
 		CT_ERROR(ct, "Tail: %u\n (Dwords)", ct->ctbs.recv.desc->tail);
 
+		CT_DEAD(ct, DEADLOCK);
 		ct->ctbs.send.broken = true;
 	}
 
@@ -552,6 +586,7 @@ static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw)
 			 head, ctb->size);
 		desc->status |= GUC_CTB_STATUS_OVERFLOW;
 		ctb->broken = true;
+		CT_DEAD(ct, H2G_HAS_ROOM);
 		return false;
 	}
 
@@ -908,6 +943,7 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
 	CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
 		 desc->head, desc->tail, desc->status);
 	ctb->broken = true;
+	CT_DEAD(ct, READ);
 	return -EPIPE;
 }
 
@@ -1057,6 +1093,7 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct)
 	if (unlikely(err)) {
 		CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n",
 			 ERR_PTR(err), 4 * request->size, request->msg);
+		CT_DEAD(ct, PROCESS_FAILED);
 		ct_free_msg(request);
 	}
 
@@ -1233,3 +1270,19 @@ void intel_guc_ct_print_info(struct intel_guc_ct *ct,
 	drm_printf(p, "Tail: %u\n",
 		   ct->ctbs.recv.desc->tail);
 }
+/* Work item queued by CT_DEAD(): log the accumulated reason once, then dump an error capture. */
+#if defined(CONFIG_DRM_I915_DEBUG_GUC)
+static void ct_dead_ct_worker_func(struct work_struct *w)
+{
+	struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, dead_ct_worker);
+	struct intel_guc *guc = ct_to_guc(ct);
+
+	if (ct->dead_ct_reported)	/* set below; cleared again by intel_guc_ct_enable() */
+		return;
+
+	ct->dead_ct_reported = true;
+	/* dead_ct_reason is a mask of (1 << CT_DEAD_*) bits; ~0U presumably selects every engine */
+	guc_info(guc, "CTB is dead - reason=0x%X\n", ct->dead_ct_reason);
+	intel_klog_error_capture(guc_to_gt(guc), (intel_engine_mask_t)~0U);
+}
+#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
index f709a19c7e214..d111d61449154 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
@@ -85,6 +85,12 @@ struct intel_guc_ct {
 
 	/** @stall_time: time of first time a CTB submission is stalled */
 	ktime_t stall_time;
+
+#if defined(CONFIG_DRM_I915_DEBUG_GUC)
+	int dead_ct_reason;
+	bool dead_ct_reported;
+	struct work_struct dead_ct_worker;
+#endif
 };
 
 void intel_guc_ct_init_early(struct intel_guc_ct *ct);
-- 
2.39.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Add support for dumping error captures via kernel logging
  2023-04-10 19:25 [Intel-gfx] [PATCH 0/2] Add support for dumping error captures via kernel logging John.C.Harrison
  2023-04-10 19:25 ` [Intel-gfx] [PATCH 1/2] drm/i915: Dump error capture to kernel log John.C.Harrison
  2023-04-10 19:25 ` [Intel-gfx] [PATCH 2/2] drm/i915/guc: Dump error capture to dmesg on CTB error John.C.Harrison
@ 2023-04-10 19:50 ` Patchwork
  2023-04-10 19:50 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2023-04-10 19:50 UTC (permalink / raw)
  To: john.c.harrison; +Cc: intel-gfx

== Series Details ==

Series: Add support for dumping error captures via kernel logging
URL   : https://patchwork.freedesktop.org/series/116280/
State : warning

== Summary ==

Error: dim checkpatch failed
28aef43095be drm/i915: Dump error capture to kernel log
-:61: WARNING:OOM_MESSAGE: Possible unnecessary 'out of memory' message
#61: FILE: drivers/gpu/drm/i915/i915_gpu_error.c:2261:
+	if (!buf) {
+		drm_err(&i915->drm, "[Capture/%d.%d] Failed to allocate buffer for error capture!\n",

total: 0 errors, 1 warnings, 0 checks, 153 lines checked
fcba0a3c2b99 drm/i915/guc: Dump error capture to dmesg on CTB error
-:37: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'ct' - possible side-effects?
#37: FILE: drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c:29:
+#define CT_DEAD(ct, reason)	\
+	do { \
+		if (!(ct)->dead_ct_reported) { \
+			(ct)->dead_ct_reason |= 1 << CT_DEAD_##reason; \
+			queue_work(system_unbound_wq, &(ct)->dead_ct_worker); \
+		} \
+	} while (0)

total: 0 errors, 0 warnings, 1 checks, 121 lines checked



^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Intel-gfx] ✗ Fi.CI.SPARSE: warning for Add support for dumping error captures via kernel logging
  2023-04-10 19:25 [Intel-gfx] [PATCH 0/2] Add support for dumping error captures via kernel logging John.C.Harrison
                   ` (2 preceding siblings ...)
  2023-04-10 19:50 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Add support for dumping error captures via kernel logging Patchwork
@ 2023-04-10 19:50 ` Patchwork
  2023-04-10 19:59 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2023-04-10 19:50 UTC (permalink / raw)
  To: john.c.harrison; +Cc: intel-gfx

== Series Details ==

Series: Add support for dumping error captures via kernel logging
URL   : https://patchwork.freedesktop.org/series/116280/
State : warning

== Summary ==

Error: dim sparse failed
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.



^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for Add support for dumping error captures via kernel logging
  2023-04-10 19:25 [Intel-gfx] [PATCH 0/2] Add support for dumping error captures via kernel logging John.C.Harrison
                   ` (3 preceding siblings ...)
  2023-04-10 19:50 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
@ 2023-04-10 19:59 ` Patchwork
  2023-04-10 21:15 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
  2023-04-11 14:41 ` [Intel-gfx] [PATCH 0/2] " Rodrigo Vivi
  6 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2023-04-10 19:59 UTC (permalink / raw)
  To: john.c.harrison; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 9865 bytes --]

== Series Details ==

Series: Add support for dumping error captures via kernel logging
URL   : https://patchwork.freedesktop.org/series/116280/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_12984 -> Patchwork_116280v1
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/index.html

Participating hosts (36 -> 34)
------------------------------

  Missing    (2): fi-snb-2520m fi-pnv-d510 

Known issues
------------

  Here are the changes found in Patchwork_116280v1 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_suspend@basic-s3@smem:
    - bat-rpls-1:         NOTRUN -> [ABORT][1] ([i915#6687] / [i915#7978])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-rpls-1/igt@gem_exec_suspend@basic-s3@smem.html

  * igt@gem_mmap@basic:
    - bat-dg2-9:          NOTRUN -> [SKIP][2] ([i915#4083])
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-dg2-9/igt@gem_mmap@basic.html

  * igt@gem_mmap_gtt@basic:
    - bat-dg2-9:          NOTRUN -> [SKIP][3] ([i915#4077]) +2 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-dg2-9/igt@gem_mmap_gtt@basic.html

  * igt@gem_render_tiled_blits@basic:
    - bat-dg2-9:          NOTRUN -> [SKIP][4] ([i915#4079]) +1 similar issue
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-dg2-9/igt@gem_render_tiled_blits@basic.html

  * igt@i915_pm_backlight@basic-brightness:
    - bat-dg2-9:          NOTRUN -> [SKIP][5] ([i915#5354] / [i915#7561])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-dg2-9/igt@i915_pm_backlight@basic-brightness.html

  * igt@i915_selftest@live@slpc:
    - bat-rpls-1:         NOTRUN -> [DMESG-FAIL][6] ([i915#6367] / [i915#7996])
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-rpls-1/igt@i915_selftest@live@slpc.html

  * igt@kms_addfb_basic@addfb25-y-tiled-small-legacy:
    - bat-dg2-9:          NOTRUN -> [SKIP][7] ([i915#5190])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-dg2-9/igt@kms_addfb_basic@addfb25-y-tiled-small-legacy.html

  * igt@kms_addfb_basic@basic-y-tiled-legacy:
    - bat-dg2-9:          NOTRUN -> [SKIP][8] ([i915#4215] / [i915#5190])
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-dg2-9/igt@kms_addfb_basic@basic-y-tiled-legacy.html

  * igt@kms_addfb_basic@framebuffer-vs-set-tiling:
    - bat-dg2-9:          NOTRUN -> [SKIP][9] ([i915#4212]) +7 similar issues
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-dg2-9/igt@kms_addfb_basic@framebuffer-vs-set-tiling.html

  * igt@kms_chamelium_hpd@vga-hpd-fast:
    - bat-dg2-9:          NOTRUN -> [SKIP][10] ([i915#7828]) +8 similar issues
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-dg2-9/igt@kms_chamelium_hpd@vga-hpd-fast.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy:
    - bat-dg2-9:          NOTRUN -> [SKIP][11] ([i915#4103] / [i915#4213]) +1 similar issue
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-dg2-9/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html

  * igt@kms_force_connector_basic@force-load-detect:
    - bat-dg2-9:          NOTRUN -> [SKIP][12] ([fdo#109285])
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-dg2-9/igt@kms_force_connector_basic@force-load-detect.html

  * igt@kms_force_connector_basic@prune-stale-modes:
    - bat-dg2-9:          NOTRUN -> [SKIP][13] ([i915#5274])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-dg2-9/igt@kms_force_connector_basic@prune-stale-modes.html

  * igt@kms_psr@sprite_plane_onoff:
    - bat-dg2-9:          NOTRUN -> [SKIP][14] ([i915#1072]) +3 similar issues
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-dg2-9/igt@kms_psr@sprite_plane_onoff.html

  * igt@kms_setmode@basic-clone-single-crtc:
    - bat-dg2-9:          NOTRUN -> [SKIP][15] ([i915#3555] / [i915#4579])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-dg2-9/igt@kms_setmode@basic-clone-single-crtc.html

  * igt@prime_vgem@basic-fence-flip:
    - bat-dg2-9:          NOTRUN -> [SKIP][16] ([i915#3708])
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-dg2-9/igt@prime_vgem@basic-fence-flip.html

  * igt@prime_vgem@basic-fence-mmap:
    - bat-dg2-9:          NOTRUN -> [SKIP][17] ([i915#3708] / [i915#4077]) +1 similar issue
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-dg2-9/igt@prime_vgem@basic-fence-mmap.html

  * igt@prime_vgem@basic-userptr:
    - bat-dg2-9:          NOTRUN -> [SKIP][18] ([i915#3708] / [i915#4873])
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-dg2-9/igt@prime_vgem@basic-userptr.html

  * igt@prime_vgem@basic-write:
    - bat-dg2-9:          NOTRUN -> [SKIP][19] ([i915#3291] / [i915#3708]) +2 similar issues
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-dg2-9/igt@prime_vgem@basic-write.html

  
#### Possible fixes ####

  * igt@i915_selftest@live@gt_heartbeat:
    - fi-apl-guc:         [DMESG-FAIL][20] ([i915#5334]) -> [PASS][21]
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/fi-apl-guc/igt@i915_selftest@live@gt_heartbeat.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/fi-apl-guc/igt@i915_selftest@live@gt_heartbeat.html

  * igt@i915_selftest@live@requests:
    - bat-rpls-1:         [ABORT][22] ([i915#7911] / [i915#7982]) -> [PASS][23]
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/bat-rpls-1/igt@i915_selftest@live@requests.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-rpls-1/igt@i915_selftest@live@requests.html

  * igt@i915_selftest@live@slpc:
    - bat-rplp-1:         [DMESG-FAIL][24] ([i915#6367] / [i915#7913]) -> [PASS][25]
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/bat-rplp-1/igt@i915_selftest@live@slpc.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-rplp-1/igt@i915_selftest@live@slpc.html

  
#### Warnings ####

  * igt@kms_setmode@basic-clone-single-crtc:
    - bat-adls-5:         [SKIP][26] ([i915#3555]) -> [SKIP][27] ([i915#3555] / [i915#4579])
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/bat-adls-5/igt@kms_setmode@basic-clone-single-crtc.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-adls-5/igt@kms_setmode@basic-clone-single-crtc.html
    - bat-rpls-1:         [SKIP][28] ([i915#3555]) -> [SKIP][29] ([i915#3555] / [i915#4579])
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/bat-rpls-1/igt@kms_setmode@basic-clone-single-crtc.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-rpls-1/igt@kms_setmode@basic-clone-single-crtc.html
    - bat-rpls-2:         [SKIP][30] ([i915#3555]) -> [SKIP][31] ([i915#3555] / [i915#4579])
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/bat-rpls-2/igt@kms_setmode@basic-clone-single-crtc.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/bat-rpls-2/igt@kms_setmode@basic-clone-single-crtc.html

  
  [fdo#109285]: https://bugs.freedesktop.org/show_bug.cgi?id=109285
  [i915#1072]: https://gitlab.freedesktop.org/drm/intel/issues/1072
  [i915#3291]: https://gitlab.freedesktop.org/drm/intel/issues/3291
  [i915#3555]: https://gitlab.freedesktop.org/drm/intel/issues/3555
  [i915#3708]: https://gitlab.freedesktop.org/drm/intel/issues/3708
  [i915#4077]: https://gitlab.freedesktop.org/drm/intel/issues/4077
  [i915#4079]: https://gitlab.freedesktop.org/drm/intel/issues/4079
  [i915#4083]: https://gitlab.freedesktop.org/drm/intel/issues/4083
  [i915#4103]: https://gitlab.freedesktop.org/drm/intel/issues/4103
  [i915#4212]: https://gitlab.freedesktop.org/drm/intel/issues/4212
  [i915#4213]: https://gitlab.freedesktop.org/drm/intel/issues/4213
  [i915#4215]: https://gitlab.freedesktop.org/drm/intel/issues/4215
  [i915#4579]: https://gitlab.freedesktop.org/drm/intel/issues/4579
  [i915#4873]: https://gitlab.freedesktop.org/drm/intel/issues/4873
  [i915#5190]: https://gitlab.freedesktop.org/drm/intel/issues/5190
  [i915#5274]: https://gitlab.freedesktop.org/drm/intel/issues/5274
  [i915#5334]: https://gitlab.freedesktop.org/drm/intel/issues/5334
  [i915#5354]: https://gitlab.freedesktop.org/drm/intel/issues/5354
  [i915#6367]: https://gitlab.freedesktop.org/drm/intel/issues/6367
  [i915#6687]: https://gitlab.freedesktop.org/drm/intel/issues/6687
  [i915#7561]: https://gitlab.freedesktop.org/drm/intel/issues/7561
  [i915#7828]: https://gitlab.freedesktop.org/drm/intel/issues/7828
  [i915#7911]: https://gitlab.freedesktop.org/drm/intel/issues/7911
  [i915#7913]: https://gitlab.freedesktop.org/drm/intel/issues/7913
  [i915#7978]: https://gitlab.freedesktop.org/drm/intel/issues/7978
  [i915#7982]: https://gitlab.freedesktop.org/drm/intel/issues/7982
  [i915#7996]: https://gitlab.freedesktop.org/drm/intel/issues/7996


Build changes
-------------

  * Linux: CI_DRM_12984 -> Patchwork_116280v1

  CI-20190529: 20190529
  CI_DRM_12984: 936e01d4b5d0a82f06f95c736cff42dfa20c72aa @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_7246: b4252c86f4470e6b2d1201b1cf11d991bc5710eb @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_116280v1: 936e01d4b5d0a82f06f95c736cff42dfa20c72aa @ git://anongit.freedesktop.org/gfx-ci/linux


### Linux commits

959f60cab05c drm/i915/guc: Dump error capture to dmesg on CTB error
fbb0a9b39c4e drm/i915: Dump error capture to kernel log

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/index.html

[-- Attachment #2: Type: text/html, Size: 12067 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Intel-gfx] ✓ Fi.CI.IGT: success for Add support for dumping error captures via kernel logging
  2023-04-10 19:25 [Intel-gfx] [PATCH 0/2] Add support for dumping error captures via kernel logging John.C.Harrison
                   ` (4 preceding siblings ...)
  2023-04-10 19:59 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
@ 2023-04-10 21:15 ` Patchwork
  2023-04-11 14:41 ` [Intel-gfx] [PATCH 0/2] " Rodrigo Vivi
  6 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2023-04-10 21:15 UTC (permalink / raw)
  To: john.c.harrison; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 13059 bytes --]

== Series Details ==

Series: Add support for dumping error captures via kernel logging
URL   : https://patchwork.freedesktop.org/series/116280/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_12984_full -> Patchwork_116280v1_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  

Participating hosts (7 -> 7)
------------------------------

  No changes in participating hosts

Known issues
------------

  Here are the changes found in Patchwork_116280v1_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_fair@basic-pace-share@rcs0:
    - shard-glk:          [PASS][1] -> [FAIL][2] ([i915#2842])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/shard-glk4/igt@gem_exec_fair@basic-pace-share@rcs0.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/shard-glk6/igt@gem_exec_fair@basic-pace-share@rcs0.html

  * igt@gem_lmem_swapping@smem-oom:
    - shard-glk:          NOTRUN -> [SKIP][3] ([fdo#109271] / [i915#4613])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/shard-glk1/igt@gem_lmem_swapping@smem-oom.html

  * igt@kms_async_flips@alternate-sync-async-flip@pipe-b-hdmi-a-1:
    - shard-glk:          [PASS][4] -> [FAIL][5] ([i915#2521])
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/shard-glk4/igt@kms_async_flips@alternate-sync-async-flip@pipe-b-hdmi-a-1.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/shard-glk6/igt@kms_async_flips@alternate-sync-async-flip@pipe-b-hdmi-a-1.html

  * igt@kms_ccs@pipe-a-random-ccs-data-y_tiled_gen12_mc_ccs:
    - shard-glk:          NOTRUN -> [SKIP][6] ([fdo#109271] / [i915#3886])
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/shard-glk1/igt@kms_ccs@pipe-a-random-ccs-data-y_tiled_gen12_mc_ccs.html

  * igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions:
    - shard-glk:          [PASS][7] -> [FAIL][8] ([i915#2346])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/shard-glk1/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/shard-glk4/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions.html
    - shard-apl:          [PASS][9] -> [FAIL][10] ([i915#2346])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/shard-apl7/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/shard-apl7/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-offscren-pri-indfb-draw-blt:
    - shard-glk:          NOTRUN -> [SKIP][11] ([fdo#109271]) +21 similar issues
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/shard-glk1/igt@kms_frontbuffer_tracking@fbcpsr-1p-offscren-pri-indfb-draw-blt.html

  
#### Possible fixes ####

  * igt@gem_barrier_race@remote-request@rcs0:
    - shard-glk:          [ABORT][12] ([i915#8211]) -> [PASS][13]
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/shard-glk3/igt@gem_barrier_race@remote-request@rcs0.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/shard-glk1/igt@gem_barrier_race@remote-request@rcs0.html

  * igt@gem_ctx_exec@basic-nohangcheck:
    - {shard-rkl}:        [FAIL][14] ([i915#6268]) -> [PASS][15]
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/shard-rkl-2/igt@gem_ctx_exec@basic-nohangcheck.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/shard-rkl-7/igt@gem_ctx_exec@basic-nohangcheck.html

  * igt@gem_exec_fair@basic-none@bcs0:
    - {shard-rkl}:        [FAIL][16] ([i915#2842]) -> [PASS][17]
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/shard-rkl-1/igt@gem_exec_fair@basic-none@bcs0.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/shard-rkl-4/igt@gem_exec_fair@basic-none@bcs0.html

  * igt@gen9_exec_parse@allowed-single:
    - shard-glk:          [ABORT][18] ([i915#5566]) -> [PASS][19]
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/shard-glk2/igt@gen9_exec_parse@allowed-single.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/shard-glk7/igt@gen9_exec_parse@allowed-single.html

  * igt@i915_pm_dc@dc9-dpms:
    - {shard-tglu}:       [SKIP][20] ([i915#4281]) -> [PASS][21]
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/shard-tglu-7/igt@i915_pm_dc@dc9-dpms.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/shard-tglu-2/igt@i915_pm_dc@dc9-dpms.html

  * igt@i915_pm_rc6_residency@rc6-idle@vcs0:
    - {shard-dg1}:        [FAIL][22] ([i915#3591]) -> [PASS][23]
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/shard-dg1-18/igt@i915_pm_rc6_residency@rc6-idle@vcs0.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/shard-dg1-14/igt@i915_pm_rc6_residency@rc6-idle@vcs0.html

  * igt@i915_pm_rpm@dpms-non-lpsp:
    - {shard-rkl}:        [SKIP][24] ([i915#1397]) -> [PASS][25]
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/shard-rkl-7/igt@i915_pm_rpm@dpms-non-lpsp.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/shard-rkl-1/igt@i915_pm_rpm@dpms-non-lpsp.html

  * igt@i915_pm_rpm@modeset-lpsp-stress:
    - {shard-dg1}:        [SKIP][26] ([i915#1397]) -> [PASS][27]
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/shard-dg1-18/igt@i915_pm_rpm@modeset-lpsp-stress.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/shard-dg1-14/igt@i915_pm_rpm@modeset-lpsp-stress.html

  * igt@i915_selftest@live@gt_pm:
    - {shard-rkl}:        [DMESG-FAIL][28] ([i915#4258]) -> [PASS][29]
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/shard-rkl-6/igt@i915_selftest@live@gt_pm.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/shard-rkl-3/igt@i915_selftest@live@gt_pm.html

  * igt@kms_cursor_legacy@flip-vs-cursor-varying-size:
    - {shard-tglu}:       [FAIL][30] ([i915#2346]) -> [PASS][31]
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/shard-tglu-10/igt@kms_cursor_legacy@flip-vs-cursor-varying-size.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/shard-tglu-9/igt@kms_cursor_legacy@flip-vs-cursor-varying-size.html

  * igt@perf_pmu@idle@rcs0:
    - {shard-rkl}:        [FAIL][32] ([i915#4349]) -> [PASS][33]
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12984/shard-rkl-7/igt@perf_pmu@idle@rcs0.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/shard-rkl-3/igt@perf_pmu@idle@rcs0.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109289]: https://bugs.freedesktop.org/show_bug.cgi?id=109289
  [fdo#111068]: https://bugs.freedesktop.org/show_bug.cgi?id=111068
  [fdo#111615]: https://bugs.freedesktop.org/show_bug.cgi?id=111615
  [fdo#111825]: https://bugs.freedesktop.org/show_bug.cgi?id=111825
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [i915#1072]: https://gitlab.freedesktop.org/drm/intel/issues/1072
  [i915#1397]: https://gitlab.freedesktop.org/drm/intel/issues/1397
  [i915#1825]: https://gitlab.freedesktop.org/drm/intel/issues/1825
  [i915#1839]: https://gitlab.freedesktop.org/drm/intel/issues/1839
  [i915#2346]: https://gitlab.freedesktop.org/drm/intel/issues/2346
  [i915#2433]: https://gitlab.freedesktop.org/drm/intel/issues/2433
  [i915#2521]: https://gitlab.freedesktop.org/drm/intel/issues/2521
  [i915#2527]: https://gitlab.freedesktop.org/drm/intel/issues/2527
  [i915#2575]: https://gitlab.freedesktop.org/drm/intel/issues/2575
  [i915#2587]: https://gitlab.freedesktop.org/drm/intel/issues/2587
  [i915#2672]: https://gitlab.freedesktop.org/drm/intel/issues/2672
  [i915#2842]: https://gitlab.freedesktop.org/drm/intel/issues/2842
  [i915#2876]: https://gitlab.freedesktop.org/drm/intel/issues/2876
  [i915#3023]: https://gitlab.freedesktop.org/drm/intel/issues/3023
  [i915#315]: https://gitlab.freedesktop.org/drm/intel/issues/315
  [i915#3281]: https://gitlab.freedesktop.org/drm/intel/issues/3281
  [i915#3282]: https://gitlab.freedesktop.org/drm/intel/issues/3282
  [i915#3297]: https://gitlab.freedesktop.org/drm/intel/issues/3297
  [i915#3299]: https://gitlab.freedesktop.org/drm/intel/issues/3299
  [i915#3359]: https://gitlab.freedesktop.org/drm/intel/issues/3359
  [i915#3458]: https://gitlab.freedesktop.org/drm/intel/issues/3458
  [i915#3539]: https://gitlab.freedesktop.org/drm/intel/issues/3539
  [i915#3555]: https://gitlab.freedesktop.org/drm/intel/issues/3555
  [i915#3591]: https://gitlab.freedesktop.org/drm/intel/issues/3591
  [i915#3638]: https://gitlab.freedesktop.org/drm/intel/issues/3638
  [i915#3689]: https://gitlab.freedesktop.org/drm/intel/issues/3689
  [i915#3708]: https://gitlab.freedesktop.org/drm/intel/issues/3708
  [i915#3804]: https://gitlab.freedesktop.org/drm/intel/issues/3804
  [i915#3886]: https://gitlab.freedesktop.org/drm/intel/issues/3886
  [i915#4077]: https://gitlab.freedesktop.org/drm/intel/issues/4077
  [i915#4078]: https://gitlab.freedesktop.org/drm/intel/issues/4078
  [i915#4079]: https://gitlab.freedesktop.org/drm/intel/issues/4079
  [i915#4083]: https://gitlab.freedesktop.org/drm/intel/issues/4083
  [i915#4103]: https://gitlab.freedesktop.org/drm/intel/issues/4103
  [i915#4212]: https://gitlab.freedesktop.org/drm/intel/issues/4212
  [i915#4213]: https://gitlab.freedesktop.org/drm/intel/issues/4213
  [i915#4258]: https://gitlab.freedesktop.org/drm/intel/issues/4258
  [i915#4270]: https://gitlab.freedesktop.org/drm/intel/issues/4270
  [i915#4281]: https://gitlab.freedesktop.org/drm/intel/issues/4281
  [i915#4349]: https://gitlab.freedesktop.org/drm/intel/issues/4349
  [i915#4538]: https://gitlab.freedesktop.org/drm/intel/issues/4538
  [i915#4579]: https://gitlab.freedesktop.org/drm/intel/issues/4579
  [i915#4613]: https://gitlab.freedesktop.org/drm/intel/issues/4613
  [i915#4771]: https://gitlab.freedesktop.org/drm/intel/issues/4771
  [i915#4812]: https://gitlab.freedesktop.org/drm/intel/issues/4812
  [i915#4833]: https://gitlab.freedesktop.org/drm/intel/issues/4833
  [i915#4852]: https://gitlab.freedesktop.org/drm/intel/issues/4852
  [i915#4860]: https://gitlab.freedesktop.org/drm/intel/issues/4860
  [i915#5176]: https://gitlab.freedesktop.org/drm/intel/issues/5176
  [i915#5235]: https://gitlab.freedesktop.org/drm/intel/issues/5235
  [i915#5286]: https://gitlab.freedesktop.org/drm/intel/issues/5286
  [i915#5289]: https://gitlab.freedesktop.org/drm/intel/issues/5289
  [i915#5354]: https://gitlab.freedesktop.org/drm/intel/issues/5354
  [i915#5563]: https://gitlab.freedesktop.org/drm/intel/issues/5563
  [i915#5566]: https://gitlab.freedesktop.org/drm/intel/issues/5566
  [i915#6095]: https://gitlab.freedesktop.org/drm/intel/issues/6095
  [i915#6268]: https://gitlab.freedesktop.org/drm/intel/issues/6268
  [i915#6301]: https://gitlab.freedesktop.org/drm/intel/issues/6301
  [i915#658]: https://gitlab.freedesktop.org/drm/intel/issues/658
  [i915#6590]: https://gitlab.freedesktop.org/drm/intel/issues/6590
  [i915#6946]: https://gitlab.freedesktop.org/drm/intel/issues/6946
  [i915#6953]: https://gitlab.freedesktop.org/drm/intel/issues/6953
  [i915#7116]: https://gitlab.freedesktop.org/drm/intel/issues/7116
  [i915#7561]: https://gitlab.freedesktop.org/drm/intel/issues/7561
  [i915#7697]: https://gitlab.freedesktop.org/drm/intel/issues/7697
  [i915#7701]: https://gitlab.freedesktop.org/drm/intel/issues/7701
  [i915#7711]: https://gitlab.freedesktop.org/drm/intel/issues/7711
  [i915#7742]: https://gitlab.freedesktop.org/drm/intel/issues/7742
  [i915#7828]: https://gitlab.freedesktop.org/drm/intel/issues/7828
  [i915#7975]: https://gitlab.freedesktop.org/drm/intel/issues/7975
  [i915#8011]: https://gitlab.freedesktop.org/drm/intel/issues/8011
  [i915#8211]: https://gitlab.freedesktop.org/drm/intel/issues/8211
  [i915#8213]: https://gitlab.freedesktop.org/drm/intel/issues/8213
  [i915#8228]: https://gitlab.freedesktop.org/drm/intel/issues/8228
  [i915#8292]: https://gitlab.freedesktop.org/drm/intel/issues/8292
  [i915#8347]: https://gitlab.freedesktop.org/drm/intel/issues/8347


Build changes
-------------

  * Linux: CI_DRM_12984 -> Patchwork_116280v1

  CI-20190529: 20190529
  CI_DRM_12984: 936e01d4b5d0a82f06f95c736cff42dfa20c72aa @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_7246: b4252c86f4470e6b2d1201b1cf11d991bc5710eb @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_116280v1: 936e01d4b5d0a82f06f95c736cff42dfa20c72aa @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_116280v1/index.html

[-- Attachment #2: Type: text/html, Size: 9883 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Intel-gfx] [PATCH 0/2] Add support for dumping error captures via kernel logging
  2023-04-10 19:25 [Intel-gfx] [PATCH 0/2] Add support for dumping error captures via kernel logging John.C.Harrison
                   ` (5 preceding siblings ...)
  2023-04-10 21:15 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
@ 2023-04-11 14:41 ` Rodrigo Vivi
  2023-04-11 16:41   ` John Harrison
  6 siblings, 1 reply; 11+ messages in thread
From: Rodrigo Vivi @ 2023-04-11 14:41 UTC (permalink / raw)
  To: John.C.Harrison; +Cc: Intel-GFX, DRI-Devel

On Mon, Apr 10, 2023 at 12:25:21PM -0700, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
> 
> Sometimes, the only effective way to debug an issue is to dump all the
> interesting information at the point of failure. So add support for
> doing that.

No! Please no!
We have some of this on Xe and I'm hating it. I'm going to try to remove
it from there soon. It is horrible when you lose the ability to use dmesg
directly because it goes over the number of lines it saves... or even
with dmesg -w it goes over the number of lines of your terminal...
or the ssh and serial slowness when printing a bunch of information.

We probably want to be able to capture multiple error states and be
able to cross-reference them with a kernel timeline, but definitely not flood
our log terminals.

> 
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> 
> 
> John Harrison (2):
>   drm/i915: Dump error capture to kernel log
>   drm/i915/guc: Dump error capture to dmesg on CTB error
> 
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |  53 +++++++++
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |   6 +
>  drivers/gpu/drm/i915/i915_gpu_error.c     | 130 ++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gpu_error.h     |   8 ++
>  4 files changed, 197 insertions(+)
> 
> -- 
> 2.39.1
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Intel-gfx] [PATCH 0/2] Add support for dumping error captures via kernel logging
  2023-04-11 14:41 ` [Intel-gfx] [PATCH 0/2] " Rodrigo Vivi
@ 2023-04-11 16:41   ` John Harrison
  2023-04-11 16:50     ` Daniel Vetter
  0 siblings, 1 reply; 11+ messages in thread
From: John Harrison @ 2023-04-11 16:41 UTC (permalink / raw)
  To: Rodrigo Vivi; +Cc: Intel-GFX, DRI-Devel

On 4/11/2023 07:41, Rodrigo Vivi wrote:
> On Mon, Apr 10, 2023 at 12:25:21PM -0700, John.C.Harrison@Intel.com wrote:
>> From: John Harrison <John.C.Harrison@Intel.com>
>>
>> Sometimes, the only effective way to debug an issue is to dump all the
>> interesting information at the point of failure. So add support for
>> doing that.
> No! Please no!
> We have some of this on Xe and I'm hating it. I'm going to try to remove
> from there soon. It is horrible when you lost the hability to use dmesg
> directly because it goes over the number of lines it saves... or even
> with dmesg -w it goes over the number of lines of your terminal...
> or the ssh and serial slowness when printing a bunch of information.
>
> We probably want to be able to capture multiple error states and be
> able to cross them with a kernel timeline, but definitely not overflood
> our log terminals.
I think you are missing the point.

This is the emergency backup plan for when nothing else works. It is not 
on by default. It should never happen on an end user system unless we 
specifically request them to run with a patched kernel to enable a dump 
at a specific point.

But there are (many) times when nothing else works. In those instances, 
it is extremely useful to be able to dump the system state in this manner.

It is code we have been using internally for some time and it has helped 
resolve a number of different difficult-to-debug bugs. As our Xe 
generation platforms are now out in the wild and no longer just 
internal, it is also proving important to have this facility available 
in upstream trees as well. And having it merged rather than floating 
around as random patches passed from person to person is far easier to 
manage and would also help reduce the internal tree burden.

John.

>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>>
>>
>> John Harrison (2):
>>    drm/i915: Dump error capture to kernel log
>>    drm/i915/guc: Dump error capture to dmesg on CTB error
>>
>>   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |  53 +++++++++
>>   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |   6 +
>>   drivers/gpu/drm/i915/i915_gpu_error.c     | 130 ++++++++++++++++++++++
>>   drivers/gpu/drm/i915/i915_gpu_error.h     |   8 ++
>>   4 files changed, 197 insertions(+)
>>
>> -- 
>> 2.39.1
>>


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Intel-gfx] [PATCH 0/2] Add support for dumping error captures via kernel logging
  2023-04-11 16:41   ` John Harrison
@ 2023-04-11 16:50     ` Daniel Vetter
  2023-04-18 16:38       ` Rodrigo Vivi
  0 siblings, 1 reply; 11+ messages in thread
From: Daniel Vetter @ 2023-04-11 16:50 UTC (permalink / raw)
  To: John Harrison; +Cc: Intel-GFX, DRI-Devel, Rodrigo Vivi

On Tue, Apr 11, 2023 at 09:41:04AM -0700, John Harrison wrote:
> On 4/11/2023 07:41, Rodrigo Vivi wrote:
> > On Mon, Apr 10, 2023 at 12:25:21PM -0700, John.C.Harrison@Intel.com wrote:
> > > From: John Harrison <John.C.Harrison@Intel.com>
> > > 
> > > Sometimes, the only effective way to debug an issue is to dump all the
> > > interesting information at the point of failure. So add support for
> > > doing that.
> > No! Please no!
> > We have some of this on Xe and I'm hating it. I'm going to try to remove
> > from there soon. It is horrible when you lost the hability to use dmesg
> > directly because it goes over the number of lines it saves... or even
> > with dmesg -w it goes over the number of lines of your terminal...
> > or the ssh and serial slowness when printing a bunch of information.
> > 
> > We probably want to be able to capture multiple error states and be
> > able to cross them with a kernel timeline, but definitely not overflood
> > our log terminals.
> I think you are missing the point.
> 
> This is the emergency backup plan for when nothing else works. It is not on
> by default. It should never happen on an end user system unless we
> specifically request them to run with a patched kernel to enable a dump at a
> specific point.
> 
> But there are (many) times when nothing else works. In those instances, it
> is extremely useful to be able to dump the system state in this manner.
> 
> It is code we have been using internally for some time and it has helped
> resolve a number of different difficult to debug bugs. As our Xe generation
> platforms are now out in the wild and no longer just internal, it is also
> proving important to have this facility available in upstream trees as well.
> And having it merged rather than floating around as random patches passed
> from person to person is far easier to manage and would also help reduce the
> internal tree burden.

Note that Xe needs to move over to devcoredump infrastructure, so if you
need dumping straight to dmesg that would be a patch for that subsystem in
the future.

Not sure how much fun you want to add here in the i915-gem dead end, I'll
leave that up to i915 maintainers.

Just figured this is a good place to drop this aside :-)
-Daniel

> 
> John.
> 
> > > Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> > > 
> > > 
> > > John Harrison (2):
> > >    drm/i915: Dump error capture to kernel log
> > >    drm/i915/guc: Dump error capture to dmesg on CTB error
> > > 
> > >   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |  53 +++++++++
> > >   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |   6 +
> > >   drivers/gpu/drm/i915/i915_gpu_error.c     | 130 ++++++++++++++++++++++
> > >   drivers/gpu/drm/i915/i915_gpu_error.h     |   8 ++
> > >   4 files changed, 197 insertions(+)
> > > 
> > > -- 
> > > 2.39.1
> > > 
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Intel-gfx] [PATCH 0/2] Add support for dumping error captures via kernel logging
  2023-04-11 16:50     ` Daniel Vetter
@ 2023-04-18 16:38       ` Rodrigo Vivi
  0 siblings, 0 replies; 11+ messages in thread
From: Rodrigo Vivi @ 2023-04-18 16:38 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Intel-GFX, DRI-Devel

On Tue, Apr 11, 2023 at 06:50:53PM +0200, Daniel Vetter wrote:
> On Tue, Apr 11, 2023 at 09:41:04AM -0700, John Harrison wrote:
> > On 4/11/2023 07:41, Rodrigo Vivi wrote:
> > > On Mon, Apr 10, 2023 at 12:25:21PM -0700, John.C.Harrison@Intel.com wrote:
> > > > From: John Harrison <John.C.Harrison@Intel.com>
> > > > 
> > > > Sometimes, the only effective way to debug an issue is to dump all the
> > > > interesting information at the point of failure. So add support for
> > > > doing that.
> > > No! Please no!
> > > We have some of this on Xe and I'm hating it. I'm going to try to remove
> > > from there soon. It is horrible when you lost the hability to use dmesg
> > > directly because it goes over the number of lines it saves... or even
> > > with dmesg -w it goes over the number of lines of your terminal...
> > > or the ssh and serial slowness when printing a bunch of information.
> > > 
> > > We probably want to be able to capture multiple error states and be
> > > able to cross them with a kernel timeline, but definitely not overflood
> > > our log terminals.
> > I think you are missing the point.
> > 
> > This is the emergency backup plan for when nothing else works. It is not on
> > by default. It should never happen on an end user system unless we
> > specifically request them to run with a patched kernel to enable a dump at a
> > specific point.
> > 
> > But there are (many) times when nothing else works. In those instances, it
> > is extremely useful to be able to dump the system state in this manner.
> > 
> > It is code we have been using internally for some time and it has helped
> > resolve a number of different difficult to debug bugs. As our Xe generation
> > platforms are now out in the wild and no longer just internal, it is also
> > proving important to have this facility available in upstream trees as well.
> > And having it merged rather than floating around as random patches passed
> > from person to person is far easier to manage and would also help reduce the
> > internal tree burden.

Okay then. As long as it depends on some DEBUG config which depends on EXPERT
I believe we have a good reason.

I see the second patch is indeed protected by CONFIG_DRM_I915_DEBUG_GUC.
Would it be good to do something similar for patch 1?

> 
> Note that Xe needs to move over to devcoredump infrastructure, so if you
> need dumping straight to dmesg that would be a patch for that subsystem in
> the future.

devcoredump is a nice thing and it does deserve a few improvements
to be able to catch snapshots and all, but for this case here I believe
that the current devcoredump infrastructure would already be enough.

It would be only a matter of doing an immediate print to the dmesg at
the moment that devcoredump is created and this is inside the driver.

But yep, that would need to be protected by debug/expert kconfig.

> 
> Not sure how much you want to add fun here in the i915-gem deadend, I'll
> leave that up to i915 maintainers.
> 
> Just figured this is a good place to drop this aside :-)
> -Daniel
> 
> > 
> > John.
> > 
> > > > Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> > > > 
> > > > 
> > > > John Harrison (2):
> > > >    drm/i915: Dump error capture to kernel log
> > > >    drm/i915/guc: Dump error capture to dmesg on CTB error
> > > > 
> > > >   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |  53 +++++++++
> > > >   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |   6 +
> > > >   drivers/gpu/drm/i915/i915_gpu_error.c     | 130 ++++++++++++++++++++++
> > > >   drivers/gpu/drm/i915/i915_gpu_error.h     |   8 ++
> > > >   4 files changed, 197 insertions(+)
> > > > 
> > > > -- 
> > > > 2.39.1
> > > > 
> > 
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2023-04-18 16:39 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-10 19:25 [Intel-gfx] [PATCH 0/2] Add support for dumping error captures via kernel logging John.C.Harrison
2023-04-10 19:25 ` [Intel-gfx] [PATCH 1/2] drm/i915: Dump error capture to kernel log John.C.Harrison
2023-04-10 19:25 ` [Intel-gfx] [PATCH 2/2] drm/i915/guc: Dump error capture to dmesg on CTB error John.C.Harrison
2023-04-10 19:50 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Add support for dumping error captures via kernel logging Patchwork
2023-04-10 19:50 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2023-04-10 19:59 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2023-04-10 21:15 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
2023-04-11 14:41 ` [Intel-gfx] [PATCH 0/2] " Rodrigo Vivi
2023-04-11 16:41   ` John Harrison
2023-04-11 16:50     ` Daniel Vetter
2023-04-18 16:38       ` Rodrigo Vivi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).