All of lore.kernel.org
 help / color / mirror / Atom feed
From: John.C.Harrison@Intel.com
To: Intel-GFX@Lists.FreeDesktop.Org
Cc: DRI-Devel@Lists.FreeDesktop.Org, Matthew Brost <matthew.brost@intel.com>
Subject: [PATCH 7/7] Me: Dump GuC log to dmesg on SLPC load failure
Date: Tue, 31 Aug 2021 19:20:43 -0700	[thread overview]
Message-ID: <20210901022043.2395135-8-John.C.Harrison@Intel.com> (raw)
In-Reply-To: <20210901022043.2395135-1-John.C.Harrison@Intel.com>

From: Matthew Brost <matthew.brost@intel.com>

---
 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c |  3 +
 drivers/gpu/drm/i915/i915_gpu_error.c       | 97 +++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gpu_error.h       |  3 +
 3 files changed, 103 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 65a3e7fdb2b2..9b52cae16ebb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -262,6 +262,9 @@ static int slpc_reset(struct intel_guc_slpc *slpc)
 		if (wait_for(slpc_is_running(slpc), SLPC_RESET_TIMEOUT_MS)) {
 			drm_err(&i915->drm, "SLPC not enabled! State = %s\n",
 				slpc_get_state_string(slpc));
+
+			intel_klog_error_capture(guc_to_gt(guc),
+						 (intel_engine_mask_t) ~0U);
 			return -EIO;
 		}
 	}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a61e23deeb00..55e58810a381 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1969,3 +1969,100 @@ void i915_disable_error_state(struct drm_i915_private *i915, int err)
 		i915->gpu_error.first_error = ERR_PTR(err);
 	spin_unlock_irq(&i915->gpu_error.lock);
 }
+
+/**
+ * intel_klog_error_capture - take a GPU error capture and dump it to dmesg
+ * @gt: GT to capture the error state of
+ * @engine_mask: engines to include in the capture
+ *
+ * Debug aid: clears any stored capture, takes a fresh one and prints its text
+ * to the kernel log, one message per line. Lines over MAX_CHUNK are split.
+ * Text ending at a newline is wrapped in ><, text that continues in }{.
+ */
+void intel_klog_error_capture(struct intel_gt *gt,
+			      intel_engine_mask_t engine_mask)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	struct i915_gpu_coredump *error;
+	intel_wakeref_t wakeref;
+	size_t buf_size = PAGE_SIZE * 128;
+	size_t pos_err = 0;
+	char *buf, *ptr, *next;
+
+	error = READ_ONCE(i915->gpu_error.first_error);
+	if (error) {
+		drm_err(&i915->drm, "Clearing existing error capture first...\n");
+		i915_reset_error_state(i915);
+	}
+
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+		error = i915_gpu_coredump(gt, engine_mask);
+
+	if (IS_ERR(error)) {
+		drm_err(&i915->drm, "Failed to capture error capture: %ld!\n", PTR_ERR(error));
+		return;
+	}
+
+	buf = kvmalloc(buf_size, GFP_KERNEL);
+	if (!buf) {
+		drm_err(&i915->drm, "Failed to allocate buffer for error capture!\n");
+		goto out_put;
+	}
+
+	drm_info(&i915->drm, "Dumping i915 error capture...\n");
+
+	/* Largest string length safe to print via dmesg */
+#	define MAX_CHUNK	800
+
+	while (1) {
+		/* Read one byte short of the buffer to leave room for the NUL */
+		ssize_t got = i915_gpu_coredump_copy_to_buffer(error, buf, pos_err, buf_size - 1);
+
+		if (got <= 0)
+			break;
+
+		buf[got] = 0;
+		pos_err += got;
+
+		ptr = buf;
+		while (got > 0) {
+			/* Without a newline this is a fragment, not a complete line */
+			char open = '}', close = '{';
+			size_t count = got, off;
+
+			next = strnchr(ptr, got, '\n');
+			if (next) {
+				count = next - ptr;
+				*next = 0;
+				open = '>';
+				close = '<';
+			}
+
+			/* Emit all but the last piece as }{ fragments */
+			for (off = 0; count - off > MAX_CHUNK; off += MAX_CHUNK) {
+				char chr = ptr[off + MAX_CHUNK];
+
+				ptr[off + MAX_CHUNK] = 0;
+				drm_info(&i915->drm, "Capture }%s{\n", ptr + off);
+				ptr[off + MAX_CHUNK] = chr;
+			}
+			drm_info(&i915->drm, "Capture %c%s%c\n", open, ptr + off, close);
+
+			/* Step past the printed text and the newline that ended it */
+			got -= count;
+			ptr = next;
+			if (next) {
+				ptr++;
+				got--;
+			}
+		}
+		if (got)
+			drm_info(&i915->drm, "Got %zd bytes remaining!\n", got);
+	}
+
+	kvfree(buf);
+	drm_info(&i915->drm, "Dumped %zu bytes\n", pos_err);
+out_put:
+	/* Drop the reference returned by i915_gpu_coredump() */
+	i915_gpu_coredump_put(error);
+}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index b98d8cdbe4f2..f5ab72cc3367 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -214,6 +214,9 @@ struct drm_i915_error_state_buf {
 
 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
 
+void intel_klog_error_capture(struct intel_gt *gt,
+			      intel_engine_mask_t engine_mask);
+
 __printf(2, 3)
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
 
-- 
2.25.1


WARNING: multiple messages have this Message-ID (diff)
From: John.C.Harrison@Intel.com
To: Intel-GFX@Lists.FreeDesktop.Org
Cc: DRI-Devel@Lists.FreeDesktop.Org, Matthew Brost <matthew.brost@intel.com>
Subject: [Intel-gfx] [PATCH 7/7] Me: Dump GuC log to dmesg on SLPC load failure
Date: Tue, 31 Aug 2021 19:20:43 -0700	[thread overview]
Message-ID: <20210901022043.2395135-8-John.C.Harrison@Intel.com> (raw)
In-Reply-To: <20210901022043.2395135-1-John.C.Harrison@Intel.com>

From: Matthew Brost <matthew.brost@intel.com>

---
 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c |  3 +
 drivers/gpu/drm/i915/i915_gpu_error.c       | 97 +++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gpu_error.h       |  3 +
 3 files changed, 103 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 65a3e7fdb2b2..9b52cae16ebb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -262,6 +262,9 @@ static int slpc_reset(struct intel_guc_slpc *slpc)
 		if (wait_for(slpc_is_running(slpc), SLPC_RESET_TIMEOUT_MS)) {
 			drm_err(&i915->drm, "SLPC not enabled! State = %s\n",
 				slpc_get_state_string(slpc));
+
+			intel_klog_error_capture(guc_to_gt(guc),
+						 (intel_engine_mask_t) ~0U);
 			return -EIO;
 		}
 	}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a61e23deeb00..55e58810a381 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1969,3 +1969,100 @@ void i915_disable_error_state(struct drm_i915_private *i915, int err)
 		i915->gpu_error.first_error = ERR_PTR(err);
 	spin_unlock_irq(&i915->gpu_error.lock);
 }
+
+/**
+ * intel_klog_error_capture - take a GPU error capture and dump it to dmesg
+ * @gt: GT to capture the error state of
+ * @engine_mask: engines to include in the capture
+ *
+ * Debug aid: clears any stored capture, takes a fresh one and prints its text
+ * to the kernel log, one message per line. Lines over MAX_CHUNK are split.
+ * Text ending at a newline is wrapped in ><, text that continues in }{.
+ */
+void intel_klog_error_capture(struct intel_gt *gt,
+			      intel_engine_mask_t engine_mask)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	struct i915_gpu_coredump *error;
+	intel_wakeref_t wakeref;
+	size_t buf_size = PAGE_SIZE * 128;
+	size_t pos_err = 0;
+	char *buf, *ptr, *next;
+
+	error = READ_ONCE(i915->gpu_error.first_error);
+	if (error) {
+		drm_err(&i915->drm, "Clearing existing error capture first...\n");
+		i915_reset_error_state(i915);
+	}
+
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+		error = i915_gpu_coredump(gt, engine_mask);
+
+	if (IS_ERR(error)) {
+		drm_err(&i915->drm, "Failed to capture error capture: %ld!\n", PTR_ERR(error));
+		return;
+	}
+
+	buf = kvmalloc(buf_size, GFP_KERNEL);
+	if (!buf) {
+		drm_err(&i915->drm, "Failed to allocate buffer for error capture!\n");
+		goto out_put;
+	}
+
+	drm_info(&i915->drm, "Dumping i915 error capture...\n");
+
+	/* Largest string length safe to print via dmesg */
+#	define MAX_CHUNK	800
+
+	while (1) {
+		/* Read one byte short of the buffer to leave room for the NUL */
+		ssize_t got = i915_gpu_coredump_copy_to_buffer(error, buf, pos_err, buf_size - 1);
+
+		if (got <= 0)
+			break;
+
+		buf[got] = 0;
+		pos_err += got;
+
+		ptr = buf;
+		while (got > 0) {
+			/* Without a newline this is a fragment, not a complete line */
+			char open = '}', close = '{';
+			size_t count = got, off;
+
+			next = strnchr(ptr, got, '\n');
+			if (next) {
+				count = next - ptr;
+				*next = 0;
+				open = '>';
+				close = '<';
+			}
+
+			/* Emit all but the last piece as }{ fragments */
+			for (off = 0; count - off > MAX_CHUNK; off += MAX_CHUNK) {
+				char chr = ptr[off + MAX_CHUNK];
+
+				ptr[off + MAX_CHUNK] = 0;
+				drm_info(&i915->drm, "Capture }%s{\n", ptr + off);
+				ptr[off + MAX_CHUNK] = chr;
+			}
+			drm_info(&i915->drm, "Capture %c%s%c\n", open, ptr + off, close);
+
+			/* Step past the printed text and the newline that ended it */
+			got -= count;
+			ptr = next;
+			if (next) {
+				ptr++;
+				got--;
+			}
+		}
+		if (got)
+			drm_info(&i915->drm, "Got %zd bytes remaining!\n", got);
+	}
+
+	kvfree(buf);
+	drm_info(&i915->drm, "Dumped %zu bytes\n", pos_err);
+out_put:
+	/* Drop the reference returned by i915_gpu_coredump() */
+	i915_gpu_coredump_put(error);
+}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index b98d8cdbe4f2..f5ab72cc3367 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -214,6 +214,9 @@ struct drm_i915_error_state_buf {
 
 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
 
+void intel_klog_error_capture(struct intel_gt *gt,
+			      intel_engine_mask_t engine_mask);
+
 __printf(2, 3)
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
 
-- 
2.25.1


  parent reply	other threads:[~2021-09-01  2:21 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-01  2:20 [PATCH 0/7] [CI] Enable GuC submission by default on DG1 John.C.Harrison
2021-09-01  2:20 ` [Intel-gfx] " John.C.Harrison
2021-09-01  2:20 ` [PATCH 1/7] drm/i915: Do not define vma on stack John.C.Harrison
2021-09-01  2:20   ` [Intel-gfx] " John.C.Harrison
2021-09-01  2:20 ` [PATCH 2/7] drm/i915/guc: put all guc objects in lmem when available John.C.Harrison
2021-09-01  2:20   ` [Intel-gfx] " John.C.Harrison
2021-09-01 16:12   ` Matthew Auld
2021-09-01  2:20 ` [PATCH 3/7] drm/i915/guc: Add DG1 GuC / HuC firmware defs John.C.Harrison
2021-09-01  2:20   ` [Intel-gfx] " John.C.Harrison
2021-09-01  2:20 ` [PATCH 4/7] drm/i915/guc: Enable GuC submission by default on DG1 John.C.Harrison
2021-09-01  2:20   ` [Intel-gfx] " John.C.Harrison
2021-09-01  2:20 ` [PATCH 5/7] Me: Allow relocs on DG1 for CI John.C.Harrison
2021-09-01  2:20   ` [Intel-gfx] " John.C.Harrison
2021-09-01  2:20 ` [PATCH 6/7] Me: Workaround LMEM blow up John.C.Harrison
2021-09-01  2:20   ` [Intel-gfx] " John.C.Harrison
2021-09-01  2:20 ` John.C.Harrison [this message]
2021-09-01  2:20   ` [Intel-gfx] [PATCH 7/7] Me: Dump GuC log to dmesg on SLPC load failure John.C.Harrison
2021-09-01  2:37 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Enable GuC submission by default on DG1 (rev2) Patchwork
2021-09-01  3:08 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210901022043.2395135-8-John.C.Harrison@Intel.com \
    --to=john.c.harrison@intel.com \
    --cc=DRI-Devel@Lists.FreeDesktop.Org \
    --cc=Intel-GFX@Lists.FreeDesktop.Org \
    --cc=matthew.brost@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.