All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jordan Crouse <jcrouse-sgV2jX0FEOL9JmXXK+q4OQ@public.gmane.org>
To: freedreno-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Cc: linux-arm-msm-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Subject: [PATCH 4/4] drm/msm: gpu: Capture the GPU state on a GPU hang
Date: Fri,  5 Jan 2018 11:00:21 -0700	[thread overview]
Message-ID: <1515175221-5601-5-git-send-email-jcrouse@codeaurora.org> (raw)
In-Reply-To: <1515175221-5601-1-git-send-email-jcrouse-sgV2jX0FEOL9JmXXK+q4OQ@public.gmane.org>

Capture the GPU state on a GPU hang and store it for later playback
using the 'crash' node in the debugfs directory.  Only one crash
state is stored at a time on the assumption that the first hang is
usually the most interesting. The existing crash state can be cleared
by writing to the debugfs node and then a new one will be captured
on the next hang.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
 drivers/gpu/drm/msm/adreno/a3xx_gpu.c   |  1 +
 drivers/gpu/drm/msm/adreno/a4xx_gpu.c   |  1 +
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c   |  1 +
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 16 +++++++--
 drivers/gpu/drm/msm/adreno/adreno_gpu.h |  2 +-
 drivers/gpu/drm/msm/msm_debugfs.c       | 57 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/msm/msm_gpu.h           | 44 ++++++++++++++++++++++++-
 7 files changed, 117 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index 8a7d56ec..be65b4e 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -320,6 +320,7 @@ static void a3xx_recover(struct msm_gpu *gpu)
 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
 	adreno_recover(gpu);
 
+	msm_gpu_crashstate_set(gpu, state);
 	gpu->funcs->gpu_state_put(state);
 }
 
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index e64c7fc..943e13f 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -319,6 +319,7 @@ static void a4xx_recover(struct msm_gpu *gpu)
 	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
 	adreno_recover(gpu);
 
+	msm_gpu_crashstate_set(gpu, state);
 	gpu->funcs->gpu_state_put(state);
 }
 
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 6747b7b..1e32c2e 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -774,6 +774,7 @@ static void a5xx_recover(struct msm_gpu *gpu)
 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
 	adreno_recover(gpu);
 
+	msm_gpu_crashstate_set(gpu, state);
 	gpu->funcs->gpu_state_put(state);
 }
 
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index ba1b912..e1785c2 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -372,6 +372,8 @@ struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu *gpu)
 	if (!state)
 		return ERR_PTR(-ENOMEM);
 
+	kref_init(&state->ref);
+
 	do_gettimeofday(&state->time);
 
 	for (i = 0; i < gpu->nr_rings; i++) {
@@ -407,15 +409,23 @@ struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu *gpu)
 	return state;
 }
 
-void adreno_gpu_state_put(struct msm_gpu_state *state)
+static void adreno_gpu_state_destroy(struct kref *kref)
 {
-	if (IS_ERR_OR_NULL(state))
-		return;
+	struct msm_gpu_state *state = container_of(kref,
+		struct msm_gpu_state, ref);
 
 	kfree(state->registers);
 	kfree(state);
 }
 
+/*
+ * Drop one reference on a captured GPU state.
+ *
+ * Returns 1 when @state is NULL or an ERR_PTR value (there is nothing to
+ * drop); otherwise returns whatever kref_put() reports for the reference
+ * that was dropped.
+ */
+int adreno_gpu_state_put(struct msm_gpu_state *state)
+{
+	if (!IS_ERR_OR_NULL(state))
+		return kref_put(&state->ref, adreno_gpu_state_destroy);
+
+	return 1;
+}
+
 void adreno_show_info(struct msm_gpu *gpu, struct msm_gpu_state *state,
 		struct drm_printer *p)
 {
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 4542b6b..e304a3e 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -230,7 +230,7 @@ void adreno_show_regs(struct msm_gpu *gpu, struct msm_gpu_state *state,
 		struct drm_printer *p);
 
 struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu *gpu);
-void adreno_gpu_state_put(struct msm_gpu_state *state);
+int adreno_gpu_state_put(struct msm_gpu_state *state);
 
 /* ringbuffer helpers (the parts that are adreno specific) */
 
diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c
index 89ee74b..1bde88d 100644
--- a/drivers/gpu/drm/msm/msm_debugfs.c
+++ b/drivers/gpu/drm/msm/msm_debugfs.c
@@ -16,11 +16,65 @@
  */
 
 #ifdef CONFIG_DEBUG_FS
+
+#include <generated/utsrelease.h>
+#include <linux/debugfs.h>
 #include "msm_drv.h"
 #include "msm_gpu.h"
 #include "msm_kms.h"
 #include "msm_debugfs.h"
 
+/*
+ * seq_file show callback for the debugfs 'crash' node.
+ *
+ * Emits nothing when no crash state is stored. Otherwise prints a short
+ * header (GPU name, kernel release, capture time) and hands the state to
+ * the GPU-specific show() hook. Always returns 0.
+ */
+static int msm_gpu_crash_show(struct seq_file *m, void *data)
+{
+	struct msm_gpu *gpu = m->private;
+	struct msm_gpu_state *crash = msm_gpu_crashstate_get(gpu);
+
+	if (crash) {
+		seq_printf(m, "%s Crash Status:\n", gpu->name);
+		seq_puts(m, "Kernel: " UTS_RELEASE "\n");
+		seq_printf(m, "Time: %ld s %ld us\n",
+			crash->time.tv_sec, crash->time.tv_usec);
+
+		gpu->funcs->show(gpu, crash, m);
+
+		/* Pairs with the reference taken by msm_gpu_crashstate_get() */
+		msm_gpu_crashstate_put(gpu);
+	}
+
+	return 0;
+}
+
+/*
+ * Write handler for the debugfs 'crash' node.
+ *
+ * The written data is never inspected: any write logs a message and calls
+ * msm_gpu_crashstate_put(). Returns @count so the whole write is reported
+ * as consumed.
+ */
+static ssize_t msm_gpu_crash_write(struct file *file, const char __user *buf,
+		size_t count, loff_t *pos)
+{
+	/* private_data holds the seq_file; its ->private is the msm_gpu */
+	struct seq_file *m = file->private_data;
+	struct msm_gpu *gpu = m->private;
+
+	dev_err(gpu->dev->dev, "Releasing the GPU crash state\n");
+	msm_gpu_crashstate_put(gpu);
+
+	return count;
+}
+
+/*
+ * Open handler for the debugfs 'crash' node.
+ *
+ * Returns -ENODEV when the driver private data has no GPU; otherwise
+ * returns the result of single_open() with the GPU as the seq_file
+ * private pointer.
+ */
+static int msm_gpu_crash_open(struct inode *inode, struct file *file)
+{
+	struct msm_drm_private *priv = inode->i_private;
+	struct msm_gpu *gpu = priv->gpu;
+
+	return gpu ? single_open(file, msm_gpu_crash_show, gpu) : -ENODEV;
+}
+
+/* File operations for the debugfs 'crash' node (seq_file backed) */
+static const struct file_operations msm_gpu_crash_fops = {
+	.owner = THIS_MODULE,
+	.open = msm_gpu_crash_open,
+	.release = single_release,
+	.read = seq_read,
+	.write = msm_gpu_crash_write,
+	.llseek = seq_lseek,
+};
+
 static int msm_gpu_show(struct drm_device *dev, struct seq_file *m)
 {
 	struct msm_drm_private *priv = dev->dev_private;
@@ -170,6 +224,9 @@ int msm_debugfs_init(struct drm_minor *minor)
 		return ret;
 	}
 
+	debugfs_create_file("crash", 0644, minor->debugfs_root,
+		priv, &msm_gpu_crash_fops);
+
 	if (priv->kms->funcs->debugfs_init)
 		ret = priv->kms->funcs->debugfs_init(priv->kms, minor);
 
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index cff52ca..7ce2cba 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -69,7 +69,7 @@ struct msm_gpu_funcs {
 			struct seq_file *m);
 #endif
 	struct msm_gpu_state *(*gpu_state_get)(struct msm_gpu *gpu);
-	void (*gpu_state_put)(struct msm_gpu_state *state);
+	int (*gpu_state_put)(struct msm_gpu_state *state);
 };
 
 struct msm_gpu {
@@ -129,6 +129,8 @@ struct msm_gpu {
 	struct work_struct recover_work;
 
 	struct drm_gem_object *memptrs_bo;
+
+	struct msm_gpu_state *crashstate;
 };
 
 /* It turns out that all targets use the same ringbuffer size */
@@ -176,6 +178,7 @@ struct msm_gpu_submitqueue {
 };
 
 struct msm_gpu_state {
+	struct kref ref;
 	struct timeval time;
 
 	struct {
@@ -270,4 +273,43 @@ static inline void msm_submitqueue_put(struct msm_gpu_submitqueue *queue)
 		kref_put(&queue->ref, msm_submitqueue_destroy);
 }
 
+/*
+ * Store @state as the GPU crash state if none is stored yet, taking an
+ * extra reference on it. NULL and ERR_PTR values are ignored, as is any
+ * state offered while a crash state is already held.
+ */
+static inline void msm_gpu_crashstate_set(struct msm_gpu *gpu,
+		struct msm_gpu_state *state)
+{
+	/*
+	 * FIXME: nothing here checks that the caller holds
+	 * gpu->dev->struct_mutex, which the get/put helpers take around
+	 * their accesses to gpu->crashstate.
+	 */
+	if (IS_ERR_OR_NULL(state) || gpu->crashstate)
+		return;
+
+	kref_get(&state->ref);
+	gpu->crashstate = state;
+}
+
+/*
+ * Return the stored crash state with an extra reference taken, or NULL
+ * when no crash state is stored. The lookup and the reference grab both
+ * happen under gpu->dev->struct_mutex.
+ */
+static inline struct msm_gpu_state *msm_gpu_crashstate_get(struct msm_gpu *gpu)
+{
+	struct msm_gpu_state *state;
+
+	mutex_lock(&gpu->dev->struct_mutex);
+
+	state = gpu->crashstate;
+	if (state)
+		kref_get(&state->ref);
+
+	mutex_unlock(&gpu->dev->struct_mutex);
+
+	return state;
+}
+
+/*
+ * Drop one reference on the stored crash state through the
+ * gpu_state_put() hook, and clear gpu->crashstate when the hook returns
+ * non-zero. Does nothing when no crash state is stored. Runs under
+ * gpu->dev->struct_mutex.
+ *
+ * NOTE(review): both the reader in msm_gpu_crash_show() and the debugfs
+ * write handler drop their references through this helper, so repeated
+ * writes while a reader still holds its reference look like they could
+ * release the state underneath that reader - confirm.
+ */
+static inline void msm_gpu_crashstate_put(struct msm_gpu *gpu)
+{
+	mutex_lock(&gpu->dev->struct_mutex);
+
+	if (gpu->crashstate && gpu->funcs->gpu_state_put(gpu->crashstate))
+		gpu->crashstate = NULL;
+
+	mutex_unlock(&gpu->dev->struct_mutex);
+}
+
 #endif /* __MSM_GPU_H__ */
-- 
1.9.1

_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno

  parent reply	other threads:[~2018-01-05 18:00 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-05 18:00 [RFC 0/4] drm/msm: GPU crash state Jordan Crouse
2018-01-05 18:00 ` [PATCH 1/4] drm/msm: gpu: Use drm_printer to consolidate the show/dump code Jordan Crouse
2018-01-05 18:00 ` [PATCH 2/4] drm/msm: gpu: Capture the state of the GPU Jordan Crouse
     [not found] ` <1515175221-5601-1-git-send-email-jcrouse-sgV2jX0FEOL9JmXXK+q4OQ@public.gmane.org>
2018-01-05 18:00   ` [PATCH 3/4] drm/msm: gpu: Convert the GPU show functions to use the GPU state Jordan Crouse
2018-01-05 18:00   ` Jordan Crouse [this message]
2018-01-05 18:32   ` [RFC 0/4] drm/msm: GPU crash state Chris Wilson
     [not found]     ` <151517714292.6838.13295427873358659730-M6iVdVfohj6unts5RBS2dVaTQe2KTcn/@public.gmane.org>
2018-01-05 22:11       ` Jordan Crouse
     [not found]         ` <20180105221119.GA10299-9PYrDHPZ2Orvke4nUoYGnHL1okKdlPRT@public.gmane.org>
2018-01-05 22:51           ` Rob Clark

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1515175221-5601-5-git-send-email-jcrouse@codeaurora.org \
    --to=jcrouse-sgv2jx0feol9jmxxk+q4oq@public.gmane.org \
    --cc=dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org \
    --cc=freedreno-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org \
    --cc=linux-arm-msm-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.