All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Chen, Guchun" <Guchun.Chen-5C7GfCeVMHo@public.gmane.org>
To: "amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org"
	<amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>,
	"Zhang, Hawking" <Hawking.Zhang-5C7GfCeVMHo@public.gmane.org>,
	"Li, Dennis" <Dennis.Li-5C7GfCeVMHo@public.gmane.org>,
	"Grodzovsky,
	Andrey" <Andrey.Grodzovsky-5C7GfCeVMHo@public.gmane.org>,
	"Zhou1, Tao" <Tao.Zhou1-5C7GfCeVMHo@public.gmane.org>
Cc: "Li, Candice" <Candice.Li-5C7GfCeVMHo@public.gmane.org>,
	"Chen, Guchun" <Guchun.Chen-5C7GfCeVMHo@public.gmane.org>
Subject: [PATCH] drm/amdgpu: refine reboot debugfs operation in ras case
Date: Mon, 21 Oct 2019 09:08:06 +0000	[thread overview]
Message-ID: <20191021090735.19696-1-guchun.chen@amd.com> (raw)

The reboot option for RAS recovery is a common debugfs setting
that does not depend on any IP block, so it should not live in
the ras_ctrl node or require the user to name an IP block. Add
a new auto_reboot node in the RAS debugfs dir for it instead.

Signed-off-by: Guchun Chen <guchun.chen@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 55 ++++++++++++++++++++++---
 1 file changed, 49 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 6220394521e4..3adcd29feb5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -153,8 +153,6 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
 		op = 1;
 	else if (sscanf(str, "inject %32s %8s", block_name, err) == 2)
 		op = 2;
-	else if (sscanf(str, "reboot %32s", block_name) == 1)
-		op = 3;
 	else if (str[0] && str[1] && str[2] && str[3])
 		/* ascii string, but commands are not matched. */
 		return -EINVAL;
@@ -223,7 +221,6 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
  * - 0: disable RAS on the block. Take ::head as its data.
  * - 1: enable RAS on the block. Take ::head as its data.
  * - 2: inject errors on the block. Take ::inject as its data.
- * - 3: reboot on unrecoverable error
  *
  * How to use the interface?
  * programs:
@@ -305,9 +302,6 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
 		/* data.inject.address is offset instead of absolute gpu address */
 		ret = amdgpu_ras_error_inject(adev, &data.inject);
 		break;
-	case 3:
-		amdgpu_ras_get_context(adev)->reboot = true;
-		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -346,6 +340,46 @@ static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, const char __user
 	return ret == 1 ? size : -EIO;
 }
 
+/**
+ * DOC: AMDGPU RAS debugfs auto reboot interface
+ *
+ * After an uncorrectable error happens, GPU recovery will be scheduled.
+ * Due to a known problem where GPU recovery fails to bring the GPU back, this
+ * interface gives the user a direct way to reboot the system automatically
+ * when ERREVENT_ATHUB_INTERRUPT is generated in such a case. The normal GPU
+ * recovery routine will never be called.
+ *
+ * Enable auto_reboot:
+ *
+ *	echo 1 > /sys/kernel/debug/dri/x/ras/auto_reboot
+ *
+ * Disable auto_reboot:
+ *
+ *	echo 0 > /sys/kernel/debug/dri/x/ras/auto_reboot
+ *
+ */
+static ssize_t amdgpu_ras_debugfs_reboot_write(struct file *f,
+	const char __user *buf, size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev =
+		(struct amdgpu_device *)file_inode(f)->i_private;
+	char tmp[8] = {0};
+	int value = -1;
+
+	if (size != simple_write_to_buffer(tmp, sizeof(tmp), pos, buf, size))
+		return -EINVAL;
+
+	if (kstrtoint(tmp, 10, &value))
+		return -EINVAL;
+
+	if (value == 1)
+		amdgpu_ras_get_context(adev)->reboot = true;
+	else if (value == 0)
+		amdgpu_ras_get_context(adev)->reboot = false;
+
+	return size;
+}
+
 static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = {
 	.owner = THIS_MODULE,
 	.read = NULL,
@@ -360,6 +394,13 @@ static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = {
 	.llseek = default_llseek
 };
 
+static const struct file_operations amdgpu_ras_debugfs_reboot_ops = {
+	.owner = THIS_MODULE,
+	.read = NULL,
+	.write = amdgpu_ras_debugfs_reboot_write,
+	.llseek = default_llseek
+};
+
 /**
  * DOC: AMDGPU RAS sysfs Error Count Interface
  *
@@ -1037,6 +1078,8 @@ static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
 				adev, &amdgpu_ras_debugfs_ctrl_ops);
 	debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, con->dir,
 				adev, &amdgpu_ras_debugfs_eeprom_ops);
+	debugfs_create_file("auto_reboot", S_IWUGO | S_IRUGO, con->dir,
+				adev, &amdgpu_ras_debugfs_reboot_ops);
 }
 
 void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

             reply	other threads:[~2019-10-21  9:08 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-21  9:08 Chen, Guchun [this message]
     [not found] ` <20191021090735.19696-1-guchun.chen-5C7GfCeVMHo@public.gmane.org>
2019-10-21  9:11   ` [PATCH] drm/amdgpu: refine reboot debugfs operation in ras case Christian König
     [not found]     ` <d1558d15-a1dc-e370-1410-ebfcafd01618-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-10-21  9:19       ` Chen, Guchun
2019-10-21 14:00   ` Deucher, Alexander
2019-10-21  9:33 Chen, Guchun
     [not found] ` <20191021093245.28945-1-guchun.chen-5C7GfCeVMHo@public.gmane.org>
2019-10-21 11:13   ` Christian König
     [not found]     ` <058c4a9d-1ae2-5721-4404-794e575b42a8-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-10-21 14:04       ` Chen, Guchun
2019-10-21 15:40   ` Deucher, Alexander

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191021090735.19696-1-guchun.chen@amd.com \
    --to=guchun.chen-5c7gfcevmho@public.gmane.org \
    --cc=Andrey.Grodzovsky-5C7GfCeVMHo@public.gmane.org \
    --cc=Candice.Li-5C7GfCeVMHo@public.gmane.org \
    --cc=Dennis.Li-5C7GfCeVMHo@public.gmane.org \
    --cc=Hawking.Zhang-5C7GfCeVMHo@public.gmane.org \
    --cc=Tao.Zhou1-5C7GfCeVMHo@public.gmane.org \
    --cc=amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.