All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 1/1] drm/amdgpu: add initial support for pci error handler
@ 2020-08-11 13:30 Nirmoy Das
  2020-08-12 14:52 ` Andrey Grodzovsky
  0 siblings, 1 reply; 13+ messages in thread
From: Nirmoy Das @ 2020-08-11 13:30 UTC (permalink / raw)
  To: amd-gfx; +Cc: alexander.deucher, Nirmoy Das, christian.koenig

This patch will ignore non-fatal errors and try to
stop amdgpu's sw stack on fatal errors.

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 56 ++++++++++++++++++++++++-
 1 file changed, 54 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index c1219af2e7d6..2b9ede3000ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -35,6 +35,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/vga_switcheroo.h>
 #include <drm/drm_probe_helper.h>
+#include <drm/drm_atomic_helper.h>
 #include <linux/mmu_notifier.h>
 
 #include "amdgpu.h"
@@ -1516,6 +1517,58 @@ static struct drm_driver kms_driver = {
 	.patchlevel = KMS_DRIVER_PATCHLEVEL,
 };
 
+static pci_ers_result_t amdgpu_pci_err_detected(struct pci_dev *pdev,
+						pci_channel_state_t state)
+{
+	struct drm_device *dev = pci_get_drvdata(pdev);
+	struct amdgpu_device *adev = dev->dev_private;
+	int i;
+	int ret = PCI_ERS_RESULT_DISCONNECT; /* result for every state but io_normal */
+
+	switch (state) {
+	case pci_channel_io_normal: /* non-fatal error: touch nothing */
+		ret = PCI_ERS_RESULT_CAN_RECOVER;
+		break;
+	default: /* any other channel state: stop the sw stack */
+		/* Disable power management */
+		adev->runpm = 0;
+		/* Suspend all IO operations */
+		amdgpu_fbdev_set_suspend(adev, 1);
+		cancel_delayed_work_sync(&adev->delayed_init_work);
+		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+			struct amdgpu_ring *ring = adev->rings[i];
+
+			if (!ring || !ring->sched.thread) /* empty slot or no sched thread */
+				continue;
+
+			amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
+		}
+
+		if (adev->mode_info.mode_config_initialized) {
+			if (!amdgpu_device_has_dc_support(adev)) /* non-DC display path */
+				drm_helper_force_disable_all(adev->ddev);
+			else
+				drm_atomic_helper_shutdown(adev->ddev);
+		}
+
+		amdgpu_fence_driver_fini(adev);
+		amdgpu_fbdev_fini(adev);
+		/* Try to close the drm device to stop applications
+		 * from opening dri files for further IO operations.
+		 * TODO: This will throw a warning as ttm is not
+		 * cleaned up properly */
+		drm_dev_fini(dev);
+		break;
+	}
+
+	return ret;
+}
+
+static const struct pci_error_handlers amdgpu_err_handler = { /* only error_detected is set */
+       .error_detected = amdgpu_pci_err_detected,
+};
+
+
 static struct pci_driver amdgpu_kms_pci_driver = {
 	.name = DRIVER_NAME,
 	.id_table = pciidlist,
@@ -1523,10 +1576,9 @@ static struct pci_driver amdgpu_kms_pci_driver = {
 	.remove = amdgpu_pci_remove,
 	.shutdown = amdgpu_pci_shutdown,
 	.driver.pm = &amdgpu_pm_ops,
+	.err_handler = &amdgpu_err_handler,
 };
 
-
-
 static int __init amdgpu_init(void)
 {
 	int r;
-- 
2.27.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2020-08-14 20:20 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-11 13:30 [RFC PATCH 1/1] drm/amdgpu: add initial support for pci error handler Nirmoy Das
2020-08-12 14:52 ` Andrey Grodzovsky
2020-08-13 11:09   ` Nirmoy
2020-08-13 13:36     ` Alex Deucher
2020-08-13 13:38     ` Andrey Grodzovsky
2020-08-13 15:06       ` Nirmoy
2020-08-13 18:18         ` Andrey Grodzovsky
2020-08-13 21:17           ` Luben Tuikov
2020-08-13 21:30             ` Luben Tuikov
2020-08-14 15:23             ` Nirmoy
2020-08-14 19:52               ` Luben Tuikov
2020-08-14 20:10                 ` Alex Deucher
2020-08-14 20:20                   ` Luben Tuikov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.