linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: "Greg Kroah-Hartman" <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org,
	"Christian König" <christian.koenig@amd.com>,
	"Alex Deucher" <alexander.deucher@amd.com>,
	"Nirmoy Das" <nirmoy.das@amd.com>
Subject: [PATCH 5.14 12/16] drm/amdgpu: revert "Add autodump debugfs node for gpu reset v8"
Date: Thu,  4 Nov 2021 15:12:43 +0100	[thread overview]
Message-ID: <20211104141200.252486368@linuxfoundation.org> (raw)
In-Reply-To: <20211104141159.863820939@linuxfoundation.org>

From: Christian König <christian.koenig@amd.com>

commit c8365dbda056578eebe164bf110816b1a39b4b7f upstream.

This reverts commit 728e7e0cd61899208e924472b9e641dbeb0775c4.

Further discussion reveals that this feature is severely broken
and needs to be reverted ASAP.

GPU reset can never be delayed by userspace, even for debugging;
otherwise we can run into in-kernel deadlocks.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Nirmoy Das <nirmoy.das@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h         |    2 
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |   80 ----------------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h |    5 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |    4 -
 4 files changed, 91 deletions(-)

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1073,8 +1073,6 @@ struct amdgpu_device {
 	char				product_name[32];
 	char				serial[20];
 
-	struct amdgpu_autodump		autodump;
-
 	atomic_t			throttling_logging_enabled;
 	struct ratelimit_state		throttling_logging_rs;
 	uint32_t                        ras_hw_enabled;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -27,7 +27,6 @@
 #include <linux/pci.h>
 #include <linux/uaccess.h>
 #include <linux/pm_runtime.h>
-#include <linux/poll.h>
 
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
@@ -37,86 +36,8 @@
 #include "amdgpu_securedisplay.h"
 #include "amdgpu_fw_attestation.h"
 
-int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev)
-{
-#if defined(CONFIG_DEBUG_FS)
-	unsigned long timeout = 600 * HZ;
-	int ret;
-
-	wake_up_interruptible(&adev->autodump.gpu_hang);
-
-	ret = wait_for_completion_interruptible_timeout(&adev->autodump.dumping, timeout);
-	if (ret == 0) {
-		pr_err("autodump: timeout, move on to gpu recovery\n");
-		return -ETIMEDOUT;
-	}
-#endif
-	return 0;
-}
-
 #if defined(CONFIG_DEBUG_FS)
 
-static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)
-{
-	struct amdgpu_device *adev = inode->i_private;
-	int ret;
-
-	file->private_data = adev;
-
-	ret = down_read_killable(&adev->reset_sem);
-	if (ret)
-		return ret;
-
-	if (adev->autodump.dumping.done) {
-		reinit_completion(&adev->autodump.dumping);
-		ret = 0;
-	} else {
-		ret = -EBUSY;
-	}
-
-	up_read(&adev->reset_sem);
-
-	return ret;
-}
-
-static int amdgpu_debugfs_autodump_release(struct inode *inode, struct file *file)
-{
-	struct amdgpu_device *adev = file->private_data;
-
-	complete_all(&adev->autodump.dumping);
-	return 0;
-}
-
-static unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_table_struct *poll_table)
-{
-	struct amdgpu_device *adev = file->private_data;
-
-	poll_wait(file, &adev->autodump.gpu_hang, poll_table);
-
-	if (amdgpu_in_reset(adev))
-		return POLLIN | POLLRDNORM | POLLWRNORM;
-
-	return 0;
-}
-
-static const struct file_operations autodump_debug_fops = {
-	.owner = THIS_MODULE,
-	.open = amdgpu_debugfs_autodump_open,
-	.poll = amdgpu_debugfs_autodump_poll,
-	.release = amdgpu_debugfs_autodump_release,
-};
-
-static void amdgpu_debugfs_autodump_init(struct amdgpu_device *adev)
-{
-	init_completion(&adev->autodump.dumping);
-	complete_all(&adev->autodump.dumping);
-	init_waitqueue_head(&adev->autodump.gpu_hang);
-
-	debugfs_create_file("amdgpu_autodump", 0600,
-		adev_to_drm(adev)->primary->debugfs_root,
-		adev, &autodump_debug_fops);
-}
-
 /**
  * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
  *
@@ -1588,7 +1509,6 @@ int amdgpu_debugfs_init(struct amdgpu_de
 	}
 
 	amdgpu_ras_debugfs_create_all(adev);
-	amdgpu_debugfs_autodump_init(adev);
 	amdgpu_rap_debugfs_init(adev);
 	amdgpu_securedisplay_debugfs_init(adev);
 	amdgpu_fw_attestation_debugfs_init(adev);
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
@@ -26,10 +26,6 @@
 /*
  * Debugfs
  */
-struct amdgpu_autodump {
-	struct completion		dumping;
-	struct wait_queue_head		gpu_hang;
-};
 
 int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
 int amdgpu_debugfs_init(struct amdgpu_device *adev);
@@ -37,4 +33,3 @@ void amdgpu_debugfs_fini(struct amdgpu_d
 void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
-int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev);
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4407,10 +4407,6 @@ int amdgpu_device_pre_asic_reset(struct
 	if (reset_context->reset_req_dev == adev)
 		job = reset_context->job;
 
-	/* no need to dump if device is not in good state during probe period */
-	if (!adev->gmc.xgmi.pending_reset)
-		amdgpu_debugfs_wait_dump(adev);
-
 	if (amdgpu_sriov_vf(adev)) {
 		/* stop the data exchange thread */
 		amdgpu_virt_fini_data_exchange(adev);



  parent reply	other threads:[~2021-11-04 14:15 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-04 14:12 [PATCH 5.14 00/16] 5.14.17-rc1 review Greg Kroah-Hartman
2021-11-04 14:12 ` [PATCH 5.14 01/16] scsi: core: Put LLD module refcnt after SCSI device is released Greg Kroah-Hartman
2021-11-04 14:12 ` [PATCH 5.14 02/16] sfc: Fix reading non-legacy supported link modes Greg Kroah-Hartman
2021-11-04 14:12 ` [PATCH 5.14 03/16] vrf: Revert "Reset skb conntrack connection..." Greg Kroah-Hartman
2021-11-04 14:12 ` [PATCH 5.14 04/16] media: firewire: firedtv-avc: fix a buffer overflow in avc_ca_pmt() Greg Kroah-Hartman
2021-11-04 14:12 ` [PATCH 5.14 05/16] Revert "xhci: Set HCD flag to defer primary roothub registration" Greg Kroah-Hartman
2021-11-04 14:12 ` [PATCH 5.14 06/16] Revert "usb: core: hcd: Add support for deferring " Greg Kroah-Hartman
2021-11-04 14:12 ` [PATCH 5.14 07/16] drm/amdkfd: fix boot failure when iommu is disabled in Picasso Greg Kroah-Hartman
2021-11-04 14:12 ` [PATCH 5.14 08/16] drm/i915: Remove memory frequency calculation Greg Kroah-Hartman
2021-11-04 14:12 ` [PATCH 5.14 09/16] Revert "soc: imx: gpcv2: move reset assert after requesting domain power up" Greg Kroah-Hartman
2021-11-04 14:12 ` [PATCH 5.14 10/16] ARM: 9120/1: Revert "amba: make use of -1 IRQs warn" Greg Kroah-Hartman
2021-11-04 14:12 ` [PATCH 5.14 11/16] Revert "wcn36xx: Disable bmps when encryption is disabled" Greg Kroah-Hartman
2021-11-04 14:12 ` Greg Kroah-Hartman [this message]
2021-11-04 14:12 ` [PATCH 5.14 13/16] drm/amd/display: Revert "Directly retrain link from debugfs" Greg Kroah-Hartman
2021-11-04 14:12 ` [PATCH 5.14 14/16] Revert "drm/i915/gt: Propagate change in error status to children on unhold" Greg Kroah-Hartman
2021-11-04 14:12 ` [PATCH 5.14 15/16] ALSA: usb-audio: Add Schiit Hel device to mixer map quirk table Greg Kroah-Hartman
2021-11-04 14:12 ` [PATCH 5.14 16/16] ALSA: usb-audio: Add Audient iD14 " Greg Kroah-Hartman
2021-11-04 19:54 ` [PATCH 5.14 00/16] 5.14.17-rc1 review Shuah Khan
2021-11-04 22:01 ` Florian Fainelli
2021-11-05  2:28 ` Ken Moffat
2021-11-05  3:10 ` Daniel Díaz
2021-11-05 15:12 ` Guenter Roeck
2021-11-06 13:13 ` Fox Chen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211104141200.252486368@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=alexander.deucher@amd.com \
    --cc=christian.koenig@amd.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nirmoy.das@amd.com \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).