linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: "Greg Kroah-Hartman" <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org,
	"Christian König" <christian.koenig@amd.com>,
	"Defang Bo" <bodefang@126.com>,
	"Alex Deucher" <alexander.deucher@amd.com>,
	"Sasha Levin" <sashal@kernel.org>
Subject: [PATCH 5.4 43/72] drm/amdgpu: Add check to prevent IH overflow
Date: Fri,  5 Mar 2021 13:21:45 +0100	[thread overview]
Message-ID: <20210305120859.451394280@linuxfoundation.org> (raw)
In-Reply-To: <20210305120857.341630346@linuxfoundation.org>

From: Defang Bo <bodefang@126.com>

[ Upstream commit e4180c4253f3f2da09047f5139959227f5cf1173 ]

Similar to commit <b82175750131>("drm/amdgpu: fix IH overflow on Vega10 v2").
When an ring buffer overflow happens the appropriate bit is set in the WPTR
register which is also written back to memory. But clearing the bit in the
WPTR doesn't trigger another memory writeback.

So what can happen is that we end up processing the buffer overflow over and
over again because the bit is never cleared. Resulting in a random system
lockup because of an infinite loop in an interrupt handler.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Defang Bo <bodefang@126.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/gpu/drm/amd/amdgpu/cz_ih.c      | 37 ++++++++++++++++---------
 drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 36 +++++++++++++++---------
 drivers/gpu/drm/amd/amdgpu/tonga_ih.c   | 37 ++++++++++++++++---------
 3 files changed, 71 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index 1dca0cabc326..13520d173296 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -193,19 +193,30 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev,
 
 	wptr = le32_to_cpu(*ih->wptr_cpu);
 
-	if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) {
-		wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
-		/* When a ring buffer overflow happen start parsing interrupt
-		 * from the last not overwritten vector (wptr + 16). Hopefully
-		 * this should allow us to catchup.
-		 */
-		dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
-			wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
-		ih->rptr = (wptr + 16) & ih->ptr_mask;
-		tmp = RREG32(mmIH_RB_CNTL);
-		tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
-		WREG32(mmIH_RB_CNTL, tmp);
-	}
+	if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+		goto out;
+
+	/* Double check that the overflow wasn't already cleared. */
+	wptr = RREG32(mmIH_RB_WPTR);
+
+	if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+		goto out;
+
+	wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+
+	/* When a ring buffer overflow happen start parsing interrupt
+	 * from the last not overwritten vector (wptr + 16). Hopefully
+	 * this should allow us to catchup.
+	 */
+	dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+		wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
+	ih->rptr = (wptr + 16) & ih->ptr_mask;
+	tmp = RREG32(mmIH_RB_CNTL);
+	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+	WREG32(mmIH_RB_CNTL, tmp);
+
+
+out:
 	return (wptr & ih->ptr_mask);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index a13dd9a51149..7d165f024f07 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -193,19 +193,29 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev,
 
 	wptr = le32_to_cpu(*ih->wptr_cpu);
 
-	if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) {
-		wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
-		/* When a ring buffer overflow happen start parsing interrupt
-		 * from the last not overwritten vector (wptr + 16). Hopefully
-		 * this should allow us to catchup.
-		 */
-		dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
-			 wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
-		ih->rptr = (wptr + 16) & ih->ptr_mask;
-		tmp = RREG32(mmIH_RB_CNTL);
-		tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
-		WREG32(mmIH_RB_CNTL, tmp);
-	}
+	if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+		goto out;
+
+	/* Double check that the overflow wasn't already cleared. */
+	wptr = RREG32(mmIH_RB_WPTR);
+
+	if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+		goto out;
+
+	wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+	/* When a ring buffer overflow happen start parsing interrupt
+	 * from the last not overwritten vector (wptr + 16). Hopefully
+	 * this should allow us to catchup.
+	 */
+	dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+		wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
+	ih->rptr = (wptr + 16) & ih->ptr_mask;
+	tmp = RREG32(mmIH_RB_CNTL);
+	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+	WREG32(mmIH_RB_CNTL, tmp);
+
+
+out:
 	return (wptr & ih->ptr_mask);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index e40140bf6699..db0a3bda13fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -195,19 +195,30 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev,
 
 	wptr = le32_to_cpu(*ih->wptr_cpu);
 
-	if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) {
-		wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
-		/* When a ring buffer overflow happen start parsing interrupt
-		 * from the last not overwritten vector (wptr + 16). Hopefully
-		 * this should allow us to catchup.
-		 */
-		dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
-			 wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
-		ih->rptr = (wptr + 16) & ih->ptr_mask;
-		tmp = RREG32(mmIH_RB_CNTL);
-		tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
-		WREG32(mmIH_RB_CNTL, tmp);
-	}
+	if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+		goto out;
+
+	/* Double check that the overflow wasn't already cleared. */
+	wptr = RREG32(mmIH_RB_WPTR);
+
+	if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+		goto out;
+
+	wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+
+	/* When a ring buffer overflow happen start parsing interrupt
+	 * from the last not overwritten vector (wptr + 16). Hopefully
+	 * this should allow us to catchup.
+	 */
+
+	dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+		wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
+	ih->rptr = (wptr + 16) & ih->ptr_mask;
+	tmp = RREG32(mmIH_RB_CNTL);
+	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+	WREG32(mmIH_RB_CNTL, tmp);
+
+out:
 	return (wptr & ih->ptr_mask);
 }
 
-- 
2.30.1




  parent reply	other threads:[~2021-03-05 12:35 UTC|newest]

Thread overview: 80+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-05 12:21 [PATCH 5.4 00/72] 5.4.103-rc1 review Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 01/72] net: usb: qmi_wwan: support ZTE P685M modem Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 02/72] Input: elantech - fix protocol errors for some trackpoints in SMBus mode Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 03/72] nvme-pci: refactor nvme_unmap_data Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 04/72] nvme-pci: fix error unwind in nvme_map_data Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 05/72] arm64 module: set plt* section addresses to 0x0 Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 06/72] MIPS: VDSO: Use CLANG_FLAGS instead of filtering out --target= Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 07/72] JFS: more checks for invalid superblock Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 08/72] udlfb: Fix memory leak in dlfb_usb_probe Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 09/72] media: mceusb: sanity check for prescaler value Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 10/72] erofs: fix shift-out-of-bounds of blkszbits Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 11/72] media: v4l2-ctrls.c: fix shift-out-of-bounds in std_validate Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 12/72] xfs: Fix assert failure in xfs_setattr_size() Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 13/72] net/af_iucv: remove WARN_ONCE on malformed RX packets Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 14/72] smackfs: restrict bytes count in smackfs write functions Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 15/72] net: fix up truesize of cloned skb in skb_prepare_for_shift() Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 16/72] nbd: handle device refs for DESTROY_ON_DISCONNECT properly Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 17/72] mm/hugetlb.c: fix unnecessary address expansion of pmd sharing Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 18/72] net: bridge: use switchdev for port flags set through sysfs too Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 19/72] net: ag71xx: remove unnecessary MTU reservation Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 20/72] net: fix dev_ifsioc_locked() race condition Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 21/72] dt-bindings: ethernet-controller: fix fixed-link specification Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 22/72] dt-bindings: net: btusb: DT fix s/interrupt-name/interrupt-names/ Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 23/72] MIPS: Drop 32-bit asm string functions Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 24/72] drm/virtio: use kvmalloc for large allocations Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 25/72] rsi: Fix TX EAPOL packet handling against iwlwifi AP Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 26/72] rsi: Move card interrupt handling to RX thread Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 27/72] rcu/nocb: Trigger self-IPI on late deferred wake up before user resume Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 28/72] staging: fwserial: Fix error handling in fwserial_create Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 29/72] x86/reboot: Add Zotac ZBOX CI327 nano PCI reboot quirk Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 30/72] vt/consolemap: do font sum unsigned Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 31/72] wlcore: Fix command execute failure 19 for wl12xx Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 32/72] Bluetooth: hci_h5: Set HCI_QUIRK_SIMULTANEOUS_DISCOVERY for btrtl Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 33/72] pktgen: fix misuse of BUG_ON() in pktgen_thread_worker() Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 34/72] ath10k: fix wmi mgmt tx queue full due to race condition Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 35/72] x86/build: Treat R_386_PLT32 relocation as R_386_PC32 Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 36/72] Bluetooth: Fix null pointer dereference in amp_read_loc_assoc_final_data Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 37/72] staging: most: sound: add sanity check for function argument Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 38/72] staging: bcm2835-audio: Replace unsafe strcpy() with strscpy() Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 39/72] brcmfmac: Add DMI nvram filename quirk for Predia Basic tablet Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 40/72] brcmfmac: Add DMI nvram filename quirk for Voyo winpad A15 tablet Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 41/72] drm/hisilicon: Fix use-after-free Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 42/72] crypto: tcrypt - avoid signed overflow in byte count Greg Kroah-Hartman
2021-03-05 12:21 ` Greg Kroah-Hartman [this message]
2021-03-05 12:21 ` [PATCH 5.4 44/72] PCI: Add a REBAR size quirk for Sapphire RX 5600 XT Pulse Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 45/72] drm/amd/display: Guard against NULL pointer deref when get_i2c_info fails Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 46/72] media: uvcvideo: Allow entities with no pads Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 47/72] f2fs: handle unallocated section and zone on pinned/atgc Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 48/72] f2fs: fix to set/clear I_LINKABLE under i_lock Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 49/72] nvme-core: add cancel tagset helpers Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 50/72] nvme-rdma: add clean action for failed reconnection Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 51/72] nvme-tcp: " Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 52/72] ASoC: Intel: Add DMI quirk table to soc_intel_is_byt_cr() Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 53/72] btrfs: fix error handling in commit_fs_roots Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 54/72] perf/x86/kvm: Add Cascade Lake Xeon steppings to isolation_ucodes[] Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 55/72] parisc: Bump 64-bit IRQ stack size to 64 KB Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 56/72] sched/features: Fix hrtick reprogramming Greg Kroah-Hartman
2021-03-05 12:21 ` [PATCH 5.4 57/72] ASoC: Intel: bytcr_rt5640: Add quirk for the Estar Beauty HD MID 7316R tablet Greg Kroah-Hartman
2021-03-05 12:22 ` [PATCH 5.4 58/72] ASoC: Intel: bytcr_rt5640: Add quirk for the Voyo Winpad A15 tablet Greg Kroah-Hartman
2021-03-05 12:22 ` [PATCH 5.4 59/72] ASoC: Intel: bytcr_rt5651: Add quirk for the Jumper EZpad 7 tablet Greg Kroah-Hartman
2021-03-05 12:22 ` [PATCH 5.4 60/72] ASoC: Intel: bytcr_rt5640: Add quirk for the Acer One S1002 tablet Greg Kroah-Hartman
2021-03-05 12:22 ` [PATCH 5.4 61/72] scsi: iscsi: Restrict sessions and handles to admin capabilities Greg Kroah-Hartman
2021-03-05 12:22 ` [PATCH 5.4 62/72] sysfs: Add sysfs_emit and sysfs_emit_at to format sysfs output Greg Kroah-Hartman
2021-03-05 12:22 ` [PATCH 5.4 63/72] scsi: iscsi: Ensure sysfs attributes are limited to PAGE_SIZE Greg Kroah-Hartman
2021-03-05 12:22 ` [PATCH 5.4 64/72] scsi: iscsi: Verify lengths on passthrough PDUs Greg Kroah-Hartman
2021-03-05 12:22 ` [PATCH 5.4 65/72] Xen/gnttab: handle p2m update errors on a per-slot basis Greg Kroah-Hartman
2021-03-05 12:22 ` [PATCH 5.4 66/72] xen-netback: respect gnttab_map_refs()s return value Greg Kroah-Hartman
2021-03-05 12:22 ` [PATCH 5.4 67/72] zsmalloc: account the number of compacted pages correctly Greg Kroah-Hartman
2021-03-05 12:22 ` [PATCH 5.4 68/72] swap: fix swapfile read/write offset Greg Kroah-Hartman
2021-03-05 12:22 ` [PATCH 5.4 69/72] media: v4l: ioctl: Fix memory leak in video_usercopy Greg Kroah-Hartman
2021-03-05 12:22 ` [PATCH 5.4 70/72] ALSA: hda/realtek: Add quirk for Clevo NH55RZQ Greg Kroah-Hartman
2021-03-05 12:22 ` [PATCH 5.4 71/72] ALSA: hda/realtek: Add quirk for Intel NUC 10 Greg Kroah-Hartman
2021-03-05 12:22 ` [PATCH 5.4 72/72] ALSA: hda/realtek: Apply dual codec quirks for MSI Godlike X570 board Greg Kroah-Hartman
2021-03-06  0:57 ` [PATCH 5.4 00/72] 5.4.103-rc1 review Samuel Zou
2021-03-06  9:55   ` Greg Kroah-Hartman
2021-03-06  3:23 ` Guenter Roeck
2021-03-06  9:55   ` Greg Kroah-Hartman
2021-03-06  5:25 ` Florian Fainelli
2021-03-06  9:55 ` Naresh Kamboju
2021-03-06 16:33 ` Guenter Roeck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210305120859.451394280@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=alexander.deucher@amd.com \
    --cc=bodefang@126.com \
    --cc=christian.koenig@amd.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sashal@kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).