linux-tegra.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Nicolas Chauvet <kwizart@gmail.com>
To: Thierry Reding <thierry.reding@gmail.com>,
	Jonathan Hunter <jonathanh@nvidia.com>,
	Manikanta Maddireddy <mmaddireddy@nvidia.com>,
	Bjorn Helgaas <helgaas@kernel.org>,
	Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: linux-tegra@vger.kernel.org, linux-pci@vger.kernel.org,
	Nicolas Chauvet <kwizart@gmail.com>,
	stable@vger.kernel.org
Subject: [PATCH] pci: tegra: Revert raw_violation_fixup for tegra124
Date: Fri, 17 Jul 2020 23:35:10 +0200	[thread overview]
Message-ID: <20200717213510.171726-1-kwizart@gmail.com> (raw)

As reported in https://bugzilla.kernel.org/206217 , raw_violation_fixup
is causing more harm than good in some common use-cases.

This patch is a partial revert of the 191cd6fb5 commit:
 "PCI: tegra: Add SW fixup for RAW violations"
that was first introduced in 5.3-rc1 kernel.
This fix the following regression since then.

* Description:
When both the NIC and MMC are used one can see the following message:

NETDEV WATCHDOG: enp1s0 (r8169): transmit queue 0 timed out

  and

pcieport 0000:00:02.0: AER: Uncorrected (Non-Fatal) error received: 0000:01:00.0
r8169 0000:01:00.0: AER: PCIe Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, (Requester ID)
r8169 0000:01:00.0: AER:   device [10ec:8168] error status/mask=00004000/00400000
r8169 0000:01:00.0: AER:    [14] CmpltTO                (First)
r8169 0000:01:00.0: AER: can't recover (no error_detected callback)
pcieport 0000:00:02.0: AER: device recovery failed

After that, the ethernet NIC isn't functional anymore even after reloading
the r8169 module.
After a reboot, this is reproducible by copying a large file over the
NIC to the MMC.

For some reasons this cannot be reproduced when the same file is copied
to a tmpfs.

* Little background on the fixup, by Manikanta Maddireddy:
  "In the internal testing with dGPU on Tegra124, CmplTO is reported by
dGPU. This happened because FIFO queue in AFI(AXI to PCIe) module
get full by upstream posted writes. Back to back upstream writes
interleaved with infrequent reads, triggers RAW violation and CmpltTO.
This is fixed by reducing the posted write credits and by changing
updateFC timer frequency. These settings are fixed after stress test.

In the current case, RTL NIC is also reporting CmplTO. These settings
seems to be aggravating the issue instead of fixing it."

v1: first non-RFC version
 - Disable raw_violation_fixup and fully remove unused code and macros

Signed-off-by: Nicolas Chauvet <kwizart@gmail.com>
Reviewed-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
Cc: <stable@vger.kernel.org> # 5.4.x
---
 drivers/pci/controller/pci-tegra.c | 32 ------------------------------
 1 file changed, 32 deletions(-)

diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c
index 235b456698fc..b532d5082fb6 100644
--- a/drivers/pci/controller/pci-tegra.c
+++ b/drivers/pci/controller/pci-tegra.c
@@ -181,13 +181,6 @@
 
 #define AFI_PEXBIAS_CTRL_0		0x168
 
-#define RP_PRIV_XP_DL		0x00000494
-#define  RP_PRIV_XP_DL_GEN2_UPD_FC_TSHOLD	(0x1ff << 1)
-
-#define RP_RX_HDR_LIMIT		0x00000e00
-#define  RP_RX_HDR_LIMIT_PW_MASK	(0xff << 8)
-#define  RP_RX_HDR_LIMIT_PW		(0x0e << 8)
-
 #define RP_ECTL_2_R1	0x00000e84
 #define  RP_ECTL_2_R1_RX_CTLE_1C_MASK		0xffff
 
@@ -323,7 +316,6 @@ struct tegra_pcie_soc {
 	bool program_uphy;
 	bool update_clamp_threshold;
 	bool program_deskew_time;
-	bool raw_violation_fixup;
 	bool update_fc_timer;
 	bool has_cache_bars;
 	struct {
@@ -659,23 +651,6 @@ static void tegra_pcie_apply_sw_fixup(struct tegra_pcie_port *port)
 		writel(value, port->base + RP_VEND_CTL0);
 	}
 
-	/* Fixup for read after write violation. */
-	if (soc->raw_violation_fixup) {
-		value = readl(port->base + RP_RX_HDR_LIMIT);
-		value &= ~RP_RX_HDR_LIMIT_PW_MASK;
-		value |= RP_RX_HDR_LIMIT_PW;
-		writel(value, port->base + RP_RX_HDR_LIMIT);
-
-		value = readl(port->base + RP_PRIV_XP_DL);
-		value |= RP_PRIV_XP_DL_GEN2_UPD_FC_TSHOLD;
-		writel(value, port->base + RP_PRIV_XP_DL);
-
-		value = readl(port->base + RP_VEND_XP);
-		value &= ~RP_VEND_XP_UPDATE_FC_THRESHOLD_MASK;
-		value |= soc->update_fc_threshold;
-		writel(value, port->base + RP_VEND_XP);
-	}
-
 	if (soc->update_fc_timer) {
 		value = readl(port->base + RP_VEND_XP);
 		value &= ~RP_VEND_XP_UPDATE_FC_THRESHOLD_MASK;
@@ -2416,7 +2391,6 @@ static const struct tegra_pcie_soc tegra20_pcie = {
 	.program_uphy = true,
 	.update_clamp_threshold = false,
 	.program_deskew_time = false,
-	.raw_violation_fixup = false,
 	.update_fc_timer = false,
 	.has_cache_bars = true,
 	.ectl.enable = false,
@@ -2446,7 +2420,6 @@ static const struct tegra_pcie_soc tegra30_pcie = {
 	.program_uphy = true,
 	.update_clamp_threshold = false,
 	.program_deskew_time = false,
-	.raw_violation_fixup = false,
 	.update_fc_timer = false,
 	.has_cache_bars = false,
 	.ectl.enable = false,
@@ -2459,8 +2432,6 @@ static const struct tegra_pcie_soc tegra124_pcie = {
 	.pads_pll_ctl = PADS_PLL_CTL_TEGRA30,
 	.tx_ref_sel = PADS_PLL_CTL_TXCLKREF_BUF_EN,
 	.pads_refclk_cfg0 = 0x44ac44ac,
-	/* FC threshold is bit[25:18] */
-	.update_fc_threshold = 0x03fc0000,
 	.has_pex_clkreq_en = true,
 	.has_pex_bias_ctrl = true,
 	.has_intr_prsnt_sense = true,
@@ -2470,7 +2441,6 @@ static const struct tegra_pcie_soc tegra124_pcie = {
 	.program_uphy = true,
 	.update_clamp_threshold = true,
 	.program_deskew_time = false,
-	.raw_violation_fixup = true,
 	.update_fc_timer = false,
 	.has_cache_bars = false,
 	.ectl.enable = false,
@@ -2494,7 +2464,6 @@ static const struct tegra_pcie_soc tegra210_pcie = {
 	.program_uphy = true,
 	.update_clamp_threshold = true,
 	.program_deskew_time = true,
-	.raw_violation_fixup = false,
 	.update_fc_timer = true,
 	.has_cache_bars = false,
 	.ectl = {
@@ -2536,7 +2505,6 @@ static const struct tegra_pcie_soc tegra186_pcie = {
 	.program_uphy = false,
 	.update_clamp_threshold = false,
 	.program_deskew_time = false,
-	.raw_violation_fixup = false,
 	.update_fc_timer = false,
 	.has_cache_bars = false,
 	.ectl.enable = false,
-- 
2.25.4

             reply	other threads:[~2020-07-17 21:35 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-17 21:35 Nicolas Chauvet [this message]
2020-07-17 21:53 ` [PATCH] pci: tegra: Revert raw_violation_fixup for tegra124 Bjorn Helgaas
2020-07-20  7:02   ` Nicolas Chauvet
     [not found]     ` <CABr+WTkJ8jZDkM_=-LYxpbrqrsPEb96YBRJvBjR5u+0Ck9R4CQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2020-07-20 11:30       ` Lorenzo Pieralisi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200717213510.171726-1-kwizart@gmail.com \
    --to=kwizart@gmail.com \
    --cc=helgaas@kernel.org \
    --cc=jonathanh@nvidia.com \
    --cc=linux-pci@vger.kernel.org \
    --cc=linux-tegra@vger.kernel.org \
    --cc=lorenzo.pieralisi@arm.com \
    --cc=mmaddireddy@nvidia.com \
    --cc=stable@vger.kernel.org \
    --cc=thierry.reding@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).