linux-edac.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Serge Semin <fancer.lancer@gmail.com>
To: Michal Simek <michal.simek@amd.com>,
	Alexander Stein <alexander.stein@ew.tq-group.com>,
	Borislav Petkov <bp@alien8.de>, Tony Luck <tony.luck@intel.com>,
	James Morse <james.morse@arm.com>,
	Mauro Carvalho Chehab <mchehab@kernel.org>,
	Robert Richter <rric@kernel.org>,
	Dinh Nguyen <dinguyen@kernel.org>
Cc: Serge Semin <fancer.lancer@gmail.com>,
	Punnaiah Choudary Kalluri <punnaiah.choudary.kalluri@xilinx.com>,
	Arnd Bergmann <arnd@arndb.de>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	linux-arm-kernel@lists.infradead.org, linux-edac@vger.kernel.org,
	linux-kernel@vger.kernel.org, Sherry Sun <sherry.sun@nxp.com>,
	Borislav Petkov <bp@suse.de>
Subject: [PATCH v5 01/20] EDAC/synopsys: Fix ECC status data and IRQ disable race condition
Date: Thu, 22 Feb 2024 21:12:46 +0300	[thread overview]
Message-ID: <20240222181324.28242-2-fancer.lancer@gmail.com> (raw)
In-Reply-To: <20240222181324.28242-1-fancer.lancer@gmail.com>

The race condition around the ECCCLR register access occurs between the
IRQ disable method, called in the device remove() procedure, and the ECC
IRQ handler:
1. Enable IRQ:
   a. ECCCLR = EN_CE | EN_UE
2. Disable IRQ:
   a. ECCCLR = 0
3. IRQ handler:
   a. ECCCLR = CLR_CE | CLR_CE_CNT | CLR_UE | CLR_UE_CNT
   b. ECCCLR = 0
   c. ECCCLR = EN_CE | EN_UE
So if the IRQ disabling procedure is called concurrently with the IRQ
handler method, the IRQ might actually be left enabled due to
statement 3c.

The root cause of the problem is that the ECCCLR register (which has been
called ECCCTL since v3.10a) intermixes the ECC status data clear flags
with the IRQ enable/disable flags. Thus the IRQ disabling (clear EN flags)
and handling (write 1 to clear ECC status data) procedures must be
serialised around the ECCCTL register modification to prevent the race.

So fix the problem described above by adding a spin-lock around the
ECCCLR modifications and preventing the IRQ handler from modifying the
IRQ enable flags (there is no point in disabling the IRQ and then
re-enabling it again within a single IRQ handler call, see statements
3a/3b and 3c above).

Fixes: f7824ded4149 ("EDAC/synopsys: Add support for version 3 of the Synopsys EDAC DDR")
Signed-off-by: Serge Semin <fancer.lancer@gmail.com>

---

Cc: Sherry Sun <sherry.sun@nxp.com>

Changelog v4:
- This is a new patch detached from
  [PATCH v3 01/17] EDAC/synopsys: Fix native uMCTL2 IRQs handling procedure
- Rename lock to reglock (Borislav)
---
 drivers/edac/synopsys_edac.c | 50 ++++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 13 deletions(-)

diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
index 709babce43ba..0168b05e3ca1 100644
--- a/drivers/edac/synopsys_edac.c
+++ b/drivers/edac/synopsys_edac.c
@@ -9,6 +9,7 @@
 #include <linux/edac.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/of.h>
 
@@ -299,6 +300,7 @@ struct synps_ecc_status {
 /**
  * struct synps_edac_priv - DDR memory controller private instance data.
  * @baseaddr:		Base address of the DDR controller.
+ * @reglock:		Concurrent CSRs access lock.
  * @message:		Buffer for framing the event specific info.
  * @stat:		ECC status information.
  * @p_data:		Platform data.
@@ -313,6 +315,7 @@ struct synps_ecc_status {
  */
 struct synps_edac_priv {
 	void __iomem *baseaddr;
+	spinlock_t reglock;
 	char message[SYNPS_EDAC_MSG_SIZE];
 	struct synps_ecc_status stat;
 	const struct synps_platform_data *p_data;
@@ -408,7 +411,8 @@ static int zynq_get_error_info(struct synps_edac_priv *priv)
 static int zynqmp_get_error_info(struct synps_edac_priv *priv)
 {
 	struct synps_ecc_status *p;
-	u32 regval, clearval = 0;
+	u32 regval, clearval;
+	unsigned long flags;
 	void __iomem *base;
 
 	base = priv->baseaddr;
@@ -452,10 +456,14 @@ static int zynqmp_get_error_info(struct synps_edac_priv *priv)
 	p->ueinfo.blknr = (regval & ECC_CEADDR1_BLKNR_MASK);
 	p->ueinfo.data = readl(base + ECC_UESYND0_OFST);
 out:
-	clearval = ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_CE_ERRCNT;
-	clearval |= ECC_CTRL_CLR_UE_ERR | ECC_CTRL_CLR_UE_ERRCNT;
+	spin_lock_irqsave(&priv->reglock, flags);
+
+	clearval = readl(base + ECC_CLR_OFST) |
+		   ECC_CTRL_CLR_CE_ERR | ECC_CTRL_CLR_CE_ERRCNT |
+		   ECC_CTRL_CLR_UE_ERR | ECC_CTRL_CLR_UE_ERRCNT;
 	writel(clearval, base + ECC_CLR_OFST);
-	writel(0x0, base + ECC_CLR_OFST);
+
+	spin_unlock_irqrestore(&priv->reglock, flags);
 
 	return 0;
 }
@@ -515,24 +523,41 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p)
 
 static void enable_intr(struct synps_edac_priv *priv)
 {
+	unsigned long flags;
+
 	/* Enable UE/CE Interrupts */
-	if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)
-		writel(DDR_UE_MASK | DDR_CE_MASK,
-		       priv->baseaddr + ECC_CLR_OFST);
-	else
+	if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)) {
 		writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
 		       priv->baseaddr + DDR_QOS_IRQ_EN_OFST);
 
+		return;
+	}
+
+	spin_lock_irqsave(&priv->reglock, flags);
+
+	writel(DDR_UE_MASK | DDR_CE_MASK,
+	       priv->baseaddr + ECC_CLR_OFST);
+
+	spin_unlock_irqrestore(&priv->reglock, flags);
 }
 
 static void disable_intr(struct synps_edac_priv *priv)
 {
+	unsigned long flags;
+
 	/* Disable UE/CE Interrupts */
-	if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)
-		writel(0x0, priv->baseaddr + ECC_CLR_OFST);
-	else
+	if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)) {
 		writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
 		       priv->baseaddr + DDR_QOS_IRQ_DB_OFST);
+
+		return;
+	}
+
+	spin_lock_irqsave(&priv->reglock, flags);
+
+	writel(0, priv->baseaddr + ECC_CLR_OFST);
+
+	spin_unlock_irqrestore(&priv->reglock, flags);
 }
 
 /**
@@ -576,8 +601,6 @@ static irqreturn_t intr_handler(int irq, void *dev_id)
 	/* v3.0 of the controller does not have this register */
 	if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR))
 		writel(regval, priv->baseaddr + DDR_QOS_IRQ_STAT_OFST);
-	else
-		enable_intr(priv);
 
 	return IRQ_HANDLED;
 }
@@ -1359,6 +1382,7 @@ static int mc_probe(struct platform_device *pdev)
 	priv = mci->pvt_info;
 	priv->baseaddr = baseaddr;
 	priv->p_data = p_data;
+	spin_lock_init(&priv->reglock);
 
 	mc_init(mci, pdev);
 
-- 
2.43.0


  reply	other threads:[~2024-02-22 18:14 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-22 18:12 [PATCH v5 00/20] EDAC/mc/synopsys: Various fixes and cleanups Serge Semin
2024-02-22 18:12 ` Serge Semin [this message]
2024-04-15 18:36   ` [PATCH v5 01/20] EDAC/synopsys: Fix ECC status data and IRQ disable race condition Borislav Petkov
2024-04-16 10:06     ` Serge Semin
2024-04-21 10:07       ` Borislav Petkov
2024-04-25 12:52         ` Serge Semin
2024-05-06 10:20           ` Borislav Petkov
2024-05-06 11:27             ` Serge Semin
2024-05-06 12:12               ` Borislav Petkov
2024-02-22 18:12 ` [PATCH v5 02/20] EDAC/synopsys: Fix generic device type detection procedure Serge Semin
2024-02-22 18:12 ` [PATCH v5 03/20] EDAC/synopsys: Fix mci->scrub_cap field setting Serge Semin
2024-02-22 18:12 ` [PATCH v5 04/20] EDAC/synopsys: Drop erroneous ADDRMAP4.addrmap_col_b10 parse Serge Semin
2024-02-22 18:12 ` [PATCH v5 05/20] EDAC/synopsys: Fix reading errors count before ECC status Serge Semin
2024-02-22 18:12 ` [PATCH v5 06/20] EDAC/synopsys: Fix misleading IRQ self-cleared quirk flag Serge Semin
2024-02-22 18:12 ` [PATCH v5 07/20] EDAC/synopsys: Use platform device devm ioremap method Serge Semin
2024-02-22 18:12 ` [PATCH v5 08/20] EDAC/synopsys: Drop internal CE and UE counters Serge Semin
2024-02-22 18:12 ` [PATCH v5 09/20] EDAC/synopsys: Drop local to_mci() macro definition Serge Semin
2024-02-22 18:12 ` [PATCH v5 10/20] EDAC/synopsys: Drop struct ecc_error_info.blknr field Serge Semin
2024-02-22 18:12 ` [PATCH v5 11/20] EDAC/synopsys: Shorten out struct ecc_error_info.bankgrpnr field name Serge Semin
2024-02-22 18:12 ` [PATCH v5 12/20] EDAC/synopsys: Drop redundant info from the error messages Serge Semin
2024-02-22 18:12 ` [PATCH v5 13/20] EDAC/mc: Init DIMM labels in MC registration method Serge Semin
2024-02-22 18:12 ` [PATCH v5 14/20] EDAC/mc: Add generic unique MC index allocation procedure Serge Semin
2024-02-22 18:13 ` [PATCH v5 15/20] EDAC/mc: Re-use " Serge Semin
2024-02-22 18:13 ` [PATCH v5 16/20] EDAC/synopsys: Detach Zynq A05 DDRC support to separate driver Serge Semin
2024-02-22 18:13 ` [PATCH v5 17/20] EDAC/synopsys: Drop unused platform-specific setup API Serge Semin
2024-02-22 18:13 ` [PATCH v5 18/20] EDAC/synopsys: Unify CSRs macro declarations Serge Semin
2024-02-22 18:13 ` [PATCH v5 19/20] EDAC/synopsys: Unify struct/macro/function prefixes Serge Semin
2024-02-22 18:13 ` [PATCH v5 20/20] EDAC/synopsys: Convert to using BIT/GENMASK/FIELD_x macros Serge Semin
2024-03-06  5:27 ` [PATCH v5 00/20] EDAC/mc/synopsys: Various fixes and cleanups Shubhrajyoti Datta

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240222181324.28242-2-fancer.lancer@gmail.com \
    --to=fancer.lancer@gmail.com \
    --cc=alexander.stein@ew.tq-group.com \
    --cc=arnd@arndb.de \
    --cc=bp@alien8.de \
    --cc=bp@suse.de \
    --cc=dinguyen@kernel.org \
    --cc=gregkh@linuxfoundation.org \
    --cc=james.morse@arm.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mchehab@kernel.org \
    --cc=michal.simek@amd.com \
    --cc=punnaiah.choudary.kalluri@xilinx.com \
    --cc=rric@kernel.org \
    --cc=sherry.sun@nxp.com \
    --cc=tony.luck@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).