net: dsa: realtek: silent indirect reg read failures on SMP

* net: dsa: realtek: silent indirect reg read failures on SMP
@ 2022-02-06 20:26 Luiz Angelo Daros de Luca
  2022-02-09 10:32 ` Alvin Šipraga
  0 siblings, 1 reply; 8+ messages in thread
From: Luiz Angelo Daros de Luca @ 2022-02-06 20:26 UTC (permalink / raw)
  To: netdev, Alvin Šipraga, Linus Walleij; +Cc: Arınç ÜNAL

Hello,

Arınç reported an issue with interfaces going down and up at a fixed
interval (a little less than 40s). It happened with both the SMI and MDIO
interfaces, on two different models supported by rtl8365mb.

The indirect read procedure is something like this:

rtl8365mb_phy_read
  rtl8365mb_phy_ocp_read
    rtl8365mb_phy_poll_busy
      regmap_read_poll_timeout(RTL8365MB_INDIRECT_ACCESS_STATUS_REG)
    rtl8365mb_phy_ocp_prepare
      regmap_update_bits(RTL8365MB_GPHY_OCP_MSB_0_REG)
      regmap_write(RTL8365MB_INDIRECT_ACCESS_ADDRESS_REG)
    regmap_write(RTL8365MB_INDIRECT_ACCESS_CTRL_REG)
    rtl8365mb_phy_poll_busy
      regmap_read_poll_timeout(RTL8365MB_INDIRECT_ACCESS_STATUS_REG)
    regmap_read(RTL8365MB_INDIRECT_ACCESS_READ_DATA_REG)

And the write is similar. Each regmap read/write is protected by regmap's
own mutex. realtek-mdio uses a sequence of MDIO reads/writes for each
regmap read/write op, so in addition to regmap's internal lock, I also
lock bus->mdio_lock.

In a non-SMP system or with extra cores disabled, everything works as
expected. However, with an SMP system, there is some kind of
concurrent access to registers and
regmap_read(RTL8365MB_INDIRECT_ACCESS_READ_DATA_REG) will eventually
read 0x0000. It is as if the chip didn't have time to update that
register, or the value got lost along the way. It only happens when the
system is idle. If you stress the CPU, the simultaneous register accesses
fall out of sync or get serialized.

I can "simulate" a similar issue in a non-SMP system by dumping regmap
from unused registers like 0x20a0 to 0x20ff (0x20a0 is related to
port 5 state, but I only have ports 0,1,2,3,4 + 6,7). There are other
regions that cause the same behavior, but [0x20a0,0x20ff] is the first one
I found. It never failed while reading good memory regions. Maybe it is
just caused by another issue, like the chip getting confused while
reading undefined memory regions.

for a in $(seq 1000); do dd
if=/sys/kernel/debug/regmap/mdio-bus:1d/registers bs=11
skip=$((0x20a0)) count=$((0x1)) 2>/dev/null; done

(That range might not be precise as I don't know if dd is enough to
select only a specific range)

While the script is running, I can see both the driver and the debugfs
dump flickering between good data, garbage and zero. When DSA reads
0x0000, it brings the port down and up.

I tried to add a simple mutex around the rtl8365mb_phy_ocp_read call but it
still failed to prevent the event in an SMP system (it would not
protect the debugfs dump). Maybe I'm missing something obvious.

I also wonder if all those functions that use a sequence of
regmap_{read,write} might need a mutex to protect them from parallel
execution. I didn't find a way to create a "regmap transaction" by
locking it externally, as its lock control is private, as is the
non-locked _regmap_read(). I could disable its lock and do the locking
inside the driver, but it would also disable the debugfs dump and add a
bunch of new code. And I don't even know if it would fix the issue.
Is there a better way to deal with this issue? Maybe force DSA polling
to use a single processor?

This is the failed patch.

diff --git a/drivers/net/dsa/realtek/rtl8365mb.c
b/drivers/net/dsa/realtek/rtl8365mb.c
index 1a0562811c1a..5572271a2514 100644
--- a/drivers/net/dsa/realtek/rtl8365mb.c
+++ b/drivers/net/dsa/realtek/rtl8365mb.c
@@ -817,6 +817,7 @@ struct rtl8365mb_port {
 * @port_mask: mask of all ports
 * @learn_limit_max: maximum number of L2 addresses the chip can learn
 * @mib_lock: prevent concurrent reads of MIB counters
+ * @indirect_reg_lock: prevent concurrent access of indirect registers
 * @ports: per-port data
 * @jam_table: chip-specific initialization jam table
 * @jam_size: size of the chip's jam table
@@ -832,6 +833,7 @@ struct rtl8365mb {
       u32 port_mask;
       u32 learn_limit_max;
       struct mutex mib_lock;
+       struct mutex indirect_reg_lock;
       struct rtl8365mb_port ports[RTL8365MB_MAX_NUM_PORTS];
       const struct rtl8365mb_jam_tbl_entry *jam_table;
       size_t jam_size;
@@ -989,6 +991,7 @@ static int rtl8365mb_phy_ocp_write(struct
realtek_priv *priv, int phy,

static int rtl8365mb_phy_read(struct realtek_priv *priv, int phy, int regnum)
{
+       struct rtl8365mb *mb = priv->chip_data;
       u32 ocp_addr;
       u16 val;
       int ret;
@@ -1001,16 +1004,24 @@ static int rtl8365mb_phy_read(struct
realtek_priv *priv, int phy, int regnum)

       ocp_addr = RTL8365MB_PHY_OCP_ADDR_PHYREG_BASE + regnum * 2;

-       ret = rtl8365mb_phy_ocp_read(priv, phy, ocp_addr, &val);
+       ret = mutex_lock_interruptible(&mb->indirect_reg_lock);
       if (ret) {
               dev_err(priv->dev,
-                       "failed to read PHY%d reg %02x @ %04x, ret %d\n", phy,
+                       "read PHY%d reg %02x @ %04x, ret %d interrupted\n", phy,
                       regnum, ocp_addr, ret);
               return ret;
       }

-       dev_dbg(priv->dev, "read PHY%d register 0x%02x @ %04x, val <- %04x\n",
-               phy, regnum, ocp_addr, val);
+       ret = rtl8365mb_phy_ocp_read(priv, phy, ocp_addr, &val);
+       if (ret)
+               dev_err(priv->dev,
+                       "failed to read PHY%d reg %02x @ %04x, ret %d\n", phy,
+                       regnum, ocp_addr, ret);
+       else
+               dev_dbg(priv->dev, "read PHY%d register 0x%02x @ %04x,
val <- %04x\n",
+                       phy, regnum, ocp_addr, val);
+
+       mutex_unlock(&mb->indirect_reg_lock);

       return val;
}
@@ -1018,6 +1029,7 @@ static int rtl8365mb_phy_read(struct
realtek_priv *priv, int phy, int regnum)
static int rtl8365mb_phy_write(struct realtek_priv *priv, int phy, int regnum,
                              u16 val)
{
+       struct rtl8365mb *mb = priv->chip_data;
       u32 ocp_addr;
       int ret;

@@ -1029,18 +1041,26 @@ static int rtl8365mb_phy_write(struct
realtek_priv *priv, int phy, int regnum,

       ocp_addr = RTL8365MB_PHY_OCP_ADDR_PHYREG_BASE + regnum * 2;

-       ret = rtl8365mb_phy_ocp_write(priv, phy, ocp_addr, val);
+       ret = mutex_lock_interruptible(&mb->indirect_reg_lock);
       if (ret) {
               dev_err(priv->dev,
-                       "failed to write PHY%d reg %02x @ %04x, ret %d\n", phy,
+                       "write PHY%d reg %02x @ %04x, ret %d
interrupted\n", phy,
                       regnum, ocp_addr, ret);
               return ret;
       }

-       dev_dbg(priv->dev, "write PHY%d register 0x%02x @ %04x, val -> %04x\n",
-               phy, regnum, ocp_addr, val);
+       ret = rtl8365mb_phy_ocp_write(priv, phy, ocp_addr, val);
+       if (ret)
+               dev_err(priv->dev,
+                       "failed to write PHY%d reg %02x @ %04x, ret %d\n", phy,
+                       regnum, ocp_addr, ret);
+       else
+               dev_dbg(priv->dev, "write PHY%d register 0x%02x @
%04x, val -> %04x\n",
+                       phy, regnum, ocp_addr, val);

-       return 0;
+       mutex_unlock(&mb->indirect_reg_lock);
+
+       return ret;
}

static int rtl8365mb_dsa_phy_read(struct dsa_switch *ds, int phy, int regnum)
@@ -2215,6 +2235,8 @@ static int rtl8365mb_setup(struct dsa_switch *ds)

       mb = priv->chip_data;

+       mutex_init(&mb->indirect_reg_lock);
+
       ret = rtl8365mb_reset_chip(priv);
       if (ret) {
               dev_err(priv->dev, "failed to reset chip: %d\n", ret);
--
2.34.1


---
   Luiz

^ permalink raw reply related	[flat|nested] 8+ messages in thread