netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/4] Bug fixes to amd-xgbe driver
@ 2021-02-12 18:00 Shyam Sundar S K
  2021-02-12 18:00 ` [PATCH 1/4] amd-xgbe: Reset the PHY rx data path when mailbox command timeout Shyam Sundar S K
                   ` (4 more replies)
  0 siblings, 5 replies; 12+ messages in thread
From: Shyam Sundar S K @ 2021-02-12 18:00 UTC (permalink / raw)
  To: Tom Lendacky, David S . Miller, Jakub Kicinski, netdev
  Cc: Sudheesh.Mavila, Shyam Sundar S K

General fixes on amd-xgbe driver are addressed in this series, mostly
on the mailbox communication failures and improving the link stability
of the amd-xgbe device.

Shyam Sundar S K (4):
  amd-xgbe: Reset the PHY rx data path when mailbox command timeout
  amd-xgbe: Fix NETDEV WATCHDOG transmit queue timeout warning
  amd-xgbe: Reset link when the link never comes back
  amd-xgbe: Fix network fluctuations when using 1G BELFUSE SFP

 drivers/net/ethernet/amd/xgbe/xgbe-common.h | 13 +++++++
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c    |  1 +
 drivers/net/ethernet/amd/xgbe/xgbe-mdio.c   |  3 +-
 drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 39 ++++++++++++++++++++-
 4 files changed, 53 insertions(+), 3 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/4] amd-xgbe: Reset the PHY rx data path when mailbox command timeout
  2021-02-12 18:00 [PATCH 0/4] Bug fixes to amd-xgbe driver Shyam Sundar S K
@ 2021-02-12 18:00 ` Shyam Sundar S K
  2021-02-12 18:43   ` Tom Lendacky
  2021-02-12 18:00 ` [PATCH 2/4] amd-xgbe: Fix NETDEV WATCHDOG transmit queue timeout warning Shyam Sundar S K
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 12+ messages in thread
From: Shyam Sundar S K @ 2021-02-12 18:00 UTC (permalink / raw)
  To: Tom Lendacky, David S . Miller, Jakub Kicinski, netdev
  Cc: Sudheesh.Mavila, Shyam Sundar S K, Sudheesh Mavila

Sometimes mailbox commands timeout when the RX data path becomes
unresponsive. This prevents the submission of new mailbox commands to DXIO.
This patch identifies the timeout and resets the RX data path so that the
next message can be submitted properly.

Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-common.h | 13 +++++++++++
 drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 25 ++++++++++++++++++++-
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
index b40d4377cc71..318817450fbd 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -1279,10 +1279,18 @@
 #define MDIO_PMA_10GBR_FECCTRL		0x00ab
 #endif
 
+#ifndef MDIO_PMA_RX_CTRL1
+#define MDIO_PMA_RX_CTRL1		0x8051
+#endif
+
 #ifndef MDIO_PCS_DIG_CTRL
 #define MDIO_PCS_DIG_CTRL		0x8000
 #endif
 
+#ifndef MDIO_PCS_DIGITAL_STAT
+#define MDIO_PCS_DIGITAL_STAT		0x8010
+#endif
+
 #ifndef MDIO_AN_XNP
 #define MDIO_AN_XNP			0x0016
 #endif
@@ -1358,6 +1366,7 @@
 #define XGBE_KR_TRAINING_ENABLE		BIT(1)
 
 #define XGBE_PCS_CL37_BP		BIT(12)
+#define XGBE_PCS_PSEQ_STATE_BIT		0x10
 
 #define XGBE_AN_CL37_INT_CMPLT		BIT(0)
 #define XGBE_AN_CL37_INT_MASK		0x01
@@ -1375,6 +1384,10 @@
 #define XGBE_PMA_CDR_TRACK_EN_OFF	0x00
 #define XGBE_PMA_CDR_TRACK_EN_ON	0x01
 
+#define XGBE_PMA_RX_RST_0_MASK		BIT(4)
+#define XGBE_PMA_RX_RST_0_RESET_ON	0x10
+#define XGBE_PMA_RX_RST_0_RESET_OFF	0x00
+
 /* Bit setting and getting macros
  *  The get macro will extract the current bit field value from within
  *  the variable
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index 859ded0c06b0..489f1f86df99 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -1953,6 +1953,24 @@ static void xgbe_phy_set_redrv_mode(struct xgbe_prv_data *pdata)
 	xgbe_phy_put_comm_ownership(pdata);
 }
 
+static void xgbe_phy_rx_reset(struct xgbe_prv_data *pdata)
+{
+	int reg;
+
+	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_PCS_DIGITAL_STAT);
+	if (reg & XGBE_PCS_PSEQ_STATE_BIT) {
+		/* mailbox command timed out, reset Rx block */
+		/* Assert reset bit for 8ns and wait for 40us */
+		XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_RX_CTRL1,
+				 XGBE_PMA_RX_RST_0_MASK, XGBE_PMA_RX_RST_0_RESET_ON);
+		ndelay(20);
+		XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_RX_CTRL1,
+				 XGBE_PMA_RX_RST_0_MASK, XGBE_PMA_RX_RST_0_RESET_OFF);
+		usleep_range(40, 50);
+		netif_err(pdata, link, pdata->netdev, "firmware mailbox reset performed\n");
+	}
+}
+
 static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata,
 					unsigned int cmd, unsigned int sub_cmd)
 {
@@ -1960,9 +1978,11 @@ static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata,
 	unsigned int wait;
 
 	/* Log if a previous command did not complete */
-	if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS))
+	if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) {
 		netif_dbg(pdata, link, pdata->netdev,
 			  "firmware mailbox not ready for command\n");
+			xgbe_phy_rx_reset(pdata);
+	}
 
 	/* Construct the command */
 	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, cmd);
@@ -1984,6 +2004,9 @@ static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata,
 
 	netif_dbg(pdata, link, pdata->netdev,
 		  "firmware mailbox command did not complete\n");
+
+	/* Reset on error */
+	xgbe_phy_rx_reset(pdata);
 }
 
 static void xgbe_phy_rrc(struct xgbe_prv_data *pdata)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/4] amd-xgbe: Fix NETDEV WATCHDOG transmit queue timeout warning
  2021-02-12 18:00 [PATCH 0/4] Bug fixes to amd-xgbe driver Shyam Sundar S K
  2021-02-12 18:00 ` [PATCH 1/4] amd-xgbe: Reset the PHY rx data path when mailbox command timeout Shyam Sundar S K
@ 2021-02-12 18:00 ` Shyam Sundar S K
  2021-02-12 18:48   ` Tom Lendacky
  2021-02-12 18:00 ` [PATCH 3/4] amd-xgbe: Reset link when the link never comes back Shyam Sundar S K
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 12+ messages in thread
From: Shyam Sundar S K @ 2021-02-12 18:00 UTC (permalink / raw)
  To: Tom Lendacky, David S . Miller, Jakub Kicinski, netdev
  Cc: Sudheesh.Mavila, Shyam Sundar S K, Sudheesh Mavila

Current driver calls the netif_carrier_off() during the later point in
time to tear down the link which causes the netdev watchdog to timeout.

Calling netif_carrier_off() immediately after netif_tx_stop_all_queues()
would avoids the warning.

 ------------[ cut here ]------------
 NETDEV WATCHDOG: enp3s0f2 (amd-xgbe): transmit queue 0 timed out
 WARNING: CPU: 3 PID: 0 at net/sched/sch_generic.c:461 dev_watchdog+0x20d/0x220
 Modules linked in: amd_xgbe(E)  amd-xgbe 0000:03:00.2 enp3s0f2: Link is Down
 CPU: 3 PID: 0 Comm: swapper/3 Tainted: G            E
 Hardware name: AMD Bilby-RV2/Bilby-RV2, BIOS RBB1202A 10/18/2019
 RIP: 0010:dev_watchdog+0x20d/0x220
 Code: 00 49 63 4e e0 eb 92 4c 89 e7 c6 05 c6 e2 c1 00 01 e8 e7 ce fc ff 89 d9 48
 RSP: 0018:ffff90cfc28c3e88 EFLAGS: 00010286
 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006
 RDX: 0000000000000007 RSI: 0000000000000086 RDI: ffff90cfc28d63c0
 RBP: ffff90cfb977845c R08: 0000000000000050 R09: 0000000000196018
 R10: ffff90cfc28c3ef8 R11: 0000000000000000 R12: ffff90cfb9778000
 R13: 0000000000000003 R14: ffff90cfb9778480 R15: 0000000000000010
 FS:  0000000000000000(0000) GS:ffff90cfc28c0000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007f240ff2d9d0 CR3: 00000001e3e0a000 CR4: 00000000003406e0
 Call Trace:
  <IRQ>
  ? pfifo_fast_reset+0x100/0x100
  call_timer_fn+0x2b/0x130
  run_timer_softirq+0x3e8/0x440
  ? enqueue_hrtimer+0x39/0x90

Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c  | 1 +
 drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 2709a2db5657..395eb0b52680 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1368,6 +1368,7 @@ static void xgbe_stop(struct xgbe_prv_data *pdata)
 		return;
 
 	netif_tx_stop_all_queues(netdev);
+	netif_carrier_off(pdata->netdev);
 
 	xgbe_stop_timers(pdata);
 	flush_workqueue(pdata->dev_workqueue);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
index 93ef5a30cb8d..19ee4db0156d 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -1396,7 +1396,6 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata)
 	pdata->phy_if.phy_impl.stop(pdata);
 
 	pdata->phy.link = 0;
-	netif_carrier_off(pdata->netdev);
 
 	xgbe_phy_adjust_link(pdata);
 }
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/4] amd-xgbe: Reset link when the link never comes back
  2021-02-12 18:00 [PATCH 0/4] Bug fixes to amd-xgbe driver Shyam Sundar S K
  2021-02-12 18:00 ` [PATCH 1/4] amd-xgbe: Reset the PHY rx data path when mailbox command timeout Shyam Sundar S K
  2021-02-12 18:00 ` [PATCH 2/4] amd-xgbe: Fix NETDEV WATCHDOG transmit queue timeout warning Shyam Sundar S K
@ 2021-02-12 18:00 ` Shyam Sundar S K
  2021-02-12 19:01   ` Tom Lendacky
  2021-02-12 18:00 ` [PATCH 4/4] amd-xgbe: Fix network fluctuations when using 1G BELFUSE SFP Shyam Sundar S K
  2021-02-12 18:57 ` [PATCH 0/4] Bug fixes to amd-xgbe driver Florian Fainelli
  4 siblings, 1 reply; 12+ messages in thread
From: Shyam Sundar S K @ 2021-02-12 18:00 UTC (permalink / raw)
  To: Tom Lendacky, David S . Miller, Jakub Kicinski, netdev
  Cc: Sudheesh.Mavila, Shyam Sundar S K, Sudheesh Mavila

Normally, auto negotiation and reconnect should be automatically done by
the hardware. But there seems to be an issue where auto negotiation has
to be restarted manually. This happens because of link training and so
even though still connected to the partner the link never "comes back".
This would need a reset to recover.

Also, a change in xgbe-mdio is needed to get ethtool to recognize the
link down and get the link change message.

Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-mdio.c   | 2 +-
 drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
index 19ee4db0156d..4e97b4869522 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -1345,7 +1345,7 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata)
 							     &an_restart);
 	if (an_restart) {
 		xgbe_phy_config_aneg(pdata);
-		return;
+		goto adjust_link;
 	}
 
 	if (pdata->phy.link) {
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index 489f1f86df99..1bb468ac9635 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -2607,6 +2607,14 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
 	if (reg & MDIO_STAT1_LSTATUS)
 		return 1;
 
+	if (pdata->phy.autoneg == AUTONEG_ENABLE &&
+	    phy_data->port_mode == XGBE_PORT_MODE_BACKPLANE) {
+		if (!test_bit(XGBE_LINK_INIT, &pdata->dev_state)) {
+			netif_carrier_off(pdata->netdev);
+			*an_restart = 1;
+		}
+	}
+
 	/* No link, attempt a receiver reset cycle */
 	if (phy_data->rrc_count++ > XGBE_RRC_FREQUENCY) {
 		phy_data->rrc_count = 0;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 4/4] amd-xgbe: Fix network fluctuations when using 1G BELFUSE SFP
  2021-02-12 18:00 [PATCH 0/4] Bug fixes to amd-xgbe driver Shyam Sundar S K
                   ` (2 preceding siblings ...)
  2021-02-12 18:00 ` [PATCH 3/4] amd-xgbe: Reset link when the link never comes back Shyam Sundar S K
@ 2021-02-12 18:00 ` Shyam Sundar S K
  2021-02-12 18:56   ` Florian Fainelli
                     ` (2 more replies)
  2021-02-12 18:57 ` [PATCH 0/4] Bug fixes to amd-xgbe driver Florian Fainelli
  4 siblings, 3 replies; 12+ messages in thread
From: Shyam Sundar S K @ 2021-02-12 18:00 UTC (permalink / raw)
  To: Tom Lendacky, David S . Miller, Jakub Kicinski, netdev
  Cc: Sudheesh.Mavila, Shyam Sundar S K, Sudheesh Mavila

Frequent link up/down events can happen when a Bel Fuse SFP part is
connected to the amd-xgbe device. Try to avoid the frequent link
issues by resetting the PHY as documented in Bel Fuse SFP datasheets.

Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index 1bb468ac9635..e328fd9bd294 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -922,6 +922,12 @@ static bool xgbe_phy_belfuse_phy_quirks(struct xgbe_prv_data *pdata)
 	if ((phy_id & 0xfffffff0) != 0x03625d10)
 		return false;
 
+	/* Reset PHY - wait for self-clearing reset bit to clear */
+	reg = phy_read(phy_data->phydev, 0x00);
+	phy_write(phy_data->phydev, 0x00, reg | 0x8000);
+	read_poll_timeout(phy_read, reg, !(reg & 0x8000) || reg < 0,
+			  10000, 50000, true, phy_data->phydev, 0x0);
+
 	/* Disable RGMII mode */
 	phy_write(phy_data->phydev, 0x18, 0x7007);
 	reg = phy_read(phy_data->phydev, 0x18);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/4] amd-xgbe: Reset the PHY rx data path when mailbox command timeout
  2021-02-12 18:00 ` [PATCH 1/4] amd-xgbe: Reset the PHY rx data path when mailbox command timeout Shyam Sundar S K
@ 2021-02-12 18:43   ` Tom Lendacky
  0 siblings, 0 replies; 12+ messages in thread
From: Tom Lendacky @ 2021-02-12 18:43 UTC (permalink / raw)
  To: Shyam Sundar S K, David S . Miller, Jakub Kicinski, netdev
  Cc: Sudheesh.Mavila

On 2/12/21 12:00 PM, Shyam Sundar S K wrote:
> Sometimes mailbox commands timeout when the RX data path becomes
> unresponsive. This prevents the submission of new mailbox commands to DXIO.
> This patch identifies the timeout and resets the RX data path so that the
> next message can be submitted properly.
> 
> Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
> Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>

I believe you need a Co-developed-by: before Sudheesh's Signed-off-by: 
if he was a co-developer.

> ---
>   drivers/net/ethernet/amd/xgbe/xgbe-common.h | 13 +++++++++++
>   drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 25 ++++++++++++++++++++-
>   2 files changed, 37 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
> index b40d4377cc71..318817450fbd 100644
> --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
> +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
> @@ -1279,10 +1279,18 @@
>   #define MDIO_PMA_10GBR_FECCTRL		0x00ab
>   #endif
>   
> +#ifndef MDIO_PMA_RX_CTRL1
> +#define MDIO_PMA_RX_CTRL1		0x8051
> +#endif
> +
>   #ifndef MDIO_PCS_DIG_CTRL
>   #define MDIO_PCS_DIG_CTRL		0x8000
>   #endif
>   
> +#ifndef MDIO_PCS_DIGITAL_STAT
> +#define MDIO_PCS_DIGITAL_STAT		0x8010
> +#endif
> +
>   #ifndef MDIO_AN_XNP
>   #define MDIO_AN_XNP			0x0016
>   #endif
> @@ -1358,6 +1366,7 @@
>   #define XGBE_KR_TRAINING_ENABLE		BIT(1)
>   
>   #define XGBE_PCS_CL37_BP		BIT(12)
> +#define XGBE_PCS_PSEQ_STATE_BIT		0x10

See comment below, this is a 3-bit field.

>   
>   #define XGBE_AN_CL37_INT_CMPLT		BIT(0)
>   #define XGBE_AN_CL37_INT_MASK		0x01
> @@ -1375,6 +1384,10 @@
>   #define XGBE_PMA_CDR_TRACK_EN_OFF	0x00
>   #define XGBE_PMA_CDR_TRACK_EN_ON	0x01
>   
> +#define XGBE_PMA_RX_RST_0_MASK		BIT(4)
> +#define XGBE_PMA_RX_RST_0_RESET_ON	0x10
> +#define XGBE_PMA_RX_RST_0_RESET_OFF	0x00
> +
>   /* Bit setting and getting macros
>    *  The get macro will extract the current bit field value from within
>    *  the variable
> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
> index 859ded0c06b0..489f1f86df99 100644
> --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
> +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
> @@ -1953,6 +1953,24 @@ static void xgbe_phy_set_redrv_mode(struct xgbe_prv_data *pdata)
>   	xgbe_phy_put_comm_ownership(pdata);
>   }
>   
> +static void xgbe_phy_rx_reset(struct xgbe_prv_data *pdata)
> +{
> +	int reg;
> +
> +	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_PCS_DIGITAL_STAT);
> +	if (reg & XGBE_PCS_PSEQ_STATE_BIT) {

The PSEQ_STATE field is a 3 bit field and I believe you're looking for a 
POWER_GOOD state, so this should be:

	reg = XMDIO_READ_BITS(pdata, MDIO_MMD_PCS, MDIO_PCS_DIGITAL_STAT
			      XGBE_PCS_PSEQ_STATE_MASK);
	if (reg == XGBE_PCS_PSEQ_STATE_POWER_GOOD) {

where the constants define in xgbe-common.h should be:

#define XGBE_PCS_PSEQ_STATE_MASK	0x1c
#define XGBE_PCS_PSEQ_STATE_POWER_GOOD	0x10


> +		/* mailbox command timed out, reset Rx block */
> +		/* Assert reset bit for 8ns and wait for 40us */

Please combine this comment and be sure to capitalize appropriately.

> +		XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_RX_CTRL1,
> +				 XGBE_PMA_RX_RST_0_MASK, XGBE_PMA_RX_RST_0_RESET_ON);
> +		ndelay(20);

This time doesn't match the comment of 8ns... which one is correct?

> +		XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_RX_CTRL1,
> +				 XGBE_PMA_RX_RST_0_MASK, XGBE_PMA_RX_RST_0_RESET_OFF);
> +		usleep_range(40, 50);
> +		netif_err(pdata, link, pdata->netdev, "firmware mailbox reset performed\n");
> +	}
> +}
> +
>   static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata,
>   					unsigned int cmd, unsigned int sub_cmd)
>   {
> @@ -1960,9 +1978,11 @@ static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata,
>   	unsigned int wait;
>   
>   	/* Log if a previous command did not complete */
> -	if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS))
> +	if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) {
>   		netif_dbg(pdata, link, pdata->netdev,
>   			  "firmware mailbox not ready for command\n");
> +			xgbe_phy_rx_reset(pdata);

An extra tab here.

Thanks,
Tom

> +	}
>   
>   	/* Construct the command */
>   	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, cmd);
> @@ -1984,6 +2004,9 @@ static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata,
>   
>   	netif_dbg(pdata, link, pdata->netdev,
>   		  "firmware mailbox command did not complete\n");
> +
> +	/* Reset on error */
> +	xgbe_phy_rx_reset(pdata);
>   }
>   
>   static void xgbe_phy_rrc(struct xgbe_prv_data *pdata)
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/4] amd-xgbe: Fix NETDEV WATCHDOG transmit queue timeout warning
  2021-02-12 18:00 ` [PATCH 2/4] amd-xgbe: Fix NETDEV WATCHDOG transmit queue timeout warning Shyam Sundar S K
@ 2021-02-12 18:48   ` Tom Lendacky
  0 siblings, 0 replies; 12+ messages in thread
From: Tom Lendacky @ 2021-02-12 18:48 UTC (permalink / raw)
  To: Shyam Sundar S K, David S . Miller, Jakub Kicinski, netdev
  Cc: Sudheesh.Mavila

On 2/12/21 12:00 PM, Shyam Sundar S K wrote:
> Current driver calls the netif_carrier_off() during the later point in
> time to tear down the link which causes the netdev watchdog to timeout.

This is a bit confusing...  how about:

The current driver calls netif_carrier_off() late in the link tear down 
which can result in a netdev watchdog timeout.

> 
> Calling netif_carrier_off() immediately after netif_tx_stop_all_queues()
> would avoids the warning.

s/would//

> 
>   ------------[ cut here ]------------
>   NETDEV WATCHDOG: enp3s0f2 (amd-xgbe): transmit queue 0 timed out
>   WARNING: CPU: 3 PID: 0 at net/sched/sch_generic.c:461 dev_watchdog+0x20d/0x220
>   Modules linked in: amd_xgbe(E)  amd-xgbe 0000:03:00.2 enp3s0f2: Link is Down
>   CPU: 3 PID: 0 Comm: swapper/3 Tainted: G            E
>   Hardware name: AMD Bilby-RV2/Bilby-RV2, BIOS RBB1202A 10/18/2019
>   RIP: 0010:dev_watchdog+0x20d/0x220
>   Code: 00 49 63 4e e0 eb 92 4c 89 e7 c6 05 c6 e2 c1 00 01 e8 e7 ce fc ff 89 d9 48
>   RSP: 0018:ffff90cfc28c3e88 EFLAGS: 00010286
>   RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006
>   RDX: 0000000000000007 RSI: 0000000000000086 RDI: ffff90cfc28d63c0
>   RBP: ffff90cfb977845c R08: 0000000000000050 R09: 0000000000196018
>   R10: ffff90cfc28c3ef8 R11: 0000000000000000 R12: ffff90cfb9778000
>   R13: 0000000000000003 R14: ffff90cfb9778480 R15: 0000000000000010
>   FS:  0000000000000000(0000) GS:ffff90cfc28c0000(0000) knlGS:0000000000000000
>   CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>   CR2: 00007f240ff2d9d0 CR3: 00000001e3e0a000 CR4: 00000000003406e0
>   Call Trace:
>    <IRQ>
>    ? pfifo_fast_reset+0x100/0x100
>    call_timer_fn+0x2b/0x130
>    run_timer_softirq+0x3e8/0x440
>    ? enqueue_hrtimer+0x39/0x90
> 
> Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
> Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>

Same comment about Co-developed-by: here as previous patch.

With the above comments addressed,

Acked-by: Tom Lendacky <thomas.lendacky@amd.com>

> ---
>   drivers/net/ethernet/amd/xgbe/xgbe-drv.c  | 1 +
>   drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 1 -
>   2 files changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
> index 2709a2db5657..395eb0b52680 100644
> --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
> +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
> @@ -1368,6 +1368,7 @@ static void xgbe_stop(struct xgbe_prv_data *pdata)
>   		return;
>   
>   	netif_tx_stop_all_queues(netdev);
> +	netif_carrier_off(pdata->netdev);
>   
>   	xgbe_stop_timers(pdata);
>   	flush_workqueue(pdata->dev_workqueue);
> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
> index 93ef5a30cb8d..19ee4db0156d 100644
> --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
> +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
> @@ -1396,7 +1396,6 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata)
>   	pdata->phy_if.phy_impl.stop(pdata);
>   
>   	pdata->phy.link = 0;
> -	netif_carrier_off(pdata->netdev);
>   
>   	xgbe_phy_adjust_link(pdata);
>   }
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4] amd-xgbe: Fix network fluctuations when using 1G BELFUSE SFP
  2021-02-12 18:00 ` [PATCH 4/4] amd-xgbe: Fix network fluctuations when using 1G BELFUSE SFP Shyam Sundar S K
@ 2021-02-12 18:56   ` Florian Fainelli
  2021-02-12 19:05   ` Tom Lendacky
  2021-02-12 21:14   ` Heiner Kallweit
  2 siblings, 0 replies; 12+ messages in thread
From: Florian Fainelli @ 2021-02-12 18:56 UTC (permalink / raw)
  To: Shyam Sundar S K, Tom Lendacky, David S . Miller, Jakub Kicinski, netdev
  Cc: Sudheesh.Mavila



On 2/12/2021 10:00 AM, Shyam Sundar S K wrote:
> Frequent link up/down events can happen when a Bel Fuse SFP part is
> connected to the amd-xgbe device. Try to avoid the frequent link
> issues by resetting the PHY as documented in Bel Fuse SFP datasheets.
> 
> Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
> Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
> ---
>  drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 6 ++++++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
> index 1bb468ac9635..e328fd9bd294 100644
> --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
> +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
> @@ -922,6 +922,12 @@ static bool xgbe_phy_belfuse_phy_quirks(struct xgbe_prv_data *pdata)
>  	if ((phy_id & 0xfffffff0) != 0x03625d10)
>  		return false;
>  
> +	/* Reset PHY - wait for self-clearing reset bit to clear */
> +	reg = phy_read(phy_data->phydev, 0x00);
> +	phy_write(phy_data->phydev, 0x00, reg | 0x8000);
> +	read_poll_timeout(phy_read, reg, !(reg & 0x8000) || reg < 0,
> +			  10000, 50000, true, phy_data->phydev, 0x0);

Can you use the standard register definitions from include/linux/mii.h
here? You are doing a software reset of the PHY through the BMCR.RESET
register, so you might as well make that clear.

> +
>  	/* Disable RGMII mode */
>  	phy_write(phy_data->phydev, 0x18, 0x7007);
>  	reg = phy_read(phy_data->phydev, 0x18);
> 

-- 
Florian

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/4] Bug fixes to amd-xgbe driver
  2021-02-12 18:00 [PATCH 0/4] Bug fixes to amd-xgbe driver Shyam Sundar S K
                   ` (3 preceding siblings ...)
  2021-02-12 18:00 ` [PATCH 4/4] amd-xgbe: Fix network fluctuations when using 1G BELFUSE SFP Shyam Sundar S K
@ 2021-02-12 18:57 ` Florian Fainelli
  4 siblings, 0 replies; 12+ messages in thread
From: Florian Fainelli @ 2021-02-12 18:57 UTC (permalink / raw)
  To: Shyam Sundar S K, Tom Lendacky, David S . Miller, Jakub Kicinski, netdev
  Cc: Sudheesh.Mavila



On 2/12/2021 10:00 AM, Shyam Sundar S K wrote:
> General fixes on amd-xgbe driver are addressed in this series, mostly
> on the mailbox communication failures and improving the link stability
> of the amd-xgbe device.
> 
> Shyam Sundar S K (4):
>   amd-xgbe: Reset the PHY rx data path when mailbox command timeout
>   amd-xgbe: Fix NETDEV WATCHDOG transmit queue timeout warning
>   amd-xgbe: Reset link when the link never comes back
>   amd-xgbe: Fix network fluctuations when using 1G BELFUSE SFP

If these are all bugfixes there would be an expectation that you provide
appropriate Fixes: tag in your commit messages to designate which commit
introduced the problem, you may also want to make it clear in the patch
subject that this is intended for the "net" tree which collects bug
fixes, see:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/networking/netdev-FAQ.rst#n28

> 
>  drivers/net/ethernet/amd/xgbe/xgbe-common.h | 13 +++++++
>  drivers/net/ethernet/amd/xgbe/xgbe-drv.c    |  1 +
>  drivers/net/ethernet/amd/xgbe/xgbe-mdio.c   |  3 +-
>  drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 39 ++++++++++++++++++++-
>  4 files changed, 53 insertions(+), 3 deletions(-)
> 

-- 
Florian

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 3/4] amd-xgbe: Reset link when the link never comes back
  2021-02-12 18:00 ` [PATCH 3/4] amd-xgbe: Reset link when the link never comes back Shyam Sundar S K
@ 2021-02-12 19:01   ` Tom Lendacky
  0 siblings, 0 replies; 12+ messages in thread
From: Tom Lendacky @ 2021-02-12 19:01 UTC (permalink / raw)
  To: Shyam Sundar S K, David S . Miller, Jakub Kicinski, netdev
  Cc: Sudheesh.Mavila

On 2/12/21 12:00 PM, Shyam Sundar S K wrote:
> Normally, auto negotiation and reconnect should be automatically done by
> the hardware. But there seems to be an issue where auto negotiation has
> to be restarted manually. This happens because of link training and so
> even though still connected to the partner the link never "comes back".
> This would need a reset to recover.

This last sentence is strange. Are you meaning to say this needs to 
restart auto-negotiation?

Please mention this pertains only to a backplane connection mode.

> 
> Also, a change in xgbe-mdio is needed to get ethtool to recognize the
> link down and get the link change message.
> 
> Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
> Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>

Same comment about Co-developed-by: as previous patch.

With those addressed,

Acked-by: Tom Lendacky <thomas.lendacky@amd.com>

> ---
>   drivers/net/ethernet/amd/xgbe/xgbe-mdio.c   | 2 +-
>   drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 8 ++++++++
>   2 files changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
> index 19ee4db0156d..4e97b4869522 100644
> --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
> +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
> @@ -1345,7 +1345,7 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata)
>   							     &an_restart);
>   	if (an_restart) {
>   		xgbe_phy_config_aneg(pdata);
> -		return;
> +		goto adjust_link;
>   	}
>   
>   	if (pdata->phy.link) {
> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
> index 489f1f86df99..1bb468ac9635 100644
> --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
> +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
> @@ -2607,6 +2607,14 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
>   	if (reg & MDIO_STAT1_LSTATUS)
>   		return 1;
>   
> +	if (pdata->phy.autoneg == AUTONEG_ENABLE &&
> +	    phy_data->port_mode == XGBE_PORT_MODE_BACKPLANE) {
> +		if (!test_bit(XGBE_LINK_INIT, &pdata->dev_state)) {
> +			netif_carrier_off(pdata->netdev);
> +			*an_restart = 1;
> +		}
> +	}
> +
>   	/* No link, attempt a receiver reset cycle */
>   	if (phy_data->rrc_count++ > XGBE_RRC_FREQUENCY) {
>   		phy_data->rrc_count = 0;
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4] amd-xgbe: Fix network fluctuations when using 1G BELFUSE SFP
  2021-02-12 18:00 ` [PATCH 4/4] amd-xgbe: Fix network fluctuations when using 1G BELFUSE SFP Shyam Sundar S K
  2021-02-12 18:56   ` Florian Fainelli
@ 2021-02-12 19:05   ` Tom Lendacky
  2021-02-12 21:14   ` Heiner Kallweit
  2 siblings, 0 replies; 12+ messages in thread
From: Tom Lendacky @ 2021-02-12 19:05 UTC (permalink / raw)
  To: Shyam Sundar S K, David S . Miller, Jakub Kicinski, netdev
  Cc: Sudheesh.Mavila

On 2/12/21 12:00 PM, Shyam Sundar S K wrote:
> Frequent link up/down events can happen when a Bel Fuse SFP part is
> connected to the amd-xgbe device. Try to avoid the frequent link
> issues by resetting the PHY as documented in Bel Fuse SFP datasheets.
> 
> Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
> Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>

Same comment about Co-developed-by: tag as previous patch.

With that addressed,

Acked-by: Tom Lendacky <thomas.lendacky@amd.com>

> ---
>   drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 6 ++++++
>   1 file changed, 6 insertions(+)
> 
> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
> index 1bb468ac9635..e328fd9bd294 100644
> --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
> +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
> @@ -922,6 +922,12 @@ static bool xgbe_phy_belfuse_phy_quirks(struct xgbe_prv_data *pdata)
>   	if ((phy_id & 0xfffffff0) != 0x03625d10)
>   		return false;
>   
> +	/* Reset PHY - wait for self-clearing reset bit to clear */
> +	reg = phy_read(phy_data->phydev, 0x00);
> +	phy_write(phy_data->phydev, 0x00, reg | 0x8000);
> +	read_poll_timeout(phy_read, reg, !(reg & 0x8000) || reg < 0,
> +			  10000, 50000, true, phy_data->phydev, 0x0);
> +
>   	/* Disable RGMII mode */
>   	phy_write(phy_data->phydev, 0x18, 0x7007);
>   	reg = phy_read(phy_data->phydev, 0x18);
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4] amd-xgbe: Fix network fluctuations when using 1G BELFUSE SFP
  2021-02-12 18:00 ` [PATCH 4/4] amd-xgbe: Fix network fluctuations when using 1G BELFUSE SFP Shyam Sundar S K
  2021-02-12 18:56   ` Florian Fainelli
  2021-02-12 19:05   ` Tom Lendacky
@ 2021-02-12 21:14   ` Heiner Kallweit
  2 siblings, 0 replies; 12+ messages in thread
From: Heiner Kallweit @ 2021-02-12 21:14 UTC (permalink / raw)
  To: Shyam Sundar S K, Tom Lendacky, David S . Miller, Jakub Kicinski, netdev
  Cc: Sudheesh.Mavila

On 12.02.2021 19:00, Shyam Sundar S K wrote:
> Frequent link up/down events can happen when a Bel Fuse SFP part is
> connected to the amd-xgbe device. Try to avoid the frequent link
> issues by resetting the PHY as documented in Bel Fuse SFP datasheets.
> 
> Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
> Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
> ---
>  drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 6 ++++++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
> index 1bb468ac9635..e328fd9bd294 100644
> --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
> +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
> @@ -922,6 +922,12 @@ static bool xgbe_phy_belfuse_phy_quirks(struct xgbe_prv_data *pdata)
>  	if ((phy_id & 0xfffffff0) != 0x03625d10)
>  		return false;
>  
> +	/* Reset PHY - wait for self-clearing reset bit to clear */
> +	reg = phy_read(phy_data->phydev, 0x00);
> +	phy_write(phy_data->phydev, 0x00, reg | 0x8000);
> +	read_poll_timeout(phy_read, reg, !(reg & 0x8000) || reg < 0,
> +			  10000, 50000, true, phy_data->phydev, 0x0);
> +

Why don't you simply use genphy_soft_reset() ?
Also it's not too nice to use magic register and bit numbers,
there are constants available, e.g. 0x00 = MII_BMCR

>  	/* Disable RGMII mode */
>  	phy_write(phy_data->phydev, 0x18, 0x7007);
>  	reg = phy_read(phy_data->phydev, 0x18);
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2021-02-12 21:15 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-12 18:00 [PATCH 0/4] Bug fixes to amd-xgbe driver Shyam Sundar S K
2021-02-12 18:00 ` [PATCH 1/4] amd-xgbe: Reset the PHY rx data path when mailbox command timeout Shyam Sundar S K
2021-02-12 18:43   ` Tom Lendacky
2021-02-12 18:00 ` [PATCH 2/4] amd-xgbe: Fix NETDEV WATCHDOG transmit queue timeout warning Shyam Sundar S K
2021-02-12 18:48   ` Tom Lendacky
2021-02-12 18:00 ` [PATCH 3/4] amd-xgbe: Reset link when the link never comes back Shyam Sundar S K
2021-02-12 19:01   ` Tom Lendacky
2021-02-12 18:00 ` [PATCH 4/4] amd-xgbe: Fix network fluctuations when using 1G BELFUSE SFP Shyam Sundar S K
2021-02-12 18:56   ` Florian Fainelli
2021-02-12 19:05   ` Tom Lendacky
2021-02-12 21:14   ` Heiner Kallweit
2021-02-12 18:57 ` [PATCH 0/4] Bug fixes to amd-xgbe driver Florian Fainelli

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).