flexcan missing error state transitions

* flexcan missing error state transitions
@ 2017-08-18  7:47 ZHU Yi (ST-FIR/ENG1-Zhu)
  2017-08-21 16:18 ` Andri Yngvason
  0 siblings, 1 reply; 28+ messages in thread
From: ZHU Yi (ST-FIR/ENG1-Zhu) @ 2017-08-18  7:47 UTC (permalink / raw)
  To: linux-can, mkl, wg
  Cc: hs, Jonas Mark (ST-FIR/ENG1), RUAN Tingquan (ST-FIR/ENG1-Zhu)

[-- Attachment #1: Type: text/plain, Size: 2890 bytes --]

Hello,

In our i.MX6 based platform, we encountered Rx overflow in flexcan (kernel v4.9-rc2),
where commit b3cf53e9 (can: flexcan: add support for timestamp based rx-offload)
fixed this issue.

However, 
commit 30164759 (can: flexcan: make use of rx-offload's irq_offload_fifo) changed 
the way of can state transition from poll state upon receive + state + error interrupt to
only update state upon state interrupt.

Due to a chip flaw in flexcan core regarding state interrupt generation, there are some
state transitions which do not generate interrupts:
(copied from TomE's post on https://community.nxp.com/thread/333839)

Interrupts     MCF53xx i.MX53 i.MX6  MSCAN
------------------------------------------
Active->Warn   No      No     Yes    Yes
Warn->Active   No      No     No     Yes
Warn->Passive  No      No     No     Yes
Passive->Warn  No      No     No     Yes
Passive->BOFF  Yes     Yes    Yes    Yes
BOFF->Active   No      No     No     Yes
Errors         Yes     Yes    Yes    No

As our application requires to monitor the can bus state change, so this function
will not work properly after we applied all the commits to flexcan from v4.11 to v4.9-rc2.
We also tested v4.11 which contains all the latest patches to flexcan, and it behaves the same.

Sadly, the flexcan core does not generate error state interrupts for all state transitions.
This is required for the current flexcan driver to operate smoothly. We propose the following
workaround to mitigate this problem:
1. update state upon _any_ interrupt
2. update state when user explicitly inquires (via do_get_state callback)

Item 1 will keep the state correct as long as the bus is attached and can frames are send,
so the core can	generates Tx/Rx/State/Error interrupts accordingly.

However, if there is single node on the bus, then this node cannot enter bus off state
(only becomes error passive), and the core only generates a single interrupt (warn + ack error),
and no more. It looks like i.MX6 disables flood of error interrupts in auto-retransmission,
otherwise, the state would be updated. Nonetheless, the old driver (in v4.9-rc2) also cannot report
error passive in this case, even it postponed (a little) to read FLT_CONF in napi_poll.

This means the user space cannot simply rely on receiving a state change notification via a 
SocketCAN error frame. It needs to do active polling to get the correct state. But without proper
error interrupts or CAN traffic this is impossible. But essentially it is still an improvement over
v4.9-rc2 because there you could not get the proper error passive state even if polling was done
from user space. Hence item 2 allows user space to get the correct state manually.

The attached patch implements this for i.MX6 and has been tested on i.MX6.
It applies cleanly to v4.13-rc5.

Best regards,
Yi ZHU


[-- Attachment #2: 0001-can-flexcan-mitigate-incomplete-state-interrupt.patch --]
[-- Type: application/octet-stream, Size: 5938 bytes --]

From 6c0c111f0d8c318a459606cf5871dcc56512fb84 Mon Sep 17 00:00:00 2001
From: Zhu Yi <yi.zhu5@cn.bosch.com>
Date: Thu, 17 Aug 2017 14:45:19 +0800
Subject: [PATCH] can: flexcan: mitigate incomplete state interrupt

This patch adds a workaround to mitigate missing state transitions
on i.MX6 flexcan core which has incomplete state interrupt by:
1. update state upon _any_ interrupt
2. update state when user explicitly inquires (do_get_state())

Signed-off-by: Zhu Yi <yi.zhu5@cn.bosch.com>
Signed-off-by: Mark Jonas <mark.jonas@de.bosch.com>
Tested-by: Ruan Tingquan <tingquan.ruan@cn.bosch.com>
---
 drivers/net/can/flexcan.c | 91 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 72 insertions(+), 19 deletions(-)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 13f0f21..ac37fe4 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -199,6 +199,20 @@
 #define FLEXCAN_QUIRK_DISABLE_MECR	BIT(4) /* Disable Memory error detection */
 #define FLEXCAN_QUIRK_USE_OFF_TIMESTAMP	BIT(5) /* Use timestamp based offloading */
 
+/* FLEXCAN does not create a complete set of error state IRQs.
+ *
+ * Error State IRQ  i.MX53  i.MX6
+ * ------------------------------
+ * Active->Warn       no     yes
+ * Warn->Active       no      no
+ * Warn->Passive      no      no
+ * Passive->Warn      no      no
+ * Passive->BOFF     yes     yes
+ * BOFF->Active       no      no
+ * Errors            yes     yes
+ */
+#define FLEXCAN_QUIRK_BROKEN_STATE_IRQ	BIT(6) /* Incomplete state IRQ */
+
 /* Structure of the message buffer */
 struct flexcan_mb {
 	u32 can_ctrl;
@@ -288,7 +302,7 @@ static const struct flexcan_devtype_data fsl_imx28_devtype_data;
 
 static const struct flexcan_devtype_data fsl_imx6q_devtype_data = {
 	.quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
-		FLEXCAN_QUIRK_USE_OFF_TIMESTAMP,
+		FLEXCAN_QUIRK_USE_OFF_TIMESTAMP | FLEXCAN_QUIRK_BROKEN_STATE_IRQ,
 };
 
 static const struct flexcan_devtype_data fsl_vf610_devtype_data = {
@@ -478,6 +492,56 @@ static int flexcan_get_berr_counter(const struct net_device *dev,
 	return err;
 }
 
+static enum can_state __flexcan_get_state(const struct net_device *dev,
+                                          u32 reg_esr,
+                                          enum can_state *rx_state,
+                                          enum can_state *tx_state)
+{
+	enum can_state new_state;
+	int flt;
+	struct can_berr_counter bec;
+
+	flt = reg_esr & FLEXCAN_ESR_FLT_CONF_MASK;
+	if (likely(flt == FLEXCAN_ESR_FLT_CONF_ACTIVE)) {
+		*tx_state = unlikely(reg_esr & FLEXCAN_ESR_TX_WRN) ?
+			CAN_STATE_ERROR_WARNING : CAN_STATE_ERROR_ACTIVE;
+		*rx_state = unlikely(reg_esr & FLEXCAN_ESR_RX_WRN) ?
+			CAN_STATE_ERROR_WARNING : CAN_STATE_ERROR_ACTIVE;
+		new_state = max(*tx_state, *rx_state);
+	} else {
+		__flexcan_get_berr_counter(dev, &bec);
+		new_state = flt == FLEXCAN_ESR_FLT_CONF_PASSIVE ?
+			CAN_STATE_ERROR_PASSIVE : CAN_STATE_BUS_OFF;
+		*rx_state = bec.rxerr >= bec.txerr ? new_state : 0;
+		*tx_state = bec.rxerr <= bec.txerr ? new_state : 0;
+	}
+
+	return new_state;
+}
+
+static int flexcan_get_state(const struct net_device *dev,
+			     enum can_state *state)
+{
+	const struct flexcan_priv *priv = netdev_priv(dev);
+	struct flexcan_regs __iomem *regs = priv->regs;
+	u32 reg_esr;
+	enum can_state rx_state, tx_state;
+
+	*state = priv->can.state;
+
+	/* only update the states which are supposed to be managed by hardware
+	 * in case the core has incomplete state interrupt
+	 */
+	if ((priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_STATE_IRQ) &&
+	    (priv->can.state != CAN_STATE_STOPPED) &&
+	    (priv->can.state != CAN_STATE_SLEEPING)) {
+		reg_esr = flexcan_read(&regs->esr);
+		*state = __flexcan_get_state(dev, reg_esr, &rx_state, &tx_state);
+	}
+
+	return 0;
+}
+
 static int flexcan_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	const struct flexcan_priv *priv = netdev_priv(dev);
@@ -587,23 +651,8 @@ static void flexcan_irq_state(struct net_device *dev, u32 reg_esr)
 	struct sk_buff *skb;
 	struct can_frame *cf;
 	enum can_state new_state, rx_state, tx_state;
-	int flt;
-	struct can_berr_counter bec;
 
-	flt = reg_esr & FLEXCAN_ESR_FLT_CONF_MASK;
-	if (likely(flt == FLEXCAN_ESR_FLT_CONF_ACTIVE)) {
-		tx_state = unlikely(reg_esr & FLEXCAN_ESR_TX_WRN) ?
-			CAN_STATE_ERROR_WARNING : CAN_STATE_ERROR_ACTIVE;
-		rx_state = unlikely(reg_esr & FLEXCAN_ESR_RX_WRN) ?
-			CAN_STATE_ERROR_WARNING : CAN_STATE_ERROR_ACTIVE;
-		new_state = max(tx_state, rx_state);
-	} else {
-		__flexcan_get_berr_counter(dev, &bec);
-		new_state = flt == FLEXCAN_ESR_FLT_CONF_PASSIVE ?
-			CAN_STATE_ERROR_PASSIVE : CAN_STATE_BUS_OFF;
-		rx_state = bec.rxerr >= bec.txerr ? new_state : 0;
-		tx_state = bec.rxerr <= bec.txerr ? new_state : 0;
-	}
+	new_state = __flexcan_get_state(dev, reg_esr, &rx_state, &tx_state);
 
 	/* state hasn't changed */
 	if (likely(new_state == priv->can.state))
@@ -765,8 +814,9 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
 		flexcan_write(reg_esr & FLEXCAN_ESR_ALL_INT, &regs->esr);
 	}
 
-	/* state change interrupt */
-	if (reg_esr & FLEXCAN_ESR_ERR_STATE)
+	/* state change interrupt or on incomplete state interrupt core */
+	if ((reg_esr & FLEXCAN_ESR_ERR_STATE) ||
+	    (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_STATE_IRQ))
 		flexcan_irq_state(dev, reg_esr);
 
 	/* bus error IRQ - handle if bus error reporting is activated */
@@ -1265,6 +1315,9 @@ static int flexcan_probe(struct platform_device *pdev)
 	priv->devtype_data = devtype_data;
 	priv->reg_xceiver = reg_xceiver;
 
+	if (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_STATE_IRQ)
+		priv->can.do_get_state = flexcan_get_state;
+
 	if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
 		priv->tx_mb_idx = FLEXCAN_TX_MB_OFF_TIMESTAMP;
 		priv->tx_mb_reserved = &regs->mb[FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP];
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 28+ messages in thread