All of lore.kernel.org
 help / color / mirror / Atom feed
* [patch 00/10] can: c_can: Another pile of fixes and improvements
@ 2014-04-04 15:24 Thomas Gleixner
  2014-04-04 15:24 ` [patch 01/10] can: c_can: Fix startup logic Thomas Gleixner
                   ` (9 more replies)
  0 siblings, 10 replies; 26+ messages in thread
From: Thomas Gleixner @ 2014-04-04 15:24 UTC (permalink / raw)
  To: linux-can; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, Alexander Stein

This series is on top of linux-can-fixes-for-3.15-20140401.

It contains a bunch of fixes for fatal wreckage and a few improvements
including a new version of the interrupt reduction patch. The observed
stall is gone now.

One fix is for a pattern copied all over drivers/net/can/

        netif_receive_skb(skb);
        stats->rx_packets++;
        stats->rx_bytes += frame->can_dlc;

After handing off the skb to the network stack, we cannot access it
anymore as netif_receive_skb() might free it. I have no time to fix
that all up now, but it's trivial enough to do.

Thanks,

	tglx


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 01/10] can: c_can: Fix startup logic
  2014-04-04 15:24 [patch 00/10] can: c_can: Another pile of fixes and improvements Thomas Gleixner
@ 2014-04-04 15:24 ` Thomas Gleixner
  2014-04-04 15:24 ` [patch 02/10] can: c_can: Make bus off interrupt disable logic work Thomas Gleixner
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2014-04-04 15:24 UTC (permalink / raw)
  To: linux-can; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, Alexander Stein

[-- Attachment #1: can-c_can-fix-startup-logic.patch --]
[-- Type: text/plain, Size: 4421 bytes --]

c_can_start() enables interrupts way too early. The first enabling
happens when setting the control mode in c_can_chip_config(), which
allows an interrupt in the middle of an halfconfigured device. That's
wrong but not fatal if called from c_can_open as napi is not yet
enabled, but when called from c_can_set_mode() it is....

The second enabling is at the at the end of c_can_start, but that
happens before napi_enable() and that means that an interrupt which
comes in before napi_enable() will disable interrupts again and call
napi_schedule, which ignores the request and the later napi_enable()
is not making thinks work either. So the interface is up with all
device interrupts enabled. Lets you send 16 packets and then game
over.

Move the device interrupt after napi_enable() and add it to the other
callsites of c_can_start() in c_can_set_mode() and c_can_power_up()

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/net/can/c_can/c_can.c |   35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

Index: linux-can/drivers/net/can/c_can/c_can.c
===================================================================
--- linux-can.orig/drivers/net/can/c_can/c_can.c
+++ linux-can/drivers/net/can/c_can/c_can.c
@@ -612,30 +612,22 @@ static int c_can_chip_config(struct net_
 	struct c_can_priv *priv = netdev_priv(dev);
 
 	/* enable automatic retransmission */
-	priv->write_reg(priv, C_CAN_CTRL_REG,
-			CONTROL_ENABLE_AR);
+	priv->write_reg(priv, C_CAN_CTRL_REG, CONTROL_ENABLE_AR);
 
 	if ((priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) &&
 	    (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK)) {
 		/* loopback + silent mode : useful for hot self-test */
-		priv->write_reg(priv, C_CAN_CTRL_REG, CONTROL_EIE |
-				CONTROL_SIE | CONTROL_IE | CONTROL_TEST);
-		priv->write_reg(priv, C_CAN_TEST_REG,
-				TEST_LBACK | TEST_SILENT);
+		priv->write_reg(priv, C_CAN_CTRL_REG, CONTROL_TEST);
+		priv->write_reg(priv, C_CAN_TEST_REG, TEST_LBACK | TEST_SILENT);
 	} else if (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK) {
 		/* loopback mode : useful for self-test function */
-		priv->write_reg(priv, C_CAN_CTRL_REG, CONTROL_EIE |
-				CONTROL_SIE | CONTROL_IE | CONTROL_TEST);
+		priv->write_reg(priv, C_CAN_CTRL_REG, CONTROL_TEST);
 		priv->write_reg(priv, C_CAN_TEST_REG, TEST_LBACK);
 	} else if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) {
 		/* silent mode : bus-monitoring mode */
-		priv->write_reg(priv, C_CAN_CTRL_REG, CONTROL_EIE |
-				CONTROL_SIE | CONTROL_IE | CONTROL_TEST);
+		priv->write_reg(priv, C_CAN_CTRL_REG, CONTROL_TEST);
 		priv->write_reg(priv, C_CAN_TEST_REG, TEST_SILENT);
-	} else
-		/* normal mode*/
-		priv->write_reg(priv, C_CAN_CTRL_REG,
-				CONTROL_EIE | CONTROL_SIE | CONTROL_IE);
+	}
 
 	/* configure message objects */
 	c_can_configure_msg_objects(dev);
@@ -662,9 +654,6 @@ static int c_can_start(struct net_device
 	/* reset tx helper pointers */
 	priv->tx_next = priv->tx_echo = 0;
 
-	/* enable status change, error and module interrupts */
-	c_can_enable_all_interrupts(priv, ENABLE_ALL_INTERRUPTS);
-
 	return 0;
 }
 
@@ -681,6 +670,7 @@ static void c_can_stop(struct net_device
 
 static int c_can_set_mode(struct net_device *dev, enum can_mode mode)
 {
+	struct c_can_priv *priv = netdev_priv(dev);
 	int err;
 
 	switch (mode) {
@@ -689,6 +679,8 @@ static int c_can_set_mode(struct net_dev
 		if (err)
 			return err;
 		netif_wake_queue(dev);
+		/* enable status change, error and module interrupts */
+		c_can_enable_all_interrupts(priv, ENABLE_ALL_INTERRUPTS);
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -1184,6 +1176,8 @@ static int c_can_open(struct net_device
 	can_led_event(dev, CAN_LED_EVENT_OPEN);
 
 	napi_enable(&priv->napi);
+	/* enable status change, error and module interrupts */
+	c_can_enable_all_interrupts(priv, ENABLE_ALL_INTERRUPTS);
 	netif_start_queue(dev);
 
 	return 0;
@@ -1281,6 +1275,7 @@ int c_can_power_up(struct net_device *de
 	u32 val;
 	unsigned long time_out;
 	struct c_can_priv *priv = netdev_priv(dev);
+	int ret;
 
 	if (!(dev->flags & IFF_UP))
 		return 0;
@@ -1307,7 +1302,11 @@ int c_can_power_up(struct net_device *de
 	if (time_after(jiffies, time_out))
 		return -ETIMEDOUT;
 
-	return c_can_start(dev);
+	ret = c_can_start(dev);
+	if (!ret)
+		c_can_enable_all_interrupts(priv, ENABLE_ALL_INTERRUPTS);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(c_can_power_up);
 #endif



^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 03/10] can: c_can: Do not access skb after net_receive_skb()
  2014-04-04 15:24 [patch 00/10] can: c_can: Another pile of fixes and improvements Thomas Gleixner
  2014-04-04 15:24 ` [patch 01/10] can: c_can: Fix startup logic Thomas Gleixner
  2014-04-04 15:24 ` [patch 02/10] can: c_can: Make bus off interrupt disable logic work Thomas Gleixner
@ 2014-04-04 15:24 ` Thomas Gleixner
  2014-04-04 15:24 ` [patch 04/10] can: c_can: Handle state change correctly Thomas Gleixner
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2014-04-04 15:24 UTC (permalink / raw)
  To: linux-can; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, Alexander Stein

[-- Attachment #1: can-c_can-do-not-access-skb-after-netif_rx.patch --]
[-- Type: text/plain, Size: 1265 bytes --]

There is no guarantee that the skb is in the same state after calling
net_receive_skb(). It might be freed or reused.

The whole can subsystem is full of this. Copy and paste ....

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/net/can/c_can/c_can.c |    9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

Index: linux-can/drivers/net/can/c_can/c_can.c
===================================================================
--- linux-can.orig/drivers/net/can/c_can/c_can.c
+++ linux-can/drivers/net/can/c_can/c_can.c
@@ -429,10 +429,10 @@ static int c_can_read_msg_object(struct
 		}
 	}
 
-	netif_receive_skb(skb);
-
 	stats->rx_packets++;
 	stats->rx_bytes += frame->can_dlc;
+
+	netif_receive_skb(skb);
 	return 0;
 }
 
@@ -960,9 +960,9 @@ static int c_can_handle_state_change(str
 		break;
 	}
 
-	netif_receive_skb(skb);
 	stats->rx_packets++;
 	stats->rx_bytes += cf->can_dlc;
+	netif_receive_skb(skb);
 
 	return 1;
 }
@@ -1033,10 +1033,9 @@ static int c_can_handle_bus_err(struct n
 	/* set a `lec` value so that we can check for updates later */
 	priv->write_reg(priv, C_CAN_STS_REG, LEC_UNUSED);
 
-	netif_receive_skb(skb);
 	stats->rx_packets++;
 	stats->rx_bytes += cf->can_dlc;
-
+	netif_receive_skb(skb);
 	return 1;
 }
 



^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 02/10] can: c_can: Make bus off interrupt disable logic work
  2014-04-04 15:24 [patch 00/10] can: c_can: Another pile of fixes and improvements Thomas Gleixner
  2014-04-04 15:24 ` [patch 01/10] can: c_can: Fix startup logic Thomas Gleixner
@ 2014-04-04 15:24 ` Thomas Gleixner
  2014-04-04 15:24 ` [patch 03/10] can: c_can: Do not access skb after net_receive_skb() Thomas Gleixner
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2014-04-04 15:24 UTC (permalink / raw)
  To: linux-can; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, Alexander Stein

[-- Attachment #1: can-c_can-Make-bus-off-interrupt-disable-logic-work.patch --]
[-- Type: text/plain, Size: 1928 bytes --]

The state change handler is called with device interrupts disabled
already. So no point in disabling them again when we enter bus off
state.

But what's worse is that we reenable the interrupts at the end of NAPI
poll unconditionally. So c_can_start() which is called from the
restart timer can trigger interrupts which confuse the hell out of the
half reinitialized driver/hw.

Remove the pointless device interrupt disable in the BUS_OFF handler
and prevent reenabling the device interrupts at the end of the poll
routine when the current state is BUS_OFF.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/net/can/c_can/c_can.c |   11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

Index: linux-can/drivers/net/can/c_can/c_can.c
===================================================================
--- linux-can.orig/drivers/net/can/c_can/c_can.c
+++ linux-can/drivers/net/can/c_can/c_can.c
@@ -954,11 +954,6 @@ static int c_can_handle_state_change(str
 		/* bus-off state */
 		priv->can.state = CAN_STATE_BUS_OFF;
 		cf->can_id |= CAN_ERR_BUSOFF;
-		/*
-		 * disable all interrupts in bus-off mode to ensure that
-		 * the CPU is not hogged down
-		 */
-		c_can_enable_all_interrupts(priv, DISABLE_ALL_INTERRUPTS);
 		can_bus_off(dev);
 		break;
 	default:
@@ -1089,6 +1084,7 @@ static int c_can_poll(struct napi_struct
 			netdev_dbg(dev, "entered bus off state\n");
 			work_done += c_can_handle_state_change(dev,
 						C_CAN_BUS_OFF);
+			goto end;
 		}
 
 		/* handle bus recovery events */
@@ -1122,8 +1118,9 @@ static int c_can_poll(struct napi_struct
 end:
 	if (work_done < quota) {
 		napi_complete(napi);
-		/* enable all IRQs */
-		c_can_enable_all_interrupts(priv, ENABLE_ALL_INTERRUPTS);
+		/* enable all IRQs if we are not in bus off state */
+		if (priv->can.state != CAN_STATE_BUS_OFF)
+			c_can_enable_all_interrupts(priv, ENABLE_ALL_INTERRUPTS);
 	}
 
 	return work_done;



^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 04/10] can: c_can: Handle state change correctly
  2014-04-04 15:24 [patch 00/10] can: c_can: Another pile of fixes and improvements Thomas Gleixner
                   ` (2 preceding siblings ...)
  2014-04-04 15:24 ` [patch 03/10] can: c_can: Do not access skb after net_receive_skb() Thomas Gleixner
@ 2014-04-04 15:24 ` Thomas Gleixner
  2014-04-04 15:24 ` [patch 05/10] can: c_can: Fix berr reporting Thomas Gleixner
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2014-04-04 15:24 UTC (permalink / raw)
  To: linux-can; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, Alexander Stein

[-- Attachment #1: can-c_can-fix-state-change.patch --]
[-- Type: text/plain, Size: 2188 bytes --]

If the allocation of an error skb fails, the state change handling
returns w/o doing any work. That leaves the interface in a wreckaged
state as the internal status can.state is wrong.

Split the interface handling and the skb handling.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/net/can/c_can/c_can.c |   25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

Index: linux-can/drivers/net/can/c_can/c_can.c
===================================================================
--- linux-can.orig/drivers/net/can/c_can/c_can.c
+++ linux-can/drivers/net/can/c_can/c_can.c
@@ -914,6 +914,26 @@ static int c_can_handle_state_change(str
 	struct sk_buff *skb;
 	struct can_berr_counter bec;
 
+	switch (error_type) {
+	case C_CAN_ERROR_WARNING:
+		/* error warning state */
+		priv->can.can_stats.error_warning++;
+		priv->can.state = CAN_STATE_ERROR_WARNING;
+		break;
+	case C_CAN_ERROR_PASSIVE:
+		/* error passive state */
+		priv->can.can_stats.error_passive++;
+		priv->can.state = CAN_STATE_ERROR_PASSIVE;
+		break;
+	case C_CAN_BUS_OFF:
+		/* bus-off state */
+		priv->can.state = CAN_STATE_BUS_OFF;
+		can_bus_off(dev);
+		break;
+	default:
+		break;
+	}
+
 	/* propagate the error condition to the CAN stack */
 	skb = alloc_can_err_skb(dev, &cf);
 	if (unlikely(!skb))
@@ -927,8 +947,6 @@ static int c_can_handle_state_change(str
 	switch (error_type) {
 	case C_CAN_ERROR_WARNING:
 		/* error warning state */
-		priv->can.can_stats.error_warning++;
-		priv->can.state = CAN_STATE_ERROR_WARNING;
 		cf->can_id |= CAN_ERR_CRTL;
 		cf->data[1] = (bec.txerr > bec.rxerr) ?
 			CAN_ERR_CRTL_TX_WARNING :
@@ -939,8 +957,6 @@ static int c_can_handle_state_change(str
 		break;
 	case C_CAN_ERROR_PASSIVE:
 		/* error passive state */
-		priv->can.can_stats.error_passive++;
-		priv->can.state = CAN_STATE_ERROR_PASSIVE;
 		cf->can_id |= CAN_ERR_CRTL;
 		if (rx_err_passive)
 			cf->data[1] |= CAN_ERR_CRTL_RX_PASSIVE;
@@ -952,7 +968,6 @@ static int c_can_handle_state_change(str
 		break;
 	case C_CAN_BUS_OFF:
 		/* bus-off state */
-		priv->can.state = CAN_STATE_BUS_OFF;
 		cf->can_id |= CAN_ERR_BUSOFF;
 		can_bus_off(dev);
 		break;



^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 05/10] can: c_can: Fix berr reporting
  2014-04-04 15:24 [patch 00/10] can: c_can: Another pile of fixes and improvements Thomas Gleixner
                   ` (3 preceding siblings ...)
  2014-04-04 15:24 ` [patch 04/10] can: c_can: Handle state change correctly Thomas Gleixner
@ 2014-04-04 15:24 ` Thomas Gleixner
  2014-04-04 15:24 ` [patch 07/10] can: c_can: Simplify buffer reenabling Thomas Gleixner
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2014-04-04 15:24 UTC (permalink / raw)
  To: linux-can; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, Alexander Stein

[-- Attachment #1: can-c_can-fix-berr-reporting.patch --]
[-- Type: text/plain, Size: 2410 bytes --]

Reading the LEC type with

  return (mode & ENABLED) && (status & LEC_MASK);

is not guaranteed to return (status & LEC_MASK) if the enabled bit in
mode is set. It's guaranteed to return 0 or !=0 because the && is a
boolean operator.

Remove the inline function and call unconditionally into the
berr_handling code and return early when the reporting is disabled.

That's also a preparation for keeping the stats in order. See next
patch.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/net/can/c_can/c_can.c |   16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

Index: linux-can/drivers/net/can/c_can/c_can.c
===================================================================
--- linux-can.orig/drivers/net/can/c_can/c_can.c
+++ linux-can/drivers/net/can/c_can/c_can.c
@@ -171,6 +171,7 @@ enum c_can_lec_type {
 	LEC_BIT0_ERROR,
 	LEC_CRC_ERROR,
 	LEC_UNUSED,
+	LEC_MASK = LEC_UNUSED,
 };
 
 /*
@@ -897,12 +898,6 @@ static int c_can_do_rx_poll(struct net_d
 	return pkts;
 }
 
-static inline int c_can_has_and_handle_berr(struct c_can_priv *priv)
-{
-	return (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) &&
-		(priv->current_status & LEC_UNUSED);
-}
-
 static int c_can_handle_state_change(struct net_device *dev,
 				enum c_can_bus_error_types error_type)
 {
@@ -998,6 +993,9 @@ static int c_can_handle_bus_err(struct n
 	if (lec_type == LEC_UNUSED || lec_type == LEC_NO_ERROR)
 		return 0;
 
+	if (!(priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING))
+		return 0;
+
 	/* propagate the error condition to the CAN stack */
 	skb = alloc_can_err_skb(dev, &cf);
 	if (unlikely(!skb))
@@ -1057,7 +1055,6 @@ static int c_can_handle_bus_err(struct n
 static int c_can_poll(struct napi_struct *napi, int quota)
 {
 	u16 irqstatus;
-	int lec_type = 0;
 	int work_done = 0;
 	struct net_device *dev = napi->dev;
 	struct c_can_priv *priv = netdev_priv(dev);
@@ -1116,9 +1113,8 @@ static int c_can_poll(struct napi_struct
 		priv->last_status = priv->current_status;
 
 		/* handle lec errors on the bus */
-		lec_type = c_can_has_and_handle_berr(priv);
-		if (lec_type)
-			work_done += c_can_handle_bus_err(dev, lec_type);
+		work_done += c_can_handle_bus_err(dev,
+					priv->current_status & LEC_MASK);
 	} else if ((irqstatus >= C_CAN_MSG_OBJ_RX_FIRST) &&
 			(irqstatus <= C_CAN_MSG_OBJ_RX_LAST)) {
 		/* handle events corresponding to receive message objects */



^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 07/10] can: c_can: Simplify buffer reenabling
  2014-04-04 15:24 [patch 00/10] can: c_can: Another pile of fixes and improvements Thomas Gleixner
                   ` (4 preceding siblings ...)
  2014-04-04 15:24 ` [patch 05/10] can: c_can: Fix berr reporting Thomas Gleixner
@ 2014-04-04 15:24 ` Thomas Gleixner
  2014-04-04 16:14   ` Oliver Hartkopp
  2014-04-04 15:24 ` [patch 06/10] can: c_can: Always update error stats Thomas Gleixner
                   ` (3 subsequent siblings)
  9 siblings, 1 reply; 26+ messages in thread
From: Thomas Gleixner @ 2014-04-04 15:24 UTC (permalink / raw)
  To: linux-can; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, Alexander Stein

[-- Attachment #1: can-c_can-Simplify-buffer-reenabling.patch --]
[-- Type: text/plain, Size: 2025 bytes --]

Instead of writing to the message object we can simply clear the
NewDat bit with the get method.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/net/can/c_can/c_can.c |   16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

Index: linux-can/drivers/net/can/c_can/c_can.c
===================================================================
--- linux-can.orig/drivers/net/can/c_can/c_can.c
+++ linux-can/drivers/net/can/c_can/c_can.c
@@ -108,6 +108,7 @@
 #define IF_COMM_CONTROL		BIT(4)
 #define IF_COMM_CLR_INT_PND	BIT(3)
 #define IF_COMM_TXRQST		BIT(2)
+#define IF_COMM_CLR_NEWDAT	IF_COMM_TXRQST
 #define IF_COMM_DATAA		BIT(1)
 #define IF_COMM_DATAB		BIT(0)
 #define IF_COMM_ALL		(IF_COMM_MASK | IF_COMM_ARB | \
@@ -120,7 +121,7 @@
 				 IF_COMM_DATAA | IF_COMM_DATAB)
 
 /* For the high buffers we clear the interrupt bit and newdat */
-#define IF_COMM_RCV_HIGH	(IF_COMM_RCV_LOW | IF_COMM_TXRQST)
+#define IF_COMM_RCV_HIGH	(IF_COMM_RCV_LOW | IF_COMM_CLR_NEWDAT)
 
 /* IFx arbitration */
 #define IF_ARB_MSGVAL		BIT(15)
@@ -353,17 +354,12 @@ static void c_can_write_msg_object(struc
 }
 
 static inline void c_can_activate_all_lower_rx_msg_obj(struct net_device *dev,
-						int iface,
-						int ctrl_mask)
+						       int iface)
 {
 	int i;
-	struct c_can_priv *priv = netdev_priv(dev);
 
-	for (i = C_CAN_MSG_OBJ_RX_FIRST; i <= C_CAN_MSG_RX_LOW_LAST; i++) {
-		priv->write_reg(priv, C_CAN_IFACE(MSGCTRL_REG, iface),
-				ctrl_mask & ~IF_MCONT_NEWDAT);
-		c_can_object_put(dev, iface, i, IF_COMM_CONTROL);
-	}
+	for (i = C_CAN_MSG_OBJ_RX_FIRST; i <= C_CAN_MSG_RX_LOW_LAST; i++)
+		c_can_object_get(dev, iface, i, IF_COMM_CLR_NEWDAT);
 }
 
 static int c_can_handle_lost_msg_obj(struct net_device *dev,
@@ -829,7 +825,7 @@ static int c_can_read_objects(struct net
 
 		if (obj == C_CAN_MSG_RX_LOW_LAST)
 			/* activate all lower message objects */
-			c_can_activate_all_lower_rx_msg_obj(dev, IF_RX, ctrl);
+			c_can_activate_all_lower_rx_msg_obj(dev, IF_RX);
 
 		pkts++;
 		quota--;



^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 06/10] can: c_can: Always update error stats
  2014-04-04 15:24 [patch 00/10] can: c_can: Another pile of fixes and improvements Thomas Gleixner
                   ` (5 preceding siblings ...)
  2014-04-04 15:24 ` [patch 07/10] can: c_can: Simplify buffer reenabling Thomas Gleixner
@ 2014-04-04 15:24 ` Thomas Gleixner
  2014-04-04 15:24 ` [patch 09/10] can: c_can: Get rid of pointless interrupts Thomas Gleixner
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2014-04-04 15:24 UTC (permalink / raw)
  To: linux-can; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, Alexander Stein

[-- Attachment #1: can-c_can-always-update-error-stats.patch --]
[-- Type: text/plain, Size: 1712 bytes --]

If berr reporting is disabled or the allocation of the error skb
fails, we still want to see the error statistics.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/net/can/c_can/c_can.c |   13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

Index: linux-can/drivers/net/can/c_can/c_can.c
===================================================================
--- linux-can.orig/drivers/net/can/c_can/c_can.c
+++ linux-can/drivers/net/can/c_can/c_can.c
@@ -378,6 +378,9 @@ static int c_can_handle_lost_msg_obj(str
 	priv->write_reg(priv, C_CAN_IFACE(MSGCTRL_REG, iface), ctrl);
 	c_can_object_put(dev, iface, objno, IF_COMM_CONTROL);
 
+	stats->rx_errors++;
+	stats->rx_over_errors++;
+
 	/* create an error msg */
 	skb = alloc_can_err_skb(dev, &frame);
 	if (unlikely(!skb))
@@ -385,8 +388,6 @@ static int c_can_handle_lost_msg_obj(str
 
 	frame->can_id |= CAN_ERR_CRTL;
 	frame->data[1] = CAN_ERR_CRTL_RX_OVERFLOW;
-	stats->rx_errors++;
-	stats->rx_over_errors++;
 
 	netif_receive_skb(skb);
 	return 1;
@@ -993,6 +994,10 @@ static int c_can_handle_bus_err(struct n
 	if (lec_type == LEC_UNUSED || lec_type == LEC_NO_ERROR)
 		return 0;
 
+	/* common for all type of bus errors */
+	priv->can.can_stats.bus_error++;
+	stats->rx_errors++;
+
 	if (!(priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING))
 		return 0;
 
@@ -1005,10 +1010,6 @@ static int c_can_handle_bus_err(struct n
 	 * check for 'last error code' which tells us the
 	 * type of the last error to occur on the CAN bus
 	 */
-
-	/* common for all type of bus errors */
-	priv->can.can_stats.bus_error++;
-	stats->rx_errors++;
 	cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR;
 	cf->data[2] |= CAN_ERR_PROT_UNSPEC;
 



^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 08/10] can: c_can: Avoid status register update for D_CAN
  2014-04-04 15:24 [patch 00/10] can: c_can: Another pile of fixes and improvements Thomas Gleixner
                   ` (7 preceding siblings ...)
  2014-04-04 15:24 ` [patch 09/10] can: c_can: Get rid of pointless interrupts Thomas Gleixner
@ 2014-04-04 15:24 ` Thomas Gleixner
  2014-04-04 15:24 ` [patch 10/10] can: c_can : Disable rx split as workaround Thomas Gleixner
  9 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2014-04-04 15:24 UTC (permalink / raw)
  To: linux-can; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, Alexander Stein

[-- Attachment #1: can-c_can-avoid-status-update-for-dcan.patch --]
[-- Type: text/plain, Size: 1341 bytes --]

On D_CAN the RXOK, TXOK and LEC bits are cleared/set on read of the
status register. No need to update them.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/net/can/c_can/c_can.c |    9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

Index: linux-can/drivers/net/can/c_can/c_can.c
===================================================================
--- linux-can.orig/drivers/net/can/c_can/c_can.c
+++ linux-can/drivers/net/can/c_can/c_can.c
@@ -1041,7 +1041,8 @@ static int c_can_handle_bus_err(struct n
 	}
 
 	/* set a `lec` value so that we can check for updates later */
-	priv->write_reg(priv, C_CAN_STS_REG, LEC_UNUSED);
+	if (priv->type != BOSCH_D_CAN)
+		priv->write_reg(priv, C_CAN_STS_REG, LEC_UNUSED);
 
 	stats->rx_packets++;
 	stats->rx_bytes += cf->can_dlc;
@@ -1066,11 +1067,13 @@ static int c_can_poll(struct napi_struct
 					C_CAN_STS_REG);
 
 		/* handle Tx/Rx events */
-		if (priv->current_status & STATUS_TXOK)
+		if (priv->current_status & STATUS_TXOK &&
+		    priv->type != BOSCH_D_CAN)
 			priv->write_reg(priv, C_CAN_STS_REG,
 					priv->current_status & ~STATUS_TXOK);
 
-		if (priv->current_status & STATUS_RXOK)
+		if (priv->current_status & STATUS_RXOK &&
+		    priv->type != BOSCH_D_CAN)
 			priv->write_reg(priv, C_CAN_STS_REG,
 					priv->current_status & ~STATUS_RXOK);
 



^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 09/10] can: c_can: Get rid of pointless interrupts
  2014-04-04 15:24 [patch 00/10] can: c_can: Another pile of fixes and improvements Thomas Gleixner
                   ` (6 preceding siblings ...)
  2014-04-04 15:24 ` [patch 06/10] can: c_can: Always update error stats Thomas Gleixner
@ 2014-04-04 15:24 ` Thomas Gleixner
  2014-04-04 15:24 ` [patch 08/10] can: c_can: Avoid status register update for D_CAN Thomas Gleixner
  2014-04-04 15:24 ` [patch 10/10] can: c_can : Disable rx split as workaround Thomas Gleixner
  9 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2014-04-04 15:24 UTC (permalink / raw)
  To: linux-can; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, Alexander Stein

[-- Attachment #1: can-c-can-get-rid-of-pointless-interrupts.patch --]
[-- Type: text/plain, Size: 8339 bytes --]

The driver handles pointlessly TWO interrupts per packet. The reason
is that it enables the status interrupt which fires for each rx and tx
packet and it enables the per message object interrupts as well.

The status interrupt merily acks or in case of D_CAN ignores the TX/RX
state and then the message object interrupt fires.

The message objects interrupts are only useful if all message objects
have hardware filters activated.

But we don't have that and its not simple to implement in that driver
without rewriting it completely.

So we can ditch the message object interrupts and handle the RX/TX
right away from the status interrupt. Instead of TWO we handle ONE.

If we ever have the need for HW filtering, then this code needs a
complete overhaul and we can think about it then. For now we prefer a
lower interrupt load.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/net/can/c_can/c_can.c |  124 +++++++++++++++++-------------------------
 drivers/net/can/c_can/c_can.h |    3 -
 2 files changed, 54 insertions(+), 73 deletions(-)

Index: linux-can/drivers/net/can/c_can/c_can.c
===================================================================
--- linux-can.orig/drivers/net/can/c_can/c_can.c
+++ linux-can/drivers/net/can/c_can/c_can.c
@@ -348,8 +348,7 @@ static void c_can_write_msg_object(struc
 
 	/* enable interrupt for this message object */
 	priv->write_reg(priv, C_CAN_IFACE(MSGCTRL_REG, iface),
-			IF_MCONT_TXIE | IF_MCONT_TXRQST | IF_MCONT_EOB |
-			frame->can_dlc);
+			IF_MCONT_TXRQST | IF_MCONT_EOB | frame->can_dlc);
 	c_can_object_put(dev, iface, objno, IF_COMM_ALL);
 }
 
@@ -592,11 +591,10 @@ static void c_can_configure_msg_objects(
 
 	/* setup receive message objects */
 	for (i = C_CAN_MSG_OBJ_RX_FIRST; i < C_CAN_MSG_OBJ_RX_LAST; i++)
-		c_can_setup_receive_object(dev, IF_RX, i, 0, 0,
-			(IF_MCONT_RXIE | IF_MCONT_UMASK) & ~IF_MCONT_EOB);
+		c_can_setup_receive_object(dev, IF_RX, i, 0, 0, IF_MCONT_UMASK);
 
 	c_can_setup_receive_object(dev, IF_RX, C_CAN_MSG_OBJ_RX_LAST, 0, 0,
-			IF_MCONT_EOB | IF_MCONT_RXIE | IF_MCONT_UMASK);
+				   IF_MCONT_EOB | IF_MCONT_UMASK);
 }
 
 /*
@@ -649,8 +647,9 @@ static int c_can_start(struct net_device
 
 	priv->can.state = CAN_STATE_ERROR_ACTIVE;
 
-	/* reset tx helper pointers */
+	/* reset tx helper pointers and the rx mask */
 	priv->tx_next = priv->tx_echo = 0;
+	priv->rxmasked = 0;
 
 	return 0;
 }
@@ -823,9 +822,13 @@ static int c_can_read_objects(struct net
 		/* read the data from the message object */
 		c_can_read_msg_object(dev, IF_RX, ctrl);
 
-		if (obj == C_CAN_MSG_RX_LOW_LAST)
+		if (obj < C_CAN_MSG_RX_LOW_LAST)
+			priv->rxmasked |= BIT(obj - 1);
+		else if (obj == C_CAN_MSG_RX_LOW_LAST) {
+			priv->rxmasked = 0;
 			/* activate all lower message objects */
 			c_can_activate_all_lower_rx_msg_obj(dev, IF_RX);
+		}
 
 		pkts++;
 		quota--;
@@ -870,7 +873,8 @@ static int c_can_do_rx_poll(struct net_d
 
 	while (quota > 0) {
 		if (!pend) {
-			pend = priv->read_reg(priv, C_CAN_INTPND1_REG);
+			pend = priv->read_reg(priv, C_CAN_NEWDAT1_REG);
+			pend &= ~priv->rxmasked;
 			if (!pend)
 				break;
 			/*
@@ -1052,76 +1056,55 @@ static int c_can_handle_bus_err(struct n
 
 static int c_can_poll(struct napi_struct *napi, int quota)
 {
-	u16 irqstatus;
-	int work_done = 0;
 	struct net_device *dev = napi->dev;
 	struct c_can_priv *priv = netdev_priv(dev);
+	u16 curr, last = priv->last_status;
+	int work_done = 0;
 
-	irqstatus = priv->irqstatus;
-	if (!irqstatus)
+	curr = priv->read_reg(priv, C_CAN_STS_REG);
+
+	/* handle state changes */
+	if ((curr & STATUS_EWARN) && (!(last & STATUS_EWARN))) {
+		netdev_dbg(dev, "entered error warning state\n");
+		work_done += c_can_handle_state_change(dev, C_CAN_ERROR_WARNING);
+	}
+
+	if ((curr & STATUS_EPASS) && (!(last & STATUS_EPASS))) {
+		netdev_dbg(dev, "entered error passive state\n");
+		work_done += c_can_handle_state_change(dev, C_CAN_ERROR_PASSIVE);
+	}
+
+	if ((curr & STATUS_BOFF) && (!(last & STATUS_BOFF))) {
+		netdev_dbg(dev, "entered bus off state\n");
+		work_done += c_can_handle_state_change(dev, C_CAN_BUS_OFF);
 		goto end;
+	}
 
-	/* status events have the highest priority */
-	if (irqstatus == STATUS_INTERRUPT) {
-		priv->current_status = priv->read_reg(priv,
-					C_CAN_STS_REG);
-
-		/* handle Tx/Rx events */
-		if (priv->current_status & STATUS_TXOK &&
-		    priv->type != BOSCH_D_CAN)
-			priv->write_reg(priv, C_CAN_STS_REG,
-					priv->current_status & ~STATUS_TXOK);
-
-		if (priv->current_status & STATUS_RXOK &&
-		    priv->type != BOSCH_D_CAN)
-			priv->write_reg(priv, C_CAN_STS_REG,
-					priv->current_status & ~STATUS_RXOK);
-
-		/* handle state changes */
-		if ((priv->current_status & STATUS_EWARN) &&
-				(!(priv->last_status & STATUS_EWARN))) {
-			netdev_dbg(dev, "entered error warning state\n");
-			work_done += c_can_handle_state_change(dev,
-						C_CAN_ERROR_WARNING);
-		}
-		if ((priv->current_status & STATUS_EPASS) &&
-				(!(priv->last_status & STATUS_EPASS))) {
-			netdev_dbg(dev, "entered error passive state\n");
-			work_done += c_can_handle_state_change(dev,
-						C_CAN_ERROR_PASSIVE);
-		}
-		if ((priv->current_status & STATUS_BOFF) &&
-				(!(priv->last_status & STATUS_BOFF))) {
-			netdev_dbg(dev, "entered bus off state\n");
-			work_done += c_can_handle_state_change(dev,
-						C_CAN_BUS_OFF);
-			goto end;
-		}
+	/* handle bus recovery events */
+	if ((!(curr & STATUS_BOFF)) && (last & STATUS_BOFF)) {
+		netdev_dbg(dev, "left bus off state\n");
+		priv->can.state = CAN_STATE_ERROR_ACTIVE;
+	}
+	if ((!(curr & STATUS_EPASS)) && (last & STATUS_EPASS)) {
+		netdev_dbg(dev, "left error passive state\n");
+		priv->can.state = CAN_STATE_ERROR_ACTIVE;
+	}
 
-		/* handle bus recovery events */
-		if ((!(priv->current_status & STATUS_BOFF)) &&
-				(priv->last_status & STATUS_BOFF)) {
-			netdev_dbg(dev, "left bus off state\n");
-			priv->can.state = CAN_STATE_ERROR_ACTIVE;
-		}
-		if ((!(priv->current_status & STATUS_EPASS)) &&
-				(priv->last_status & STATUS_EPASS)) {
-			netdev_dbg(dev, "left error passive state\n");
-			priv->can.state = CAN_STATE_ERROR_ACTIVE;
-		}
+	priv->last_status = curr;
 
-		priv->last_status = priv->current_status;
+	/* handle lec errors on the bus */
+	work_done += c_can_handle_bus_err(dev, curr & LEC_MASK);
 
-		/* handle lec errors on the bus */
-		work_done += c_can_handle_bus_err(dev,
-					priv->current_status & LEC_MASK);
-	} else if ((irqstatus >= C_CAN_MSG_OBJ_RX_FIRST) &&
-			(irqstatus <= C_CAN_MSG_OBJ_RX_LAST)) {
-		/* handle events corresponding to receive message objects */
+	/* handle Tx/Rx events */
+	if (curr & STATUS_RXOK) {
+		if (priv->type != BOSCH_D_CAN)
+			priv->write_reg(priv, C_CAN_STS_REG, curr & ~STATUS_RXOK);
 		work_done += c_can_do_rx_poll(dev, (quota - work_done));
-	} else if ((irqstatus >= C_CAN_MSG_OBJ_TX_FIRST) &&
-			(irqstatus <= C_CAN_MSG_OBJ_TX_LAST)) {
-		/* handle events corresponding to transmit message objects */
+	}
+
+	if (curr & STATUS_TXOK) {
+		if (priv->type != BOSCH_D_CAN)
+			priv->write_reg(priv, C_CAN_STS_REG, curr & ~STATUS_TXOK);
 		c_can_do_tx(dev);
 	}
 
@@ -1141,8 +1124,7 @@ static irqreturn_t c_can_isr(int irq, vo
 	struct net_device *dev = (struct net_device *)dev_id;
 	struct c_can_priv *priv = netdev_priv(dev);
 
-	priv->irqstatus = priv->read_reg(priv, C_CAN_INT_REG);
-	if (!priv->irqstatus)
+	if (!priv->read_reg(priv, C_CAN_INT_REG))
 		return IRQ_NONE;
 
 	/* disable all interrupts and schedule the NAPI */
Index: linux-can/drivers/net/can/c_can/c_can.h
===================================================================
--- linux-can.orig/drivers/net/can/c_can/c_can.h
+++ linux-can/drivers/net/can/c_can/c_can.h
@@ -185,7 +185,6 @@ struct c_can_priv {
 	struct device *device;
 	spinlock_t xmit_lock;
 	int tx_object;
-	int current_status;
 	int last_status;
 	u16 (*read_reg) (struct c_can_priv *priv, enum reg index);
 	void (*write_reg) (struct c_can_priv *priv, enum reg index, u16 val);
@@ -195,11 +194,11 @@ struct c_can_priv {
 	unsigned int tx_next;
 	unsigned int tx_echo;
 	void *priv;		/* for board-specific data */
-	u16 irqstatus;
 	enum c_can_dev_id type;
 	u32 __iomem *raminit_ctrlreg;
 	unsigned int instance;
 	void (*raminit) (const struct c_can_priv *priv, bool enable);
+	u32 rxmasked;
 	u32 dlc[C_CAN_MSG_OBJ_TX_NUM];
 };
 



^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 10/10] can: c_can : Disable rx split as workaround
  2014-04-04 15:24 [patch 00/10] can: c_can: Another pile of fixes and improvements Thomas Gleixner
                   ` (8 preceding siblings ...)
  2014-04-04 15:24 ` [patch 08/10] can: c_can: Avoid status register update for D_CAN Thomas Gleixner
@ 2014-04-04 15:24 ` Thomas Gleixner
  2014-04-04 16:17   ` Mark
  2014-04-04 17:41   ` Oliver Hartkopp
  9 siblings, 2 replies; 26+ messages in thread
From: Thomas Gleixner @ 2014-04-04 15:24 UTC (permalink / raw)
  To: linux-can; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, Alexander Stein

[-- Attachment #1: can-c_can-disable-rx-split-as-workaround.patch --]
[-- Type: text/plain, Size: 3952 bytes --]

The RX buffer split causes packet loss in the hardware:

What happens is:

RX Packet 1 --> message buffer 1 (newdat bit is not cleared)
RX Packet 2 --> message buffer 2 (newdat bit is not cleared)
RX Packet 3 --> message buffer 3 (newdat bit is not cleared)
RX Packet 4 --> message buffer 4 (newdat bit is not cleared)
RX Packet 5 --> message buffer 5 (newdat bit is not cleared)
RX Packet 6 --> message buffer 6 (newdat bit is not cleared)
RX Packet 7 --> message buffer 7 (newdat bit is not cleared)
RX Packet 8 --> message buffer 8 (newdat bit is not cleared)

Clear newdat bit in message buffer 1
Clear newdat bit in message buffer 2
Clear newdat bit in message buffer 3
Clear newdat bit in message buffer 4
Clear newdat bit in message buffer 5
Clear newdat bit in message buffer 6
Clear newdat bit in message buffer 7
Clear newdat bit in message buffer 8

Now if during that clearing of newdat bits, a new message comes in,
the HW gets confused and drops it. 

It does not matter how many of them you clear. I put a delay between
clear of buffer 1 and buffer 2 which was long enough that the message
should have been queued either in buffer 1 or buffer 9. But it did not
show up anywhere. The next message ended up in buffer 1. So the
hardware lost a packet of course without telling it via one of the
error handlers.

That does not happen on all clear newdat bit events. I see one of 10k
packets dropped in the scenario which allows us to reproduce. But the
trace looks always the same.

Not splitting the RX Buffer avoids the packet loss but can cause
reordering. It's hard to trigger, but it CAN happen.

With that mode we use the HW as it was probably designed for. We read
from the buffer 1 upwards and clear the buffer as we get the
message. That's how all microcontrollers use it. So I assume that the
way we handle the buffers was never really tested. According to the
public documentation it should just work :)

Let the user decide which evil is the lesser one.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/net/can/c_can/Kconfig |    6 ++++++
 drivers/net/can/c_can/c_can.c |    7 ++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

Index: linux-can/drivers/net/can/c_can/Kconfig
===================================================================
--- linux-can.orig/drivers/net/can/c_can/Kconfig
+++ linux-can/drivers/net/can/c_can/Kconfig
@@ -14,6 +14,12 @@ config CAN_C_CAN_PLATFORM
 	  SPEAr1310 and SPEAr320 evaluation boards & TI (www.ti.com)
 	  boards like am335x, dm814x, dm813x and dm811x.
 
+config CAN_C_CAN_NO_RX_SPLIT
+	bool "Disable RX Split buffer"
+	---help---
+	  RX Split buffer prevents packet reordering but can cause packet
+	  loss. Select the less of the two evils.
+
 config CAN_C_CAN_PCI
 	tristate "Generic PCI Bus based C_CAN/D_CAN driver"
 	depends on PCI
Index: linux-can/drivers/net/can/c_can/c_can.c
===================================================================
--- linux-can.orig/drivers/net/can/c_can/c_can.c
+++ linux-can/drivers/net/can/c_can/c_can.c
@@ -797,8 +797,12 @@ static int c_can_read_objects(struct net
 	while ((obj = ffs(pend)) && quota > 0) {
 		pend &= ~BIT(obj - 1);
 
+#ifndef CONFIG_CAN_C_CAN_NO_RX_SPLIT
 		mcmd = obj < C_CAN_MSG_RX_LOW_LAST ?
 			IF_COMM_RCV_LOW : IF_COMM_RCV_HIGH;
+#else
+		mcmd = IF_COMM_RCV_HIGH;
+#endif
 
 		c_can_object_get(dev, IF_RX, obj, mcmd);
 		ctrl = priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, IF_RX));
@@ -822,6 +826,7 @@ static int c_can_read_objects(struct net
 		/* read the data from the message object */
 		c_can_read_msg_object(dev, IF_RX, ctrl);
 
+#ifndef CONFIG_CAN_C_CAN_NO_RX_SPLIT
 		if (obj < C_CAN_MSG_RX_LOW_LAST)
 			priv->rxmasked |= BIT(obj - 1);
 		else if (obj == C_CAN_MSG_RX_LOW_LAST) {
@@ -829,7 +834,7 @@ static int c_can_read_objects(struct net
 			/* activate all lower message objects */
 			c_can_activate_all_lower_rx_msg_obj(dev, IF_RX);
 		}
-
+#endif
 		pkts++;
 		quota--;
 	}



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 07/10] can: c_can: Simplify buffer reenabling
  2014-04-04 15:24 ` [patch 07/10] can: c_can: Simplify buffer reenabling Thomas Gleixner
@ 2014-04-04 16:14   ` Oliver Hartkopp
  2014-04-04 16:33     ` Thomas Gleixner
  0 siblings, 1 reply; 26+ messages in thread
From: Oliver Hartkopp @ 2014-04-04 16:14 UTC (permalink / raw)
  To: Thomas Gleixner, Alexander Stein
  Cc: linux-can, Marc Kleine-Budde, Wolfgang Grandegger

Hello Thomas, hello Alexander,

I just took a short look into the C_CAN manual.

-------- Original Message --------
Subject: [patch 07/10] can: c_can: Simplify buffer reenabling

(..)

Instead of writing to the message object we can simply clear the
NewDat bit with the get method.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/net/can/c_can/c_can.c |   16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

Index: linux-can/drivers/net/can/c_can/c_can.c
===================================================================
--- linux-can.orig/drivers/net/can/c_can/c_can.c
+++ linux-can/drivers/net/can/c_can/c_can.c

(..)

 /* For the high buffers we clear the interrupt bit and newdat */
-#define IF_COMM_RCV_HIGH	(IF_COMM_RCV_LOW | IF_COMM_TXRQST)
+#define IF_COMM_RCV_HIGH	(IF_COMM_RCV_LOW | IF_COMM_CLR_NEWDAT)


I wonder if it generally works correctly, when the interrupt and newdat bits
are handled separately, as the manual states:

4.8.1 Reading from a FIFO Buffer
When the CPU transfers the contents of Message Object to the IFx Message
Bugger registers by writing its number to the IFx Command Request Register,
the corresponding Command Mask Register should be programmed the way that bits
NewDat and IntPnd are reset to zero
(TxRqst/NewDat = ‘1’ and ClrIntPnd = ‘1’).

The driver seems to write these bits in different steps.

Regards,
Oliver



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 10/10] can: c_can : Disable rx split as workaround
  2014-04-04 15:24 ` [patch 10/10] can: c_can : Disable rx split as workaround Thomas Gleixner
@ 2014-04-04 16:17   ` Mark
  2014-04-04 16:38     ` Thomas Gleixner
                       ` (2 more replies)
  2014-04-04 17:41   ` Oliver Hartkopp
  1 sibling, 3 replies; 26+ messages in thread
From: Mark @ 2014-04-04 16:17 UTC (permalink / raw)
  To: Thomas Gleixner, linux-can
  Cc: Marc Kleine-Budde, Wolfgang Grandegger, Alexander Stein

Thomas:  a few comments on my experience with the CAN driver:

I think I agree with your approach -- I had the same problem losing a 
packet when running through the clearing of the newdat flags.

About a month ago, I re-wrote pch_can.c  (before I discovered this 
mailing list) and got it to work successfully sending / receiving CAN 
data at 1 MBIT with 25% load on the bus.   We wrote a lot of test code, 
sending/receiving messages over several days looking for mistakes, and 
found none.  Running at 1 MBIT definitely helped find the problems in 
the driver....things show up a lot quicker.
The change you describe below (clearing newdat as you go) was one of my 
major changes -- and yes, it allows for potential packet re-ordering.  
But I was happy to live with that in my application -- as opposed to 
dropping packets.

The other major change was to separate the use of ifregs[0] and 
ifregs[1] by task...not by function.  So _xmit()-related functions used 
one.   And _poll()-related functions used the other.  This meant I 
didn't have to use spinlock()'s to ensure mutual exclusion to those 
buffers.  It appears you took the spinlock() path...which should work as 
well (assuming you can spinlock both tasks that would access those 
registers).   I do believe splitting them by task instead of tx vs. rx 
is a better approach however.

Last:  I changed the _poll() routine to run through a "while" loop on 
INTSTAT  (irqstatus) and service all the pending interrupts at once.  
That makes it less likely that packets will arrive out of order, and I 
think reduces processor load.

Mark




On 4/4/2014 11:24 AM, Thomas Gleixner wrote:
> The RX buffer split causes packet loss in the hardware:
>
> What happens is:
>
> RX Packet 1 --> message buffer 1 (newdat bit is not cleared)
> RX Packet 2 --> message buffer 2 (newdat bit is not cleared)
> RX Packet 3 --> message buffer 3 (newdat bit is not cleared)
> RX Packet 4 --> message buffer 4 (newdat bit is not cleared)
> RX Packet 5 --> message buffer 5 (newdat bit is not cleared)
> RX Packet 6 --> message buffer 6 (newdat bit is not cleared)
> RX Packet 7 --> message buffer 7 (newdat bit is not cleared)
> RX Packet 8 --> message buffer 8 (newdat bit is not cleared)
>
> Clear newdat bit in message buffer 1
> Clear newdat bit in message buffer 2
> Clear newdat bit in message buffer 3
> Clear newdat bit in message buffer 4
> Clear newdat bit in message buffer 5
> Clear newdat bit in message buffer 6
> Clear newdat bit in message buffer 7
> Clear newdat bit in message buffer 8
>
> Now if during that clearing of newdat bits, a new message comes in,
> the HW gets confused and drops it.
>
> It does not matter how many of them you clear. I put a delay between
> clear of buffer 1 and buffer 2 which was long enough that the message
> should have been queued either in buffer 1 or buffer 9. But it did not
> show up anywhere. The next message ended up in buffer 1. So the
> hardware lost a packet of course without telling it via one of the
> error handlers.
>
> That does not happen on all clear newdat bit events. I see one of 10k
> packets dropped in the scenario which allows us to reproduce. But the
> trace looks always the same.
>
> Not splitting the RX Buffer avoids the packet loss but can cause
> reordering. It's hard to trigger, but it CAN happen.
>
> With that mode we use the HW as it was probably designed for. We read
> from the buffer 1 upwards and clear the buffer as we get the
> message. That's how all microcontrollers use it. So I assume that the
> way we handle the buffers was never really tested. According to the
> public documentation it should just work :)
>
> Let the user decide which evil is the lesser one.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>   drivers/net/can/c_can/Kconfig |    6 ++++++
>   drivers/net/can/c_can/c_can.c |    7 ++++++-
>   2 files changed, 12 insertions(+), 1 deletion(-)
>
> Index: linux-can/drivers/net/can/c_can/Kconfig
> ===================================================================
> --- linux-can.orig/drivers/net/can/c_can/Kconfig
> +++ linux-can/drivers/net/can/c_can/Kconfig
> @@ -14,6 +14,12 @@ config CAN_C_CAN_PLATFORM
>   	  SPEAr1310 and SPEAr320 evaluation boards & TI (www.ti.com)
>   	  boards like am335x, dm814x, dm813x and dm811x.
>   
> +config CAN_C_CAN_NO_RX_SPLIT
> +	bool "Disable RX Split buffer"
> +	---help---
> +	  RX Split buffer prevents packet reordering but can cause packet
> +	  loss. Select the less of the two evils.
> +
>   config CAN_C_CAN_PCI
>   	tristate "Generic PCI Bus based C_CAN/D_CAN driver"
>   	depends on PCI
> Index: linux-can/drivers/net/can/c_can/c_can.c
> ===================================================================
> --- linux-can.orig/drivers/net/can/c_can/c_can.c
> +++ linux-can/drivers/net/can/c_can/c_can.c
> @@ -797,8 +797,12 @@ static int c_can_read_objects(struct net
>   	while ((obj = ffs(pend)) && quota > 0) {
>   		pend &= ~BIT(obj - 1);
>   
> +#ifndef CONFIG_CAN_C_CAN_NO_RX_SPLIT
>   		mcmd = obj < C_CAN_MSG_RX_LOW_LAST ?
>   			IF_COMM_RCV_LOW : IF_COMM_RCV_HIGH;
> +#else
> +		mcmd = IF_COMM_RCV_HIGH;
> +#endif
>   
>   		c_can_object_get(dev, IF_RX, obj, mcmd);
>   		ctrl = priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, IF_RX));
> @@ -822,6 +826,7 @@ static int c_can_read_objects(struct net
>   		/* read the data from the message object */
>   		c_can_read_msg_object(dev, IF_RX, ctrl);
>   
> +#ifndef CONFIG_CAN_C_CAN_NO_RX_SPLIT
>   		if (obj < C_CAN_MSG_RX_LOW_LAST)
>   			priv->rxmasked |= BIT(obj - 1);
>   		else if (obj == C_CAN_MSG_RX_LOW_LAST) {
> @@ -829,7 +834,7 @@ static int c_can_read_objects(struct net
>   			/* activate all lower message objects */
>   			c_can_activate_all_lower_rx_msg_obj(dev, IF_RX);
>   		}
> -
> +#endif
>   		pkts++;
>   		quota--;
>   	}
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-can" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 07/10] can: c_can: Simplify buffer reenabling
  2014-04-04 16:14   ` Oliver Hartkopp
@ 2014-04-04 16:33     ` Thomas Gleixner
  0 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2014-04-04 16:33 UTC (permalink / raw)
  To: Oliver Hartkopp
  Cc: Alexander Stein, linux-can, Marc Kleine-Budde, Wolfgang Grandegger

[-- Attachment #1: Type: TEXT/PLAIN, Size: 2251 bytes --]

On Fri, 4 Apr 2014, Oliver Hartkopp wrote:
> Hello Thomas, hello Alexander,
> 
> I just took a short look into the C_CAN manual.
> 
> -------- Original Message --------
> Subject: [patch 07/10] can: c_can: Simplify buffer reenabling
> 
> (..)
> 
> Instead of writing to the message object we can simply clear the
> NewDat bit with the get method.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>  drivers/net/can/c_can/c_can.c |   16 ++++++----------
>  1 file changed, 6 insertions(+), 10 deletions(-)
> 
> Index: linux-can/drivers/net/can/c_can/c_can.c
> ===================================================================
> --- linux-can.orig/drivers/net/can/c_can/c_can.c
> +++ linux-can/drivers/net/can/c_can/c_can.c
> 
> (..)
> 
>  /* For the high buffers we clear the interrupt bit and newdat */
> -#define IF_COMM_RCV_HIGH	(IF_COMM_RCV_LOW | IF_COMM_TXRQST)
> +#define IF_COMM_RCV_HIGH	(IF_COMM_RCV_LOW | IF_COMM_CLR_NEWDAT)
> 
> 
> I wonder if it generally works correctly, when the interrupt and newdat bits
> are handled separately, as the manual states:
> 
> 4.8.1 Reading from a FIFO Buffer
> When the CPU transfers the contents of Message Object to the IFx Message
> Bugger registers by writing its number to the IFx Command Request Register,
> the corresponding Command Mask Register should be programmed the way that bits
> NewDat and IntPnd are reset to zero
> (TxRqst/NewDat = ‘1’ and ClrIntPnd = ‘1’).
> 
> The driver seems to write these bits in different steps.

Well it says should. And the D_CAN manual says in 2.3.1.2

Note: A read access to a Message Object can be combined with the reset
of the the control bits IntPnd and NewDat. The values of these bits
transferred to the IFxMCTR always reflect the status before resetting
them.

I cant find a "must" anywhere.

But yeah, I've got suspicious about the validity of the split RX
buffer approach once I discovered the dropped packets when we reenable
the lower buffers. Note, this is independent of the method. The drop
happens with the old and the new one.

So the last patch in the series lets you use the clear the NewDat bit
right away aproach. That gets rid of the message loss, but opens an
opportunity for potential packet reordering.

Thanks,

	Thomas

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 10/10] can: c_can : Disable rx split as workaround
  2014-04-04 16:17   ` Mark
@ 2014-04-04 16:38     ` Thomas Gleixner
  2014-04-05 18:57       ` Thomas Gleixner
  2014-04-04 16:43     ` Thomas Gleixner
  2014-04-05 18:56     ` Thomas Gleixner
  2 siblings, 1 reply; 26+ messages in thread
From: Thomas Gleixner @ 2014-04-04 16:38 UTC (permalink / raw)
  To: Mark; +Cc: linux-can, Marc Kleine-Budde, Wolfgang Grandegger, Alexander Stein

On Fri, 4 Apr 2014, Mark wrote:
> The change you describe below (clearing newdat as you go) was one of my major
> changes -- and yes, it allows for potential packet re-ordering.  But I was
> happy to live with that in my application -- as opposed to dropping packets.
> 
> The other major change was to separate the use of ifregs[0] and ifregs[1] by
> task...not by function.  So _xmit()-related functions used one.   And
> _poll()-related functions used the other.  This meant I didn't have to use
> spinlock()'s to ensure mutual exclusion to those buffers.  It appears you took
> the spinlock() path...which should work as well (assuming you can spinlock
> both tasks that would access those registers).   I do believe splitting them
> by task instead of tx vs. rx is a better approach however.

No. I use IF1 for RX and IF2 for TX, but TX needs a spinlock aside of
that. See

640916db2 and bf88a206.
 
> Last:  I changed the _poll() routine to run through a "while" loop on INTSTAT
> (irqstatus) and service all the pending interrupts at once.  That makes it
> less likely that packets will arrive out of order, and I think reduces
> processor load.

That's what the driver now does as well as long as it has not exceeded
the quota.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 10/10] can: c_can : Disable rx split as workaround
  2014-04-04 16:17   ` Mark
  2014-04-04 16:38     ` Thomas Gleixner
@ 2014-04-04 16:43     ` Thomas Gleixner
  2014-04-05 18:56     ` Thomas Gleixner
  2 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2014-04-04 16:43 UTC (permalink / raw)
  To: Mark; +Cc: linux-can, Marc Kleine-Budde, Wolfgang Grandegger, Alexander Stein

On Fri, 4 Apr 2014, Mark wrote:
> The change you describe below (clearing newdat as you go) was one of my major
> changes -- and yes, it allows for potential packet re-ordering.  But I was
> happy to live with that in my application -- as opposed to dropping packets.

We could avoid the re-ordering by reading out newdat/irqpnd status
between packet reads. That's not a too slow operation as it's a direct
read without interface interaction. And we can avoid that extra read
for single packets which is the normal operation mode anyway.

For the TI chips I rather prefer to utilize the DMA provided by IF3
which will get rid of the whole issue and make the RX path a gazillion
times faster as you don't have to interact with IFx at all.

Thanks,

	tglx



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 10/10] can: c_can : Disable rx split as workaround
  2014-04-04 15:24 ` [patch 10/10] can: c_can : Disable rx split as workaround Thomas Gleixner
  2014-04-04 16:17   ` Mark
@ 2014-04-04 17:41   ` Oliver Hartkopp
  2014-04-04 18:55     ` Thomas Gleixner
  2014-04-04 20:54     ` [patch 10/10 V2] can: c_can: Disable rx buffer split to prevent packet loss Thomas Gleixner
  1 sibling, 2 replies; 26+ messages in thread
From: Oliver Hartkopp @ 2014-04-04 17:41 UTC (permalink / raw)
  To: Thomas Gleixner, linux-can
  Cc: Marc Kleine-Budde, Wolfgang Grandegger, Alexander Stein


-------- Original Message --------
Subject: [patch 10/10] can: c_can : Disable rx split as workaround

(..)

+config CAN_C_CAN_NO_RX_SPLIT
+	bool "Disable RX Split buffer"
+	---help---
+	  RX Split buffer prevents packet reordering but can cause packet
+	  loss. Select the less of the two evils.
+

Can you try to use something more understandable for the Kconfig option?

AFAICS there are the options:

1. Frames are in order but may get lost.
2. Frames do not get lost but may be disordered.

No user/kernel builder knows about "rx split buffers" ...

What about

+config CAN_C_CAN_STRICT_FRAME_ORDERING
+	bool "Force a strict RX CAN frame order (may cause frame loss)"
+	---help---
+	  The RX split buffer prevents packet reordering but can cause packet
+	  loss. Only enable this option when you accept to lose CAN frames
+	  in favor of getting the received CAN frames in the correct order.
+

Regards,
Oliver

ps. Your patches are attached in my mail client, which makes it hard to
comment the code inline.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 10/10] can: c_can : Disable rx split as workaround
  2014-04-04 17:41   ` Oliver Hartkopp
@ 2014-04-04 18:55     ` Thomas Gleixner
  2014-04-04 19:51       ` Thomas Gleixner
  2014-04-04 20:54     ` [patch 10/10 V2] can: c_can: Disable rx buffer split to prevent packet loss Thomas Gleixner
  1 sibling, 1 reply; 26+ messages in thread
From: Thomas Gleixner @ 2014-04-04 18:55 UTC (permalink / raw)
  To: Oliver Hartkopp
  Cc: linux-can, Marc Kleine-Budde, Wolfgang Grandegger, Alexander Stein

On Fri, 4 Apr 2014, Oliver Hartkopp wrote:

> 
> -------- Original Message --------
> Subject: [patch 10/10] can: c_can : Disable rx split as workaround
> 
> (..)
> 
> +config CAN_C_CAN_NO_RX_SPLIT
> +	bool "Disable RX Split buffer"
> +	---help---
> +	  RX Split buffer prevents packet reordering but can cause packet
> +	  loss. Select the less of the two evils.
> +
> 
> Can you try to use something more understandable for the Kconfig option?
> 
> AFAICS there are the options:
> 
> 1. Frames are in order but may get lost.
> 2. Frames do not get lost but may be disordered.
> 
> No user/kernel builder knows about "rx split buffers" ...
> 
> What about
> 
> +config CAN_C_CAN_STRICT_FRAME_ORDERING
> +	bool "Force a strict RX CAN frame order (may cause frame loss)"
> +	---help---
> +	  The RX split buffer prevents packet reordering but can cause packet
> +	  loss. Only enable this option when you accept to lose CAN frames
> +	  in favor of getting the received CAN frames in the correct order.
> +

Way better :)

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 10/10] can: c_can : Disable rx split as workaround
  2014-04-04 18:55     ` Thomas Gleixner
@ 2014-04-04 19:51       ` Thomas Gleixner
  0 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2014-04-04 19:51 UTC (permalink / raw)
  To: Oliver Hartkopp
  Cc: linux-can, Marc Kleine-Budde, Wolfgang Grandegger, Alexander Stein

On Fri, 4 Apr 2014, Thomas Gleixner wrote:
> On Fri, 4 Apr 2014, Oliver Hartkopp wrote:
> > +config CAN_C_CAN_STRICT_FRAME_ORDERING
> > +	bool "Force a strict RX CAN frame order (may cause frame loss)"
> > +	---help---
> > +	  The RX split buffer prevents packet reordering but can cause packet
> > +	  loss. Only enable this option when you accept to lose CAN frames
> > +	  in favor of getting the received CAN frames in the correct order.
> > +

Thought about it some more.

Fact is that the reordering is extremly hard to trigger.

So now the question is how many real world scenarios are prone to
reordering issues?

I can't answer that question, but you might be able to shed some light
on it.

Sure sniffers might show different ordering, but are you running two
sniffers on the same bus, except when you are debugging a completely
insane problem related to ordering issues?

How many of the real world protocols do really care about ordering? I
know there are some streaming protocols, but they have a crc or some
other mechanism to deal with lost/reordered frames. If there is a
streaming protocol which does not have that, should we really care?

The common case were a gazillion of nodes sends data at the same time
to different identifiers does probably not care about reordering at
all because there is no guarantee that node A is always sending its
answer to the sync frame to id X before node B sends to id Y.

Thoughts?

Thanks,

	tglx






^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 10/10 V2] can: c_can: Disable rx buffer split to prevent packet loss
  2014-04-04 17:41   ` Oliver Hartkopp
  2014-04-04 18:55     ` Thomas Gleixner
@ 2014-04-04 20:54     ` Thomas Gleixner
  1 sibling, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2014-04-04 20:54 UTC (permalink / raw)
  To: Oliver Hartkopp
  Cc: linux-can, Marc Kleine-Budde, Wolfgang Grandegger, Alexander Stein

The RX buffer split, which was invented to prevent packet reordering
causes packet loss in the hardware. The logic of the RX buffer split
is to keep the lower 8 buffers marked busy so we get a head start
against the hardware. Once we have read out the 8th buffer we reenable
them in ascending order:

RX Packet 1 --> message buffer 1 (newdat bit is not cleared)
RX Packet 2 --> message buffer 2 (newdat bit is not cleared)
RX Packet 3 --> message buffer 3 (newdat bit is not cleared)
RX Packet 4 --> message buffer 4 (newdat bit is not cleared)
RX Packet 5 --> message buffer 5 (newdat bit is not cleared)
RX Packet 6 --> message buffer 6 (newdat bit is not cleared)
RX Packet 7 --> message buffer 7 (newdat bit is not cleared)
RX Packet 8 --> message buffer 8 (newdat bit is not cleared)

Clear newdat bit in message buffer 1
Clear newdat bit in message buffer 2
Clear newdat bit in message buffer 3
Clear newdat bit in message buffer 4
Clear newdat bit in message buffer 5
Clear newdat bit in message buffer 6
Clear newdat bit in message buffer 7
Clear newdat bit in message buffer 8

But if during that clearing of newdat bits, a new message comes in,
the HW gets confused and drops it.

It does not matter how many of them you clear. I put a delay between
clear of buffer 1 and buffer 2 which was long enough that the message
should have been queued either in buffer 1 or buffer 9. But it did not
show up anywhere. The next message ended up in buffer 1. Finer grained
instrumentation shows that it's the reenabling of buffer 1 which
causes the hardware to go bonkers. Of course there is no sign that the
hardware tells us anything about that via one of the various error
accounting mechanisms.

That does not happen on all clear newdat bit events. I see one of 10k
packets dropped in the scenario which allows us to reproduce. But the
trace looks always the same. So the race is probably at the scope of a
few clock cycles.

Not splitting the RX Buffer avoids the packet loss but can cause
reordering. It's hard to trigger, but it CAN happen. Look at the
following scenario:

3 new messages are in the buffers 1-3

Read Packet 1 from buffer 1 
                                Hardware queues a new packet in buffer 1
                                Hardware queues a new packet in buffer 4

Read Packet 2 from buffer 2
Read Packet 3 from buffer 3

Now we have two new messages in the buffers 1 and 4, but no way to
tell which one got queued first because it might have been ordered
this way:

Read Packet 1 from buffer 1     Hardware queues a new packet in buffer 4

Read Packet 2 from buffer 2
                                Hardware queues a new packet in buffer 1
Read Packet 3 from buffer 3

And there is no way to avoid that even not by reading the NewDat
pending register between every packet readout simply because that
information is completely asynchronous.

With that mode we use the HW as it was probably designed for. We read
from the buffer 1 upwards and clear the buffer as we get the
message. That's how all microcontrollers use it. So I assume that the
way we handled the buffers so far was never really tested. According
to the public documentation it should just work. Emphasis on should :)

Let the user decide which evil is the lesser one. By default not
losing packets is the lesser evil. There is a rationale for this:

The only use cases which I can imagine (CAN power users might correct
me) are

1) A streaming protocol over CAN which lacks a mechanism to deal
   with that. But I consider such a protocol broken as it wont deal with
   lost packets either.

2) Attaching two sniffers to the same bus which see a different frame
   ordering. But you really do not care unless you try to decode a
   streaming protocol wreckage where the protocol lacks any protection
   against reordering and packet loss. See #1

Thanks to Oliver Hartkopp who provided a sane config option plus help
text and made me rethink and switch to favour the very unlikely
reordering over the very likely packet loss in the Kconfig default.

As a side note:

It's amazing how hardware designers can come up with a way to
implement non functional stuff which has been implemented a gazillion
times in a functional way already.

I knew so far that there are several ways to implement non functional
timers, but non functional FIFOs are definitely a new experience.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/net/can/c_can/Kconfig |    7 +++++++
 drivers/net/can/c_can/c_can.c |   38 ++++++++++++++++++++++++++------------
 2 files changed, 33 insertions(+), 12 deletions(-)

Index: linux-2.6/drivers/net/can/c_can/Kconfig
===================================================================
--- linux-2.6.orig/drivers/net/can/c_can/Kconfig
+++ linux-2.6/drivers/net/can/c_can/Kconfig
@@ -14,6 +14,13 @@ config CAN_C_CAN_PLATFORM
 	  SPEAr1310 and SPEAr320 evaluation boards & TI (www.ti.com)
 	  boards like am335x, dm814x, dm813x and dm811x.
 
+config CAN_C_CAN_STRICT_FRAME_ORDERING
+	bool "Force a strict RX CAN frame order (may cause frame loss)"
+	---help---
+	  The RX split buffer prevents packet reordering but can cause packet
+	  loss. Only enable this option when you accept to lose CAN frames
+	  in favour of getting the received CAN frames in the correct order.
+
 config CAN_C_CAN_PCI
 	tristate "Generic PCI Bus based C_CAN/D_CAN driver"
 	depends on PCI
Index: linux-2.6/drivers/net/can/c_can/c_can.c
===================================================================
--- linux-2.6.orig/drivers/net/can/c_can/c_can.c
+++ linux-2.6/drivers/net/can/c_can/c_can.c
@@ -789,18 +789,38 @@ static u32 c_can_adjust_pending(u32 pend
 	return pend & ~((1 << lasts) - 1);
 }
 
+static inline void c_can_rx_object_get(struct net_device *dev, u32 obj)
+{
+#ifdef CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING
+	if (obj < C_CAN_MSG_RX_LOW_LAST)
+		c_can_object_get(dev, IF_RX, obj, IF_COMM_RCV_LOW);
+	else
+#endif
+		c_can_object_get(dev, IF_RX, obj, IF_COMM_RCV_HIGH);
+}
+
+static inline void c_can_rx_finalize(struct c_can_priv *priv, u32 obj)
+{
+#ifdef CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING
+	if (obj < C_CAN_MSG_RX_LOW_LAST)
+		priv->rxmasked |= BIT(obj - 1);
+	else if (obj == C_CAN_MSG_RX_LOW_LAST) {
+		priv->rxmasked = 0;
+		/* activate all lower message objects */
+		c_can_activate_all_lower_rx_msg_obj(dev, IF_RX);
+	}
+#endif
+}
+
 static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv,
 			      u32 pend, int quota)
 {
-	u32 pkts = 0, ctrl, obj, mcmd;
+	u32 pkts = 0, ctrl, obj;
 
 	while ((obj = ffs(pend)) && quota > 0) {
 		pend &= ~BIT(obj - 1);
 
-		mcmd = obj < C_CAN_MSG_RX_LOW_LAST ?
-			IF_COMM_RCV_LOW : IF_COMM_RCV_HIGH;
-
-		c_can_object_get(dev, IF_RX, obj, mcmd);
+		c_can_rx_object_get(dev, obj);
 		ctrl = priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, IF_RX));
 
 		if (ctrl & IF_MCONT_MSGLST) {
@@ -822,13 +842,7 @@ static int c_can_read_objects(struct net
 		/* read the data from the message object */
 		c_can_read_msg_object(dev, IF_RX, ctrl);
 
-		if (obj < C_CAN_MSG_RX_LOW_LAST)
-			priv->rxmasked |= BIT(obj - 1);
-		else if (obj == C_CAN_MSG_RX_LOW_LAST) {
-			priv->rxmasked = 0;
-			/* activate all lower message objects */
-			c_can_activate_all_lower_rx_msg_obj(dev, IF_RX);
-		}
+		c_can_rx_finalize(priv, obj);
 
 		pkts++;
 		quota--;

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 10/10] can: c_can : Disable rx split as workaround
  2014-04-04 16:17   ` Mark
  2014-04-04 16:38     ` Thomas Gleixner
  2014-04-04 16:43     ` Thomas Gleixner
@ 2014-04-05 18:56     ` Thomas Gleixner
  2014-04-05 19:38       ` Wolfgang Grandegger
  2 siblings, 1 reply; 26+ messages in thread
From: Thomas Gleixner @ 2014-04-05 18:56 UTC (permalink / raw)
  To: Mark; +Cc: linux-can, Marc Kleine-Budde, Wolfgang Grandegger, Alexander Stein

On Fri, 4 Apr 2014, Mark wrote:

> Thomas:  a few comments on my experience with the CAN driver:
> 
> I think I agree with your approach -- I had the same problem losing a packet
> when running through the clearing of the newdat flags.
> 
> About a month ago, I re-wrote pch_can.c  (before I discovered this mailing
> list) and got it to work successfully sending / receiving CAN data at 1 MBIT

Wow, that's amazing.

That driver is just a variant of c_can.c with quite some of the same
bugs and some different ones.

We really should switch that over to c_can.c and get rid of it.

Thanks,

	tglx




^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 10/10] can: c_can : Disable rx split as workaround
  2014-04-04 16:38     ` Thomas Gleixner
@ 2014-04-05 18:57       ` Thomas Gleixner
  0 siblings, 0 replies; 26+ messages in thread
From: Thomas Gleixner @ 2014-04-05 18:57 UTC (permalink / raw)
  To: Mark; +Cc: linux-can, Marc Kleine-Budde, Wolfgang Grandegger, Alexander Stein

On Fri, 4 Apr 2014, Thomas Gleixner wrote:

> On Fri, 4 Apr 2014, Mark wrote:
> > The change you describe below (clearing newdat as you go) was one of my major
> > changes -- and yes, it allows for potential packet re-ordering.  But I was
> > happy to live with that in my application -- as opposed to dropping packets.
> > 
> > The other major change was to separate the use of ifregs[0] and ifregs[1] by
> > task...not by function.  So _xmit()-related functions used one.   And
> > _poll()-related functions used the other.  This meant I didn't have to use
> > spinlock()'s to ensure mutual exclusion to those buffers.  It appears you took
> > the spinlock() path...which should work as well (assuming you can spinlock
> > both tasks that would access those registers).   I do believe splitting them
> > by task instead of tx vs. rx is a better approach however.
> 
> No. I use IF1 for RX and IF2 for TX, but TX needs a spinlock aside of
> that. See
> 
> 640916db2 and bf88a206.

Thinking with awake brain about it: You use IF1 just for the softirq
(both TX and RX) and IF2 for the xmit path. Yeah, that avoids the
spinlock. Nice! Should have thought of that.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 10/10] can: c_can : Disable rx split as workaround
  2014-04-05 18:56     ` Thomas Gleixner
@ 2014-04-05 19:38       ` Wolfgang Grandegger
  2014-04-05 19:42         ` Wolfgang Grandegger
  2014-04-05 19:48         ` Thomas Gleixner
  0 siblings, 2 replies; 26+ messages in thread
From: Wolfgang Grandegger @ 2014-04-05 19:38 UTC (permalink / raw)
  To: Thomas Gleixner, Mark; +Cc: linux-can, Marc Kleine-Budde, Alexander Stein

On 04/05/2014 08:56 PM, Thomas Gleixner wrote:
> On Fri, 4 Apr 2014, Mark wrote:
> 
>> Thomas:  a few comments on my experience with the CAN driver:
>>
>> I think I agree with your approach -- I had the same problem losing a packet
>> when running through the clearing of the newdat flags.
>>
>> About a month ago, I re-wrote pch_can.c  (before I discovered this mailing
>> list) and got it to work successfully sending / receiving CAN data at 1 MBIT
> 
> Wow, that's amazing.

We realized late that the pch_can manual [1] is an exact copy of the
c_can manual just with different register names (and without mentioning
c_can at all). Also the initial author didn't mentioning it. Maybe he
did not even know.

> That driver is just a variant of c_can.c with quite some of the same
> bugs and some different ones.

It's worse :(.

> We really should switch that over to c_can.c and get rid of it.

See http://news.gmane.org/gmane.linux.can. Should we remove pch_can
immediately? Or labeling it deprecated first.

Wolfgang.

[1] Chapter 13 of
http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/platform-controller-hub-eg20t-datasheet.pdf

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 10/10] can: c_can : Disable rx split as workaround
  2014-04-05 19:38       ` Wolfgang Grandegger
@ 2014-04-05 19:42         ` Wolfgang Grandegger
  2014-04-05 19:48         ` Thomas Gleixner
  1 sibling, 0 replies; 26+ messages in thread
From: Wolfgang Grandegger @ 2014-04-05 19:42 UTC (permalink / raw)
  To: Thomas Gleixner, Mark; +Cc: linux-can, Marc Kleine-Budde, Alexander Stein

On 04/05/2014 09:38 PM, Wolfgang Grandegger wrote:
> On 04/05/2014 08:56 PM, Thomas Gleixner wrote:
>> On Fri, 4 Apr 2014, Mark wrote:
>>
>>> Thomas:  a few comments on my experience with the CAN driver:
>>>
>>> I think I agree with your approach -- I had the same problem losing a packet
>>> when running through the clearing of the newdat flags.
>>>
>>> About a month ago, I re-wrote pch_can.c  (before I discovered this mailing
>>> list) and got it to work successfully sending / receiving CAN data at 1 MBIT
>>
>> Wow, that's amazing.
> 
> We realized late that the pch_can manual [1] is an exact copy of the
> c_can manual just with different register names (and without mentioning
> c_can at all). Also the initial author didn't mentioning it. Maybe he
> did not even know.
> 
>> That driver is just a variant of c_can.c with quite some of the same
>> bugs and some different ones.
> 
> It's worse :(.
> 
>> We really should switch that over to c_can.c and get rid of it.
> 
> See http://news.gmane.org/gmane.linux.can. Should we remove pch_can
> immediately? Or labeling it deprecated first.

I mean the link http://article.gmane.org/gmane.linux.can/5440

Wolfgang

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 10/10] can: c_can : Disable rx split as workaround
  2014-04-05 19:38       ` Wolfgang Grandegger
  2014-04-05 19:42         ` Wolfgang Grandegger
@ 2014-04-05 19:48         ` Thomas Gleixner
  2014-04-05 19:53           ` Wolfgang Grandegger
  1 sibling, 1 reply; 26+ messages in thread
From: Thomas Gleixner @ 2014-04-05 19:48 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: Mark, linux-can, Marc Kleine-Budde, Alexander Stein

On Sat, 5 Apr 2014, Wolfgang Grandegger wrote:
> On 04/05/2014 08:56 PM, Thomas Gleixner wrote:
> > On Fri, 4 Apr 2014, Mark wrote:
> > 
> >> Thomas:  a few comments on my experience with the CAN driver:
> >>
> >> I think I agree with your approach -- I had the same problem losing a packet
> >> when running through the clearing of the newdat flags.
> >>
> >> About a month ago, I re-wrote pch_can.c  (before I discovered this mailing
> >> list) and got it to work successfully sending / receiving CAN data at 1 MBIT
> > 
> > Wow, that's amazing.
> 
> We realized late that the pch_can manual [1] is an exact copy of the
> c_can manual just with different register names (and without mentioning
> c_can at all). Also the initial author didn't mentioning it. Maybe he
> did not even know.
> 
> > That driver is just a variant of c_can.c with quite some of the same
> > bugs and some different ones.
> 
> It's worse :(.
> 
> > We really should switch that over to c_can.c and get rid of it.
> 
> See http://news.gmane.org/gmane.linux.can. Should we remove pch_can
> immediately? Or labeling it deprecated first.

Dunno, but removing it right away seems to be the better solution to
avoid that people start fixing pch_can.c again.

One trick to keep the existing users happy, is keeping the Kconfig
switch and select the C_CAN stuff for it. Then you can gradually phase
it out.

Thanks,

	tglx



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 10/10] can: c_can : Disable rx split as workaround
  2014-04-05 19:48         ` Thomas Gleixner
@ 2014-04-05 19:53           ` Wolfgang Grandegger
  0 siblings, 0 replies; 26+ messages in thread
From: Wolfgang Grandegger @ 2014-04-05 19:53 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Mark, linux-can, Marc Kleine-Budde, Alexander Stein

On 04/05/2014 09:48 PM, Thomas Gleixner wrote:
> On Sat, 5 Apr 2014, Wolfgang Grandegger wrote:
>> On 04/05/2014 08:56 PM, Thomas Gleixner wrote:
>>> On Fri, 4 Apr 2014, Mark wrote:
>>>
>>>> Thomas:  a few comments on my experience with the CAN driver:
>>>>
>>>> I think I agree with your approach -- I had the same problem losing a packet
>>>> when running through the clearing of the newdat flags.
>>>>
>>>> About a month ago, I re-wrote pch_can.c  (before I discovered this mailing
>>>> list) and got it to work successfully sending / receiving CAN data at 1 MBIT
>>>
>>> Wow, that's amazing.
>>
>> We realized late that the pch_can manual [1] is an exact copy of the
>> c_can manual just with different register names (and without mentioning
>> c_can at all). Also the initial author didn't mentioning it. Maybe he
>> did not even know.
>>
>>> That driver is just a variant of c_can.c with quite some of the same
>>> bugs and some different ones.
>>
>> It's worse :(.
>>
>>> We really should switch that over to c_can.c and get rid of it.
>>
>> See http://news.gmane.org/gmane.linux.can. Should we remove pch_can
>> immediately? Or labeling it deprecated first.
> 
> Dunno, but removing it right away seems to be the better solution to
> avoid that people start fixing pch_can.c again.

I agree, especially because that driver does have serious bugs.

> One trick to keep the existing users happy, is keeping the Kconfig
> switch and select the C_CAN stuff for it. Then you can gradually phase
> it out.

I was also thinking about that. Just the module name will then be different.

Wolfgang.


^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2014-04-05 19:53 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-04-04 15:24 [patch 00/10] can: c_can: Another pile of fixes and improvements Thomas Gleixner
2014-04-04 15:24 ` [patch 01/10] can: c_can: Fix startup logic Thomas Gleixner
2014-04-04 15:24 ` [patch 02/10] can: c_can: Make bus off interrupt disable logic work Thomas Gleixner
2014-04-04 15:24 ` [patch 03/10] can: c_can: Do not access skb after net_receive_skb() Thomas Gleixner
2014-04-04 15:24 ` [patch 04/10] can: c_can: Handle state change correctly Thomas Gleixner
2014-04-04 15:24 ` [patch 05/10] can: c_can: Fix berr reporting Thomas Gleixner
2014-04-04 15:24 ` [patch 07/10] can: c_can: Simplify buffer reenabling Thomas Gleixner
2014-04-04 16:14   ` Oliver Hartkopp
2014-04-04 16:33     ` Thomas Gleixner
2014-04-04 15:24 ` [patch 06/10] can: c_can: Always update error stats Thomas Gleixner
2014-04-04 15:24 ` [patch 09/10] can: c_can: Get rid of pointless interrupts Thomas Gleixner
2014-04-04 15:24 ` [patch 08/10] can: c_can: Avoid status register update for D_CAN Thomas Gleixner
2014-04-04 15:24 ` [patch 10/10] can: c_can : Disable rx split as workaround Thomas Gleixner
2014-04-04 16:17   ` Mark
2014-04-04 16:38     ` Thomas Gleixner
2014-04-05 18:57       ` Thomas Gleixner
2014-04-04 16:43     ` Thomas Gleixner
2014-04-05 18:56     ` Thomas Gleixner
2014-04-05 19:38       ` Wolfgang Grandegger
2014-04-05 19:42         ` Wolfgang Grandegger
2014-04-05 19:48         ` Thomas Gleixner
2014-04-05 19:53           ` Wolfgang Grandegger
2014-04-04 17:41   ` Oliver Hartkopp
2014-04-04 18:55     ` Thomas Gleixner
2014-04-04 19:51       ` Thomas Gleixner
2014-04-04 20:54     ` [patch 10/10 V2] can: c_can: Disable rx buffer split to prevent packet loss Thomas Gleixner

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.