All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] c_can: Add support for eg20t (pch_can)
@ 2014-04-03 14:14 Alexander Stein
  2014-04-03 14:55 ` Alexander Stein
  2014-04-03 18:41 ` Wolfgang Grandegger
  0 siblings, 2 replies; 37+ messages in thread
From: Alexander Stein @ 2014-04-03 14:14 UTC (permalink / raw)
  To: Wolfgang Grandegger, Marc, Kleine-Budde <mkl
  Cc: Alexander Stein, linux-can

Signed-off-by: Alexander Stein <alexander.stein@systec-electronic.com>
---
 drivers/net/can/c_can/c_can_pci.c | 50 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/drivers/net/can/c_can/c_can_pci.c b/drivers/net/can/c_can/c_can_pci.c
index bce0be5..128788c 100644
--- a/drivers/net/can/c_can/c_can_pci.c
+++ b/drivers/net/can/c_can/c_can_pci.c
@@ -19,9 +19,13 @@
 
 #include "c_can.h"
 
+#define PCI_DEVICE_ID_PCH_CAN	0x8818
+#define PCH_PCI_SOFT_RESET	0x01fc
+
 enum c_can_pci_reg_align {
 	C_CAN_REG_ALIGN_16,
 	C_CAN_REG_ALIGN_32,
+	C_CAN_REG_32,
 };
 
 struct c_can_pci_data {
@@ -31,6 +35,10 @@ struct c_can_pci_data {
 	enum c_can_pci_reg_align reg_align;
 	/* Set the frequency */
 	unsigned int freq;
+	/* PCI bar number */
+	int bar;
+	/* Callback for reset */
+	void (*init)(const struct c_can_priv *priv, bool enable);
 };
 
 /*
@@ -63,6 +71,29 @@ static void c_can_pci_write_reg_aligned_to_32bit(struct c_can_priv *priv,
 	writew(val, priv->base + 2 * priv->regs[index]);
 }
 
+static u16 c_can_pci_read_reg_32bit(struct c_can_priv *priv,
+				    enum reg index)
+{
+	return (u16)ioread32(priv->base + 2 * priv->regs[index]);
+}
+
+static void c_can_pci_write_reg_32bit(struct c_can_priv *priv,
+				      enum reg index, u16 val)
+{
+	iowrite32((u32)val, priv->base + 2 * priv->regs[index]);
+}
+
+static void c_can_pci_reset_pch(const struct c_can_priv *priv, bool enable)
+{
+	if (enable) {
+		u32 __iomem *addr = priv->base + PCH_PCI_SOFT_RESET;
+
+		/* write to sw reset register */
+		iowrite32(1, addr);
+		iowrite32(0, addr);
+	}
+}
+
 static int c_can_pci_probe(struct pci_dev *pdev,
 			   const struct pci_device_id *ent)
 {
@@ -87,7 +118,7 @@ static int c_can_pci_probe(struct pci_dev *pdev,
 	pci_set_master(pdev);
 	pci_enable_msi(pdev);
 
-	addr = pci_iomap(pdev, 0, pci_resource_len(pdev, 0));
+	addr = pci_iomap(pdev, c_can_pci_data->bar, pci_resource_len(pdev, 0));
 	if (!addr) {
 		dev_err(&pdev->dev,
 			"device has no PCI memory resources, "
@@ -142,11 +173,17 @@ static int c_can_pci_probe(struct pci_dev *pdev,
 		priv->read_reg = c_can_pci_read_reg_aligned_to_16bit;
 		priv->write_reg = c_can_pci_write_reg_aligned_to_16bit;
 		break;
+	case C_CAN_REG_32:
+		priv->read_reg = c_can_pci_read_reg_32bit;
+		priv->write_reg = c_can_pci_write_reg_32bit;
+		break;
 	default:
 		ret = -EINVAL;
 		goto out_free_c_can;
 	}
 
+	priv->raminit = c_can_pci_data->init;
+
 	ret = register_c_can_dev(dev);
 	if (ret) {
 		dev_err(&pdev->dev, "registering %s failed (err=%d)\n",
@@ -193,6 +230,15 @@ static struct c_can_pci_data c_can_sta2x11= {
 	.type = BOSCH_C_CAN,
 	.reg_align = C_CAN_REG_ALIGN_32,
 	.freq = 52000000, /* 52 Mhz */
+	.bar = 0,
+};
+
+static struct c_can_pci_data c_can_pch = {
+	.type = BOSCH_C_CAN,
+	.reg_align = C_CAN_REG_32,
+	.freq = 50000000, /* 50 MHz */
+	.init = c_can_pci_reset_pch,
+	.bar = 1,
 };
 
 #define C_CAN_ID(_vend, _dev, _driverdata) {		\
@@ -202,6 +248,8 @@ static struct c_can_pci_data c_can_sta2x11= {
 static DEFINE_PCI_DEVICE_TABLE(c_can_pci_tbl) = {
 	C_CAN_ID(PCI_VENDOR_ID_STMICRO, PCI_DEVICE_ID_STMICRO_CAN,
 		 c_can_sta2x11),
+	C_CAN_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PCH_CAN,
+		 c_can_pch),
 	{},
 };
 static struct pci_driver c_can_pci_driver = {
-- 
1.8.3.2


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-03 14:14 [PATCH] c_can: Add support for eg20t (pch_can) Alexander Stein
@ 2014-04-03 14:55 ` Alexander Stein
  2014-04-03 14:59   ` Marc Kleine-Budde
  2014-04-03 18:41 ` Wolfgang Grandegger
  1 sibling, 1 reply; 37+ messages in thread
From: Alexander Stein @ 2014-04-03 14:55 UTC (permalink / raw)
  To: Wolfgang Grandegger, Marc Kleine-Budde; +Cc: linux-can

Just for the records in this thread:

based on tag 'linux-can-fixes-for-3.15-20140401' on repository git://gitorious.org/linux-can/linux-can I have noticed message losts(only one message each time). Interestingly 'candump any,0~0,#FFFFFFFF' didn't show yn error frames.
From 500000 CAN frames I lost 72 frames.

On Thursday 03 April 2014 16:14:11, Alexander Stein wrote:
> Signed-off-by: Alexander Stein <alexander.stein@systec-electronic.com>
> ---
>  drivers/net/can/c_can/c_can_pci.c | 50 ++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 49 insertions(+), 1 deletion(-)

Best regards,
Alexander
-- 
Dipl.-Inf. Alexander Stein

SYS TEC electronic GmbH
Am Windrad 2
08468 Heinsdorfergrund
Tel.: 03765 38600-1156
Fax: 03765 38600-4100
Email: alexander.stein@systec-electronic.com
Website: www.systec-electronic.com
 
Managing Director: Dipl.-Phys. Siegmar Schmidt
Commercial registry: Amtsgericht Chemnitz, HRB 28082


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-03 14:55 ` Alexander Stein
@ 2014-04-03 14:59   ` Marc Kleine-Budde
  2014-04-03 15:11     ` Thomas Gleixner
  0 siblings, 1 reply; 37+ messages in thread
From: Marc Kleine-Budde @ 2014-04-03 14:59 UTC (permalink / raw)
  To: Alexander Stein, Wolfgang Grandegger; +Cc: linux-can, Thomas Gleixner

[-- Attachment #1: Type: text/plain, Size: 995 bytes --]

Adding tglx to Cc

On 04/03/2014 04:55 PM, Alexander Stein wrote:
> Just for the records in this thread:
> 
> based on tag 'linux-can-fixes-for-3.15-20140401' on repository git://gitorious.org/linux-can/linux-can I have noticed message losts(only one message each time). Interestingly 'candump any,0~0,#FFFFFFFF' didn't show yn error frames.
> From 500000 CAN frames I lost 72 frames.
> 
> On Thursday 03 April 2014 16:14:11, Alexander Stein wrote:
>> Signed-off-by: Alexander Stein <alexander.stein@systec-electronic.com>
>> ---
>>  drivers/net/can/c_can/c_can_pci.c | 50 ++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 49 insertions(+), 1 deletion(-)
> 
> Best regards,
> Alexander
> 


-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 242 bytes --]

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-03 14:59   ` Marc Kleine-Budde
@ 2014-04-03 15:11     ` Thomas Gleixner
  2014-04-03 15:47       ` Alexander Stein
  0 siblings, 1 reply; 37+ messages in thread
From: Thomas Gleixner @ 2014-04-03 15:11 UTC (permalink / raw)
  To: Marc Kleine-Budde; +Cc: Alexander Stein, Wolfgang Grandegger, linux-can

B1;3202;0cOn Thu, 3 Apr 2014, Marc Kleine-Budde wrote:

> Adding tglx to Cc
> 
> On 04/03/2014 04:55 PM, Alexander Stein wrote:
> > Just for the records in this thread:
> > 
> > based on tag 'linux-can-fixes-for-3.15-20140401' on repository git://gitorious.org/linux-can/linux-can I have noticed message losts(only one message each time). Interestingly 'candump any,0~0,#FFFFFFFF' didn't show yn error frames.
> > From 500000 CAN frames I lost 72 frames.

The split RX buffer handling is causing the hardware to drop frames
without giving any notice. We debugged that in the last days and can
prove that it is an hardware issue. Fun, isn't it ?

The package drop happens always at the point where the 8th message
buffer is read out and all the lower buffers are reenabled. So
something confuses the state machine of the message handler here.

If we disable the split buffer handling, then the drop out stops. That
might cause packet reordering though. Find below a test patch.

We have yet to find a workaround for that.

Thanks,

	tglx
-------------------
Subject: can: c_can : Disable rx split as workaround
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 03 Apr 2014 16:10:43 +0200

The split causes packet loss. No split can cause reordering. Choose
what you like better.

Not-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/net/can/c_can/Kconfig |    6 ++++++
 drivers/net/can/c_can/c_can.c |    7 ++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

Index: linux/drivers/net/can/c_can/Kconfig
===================================================================
--- linux.orig/drivers/net/can/c_can/Kconfig
+++ linux/drivers/net/can/c_can/Kconfig
@@ -14,6 +14,12 @@ config CAN_C_CAN_PLATFORM
 	  SPEAr1310 and SPEAr320 evaluation boards & TI (www.ti.com)
 	  boards like am335x, dm814x, dm813x and dm811x.
 
+config CAN_C_CAN_NO_RX_SPLIT
+	bool "Disable RX Split buffer"
+	---help---
+	  RX Split buffer prevents packet reordering but can cause packet
+	  loss. Select the lesser of the two evils.
+
 config CAN_C_CAN_PCI
 	tristate "Generic PCI Bus based C_CAN/D_CAN driver"
 	depends on PCI
Index: linux/drivers/net/can/c_can/c_can.c
===================================================================
--- linux.orig/drivers/net/can/c_can/c_can.c
+++ linux/drivers/net/can/c_can/c_can.c
@@ -805,8 +805,12 @@ static int c_can_read_objects(struct net
 	while ((obj = ffs(pend)) && quota > 0) {
 		pend &= ~BIT(obj - 1);
 
+#ifndef CONFIG_CAN_C_CAN_NO_RX_SPLIT
 		mcmd = obj < C_CAN_MSG_RX_LOW_LAST ?
 			IF_COMM_RCV_LOW : IF_COMM_RCV_HIGH;
+#else
+		mcmd = IF_COMM_RCV_HIGH;
+#endif
 
 		c_can_object_get(dev, IF_RX, obj, mcmd);
 		ctrl = priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, IF_RX));
@@ -828,6 +832,7 @@ static int c_can_read_objects(struct net
 		/* read the data from the message object */
 		c_can_read_msg_object(dev, IF_RX, ctrl);
 
+#ifndef CONFIG_CAN_C_CAN_NO_RX_SPLIT
 		if (obj < C_CAN_MSG_RX_LOW_LAST)
 			priv->rxmasked |= BIT(obj - 1);
 		else if (obj == C_CAN_MSG_RX_LOW_LAST) {
@@ -835,7 +840,7 @@ static int c_can_read_objects(struct net
 			/* activate all lower message objects */
 			c_can_activate_all_lower_rx_msg_obj(dev, IF_RX);
 		}
-
+#endif
 		pkts++;
 		quota--;
 	} while ((obj = ffs(pend)) && quota > 0);





^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-03 15:11     ` Thomas Gleixner
@ 2014-04-03 15:47       ` Alexander Stein
  2014-04-03 19:28         ` Oliver Hartkopp
  2014-04-03 20:28         ` Thomas Gleixner
  0 siblings, 2 replies; 37+ messages in thread
From: Alexander Stein @ 2014-04-03 15:47 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, linux-can

On Thursday 03 April 2014 17:11:18, Thomas Gleixner wrote:
> > > based on tag 'linux-can-fixes-for-3.15-20140401' on repository git://gitorious.org/linux-can/linux-can I have noticed message losts(only one message each time). Interestingly 'candump any,0~0,#FFFFFFFF' didn't show yn error frames.
> > > From 500000 CAN frames I lost 72 frames.
> 
> The split RX buffer handling is causing the hardware to drop frames
> without giving any notice. We debugged that in the last days and can
> prove that it is an hardware issue. Fun, isn't it ?
> 
> The package drop happens always at the point where the 8th message
> buffer is read out and all the lower buffers are reenabled. So
> something confuses the state machine of the message handler here.
> 
> If we disable the split buffer handling, then the drop out stops. That
> might cause packet reordering though. Find below a test patch.

I had to modify your patch slightly, but It didn't show any reordering though. Just the lost messages as before.
Regarding http://marc.info/?l=linux-can&m=139403207713987&w=2 it seems there is a race, when reenabling the lower buffers, bewteen reading current state and hardware inserting new message.

Alexander

Here is my used patch:
diff --git a/drivers/net/can/c_can/Kconfig b/drivers/net/can/c_can/Kconfig
index 61ffc12..fdca6a9 100644
--- a/drivers/net/can/c_can/Kconfig
+++ b/drivers/net/can/c_can/Kconfig
@@ -14,6 +14,12 @@ config CAN_C_CAN_PLATFORM
 	  SPEAr1310 and SPEAr320 evaluation boards & TI (www.ti.com)
 	  boards like am335x, dm814x, dm813x and dm811x.
 
+config CAN_C_CAN_NO_RX_SPLIT
+	bool "Disable RX Split buffer"
+	---help---
+	  RX Split buffer prevents packet reordering but can cause packet
+	  loss. Select the lesser of the two evils.
+
 config CAN_C_CAN_PCI
 	tristate "Generic PCI Bus based C_CAN/D_CAN driver"
 	depends on PCI
diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index 01dc494..d9dbac4 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -808,8 +808,12 @@ static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv,
 	while ((obj = ffs(pend)) && quota > 0) {
 		pend &= ~BIT(obj - 1);
 
+#ifndef CONFIG_CAN_C_CAN_NO_RX_SPLIT
 		mcmd = obj < C_CAN_MSG_RX_LOW_LAST ?
 			IF_COMM_RCV_LOW : IF_COMM_RCV_HIGH;
+#else
+		mcmd = IF_COMM_RCV_HIGH;
+#endif
 
 		c_can_object_get(dev, IF_RX, obj, mcmd);
 		ctrl = priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, IF_RX));
@@ -833,10 +837,11 @@ static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv,
 		/* read the data from the message object */
 		c_can_read_msg_object(dev, IF_RX, ctrl);
 
+#ifndef CONFIG_CAN_C_CAN_NO_RX_SPLIT
 		if (obj == C_CAN_MSG_RX_LOW_LAST)
 			/* activate all lower message objects */
 			c_can_activate_all_lower_rx_msg_obj(dev, IF_RX, ctrl);
-
+#endif
 		pkts++;
 		quota--;
 	}

-- 
Dipl.-Inf. Alexander Stein

SYS TEC electronic GmbH
Am Windrad 2
08468 Heinsdorfergrund
Tel.: 03765 38600-1156
Fax: 03765 38600-4100
Email: alexander.stein@systec-electronic.com
Website: www.systec-electronic.com
 
Managing Director: Dipl.-Phys. Siegmar Schmidt
Commercial registry: Amtsgericht Chemnitz, HRB 28082


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-03 14:14 [PATCH] c_can: Add support for eg20t (pch_can) Alexander Stein
  2014-04-03 14:55 ` Alexander Stein
@ 2014-04-03 18:41 ` Wolfgang Grandegger
  2014-04-07  9:47   ` Alexander Stein
  2014-04-17 19:53   ` Marc Kleine-Budde
  1 sibling, 2 replies; 37+ messages in thread
From: Wolfgang Grandegger @ 2014-04-03 18:41 UTC (permalink / raw)
  To: Alexander Stein, Marc, Kleine-Budde; +Cc: linux-can

On 04/03/2014 04:14 PM, Alexander Stein wrote:
> Signed-off-by: Alexander Stein <alexander.stein@systec-electronic.com>

Acked-by: Wolfgang Grandegger <wg@grandegger.com>

In my patch stack I have also th patch below. Could you please give it
I try before I re-submit it. Thanks.

Wolfgang.

From 8cc3c5fde12d0dc36b658483433d5b12be693492 Mon Sep 17 00:00:00 2001
From: Wolfgang Grandegger <wg@grandegger.com>
Date: Sat, 4 May 2013 22:28:43 +0200
Subject: [PATCH 07/12] c_can_pci: enable PCI bus master only for MSI

Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>
---
 c_can_pci.c |    7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/c_can_pci.c b/c_can_pci.c
index 3a2ac45..98562c2 100644
--- a/c_can_pci.c
+++ b/c_can_pci.c
@@ -86,8 +86,11 @@ static int c_can_pci_probe(struct pci_dev *pdev,
 		goto out_disable_device;
 	}
 
-	pci_set_master(pdev);
-	pci_enable_msi(pdev);
+	ret = pci_enable_msi(pdev);
+	if (!ret) {
+		dev_info(&pdev->dev, "MSI enabled\n");
+		pci_set_master(pdev);
+	}
 
 	addr = pci_iomap(pdev, c_can_pci_data->bar, 0);
 	if (!addr) {
-- 
1.7.9.5



^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-03 15:47       ` Alexander Stein
@ 2014-04-03 19:28         ` Oliver Hartkopp
  2014-04-03 20:59           ` Thomas Gleixner
  2014-04-03 20:28         ` Thomas Gleixner
  1 sibling, 1 reply; 37+ messages in thread
From: Oliver Hartkopp @ 2014-04-03 19:28 UTC (permalink / raw)
  To: Alexander Stein, Thomas Gleixner
  Cc: Marc Kleine-Budde, Wolfgang Grandegger, linux-can

Hello Alexander,

one of the differences of the SMP patch from Thomas

	http://marc.info/?l=linux-can&m=139638860032202&w=2

and your patch

	http://marc.info/?l=linux-can&m=139403248814172&w=2

is that you introduced a

	spin_lock_irqsave(&priv->lock, flags);

additionally to the usage of the two different message interfaces.

Could this probably help to fix the concurrent access?

Regards,
Oliver

On 03.04.2014 17:47, Alexander Stein wrote:
> On Thursday 03 April 2014 17:11:18, Thomas Gleixner wrote:
>>>> based on tag 'linux-can-fixes-for-3.15-20140401' on repository git://gitorious.org/linux-can/linux-can I have noticed message losts(only one message each time). Interestingly 'candump any,0~0,#FFFFFFFF' didn't show yn error frames.
>>>> From 500000 CAN frames I lost 72 frames.
>>
>> The split RX buffer handling is causing the hardware to drop frames
>> without giving any notice. We debugged that in the last days and can
>> prove that it is an hardware issue. Fun, isn't it ?
>>
>> The package drop happens always at the point where the 8th message
>> buffer is read out and all the lower buffers are reenabled. So
>> something confuses the state machine of the message handler here.
>>
>> If we disable the split buffer handling, then the drop out stops. That
>> might cause packet reordering though. Find below a test patch.
> 
> I had to modify your patch slightly, but It didn't show any reordering though. Just the lost messages as before.
> Regarding http://marc.info/?l=linux-can&m=139403207713987&w=2 it seems there is a race, when reenabling the lower buffers, bewteen reading current state and hardware inserting new message.
> 
> Alexander
> 
> Here is my used patch:
> diff --git a/drivers/net/can/c_can/Kconfig b/drivers/net/can/c_can/Kconfig
> index 61ffc12..fdca6a9 100644
> --- a/drivers/net/can/c_can/Kconfig
> +++ b/drivers/net/can/c_can/Kconfig
> @@ -14,6 +14,12 @@ config CAN_C_CAN_PLATFORM
>  	  SPEAr1310 and SPEAr320 evaluation boards & TI (www.ti.com)
>  	  boards like am335x, dm814x, dm813x and dm811x.
>  
> +config CAN_C_CAN_NO_RX_SPLIT
> +	bool "Disable RX Split buffer"
> +	---help---
> +	  RX Split buffer prevents packet reordering but can cause packet
> +	  loss. Select the lesser of the two evils.
> +
>  config CAN_C_CAN_PCI
>  	tristate "Generic PCI Bus based C_CAN/D_CAN driver"
>  	depends on PCI
> diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
> index 01dc494..d9dbac4 100644
> --- a/drivers/net/can/c_can/c_can.c
> +++ b/drivers/net/can/c_can/c_can.c
> @@ -808,8 +808,12 @@ static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv,
>  	while ((obj = ffs(pend)) && quota > 0) {
>  		pend &= ~BIT(obj - 1);
>  
> +#ifndef CONFIG_CAN_C_CAN_NO_RX_SPLIT
>  		mcmd = obj < C_CAN_MSG_RX_LOW_LAST ?
>  			IF_COMM_RCV_LOW : IF_COMM_RCV_HIGH;
> +#else
> +		mcmd = IF_COMM_RCV_HIGH;
> +#endif
>  
>  		c_can_object_get(dev, IF_RX, obj, mcmd);
>  		ctrl = priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, IF_RX));
> @@ -833,10 +837,11 @@ static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv,
>  		/* read the data from the message object */
>  		c_can_read_msg_object(dev, IF_RX, ctrl);
>  
> +#ifndef CONFIG_CAN_C_CAN_NO_RX_SPLIT
>  		if (obj == C_CAN_MSG_RX_LOW_LAST)
>  			/* activate all lower message objects */
>  			c_can_activate_all_lower_rx_msg_obj(dev, IF_RX, ctrl);
> -
> +#endif
>  		pkts++;
>  		quota--;
>  	}
> 

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-03 15:47       ` Alexander Stein
  2014-04-03 19:28         ` Oliver Hartkopp
@ 2014-04-03 20:28         ` Thomas Gleixner
  1 sibling, 0 replies; 37+ messages in thread
From: Thomas Gleixner @ 2014-04-03 20:28 UTC (permalink / raw)
  To: Alexander Stein; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, linux-can

On Thu, 3 Apr 2014, Alexander Stein wrote:

> On Thursday 03 April 2014 17:11:18, Thomas Gleixner wrote:
> > > > based on tag 'linux-can-fixes-for-3.15-20140401' on repository git://gitorious.org/linux-can/linux-can I have noticed message losts(only one message each time). Interestingly 'candump any,0~0,#FFFFFFFF' didn't show yn error frames.
> > > > From 500000 CAN frames I lost 72 frames.
> > 
> > The split RX buffer handling is causing the hardware to drop frames
> > without giving any notice. We debugged that in the last days and can
> > prove that it is an hardware issue. Fun, isn't it ?
> > 
> > The package drop happens always at the point where the 8th message
> > buffer is read out and all the lower buffers are reenabled. So
> > something confuses the state machine of the message handler here.
> > 
> > If we disable the split buffer handling, then the drop out stops. That
> > might cause packet reordering though. Find below a test patch.
> 
> I had to modify your patch slightly, but It didn't show any

Yeah, I had it on top of other modifications.

> reordering though. Just the lost messages as before.

Hmm. The lost packet issue is gone here when I enable the NO_RX_SPLIT
hack. I'm confused.

Vs. reordering: That's a corner case for which the split RX buffer was
designed to prevent. It's hard to trigger. I had to enforce it by
inserting artificial delays.

> Regarding http://marc.info/?l=linux-can&m=139403207713987&w=2 it
> seems there is a race, when reenabling the lower buffers, bewteen
> reading current state and hardware inserting new message.

The behaviour you describe is exactly the same as we traced out. While
the lower buffers are reenabled the packet which is coming in at this
point gets swallowed by the hardware.

And it's unrelated to RT. You can see the issue with mainline as
well. I tried both.

Thanks,

	tglx


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-03 19:28         ` Oliver Hartkopp
@ 2014-04-03 20:59           ` Thomas Gleixner
  0 siblings, 0 replies; 37+ messages in thread
From: Thomas Gleixner @ 2014-04-03 20:59 UTC (permalink / raw)
  To: Oliver Hartkopp
  Cc: Alexander Stein, Marc Kleine-Budde, Wolfgang Grandegger, linux-can

Oliver,

On Thu, 3 Apr 2014, Oliver Hartkopp wrote:

> Hello Alexander,
> 
> one of the differences of the SMP patch from Thomas
> 
> 	http://marc.info/?l=linux-can&m=139638860032202&w=2
> 
> and your patch
> 
> 	http://marc.info/?l=linux-can&m=139403248814172&w=2
> 
> is that you introduced a
> 
> 	spin_lock_irqsave(&priv->lock, flags);
> 
> additionally to the usage of the two different message interfaces.
> 
> Could this probably help to fix the concurrent access?

The spinlock does not help. There is no concurrent access from two
cpus in the RX path. I can prove the issue with an UP machine on
mainline.

The spinlock in the TX path is a different issue. That one is
necessary on SMP and RT.

What happens is:

RX Packet 1 --> message buffer 1 (newdat bit is not cleared)
RX Packet 2 --> message buffer 2 (newdat bit is not cleared)
RX Packet 3 --> message buffer 3 (newdat bit is not cleared)
RX Packet 4 --> message buffer 4 (newdat bit is not cleared)
RX Packet 5 --> message buffer 5 (newdat bit is not cleared)
RX Packet 6 --> message buffer 6 (newdat bit is not cleared)
RX Packet 7 --> message buffer 7 (newdat bit is not cleared)
RX Packet 8 --> message buffer 8 (newdat bit is not cleared)

Clear newdat bit in message buffer 1
Clear newdat bit in message buffer 2
Clear newdat bit in message buffer 3
Clear newdat bit in message buffer 4
Clear newdat bit in message buffer 5
Clear newdat bit in message buffer 6
Clear newdat bit in message buffer 7
Clear newdat bit in message buffer 8

Now if during that clearing of newdat bits, a new message comes in,
the HW gets confused and drops it. 

It does not matter how many of them you clear. I put a delay between
clear of buffer 1 and buffer 2 which was long enough that the message
should have been queued either in buffer 1 or buffer 9. But it did not
show up anywhere. The next message ended up in buffer 1. So the
hardware lost a packet of course without telling it via one of the
error handlers.

That does not happen on all clear newdat bit events. I see one of 10k
packets dropped in the scenario which allows us to reproduce. But the
trace looks always the same.

Now with the hack patch which kills the RX FIFO split we use the HW as
it was probably designed for. We read from the buffer 1 upwards and
clear the buffer as we get the message. That's how all
microcontrollers use it. So I assume that the way we handle the
buffers was never really tested. According to the public documentation
it should just work :)

If someone with access to the VHDL code could look into that, we might
get an hint how to work around that.

Interestingly enough TI has implemented a 3rd message control
interface which is for RX only. It allows the hardware to transfer a
new message right away to the interface and issue a DMA request which
then DMAs all of the message content into RAM. I'm inclined to use
that as the workaround and ditch the FIFO for that case
completely. The DMA transfer allows to use a single message object, so
we wont see any of that assumed that the mechanism works as advertised.

But of course that's not a generic solution.

Another idea is to read out the message buffers in hard interrupt
context, i.e. leave the RX interrupt enabled all the time and store
the packets in some private data structure and let NAPI drain that
into skbs. Kinda sucks though.

Thanks,

	tglx


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-03 18:41 ` Wolfgang Grandegger
@ 2014-04-07  9:47   ` Alexander Stein
  2014-04-07 10:19     ` Wolfgang Grandegger
  2014-04-07 12:06     ` Thomas Gleixner
  2014-04-17 19:53   ` Marc Kleine-Budde
  1 sibling, 2 replies; 37+ messages in thread
From: Alexander Stein @ 2014-04-07  9:47 UTC (permalink / raw)
  To: Wolfgang Grandegger, Marc Kleine-Budde; +Cc: linux-can

Hello Wolfgang,

On Thursday 03 April 2014 20:41:24, Wolfgang Grandegger wrote:
> On 04/03/2014 04:14 PM, Alexander Stein wrote:
> > Signed-off-by: Alexander Stein <alexander.stein@systec-electronic.com>
> 
> Acked-by: Wolfgang Grandegger <wg@grandegger.com>
> 
> In my patch stack I have also th patch below. Could you please give it
> I try before I re-submit it. Thanks.

With this patch the driver still works, at least MSI is working. The other problems still exist.

Regards,
Alexander

> From 8cc3c5fde12d0dc36b658483433d5b12be693492 Mon Sep 17 00:00:00 2001
> From: Wolfgang Grandegger <wg@grandegger.com>
> Date: Sat, 4 May 2013 22:28:43 +0200
> Subject: [PATCH 07/12] c_can_pci: enable PCI bus master only for MSI
> 
> Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>
> ---
>  c_can_pci.c |    7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/c_can_pci.c b/c_can_pci.c
> index 3a2ac45..98562c2 100644
> --- a/c_can_pci.c
> +++ b/c_can_pci.c
> @@ -86,8 +86,11 @@ static int c_can_pci_probe(struct pci_dev *pdev,
>  		goto out_disable_device;
>  	}
>  
> -	pci_set_master(pdev);
> -	pci_enable_msi(pdev);
> +	ret = pci_enable_msi(pdev);
> +	if (!ret) {
> +		dev_info(&pdev->dev, "MSI enabled\n");
> +		pci_set_master(pdev);
> +	}
>  
>  	addr = pci_iomap(pdev, c_can_pci_data->bar, 0);
>  	if (!addr) {
> 

-- 
Dipl.-Inf. Alexander Stein

SYS TEC electronic GmbH
Am Windrad 2
08468 Heinsdorfergrund
Tel.: 03765 38600-1156
Fax: 03765 38600-4100
Email: alexander.stein@systec-electronic.com
Website: www.systec-electronic.com
 
Managing Director: Dipl.-Phys. Siegmar Schmidt
Commercial registry: Amtsgericht Chemnitz, HRB 28082

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07  9:47   ` Alexander Stein
@ 2014-04-07 10:19     ` Wolfgang Grandegger
  2014-04-07 12:06     ` Thomas Gleixner
  1 sibling, 0 replies; 37+ messages in thread
From: Wolfgang Grandegger @ 2014-04-07 10:19 UTC (permalink / raw)
  To: Alexander Stein, Marc Kleine-Budde; +Cc: linux-can

On 04/07/2014 11:47 AM, Alexander Stein wrote:
> Hello Wolfgang,
> 
> On Thursday 03 April 2014 20:41:24, Wolfgang Grandegger wrote:
>> On 04/03/2014 04:14 PM, Alexander Stein wrote:
>>> Signed-off-by: Alexander Stein <alexander.stein@systec-electronic.com>
>>
>> Acked-by: Wolfgang Grandegger <wg@grandegger.com>
>>
>> In my patch stack I have also th patch below. Could you please give it
>> I try before I re-submit it. Thanks.
> 
> With this patch the driver still works, at least MSI is working. The other problems still exist.

I have taking this code from pch_can.c. PCI bus master is only required
for MSI. It will not curer the other problems, of course. I will roll
out this patch when your patch has been accepted.

Thanks for testing.

Wolfgang.


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07  9:47   ` Alexander Stein
  2014-04-07 10:19     ` Wolfgang Grandegger
@ 2014-04-07 12:06     ` Thomas Gleixner
  2014-04-07 12:07       ` Marc Kleine-Budde
  1 sibling, 1 reply; 37+ messages in thread
From: Thomas Gleixner @ 2014-04-07 12:06 UTC (permalink / raw)
  To: Alexander Stein; +Cc: Wolfgang Grandegger, Marc Kleine-Budde, linux-can

On Mon, 7 Apr 2014, Alexander Stein wrote:

> Hello Wolfgang,
> 
> On Thursday 03 April 2014 20:41:24, Wolfgang Grandegger wrote:
> > On 04/03/2014 04:14 PM, Alexander Stein wrote:
> > > Signed-off-by: Alexander Stein <alexander.stein@systec-electronic.com>
> > 
> > Acked-by: Wolfgang Grandegger <wg@grandegger.com>
> > 
> > In my patch stack I have also th patch below. Could you please give it
> > I try before I re-submit it. Thanks.
> 
> With this patch the driver still works, at least MSI is working. The other problems still exist.
> 

Which problems do still exist?

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07 12:06     ` Thomas Gleixner
@ 2014-04-07 12:07       ` Marc Kleine-Budde
  2014-04-07 12:24         ` Alexander Stein
  0 siblings, 1 reply; 37+ messages in thread
From: Marc Kleine-Budde @ 2014-04-07 12:07 UTC (permalink / raw)
  To: Thomas Gleixner, Alexander Stein; +Cc: Wolfgang Grandegger, linux-can

[-- Attachment #1: Type: text/plain, Size: 998 bytes --]

On 04/07/2014 02:06 PM, Thomas Gleixner wrote:
> On Mon, 7 Apr 2014, Alexander Stein wrote:
> 
>> Hello Wolfgang,
>>
>> On Thursday 03 April 2014 20:41:24, Wolfgang Grandegger wrote:
>>> On 04/03/2014 04:14 PM, Alexander Stein wrote:
>>>> Signed-off-by: Alexander Stein <alexander.stein@systec-electronic.com>
>>>
>>> Acked-by: Wolfgang Grandegger <wg@grandegger.com>
>>>
>>> In my patch stack I have also th patch below. Could you please give it
>>> I try before I re-submit it. Thanks.
>>
>> With this patch the driver still works, at least MSI is working. The other problems still exist.
>>
> 
> Which problems do still exist?

IIRC:
Either potentially reordered frames or dropped frames.

Marc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 242 bytes --]

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07 12:07       ` Marc Kleine-Budde
@ 2014-04-07 12:24         ` Alexander Stein
  2014-04-07 12:34           ` Thomas Gleixner
  0 siblings, 1 reply; 37+ messages in thread
From: Alexander Stein @ 2014-04-07 12:24 UTC (permalink / raw)
  To: Marc Kleine-Budde; +Cc: Thomas Gleixner, Wolfgang Grandegger, linux-can

On Monday 07 April 2014 14:07:21, Marc Kleine-Budde wrote:
> On 04/07/2014 02:06 PM, Thomas Gleixner wrote:
> > On Mon, 7 Apr 2014, Alexander Stein wrote:
> > 
> >> Hello Wolfgang,
> >>
> >> On Thursday 03 April 2014 20:41:24, Wolfgang Grandegger wrote:
> >>> On 04/03/2014 04:14 PM, Alexander Stein wrote:
> >>>> Signed-off-by: Alexander Stein <alexander.stein@systec-electronic.com>
> >>>
> >>> Acked-by: Wolfgang Grandegger <wg@grandegger.com>
> >>>
> >>> In my patch stack I have also th patch below. Could you please give it
> >>> I try before I re-submit it. Thanks.
> >>
> >> With this patch the driver still works, at least MSI is working. The other problems still exist.
> >>
> > 
> > Which problems do still exist?
> 
> IIRC:
> Either potentially reordered frames or dropped frames.

I'm currently trying to locate problem causes. Using your new set of patches on the tag linux-can-fixes-for-3.15-20140401 I lot of lost messages.
Using bisect it seems that c0a9f4d39 "can: c_can: Reduce register access" introduced this problem on my hardware. Before that I only see switched messages occasionally.

Best regards,
Alexander


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07 12:24         ` Alexander Stein
@ 2014-04-07 12:34           ` Thomas Gleixner
  2014-04-07 12:48             ` Alexander Stein
  0 siblings, 1 reply; 37+ messages in thread
From: Thomas Gleixner @ 2014-04-07 12:34 UTC (permalink / raw)
  To: Alexander Stein; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, linux-can

On Mon, 7 Apr 2014, Alexander Stein wrote:
> On Monday 07 April 2014 14:07:21, Marc Kleine-Budde wrote:
> > On 04/07/2014 02:06 PM, Thomas Gleixner wrote:
> > > On Mon, 7 Apr 2014, Alexander Stein wrote:
> > > 
> > >> Hello Wolfgang,
> > >>
> > >> On Thursday 03 April 2014 20:41:24, Wolfgang Grandegger wrote:
> > >>> On 04/03/2014 04:14 PM, Alexander Stein wrote:
> > >>>> Signed-off-by: Alexander Stein <alexander.stein@systec-electronic.com>
> > >>>
> > >>> Acked-by: Wolfgang Grandegger <wg@grandegger.com>
> > >>>
> > >>> In my patch stack I have also th patch below. Could you please give it
> > >>> I try before I re-submit it. Thanks.
> > >>
> > >> With this patch the driver still works, at least MSI is working. The other problems still exist.
> > >>
> > > 
> > > Which problems do still exist?
> > 
> > IIRC:
> > Either potentially reordered frames or dropped frames.
> 
> I'm currently trying to locate problem causes. Using your new set of patches on the tag linux-can-fixes-for-3.15-20140401 I lot of lost messages.
> Using bisect it seems that c0a9f4d39 "can: c_can: Reduce register access" introduced this problem on my hardware. Before that I only see switched messages occasionally.
> 

Does the last patch which disables the RX buffer split still have that
issue?

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07 12:34           ` Thomas Gleixner
@ 2014-04-07 12:48             ` Alexander Stein
  2014-04-07 12:56               ` Thomas Gleixner
  0 siblings, 1 reply; 37+ messages in thread
From: Alexander Stein @ 2014-04-07 12:48 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, linux-can

Hello Thomas,

On Monday 07 April 2014 14:34:17, Thomas Gleixner wrote:
> > I'm currently trying to locate problem causes. Using your new set of patches on the tag linux-can-fixes-for-3.15-20140401 I lot of lost messages.
> > Using bisect it seems that c0a9f4d39 "can: c_can: Reduce register access" introduced this problem on my hardware. Before that I only see switched messages occasionally.
> > 
> 
> Does the last patch which disables the RX buffer split still have that
> issue?

Regardless of CAN_C_CAN_STRICT_FRAME_ORDERING I'm still losing messages.
BTW: I need the following patch to be able to use CAN_C_CAN_STRICT_FRAME_ORDERING:

Regards,
Alexander

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index acdc24c..0e9f974 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -799,7 +799,7 @@ static inline void c_can_rx_object_get(struct net_device *dev, u32 obj)
 		c_can_object_get(dev, IF_RX, obj, IF_COMM_RCV_HIGH);
 }
 
-static inline void c_can_rx_finalize(struct c_can_priv *priv, u32 obj)
+static inline void c_can_rx_finalize(struct net_device *dev, struct c_can_priv *priv, u32 obj)
 {
 #ifdef CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING
 	if (obj < C_CAN_MSG_RX_LOW_LAST)
@@ -842,7 +842,7 @@ static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv,
 		/* read the data from the message object */
 		c_can_read_msg_object(dev, IF_RX, ctrl);
 
-		c_can_rx_finalize(priv, obj);
+		c_can_rx_finalize(dev, priv, obj);
 
 		pkts++;
 		quota--;
-- 
Dipl.-Inf. Alexander Stein

SYS TEC electronic GmbH
Am Windrad 2
08468 Heinsdorfergrund
Tel.: 03765 38600-1156
Fax: 03765 38600-4100
Email: alexander.stein@systec-electronic.com
Website: www.systec-electronic.com
 
Managing Director: Dipl.-Phys. Siegmar Schmidt
Commercial registry: Amtsgericht Chemnitz, HRB 28082


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07 12:48             ` Alexander Stein
@ 2014-04-07 12:56               ` Thomas Gleixner
  2014-04-07 12:58                 ` Alexander Stein
  0 siblings, 1 reply; 37+ messages in thread
From: Thomas Gleixner @ 2014-04-07 12:56 UTC (permalink / raw)
  To: Alexander Stein; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, linux-can

On Mon, 7 Apr 2014, Alexander Stein wrote:

> Hello Thomas,
> 
> On Monday 07 April 2014 14:34:17, Thomas Gleixner wrote:
> > > I'm currently trying to locate problem causes. Using your new set of patches on the tag linux-can-fixes-for-3.15-20140401 I lot of lost messages.
> > > Using bisect it seems that c0a9f4d39 "can: c_can: Reduce register access" introduced this problem on my hardware. Before that I only see switched messages occasionally.
> > > 
> > 
> > Does the last patch which disables the RX buffer split still have that
> > issue?
> 
> Regardless of CAN_C_CAN_STRICT_FRAME_ORDERING I'm still losing messages.
> BTW: I need the following patch to be able to use CAN_C_CAN_STRICT_FRAME_ORDERING:

Grr. True.

So if you disable CAN_C_CAN_STRICT_FRAME_ORDERING you still lose frames?

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07 12:56               ` Thomas Gleixner
@ 2014-04-07 12:58                 ` Alexander Stein
  2014-04-07 13:31                   ` Thomas Gleixner
  0 siblings, 1 reply; 37+ messages in thread
From: Alexander Stein @ 2014-04-07 12:58 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, linux-can

On Monday 07 April 2014 14:56:19, Thomas Gleixner wrote:
> On Mon, 7 Apr 2014, Alexander Stein wrote:
> 
> > Hello Thomas,
> > 
> > On Monday 07 April 2014 14:34:17, Thomas Gleixner wrote:
> > > > I'm currently trying to locate problem causes. Using your new set of patches on the tag linux-can-fixes-for-3.15-20140401 I lot of lost messages.
> > > > Using bisect it seems that c0a9f4d39 "can: c_can: Reduce register access" introduced this problem on my hardware. Before that I only see switched messages occasionally.
> > > > 
> > > 
> > > Does the last patch which disables the RX buffer split still have that
> > > issue?
> > 
> > Regardless of CAN_C_CAN_STRICT_FRAME_ORDERING I'm still losing messages.
> > BTW: I need the following patch to be able to use CAN_C_CAN_STRICT_FRAME_ORDERING:
> 
> Grr. True.
> 
> So if you disable CAN_C_CAN_STRICT_FRAME_ORDERING you still lose frames?

Yep, even on the commits before. But not from commit c0a9f4d39 on backwards.

Regards,
Alexander
-- 
Dipl.-Inf. Alexander Stein

SYS TEC electronic GmbH
Am Windrad 2
08468 Heinsdorfergrund
Tel.: 03765 38600-1156
Fax: 03765 38600-4100
Email: alexander.stein@systec-electronic.com
Website: www.systec-electronic.com
 
Managing Director: Dipl.-Phys. Siegmar Schmidt
Commercial registry: Amtsgericht Chemnitz, HRB 28082


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07 12:58                 ` Alexander Stein
@ 2014-04-07 13:31                   ` Thomas Gleixner
  2014-04-07 14:27                     ` Alexander Stein
  0 siblings, 1 reply; 37+ messages in thread
From: Thomas Gleixner @ 2014-04-07 13:31 UTC (permalink / raw)
  To: Alexander Stein; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, linux-can

On Mon, 7 Apr 2014, Alexander Stein wrote:
> On Monday 07 April 2014 14:56:19, Thomas Gleixner wrote:
> > On Mon, 7 Apr 2014, Alexander Stein wrote:
> > 
> > > Hello Thomas,
> > > 
> > > On Monday 07 April 2014 14:34:17, Thomas Gleixner wrote:
> > > > > I'm currently trying to locate problem causes. Using your new set of patches on the tag linux-can-fixes-for-3.15-20140401 I lot of lost messages.
> > > > > Using bisect it seems that c0a9f4d39 "can: c_can: Reduce register access" introduced this problem on my hardware. Before that I only see switched messages occasionally.
> > > > > 
> > > > 
> > > > Does the last patch which disables the RX buffer split still have that
> > > > issue?
> > > 
> > > Regardless of CAN_C_CAN_STRICT_FRAME_ORDERING I'm still losing messages.
> > > BTW: I need the following patch to be able to use CAN_C_CAN_STRICT_FRAME_ORDERING:
> > 
> > Grr. True.
> > 
> > So if you disable CAN_C_CAN_STRICT_FRAME_ORDERING you still lose frames?
> 
> Yep, even on the commits before. But not from commit c0a9f4d39 on backwards.

That's weird. With c0a9f4d39 and the CAN_C_CAN_STRICT_FRAME_ORDERING=n
we use the message buffers as recommended by the C_CAN spec. i.e.:

We transfer the message object to the buffer and clear intpnd/newdat
right away.

What other patches do you have applied?

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07 13:31                   ` Thomas Gleixner
@ 2014-04-07 14:27                     ` Alexander Stein
  2014-04-07 15:24                       ` Thomas Gleixner
  0 siblings, 1 reply; 37+ messages in thread
From: Alexander Stein @ 2014-04-07 14:27 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, linux-can

On Monday 07 April 2014 15:31:20, Thomas Gleixner wrote:
> On Mon, 7 Apr 2014, Alexander Stein wrote:
> > On Monday 07 April 2014 14:56:19, Thomas Gleixner wrote:
> > > On Mon, 7 Apr 2014, Alexander Stein wrote:
> > > 
> > > > Hello Thomas,
> > > > 
> > > > On Monday 07 April 2014 14:34:17, Thomas Gleixner wrote:
> > > > > > I'm currently trying to locate problem causes. Using your new set of patches on the tag linux-can-fixes-for-3.15-20140401 I lot of lost messages.
> > > > > > Using bisect it seems that c0a9f4d39 "can: c_can: Reduce register access" introduced this problem on my hardware. Before that I only see switched messages occasionally.
> > > > > > 
> > > > > 
> > > > > Does the last patch which disables the RX buffer split still have that
> > > > > issue?
> > > > 
> > > > Regardless of CAN_C_CAN_STRICT_FRAME_ORDERING I'm still losing messages.
> > > > BTW: I need the following patch to be able to use CAN_C_CAN_STRICT_FRAME_ORDERING:
> > > 
> > > Grr. True.
> > > 
> > > So if you disable CAN_C_CAN_STRICT_FRAME_ORDERING you still lose frames?
> > 
> > Yep, even on the commits before. But not from commit c0a9f4d39 on backwards.
> 
> That's weird. With c0a9f4d39 and the CAN_C_CAN_STRICT_FRAME_ORDERING=n
> we use the message buffers as recommended by the C_CAN spec. i.e.:
> 
> We transfer the message object to the buffer and clear intpnd/newdat
> right away.
> 
> What other patches do you have applied?

I have shrinked them a bit to the absolute necessity. In general these are patches for the current hardware to make them working.

$ git diff --stat linux-can-fixes-for-3.15-20140401..
 drivers/i2c/busses/i2c-eg20t.c                          |   2 +-
 drivers/net/can/c_can/Kconfig                           |   7 +++++
 drivers/net/can/c_can/c_can.c                           | 270 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------------
 drivers/net/can/c_can/c_can.h                           |   3 +-
 drivers/net/can/c_can/c_can_pci.c                       |  57 +++++++++++++++++++++++++++++++++++--
 drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h         |   1 +
 drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_api.c     |   6 ++--
 drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c |   9 ------
 drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c    |  52 +++++++++++++++++++---------------
 drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.c     |   5 ++--
 drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.h     |   1 -
 11 files changed, 234 insertions(+), 179 deletions(-)

$ git log --pretty=oneline linux-can-fixes-for-3.15-20140401..
40e2d2611b1341b26da5d0737668149609cc4ee3 Fix for can: c_can: Disable rx buffer split to prevent packet loss
45bf957f441fba3e424df59e2b5509fea13962d2 can: c_can: Disable rx buffer split to prevent packet loss
307678e593427826a4861109d0660989c024311f can: c_can: Get rid of pointless interrupts
3bef0e7bd01061899b906eade6d48499c63fb1c6 can: c_can: Avoid status register update for D_CAN
7ba15b9b59dc4d054e2344bee21707de3ee02bac can: c_can: Simplify buffer reenabling
0bb9ed221ed66787cbe9cecfa3eca394ddaf9548 can: c_can: Always update error stats
747462543ce1df21412e05f2daf86a8b68c36d3b can: c_can: Fix berr reporting
d60f488923998eb8e250260323e63438ab465cf9 can: c_can: Handle state change correctly
4c2030a6a955ae530ea32ce1e0ef3fdc8acbc445 can: c_can: Do not access skb after net_receive_skb()
144829bcfbbe97f4eab4b17f922d6d4b39027540 can: c_can: Make bus off interrupt disable logic work
8449454bf4a33ea5bf5aca2e896717459e428c2b can: c_can: Fix startup logic
fcbd69d2d869cb0c55c035d36cd6795251fd01da c_can_pci: enable PCI bus master only for MSI
53b385c37e4eb155cf9d515913ddf76750e2ac24 c_can: Add support for eg20t (pch_can)
95bf9c0f276925e401ac7d3c6ff47185b9badd2f pch_gbe: remove shutdown and restart of interface after PHY settings change
7229aafbce39e22c82544d7021dccd6b0c23f881 pch_gbe: reset PHY before first configuration
9fd3c55d07a41bc18931ec6a51176cbf667dbe21 pch_gbe: remove unnecessary PHY resets
c80898c56b575baa1ce2dcfbcaa6c654194f5cf8 pch_gbe: change order of PHY address discovering
c97bfddb0cf93c3ca0cb79490ab602b4ab24a740 pch_gbe: Add module parameter for RGMII/GMII PHY mode selection
f09f76391c8a90814fd78cfced35fceb616ea7e3 pch_gbe: print invalid MAC address during probe
14ab44191d17390be4fa2683092b8913a8b34a17 i2c-eg20t: Try to add busses starting from bus number 1

Regards,
Alexander
-- 
Dipl.-Inf. Alexander Stein

SYS TEC electronic GmbH
Am Windrad 2
08468 Heinsdorfergrund
Tel.: 03765 38600-1156
Fax: 03765 38600-4100
Email: alexander.stein@systec-electronic.com
Website: www.systec-electronic.com
 
Managing Director: Dipl.-Phys. Siegmar Schmidt
Commercial registry: Amtsgericht Chemnitz, HRB 28082


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07 14:27                     ` Alexander Stein
@ 2014-04-07 15:24                       ` Thomas Gleixner
  2014-04-07 15:36                         ` Alexander Stein
  2014-04-07 18:11                         ` Marc Kleine-Budde
  0 siblings, 2 replies; 37+ messages in thread
From: Thomas Gleixner @ 2014-04-07 15:24 UTC (permalink / raw)
  To: Alexander Stein; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, linux-can

On Mon, 7 Apr 2014, Alexander Stein wrote:
> On Monday 07 April 2014 15:31:20, Thomas Gleixner wrote:
> > What other patches do you have applied?
> 
> I have shrinked them a bit to the absolute necessity. In general
> these are patches for the current hardware to make them working.

> 53b385c37e4eb155cf9d515913ddf76750e2ac24 c_can: Add support for eg20t (pch_can)

I can't see anything whats wrong with that one.

Can you check, whether the following check in c_can_read_objects()

               if (!(ctrl & IF_MCONT_NEWDAT))
                        continue;

ever triggers?

It'd be odd, because we get the buffers with the NEWDAT pending bits
from the NEWDATA1 register.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07 15:24                       ` Thomas Gleixner
@ 2014-04-07 15:36                         ` Alexander Stein
  2014-04-07 15:53                           ` Thomas Gleixner
  2014-04-07 18:11                         ` Marc Kleine-Budde
  1 sibling, 1 reply; 37+ messages in thread
From: Alexander Stein @ 2014-04-07 15:36 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, linux-can

On Monday 07 April 2014 17:24:26, Thomas Gleixner wrote:
> On Mon, 7 Apr 2014, Alexander Stein wrote:
> > On Monday 07 April 2014 15:31:20, Thomas Gleixner wrote:
> > > What other patches do you have applied?
> > 
> > I have shrinked them a bit to the absolute necessity. In general
> > these are patches for the current hardware to make them working.
> 
> > 53b385c37e4eb155cf9d515913ddf76750e2ac24 c_can: Add support for eg20t (pch_can)
> 
> I can't see anything whats wrong with that one.
> 
> Can you check, whether the following check in c_can_read_objects()
> 
>                if (!(ctrl & IF_MCONT_NEWDAT))
>                         continue;
> 
> ever triggers?
> 
> It'd be odd, because we get the buffers with the NEWDAT pending bits
> from the NEWDATA1 register.

Using the following patch the warning raises about every 5ms. With and without your last patchset.
Being based on linux-can-fixes-for-3.15-20140401 + my patchset:
> fcbd69d2d869cb0c55c035d36cd6795251fd01da c_can_pci: enable PCI bus master only for MSI
> 53b385c37e4eb155cf9d515913ddf76750e2ac24 c_can: Add support for eg20t (pch_can)
> 95bf9c0f276925e401ac7d3c6ff47185b9badd2f pch_gbe: remove shutdown and restart of interface after PHY settings change
> 7229aafbce39e22c82544d7021dccd6b0c23f881 pch_gbe: reset PHY before first configuration
> 9fd3c55d07a41bc18931ec6a51176cbf667dbe21 pch_gbe: remove unnecessary PHY resets
> c80898c56b575baa1ce2dcfbcaa6c654194f5cf8 pch_gbe: change order of PHY address discovering
> c97bfddb0cf93c3ca0cb79490ab602b4ab24a740 pch_gbe: Add module parameter for RGMII/GMII PHY mode selection
> f09f76391c8a90814fd78cfced35fceb616ea7e3 pch_gbe: print invalid MAC address during probe
> 14ab44191d17390be4fa2683092b8913a8b34a17 i2c-eg20t: Try to add busses starting from bus number 1

but reverting c0a9f4d39 this does _NOT_ arise.

Regards,
Alexander

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index 0e9f974..1a76565 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -836,8 +836,10 @@ static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv,
 		 * odd HW behaviour. Do not remove that unless you
 		 * want to brick your machine.
 		 */
-		if (!(ctrl & IF_MCONT_NEWDAT))
+		if (!(ctrl & IF_MCONT_NEWDAT)) {
+			netdev_warn(dev, "Odd behavior");
 			continue;
+		}
 
 		/* read the data from the message object */
 		c_can_read_msg_object(dev, IF_RX, ctrl);

-- 
Dipl.-Inf. Alexander Stein

SYS TEC electronic GmbH
Am Windrad 2
08468 Heinsdorfergrund
Tel.: 03765 38600-1156
Fax: 03765 38600-4100
Email: alexander.stein@systec-electronic.com
Website: www.systec-electronic.com
 
Managing Director: Dipl.-Phys. Siegmar Schmidt
Commercial registry: Amtsgericht Chemnitz, HRB 28082


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07 15:36                         ` Alexander Stein
@ 2014-04-07 15:53                           ` Thomas Gleixner
  2014-04-07 16:06                             ` Alexander Stein
  2014-04-07 16:27                             ` Wolfgang Grandegger
  0 siblings, 2 replies; 37+ messages in thread
From: Thomas Gleixner @ 2014-04-07 15:53 UTC (permalink / raw)
  To: Alexander Stein; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, linux-can

On Mon, 7 Apr 2014, Alexander Stein wrote:
> On Monday 07 April 2014 17:24:26, Thomas Gleixner wrote:
> > It'd be odd, because we get the buffers with the NEWDAT pending bits
> > from the NEWDATA1 register.
>
 
> Using the following patch the warning raises about every 5ms. With
> and without your last patchset.

> but reverting c0a9f4d39 this does _NOT_ arise.

So the NEWDAT register is telling us that the newdat bit of that
buffer is set. But when we retrieve the message, it's not set.

What's even more confusing: On my D_CAN hardware the NEWDAT is not set
event happens when I revert c0a9f4d39

So it would be interesting to see, whether this event is aligned to
the lost packets you are hunting:

    P1 -> P2 -> WARN -> P4

A few trace printks should tell us pretty fast.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07 15:53                           ` Thomas Gleixner
@ 2014-04-07 16:06                             ` Alexander Stein
  2014-04-07 16:27                               ` Thomas Gleixner
  2014-04-07 16:27                             ` Wolfgang Grandegger
  1 sibling, 1 reply; 37+ messages in thread
From: Alexander Stein @ 2014-04-07 16:06 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, linux-can

On Monday 07 April 2014 17:53:59, Thomas Gleixner wrote:
> On Mon, 7 Apr 2014, Alexander Stein wrote:
> > On Monday 07 April 2014 17:24:26, Thomas Gleixner wrote:
> > > It'd be odd, because we get the buffers with the NEWDAT pending bits
> > > from the NEWDATA1 register.
> >
>  
> > Using the following patch the warning raises about every 5ms. With
> > and without your last patchset.
> 
> > but reverting c0a9f4d39 this does _NOT_ arise.
> 
> So the NEWDAT register is telling us that the newdat bit of that
> buffer is set. But when we retrieve the message, it's not set.
> 
> What's even more confusing: On my D_CAN hardware the NEWDAT is not set
> event happens when I revert c0a9f4d39
> 
> So it would be interesting to see, whether this event is aligned to
> the lost packets you are hunting:
> 
>     P1 -> P2 -> WARN -> P4
> 
> A few trace printks should tell us pretty fast.

I slightly modified my patch (see below) to see which obj is marked with NEWDAT wrongly.
It turns out it is spread over many message objects, but object 1 is notably the one most occuring.
Can it happen, that NEWDAT1 already shows the bit while the NEWDAT bit is not updated yet?
Or does commit c0a9f4d39 affect this status because INTPEND is cleared before reading the message object now?

Regards,
Alexander

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index 0e9f974..f65124a 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -836,8 +836,10 @@ static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv,
 		 * odd HW behaviour. Do not remove that unless you
 		 * want to brick your machine.
 		 */
-		if (!(ctrl & IF_MCONT_NEWDAT))
+		if (!(ctrl & IF_MCONT_NEWDAT)) {
+			netdev_warn(dev, "Odd behavior on obj: %d", obj);
 			continue;
+		}
 
 		/* read the data from the message object */
 		c_can_read_msg_object(dev, IF_RX, ctrl);

-- 
Dipl.-Inf. Alexander Stein

SYS TEC electronic GmbH
Am Windrad 2
08468 Heinsdorfergrund
Tel.: 03765 38600-1156
Fax: 03765 38600-4100
Email: alexander.stein@systec-electronic.com
Website: www.systec-electronic.com
 
Managing Director: Dipl.-Phys. Siegmar Schmidt
Commercial registry: Amtsgericht Chemnitz, HRB 28082


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07 15:53                           ` Thomas Gleixner
  2014-04-07 16:06                             ` Alexander Stein
@ 2014-04-07 16:27                             ` Wolfgang Grandegger
  2014-04-07 20:03                               ` Thomas Gleixner
  1 sibling, 1 reply; 37+ messages in thread
From: Wolfgang Grandegger @ 2014-04-07 16:27 UTC (permalink / raw)
  To: Thomas Gleixner, Alexander Stein; +Cc: Marc Kleine-Budde, linux-can

On 04/07/2014 05:53 PM, Thomas Gleixner wrote:
> On Mon, 7 Apr 2014, Alexander Stein wrote:
>> On Monday 07 April 2014 17:24:26, Thomas Gleixner wrote:
>>> It'd be odd, because we get the buffers with the NEWDAT pending bits
>>> from the NEWDATA1 register.
>>
>  
>> Using the following patch the warning raises about every 5ms. With
>> and without your last patchset.
> 
>> but reverting c0a9f4d39 this does _NOT_ arise.
> 
> So the NEWDAT register is telling us that the newdat bit of that
> buffer is set. But when we retrieve the message, it's not set.


Not sure if it matters. There was a strange write-readback problem
reported with the PCH CAN. I digged out:

http://marc.info/?l=linux-can&m=135525750319741&w=2
http://marc.info/?l=linux-can&m=135525729919672&w=2
http://marc.info/?t=135296394900001&r=1&w=2

It got worse with concurrent activity of I2C on some eg20t system which
smells of a weired hardware problem (and could maybe explain the 5ms
period).

Wolfgang.



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07 16:06                             ` Alexander Stein
@ 2014-04-07 16:27                               ` Thomas Gleixner
  2014-04-08  7:07                                 ` Alexander Stein
  2014-04-08  7:18                                 ` Alexander Stein
  0 siblings, 2 replies; 37+ messages in thread
From: Thomas Gleixner @ 2014-04-07 16:27 UTC (permalink / raw)
  To: Alexander Stein; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, linux-can

On Mon, 7 Apr 2014, Alexander Stein wrote:
> On Monday 07 April 2014 17:53:59, Thomas Gleixner wrote:
> > On Mon, 7 Apr 2014, Alexander Stein wrote:
> > > On Monday 07 April 2014 17:24:26, Thomas Gleixner wrote:
> > > > It'd be odd, because we get the buffers with the NEWDAT pending bits
> > > > from the NEWDATA1 register.
> > >
> >  
> > > Using the following patch the warning raises about every 5ms. With
> > > and without your last patchset.
> > 
> > > but reverting c0a9f4d39 this does _NOT_ arise.
> > 
> > So the NEWDAT register is telling us that the newdat bit of that
> > buffer is set. But when we retrieve the message, it's not set.
> > 
> > What's even more confusing: On my D_CAN hardware the NEWDAT is not set
> > event happens when I revert c0a9f4d39
> > 
> > So it would be interesting to see, whether this event is aligned to
> > the lost packets you are hunting:
> > 
> >     P1 -> P2 -> WARN -> P4
> > 
> > A few trace printks should tell us pretty fast.
> 
> I slightly modified my patch (see below) to see which obj is marked
> with NEWDAT wrongly.  It turns out it is spread over many message
> objects, but object 1 is notably the one most occuring.  Can it
> happen, that NEWDAT1 already shows the bit while the NEWDAT bit is
> not updated yet?  Or does commit c0a9f4d39 affect this status
> because INTPEND is cleared before reading the message object now?

That's what we do not know. On D_CAN it result in hardware silently
dropping packets when we reneable the lower message buffers by
clearing the NEWDAT bit. So C_CAN might show some different behaviour
by giving us objects with the NEWDAT bit cleared.

That's why I came up with the patch which disables the RX Split. With 
CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING=n we clear NEWDAT and INTPEND
immediately. That's what the docs recommend (including the eg20t one).
We trade the lost packets against the potential reordering.

What really buggers me is that this does not work with your
hardware. I'll whip up a tracing patch later tonight.

Just for completeness, find the corrected patch below.

Thanks,

	tglx

------------->
Subject: can: c_can : Disable rx split as workaround
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 03 Apr 2014 16:10:43 +0200

The RX buffer split causes packet loss in the hardware:

What happens is:

RX Packet 1 --> message buffer 1 (newdat bit is not cleared)
RX Packet 2 --> message buffer 2 (newdat bit is not cleared)
RX Packet 3 --> message buffer 3 (newdat bit is not cleared)
RX Packet 4 --> message buffer 4 (newdat bit is not cleared)
RX Packet 5 --> message buffer 5 (newdat bit is not cleared)
RX Packet 6 --> message buffer 6 (newdat bit is not cleared)
RX Packet 7 --> message buffer 7 (newdat bit is not cleared)
RX Packet 8 --> message buffer 8 (newdat bit is not cleared)

Clear newdat bit in message buffer 1
Clear newdat bit in message buffer 2
Clear newdat bit in message buffer 3
Clear newdat bit in message buffer 4
Clear newdat bit in message buffer 5
Clear newdat bit in message buffer 6
Clear newdat bit in message buffer 7
Clear newdat bit in message buffer 8

Now if during that clearing of newdat bits, a new message comes in,
the HW gets confused and drops it. 

It does not matter how many of them you clear. I put a delay between
clear of buffer 1 and buffer 2 which was long enough that the message
should have been queued either in buffer 1 or buffer 9. But it did not
show up anywhere. The next message ended up in buffer 1. So the
hardware lost a packet of course without telling it via one of the
error handlers.

That does not happen on all clear newdat bit events. I see one of 10k
packets dropped in the scenario which allows us to reproduce. But the
trace looks always the same.

Not splitting the RX Buffer avoids the packet loss but can cause
reordering. It's hard to trigger, but it CAN happen.

With that mode we use the HW as it was probably designed for. We read
from the buffer 1 upwards and clear the buffer as we get the
message. That's how all microcontrollers use it. So I assume that the
way we handle the buffers was never really tested. According to the
public documentation it should just work :)

Let the user decide which evil is the lesser one.

[ Oliver Hartkopp: Provided a sane config option and help text and
  made me switch to favour potential and unlikely reordering over
  packet loss ]

[ Alexander Stein: Made the CAN_C_CAN_STRICT_FRAME_ORDERING=n case
  compile ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/net/can/c_can/Kconfig |    7 +++++
 drivers/net/can/c_can/c_can.c |   54 ++++++++++++++++++++++++++++++------------
 2 files changed, 46 insertions(+), 15 deletions(-)

Index: linux/drivers/net/can/c_can/Kconfig
===================================================================
--- linux.orig/drivers/net/can/c_can/Kconfig
+++ linux/drivers/net/can/c_can/Kconfig
@@ -14,6 +14,13 @@ config CAN_C_CAN_PLATFORM
 	  SPEAr1310 and SPEAr320 evaluation boards & TI (www.ti.com)
 	  boards like am335x, dm814x, dm813x and dm811x.
 
+config CAN_C_CAN_STRICT_FRAME_ORDERING
+	bool "Force a strict RX CAN frame order (may cause frame loss)"
+	---help---
+	  The RX split buffer prevents packet reordering but can cause packet
+	  loss. Only enable this option when you accept to lose CAN frames
+	  in favour of getting the received CAN frames in the correct order.
+
 config CAN_C_CAN_PCI
 	tristate "Generic PCI Bus based C_CAN/D_CAN driver"
 	depends on PCI
Index: linux/drivers/net/can/c_can/c_can.c
===================================================================
--- linux.orig/drivers/net/can/c_can/c_can.c
+++ linux/drivers/net/can/c_can/c_can.c
@@ -789,18 +789,39 @@ static u32 c_can_adjust_pending(u32 pend
 	return pend & ~((1 << lasts) - 1);
 }
 
+static inline void c_can_rx_object_get(struct net_device *dev, u32 obj)
+{
+#ifdef CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING
+	if (obj < C_CAN_MSG_RX_LOW_LAST)
+		c_can_object_get(dev, IF_RX, obj, IF_COMM_RCV_LOW);
+	else
+#endif
+		c_can_object_get(dev, IF_RX, obj, IF_COMM_RCV_HIGH);
+}
+
+static inline void c_can_rx_finalize(struct net_device *dev,
+				     struct c_can_priv *priv, u32 obj)
+{
+#ifdef CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING
+	if (obj < C_CAN_MSG_RX_LOW_LAST)
+		priv->rxmasked |= BIT(obj - 1);
+	else if (obj == C_CAN_MSG_RX_LOW_LAST) {
+		priv->rxmasked = 0;
+		/* activate all lower message objects */
+		c_can_activate_all_lower_rx_msg_obj(dev, IF_RX);
+	}
+#endif
+}
+
 static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv,
 			      u32 pend, int quota)
 {
-	u32 pkts = 0, ctrl, obj, mcmd;
+	u32 pkts = 0, ctrl, obj;
 
 	while ((obj = ffs(pend)) && quota > 0) {
 		pend &= ~BIT(obj - 1);
 
-		mcmd = obj < C_CAN_MSG_RX_LOW_LAST ?
-			IF_COMM_RCV_LOW : IF_COMM_RCV_HIGH;
-
-		c_can_object_get(dev, IF_RX, obj, mcmd);
+		c_can_rx_object_get(dev, obj);
 		ctrl = priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, IF_RX));
 
 		if (ctrl & IF_MCONT_MSGLST) {
@@ -820,21 +841,25 @@ static int c_can_read_objects(struct net
 		/* read the data from the message object */
 		c_can_read_msg_object(dev, IF_RX, ctrl);
 
-		if (obj < C_CAN_MSG_RX_LOW_LAST)
-			priv->rxmasked |= BIT(obj - 1);
-		else if (obj == C_CAN_MSG_RX_LOW_LAST) {
-			priv->rxmasked = 0;
-			/* activate all lower message objects */
-			c_can_activate_all_lower_rx_msg_obj(dev, IF_RX);
-		}
+		c_can_rx_finalize(dev, priv, obj);
 
 		pkts++;
 		quota--;
-	} while ((obj = ffs(pend)) && quota > 0);
+	}
 
 	return pkts;
 }
 
+static inline u32 c_can_get_pending(struct c_can_priv *priv)
+{
+	u32 pend = priv->read_reg(priv, C_CAN_NEWDAT1_REG);
+
+#ifdef CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING
+	pend &= ~priv->rxmasked;
+#endif
+	return pend;
+}
+
 /*
  * theory of operation:
  *
@@ -871,8 +896,7 @@ static int c_can_do_rx_poll(struct net_d
 
 	while (quota > 0) {
 		if (!pend) {
-			pend = priv->read_reg(priv, C_CAN_NEWDAT1_REG);
-			pend &= ~priv->rxmasked;
+			pend = c_can_get_pending(priv);
 			if (!pend)
 				break;
 			/*






^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07 15:24                       ` Thomas Gleixner
  2014-04-07 15:36                         ` Alexander Stein
@ 2014-04-07 18:11                         ` Marc Kleine-Budde
  2014-04-07 18:15                           ` Thomas Gleixner
  1 sibling, 1 reply; 37+ messages in thread
From: Marc Kleine-Budde @ 2014-04-07 18:11 UTC (permalink / raw)
  To: Thomas Gleixner, Alexander Stein; +Cc: Wolfgang Grandegger, linux-can

[-- Attachment #1: Type: text/plain, Size: 1156 bytes --]

On 04/07/2014 05:24 PM, Thomas Gleixner wrote:
> On Mon, 7 Apr 2014, Alexander Stein wrote:
>> On Monday 07 April 2014 15:31:20, Thomas Gleixner wrote:
>>> What other patches do you have applied?
>>
>> I have shrinked them a bit to the absolute necessity. In general
>> these are patches for the current hardware to make them working.
> 
>> 53b385c37e4eb155cf9d515913ddf76750e2ac24 c_can: Add support for eg20t (pch_can)
> 
> I can't see anything whats wrong with that one.
> 
> Can you check, whether the following check in c_can_read_objects()
> 
>                if (!(ctrl & IF_MCONT_NEWDAT))
>                         continue;

On TI's D_CAN with all your patches applied and the split buffer
approach active, I haven't this trigger in a ~4h test run. Athough there
are regularly missing frames, apart from this, they are in the correct
order.

Marc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 242 bytes --]

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07 18:11                         ` Marc Kleine-Budde
@ 2014-04-07 18:15                           ` Thomas Gleixner
  0 siblings, 0 replies; 37+ messages in thread
From: Thomas Gleixner @ 2014-04-07 18:15 UTC (permalink / raw)
  To: Marc Kleine-Budde; +Cc: Alexander Stein, Wolfgang Grandegger, linux-can

On Mon, 7 Apr 2014, Marc Kleine-Budde wrote:

> On 04/07/2014 05:24 PM, Thomas Gleixner wrote:
> > On Mon, 7 Apr 2014, Alexander Stein wrote:
> >> On Monday 07 April 2014 15:31:20, Thomas Gleixner wrote:
> >>> What other patches do you have applied?
> >>
> >> I have shrinked them a bit to the absolute necessity. In general
> >> these are patches for the current hardware to make them working.
> > 
> >> 53b385c37e4eb155cf9d515913ddf76750e2ac24 c_can: Add support for eg20t (pch_can)
> > 
> > I can't see anything whats wrong with that one.
> > 
> > Can you check, whether the following check in c_can_read_objects()
> > 
> >                if (!(ctrl & IF_MCONT_NEWDAT))
> >                         continue;
> 
> On TI's D_CAN with all your patches applied and the split buffer
> approach active, I haven't this trigger in a ~4h test run. Athough there
> are regularly missing frames, apart from this, they are in the correct
> order.

If you disable the split buffer is the message loss gone?

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07 16:27                             ` Wolfgang Grandegger
@ 2014-04-07 20:03                               ` Thomas Gleixner
  2014-04-08  6:17                                 ` Alexander Stein
  0 siblings, 1 reply; 37+ messages in thread
From: Thomas Gleixner @ 2014-04-07 20:03 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: Alexander Stein, Marc Kleine-Budde, linux-can

On Mon, 7 Apr 2014, Wolfgang Grandegger wrote:
> On 04/07/2014 05:53 PM, Thomas Gleixner wrote:
> > On Mon, 7 Apr 2014, Alexander Stein wrote:
> >> On Monday 07 April 2014 17:24:26, Thomas Gleixner wrote:
> >>> It'd be odd, because we get the buffers with the NEWDAT pending bits
> >>> from the NEWDATA1 register.
> >>
> >  
> >> Using the following patch the warning raises about every 5ms. With
> >> and without your last patchset.
> > 
> >> but reverting c0a9f4d39 this does _NOT_ arise.
> > 
> > So the NEWDAT register is telling us that the newdat bit of that
> > buffer is set. But when we retrieve the message, it's not set.
> 
> 
> Not sure if it matters. There was a strange write-readback problem
> reported with the PCH CAN. I digged out:
> 
> http://marc.info/?l=linux-can&m=135525750319741&w=2
> http://marc.info/?l=linux-can&m=135525729919672&w=2
> http://marc.info/?t=135296394900001&r=1&w=2
> 
> It got worse with concurrent activity of I2C on some eg20t system which
> smells of a weired hardware problem (and could maybe explain the 5ms
> period).

Well, looking at the report:

> I cannot say if any (small) I2C transfer at all raises the
> problem. I run 'cangen -I 0x300 can0' on my PC connected to my test
> board. A I2C connected LED is triggered by heartbeat thus there is a
> small I2C traffic each second. I couldn't see any errors in dmesg in
> about 10 minutes. But even with that small CAN traffic (next to
> nothing) a 'watch sensors' (which queries several I2C sensors every
> 2s) caused errors in dmesg. It seems the problem isn't related to
> CAN bus load at all.

I2C connected LED? How is that supposed to work? You cannot run I2C
traffic from softirq context. 

Thanks,

	tglx



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07 20:03                               ` Thomas Gleixner
@ 2014-04-08  6:17                                 ` Alexander Stein
  2014-04-08  8:04                                   ` Thomas Gleixner
  0 siblings, 1 reply; 37+ messages in thread
From: Alexander Stein @ 2014-04-08  6:17 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Wolfgang Grandegger, Marc Kleine-Budde, linux-can

On Monday 07 April 2014 22:03:33, Thomas Gleixner wrote:
> On Mon, 7 Apr 2014, Wolfgang Grandegger wrote:
> > On 04/07/2014 05:53 PM, Thomas Gleixner wrote:
> > > On Mon, 7 Apr 2014, Alexander Stein wrote:
> > >> On Monday 07 April 2014 17:24:26, Thomas Gleixner wrote:
> > >>> It'd be odd, because we get the buffers with the NEWDAT pending bits
> > >>> from the NEWDATA1 register.
> > >>
> > >  
> > >> Using the following patch the warning raises about every 5ms. With
> > >> and without your last patchset.
> > > 
> > >> but reverting c0a9f4d39 this does _NOT_ arise.
> > > 
> > > So the NEWDAT register is telling us that the newdat bit of that
> > > buffer is set. But when we retrieve the message, it's not set.
> > 
> > 
> > Not sure if it matters. There was a strange write-readback problem
> > reported with the PCH CAN. I digged out:
> > 
> > http://marc.info/?l=linux-can&m=135525750319741&w=2
> > http://marc.info/?l=linux-can&m=135525729919672&w=2
> > http://marc.info/?t=135296394900001&r=1&w=2
> > 
> > It got worse with concurrent activity of I2C on some eg20t system which
> > smells of a weired hardware problem (and could maybe explain the 5ms
> > period).
> 
> Well, looking at the report:
> 
> > I cannot say if any (small) I2C transfer at all raises the
> > problem. I run 'cangen -I 0x300 can0' on my PC connected to my test
> > board. A I2C connected LED is triggered by heartbeat thus there is a
> > small I2C traffic each second. I couldn't see any errors in dmesg in
> > about 10 minutes. But even with that small CAN traffic (next to
> > nothing) a 'watch sensors' (which queries several I2C sensors every
> > 2s) caused errors in dmesg. It seems the problem isn't related to
> > CAN bus load at all.
> 
> I2C connected LED? How is that supposed to work? You cannot run I2C
> traffic from softirq context. 

Why not? The led_heartbeat_function function (kernel v3.0.31 in that case) runs at last pca955x_led_set which calls schedule_work.
To my surprise using the current kernel and c_can driver (I used pch_can in v3.0.x) it seems that I2C transfer doesn't affect CAN at all.

Regards,
Alexander
-- 
Dipl.-Inf. Alexander Stein

SYS TEC electronic GmbH
Am Windrad 2
08468 Heinsdorfergrund
Tel.: 03765 38600-1156
Fax: 03765 38600-4100
Email: alexander.stein@systec-electronic.com
Website: www.systec-electronic.com
 
Managing Director: Dipl.-Phys. Siegmar Schmidt
Commercial registry: Amtsgericht Chemnitz, HRB 28082


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07 16:27                               ` Thomas Gleixner
@ 2014-04-08  7:07                                 ` Alexander Stein
  2014-04-08  8:26                                   ` Thomas Gleixner
  2014-04-08  7:18                                 ` Alexander Stein
  1 sibling, 1 reply; 37+ messages in thread
From: Alexander Stein @ 2014-04-08  7:07 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, linux-can

On Monday 07 April 2014 18:27:11, Thomas Gleixner wrote:
> > > So it would be interesting to see, whether this event is aligned to
> > > the lost packets you are hunting:
> > > 
> > >     P1 -> P2 -> WARN -> P4
> > > 
> > > A few trace printks should tell us pretty fast.
> > 
> > I slightly modified my patch (see below) to see which obj is marked
> > with NEWDAT wrongly.  It turns out it is spread over many message
> > objects, but object 1 is notably the one most occuring.  Can it
> > happen, that NEWDAT1 already shows the bit while the NEWDAT bit is
> > not updated yet?  Or does commit c0a9f4d39 affect this status
> > because INTPEND is cleared before reading the message object now?
> 
> That's what we do not know. On D_CAN it result in hardware silently
> dropping packets when we reneable the lower message buffers by
> clearing the NEWDAT bit. So C_CAN might show some different behaviour
> by giving us objects with the NEWDAT bit cleared.
> 
> That's why I came up with the patch which disables the RX Split. With 
> CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING=n we clear NEWDAT and INTPEND
> immediately. That's what the docs recommend (including the eg20t one).
> We trade the lost packets against the potential reordering.
> 
> What really buggers me is that this does not work with your
> hardware. I'll whip up a tracing patch later tonight.

So I noticed that with CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING=n (NEWDAT and INTPEND cleared immediately) the "odd behavior" occurs pretty often.
Before
> c_can_rx_object_get(dev, obj);
NEWDAT is still set, but afterwards it is already cleared in C_CAN_NEWDAT1_REG.
I guess the NEWDAT is already cleared in the message object before reading from the message object with this line
> ctrl = priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, IF_RX));

So I removed that IF_MCONT_NEWDAT check (see below) and I didn't lose any message, but get 5 duplicates in 250000 messages (no reordering though).

Setting CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING=y with that patch I lose ~650msg per 250000 frames.

Regards,
Alexander

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index 0e9f974..dd75360 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -831,14 +831,6 @@ static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv,
 			continue;
 		}
 
-		/*
-		 * This really should not happen, but this covers some
-		 * odd HW behaviour. Do not remove that unless you
-		 * want to brick your machine.
-		 */
-		if (!(ctrl & IF_MCONT_NEWDAT))
-			continue;
-
 		/* read the data from the message object */
 		c_can_read_msg_object(dev, IF_RX, ctrl);
 

-- 
Dipl.-Inf. Alexander Stein

SYS TEC electronic GmbH
Am Windrad 2
08468 Heinsdorfergrund
Tel.: 03765 38600-1156
Fax: 03765 38600-4100
Email: alexander.stein@systec-electronic.com
Website: www.systec-electronic.com
 
Managing Director: Dipl.-Phys. Siegmar Schmidt
Commercial registry: Amtsgericht Chemnitz, HRB 28082


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-07 16:27                               ` Thomas Gleixner
  2014-04-08  7:07                                 ` Alexander Stein
@ 2014-04-08  7:18                                 ` Alexander Stein
  2014-04-08  7:35                                   ` Thomas Gleixner
  1 sibling, 1 reply; 37+ messages in thread
From: Alexander Stein @ 2014-04-08  7:18 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, linux-can

On Monday 07 April 2014 18:27:11, Thomas Gleixner wrote:
>  static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv,
>  			      u32 pend, int quota)
>  {
> -	u32 pkts = 0, ctrl, obj, mcmd;
> +	u32 pkts = 0, ctrl, obj;
>  
>  	while ((obj = ffs(pend)) && quota > 0) {
>  		pend &= ~BIT(obj - 1);
>  
> -		mcmd = obj < C_CAN_MSG_RX_LOW_LAST ?
> -			IF_COMM_RCV_LOW : IF_COMM_RCV_HIGH;
> -
> -		c_can_object_get(dev, IF_RX, obj, mcmd);
> +		c_can_rx_object_get(dev, obj);
>  		ctrl = priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, IF_RX));
>  
>  		if (ctrl & IF_MCONT_MSGLST) {
> @@ -820,21 +841,25 @@ static int c_can_read_objects(struct net
>  		/* read the data from the message object */
>  		c_can_read_msg_object(dev, IF_RX, ctrl);
>  
> -		if (obj < C_CAN_MSG_RX_LOW_LAST)
> -			priv->rxmasked |= BIT(obj - 1);
> -		else if (obj == C_CAN_MSG_RX_LOW_LAST) {
> -			priv->rxmasked = 0;
> -			/* activate all lower message objects */
> -			c_can_activate_all_lower_rx_msg_obj(dev, IF_RX);
> -		}
> +		c_can_rx_finalize(dev, priv, obj);
>  
>  		pkts++;
>  		quota--;
> -	} while ((obj = ffs(pend)) && quota > 0);
> +	}
     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

BTW: This doesn't look right.

Regards,
Alexander
-- 
Dipl.-Inf. Alexander Stein

SYS TEC electronic GmbH
Am Windrad 2
08468 Heinsdorfergrund
Tel.: 03765 38600-1156
Fax: 03765 38600-4100
Email: alexander.stein@systec-electronic.com
Website: www.systec-electronic.com
 
Managing Director: Dipl.-Phys. Siegmar Schmidt
Commercial registry: Amtsgericht Chemnitz, HRB 28082


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-08  7:18                                 ` Alexander Stein
@ 2014-04-08  7:35                                   ` Thomas Gleixner
  0 siblings, 0 replies; 37+ messages in thread
From: Thomas Gleixner @ 2014-04-08  7:35 UTC (permalink / raw)
  To: Alexander Stein; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, linux-can

On Tue, 8 Apr 2014, Alexander Stein wrote:
> On Monday 07 April 2014 18:27:11, Thomas Gleixner wrote:
> >  static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv,
> >  			      u32 pend, int quota)
> >  {
> > -	u32 pkts = 0, ctrl, obj, mcmd;
> > +	u32 pkts = 0, ctrl, obj;
> >  
> >  	while ((obj = ffs(pend)) && quota > 0) {
> >  		pend &= ~BIT(obj - 1);
> >  
> > -		mcmd = obj < C_CAN_MSG_RX_LOW_LAST ?
> > -			IF_COMM_RCV_LOW : IF_COMM_RCV_HIGH;
> > -
> > -		c_can_object_get(dev, IF_RX, obj, mcmd);
> > +		c_can_rx_object_get(dev, obj);
> >  		ctrl = priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, IF_RX));
> >  
> >  		if (ctrl & IF_MCONT_MSGLST) {
> > @@ -820,21 +841,25 @@ static int c_can_read_objects(struct net
> >  		/* read the data from the message object */
> >  		c_can_read_msg_object(dev, IF_RX, ctrl);
> >  
> > -		if (obj < C_CAN_MSG_RX_LOW_LAST)
> > -			priv->rxmasked |= BIT(obj - 1);
> > -		else if (obj == C_CAN_MSG_RX_LOW_LAST) {
> > -			priv->rxmasked = 0;
> > -			/* activate all lower message objects */
> > -			c_can_activate_all_lower_rx_msg_obj(dev, IF_RX);
> > -		}
> > +		c_can_rx_finalize(dev, priv, obj);
> >  
> >  		pkts++;
> >  		quota--;
> > -	} while ((obj = ffs(pend)) && quota > 0);
> > +	}
>      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> 
> BTW: This doesn't look right.

Crap. The second while() is a left over in an stale branch and I did
not notice that I was on that one. I'll send out an updated series
later today.

Thanks,

	tglx





^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-08  6:17                                 ` Alexander Stein
@ 2014-04-08  8:04                                   ` Thomas Gleixner
  0 siblings, 0 replies; 37+ messages in thread
From: Thomas Gleixner @ 2014-04-08  8:04 UTC (permalink / raw)
  To: Alexander Stein; +Cc: Wolfgang Grandegger, Marc Kleine-Budde, linux-can

On Tue, 8 Apr 2014, Alexander Stein wrote:
> On Monday 07 April 2014 22:03:33, Thomas Gleixner wrote:
> > On Mon, 7 Apr 2014, Wolfgang Grandegger wrote:
> > > On 04/07/2014 05:53 PM, Thomas Gleixner wrote:
> > > > On Mon, 7 Apr 2014, Alexander Stein wrote:
> > > >> On Monday 07 April 2014 17:24:26, Thomas Gleixner wrote:
> > > >>> It'd be odd, because we get the buffers with the NEWDAT pending bits
> > > >>> from the NEWDATA1 register.
> > > >>
> > > >  
> > > >> Using the following patch the warning raises about every 5ms. With
> > > >> and without your last patchset.
> > > > 
> > > >> but reverting c0a9f4d39 this does _NOT_ arise.
> > > > 
> > > > So the NEWDAT register is telling us that the newdat bit of that
> > > > buffer is set. But when we retrieve the message, it's not set.
> > > 
> > > 
> > > Not sure if it matters. There was a strange write-readback problem
> > > reported with the PCH CAN. I digged out:
> > > 
> > > http://marc.info/?l=linux-can&m=135525750319741&w=2
> > > http://marc.info/?l=linux-can&m=135525729919672&w=2
> > > http://marc.info/?t=135296394900001&r=1&w=2
> > > 
> > > It got worse with concurrent activity of I2C on some eg20t system which
> > > smells of a weired hardware problem (and could maybe explain the 5ms
> > > period).
> > 
> > Well, looking at the report:
> > 
> > > I cannot say if any (small) I2C transfer at all raises the
> > > problem. I run 'cangen -I 0x300 can0' on my PC connected to my test
> > > board. A I2C connected LED is triggered by heartbeat thus there is a
> > > small I2C traffic each second. I couldn't see any errors in dmesg in
> > > about 10 minutes. But even with that small CAN traffic (next to
> > > nothing) a 'watch sensors' (which queries several I2C sensors every
> > > 2s) caused errors in dmesg. It seems the problem isn't related to
> > > CAN bus load at all.
> > 
> > I2C connected LED? How is that supposed to work? You cannot run I2C
> > traffic from softirq context. 
> 
> Why not? The led_heartbeat_function function (kernel v3.0.31 in that case) runs at last pca955x_led_set which calls schedule_work.
> To my surprise using the current kernel and c_can driver (I used pch_can in v3.0.x) it seems that I2C transfer doesn't affect CAN at all.
> 

The issue is, that the led stuff takes locks and on RT you might run
into a contended case ....

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-08  7:07                                 ` Alexander Stein
@ 2014-04-08  8:26                                   ` Thomas Gleixner
  2014-04-08  8:36                                     ` Alexander Stein
  0 siblings, 1 reply; 37+ messages in thread
From: Thomas Gleixner @ 2014-04-08  8:26 UTC (permalink / raw)
  To: Alexander Stein; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, linux-can

On Tue, 8 Apr 2014, Alexander Stein wrote:
> On Monday 07 April 2014 18:27:11, Thomas Gleixner wrote:
> > > > So it would be interesting to see, whether this event is aligned to
> > > > the lost packets you are hunting:
> > > > 
> > > >     P1 -> P2 -> WARN -> P4
> > > > 
> > > > A few trace printks should tell us pretty fast.
> > > 
> > > I slightly modified my patch (see below) to see which obj is marked
> > > with NEWDAT wrongly.  It turns out it is spread over many message
> > > objects, but object 1 is notably the one most occuring.  Can it
> > > happen, that NEWDAT1 already shows the bit while the NEWDAT bit is
> > > not updated yet?  Or does commit c0a9f4d39 affect this status
> > > because INTPEND is cleared before reading the message object now?
> > 
> > That's what we do not know. On D_CAN it result in hardware silently
> > dropping packets when we reneable the lower message buffers by
> > clearing the NEWDAT bit. So C_CAN might show some different behaviour
> > by giving us objects with the NEWDAT bit cleared.
> > 
> > That's why I came up with the patch which disables the RX Split. With 
> > CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING=n we clear NEWDAT and INTPEND
> > immediately. That's what the docs recommend (including the eg20t one).
> > We trade the lost packets against the potential reordering.
> > 
> > What really buggers me is that this does not work with your
> > hardware. I'll whip up a tracing patch later tonight.
> 
> So I noticed that with CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING=n
> (NEWDAT and INTPEND cleared immediately) the "odd behavior" occurs
> pretty often.
> Before
>   c_can_rx_object_get(dev, obj);
> NEWDAT is still set, but afterwards it is already cleared in
> C_CAN_NEWDAT1_REG.

Which is expected. We clear the newdat bit in c_can_rx_object_get(),
which updates C_CAN_NEWDAT1_REG as well.

> I guess the NEWDAT is already cleared in the message object before
> reading from the message object with this line

> > ctrl = priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, IF_RX));
> 
> So I removed that IF_MCONT_NEWDAT check (see below) and I didn't
> lose any message, but get 5 duplicates in 250000 messages (no
> reordering though).
> 
> Setting CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING=y with that patch I
> lose ~650msg per 250000 frames.

What happens, if you apply the patch below?

Thanks,

	tglx

Index: linux-2.6/drivers/net/can/c_can/c_can.c
===================================================================
--- linux-2.6.orig/drivers/net/can/c_can/c_can.c
+++ linux-2.6/drivers/net/can/c_can/c_can.c
@@ -880,7 +880,7 @@ static int c_can_do_rx_poll(struct net_d
 
 	while (quota > 0) {
 		if (!pend) {
-			pend = priv->read_reg(priv, C_CAN_INTPND1_REG);
+			pend = priv->read_reg(priv, C_CAN_NEWDAT1_REG);
 			if (!pend)
 				break;
 			/*

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-08  8:26                                   ` Thomas Gleixner
@ 2014-04-08  8:36                                     ` Alexander Stein
  0 siblings, 0 replies; 37+ messages in thread
From: Alexander Stein @ 2014-04-08  8:36 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Marc Kleine-Budde, Wolfgang Grandegger, linux-can

On Tuesday 08 April 2014 10:26:23, Thomas Gleixner wrote:
> > So I noticed that with CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING=n
> > (NEWDAT and INTPEND cleared immediately) the "odd behavior" occurs
> > pretty often.
> > Before
> >   c_can_rx_object_get(dev, obj);
> > NEWDAT is still set, but afterwards it is already cleared in
> > C_CAN_NEWDAT1_REG.
> 
> Which is expected. We clear the newdat bit in c_can_rx_object_get(),
> which updates C_CAN_NEWDAT1_REG as well.
> 
> > I guess the NEWDAT is already cleared in the message object before
> > reading from the message object with this line
> 
> > > ctrl = priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, IF_RX));
> > 
> > So I removed that IF_MCONT_NEWDAT check (see below) and I didn't
> > lose any message, but get 5 duplicates in 250000 messages (no
> > reordering though).
> > 
> > Setting CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING=y with that patch I
> > lose ~650msg per 250000 frames.
> 
> What happens, if you apply the patch below?
> 
> Thanks,
> 
> 	tglx
> 
> Index: linux-2.6/drivers/net/can/c_can/c_can.c
> ===================================================================
> --- linux-2.6.orig/drivers/net/can/c_can/c_can.c
> +++ linux-2.6/drivers/net/can/c_can/c_can.c
> @@ -880,7 +880,7 @@ static int c_can_do_rx_poll(struct net_d
>  
>  	while (quota > 0) {
>  		if (!pend) {
> -			pend = priv->read_reg(priv, C_CAN_INTPND1_REG);
> +			pend = priv->read_reg(priv, C_CAN_NEWDAT1_REG);
>  			if (!pend)
>  				break;
>  			/*

Your patch "can: c_can: Get rid of pointless interrupts" already did that change. Or did you mean the reverse?

Regards,
Alexander
-- 
Dipl.-Inf. Alexander Stein

SYS TEC electronic GmbH
Am Windrad 2
08468 Heinsdorfergrund
Tel.: 03765 38600-1156
Fax: 03765 38600-4100
Email: alexander.stein@systec-electronic.com
Website: www.systec-electronic.com
 
Managing Director: Dipl.-Phys. Siegmar Schmidt
Commercial registry: Amtsgericht Chemnitz, HRB 28082


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] c_can: Add support for eg20t (pch_can)
  2014-04-03 18:41 ` Wolfgang Grandegger
  2014-04-07  9:47   ` Alexander Stein
@ 2014-04-17 19:53   ` Marc Kleine-Budde
  1 sibling, 0 replies; 37+ messages in thread
From: Marc Kleine-Budde @ 2014-04-17 19:53 UTC (permalink / raw)
  To: Wolfgang Grandegger, Alexander Stein, Marc; +Cc: linux-can

[-- Attachment #1: Type: text/plain, Size: 668 bytes --]

On 04/03/2014 08:41 PM, Wolfgang Grandegger wrote:
> From 8cc3c5fde12d0dc36b658483433d5b12be693492 Mon Sep 17 00:00:00 2001
> From: Wolfgang Grandegger <wg@grandegger.com>
> Date: Sat, 4 May 2013 22:28:43 +0200
> Subject: [PATCH 07/12] c_can_pci: enable PCI bus master only for MSI
> 
> Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>

Applied to can/master

Thanks,
Marc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 242 bytes --]

^ permalink raw reply	[flat|nested] 37+ messages in thread

end of thread, other threads:[~2014-04-17 19:53 UTC | newest]

Thread overview: 37+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-04-03 14:14 [PATCH] c_can: Add support for eg20t (pch_can) Alexander Stein
2014-04-03 14:55 ` Alexander Stein
2014-04-03 14:59   ` Marc Kleine-Budde
2014-04-03 15:11     ` Thomas Gleixner
2014-04-03 15:47       ` Alexander Stein
2014-04-03 19:28         ` Oliver Hartkopp
2014-04-03 20:59           ` Thomas Gleixner
2014-04-03 20:28         ` Thomas Gleixner
2014-04-03 18:41 ` Wolfgang Grandegger
2014-04-07  9:47   ` Alexander Stein
2014-04-07 10:19     ` Wolfgang Grandegger
2014-04-07 12:06     ` Thomas Gleixner
2014-04-07 12:07       ` Marc Kleine-Budde
2014-04-07 12:24         ` Alexander Stein
2014-04-07 12:34           ` Thomas Gleixner
2014-04-07 12:48             ` Alexander Stein
2014-04-07 12:56               ` Thomas Gleixner
2014-04-07 12:58                 ` Alexander Stein
2014-04-07 13:31                   ` Thomas Gleixner
2014-04-07 14:27                     ` Alexander Stein
2014-04-07 15:24                       ` Thomas Gleixner
2014-04-07 15:36                         ` Alexander Stein
2014-04-07 15:53                           ` Thomas Gleixner
2014-04-07 16:06                             ` Alexander Stein
2014-04-07 16:27                               ` Thomas Gleixner
2014-04-08  7:07                                 ` Alexander Stein
2014-04-08  8:26                                   ` Thomas Gleixner
2014-04-08  8:36                                     ` Alexander Stein
2014-04-08  7:18                                 ` Alexander Stein
2014-04-08  7:35                                   ` Thomas Gleixner
2014-04-07 16:27                             ` Wolfgang Grandegger
2014-04-07 20:03                               ` Thomas Gleixner
2014-04-08  6:17                                 ` Alexander Stein
2014-04-08  8:04                                   ` Thomas Gleixner
2014-04-07 18:11                         ` Marc Kleine-Budde
2014-04-07 18:15                           ` Thomas Gleixner
2014-04-17 19:53   ` Marc Kleine-Budde

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.