All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] arch/ppc/8xx_io/enet.c, version 2
@ 2002-10-24 14:23 Joakim Tjernlund
  2002-10-24 14:59 ` Tom Rini
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Joakim Tjernlund @ 2002-10-24 14:23 UTC (permalink / raw)
  To: linuxppc-embedded; +Cc: scop, thomas


Hi

This is the second version of my patch that removes the expensive memcpy of
received
ethernet frames in interrupt context.

I have 1 report(from Ricardo Scop) of 20% increase in packets/second, packet
size 1500 when
applied to 8260 FEC(needs to be applied manually). But min packet size
decreased with 10 %.
This version should fix the 10% decrease case.

This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
8260/fcc_enet.c with little effort.

Better fix a bug in set_multicast_list(), move the dmi list forward when
    walking it(dmi = dmi->next;)

New stuff:
   - Configrable: copy small packets or pass them directly, see
COPY_SMALL_FRAMES in code.
   - Collision reporting fix form Thomas Lange.
   - Don't pass receive frames which has error upwards.
   - Report RX_OV errors as fifo errors, not crc errors.

Please test and report any problems and performace improvements.

        Jocke

--- arch/ppc/8xx_io/enet.c.org	Mon Oct 21 14:35:59 2002
+++ arch/ppc/8xx_io/enet.c	Thu Oct 24 15:48:25 2002
@@ -34,7 +34,6 @@
 #include <linux/ioport.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
-#include <linux/pci.h>
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/netdevice.h>
@@ -86,6 +85,14 @@
  * All functions are directly controlled using I/O pins.  See
<asm/commproc.h>.
  */

+/* Define COPY_SMALL_FRAMES if you want to save buffer memory for small
packets
+ * at a small performance hit. Note performance testing needed */
+#define COPY_SMALL_FRAMES 1
+
+#ifdef COPY_SMALL_FRAMES
+  #define RX_COPYBREAK (256-16) /* dev_alloc_skb() adds 16 bytes for
internal use */
+#endif
+
 /* The transmitter timeout
  */
 #define TX_TIMEOUT	(2*HZ)
@@ -97,19 +104,17 @@
  * the skbuffer directly.
  */
 #ifdef CONFIG_ENET_BIG_BUFFERS
-#define CPM_ENET_RX_PAGES	32
-#define CPM_ENET_RX_FRSIZE	2048
-#define CPM_ENET_RX_FRPPG	(PAGE_SIZE / CPM_ENET_RX_FRSIZE)
-#define RX_RING_SIZE		(CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
-#define TX_RING_SIZE		64	/* Must be power of two */
-#define TX_RING_MOD_MASK	63	/*   for this to work */
+  #define RX_RING_SIZE		64
+  #define TX_RING_SIZE		64	/* Must be power of two for this to work */
 #else
-#define CPM_ENET_RX_PAGES	4
-#define CPM_ENET_RX_FRSIZE	2048
-#define CPM_ENET_RX_FRPPG	(PAGE_SIZE / CPM_ENET_RX_FRSIZE)
-#define RX_RING_SIZE		(CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
-#define TX_RING_SIZE		8	/* Must be power of two */
-#define TX_RING_MOD_MASK	7	/*   for this to work */
+  #define RX_RING_SIZE		8
+  #define TX_RING_SIZE		8	/* Must be power of two for this to work */
+#endif
+#define TX_RING_MOD_MASK	(TX_RING_SIZE-1)
+
+#define CPM_ENET_RX_FRSIZE	1600 /* must be a multiple of cache line */
+#if CPM_ENET_RX_FRSIZE % L1_CACHE_LINE_SIZE != 0
+    #error CPM_ENET_RX_FRSIZE must be a multiple of L1 cache size
 #endif

 /* The CPM stores dest/src/type, data, and checksum for receive packets.
@@ -143,7 +148,7 @@
 	/* Virtual addresses for the receive buffers because we can't
 	 * do a __va() on them anymore.
 	 */
-	unsigned char *rx_vaddr[RX_RING_SIZE];
+	void    *rx_vaddr[RX_RING_SIZE];
 	struct	net_device_stats stats;
 	uint	tx_full;
 	spinlock_t lock;
@@ -370,11 +375,11 @@

 		cep->stats.tx_packets++;

-		/* Deferred means some collisions occurred during transmit,
-		 * but we eventually sent the packet OK.
-		 */
-		if (bdp->cbd_sc & BD_ENET_TX_DEF)
-			cep->stats.collisions++;
+		/* Check retry counter, i.e. collision counter */
+		if (bdp->cbd_sc & BD_ENET_TX_RCMASK){
+			/* Note that counter cannot go higher than 15 */
+			cep->stats.collisions+=(bdp->cbd_sc & BD_ENET_TX_RCMASK)>>2;
+		}

 		/* Free the sk buffer associated with this last transmit.
 		*/
@@ -449,6 +454,7 @@
 	struct	scc_enet_private *cep;
 	volatile cbd_t	*bdp;
 	struct	sk_buff *skb;
+	struct	sk_buff *skb_tmp;
 	ushort	pkt_len;

 	cep = (struct scc_enet_private *)dev->priv;
@@ -461,7 +467,8 @@
 for (;;) {
 	if (bdp->cbd_sc & BD_ENET_RX_EMPTY)
 		break;
-
+
+#define RX_BD_ERRORS (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO |
BD_ENET_RX_CR | BD_ENET_RX_OV | BD_ENET_RX_CL)
 #ifndef final_version
 	/* Since we have allocated space to hold a complete frame, both
 	 * the first and last indicators should be set.
@@ -470,51 +477,62 @@
 		(BD_ENET_RX_FIRST | BD_ENET_RX_LAST))
 			printk("CPM ENET: rcv is not first+last\n");
 #endif
-
-	/* Frame too long or too short.
-	*/
-	if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH))
-		cep->stats.rx_length_errors++;
-	if (bdp->cbd_sc & BD_ENET_RX_NO)	/* Frame alignment */
-		cep->stats.rx_frame_errors++;
-	if (bdp->cbd_sc & BD_ENET_RX_CR)	/* CRC Error */
-		cep->stats.rx_crc_errors++;
-	if (bdp->cbd_sc & BD_ENET_RX_OV)	/* FIFO overrun */
-		cep->stats.rx_crc_errors++;
-
-	/* Report late collisions as a frame error.
-	 * On this error, the BD is closed, but we don't know what we
-	 * have in the buffer.  So, just drop this frame on the floor.
-	 */
-	if (bdp->cbd_sc & BD_ENET_RX_CL) {
-		cep->stats.rx_frame_errors++;
-	}
-	else {
-
+	if(bdp->cbd_sc & RX_BD_ERRORS){ /* Receive errors ? */
+		cep->stats.rx_errors++;
+		if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH)) /* Frame too long or
too short. */
+			cep->stats.rx_length_errors++;
+		if (bdp->cbd_sc & BD_ENET_RX_NO)	/* Frame alignment */
+			cep->stats.rx_frame_errors++;
+		if (bdp->cbd_sc & BD_ENET_RX_CR)	/* CRC Error */
+			cep->stats.rx_crc_errors++;
+		if (bdp->cbd_sc & BD_ENET_RX_OV)	/* FIFO overrun */
+			cep->stats.rx_fifo_errors++;
+		if (bdp->cbd_sc & BD_ENET_RX_CL)        /* Late collision */
+			cep->stats.collisions++;
+	} else {
 		/* Process the incoming frame.
 		*/
 		cep->stats.rx_packets++;
 		pkt_len = bdp->cbd_datlen;
 		cep->stats.rx_bytes += pkt_len;
-
-		/* This does 16 byte alignment, much more than we need.
-		 * The packet length includes FCS, but we don't want to
-		 * include that when passing upstream as it messes up
-		 * bridging applications.
-		 */
-		skb = dev_alloc_skb(pkt_len-4);
-
-		if (skb == NULL) {
+		pkt_len -= 4; /* The packet length includes FCS, but we don't want to
+			       * include that when passing upstream as it messes up
+			       * bridging applications. Is this still true ???? */
+#ifdef COPY_SMALL_FRAMES
+		/* Allocate the next buffer now so we are sure to have one when needed
+		 * This does 16 byte alignment, exactly what we need(L1_CACHE aligned). */
+		if(pkt_len < RX_COPYBREAK)
+			skb_tmp = __dev_alloc_skb(pkt_len, GFP_ATOMIC|GFP_DMA);
+		else
+#endif
+			skb_tmp = __dev_alloc_skb(CPM_ENET_RX_FRSIZE, GFP_ATOMIC|GFP_DMA);
+
+		if (skb_tmp == NULL) {
 			printk("%s: Memory squeeze, dropping packet.\n", dev->name);
 			cep->stats.rx_dropped++;
-		}
-		else {
+
+		} else {
+			skb = cep->rx_vaddr[bdp - cep->rx_bd_base];
+			invalidate_dcache_range((unsigned long) skb->data,
+						(unsigned long) skb->data + pkt_len);
+
+#ifdef COPY_SMALL_FRAMES
+			if(pkt_len < RX_COPYBREAK) {
+				typeof(skb) skb_swap = skb;
+				memcpy(skb_put(skb_tmp, pkt_len), skb->data, pkt_len);
+				/* swap the skb and skb_tmp */
+				skb = skb_tmp;
+				skb_tmp = skb_swap;
+			}
+			else
+#endif
+			{
+				skb_put(skb, pkt_len);  /* Make room */
+				bdp->cbd_bufaddr = __pa(skb_tmp->data);
+				cep->rx_vaddr[bdp - cep->rx_bd_base] = skb_tmp;
+			}
 			skb->dev = dev;
-			skb_put(skb,pkt_len-4);	/* Make room */
-			eth_copy_and_sum(skb,
-				cep->rx_vaddr[bdp - cep->rx_bd_base],
-				pkt_len-4, 0);
-			skb->protocol=eth_type_trans(skb,dev);
+			skb->protocol=eth_type_trans(skb, dev);
 			netif_rx(skb);
 		}
 	}
@@ -608,7 +626,7 @@

 			dmi = dev->mc_list;

-			for (i=0; i<dev->mc_count; i++) {
+			for (i=0; i<dev->mc_count; i++, dmi = dmi->next) {

 				/* Only support group multicast for now.
 				*/
@@ -647,8 +665,7 @@
 	struct net_device *dev;
 	struct scc_enet_private *cep;
 	int i, j, k;
-	unsigned char	*eap, *ba;
-	dma_addr_t	mem_addr;
+	unsigned char	*eap;
 	bd_t		*bd;
 	volatile	cbd_t		*bdp;
 	volatile	cpm8xx_t	*cp;
@@ -839,22 +856,13 @@

 	bdp = cep->rx_bd_base;
 	k = 0;
-	for (i=0; i<CPM_ENET_RX_PAGES; i++) {
-
-		/* Allocate a page.
-		*/
-		ba = (unsigned char *)consistent_alloc(GFP_KERNEL, PAGE_SIZE, &mem_addr);
-
-		/* Initialize the BD for every fragment in the page.
-		*/
-		for (j=0; j<CPM_ENET_RX_FRPPG; j++) {
-			bdp->cbd_sc = BD_ENET_RX_EMPTY | BD_ENET_RX_INTR;
-			bdp->cbd_bufaddr = mem_addr;
-			cep->rx_vaddr[k++] = ba;
-			mem_addr += CPM_ENET_RX_FRSIZE;
-			ba += CPM_ENET_RX_FRSIZE;
-			bdp++;
-		}
+	/* Initialize the BDs. */
+	for (j=0; j < RX_RING_SIZE; j++) {
+		struct	sk_buff * skb = __dev_alloc_skb(CPM_ENET_RX_FRSIZE,
GFP_ATOMIC|GFP_DMA);
+		bdp->cbd_sc = BD_ENET_RX_EMPTY | BD_ENET_RX_INTR;
+		bdp->cbd_bufaddr = __pa(skb->data);
+		cep->rx_vaddr[k++] = skb;
+		bdp++;
 	}

 	/* Set the last buffer to wrap.


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] arch/ppc/8xx_io/enet.c, version 2
  2002-10-24 14:23 [PATCH] arch/ppc/8xx_io/enet.c, version 2 Joakim Tjernlund
@ 2002-10-24 14:59 ` Tom Rini
  2002-10-24 15:46   ` Joakim Tjernlund
  2002-10-28 15:17 ` [PATCH] arch/ppc/8xx_io/enet.c, version 2 Stephan Linke
  2002-11-13 13:50 ` Hans Feldt
  2 siblings, 1 reply; 16+ messages in thread
From: Tom Rini @ 2002-10-24 14:59 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: linuxppc-embedded, scop, thomas


On Thu, Oct 24, 2002 at 04:23:31PM +0200, Joakim Tjernlund wrote:

> This is the second version of my patch that removes the expensive memcpy of
> received
> ethernet frames in interrupt context.
>
> I have 1 report(from Ricardo Scop) of 20% increase in packets/second, packet
> size 1500 when
> applied to 8260 FEC(needs to be applied manually). But min packet size
> decreased with 10 %.
> This version should fix the 10% decrease case.
>
> This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
> 8260/fcc_enet.c with little effort.
>
> Better fix a bug in set_multicast_list(), move the dmi list forward when
>     walking it(dmi = dmi->next;)
>
> New stuff:
>    - Configrable: copy small packets or pass them directly, see
> COPY_SMALL_FRAMES in code.
>    - Collision reporting fix form Thomas Lange.
>    - Don't pass receive frames which has error upwards.
>    - Report RX_OV errors as fifo errors, not crc errors.
>
> Please test and report any problems and performace improvements.

All of this sounds good, but can you please break this one patch up into
seperate logical patches?

--
Tom Rini (TR1265)
http://gate.crashing.org/~trini/

** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH] arch/ppc/8xx_io/enet.c, version 2
  2002-10-24 14:59 ` Tom Rini
@ 2002-10-24 15:46   ` Joakim Tjernlund
  2002-11-01 11:01     ` Joakim Tjernlund
  2003-01-28 11:54     ` [PATCH] arch/ppc/8xx_io/enet.c, version 3 Joakim Tjernlund
  0 siblings, 2 replies; 16+ messages in thread
From: Joakim Tjernlund @ 2002-10-24 15:46 UTC (permalink / raw)
  To: Tom Rini; +Cc: linuxppc-embedded, scop, thomas


>
>
> On Thu, Oct 24, 2002 at 04:23:31PM +0200, Joakim Tjernlund wrote:
>
> > This is the second version of my patch that removes the expensive memcpy of
> > received
> > ethernet frames in interrupt context.
> >
> > I have 1 report(from Ricardo Scop) of 20% increase in packets/second, packet
> > size 1500 when
> > applied to 8260 FEC(needs to be applied manually). But min packet size
> > decreased with 10 %.
> > This version should fix the 10% decrease case.
> >
> > This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
> > 8260/fcc_enet.c with little effort.
> >
> > Better fix a bug in set_multicast_list(), move the dmi list forward when
> >     walking it(dmi = dmi->next;)
> >
> > New stuff:
> >    - Configrable: copy small packets or pass them directly, see
> > COPY_SMALL_FRAMES in code.
> >    - Collision reporting fix form Thomas Lange.
> >    - Don't pass receive frames which has error upwards.
> >    - Report RX_OV errors as fifo errors, not crc errors.
> >
> > Please test and report any problems and performace improvements.
>
> All of this sounds good, but can you please break this one patch up into
> seperate logical patches?

I rather not.

The logical pices in this patch are:
 1) remove the expensive memcpy.
   - This is the main part of the patch.

 2) dmi->next fix, which is a one liner.
    Don't pass receive frames which has error upwards.
   - Trival stuff.

 3) Collision reporting fix form Thomas Lange.
    Report RX_OV errors as fifo errors, not crc errors.
   - These are just minor accounting fixes.

So I don't think it's worth the effort. Let's just wait a few days and see
if it was success or not.

Folks, please report you findings to the list.

    Jocke


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH] arch/ppc/8xx_io/enet.c, version 2
  2002-10-24 14:23 [PATCH] arch/ppc/8xx_io/enet.c, version 2 Joakim Tjernlund
  2002-10-24 14:59 ` Tom Rini
@ 2002-10-28 15:17 ` Stephan Linke
  2002-10-28 17:24   ` Joakim Tjernlund
  2002-11-13 13:50 ` Hans Feldt
  2 siblings, 1 reply; 16+ messages in thread
From: Stephan Linke @ 2002-10-28 15:17 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: Linuxppc-Embedded


Hi Jocke,

I just had a look at the code. So far it is running fine. on my board. :)
Following question:
In scc-enet_rx you use invalidate_dcache_range() for the skb data area.
Why don't you do the same in scc_enet_init() for the first set of skb's?
Isn't it required too at this place.

Thanks, Stephan

> -----Original Message-----
> From: owner-linuxppc-embedded@lists.linuxppc.org
> [mailto:owner-linuxppc-embedded@lists.linuxppc.org]On Behalf Of Joakim
> Tjernlund
> Sent: Donnerstag, 24. Oktober 2002 16:24
> To: linuxppc-embedded@lists.linuxppc.org
> Cc: scop@digitel.com.br; thomas@corelatus.com
> Subject: [PATCH] arch/ppc/8xx_io/enet.c, version 2
>
>
>
> Hi
>
> This is the second version of my patch that removes the expensive
> memcpy of
> received
> ethernet frames in interrupt context.
>
> I have 1 report(from Ricardo Scop) of 20% increase in
> packets/second, packet
> size 1500 when
> applied to 8260 FEC(needs to be applied manually). But min packet size
> decreased with 10 %.
> This version should fix the 10% decrease case.
>
> This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
> 8260/fcc_enet.c with little effort.
>
> Better fix a bug in set_multicast_list(), move the dmi list forward when
>     walking it(dmi = dmi->next;)
>
> New stuff:
>    - Configrable: copy small packets or pass them directly, see
> COPY_SMALL_FRAMES in code.
>    - Collision reporting fix form Thomas Lange.
>    - Don't pass receive frames which has error upwards.
>    - Report RX_OV errors as fifo errors, not crc errors.
>
> Please test and report any problems and performace improvements.
>
>         Jocke
>
> --- arch/ppc/8xx_io/enet.c.org	Mon Oct 21 14:35:59 2002
> +++ arch/ppc/8xx_io/enet.c	Thu Oct 24 15:48:25 2002
> @@ -34,7 +34,6 @@
>  #include <linux/ioport.h>
>  #include <linux/slab.h>
>  #include <linux/interrupt.h>
> -#include <linux/pci.h>
>  #include <linux/init.h>
>  #include <linux/delay.h>
>  #include <linux/netdevice.h>
> @@ -86,6 +85,14 @@
>   * All functions are directly controlled using I/O pins.  See
> <asm/commproc.h>.
>   */
>
> +/* Define COPY_SMALL_FRAMES if you want to save buffer memory for small
> packets
> + * at a small performance hit. Note performance testing needed */
> +#define COPY_SMALL_FRAMES 1
> +
> +#ifdef COPY_SMALL_FRAMES
> +  #define RX_COPYBREAK (256-16) /* dev_alloc_skb() adds 16 bytes for
> internal use */
> +#endif
> +
>  /* The transmitter timeout
>   */
>  #define TX_TIMEOUT	(2*HZ)
> @@ -97,19 +104,17 @@
>   * the skbuffer directly.
>   */
>  #ifdef CONFIG_ENET_BIG_BUFFERS
> -#define CPM_ENET_RX_PAGES	32
> -#define CPM_ENET_RX_FRSIZE	2048
> -#define CPM_ENET_RX_FRPPG	(PAGE_SIZE / CPM_ENET_RX_FRSIZE)
> -#define RX_RING_SIZE		(CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
> -#define TX_RING_SIZE		64	/* Must be power of two */
> -#define TX_RING_MOD_MASK	63	/*   for this to work */
> +  #define RX_RING_SIZE		64
> +  #define TX_RING_SIZE		64	/* Must be power of
> two for this to work */
>  #else
> -#define CPM_ENET_RX_PAGES	4
> -#define CPM_ENET_RX_FRSIZE	2048
> -#define CPM_ENET_RX_FRPPG	(PAGE_SIZE / CPM_ENET_RX_FRSIZE)
> -#define RX_RING_SIZE		(CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
> -#define TX_RING_SIZE		8	/* Must be power of two */
> -#define TX_RING_MOD_MASK	7	/*   for this to work */
> +  #define RX_RING_SIZE		8
> +  #define TX_RING_SIZE		8	/* Must be power of
> two for this to work */
> +#endif
> +#define TX_RING_MOD_MASK	(TX_RING_SIZE-1)
> +
> +#define CPM_ENET_RX_FRSIZE	1600 /* must be a multiple of cache line */
> +#if CPM_ENET_RX_FRSIZE % L1_CACHE_LINE_SIZE != 0
> +    #error CPM_ENET_RX_FRSIZE must be a multiple of L1 cache size
>  #endif
>
>  /* The CPM stores dest/src/type, data, and checksum for receive packets.
> @@ -143,7 +148,7 @@
>  	/* Virtual addresses for the receive buffers because we can't
>  	 * do a __va() on them anymore.
>  	 */
> -	unsigned char *rx_vaddr[RX_RING_SIZE];
> +	void    *rx_vaddr[RX_RING_SIZE];
>  	struct	net_device_stats stats;
>  	uint	tx_full;
>  	spinlock_t lock;
> @@ -370,11 +375,11 @@
>
>  		cep->stats.tx_packets++;
>
> -		/* Deferred means some collisions occurred during transmit,
> -		 * but we eventually sent the packet OK.
> -		 */
> -		if (bdp->cbd_sc & BD_ENET_TX_DEF)
> -			cep->stats.collisions++;
> +		/* Check retry counter, i.e. collision counter */
> +		if (bdp->cbd_sc & BD_ENET_TX_RCMASK){
> +			/* Note that counter cannot go higher than 15 */
> +			cep->stats.collisions+=(bdp->cbd_sc &
> BD_ENET_TX_RCMASK)>>2;
> +		}
>
>  		/* Free the sk buffer associated with this last transmit.
>  		*/
> @@ -449,6 +454,7 @@
>  	struct	scc_enet_private *cep;
>  	volatile cbd_t	*bdp;
>  	struct	sk_buff *skb;
> +	struct	sk_buff *skb_tmp;
>  	ushort	pkt_len;
>
>  	cep = (struct scc_enet_private *)dev->priv;
> @@ -461,7 +467,8 @@
>  for (;;) {
>  	if (bdp->cbd_sc & BD_ENET_RX_EMPTY)
>  		break;
> -
> +
> +#define RX_BD_ERRORS (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO |
> BD_ENET_RX_CR | BD_ENET_RX_OV | BD_ENET_RX_CL)
>  #ifndef final_version
>  	/* Since we have allocated space to hold a complete frame, both
>  	 * the first and last indicators should be set.
> @@ -470,51 +477,62 @@
>  		(BD_ENET_RX_FIRST | BD_ENET_RX_LAST))
>  			printk("CPM ENET: rcv is not first+last\n");
>  #endif
> -
> -	/* Frame too long or too short.
> -	*/
> -	if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH))
> -		cep->stats.rx_length_errors++;
> -	if (bdp->cbd_sc & BD_ENET_RX_NO)	/* Frame alignment */
> -		cep->stats.rx_frame_errors++;
> -	if (bdp->cbd_sc & BD_ENET_RX_CR)	/* CRC Error */
> -		cep->stats.rx_crc_errors++;
> -	if (bdp->cbd_sc & BD_ENET_RX_OV)	/* FIFO overrun */
> -		cep->stats.rx_crc_errors++;
> -
> -	/* Report late collisions as a frame error.
> -	 * On this error, the BD is closed, but we don't know what we
> -	 * have in the buffer.  So, just drop this frame on the floor.
> -	 */
> -	if (bdp->cbd_sc & BD_ENET_RX_CL) {
> -		cep->stats.rx_frame_errors++;
> -	}
> -	else {
> -
> +	if(bdp->cbd_sc & RX_BD_ERRORS){ /* Receive errors ? */
> +		cep->stats.rx_errors++;
> +		if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH))
> /* Frame too long or
> too short. */
> +			cep->stats.rx_length_errors++;
> +		if (bdp->cbd_sc & BD_ENET_RX_NO)	/* Frame
> alignment */
> +			cep->stats.rx_frame_errors++;
> +		if (bdp->cbd_sc & BD_ENET_RX_CR)	/* CRC Error */
> +			cep->stats.rx_crc_errors++;
> +		if (bdp->cbd_sc & BD_ENET_RX_OV)	/* FIFO overrun */
> +			cep->stats.rx_fifo_errors++;
> +		if (bdp->cbd_sc & BD_ENET_RX_CL)        /* Late collision */
> +			cep->stats.collisions++;
> +	} else {
>  		/* Process the incoming frame.
>  		*/
>  		cep->stats.rx_packets++;
>  		pkt_len = bdp->cbd_datlen;
>  		cep->stats.rx_bytes += pkt_len;
> -
> -		/* This does 16 byte alignment, much more than we need.
> -		 * The packet length includes FCS, but we don't want to
> -		 * include that when passing upstream as it messes up
> -		 * bridging applications.
> -		 */
> -		skb = dev_alloc_skb(pkt_len-4);
> -
> -		if (skb == NULL) {
> +		pkt_len -= 4; /* The packet length includes FCS,
> but we don't want to
> +			       * include that when passing upstream
> as it messes up
> +			       * bridging applications. Is this
> still true ???? */
> +#ifdef COPY_SMALL_FRAMES
> +		/* Allocate the next buffer now so we are sure to
> have one when needed
> +		 * This does 16 byte alignment, exactly what we
> need(L1_CACHE aligned). */
> +		if(pkt_len < RX_COPYBREAK)
> +			skb_tmp = __dev_alloc_skb(pkt_len,
> GFP_ATOMIC|GFP_DMA);
> +		else
> +#endif
> +			skb_tmp =
> __dev_alloc_skb(CPM_ENET_RX_FRSIZE, GFP_ATOMIC|GFP_DMA);
> +
> +		if (skb_tmp == NULL) {
>  			printk("%s: Memory squeeze, dropping
> packet.\n", dev->name);
>  			cep->stats.rx_dropped++;
> -		}
> -		else {
> +
> +		} else {
> +			skb = cep->rx_vaddr[bdp - cep->rx_bd_base];
> +			invalidate_dcache_range((unsigned long) skb->data,
> +						(unsigned long)
> skb->data + pkt_len);
> +
> +#ifdef COPY_SMALL_FRAMES
> +			if(pkt_len < RX_COPYBREAK) {
> +				typeof(skb) skb_swap = skb;
> +				memcpy(skb_put(skb_tmp, pkt_len),
> skb->data, pkt_len);
> +				/* swap the skb and skb_tmp */
> +				skb = skb_tmp;
> +				skb_tmp = skb_swap;
> +			}
> +			else
> +#endif
> +			{
> +				skb_put(skb, pkt_len);  /* Make room */
> +				bdp->cbd_bufaddr = __pa(skb_tmp->data);
> +				cep->rx_vaddr[bdp -
> cep->rx_bd_base] = skb_tmp;
> +			}
>  			skb->dev = dev;
> -			skb_put(skb,pkt_len-4);	/* Make room */
> -			eth_copy_and_sum(skb,
> -				cep->rx_vaddr[bdp - cep->rx_bd_base],
> -				pkt_len-4, 0);
> -			skb->protocol=eth_type_trans(skb,dev);
> +			skb->protocol=eth_type_trans(skb, dev);
>  			netif_rx(skb);
>  		}
>  	}
> @@ -608,7 +626,7 @@
>
>  			dmi = dev->mc_list;
>
> -			for (i=0; i<dev->mc_count; i++) {
> +			for (i=0; i<dev->mc_count; i++, dmi = dmi->next) {
>
>  				/* Only support group multicast for now.
>  				*/
> @@ -647,8 +665,7 @@
>  	struct net_device *dev;
>  	struct scc_enet_private *cep;
>  	int i, j, k;
> -	unsigned char	*eap, *ba;
> -	dma_addr_t	mem_addr;
> +	unsigned char	*eap;
>  	bd_t		*bd;
>  	volatile	cbd_t		*bdp;
>  	volatile	cpm8xx_t	*cp;
> @@ -839,22 +856,13 @@
>
>  	bdp = cep->rx_bd_base;
>  	k = 0;
> -	for (i=0; i<CPM_ENET_RX_PAGES; i++) {
> -
> -		/* Allocate a page.
> -		*/
> -		ba = (unsigned char *)consistent_alloc(GFP_KERNEL,
> PAGE_SIZE, &mem_addr);
> -
> -		/* Initialize the BD for every fragment in the page.
> -		*/
> -		for (j=0; j<CPM_ENET_RX_FRPPG; j++) {
> -			bdp->cbd_sc = BD_ENET_RX_EMPTY | BD_ENET_RX_INTR;
> -			bdp->cbd_bufaddr = mem_addr;
> -			cep->rx_vaddr[k++] = ba;
> -			mem_addr += CPM_ENET_RX_FRSIZE;
> -			ba += CPM_ENET_RX_FRSIZE;
> -			bdp++;
> -		}
> +	/* Initialize the BDs. */
> +	for (j=0; j < RX_RING_SIZE; j++) {
> +		struct	sk_buff * skb = __dev_alloc_skb(CPM_ENET_RX_FRSIZE,
> GFP_ATOMIC|GFP_DMA);
> +		bdp->cbd_sc = BD_ENET_RX_EMPTY | BD_ENET_RX_INTR;
> +		bdp->cbd_bufaddr = __pa(skb->data);
> +		cep->rx_vaddr[k++] = skb;
> +		bdp++;
>  	}
>
>  	/* Set the last buffer to wrap.
>
>
>
>


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] arch/ppc/8xx_io/enet.c, version 2
  2002-10-28 15:17 ` [PATCH] arch/ppc/8xx_io/enet.c, version 2 Stephan Linke
@ 2002-10-28 17:24   ` Joakim Tjernlund
  0 siblings, 0 replies; 16+ messages in thread
From: Joakim Tjernlund @ 2002-10-28 17:24 UTC (permalink / raw)
  To: Stephan Linke; +Cc: Linuxppc-Embedded


Hi Stephan

> Hi Jocke,
>
> I just had a look at the code. So far it is running fine. on my board. :)
> Following question:
> In scc-enet_rx you use invalidate_dcache_range() for the skb data area.
> Why don't you do the same in scc_enet_init() for the first set of skb's?
> Isn't it required too at this place.

Thanks for trying this!

I did this in my first version, but in my second version I moved the
invalidate_dcache_range() to AFTER the ethernet frame had been received.

invalidate_dcache_range() can either be called when you add the buffer to the
HW(BD) or after you have received a packet but before you try to use it. Doing it
after has one advantage, you know the packet len and therefore you only need
to invalidate "packet len" bytes.

In my tree I also removed the "sync" instruction in  invalidate_dcache_range()
because I don't think it's needed. My contact at Motorola thinks so too.
I have been running without the sync instruction for 2-3 weeks and
has yet to see any problems. Would be intresting to hear if others
can try this out and if it gives any performance improvement.

            Jocke
>
> Thanks, Stephan
>
> > -----Original Message-----
> > From: owner-linuxppc-embedded@lists.linuxppc.org
> > [mailto:owner-linuxppc-embedded@lists.linuxppc.org]On Behalf Of Joakim
> > Tjernlund
> > Sent: Donnerstag, 24. Oktober 2002 16:24
> > To: linuxppc-embedded@lists.linuxppc.org
> > Cc: scop@digitel.com.br; thomas@corelatus.com
> > Subject: [PATCH] arch/ppc/8xx_io/enet.c, version 2
> >
> >
> >
> > Hi
> >
> > This is the second version of my patch that removes the expensive
> > memcpy of
> > received
> > ethernet frames in interrupt context.
> >
> > I have 1 report(from Ricardo Scop) of 20% increase in
> > packets/second, packet
> > size 1500 when
> > applied to 8260 FEC(needs to be applied manually). But min packet size
> > decreased with 10 %.
> > This version should fix the 10% decrease case.
> >
> > This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
> > 8260/fcc_enet.c with little effort.
> >
> > Better fix a bug in set_multicast_list(), move the dmi list forward when
> >     walking it(dmi = dmi->next;)
> >
> > New stuff:
> >    - Configrable: copy small packets or pass them directly, see
> > COPY_SMALL_FRAMES in code.
> >    - Collision reporting fix form Thomas Lange.
> >    - Don't pass receive frames which has error upwards.
> >    - Report RX_OV errors as fifo errors, not crc errors.
> >
> > Please test and report any problems and performace improvements.
> >
> >         Jocke
> >


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH] arch/ppc/8xx_io/enet.c, version 2
  2002-10-24 15:46   ` Joakim Tjernlund
@ 2002-11-01 11:01     ` Joakim Tjernlund
  2003-01-28 11:54     ` [PATCH] arch/ppc/8xx_io/enet.c, version 3 Joakim Tjernlund
  1 sibling, 0 replies; 16+ messages in thread
From: Joakim Tjernlund @ 2002-11-01 11:01 UTC (permalink / raw)
  To: Tom Rini; +Cc: linuxppc-embedded, scop, thomas


Hi Tom

Will you apply this patch?  No problems reported so far.

 Jocke

> >
> > On Thu, Oct 24, 2002 at 04:23:31PM +0200, Joakim Tjernlund wrote:
> >
> > > This is the second version of my patch that removes the expensive memcpy of
> > > received
> > > ethernet frames in interrupt context.
> > >
> > > I have 1 report(from Ricardo Scop) of 20% increase in packets/second, packet
> > > size 1500 when
> > > applied to 8260 FEC(needs to be applied manually). But min packet size
> > > decreased with 10 %.
> > > This version should fix the 10% decrease case.
> > >
> > > This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
> > > 8260/fcc_enet.c with little effort.
> > >
> > > Better fix a bug in set_multicast_list(), move the dmi list forward when
> > >     walking it(dmi = dmi->next;)
> > >
> > > New stuff:
> > >    - Configrable: copy small packets or pass them directly, see
> > > COPY_SMALL_FRAMES in code.
> > >    - Collision reporting fix form Thomas Lange.
> > >    - Don't pass receive frames which has error upwards.
> > >    - Report RX_OV errors as fifo errors, not crc errors.
> > >
> > > Please test and report any problems and performace improvements.
> >
> > All of this sounds good, but can you please break this one patch up into
> > seperate logical patches?
>
> I rather not.
>
> The logical pices in this patch are:
>  1) remove the expensive memcpy.
>    - This is the main part of the patch.
>
>  2) dmi->next fix, which is a one liner.
>     Don't pass receive frames which has error upwards.
>    - Trival stuff.
>
>  3) Collision reporting fix form Thomas Lange.
>     Report RX_OV errors as fifo errors, not crc errors.
>    - These are just minor accounting fixes.
>
> So I don't think it's worth the effort. Let's just wait a few days and see
> if it was success or not.
>
> Folks, please report you findings to the list.
>
>     Jocke
>


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] arch/ppc/8xx_io/enet.c, version 2
  2002-10-24 14:23 [PATCH] arch/ppc/8xx_io/enet.c, version 2 Joakim Tjernlund
  2002-10-24 14:59 ` Tom Rini
  2002-10-28 15:17 ` [PATCH] arch/ppc/8xx_io/enet.c, version 2 Stephan Linke
@ 2002-11-13 13:50 ` Hans Feldt
  2002-11-13 16:35   ` Joakim Tjernlund
  2 siblings, 1 reply; 16+ messages in thread
From: Hans Feldt @ 2002-11-13 13:50 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: linuxppc-embedded, scop, thomas


On 10/24/02 04:23 PM, Joakim Tjernlund wrote:
> Hi
>
> This is the second version of my patch that removes the expensive memcpy of
> received
> ethernet frames in interrupt context.

Isn't it so that this patch works because you have snooping? Without
snooping the driver would fail because of cache line replacement which
could trash received data.

The buffer invalidate in this case is unneeded. But if you wanna
optimise you should invalidate the whole buffer before giving it to DMA
thus getting rid of the flush implied by memory coherence.

Tried this out in a driver for a processor without snooping, now and
then received packets got corrupted, cache line wise...

Just trying to understand how things really work, please correct me if I
am wrong.

Cheers,
Hans


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH] arch/ppc/8xx_io/enet.c, version 2
  2002-11-13 13:50 ` Hans Feldt
@ 2002-11-13 16:35   ` Joakim Tjernlund
  2002-11-13 19:30     ` Hans Feldt
  0 siblings, 1 reply; 16+ messages in thread
From: Joakim Tjernlund @ 2002-11-13 16:35 UTC (permalink / raw)
  To: Hans Feldt; +Cc: linuxppc-embedded, scop, thomas


> On 10/24/02 04:23 PM, Joakim Tjernlund wrote:
> > Hi
> >
> > This is the second version of my patch that removes the expensive memcpy of
> > received
> > ethernet frames in interrupt context.
>
> Isn't it so that this patch works because you have snooping? Without
> snooping the driver would fail because of cache line replacement which
> could trash received data.

I don't have snooping(mpc860).

>
> The buffer invalidate in this case is unneeded. But if you wanna
> optimise you should invalidate the whole buffer before giving it to DMA
> thus getting rid of the flush implied by memory coherence.

In my first version I did invalidate the whole buffer BEFORE I gave it
to the CPM/DMA. I moved it because I thougth it was better to invalidate
after I received it, since the packet len is known at that time
so I don't have to invalidate the whole buffer, just the received number of bytes.

>
> Tried this out in a driver for a processor without snooping, now and
> then received packets got corrupted, cache line wise...

You may be right, perhaps one must invalidate the whole buffer before giving it
to the CPM/DMA. Suppose you reuse a buffer which has been modified before it
was freed and the dcache must write back data to free up space and the buffer,
which now is owned by the CPM, get written to.

I have not seen any corrupted packets and you are the first to report
any problems. What modifications have you done?

What CPU?
Post the driver please.

 Jocke


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] arch/ppc/8xx_io/enet.c, version 2
  2002-11-13 16:35   ` Joakim Tjernlund
@ 2002-11-13 19:30     ` Hans Feldt
  2002-11-13 21:19       ` Joakim Tjernlund
  0 siblings, 1 reply; 16+ messages in thread
From: Hans Feldt @ 2002-11-13 19:30 UTC (permalink / raw)
  To: joakim.tjernlund; +Cc: linuxppc-embedded, scop, thomas


----- Original Message -----
From: Joakim Tjernlund <joakim.tjernlund@lumentis.se>

> You may be right, perhaps one must invalidate the whole buffer before giving it
> to the CPM/DMA. Suppose you reuse a buffer which has been modified before it
> was freed and the dcache must write back data to free up space and the buffer,
> which now is owned by the CPM, get written to.

I beleive this could happen. Since IP does not perform checksumming
but relies on the link (don't know this really) in that matter, I guess
the application could get wrong data...

> I have not seen any corrupted packets and you are the first to report
> any problems.

Did you run any data integrity tests?

> What modifications have you done?

I haven't used your driver patch. I used the __idea__ of delaying the
invalidate to the point where you know how much has been received. This was
in an RTOS ATM driver for a 405. An integrity test showed that some few
packets was wrong. Data was changed by means of cache lines.

> What CPU?
> Post the driver please.

Irrelevant, non-linux

Cheers,
Hans


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] arch/ppc/8xx_io/enet.c, version 2
  2002-11-13 19:30     ` Hans Feldt
@ 2002-11-13 21:19       ` Joakim Tjernlund
  2002-11-13 22:12         ` Dan Malek
  0 siblings, 1 reply; 16+ messages in thread
From: Joakim Tjernlund @ 2002-11-13 21:19 UTC (permalink / raw)
  To: Hans Feldt, dan; +Cc: linuxppc-embedded, scop, thomas


> ----- Original Message -----
> From: Joakim Tjernlund <joakim.tjernlund@lumentis.se>
>
> > You may be right, perhaps one must invalidate the whole buffer before giving it
> > to the CPM/DMA. Suppose you reuse a buffer which has been modified before it
> > was freed and the dcache must write back data to free up space and the buffer,
> > which now is owned by the CPM, get written to.
>
> I beleive this could happen. Since IP does not perform checksumming
> but relies on the link (don't know this really) in that matter, I guess
> the application could get wrong data...

OK, anyone against? Dan?

>
> > I have not seen any corrupted packets and you are the first to report
> > any problems.
>
> Did you run any data integrity tests?
No, normal use, lots of pings etc.

>
> > What modifications have you done?
>
> I haven't used your driver patch. I used the __idea__ of delaying the
> invalidate to the point where you know how much has been received. This was
> in an RTOS ATM driver for a 405. An integrity test showed that some few
> packets was wrong. Data was changed by means of cache lines.

Did you notice any difference in performance when moving the invalidate call?

Any other problems with the patch?

Dan, would you like a new patch or will you fix this?

         Jocke

** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] arch/ppc/8xx_io/enet.c, version 2
  2002-11-13 21:19       ` Joakim Tjernlund
@ 2002-11-13 22:12         ` Dan Malek
  2002-11-13 22:54           ` Joakim Tjernlund
  2002-11-15 11:52           ` Hans Feldt
  0 siblings, 2 replies; 16+ messages in thread
From: Dan Malek @ 2002-11-13 22:12 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: Hans Feldt, linuxppc-embedded, scop, thomas


Joakim Tjernlund wrote:

> OK, anyone against? Dan?

I'm currently looking at the patches and I'll be integrating something
that hopefully works :-)

This isn't something new that hasn't been tried before.  The problem
in the past with non-coherent processors, incoming DMA, and skbufs is
the buffers would share cache lines with other data which would get
corrupted as the result of the invalidate for the DMA.  Typically,
data that was corrupted were flags and control information for the IP
stack, and under "normal" use you wouldn't notice this.  However,
forwarding/bridging applications would fail to work properly and you
would sometimes see packet retransmits that weren't necessary.

The "trick" is to ensure you allocate a larger than necessary sk buffer
and then align the start and end such that they consume entire cache
lines.  There has been sufficient discussion about this that I hope
the sk buffer mechanism will allow this alignment now, as it didn't
work well in the past.  This is what I want to check out when I
apply and test the patches.

This isn't necessary on the 8260 family due to cache snooping, but it
is required on the 8xx.

Of course, a packet checksum still needs to be performed, and if it
is done as part of the data copy (and if the IP stack doesn't do it
again), it would seem that this implementation rather than DMA would
be more efficient.

Thanks.


	-- Dan


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] arch/ppc/8xx_io/enet.c, version 2
  2002-11-13 22:12         ` Dan Malek
@ 2002-11-13 22:54           ` Joakim Tjernlund
  2002-11-15 11:52           ` Hans Feldt
  1 sibling, 0 replies; 16+ messages in thread
From: Joakim Tjernlund @ 2002-11-13 22:54 UTC (permalink / raw)
  To: Dan Malek; +Cc: Hans Feldt, linuxppc-embedded, scop, thomas


> Joakim Tjernlund wrote:
>
> > OK, anyone against? Dan?
>
> I'm currently looking at the patches and I'll be integrating something
> that hopefully works :-)
Please tell me if there is something in that patch you don't like(besides the
moving the invalidate call).

>
> This isn't something new that hasn't been tried before.  The problem
> in the past with non-coherent processors, incoming DMA, and skbufs is
> the buffers would share cache lines with other data which would get
> corrupted as the result of the invalidate for the DMA.  Typically,
> data that was corrupted were flags and control information for the IP
> stack, and under "normal" use you wouldn't notice this.  However,
> forwarding/bridging applications would fail to work properly and you
> would sometimes see packet retransmits that weren't necessary.
>
> The "trick" is to ensure you allocate a larger than necessary sk buffer
> and then align the start and end such that they consume entire cache
> lines.  There has been sufficient discussion about this that I hope
> the sk buffer mechanism will allow this alignment now, as it didn't
> work well in the past.  This is what I want to check out when I
> apply and test the patches.

Tell me about it, I got severely bitten by a non cache aligned invalidate call
in the i2c-algo-8xx.c driver :-(

I too checked carefully that the buffer returned from __dev_alloc_skb()/dev_alloc_skb()
cache aligned, turns out that it kmalloc's a buffer and reserves 16 bytes in the
beginning so it's safe.

>
> This isn't necessary on the 8260 family due to cache snooping, but it
> is required on the 8xx.
>
> Of course, a packet checksum still needs to be performed, and if it
> is done as part of the data copy (and if the IP stack doesn't do it
> again), it would seem that this implementation rather than DMA would
> be more efficient.

Are referring to eth_copy_and_sum()? That function has never done
a csum, just a plain memcpy(). The IP stack has always done
it's own csum(just as well since it would be doing this in IRQ context),
unless you set ip_summed(I think).

Perhaps a backwards memcpy() would be more efficient? That way
the  IP header get copied last and will be in cache longer. I believe memmove()
will do that.

Some drivers also try cache align the IP header. I tried that to but
eth_type_trans() could not handle this.

Finally, why does passing the Ethernet CRC upwards mess-up bridging applications?

        Jocke
>
> Thanks.
>
>
> -- Dan
>

** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] arch/ppc/8xx_io/enet.c, version 2
  2002-11-13 22:12         ` Dan Malek
  2002-11-13 22:54           ` Joakim Tjernlund
@ 2002-11-15 11:52           ` Hans Feldt
  1 sibling, 0 replies; 16+ messages in thread
From: Hans Feldt @ 2002-11-15 11:52 UTC (permalink / raw)
  To: Dan Malek; +Cc: Joakim Tjernlund, linuxppc-embedded, scop, thomas


On 11/13/02 11:12 PM, Dan Malek wrote:

> This isn't something new that hasn't been tried before.  The problem
> in the past with non-coherent processors, incoming DMA, and skbufs is
> the buffers would share cache lines with other data which would get
> corrupted as the result of the invalidate for the DMA.  Typically,
> data that was corrupted were flags and control information for the IP

The problem I was describing related to this patch was due to the L1
cache replacement algorithm. The L1 cache flushes a line to main memory
when it's full with some LRU algo. Thus if you have no snooping
(860/405), this can interfere with the DMA into the same piece of memory.

Invalidating the whole buffer before giving it to DMA solves this and
the cost of doing it on the whole buffer is not much. You have already
done the big optimisation in removing the memcpy and changing flush to
invalidate.

Cheers,
Hans


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH] arch/ppc/8xx_io/enet.c, version 3
  2002-10-24 15:46   ` Joakim Tjernlund
  2002-11-01 11:01     ` Joakim Tjernlund
@ 2003-01-28 11:54     ` Joakim Tjernlund
  2003-02-03 10:21       ` Joakim Tjernlund
  1 sibling, 1 reply; 16+ messages in thread
From: Joakim Tjernlund @ 2003-01-28 11:54 UTC (permalink / raw)
  To: Dan Malek, linuxppc-embedded


> This is the second version of my patch that removes the expensive memcpy of
> received
> ethernet frames in interrupt context.
>
> I have 1 report(from Ricardo Scop) of 20% increase in packets/second, packet
> size 1500 when
> applied to 8260 FEC(needs to be applied manually). But min packet size
> decreased with 10 %.
> This version should fix the 10% decrease case.
>
> This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
> 8260/fcc_enet.c with little effort.
>
> Better fix a bug in set_multicast_list(), move the dmi list forward when
>     walking it(dmi = dmi->next;)
>
> New stuff:
>    - Configrable: copy small packets or pass them directly, see
> COPY_SMALL_FRAMES in code.
>    - Collision reporting fix form Thomas Lange.
>    - Don't pass receive frames which has error upwards.
>    - Report RX_OV errors as fifo errors, not crc errors.
>
> Please test and report any problems and performace improvements.

Hi

This is the third version of my optimized enet.c patch.
Changes since version 2:
  1) invalidate the whole buffer BEFORE it is given to he CPM. Previously
     it was invalidated after the packet was received and that could lead to buffer
     corruption in some cases.

  2) use dma_cache_inv() instead of invalidate_dcache_range() since that will work
     for both 8xx and 82xx.

  3) decrease the allocated buffer length.

  4) disabled COPY_SMALL_FRAME. Define it somewhere if you want to save some memory.

  5) probably some white space changes got in too.

Any chance to see it the devel tree?

More than 3 months has passed since version 2 and the only problem reported was
1) and the fix has been know since mid November.

Dan, you said you would integrate this patch(or some version of it) in November. I
think I have waited long enough now. Please do ASAP.

 Jocke

--- arch/ppc/8xx_io/enet.c	Fri Nov  1 14:44:05 2002
+++ arch/ppc/8xx_io/new_enet.c	Sat Jan 25 19:57:50 2003
@@ -34,7 +34,6 @@
 #include <linux/ioport.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
-#include <linux/pci.h>
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/netdevice.h>
@@ -86,6 +85,14 @@
  * All functions are directly controlled using I/O pins.  See <asm/commproc.h>.
  */

+/* Define COPY_SMALL_FRAMES if you want to save buffer memory for small packets
+ * at a small performance hit. Note performance testing needed */
+/* #define COPY_SMALL_FRAMES 1  */
+
+#ifdef COPY_SMALL_FRAMES
+  #define RX_COPYBREAK (256-16) /* dev_alloc_skb() adds 16 bytes for internal use */
+#endif
+
 /* The transmitter timeout
  */
 #define TX_TIMEOUT	(2*HZ)
@@ -97,19 +104,17 @@
  * the skbuffer directly.
  */
 #ifdef CONFIG_ENET_BIG_BUFFERS
-#define CPM_ENET_RX_PAGES	32
-#define CPM_ENET_RX_FRSIZE	2048
-#define CPM_ENET_RX_FRPPG	(PAGE_SIZE / CPM_ENET_RX_FRSIZE)
-#define RX_RING_SIZE		(CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
-#define TX_RING_SIZE		64	/* Must be power of two */
-#define TX_RING_MOD_MASK	63	/*   for this to work */
+  #define RX_RING_SIZE		64
+  #define TX_RING_SIZE		64	/* Must be power of two for this to work */
 #else
-#define CPM_ENET_RX_PAGES	4
-#define CPM_ENET_RX_FRSIZE	2048
-#define CPM_ENET_RX_FRPPG	(PAGE_SIZE / CPM_ENET_RX_FRSIZE)
-#define RX_RING_SIZE		(CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
-#define TX_RING_SIZE		8	/* Must be power of two */
-#define TX_RING_MOD_MASK	7	/*   for this to work */
+  #define RX_RING_SIZE		8
+  #define TX_RING_SIZE		8	/* Must be power of two for this to work */
+#endif
+#define TX_RING_MOD_MASK	(TX_RING_SIZE-1)
+
+#define CPM_ENET_RX_FRSIZE	1552 /* must be a multiple of cache line */
+#if CPM_ENET_RX_FRSIZE % L1_CACHE_LINE_SIZE != 0
+    #error CPM_ENET_RX_FRSIZE must be a multiple of L1 cache size
 #endif

 /* The CPM stores dest/src/type, data, and checksum for receive packets.
@@ -143,7 +148,7 @@
 	/* Virtual addresses for the receive buffers because we can't
 	 * do a __va() on them anymore.
 	 */
-	unsigned char *rx_vaddr[RX_RING_SIZE];
+	void    *rx_vaddr[RX_RING_SIZE];
 	struct	net_device_stats stats;
 	uint	tx_full;
 	spinlock_t lock;
@@ -370,11 +375,11 @@

 		cep->stats.tx_packets++;

-		/* Deferred means some collisions occurred during transmit,
-		 * but we eventually sent the packet OK.
-		 */
-		if (bdp->cbd_sc & BD_ENET_TX_DEF)
-			cep->stats.collisions++;
+		/* Check retry counter, i.e. collision counter */
+		if (bdp->cbd_sc & BD_ENET_TX_RCMASK){
+			/* Note that counter cannot go higher than 15 */
+			cep->stats.collisions+=(bdp->cbd_sc & BD_ENET_TX_RCMASK)>>2;
+		}

 		/* Free the sk buffer associated with this last transmit.
 		*/
@@ -449,6 +454,7 @@
 	struct	scc_enet_private *cep;
 	volatile cbd_t	*bdp;
 	struct	sk_buff *skb;
+	struct	sk_buff *skb_tmp;
 	ushort	pkt_len;

 	cep = (struct scc_enet_private *)dev->priv;
@@ -458,83 +464,93 @@
 	 */
 	bdp = cep->cur_rx;

-for (;;) {
-	if (bdp->cbd_sc & BD_ENET_RX_EMPTY)
-		break;
-
+	for (;;) {
+		if (bdp->cbd_sc & BD_ENET_RX_EMPTY)
+			break;
+
+#define RX_BD_ERRORS (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO | BD_ENET_RX_CR | BD_ENET_RX_OV | BD_ENET_RX_CL)
 #ifndef final_version
-	/* Since we have allocated space to hold a complete frame, both
-	 * the first and last indicators should be set.
-	 */
-	if ((bdp->cbd_sc & (BD_ENET_RX_FIRST | BD_ENET_RX_LAST)) !=
-		(BD_ENET_RX_FIRST | BD_ENET_RX_LAST))
+		/* Since we have allocated space to hold a complete frame, both
+		 * the first and last indicators should be set.
+		 */
+		if ((bdp->cbd_sc & (BD_ENET_RX_FIRST | BD_ENET_RX_LAST)) !=
+		    (BD_ENET_RX_FIRST | BD_ENET_RX_LAST))
 			printk("CPM ENET: rcv is not first+last\n");
 #endif
-
-	/* Frame too long or too short.
-	*/
-	if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH))
-		cep->stats.rx_length_errors++;
-	if (bdp->cbd_sc & BD_ENET_RX_NO)	/* Frame alignment */
-		cep->stats.rx_frame_errors++;
-	if (bdp->cbd_sc & BD_ENET_RX_CR)	/* CRC Error */
-		cep->stats.rx_crc_errors++;
-	if (bdp->cbd_sc & BD_ENET_RX_OV)	/* FIFO overrun */
-		cep->stats.rx_crc_errors++;
-
-	/* Report late collisions as a frame error.
-	 * On this error, the BD is closed, but we don't know what we
-	 * have in the buffer.  So, just drop this frame on the floor.
-	 */
-	if (bdp->cbd_sc & BD_ENET_RX_CL) {
-		cep->stats.rx_frame_errors++;
-	}
-	else {
-
-		/* Process the incoming frame.
-		*/
-		cep->stats.rx_packets++;
-		pkt_len = bdp->cbd_datlen;
-		cep->stats.rx_bytes += pkt_len;
-
-		/* This does 16 byte alignment, much more than we need.
-		 * The packet length includes FCS, but we don't want to
-		 * include that when passing upstream as it messes up
-		 * bridging applications.
-		 */
-		skb = dev_alloc_skb(pkt_len-4);
-
-		if (skb == NULL) {
-			printk("%s: Memory squeeze, dropping packet.\n", dev->name);
-			cep->stats.rx_dropped++;
-		}
-		else {
-			skb->dev = dev;
-			skb_put(skb,pkt_len-4);	/* Make room */
-			eth_copy_and_sum(skb,
-				cep->rx_vaddr[bdp - cep->rx_bd_base],
-				pkt_len-4, 0);
-			skb->protocol=eth_type_trans(skb,dev);
-			netif_rx(skb);
+		if(bdp->cbd_sc & RX_BD_ERRORS){ /* Receive errors ? */
+			cep->stats.rx_errors++;
+			if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH)) /* Frame too long or too short. */
+				cep->stats.rx_length_errors++;
+			if (bdp->cbd_sc & BD_ENET_RX_NO)	/* Frame alignment */
+				cep->stats.rx_frame_errors++;
+			if (bdp->cbd_sc & BD_ENET_RX_CR)	/* CRC Error */
+				cep->stats.rx_crc_errors++;
+			if (bdp->cbd_sc & BD_ENET_RX_OV)	/* FIFO overrun */
+				cep->stats.rx_fifo_errors++;
+			if (bdp->cbd_sc & BD_ENET_RX_CL)        /* Late collision */
+				cep->stats.collisions++;
+		} else {
+			/* Process the incoming frame.
+			 */
+			cep->stats.rx_packets++;
+			pkt_len = bdp->cbd_datlen;
+			cep->stats.rx_bytes += pkt_len;
+			pkt_len -= 4; /* The packet length includes FCS, but we don't want to
+				       * include that when passing upstream as it messes up
+				       * bridging applications. Is this still true ???? */
+#ifdef COPY_SMALL_FRAMES
+			/* Allocate the next buffer now so we are sure to have one when needed
+			 * This does 16 byte alignment, exactly what we need(L1_CACHE aligned). */
+			if(pkt_len < RX_COPYBREAK)
+				skb_tmp = __dev_alloc_skb(pkt_len, GFP_ATOMIC | GFP_DMA);
+			else
+#endif
+				skb_tmp = __dev_alloc_skb(CPM_ENET_RX_FRSIZE, GFP_ATOMIC | GFP_DMA);
+
+			if (skb_tmp == NULL) {
+				printk("%s: Memory squeeze, dropping packet.\n", dev->name);
+				cep->stats.rx_dropped++;
+
+			} else {
+				skb = cep->rx_vaddr[bdp - cep->rx_bd_base];
+#ifdef COPY_SMALL_FRAMES
+				if(pkt_len < RX_COPYBREAK) {
+					typeof(skb) skb_swap = skb;
+					memcpy(skb_put(skb_tmp, pkt_len), skb->data, pkt_len);
+					/* swap the skb and skb_tmp */
+					skb = skb_tmp;
+					skb_tmp = skb_swap;
+				}
+				else
+#endif
+				{
+					skb_put(skb, pkt_len);  /* Make room */
+					bdp->cbd_bufaddr = __pa(skb_tmp->data);
+					cep->rx_vaddr[bdp - cep->rx_bd_base] = skb_tmp;
+				}
+				dma_cache_inv((unsigned long) skb_tmp->data, CPM_ENET_RX_FRSIZE);
+				skb->dev = dev;
+				skb->protocol=eth_type_trans(skb, dev);
+				netif_rx(skb);
+			}
 		}
+
+		/* Clear the status flags for this buffer.
+		 */
+		bdp->cbd_sc &= ~BD_ENET_RX_STATS;
+
+		/* Mark the buffer empty.
+		 */
+		bdp->cbd_sc |= BD_ENET_RX_EMPTY;
+
+		/* Update BD pointer to next entry.
+		 */
+		if (bdp->cbd_sc & BD_ENET_RX_WRAP)
+			bdp = cep->rx_bd_base;
+		else
+			bdp++;
+
 	}
-
-	/* Clear the status flags for this buffer.
-	*/
-	bdp->cbd_sc &= ~BD_ENET_RX_STATS;
-
-	/* Mark the buffer empty.
-	*/
-	bdp->cbd_sc |= BD_ENET_RX_EMPTY;
-
-	/* Update BD pointer to next entry.
-	*/
-	if (bdp->cbd_sc & BD_ENET_RX_WRAP)
-		bdp = cep->rx_bd_base;
-	else
-		bdp++;
-
-   }
 	cep->cur_rx = (cbd_t *)bdp;

 	return 0;
@@ -608,7 +624,7 @@

 			dmi = dev->mc_list;

-			for (i=0; i<dev->mc_count; i++) {
+			for (i=0; i<dev->mc_count; i++, dmi = dmi->next) {

 				/* Only support group multicast for now.
 				*/
@@ -647,8 +663,7 @@
 	struct net_device *dev;
 	struct scc_enet_private *cep;
 	int i, j, k;
-	unsigned char	*eap, *ba;
-	dma_addr_t	mem_addr;
+	unsigned char	*eap;
 	bd_t		*bd;
 	volatile	cbd_t		*bdp;
 	volatile	cpm8xx_t	*cp;
@@ -839,22 +854,14 @@

 	bdp = cep->rx_bd_base;
 	k = 0;
-	for (i=0; i<CPM_ENET_RX_PAGES; i++) {
-
-		/* Allocate a page.
-		*/
-		ba = (unsigned char *)consistent_alloc(GFP_KERNEL, PAGE_SIZE, &mem_addr);
-
-		/* Initialize the BD for every fragment in the page.
-		*/
-		for (j=0; j<CPM_ENET_RX_FRPPG; j++) {
-			bdp->cbd_sc = BD_ENET_RX_EMPTY | BD_ENET_RX_INTR;
-			bdp->cbd_bufaddr = mem_addr;
-			cep->rx_vaddr[k++] = ba;
-			mem_addr += CPM_ENET_RX_FRSIZE;
-			ba += CPM_ENET_RX_FRSIZE;
-			bdp++;
-		}
+	/* Initialize the BDs. */
+	for (j=0; j < RX_RING_SIZE; j++) {
+		struct	sk_buff * skb = __dev_alloc_skb(CPM_ENET_RX_FRSIZE, GFP_ATOMIC | GFP_DMA);
+		dma_cache_inv((unsigned long) skb->data, CPM_ENET_RX_FRSIZE);
+		bdp->cbd_sc = BD_ENET_RX_EMPTY | BD_ENET_RX_INTR;
+		bdp->cbd_bufaddr = __pa(skb->data);
+		cep->rx_vaddr[k++] = skb;
+		bdp++;
 	}

 	/* Set the last buffer to wrap.


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH] arch/ppc/8xx_io/enet.c, version 3
  2003-01-28 11:54     ` [PATCH] arch/ppc/8xx_io/enet.c, version 3 Joakim Tjernlund
@ 2003-02-03 10:21       ` Joakim Tjernlund
  2003-02-03 12:28         ` Pantelis Antoniou
  0 siblings, 1 reply; 16+ messages in thread
From: Joakim Tjernlund @ 2003-02-03 10:21 UTC (permalink / raw)
  To: linuxppc-embedded


> > This is the second version of my patch that removes the expensive memcpy of
> > received
> > ethernet frames in interrupt context.
> >
> > I have 1 report(from Ricardo Scop) of 20% increase in packets/second, packet
> > size 1500 when
> > applied to 8260 FEC(needs to be applied manually). But min packet size
> > decreased with 10 %.
> > This version should fix the 10% decrease case.
> >
> > This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
> > 8260/fcc_enet.c with little effort.
> >
> > Better fix a bug in set_multicast_list(), move the dmi list forward when
> >     walking it(dmi = dmi->next;)
> >
> > New stuff:
> >    - Configrable: copy small packets or pass them directly, see
> > COPY_SMALL_FRAMES in code.
> >    - Collision reporting fix form Thomas Lange.
> >    - Don't pass receive frames which has error upwards.
> >    - Report RX_OV errors as fifo errors, not crc errors.
> >
> > Please test and report any problems and performace improvements.
>
> Hi
>
> This is the third version of my optimized enet.c patch.
> Changes since version 2:
>   1) invalidate the whole buffer BEFORE it is given to he CPM. Previously
>      it was invalidated after the packet was received and that could lead to buffer
>      corruption in some cases.
>
>   2) use dma_cache_inv() instead of invalidate_dcache_range() since that will work
>      for both 8xx and 82xx.
>
>   3) decrease the allocated buffer length.
>
>   4) disabled COPY_SMALL_FRAME. Define it somewhere if you want to save some memory.
>
>   5) probably some white space changes got in too.
>
> Any chance to see it the devel tree?
>
> More than 3 months has passed since version 2 and the only problem reported was
> 1) and the fix has been know since mid November.
>
> Dan, you said you would integrate this patch(or some version of it) in November. I
> think I have waited long enough now. Please do ASAP.
>
>  Jocke
[SNIP]

Zero feedback so far. Is there zero interest to improve the mpc code in linuxppc?
Anyhow, I give up. Don't have the energy to resend over and over again.

   Jocke


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] arch/ppc/8xx_io/enet.c, version 3
  2003-02-03 10:21       ` Joakim Tjernlund
@ 2003-02-03 12:28         ` Pantelis Antoniou
  0 siblings, 0 replies; 16+ messages in thread
From: Pantelis Antoniou @ 2003-02-03 12:28 UTC (permalink / raw)
  To: joakim.tjernlund; +Cc: linuxppc-embedded


Joakim Tjernlund wrote:

>>>This is the second version of my patch that removes the expensive memcpy of
>>>received
>>>ethernet frames in interrupt context.
>>>
>>>I have 1 report(from Ricardo Scop) of 20% increase in packets/second, packet
>>>size 1500 when
>>>applied to 8260 FEC(needs to be applied manually). But min packet size
>>>decreased with 10 %.
>>>This version should fix the 10% decrease case.
>>>
>>>This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
>>>8260/fcc_enet.c with little effort.
>>>
>>>Better fix a bug in set_multicast_list(), move the dmi list forward when
>>>    walking it(dmi = dmi->next;)
>>>
>>>New stuff:
>>>   - Configrable: copy small packets or pass them directly, see
>>>COPY_SMALL_FRAMES in code.
>>>   - Collision reporting fix form Thomas Lange.
>>>   - Don't pass receive frames which has error upwards.
>>>   - Report RX_OV errors as fifo errors, not crc errors.
>>>
>>>Please test and report any problems and performace improvements.
>>>
>>>
>>Hi
>>
>>This is the third version of my optimized enet.c patch.
>>Changes since version 2:
>>  1) invalidate the whole buffer BEFORE it is given to he CPM. Previously
>>     it was invalidated after the packet was received and that could lead to buffer
>>     corruption in some cases.
>>
>>  2) use dma_cache_inv() instead of invalidate_dcache_range() since that will work
>>     for both 8xx and 82xx.
>>
>>  3) decrease the allocated buffer length.
>>
>>  4) disabled COPY_SMALL_FRAME. Define it somewhere if you want to save some memory.
>>
>>  5) probably some white space changes got in too.
>>
>>Any chance to see it the devel tree?
>>
>>More than 3 months has passed since version 2 and the only problem reported was
>>1) and the fix has been know since mid November.
>>
>>Dan, you said you would integrate this patch(or some version of it) in November. I
>>think I have waited long enough now. Please do ASAP.
>>
>> Jocke
>>
>>
>[SNIP]
>
>Zero feedback so far. Is there zero interest to improve the mpc code in linuxppc?
>Anyhow, I give up. Don't have the energy to resend over and over again.
>
>   Jocke
>
>
>
>
>
>
Please don't give up.

I'll try it and you'll have my feedback within the week.

Anyhow, what is the deal with 8xx/82xx?
At least the maintainers should ack the patch, and if it is not suitable
for inclusion they should state for what reason.

We don't want to discourage people sending patches.
Right?

Pantelis


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2003-02-03 12:28 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-10-24 14:23 [PATCH] arch/ppc/8xx_io/enet.c, version 2 Joakim Tjernlund
2002-10-24 14:59 ` Tom Rini
2002-10-24 15:46   ` Joakim Tjernlund
2002-11-01 11:01     ` Joakim Tjernlund
2003-01-28 11:54     ` [PATCH] arch/ppc/8xx_io/enet.c, version 3 Joakim Tjernlund
2003-02-03 10:21       ` Joakim Tjernlund
2003-02-03 12:28         ` Pantelis Antoniou
2002-10-28 15:17 ` [PATCH] arch/ppc/8xx_io/enet.c, version 2 Stephan Linke
2002-10-28 17:24   ` Joakim Tjernlund
2002-11-13 13:50 ` Hans Feldt
2002-11-13 16:35   ` Joakim Tjernlund
2002-11-13 19:30     ` Hans Feldt
2002-11-13 21:19       ` Joakim Tjernlund
2002-11-13 22:12         ` Dan Malek
2002-11-13 22:54           ` Joakim Tjernlund
2002-11-15 11:52           ` Hans Feldt

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.