All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 1/5] gianfar: Cleanup device refs in gfar_private
@ 2013-02-12 12:47 Claudiu Manoil
  2013-02-12 12:47 ` [PATCH net-next 2/5] gianfar: Cleanup and optimize struct gfar_private Claudiu Manoil
  2013-02-12 14:54 ` [PATCH net-next 1/5] gianfar: Cleanup device refs in gfar_private Paul Gortmaker
  0 siblings, 2 replies; 16+ messages in thread
From: Claudiu Manoil @ 2013-02-12 12:47 UTC (permalink / raw)
  To: netdev; +Cc: Paul Gortmaker, David S. Miller

* remove unused device_node pointer
* remove duplicate SET_NETDEV_DEV()
* use device pointer (dev) to simplify the code and to
  avoid double indirections (esp. on the "fast path")

Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Claudiu Manoil <claudiu.manoil@freescale.com>
---
 drivers/net/ethernet/freescale/gianfar.c |   29 +++++++++++++----------------
 drivers/net/ethernet/freescale/gianfar.h |    2 +-
 2 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 75734bf..096fb5f 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -231,7 +231,7 @@ static int gfar_alloc_skb_resources(struct net_device *ndev)
 	dma_addr_t addr;
 	int i, j, k;
 	struct gfar_private *priv = netdev_priv(ndev);
-	struct device *dev = &priv->ofdev->dev;
+	struct device *dev = priv->dev;
 	struct gfar_priv_tx_q *tx_queue = NULL;
 	struct gfar_priv_rx_q *rx_queue = NULL;
 
@@ -668,7 +668,6 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
 		return -ENOMEM;
 
 	priv = netdev_priv(dev);
-	priv->node = ofdev->dev.of_node;
 	priv->ndev = dev;
 
 	priv->num_tx_queues = num_tx_qs;
@@ -1006,7 +1005,7 @@ static int gfar_probe(struct platform_device *ofdev)
 	priv = netdev_priv(dev);
 	priv->ndev = dev;
 	priv->ofdev = ofdev;
-	priv->node = ofdev->dev.of_node;
+	priv->dev = &ofdev->dev;
 	SET_NETDEV_DEV(dev, &ofdev->dev);
 
 	spin_lock_init(&priv->bflock);
@@ -1043,8 +1042,6 @@ static int gfar_probe(struct platform_device *ofdev)
 	/* Set the dev->base_addr to the gfar reg region */
 	dev->base_addr = (unsigned long) regs;
 
-	SET_NETDEV_DEV(dev, &ofdev->dev);
-
 	/* Fill in the dev structure */
 	dev->watchdog_timeo = TX_TIMEOUT;
 	dev->mtu = 1500;
@@ -1722,13 +1719,13 @@ static void free_skb_tx_queue(struct gfar_priv_tx_q *tx_queue)
 		if (!tx_queue->tx_skbuff[i])
 			continue;
 
-		dma_unmap_single(&priv->ofdev->dev, txbdp->bufPtr,
+		dma_unmap_single(priv->dev, txbdp->bufPtr,
 				 txbdp->length, DMA_TO_DEVICE);
 		txbdp->lstatus = 0;
 		for (j = 0; j < skb_shinfo(tx_queue->tx_skbuff[i])->nr_frags;
 		     j++) {
 			txbdp++;
-			dma_unmap_page(&priv->ofdev->dev, txbdp->bufPtr,
+			dma_unmap_page(priv->dev, txbdp->bufPtr,
 				       txbdp->length, DMA_TO_DEVICE);
 		}
 		txbdp++;
@@ -1749,8 +1746,8 @@ static void free_skb_rx_queue(struct gfar_priv_rx_q *rx_queue)
 
 	for (i = 0; i < rx_queue->rx_ring_size; i++) {
 		if (rx_queue->rx_skbuff[i]) {
-			dma_unmap_single(&priv->ofdev->dev,
-					 rxbdp->bufPtr, priv->rx_buffer_size,
+			dma_unmap_single(priv->dev, rxbdp->bufPtr,
+					 priv->rx_buffer_size,
 					 DMA_FROM_DEVICE);
 			dev_kfree_skb_any(rx_queue->rx_skbuff[i]);
 			rx_queue->rx_skbuff[i] = NULL;
@@ -1789,7 +1786,7 @@ static void free_skb_resources(struct gfar_private *priv)
 			free_skb_rx_queue(rx_queue);
 	}
 
-	dma_free_coherent(&priv->ofdev->dev,
+	dma_free_coherent(priv->dev,
 			  sizeof(struct txbd8) * priv->total_tx_ring_size +
 			  sizeof(struct rxbd8) * priv->total_rx_ring_size,
 			  priv->tx_queue[0]->tx_bd_base,
@@ -2169,7 +2166,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			if (i == nr_frags - 1)
 				lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
 
-			bufaddr = skb_frag_dma_map(&priv->ofdev->dev,
+			bufaddr = skb_frag_dma_map(priv->dev,
 						   &skb_shinfo(skb)->frags[i],
 						   0,
 						   length,
@@ -2221,7 +2218,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		lstatus |= BD_LFLAG(TXBD_TOE);
 	}
 
-	txbdp_start->bufPtr = dma_map_single(&priv->ofdev->dev, skb->data,
+	txbdp_start->bufPtr = dma_map_single(priv->dev, skb->data,
 					     skb_headlen(skb), DMA_TO_DEVICE);
 
 	/* If time stamping is requested one additional TxBD must be set up. The
@@ -2534,7 +2531,7 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
 		} else
 			buflen = bdp->length;
 
-		dma_unmap_single(&priv->ofdev->dev, bdp->bufPtr,
+		dma_unmap_single(priv->dev, bdp->bufPtr,
 				 buflen, DMA_TO_DEVICE);
 
 		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
@@ -2553,7 +2550,7 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
 		bdp = next_txbd(bdp, base, tx_ring_size);
 
 		for (i = 0; i < frags; i++) {
-			dma_unmap_page(&priv->ofdev->dev, bdp->bufPtr,
+			dma_unmap_page(priv->dev, bdp->bufPtr,
 				       bdp->length, DMA_TO_DEVICE);
 			bdp->lstatus &= BD_LFLAG(TXBD_WRAP);
 			bdp = next_txbd(bdp, base, tx_ring_size);
@@ -2619,7 +2616,7 @@ static void gfar_new_rxbdp(struct gfar_priv_rx_q *rx_queue, struct rxbd8 *bdp,
 	struct gfar_private *priv = netdev_priv(dev);
 	dma_addr_t buf;
 
-	buf = dma_map_single(&priv->ofdev->dev, skb->data,
+	buf = dma_map_single(priv->dev, skb->data,
 			     priv->rx_buffer_size, DMA_FROM_DEVICE);
 	gfar_init_rxbdp(rx_queue, bdp, buf);
 }
@@ -2784,7 +2781,7 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit)
 
 		skb = rx_queue->rx_skbuff[rx_queue->skb_currx];
 
-		dma_unmap_single(&priv->ofdev->dev, bdp->bufPtr,
+		dma_unmap_single(priv->dev, bdp->bufPtr,
 				 priv->rx_buffer_size, DMA_FROM_DEVICE);
 
 		if (unlikely(!(bdp->status & RXBD_ERR) &&
diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index 71793f4..22c2f7a 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -1065,7 +1065,7 @@ struct gfar_private {
 	unsigned int total_tx_ring_size;
 	unsigned int total_rx_ring_size;
 
-	struct device_node *node;
+	struct device *dev;
 	struct net_device *ndev;
 	struct platform_device *ofdev;
 	enum gfar_errata errata;
-- 
1.6.6

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH net-next 2/5] gianfar: Cleanup and optimize struct gfar_private
  2013-02-12 12:47 [PATCH net-next 1/5] gianfar: Cleanup device refs in gfar_private Claudiu Manoil
@ 2013-02-12 12:47 ` Claudiu Manoil
  2013-02-12 12:47   ` [PATCH net-next 3/5] gianfar: GRO_DROP is unlikely Claudiu Manoil
  2013-02-12 15:11   ` [PATCH net-next 2/5] gianfar: Cleanup and optimize struct gfar_private Paul Gortmaker
  2013-02-12 14:54 ` [PATCH net-next 1/5] gianfar: Cleanup device refs in gfar_private Paul Gortmaker
  1 sibling, 2 replies; 16+ messages in thread
From: Claudiu Manoil @ 2013-02-12 12:47 UTC (permalink / raw)
  To: netdev; +Cc: Paul Gortmaker, David S. Miller

* group run-time critical fields within the 1st cacheline
  followed by the tx|rx_queue reference arrays and the interrupt
  group instances (gfargrp) (all cacheline aligned)
* change 'padding' from unsigned short to u16
* clear 20+ byte memory hole
* group releated members
* push non-critical fields at the end of the struct

Signed-off-by: Claudiu Manoil <claudiu.manoil@freescale.com>
---
 drivers/net/ethernet/freescale/gianfar.h |   93 +++++++++++++++--------------
 1 files changed, 48 insertions(+), 45 deletions(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index 22c2f7a..5304a58 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -1054,28 +1054,64 @@ enum gfar_errata {
  * the buffer descriptor determines the actual condition.
  */
 struct gfar_private {
-
-	/* Indicates how many tx, rx queues are enabled */
-	unsigned int num_tx_queues;
 	unsigned int num_rx_queues;
-	unsigned int num_grps;
-	unsigned int mode;
-
-	/* The total tx and rx ring size for the enabled queues */
-	unsigned int total_tx_ring_size;
-	unsigned int total_rx_ring_size;
 
 	struct device *dev;
 	struct net_device *ndev;
-	struct platform_device *ofdev;
 	enum gfar_errata errata;
+	unsigned int rx_buffer_size;
+
+	u16 padding;
+
+	/* HW time stamping enabled flag */
+	int hwts_rx_en;
+	int hwts_tx_en;
 
-	struct gfar_priv_grp gfargrp[MAXGROUPS];
 	struct gfar_priv_tx_q *tx_queue[MAX_TX_QS];
 	struct gfar_priv_rx_q *rx_queue[MAX_RX_QS];
+	struct gfar_priv_grp gfargrp[MAXGROUPS];
+
+	u32 device_flags;
+
+	unsigned int mode;
+	unsigned int num_tx_queues;
+	unsigned int num_grps;
+
+	/* Network Statistics */
+	struct gfar_extra_stats extra_stats;
+
+	/* PHY stuff */
+	phy_interface_t interface;
+	struct device_node *phy_node;
+	struct device_node *tbi_node;
+	struct phy_device *phydev;
+	struct mii_bus *mii_bus;
+	int oldspeed;
+	int oldduplex;
+	int oldlink;
+
+	/* Bitfield update lock */
+	spinlock_t bflock;
+
+	uint32_t msg_enable;
+
+	struct work_struct reset_task;
+
+	struct platform_device *ofdev;
+	unsigned char
+		extended_hash:1,
+		bd_stash_en:1,
+		rx_filer_enable:1,
+		/* Wake-on-LAN enabled */
+		wol_en:1,
+		/* Enable priorty based Tx scheduling in Hw */
+		prio_sched_en:1;
+
+	/* The total tx and rx ring size for the enabled queues */
+	unsigned int total_tx_ring_size;
+	unsigned int total_rx_ring_size;
 
 	/* RX per device parameters */
-	unsigned int rx_buffer_size;
 	unsigned int rx_stash_size;
 	unsigned int rx_stash_index;
 
@@ -1094,39 +1130,6 @@ struct gfar_private {
 	unsigned int fifo_starve;
 	unsigned int fifo_starve_off;
 
-	/* Bitfield update lock */
-	spinlock_t bflock;
-
-	phy_interface_t interface;
-	struct device_node *phy_node;
-	struct device_node *tbi_node;
-	u32 device_flags;
-	unsigned char
-		extended_hash:1,
-		bd_stash_en:1,
-		rx_filer_enable:1,
-		wol_en:1, /* Wake-on-LAN enabled */
-		prio_sched_en:1; /* Enable priorty based Tx scheduling in Hw */
-	unsigned short padding;
-
-	/* PHY stuff */
-	struct phy_device *phydev;
-	struct mii_bus *mii_bus;
-	int oldspeed;
-	int oldduplex;
-	int oldlink;
-
-	uint32_t msg_enable;
-
-	struct work_struct reset_task;
-
-	/* Network Statistics */
-	struct gfar_extra_stats extra_stats;
-
-	/* HW time stamping enabled flag */
-	int hwts_rx_en;
-	int hwts_tx_en;
-
 	/*Filer table*/
 	unsigned int ftp_rqfpr[MAX_FILER_IDX + 1];
 	unsigned int ftp_rqfcr[MAX_FILER_IDX + 1];
-- 
1.6.6

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH net-next 3/5] gianfar: GRO_DROP is unlikely
  2013-02-12 12:47 ` [PATCH net-next 2/5] gianfar: Cleanup and optimize struct gfar_private Claudiu Manoil
@ 2013-02-12 12:47   ` Claudiu Manoil
  2013-02-12 12:47     ` [PATCH net-next 4/5] gianfar: Remove wrong buffer size conditioning to VLAN h/w offload Claudiu Manoil
  2013-02-12 16:30     ` [PATCH net-next 3/5] gianfar: GRO_DROP is unlikely Paul Gortmaker
  2013-02-12 15:11   ` [PATCH net-next 2/5] gianfar: Cleanup and optimize struct gfar_private Paul Gortmaker
  1 sibling, 2 replies; 16+ messages in thread
From: Claudiu Manoil @ 2013-02-12 12:47 UTC (permalink / raw)
  To: netdev; +Cc: Paul Gortmaker, David S. Miller

Signed-off-by: Claudiu Manoil <claudiu.manoil@freescale.com>
---
 drivers/net/ethernet/freescale/gianfar.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 096fb5f..5622134 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -2745,7 +2745,7 @@ static int gfar_process_frame(struct net_device *dev, struct sk_buff *skb,
 	/* Send the packet up the stack */
 	ret = napi_gro_receive(napi, skb);
 
-	if (GRO_DROP == ret)
+	if (unlikely(GRO_DROP == ret))
 		priv->extra_stats.kernel_dropped++;
 
 	return 0;
-- 
1.6.6

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH net-next 4/5] gianfar: Remove wrong buffer size conditioning to VLAN h/w offload
  2013-02-12 12:47   ` [PATCH net-next 3/5] gianfar: GRO_DROP is unlikely Claudiu Manoil
@ 2013-02-12 12:47     ` Claudiu Manoil
  2013-02-12 12:47       ` [PATCH net-next 5/5] gianfar: Fix and cleanup Rx FCB handling Claudiu Manoil
  2013-02-12 16:30     ` [PATCH net-next 3/5] gianfar: GRO_DROP is unlikely Paul Gortmaker
  1 sibling, 1 reply; 16+ messages in thread
From: Claudiu Manoil @ 2013-02-12 12:47 UTC (permalink / raw)
  To: netdev; +Cc: Paul Gortmaker, David S. Miller

The controller's ref manual states clearly that when the hw Rx vlan
offload feature is enabled, meaning that the VLEX bit from RCTRL is
correctly enabled, then the hw performs automatic VLAN tag extraction
and deletion from the ethernet frames. So there's no point in trying to
increase the rx buff size when rxvlan is on, as the frame is actually
smaller.
And the Tx vlan hw accel feature (VLINS) has nothing to do with rx buff
size computation.

Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Claudiu Manoil <claudiu.manoil@freescale.com>
---
 drivers/net/ethernet/freescale/gianfar.c |    3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 5622134..59fb3bf 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -2390,9 +2390,6 @@ static int gfar_change_mtu(struct net_device *dev, int new_mtu)
 	int oldsize = priv->rx_buffer_size;
 	int frame_size = new_mtu + ETH_HLEN;
 
-	if (gfar_is_vlan_on(priv))
-		frame_size += VLAN_HLEN;
-
 	if ((frame_size < 64) || (frame_size > JUMBO_FRAME_SIZE)) {
 		netif_err(priv, drv, dev, "Invalid MTU setting\n");
 		return -EINVAL;
-- 
1.6.6

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH net-next 5/5] gianfar: Fix and cleanup Rx FCB handling
  2013-02-12 12:47     ` [PATCH net-next 4/5] gianfar: Remove wrong buffer size conditioning to VLAN h/w offload Claudiu Manoil
@ 2013-02-12 12:47       ` Claudiu Manoil
  2013-02-12 17:19         ` Paul Gortmaker
  0 siblings, 1 reply; 16+ messages in thread
From: Claudiu Manoil @ 2013-02-12 12:47 UTC (permalink / raw)
  To: netdev; +Cc: Paul Gortmaker, David S. Miller

NETIF_F_HW_VLAN_TX flag must not condition RxFCB usage.
In the case of RxBD rings, FCBs (Frame Control Block) are inserted by
the eTSEC whenever RCTRL[PRSDEP] is set to a non-zero value. Only one
FCB is inserted per frame (in the buffer pointed to by the RxBD with
bit F set). TOE acceleration for receive is enabled for all rx frames
in this case.
Indroduce the uses_rxfcb field to signal RxFCB insertion in accordance
with the specification above (leading to cleaner, less confusing code).

Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Claudiu Manoil <claudiu.manoil@freescale.com>
---
 drivers/net/ethernet/freescale/gianfar.c |   41 ++++++++++++++---------------
 drivers/net/ethernet/freescale/gianfar.h |    1 +
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 59fb3bf..3de608c 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -349,6 +349,9 @@ static void gfar_init_mac(struct net_device *ndev)
 	/* Configure the coalescing support */
 	gfar_configure_coalescing(priv, 0xFF, 0xFF);
 
+	/* set this when rx hw offload (TOE) functions are being used */
+	priv->uses_rxfcb = 0;
+
 	if (priv->rx_filer_enable) {
 		rctrl |= RCTRL_FILREN;
 		/* Program the RIR0 reg with the required distribution */
@@ -359,8 +362,10 @@ static void gfar_init_mac(struct net_device *ndev)
 	if (ndev->flags & IFF_PROMISC)
 		rctrl |= RCTRL_PROM;
 
-	if (ndev->features & NETIF_F_RXCSUM)
+	if (ndev->features & NETIF_F_RXCSUM) {
 		rctrl |= RCTRL_CHECKSUMMING;
+		priv->uses_rxfcb = 1;
+	}
 
 	if (priv->extended_hash) {
 		rctrl |= RCTRL_EXTHASH;
@@ -382,11 +387,15 @@ static void gfar_init_mac(struct net_device *ndev)
 	}
 
 	/* Enable HW time stamping if requested from user space */
-	if (priv->hwts_rx_en)
+	if (priv->hwts_rx_en) {
 		rctrl |= RCTRL_PRSDEP_INIT | RCTRL_TS_ENABLE;
+		priv->uses_rxfcb = 1;
+	}
 
-	if (ndev->features & NETIF_F_HW_VLAN_RX)
+	if (ndev->features & NETIF_F_HW_VLAN_RX) {
 		rctrl |= RCTRL_VLEX | RCTRL_PRSDEP_INIT;
+		priv->uses_rxfcb = 1;
+	}
 
 	/* Init rctrl based on our settings */
 	gfar_write(&regs->rctrl, rctrl);
@@ -505,20 +514,6 @@ void unlock_tx_qs(struct gfar_private *priv)
 		spin_unlock(&priv->tx_queue[i]->txlock);
 }
 
-static bool gfar_is_vlan_on(struct gfar_private *priv)
-{
-	return (priv->ndev->features & NETIF_F_HW_VLAN_RX) ||
-	       (priv->ndev->features & NETIF_F_HW_VLAN_TX);
-}
-
-/* Returns 1 if incoming frames use an FCB */
-static inline int gfar_uses_fcb(struct gfar_private *priv)
-{
-	return gfar_is_vlan_on(priv) ||
-	       (priv->ndev->features & NETIF_F_RXCSUM) ||
-	       (priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER);
-}
-
 static void free_tx_pointers(struct gfar_private *priv)
 {
 	int i;
@@ -2331,10 +2326,13 @@ void gfar_check_rx_parser_mode(struct gfar_private *priv)
 
 	tempval = gfar_read(&regs->rctrl);
 	/* If parse is no longer required, then disable parser */
-	if (tempval & RCTRL_REQ_PARSER)
+	if (tempval & RCTRL_REQ_PARSER) {
 		tempval |= RCTRL_PRSDEP_INIT;
-	else
+		priv->uses_rxfcb = 1;
+	} else {
 		tempval &= ~RCTRL_PRSDEP_INIT;
+		priv->uses_rxfcb = 0;
+	}
 	gfar_write(&regs->rctrl, tempval);
 }
 
@@ -2367,6 +2365,7 @@ void gfar_vlan_mode(struct net_device *dev, netdev_features_t features)
 		tempval = gfar_read(&regs->rctrl);
 		tempval |= (RCTRL_VLEX | RCTRL_PRSDEP_INIT);
 		gfar_write(&regs->rctrl, tempval);
+		priv->uses_rxfcb = 1;
 	} else {
 		/* Disable VLAN tag extraction */
 		tempval = gfar_read(&regs->rctrl);
@@ -2395,7 +2394,7 @@ static int gfar_change_mtu(struct net_device *dev, int new_mtu)
 		return -EINVAL;
 	}
 
-	if (gfar_uses_fcb(priv))
+	if (priv->uses_rxfcb)
 		frame_size += GMAC_FCB_LEN;
 
 	frame_size += priv->padding;
@@ -2766,7 +2765,7 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit)
 	bdp = rx_queue->cur_rx;
 	base = rx_queue->rx_bd_base;
 
-	amount_pull = (gfar_uses_fcb(priv) ? GMAC_FCB_LEN : 0);
+	amount_pull = priv->uses_rxfcb ? GMAC_FCB_LEN : 0;
 
 	while (!((bdp->status & RXBD_EMPTY) || (--rx_work_limit < 0))) {
 		struct sk_buff *newskb;
diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index 5304a58..386f1fe 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -1061,6 +1061,7 @@ struct gfar_private {
 	enum gfar_errata errata;
 	unsigned int rx_buffer_size;
 
+	u16 uses_rxfcb;
 	u16 padding;
 
 	/* HW time stamping enabled flag */
-- 
1.6.6

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 1/5] gianfar: Cleanup device refs in gfar_private
  2013-02-12 12:47 [PATCH net-next 1/5] gianfar: Cleanup device refs in gfar_private Claudiu Manoil
  2013-02-12 12:47 ` [PATCH net-next 2/5] gianfar: Cleanup and optimize struct gfar_private Claudiu Manoil
@ 2013-02-12 14:54 ` Paul Gortmaker
  2013-02-12 17:14   ` Claudiu Manoil
  1 sibling, 1 reply; 16+ messages in thread
From: Paul Gortmaker @ 2013-02-12 14:54 UTC (permalink / raw)
  To: Claudiu Manoil; +Cc: netdev, David S. Miller

On 13-02-12 07:47 AM, Claudiu Manoil wrote:
> * remove unused device_node pointer
> * remove duplicate SET_NETDEV_DEV()
> * use device pointer (dev) to simplify the code and to
>   avoid double indirections (esp. on the "fast path")

Ideally, when you find yourself making a list within the longlog,
that is a hint that you might want to start making it into
multiple commits, for ease of review.  Granted #1 and #2 are
trivial, but #3 probably could be a separate commit.  Did you
see any change in the object size or the disassembly when
making change #3?

P.
--

> 
> Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
> Signed-off-by: Claudiu Manoil <claudiu.manoil@freescale.com>
> ---
>  drivers/net/ethernet/freescale/gianfar.c |   29 +++++++++++++----------------
>  drivers/net/ethernet/freescale/gianfar.h |    2 +-
>  2 files changed, 14 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
> index 75734bf..096fb5f 100644
> --- a/drivers/net/ethernet/freescale/gianfar.c
> +++ b/drivers/net/ethernet/freescale/gianfar.c
> @@ -231,7 +231,7 @@ static int gfar_alloc_skb_resources(struct net_device *ndev)
>  	dma_addr_t addr;
>  	int i, j, k;
>  	struct gfar_private *priv = netdev_priv(ndev);
> -	struct device *dev = &priv->ofdev->dev;
> +	struct device *dev = priv->dev;
>  	struct gfar_priv_tx_q *tx_queue = NULL;
>  	struct gfar_priv_rx_q *rx_queue = NULL;
>  
> @@ -668,7 +668,6 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
>  		return -ENOMEM;
>  
>  	priv = netdev_priv(dev);
> -	priv->node = ofdev->dev.of_node;
>  	priv->ndev = dev;
>  
>  	priv->num_tx_queues = num_tx_qs;
> @@ -1006,7 +1005,7 @@ static int gfar_probe(struct platform_device *ofdev)
>  	priv = netdev_priv(dev);
>  	priv->ndev = dev;
>  	priv->ofdev = ofdev;
> -	priv->node = ofdev->dev.of_node;
> +	priv->dev = &ofdev->dev;
>  	SET_NETDEV_DEV(dev, &ofdev->dev);
>  
>  	spin_lock_init(&priv->bflock);
> @@ -1043,8 +1042,6 @@ static int gfar_probe(struct platform_device *ofdev)
>  	/* Set the dev->base_addr to the gfar reg region */
>  	dev->base_addr = (unsigned long) regs;
>  
> -	SET_NETDEV_DEV(dev, &ofdev->dev);
> -
>  	/* Fill in the dev structure */
>  	dev->watchdog_timeo = TX_TIMEOUT;
>  	dev->mtu = 1500;
> @@ -1722,13 +1719,13 @@ static void free_skb_tx_queue(struct gfar_priv_tx_q *tx_queue)
>  		if (!tx_queue->tx_skbuff[i])
>  			continue;
>  
> -		dma_unmap_single(&priv->ofdev->dev, txbdp->bufPtr,
> +		dma_unmap_single(priv->dev, txbdp->bufPtr,
>  				 txbdp->length, DMA_TO_DEVICE);
>  		txbdp->lstatus = 0;
>  		for (j = 0; j < skb_shinfo(tx_queue->tx_skbuff[i])->nr_frags;
>  		     j++) {
>  			txbdp++;
> -			dma_unmap_page(&priv->ofdev->dev, txbdp->bufPtr,
> +			dma_unmap_page(priv->dev, txbdp->bufPtr,
>  				       txbdp->length, DMA_TO_DEVICE);
>  		}
>  		txbdp++;
> @@ -1749,8 +1746,8 @@ static void free_skb_rx_queue(struct gfar_priv_rx_q *rx_queue)
>  
>  	for (i = 0; i < rx_queue->rx_ring_size; i++) {
>  		if (rx_queue->rx_skbuff[i]) {
> -			dma_unmap_single(&priv->ofdev->dev,
> -					 rxbdp->bufPtr, priv->rx_buffer_size,
> +			dma_unmap_single(priv->dev, rxbdp->bufPtr,
> +					 priv->rx_buffer_size,
>  					 DMA_FROM_DEVICE);
>  			dev_kfree_skb_any(rx_queue->rx_skbuff[i]);
>  			rx_queue->rx_skbuff[i] = NULL;
> @@ -1789,7 +1786,7 @@ static void free_skb_resources(struct gfar_private *priv)
>  			free_skb_rx_queue(rx_queue);
>  	}
>  
> -	dma_free_coherent(&priv->ofdev->dev,
> +	dma_free_coherent(priv->dev,
>  			  sizeof(struct txbd8) * priv->total_tx_ring_size +
>  			  sizeof(struct rxbd8) * priv->total_rx_ring_size,
>  			  priv->tx_queue[0]->tx_bd_base,
> @@ -2169,7 +2166,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
>  			if (i == nr_frags - 1)
>  				lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
>  
> -			bufaddr = skb_frag_dma_map(&priv->ofdev->dev,
> +			bufaddr = skb_frag_dma_map(priv->dev,
>  						   &skb_shinfo(skb)->frags[i],
>  						   0,
>  						   length,
> @@ -2221,7 +2218,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
>  		lstatus |= BD_LFLAG(TXBD_TOE);
>  	}
>  
> -	txbdp_start->bufPtr = dma_map_single(&priv->ofdev->dev, skb->data,
> +	txbdp_start->bufPtr = dma_map_single(priv->dev, skb->data,
>  					     skb_headlen(skb), DMA_TO_DEVICE);
>  
>  	/* If time stamping is requested one additional TxBD must be set up. The
> @@ -2534,7 +2531,7 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
>  		} else
>  			buflen = bdp->length;
>  
> -		dma_unmap_single(&priv->ofdev->dev, bdp->bufPtr,
> +		dma_unmap_single(priv->dev, bdp->bufPtr,
>  				 buflen, DMA_TO_DEVICE);
>  
>  		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
> @@ -2553,7 +2550,7 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
>  		bdp = next_txbd(bdp, base, tx_ring_size);
>  
>  		for (i = 0; i < frags; i++) {
> -			dma_unmap_page(&priv->ofdev->dev, bdp->bufPtr,
> +			dma_unmap_page(priv->dev, bdp->bufPtr,
>  				       bdp->length, DMA_TO_DEVICE);
>  			bdp->lstatus &= BD_LFLAG(TXBD_WRAP);
>  			bdp = next_txbd(bdp, base, tx_ring_size);
> @@ -2619,7 +2616,7 @@ static void gfar_new_rxbdp(struct gfar_priv_rx_q *rx_queue, struct rxbd8 *bdp,
>  	struct gfar_private *priv = netdev_priv(dev);
>  	dma_addr_t buf;
>  
> -	buf = dma_map_single(&priv->ofdev->dev, skb->data,
> +	buf = dma_map_single(priv->dev, skb->data,
>  			     priv->rx_buffer_size, DMA_FROM_DEVICE);
>  	gfar_init_rxbdp(rx_queue, bdp, buf);
>  }
> @@ -2784,7 +2781,7 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit)
>  
>  		skb = rx_queue->rx_skbuff[rx_queue->skb_currx];
>  
> -		dma_unmap_single(&priv->ofdev->dev, bdp->bufPtr,
> +		dma_unmap_single(priv->dev, bdp->bufPtr,
>  				 priv->rx_buffer_size, DMA_FROM_DEVICE);
>  
>  		if (unlikely(!(bdp->status & RXBD_ERR) &&
> diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
> index 71793f4..22c2f7a 100644
> --- a/drivers/net/ethernet/freescale/gianfar.h
> +++ b/drivers/net/ethernet/freescale/gianfar.h
> @@ -1065,7 +1065,7 @@ struct gfar_private {
>  	unsigned int total_tx_ring_size;
>  	unsigned int total_rx_ring_size;
>  
> -	struct device_node *node;
> +	struct device *dev;
>  	struct net_device *ndev;
>  	struct platform_device *ofdev;
>  	enum gfar_errata errata;
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 2/5] gianfar: Cleanup and optimize struct gfar_private
  2013-02-12 12:47 ` [PATCH net-next 2/5] gianfar: Cleanup and optimize struct gfar_private Claudiu Manoil
  2013-02-12 12:47   ` [PATCH net-next 3/5] gianfar: GRO_DROP is unlikely Claudiu Manoil
@ 2013-02-12 15:11   ` Paul Gortmaker
  2013-02-12 15:58     ` Claudiu Manoil
  1 sibling, 1 reply; 16+ messages in thread
From: Paul Gortmaker @ 2013-02-12 15:11 UTC (permalink / raw)
  To: Claudiu Manoil; +Cc: netdev, David S. Miller

On 13-02-12 07:47 AM, Claudiu Manoil wrote:
> * group run-time critical fields within the 1st cacheline
>   followed by the tx|rx_queue reference arrays and the interrupt
>   group instances (gfargrp) (all cacheline aligned)
> * change 'padding' from unsigned short to u16
> * clear 20+ byte memory hole

Per prev. mail, it gets harder to see which change is where,
when they are all lumped together like this.  For example, it
wasn't obvious to me where the 20 byte hole was.  Also, it
doesn't look like you changed the padding, but rather instead
totally re-purposed it, leaving no alignment padding after the
uchar bitfields (where it was originally).

P.
--

> * group releated members
> * push non-critical fields at the end of the struct
> 
> Signed-off-by: Claudiu Manoil <claudiu.manoil@freescale.com>
> ---
>  drivers/net/ethernet/freescale/gianfar.h |   93 +++++++++++++++--------------
>  1 files changed, 48 insertions(+), 45 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
> index 22c2f7a..5304a58 100644
> --- a/drivers/net/ethernet/freescale/gianfar.h
> +++ b/drivers/net/ethernet/freescale/gianfar.h
> @@ -1054,28 +1054,64 @@ enum gfar_errata {
>   * the buffer descriptor determines the actual condition.
>   */
>  struct gfar_private {
> -
> -	/* Indicates how many tx, rx queues are enabled */
> -	unsigned int num_tx_queues;
>  	unsigned int num_rx_queues;
> -	unsigned int num_grps;
> -	unsigned int mode;
> -
> -	/* The total tx and rx ring size for the enabled queues */
> -	unsigned int total_tx_ring_size;
> -	unsigned int total_rx_ring_size;
>  
>  	struct device *dev;
>  	struct net_device *ndev;
> -	struct platform_device *ofdev;
>  	enum gfar_errata errata;
> +	unsigned int rx_buffer_size;
> +
> +	u16 padding;
> +
> +	/* HW time stamping enabled flag */
> +	int hwts_rx_en;
> +	int hwts_tx_en;
>  
> -	struct gfar_priv_grp gfargrp[MAXGROUPS];
>  	struct gfar_priv_tx_q *tx_queue[MAX_TX_QS];
>  	struct gfar_priv_rx_q *rx_queue[MAX_RX_QS];
> +	struct gfar_priv_grp gfargrp[MAXGROUPS];
> +
> +	u32 device_flags;
> +
> +	unsigned int mode;
> +	unsigned int num_tx_queues;
> +	unsigned int num_grps;
> +
> +	/* Network Statistics */
> +	struct gfar_extra_stats extra_stats;
> +
> +	/* PHY stuff */
> +	phy_interface_t interface;
> +	struct device_node *phy_node;
> +	struct device_node *tbi_node;
> +	struct phy_device *phydev;
> +	struct mii_bus *mii_bus;
> +	int oldspeed;
> +	int oldduplex;
> +	int oldlink;
> +
> +	/* Bitfield update lock */
> +	spinlock_t bflock;
> +
> +	uint32_t msg_enable;
> +
> +	struct work_struct reset_task;
> +
> +	struct platform_device *ofdev;
> +	unsigned char
> +		extended_hash:1,
> +		bd_stash_en:1,
> +		rx_filer_enable:1,
> +		/* Wake-on-LAN enabled */
> +		wol_en:1,
> +		/* Enable priorty based Tx scheduling in Hw */
> +		prio_sched_en:1;
> +
> +	/* The total tx and rx ring size for the enabled queues */
> +	unsigned int total_tx_ring_size;
> +	unsigned int total_rx_ring_size;
>  
>  	/* RX per device parameters */
> -	unsigned int rx_buffer_size;
>  	unsigned int rx_stash_size;
>  	unsigned int rx_stash_index;
>  
> @@ -1094,39 +1130,6 @@ struct gfar_private {
>  	unsigned int fifo_starve;
>  	unsigned int fifo_starve_off;
>  
> -	/* Bitfield update lock */
> -	spinlock_t bflock;
> -
> -	phy_interface_t interface;
> -	struct device_node *phy_node;
> -	struct device_node *tbi_node;
> -	u32 device_flags;
> -	unsigned char
> -		extended_hash:1,
> -		bd_stash_en:1,
> -		rx_filer_enable:1,
> -		wol_en:1, /* Wake-on-LAN enabled */
> -		prio_sched_en:1; /* Enable priorty based Tx scheduling in Hw */
> -	unsigned short padding;
> -
> -	/* PHY stuff */
> -	struct phy_device *phydev;
> -	struct mii_bus *mii_bus;
> -	int oldspeed;
> -	int oldduplex;
> -	int oldlink;
> -
> -	uint32_t msg_enable;
> -
> -	struct work_struct reset_task;
> -
> -	/* Network Statistics */
> -	struct gfar_extra_stats extra_stats;
> -
> -	/* HW time stamping enabled flag */
> -	int hwts_rx_en;
> -	int hwts_tx_en;
> -
>  	/*Filer table*/
>  	unsigned int ftp_rqfpr[MAX_FILER_IDX + 1];
>  	unsigned int ftp_rqfcr[MAX_FILER_IDX + 1];
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 2/5] gianfar: Cleanup and optimize struct gfar_private
  2013-02-12 15:11   ` [PATCH net-next 2/5] gianfar: Cleanup and optimize struct gfar_private Paul Gortmaker
@ 2013-02-12 15:58     ` Claudiu Manoil
  2013-02-12 16:49       ` Paul Gortmaker
  0 siblings, 1 reply; 16+ messages in thread
From: Claudiu Manoil @ 2013-02-12 15:58 UTC (permalink / raw)
  To: Paul Gortmaker; +Cc: netdev, David S. Miller

On 2/12/2013 5:11 PM, Paul Gortmaker wrote:
> On 13-02-12 07:47 AM, Claudiu Manoil wrote:
>> * group run-time critical fields within the 1st cacheline
>>    followed by the tx|rx_queue reference arrays and the interrupt
>>    group instances (gfargrp) (all cacheline aligned)
>> * change 'padding' from unsigned short to u16
>> * clear 20+ byte memory hole
>
> Per prev. mail, it gets harder to see which change is where,
> when they are all lumped together like this.  For example, it
> wasn't obvious to me where the 20 byte hole was.  Also, it
> doesn't look like you changed the padding, but rather instead
> totally re-purposed it, leaving no alignment padding after the
> uchar bitfields (where it was originally).
>
> P.

The 20 byte hole was here:

struct gfar_private {
         unsigned int               num_tx_queues;
         unsigned int               num_rx_queues;
         unsigned int               num_grps;
         unsigned int               mode;
         unsigned int               total_tx_ring_size;
         unsigned int               total_rx_ring_size;
         struct device_node *       node;
         struct net_device *        ndev;
         /* --- cacheline 1 boundary (32 bytes) --- */
         struct platform_device *   ofdev;
         enum gfar_errata           errata;

         /* XXX 24 bytes hole, try to pack */

         /* --- cacheline 2 boundary (64 bytes) --- */
         struct gfar_priv_grp       gfargrp[2];


At the end of the patch series the first cacheline is without holes.
Please note that the re-grouping of members and their order is most
important. For instance why keep in the first cacheline something
as unimportant as total_rx_ring_size? Members like rx_buffer_size,
or padding, or even errata, are critical however for the fast path.
Rx processing (gfar_poll + clean_rx_ring) is the bottleneck here,
keeping the CPU to 100%. So the main goal is to optimize this path,
including memory access/cache optimizations. For instance, better 
results were obtained by inverting rx|tx_queue[] with gfargrp[], originally:
         /* --- cacheline 1 boundary (32 bytes) --- */
         struct gfar_priv_tx_q *    tx_queue[8];  /*    32    32 */
         /* --- cacheline 2 boundary (64 bytes) --- */
         struct gfar_priv_rx_q *    rx_queue[8]; /*    64    32 */
         /* --- cacheline 3 boundary (96 bytes) --- */
         struct gfar_priv_grp       gfargrp[2];  /*    96   192 */

The uchar bitfields are unimportant here (used at "init time"), and
they take 4 bytes including padding anyway. So whether it's uchar or
uint, it's the same, maybe better left uchar to discourage the abuse
of these bitfields. (A 2-3byte hole here doesn't change anything
to the whole structure size, which is padded to be at least a 32B
multiple.)

Thanks,
Claudiu

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 3/5] gianfar: GRO_DROP is unlikely
  2013-02-12 12:47   ` [PATCH net-next 3/5] gianfar: GRO_DROP is unlikely Claudiu Manoil
  2013-02-12 12:47     ` [PATCH net-next 4/5] gianfar: Remove wrong buffer size conditioning to VLAN h/w offload Claudiu Manoil
@ 2013-02-12 16:30     ` Paul Gortmaker
  2013-02-12 16:47       ` Eric Dumazet
  2013-02-12 17:05       ` Claudiu Manoil
  1 sibling, 2 replies; 16+ messages in thread
From: Paul Gortmaker @ 2013-02-12 16:30 UTC (permalink / raw)
  To: Claudiu Manoil; +Cc: netdev, David S. Miller

[[PATCH net-next 3/5] gianfar: GRO_DROP is unlikely] On 12/02/2013 (Tue 14:47) Claudiu Manoil wrote:

> Signed-off-by: Claudiu Manoil <claudiu.manoil@freescale.com>
> ---
>  drivers/net/ethernet/freescale/gianfar.c |    2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
> index 096fb5f..5622134 100644
> --- a/drivers/net/ethernet/freescale/gianfar.c
> +++ b/drivers/net/ethernet/freescale/gianfar.c
> @@ -2745,7 +2745,7 @@ static int gfar_process_frame(struct net_device *dev, struct sk_buff *skb,
>  	/* Send the packet up the stack */
>  	ret = napi_gro_receive(napi, skb);
>  
> -	if (GRO_DROP == ret)
> +	if (unlikely(GRO_DROP == ret))
>  		priv->extra_stats.kernel_dropped++;
>  
>  	return 0;

I wondered about this, specifically if it was a moot point, when the
actual unlikely was deployed right at the end of the fcn.  It turns out
that it does make a difference, since gfar_process_frame gets inlined,
and so the increment code gets moved out of line (I have marked the if
statment with * and the increment code within "-----"):

 ------------------------- as is currently ------------------
    4d14:       80 61 00 18     lwz     r3,24(r1)
    4d18:       7f c4 f3 78     mr      r4,r30
    4d1c:       48 00 00 01     bl      4d1c <gfar_clean_rx_ring+0x10c>
 *  4d20:       2f 83 00 04     cmpwi   cr7,r3,4
    4d24:       40 9e 00 1c     bne-    cr7,4d40 <gfar_clean_rx_ring+0x130>
		----------------------------
    4d28:       81 3c 01 f8     lwz     r9,504(r28)
    4d2c:       81 5c 01 fc     lwz     r10,508(r28)
    4d30:       31 4a 00 01     addic   r10,r10,1
    4d34:       7d 29 01 94     addze   r9,r9
    4d38:       91 3c 01 f8     stw     r9,504(r28)
    4d3c:       91 5c 01 fc     stw     r10,508(r28)
		----------------------------
    4d40:       a0 1f 00 24     lhz     r0,36(r31)
    4d44:       81 3f 00 00     lwz     r9,0(r31)
    4d48:       7f a4 eb 78     mr      r4,r29
    4d4c:       7f e3 fb 78     mr      r3,r31


 -------------------------- unlikely ------------------------
    4d14:       80 61 00 18     lwz     r3,24(r1)
    4d18:       7f c4 f3 78     mr      r4,r30
    4d1c:       48 00 00 01     bl      4d1c <gfar_clean_rx_ring+0x10c>
 *  4d20:       2f 83 00 04     cmpwi   cr7,r3,4			
    4d24:       41 9e 03 94     beq-    cr7,50b8 <gfar_clean_rx_ring+0x4a8>
    4d28:       a0 1f 00 24     lhz     r0,36(r31)
    4d2c:       81 3f 00 00     lwz     r9,0(r31)
    4d30:       7f a4 eb 78     mr      r4,r29
    4d34:       7f e3 fb 78     mr      r3,r31
[...]
    50b8:       81 3c 01 f8     lwz     r9,504(r28)
    50bc:       81 5c 01 fc     lwz     r10,508(r28)
    50c0:       31 4a 00 01     addic   r10,r10,1
    50c4:       7d 29 01 94     addze   r9,r9
    50c8:       91 3c 01 f8     stw     r9,504(r28)
    50cc:       91 5c 01 fc     stw     r10,508(r28)
    50d0:       4b ff fc 58     b       4d28 <gfar_clean_rx_ring+0x118>

So, the increment does actually get moved ~1k away.  Maybe you can
incorporate the above information in your long log, so the next guy
doesn't wonder about the same question I did.

Also, I noticed that gfar_process_frame() can be void instead of int.
It never returns anything but zero, and the return code is ignored at
the single call site.  Maybe you can add a patch to your series for that
as well?

Paul.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 3/5] gianfar: GRO_DROP is unlikely
  2013-02-12 16:30     ` [PATCH net-next 3/5] gianfar: GRO_DROP is unlikely Paul Gortmaker
@ 2013-02-12 16:47       ` Eric Dumazet
  2013-02-12 17:05       ` Claudiu Manoil
  1 sibling, 0 replies; 16+ messages in thread
From: Eric Dumazet @ 2013-02-12 16:47 UTC (permalink / raw)
  To: Paul Gortmaker; +Cc: Claudiu Manoil, netdev, David S. Miller

On Tue, 2013-02-12 at 11:30 -0500, Paul Gortmaker wrote:
> [...]
>     50b8:       81 3c 01 f8     lwz     r9,504(r28)
>     50bc:       81 5c 01 fc     lwz     r10,508(r28)
>     50c0:       31 4a 00 01     addic   r10,r10,1
>     50c4:       7d 29 01 94     addze   r9,r9
>     50c8:       91 3c 01 f8     stw     r9,504(r28)
>     50cc:       91 5c 01 fc     stw     r10,508(r28)
>     50d0:       4b ff fc 58     b       4d28 <gfar_clean_rx_ring+0x118>

What I can see here is the counter is 64bit and arch is 32bit,
and no sync is used.

So ethtool -S has races.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 2/5] gianfar: Cleanup and optimize struct gfar_private
  2013-02-12 15:58     ` Claudiu Manoil
@ 2013-02-12 16:49       ` Paul Gortmaker
  0 siblings, 0 replies; 16+ messages in thread
From: Paul Gortmaker @ 2013-02-12 16:49 UTC (permalink / raw)
  To: Claudiu Manoil; +Cc: netdev, David S. Miller

On 13-02-12 10:58 AM, Claudiu Manoil wrote:
> On 2/12/2013 5:11 PM, Paul Gortmaker wrote:
>> On 13-02-12 07:47 AM, Claudiu Manoil wrote:
>>> * group run-time critical fields within the 1st cacheline
>>>    followed by the tx|rx_queue reference arrays and the interrupt
>>>    group instances (gfargrp) (all cacheline aligned)
>>> * change 'padding' from unsigned short to u16
>>> * clear 20+ byte memory hole
>>
>> Per prev. mail, it gets harder to see which change is where,
>> when they are all lumped together like this.  For example, it
>> wasn't obvious to me where the 20 byte hole was.  Also, it
>> doesn't look like you changed the padding, but rather instead
>> totally re-purposed it, leaving no alignment padding after the
>> uchar bitfields (where it was originally).
>>
>> P.
> 
> The 20 byte hole was here:
> 
> struct gfar_private {
>          unsigned int               num_tx_queues;
>          unsigned int               num_rx_queues;
>          unsigned int               num_grps;
>          unsigned int               mode;
>          unsigned int               total_tx_ring_size;
>          unsigned int               total_rx_ring_size;
>          struct device_node *       node;
>          struct net_device *        ndev;
>          /* --- cacheline 1 boundary (32 bytes) --- */
>          struct platform_device *   ofdev;
>          enum gfar_errata           errata;
> 
>          /* XXX 24 bytes hole, try to pack */

OK, so this is good information, that wasn't in your commit log.
(Is it 20 or 24 though?  I'm guessing 24, and original commit
log was wrong in saying 20.)

Something like:

"The default gcc layout of gfar_private leaves an implicit 24
byte hole (bytes XYZ --> ABC in the 1st cache line) after the
errata enum...."

> 
>          /* --- cacheline 2 boundary (64 bytes) --- */
>          struct gfar_priv_grp       gfargrp[2];
> 
> 
> At the end of the patch series the first cacheline is without holes.
> Please note that the re-grouping of members and their order is most
> important. For instance why keep in the first cacheline something

The importance of ordering and alignment was never in question.  What
was missing, was conveying to the reviewer exactly how you'd made it
better, without forcing them to manually deconstruct gfar_priv in the
before and after cases, esp. when you'd already done that analysis.

> as unimportant as total_rx_ring_size? Members like rx_buffer_size,
> or padding, or even errata, are critical however for the fast path.
> Rx processing (gfar_poll + clean_rx_ring) is the bottleneck here,
> keeping the CPU to 100%. So the main goal is to optimize this path,
> including memory access/cache optimizations. For instance, better 
> results were obtained by inverting rx|tx_queue[] with gfargrp[], originally:
>          /* --- cacheline 1 boundary (32 bytes) --- */
>          struct gfar_priv_tx_q *    tx_queue[8];  /*    32    32 */
>          /* --- cacheline 2 boundary (64 bytes) --- */
>          struct gfar_priv_rx_q *    rx_queue[8]; /*    64    32 */
>          /* --- cacheline 3 boundary (96 bytes) --- */
>          struct gfar_priv_grp       gfargrp[2];  /*    96   192 */
> 
> The uchar bitfields are unimportant here (used at "init time"), and
> they take 4 bytes including padding anyway. So whether it's uchar or
> uint, it's the same, maybe better left uchar to discourage the abuse
> of these bitfields. (A 2-3byte hole here doesn't change anything
> to the whole structure size, which is padded to be at least a 32B
> multiple.)

This here too, i.e. why the padding after the bitfields is unimportant
and hence removed should also be in the long log.

Thanks,
Paul.
--

> 
> Thanks,
> Claudiu
> 
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 3/5] gianfar: GRO_DROP is unlikely
  2013-02-12 16:30     ` [PATCH net-next 3/5] gianfar: GRO_DROP is unlikely Paul Gortmaker
  2013-02-12 16:47       ` Eric Dumazet
@ 2013-02-12 17:05       ` Claudiu Manoil
  1 sibling, 0 replies; 16+ messages in thread
From: Claudiu Manoil @ 2013-02-12 17:05 UTC (permalink / raw)
  To: Paul Gortmaker; +Cc: netdev, David S. Miller

On 2/12/2013 6:30 PM, Paul Gortmaker wrote:
> [[PATCH net-next 3/5] gianfar: GRO_DROP is unlikely] On 12/02/2013 (Tue 14:47) Claudiu Manoil wrote:
>
>> Signed-off-by: Claudiu Manoil <claudiu.manoil@freescale.com>
>> ---
>>   drivers/net/ethernet/freescale/gianfar.c |    2 +-
>>   1 files changed, 1 insertions(+), 1 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
>> index 096fb5f..5622134 100644
>> --- a/drivers/net/ethernet/freescale/gianfar.c
>> +++ b/drivers/net/ethernet/freescale/gianfar.c
>> @@ -2745,7 +2745,7 @@ static int gfar_process_frame(struct net_device *dev, struct sk_buff *skb,
>>   	/* Send the packet up the stack */
>>   	ret = napi_gro_receive(napi, skb);
>>
>> -	if (GRO_DROP == ret)
>> +	if (unlikely(GRO_DROP == ret))
>>   		priv->extra_stats.kernel_dropped++;
>>
>>   	return 0;
>
> I wondered about this, specifically if it was a moot point, when the
> actual unlikely was deployed right at the end of the fcn.  It turns out
> that it does make a difference, since gfar_process_frame gets inlined,
> and so the increment code gets moved out of line (I have marked the if
> statment with * and the increment code within "-----"):
>
>   ------------------------- as is currently ------------------
>      4d14:       80 61 00 18     lwz     r3,24(r1)
>      4d18:       7f c4 f3 78     mr      r4,r30
>      4d1c:       48 00 00 01     bl      4d1c <gfar_clean_rx_ring+0x10c>
>   *  4d20:       2f 83 00 04     cmpwi   cr7,r3,4
>      4d24:       40 9e 00 1c     bne-    cr7,4d40 <gfar_clean_rx_ring+0x130>
> 		----------------------------
>      4d28:       81 3c 01 f8     lwz     r9,504(r28)
>      4d2c:       81 5c 01 fc     lwz     r10,508(r28)
>      4d30:       31 4a 00 01     addic   r10,r10,1
>      4d34:       7d 29 01 94     addze   r9,r9
>      4d38:       91 3c 01 f8     stw     r9,504(r28)
>      4d3c:       91 5c 01 fc     stw     r10,508(r28)
> 		----------------------------
>      4d40:       a0 1f 00 24     lhz     r0,36(r31)
>      4d44:       81 3f 00 00     lwz     r9,0(r31)
>      4d48:       7f a4 eb 78     mr      r4,r29
>      4d4c:       7f e3 fb 78     mr      r3,r31
>
>
>   -------------------------- unlikely ------------------------
>      4d14:       80 61 00 18     lwz     r3,24(r1)
>      4d18:       7f c4 f3 78     mr      r4,r30
>      4d1c:       48 00 00 01     bl      4d1c <gfar_clean_rx_ring+0x10c>
>   *  4d20:       2f 83 00 04     cmpwi   cr7,r3,4			
>      4d24:       41 9e 03 94     beq-    cr7,50b8 <gfar_clean_rx_ring+0x4a8>
>      4d28:       a0 1f 00 24     lhz     r0,36(r31)
>      4d2c:       81 3f 00 00     lwz     r9,0(r31)
>      4d30:       7f a4 eb 78     mr      r4,r29
>      4d34:       7f e3 fb 78     mr      r3,r31
> [...]
>      50b8:       81 3c 01 f8     lwz     r9,504(r28)
>      50bc:       81 5c 01 fc     lwz     r10,508(r28)
>      50c0:       31 4a 00 01     addic   r10,r10,1
>      50c4:       7d 29 01 94     addze   r9,r9
>      50c8:       91 3c 01 f8     stw     r9,504(r28)
>      50cc:       91 5c 01 fc     stw     r10,508(r28)
>      50d0:       4b ff fc 58     b       4d28 <gfar_clean_rx_ring+0x118>
>
> So, the increment does actually get moved ~1k away.  Maybe you can
> incorporate the above information in your long log, so the next guy
> doesn't wonder about the same question I did.
>
> Also, I noticed that gfar_process_frame() can be void instead of int.
> It never returns anything but zero, and the return code is ignored at
> the single call site.  Maybe you can add a patch to your series for that
> as well?
>
> Paul.
>
> .

Thanks for the notice.
The slightest code changes to gfar_process_frame() are reflected
to the driver's performance (i.e. throughput). So this is a very
"performance sensitive" function.
I'll see what happens if changed to return void.

Claudiu

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 1/5] gianfar: Cleanup device refs in gfar_private
  2013-02-12 14:54 ` [PATCH net-next 1/5] gianfar: Cleanup device refs in gfar_private Paul Gortmaker
@ 2013-02-12 17:14   ` Claudiu Manoil
  0 siblings, 0 replies; 16+ messages in thread
From: Claudiu Manoil @ 2013-02-12 17:14 UTC (permalink / raw)
  To: Paul Gortmaker; +Cc: netdev, David S. Miller

On 2/12/2013 4:54 PM, Paul Gortmaker wrote:
> On 13-02-12 07:47 AM, Claudiu Manoil wrote:
>> * remove unused device_node pointer
>> * remove duplicate SET_NETDEV_DEV()
>> * use device pointer (dev) to simplify the code and to
>>    avoid double indirections (esp. on the "fast path")
>
> Ideally, when you find yourself making a list within the longlog,
> that is a hint that you might want to start making it into
> multiple commits, for ease of review.  Granted #1 and #2 are
> trivial, but #3 probably could be a separate commit.  Did you
> see any change in the object size or the disassembly when
> making change #3?
>
> P.

I didn't inspect the assembly code yet, but this should definitely
generate better, faster code. I don't see why it wouldn't...

Thanks,
Claudiu

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 5/5] gianfar: Fix and cleanup Rx FCB handling
  2013-02-12 12:47       ` [PATCH net-next 5/5] gianfar: Fix and cleanup Rx FCB handling Claudiu Manoil
@ 2013-02-12 17:19         ` Paul Gortmaker
  2013-02-13 11:34           ` Claudiu Manoil
  0 siblings, 1 reply; 16+ messages in thread
From: Paul Gortmaker @ 2013-02-12 17:19 UTC (permalink / raw)
  To: Claudiu Manoil; +Cc: netdev, David S. Miller

On 13-02-12 07:47 AM, Claudiu Manoil wrote:
> NETIF_F_HW_VLAN_TX flag must not condition RxFCB usage.

The above statement isn't 100% clear to me.  Is this the intent?

  Currently, gfar_uses_fcb() calls gfar_is_vlan_on() which in turn
  checks NETIF_F_HW_VLAN_TX.  However there is no relation between
  whether FCBs are used and the VLAN transmit state.

> In the case of RxBD rings, FCBs (Frame Control Block) are inserted by
> the eTSEC whenever RCTRL[PRSDEP] is set to a non-zero value. Only one
> FCB is inserted per frame (in the buffer pointed to by the RxBD with
> bit F set). TOE acceleration for receive is enabled for all rx frames
> in this case.
> Indroduce the uses_rxfcb field to signal RxFCB insertion in accordance
> with the specification above (leading to cleaner, less confusing code).

The is_vlan_on() and uses_fcb() calls were more self documenting than
setting/clearing a new single use variable added to priv, I think.
Even if they get changed/simplified, perhaps it is worth keeping them?

Rather than a specific priv->uses_rxfcb field, perhaps it makes sense
to make it more future proof with priv->rctrl field, that is a cached
value of the register, and then you keep gfar_uses_fcb() and it in
turn checks for RCTRL_PRSDEP_INIT bit in rctrl?

Also, the dependency/conditional on FSL_GIANFAR_DEV_HAS_TIMER seems
to simply vanish with this patch, and it isn't clear to me if that
was 100% intentional or not...

P.
--

> 
> Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
> Signed-off-by: Claudiu Manoil <claudiu.manoil@freescale.com>
> ---
>  drivers/net/ethernet/freescale/gianfar.c |   41 ++++++++++++++---------------
>  drivers/net/ethernet/freescale/gianfar.h |    1 +
>  2 files changed, 21 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
> index 59fb3bf..3de608c 100644
> --- a/drivers/net/ethernet/freescale/gianfar.c
> +++ b/drivers/net/ethernet/freescale/gianfar.c
> @@ -349,6 +349,9 @@ static void gfar_init_mac(struct net_device *ndev)
>  	/* Configure the coalescing support */
>  	gfar_configure_coalescing(priv, 0xFF, 0xFF);
>  
> +	/* set this when rx hw offload (TOE) functions are being used */
> +	priv->uses_rxfcb = 0;
> +
>  	if (priv->rx_filer_enable) {
>  		rctrl |= RCTRL_FILREN;
>  		/* Program the RIR0 reg with the required distribution */
> @@ -359,8 +362,10 @@ static void gfar_init_mac(struct net_device *ndev)
>  	if (ndev->flags & IFF_PROMISC)
>  		rctrl |= RCTRL_PROM;
>  
> -	if (ndev->features & NETIF_F_RXCSUM)
> +	if (ndev->features & NETIF_F_RXCSUM) {
>  		rctrl |= RCTRL_CHECKSUMMING;
> +		priv->uses_rxfcb = 1;
> +	}
>  
>  	if (priv->extended_hash) {
>  		rctrl |= RCTRL_EXTHASH;
> @@ -382,11 +387,15 @@ static void gfar_init_mac(struct net_device *ndev)
>  	}
>  
>  	/* Enable HW time stamping if requested from user space */
> -	if (priv->hwts_rx_en)
> +	if (priv->hwts_rx_en) {
>  		rctrl |= RCTRL_PRSDEP_INIT | RCTRL_TS_ENABLE;
> +		priv->uses_rxfcb = 1;
> +	}
>  
> -	if (ndev->features & NETIF_F_HW_VLAN_RX)
> +	if (ndev->features & NETIF_F_HW_VLAN_RX) {
>  		rctrl |= RCTRL_VLEX | RCTRL_PRSDEP_INIT;
> +		priv->uses_rxfcb = 1;
> +	}
>  
>  	/* Init rctrl based on our settings */
>  	gfar_write(&regs->rctrl, rctrl);
> @@ -505,20 +514,6 @@ void unlock_tx_qs(struct gfar_private *priv)
>  		spin_unlock(&priv->tx_queue[i]->txlock);
>  }
>  
> -static bool gfar_is_vlan_on(struct gfar_private *priv)
> -{
> -	return (priv->ndev->features & NETIF_F_HW_VLAN_RX) ||
> -	       (priv->ndev->features & NETIF_F_HW_VLAN_TX);
> -}
> -
> -/* Returns 1 if incoming frames use an FCB */
> -static inline int gfar_uses_fcb(struct gfar_private *priv)
> -{
> -	return gfar_is_vlan_on(priv) ||
> -	       (priv->ndev->features & NETIF_F_RXCSUM) ||
> -	       (priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER);
> -}
> -
>  static void free_tx_pointers(struct gfar_private *priv)
>  {
>  	int i;
> @@ -2331,10 +2326,13 @@ void gfar_check_rx_parser_mode(struct gfar_private *priv)
>  
>  	tempval = gfar_read(&regs->rctrl);
>  	/* If parse is no longer required, then disable parser */
> -	if (tempval & RCTRL_REQ_PARSER)
> +	if (tempval & RCTRL_REQ_PARSER) {
>  		tempval |= RCTRL_PRSDEP_INIT;
> -	else
> +		priv->uses_rxfcb = 1;
> +	} else {
>  		tempval &= ~RCTRL_PRSDEP_INIT;
> +		priv->uses_rxfcb = 0;
> +	}
>  	gfar_write(&regs->rctrl, tempval);
>  }
>  
> @@ -2367,6 +2365,7 @@ void gfar_vlan_mode(struct net_device *dev, netdev_features_t features)
>  		tempval = gfar_read(&regs->rctrl);
>  		tempval |= (RCTRL_VLEX | RCTRL_PRSDEP_INIT);
>  		gfar_write(&regs->rctrl, tempval);
> +		priv->uses_rxfcb = 1;
>  	} else {
>  		/* Disable VLAN tag extraction */
>  		tempval = gfar_read(&regs->rctrl);
> @@ -2395,7 +2394,7 @@ static int gfar_change_mtu(struct net_device *dev, int new_mtu)
>  		return -EINVAL;
>  	}
>  
> -	if (gfar_uses_fcb(priv))
> +	if (priv->uses_rxfcb)
>  		frame_size += GMAC_FCB_LEN;
>  
>  	frame_size += priv->padding;
> @@ -2766,7 +2765,7 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit)
>  	bdp = rx_queue->cur_rx;
>  	base = rx_queue->rx_bd_base;
>  
> -	amount_pull = (gfar_uses_fcb(priv) ? GMAC_FCB_LEN : 0);
> +	amount_pull = priv->uses_rxfcb ? GMAC_FCB_LEN : 0;
>  
>  	while (!((bdp->status & RXBD_EMPTY) || (--rx_work_limit < 0))) {
>  		struct sk_buff *newskb;
> diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
> index 5304a58..386f1fe 100644
> --- a/drivers/net/ethernet/freescale/gianfar.h
> +++ b/drivers/net/ethernet/freescale/gianfar.h
> @@ -1061,6 +1061,7 @@ struct gfar_private {
>  	enum gfar_errata errata;
>  	unsigned int rx_buffer_size;
>  
> +	u16 uses_rxfcb;
>  	u16 padding;
>  
>  	/* HW time stamping enabled flag */
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 5/5] gianfar: Fix and cleanup Rx FCB handling
  2013-02-12 17:19         ` Paul Gortmaker
@ 2013-02-13 11:34           ` Claudiu Manoil
  2013-02-13 14:32             ` Paul Gortmaker
  0 siblings, 1 reply; 16+ messages in thread
From: Claudiu Manoil @ 2013-02-13 11:34 UTC (permalink / raw)
  To: Paul Gortmaker; +Cc: netdev, David S. Miller

On 2/12/2013 7:19 PM, Paul Gortmaker wrote:
> On 13-02-12 07:47 AM, Claudiu Manoil wrote:
>> NETIF_F_HW_VLAN_TX flag must not condition RxFCB usage.
>
> The above statement isn't 100% clear to me.  Is this the intent?

The above statement is a rule, if you wish. The existing code breaks
that rule by saying: RxFCB is enabled if NETIF_F_HW_VLAN_TX; which is
a false statement. This patch corrects this error, according to the
rule above.
So, primarily, this patch is a fix (as expressed in <subj.>). I'll
resend the patch with additional comments to make this point clearer.

>
>    Currently, gfar_uses_fcb() calls gfar_is_vlan_on() which in turn
>    checks NETIF_F_HW_VLAN_TX.  However there is no relation between
>    whether FCBs are used and the VLAN transmit state.
>
>> In the case of RxBD rings, FCBs (Frame Control Block) are inserted by
>> the eTSEC whenever RCTRL[PRSDEP] is set to a non-zero value. Only one
>> FCB is inserted per frame (in the buffer pointed to by the RxBD with
>> bit F set). TOE acceleration for receive is enabled for all rx frames
>> in this case.
>> Indroduce the uses_rxfcb field to signal RxFCB insertion in accordance
>> with the specification above (leading to cleaner, less confusing code).
>
> The is_vlan_on() and uses_fcb() calls were more self documenting than
> setting/clearing a new single use variable added to priv, I think.
> Even if they get changed/simplified, perhaps it is worth keeping them?

gfar_uses_fcb() generates confusion around the FCB concept, this maybe
explains how it came to the error above. First, there are 2 types of
FCBs with different meaning, covering different use cases: Rx (receive
side) FCB and TxFCB. uses_fcb() was intended to signal RxFCB insertion,
which is not obvious from its name, and it became (subtly) erroneous
after incorporating is_vlan_on().
is_vlan_on() is also misleading because we need to differentiate b/w
hw VLAN extraction/VLEX (marked by VLAN_RX flag) and hw VLAN
insertion/VLINS (VLAN_TX flag), which are different mechanisms using
different types of FCBs.

>
> Rather than a specific priv->uses_rxfcb field, perhaps it makes sense
> to make it more future proof with priv->rctrl field, that is a cached
> value of the register, and then you keep gfar_uses_fcb() and it in
> turn checks for RCTRL_PRSDEP_INIT bit in rctrl?
>

The main purpose of the priv->uses_rxfcb field is to quickly signal, on
the hot path, that the incoming frame has a *Rx* FCB block inserted
which needs to be pulled out before passing the skb to the stack.
This is a performance critical operation, it needs to happen fast,
that's why uses_rxfcb is placed in the first cacheline of gfar_private.
This is also why I cannot use a cached rctrl instead: 1) because I
don't have 32 bits available in the first cacheline of gfar_probe
(but only 16); 2) no time for bit operations on the hot path.

> Also, the dependency/conditional on FSL_GIANFAR_DEV_HAS_TIMER seems
> to simply vanish with this patch, and it isn't clear to me if that
> was 100% intentional or not...
>

The dependency on FSL_GIANFAR_DEV_HAS_TIMER is another source of
confusion. The dependency is actually to priv->hwts_rx_en.
Upon changing priv->hwts_rx_en via IOCTL, the gfar device is being
restarted and on init_mac() the priv->hwts_rx_en conditions the RxFCB
insertion, and rctrl is programmed accordingly. The patch takes care
of this case too.

So, I'll re-spin this patch with enhanced comments.

Thanks,
Claudiu

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 5/5] gianfar: Fix and cleanup Rx FCB handling
  2013-02-13 11:34           ` Claudiu Manoil
@ 2013-02-13 14:32             ` Paul Gortmaker
  0 siblings, 0 replies; 16+ messages in thread
From: Paul Gortmaker @ 2013-02-13 14:32 UTC (permalink / raw)
  To: Claudiu Manoil; +Cc: netdev, David S. Miller

On 13-02-13 06:34 AM, Claudiu Manoil wrote:
> On 2/12/2013 7:19 PM, Paul Gortmaker wrote:
>> On 13-02-12 07:47 AM, Claudiu Manoil wrote:
>>> NETIF_F_HW_VLAN_TX flag must not condition RxFCB usage.
>>
>> The above statement isn't 100% clear to me.  Is this the intent?
> 
> The above statement is a rule, if you wish. The existing code breaks
> that rule by saying: RxFCB is enabled if NETIF_F_HW_VLAN_TX; which is
> a false statement. This patch corrects this error, according to the
> rule above.
> So, primarily, this patch is a fix (as expressed in <subj.>). I'll
> resend the patch with additional comments to make this point clearer.

OK, perhaps just:
	"...must not condition RxFCB..."
   --> 	"...must not be conditional on RxFCB..."

would have helped; otherwise it reads as if somehow VLAN_TX is
altering/shaping/conditioning the RxFCB behaviour.

> 
>>
>>    Currently, gfar_uses_fcb() calls gfar_is_vlan_on() which in turn
>>    checks NETIF_F_HW_VLAN_TX.  However there is no relation between
>>    whether FCBs are used and the VLAN transmit state.
>>
>>> In the case of RxBD rings, FCBs (Frame Control Block) are inserted by
>>> the eTSEC whenever RCTRL[PRSDEP] is set to a non-zero value. Only one
>>> FCB is inserted per frame (in the buffer pointed to by the RxBD with
>>> bit F set). TOE acceleration for receive is enabled for all rx frames
>>> in this case.
>>> Indroduce the uses_rxfcb field to signal RxFCB insertion in accordance
>>> with the specification above (leading to cleaner, less confusing code).
>>
>> The is_vlan_on() and uses_fcb() calls were more self documenting than
>> setting/clearing a new single use variable added to priv, I think.
>> Even if they get changed/simplified, perhaps it is worth keeping them?
> 
> gfar_uses_fcb() generates confusion around the FCB concept, this maybe
> explains how it came to the error above. First, there are 2 types of
> FCBs with different meaning, covering different use cases: Rx (receive
> side) FCB and TxFCB. uses_fcb() was intended to signal RxFCB insertion,
> which is not obvious from its name, and it became (subtly) erroneous
> after incorporating is_vlan_on().
> is_vlan_on() is also misleading because we need to differentiate b/w
> hw VLAN extraction/VLEX (marked by VLAN_RX flag) and hw VLAN
> insertion/VLINS (VLAN_TX flag), which are different mechanisms using
> different types of FCBs.

OK, so perhaps keep the names, but add prefix (eg. uses_fcb --> uses_rxfcb)
so there is no confusion, and people without the in-depth hardware
knowledge won't fall into the old trap.

> 
>>
>> Rather than a specific priv->uses_rxfcb field, perhaps it makes sense
>> to make it more future proof with priv->rctrl field, that is a cached
>> value of the register, and then you keep gfar_uses_fcb() and it in
>> turn checks for RCTRL_PRSDEP_INIT bit in rctrl?
>>
> 
> The main purpose of the priv->uses_rxfcb field is to quickly signal, on
> the hot path, that the incoming frame has a *Rx* FCB block inserted
> which needs to be pulled out before passing the skb to the stack.
> This is a performance critical operation, it needs to happen fast,
> that's why uses_rxfcb is placed in the first cacheline of gfar_private.
> This is also why I cannot use a cached rctrl instead: 1) because I
> don't have 32 bits available in the first cacheline of gfar_probe
> (but only 16); 2) no time for bit operations on the hot path.

OK, but don't forget that inlining will still allow you to keep
self documenting names if desired and worthwhile.

> 
>> Also, the dependency/conditional on FSL_GIANFAR_DEV_HAS_TIMER seems
>> to simply vanish with this patch, and it isn't clear to me if that
>> was 100% intentional or not...
>>
> 
> The dependency on FSL_GIANFAR_DEV_HAS_TIMER is another source of
> confusion. The dependency is actually to priv->hwts_rx_en.
> Upon changing priv->hwts_rx_en via IOCTL, the gfar device is being
> restarted and on init_mac() the priv->hwts_rx_en conditions the RxFCB
> insertion, and rctrl is programmed accordingly. The patch takes care
> of this case too.
> 
> So, I'll re-spin this patch with enhanced comments.

Great, that should help a lot.

Thanks,
Paul.
--

> 
> Thanks,
> Claudiu
> 
> 
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2013-02-13 14:31 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-02-12 12:47 [PATCH net-next 1/5] gianfar: Cleanup device refs in gfar_private Claudiu Manoil
2013-02-12 12:47 ` [PATCH net-next 2/5] gianfar: Cleanup and optimize struct gfar_private Claudiu Manoil
2013-02-12 12:47   ` [PATCH net-next 3/5] gianfar: GRO_DROP is unlikely Claudiu Manoil
2013-02-12 12:47     ` [PATCH net-next 4/5] gianfar: Remove wrong buffer size conditioning to VLAN h/w offload Claudiu Manoil
2013-02-12 12:47       ` [PATCH net-next 5/5] gianfar: Fix and cleanup Rx FCB handling Claudiu Manoil
2013-02-12 17:19         ` Paul Gortmaker
2013-02-13 11:34           ` Claudiu Manoil
2013-02-13 14:32             ` Paul Gortmaker
2013-02-12 16:30     ` [PATCH net-next 3/5] gianfar: GRO_DROP is unlikely Paul Gortmaker
2013-02-12 16:47       ` Eric Dumazet
2013-02-12 17:05       ` Claudiu Manoil
2013-02-12 15:11   ` [PATCH net-next 2/5] gianfar: Cleanup and optimize struct gfar_private Paul Gortmaker
2013-02-12 15:58     ` Claudiu Manoil
2013-02-12 16:49       ` Paul Gortmaker
2013-02-12 14:54 ` [PATCH net-next 1/5] gianfar: Cleanup device refs in gfar_private Paul Gortmaker
2013-02-12 17:14   ` Claudiu Manoil

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.