netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/5] ucc_geth: Reduce IRQ off in xmit path
@ 2012-09-18 16:56 Joakim Tjernlund
  2012-09-18 16:56 ` [PATCH 2/5] ucc_geth: Word align Ethernet RX data Joakim Tjernlund
                   ` (5 more replies)
  0 siblings, 6 replies; 23+ messages in thread
From: Joakim Tjernlund @ 2012-09-18 16:56 UTC (permalink / raw)
  To: netdev; +Cc: Joakim Tjernlund

Currently ucc_geth_start_xmit wraps IRQ off for the
whole body just to be safe.
Reduce the IRQ off period to a minimum.

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
---
 drivers/net/ethernet/freescale/ucc_geth.c |    7 +++----
 1 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 9ac14f8..e609c93 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3181,8 +3181,6 @@ static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	ugeth_vdbg("%s: IN", __func__);
 
-	spin_lock_irqsave(&ugeth->lock, flags);
-
 	dev->stats.tx_bytes += skb->len;
 
 	/* Start from the next BD that should be filled */
@@ -3196,6 +3194,7 @@ static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	    (ugeth->skb_curtx[txQ] +
 	     1) & TX_RING_MOD_MASK(ugeth->ug_info->bdRingLenTx[txQ]);
 
+	spin_lock_irqsave(&ugeth->lock, flags);
 	/* set up the buffer descriptor */
 	out_be32(&((struct qe_bd __iomem *)bd)->buf,
 		      dma_map_single(ugeth->dev, skb->data,
@@ -3207,6 +3206,8 @@ static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* set bd status and length */
 	out_be32((u32 __iomem *)bd, bd_status);
+	spin_unlock_irqrestore(&ugeth->lock, flags);
+
 
 	/* Move to next BD in the ring */
 	if (!(bd_status & T_W))
@@ -3238,8 +3239,6 @@ static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	uccf = ugeth->uccf;
 	out_be16(uccf->p_utodr, UCC_FAST_TOD);
 #endif
-	spin_unlock_irqrestore(&ugeth->lock, flags);
-
 	return NETDEV_TX_OK;
 }
 
-- 
1.7.8.6

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 2/5] ucc_geth: Word align Ethernet RX data
  2012-09-18 16:56 [PATCH 1/5] ucc_geth: Reduce IRQ off in xmit path Joakim Tjernlund
@ 2012-09-18 16:56 ` Joakim Tjernlund
  2012-10-03 13:17   ` Joakim Tjernlund
  2012-09-18 16:56 ` [PATCH 3/5] ucc_geth: Fix two gcc warnings Joakim Tjernlund
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 23+ messages in thread
From: Joakim Tjernlund @ 2012-09-18 16:56 UTC (permalink / raw)
  To: netdev; +Cc: Joakim Tjernlund

UCC controller can shift received Ethernet frames 2 bytes into the buffer,
making IP data word aligned, this patch enables that feature.

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
---
 drivers/net/ethernet/freescale/ucc_geth.c |   19 +++++++++++--------
 drivers/net/ethernet/freescale/ucc_geth.h |    1 +
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index e609c93..5f1460a 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -160,6 +160,7 @@ static struct ucc_geth_info ugeth_primary_info = {
 	.numThreadsRx = UCC_GETH_NUM_OF_THREADS_1,
 	.riscTx = QE_RISC_ALLOCATION_RISC1_AND_RISC2,
 	.riscRx = QE_RISC_ALLOCATION_RISC1_AND_RISC2,
+	.ipAddressAlignment = 1,
 };
 
 static struct ucc_geth_info ugeth_info[8];
@@ -211,12 +212,13 @@ static struct sk_buff *get_new_skb(struct ucc_geth_private *ugeth,
 		u8 __iomem *bd)
 {
 	struct sk_buff *skb = NULL;
+	u16 buf_len = ugeth->ug_info->uf_info.max_rx_buf_length +
+		UCC_GETH_RX_DATA_BUF_ALIGNMENT +
+		UCC_GETH_RX_IP_ALIGNMENT;
 
 	skb = __skb_dequeue(&ugeth->rx_recycle);
 	if (!skb)
-		skb = netdev_alloc_skb(ugeth->ndev,
-				      ugeth->ug_info->uf_info.max_rx_buf_length +
-				      UCC_GETH_RX_DATA_BUF_ALIGNMENT);
+		skb = netdev_alloc_skb(ugeth->ndev, buf_len);
 	if (skb == NULL)
 		return NULL;
 
@@ -231,10 +233,9 @@ static struct sk_buff *get_new_skb(struct ucc_geth_private *ugeth,
 	out_be32(&((struct qe_bd __iomem *)bd)->buf,
 		      dma_map_single(ugeth->dev,
 				     skb->data,
-				     ugeth->ug_info->uf_info.max_rx_buf_length +
-				     UCC_GETH_RX_DATA_BUF_ALIGNMENT,
+				     buf_len,
 				     DMA_FROM_DEVICE));
-
+	skb_reserve(skb, UCC_GETH_RX_IP_ALIGNMENT);
 	out_be32((u32 __iomem *)bd,
 			(R_E | R_I | (in_be32((u32 __iomem*)bd) & R_W)));
 
@@ -1877,7 +1878,8 @@ static void ucc_geth_free_rx(struct ucc_geth_private *ugeth)
 						in_be32(&((struct qe_bd __iomem *)bd)->buf),
 						ugeth->ug_info->
 						uf_info.max_rx_buf_length +
-						UCC_GETH_RX_DATA_BUF_ALIGNMENT,
+						UCC_GETH_RX_DATA_BUF_ALIGNMENT +
+						UCC_GETH_RX_IP_ALIGNMENT,
 						DMA_FROM_DEVICE);
 					dev_kfree_skb_any(
 						ugeth->rx_skbuff[i][j]);
@@ -3352,7 +3354,8 @@ static int ucc_geth_tx(struct net_device *dev, u8 txQ)
 		if (skb_queue_len(&ugeth->rx_recycle) < RX_BD_RING_LEN &&
 			     skb_recycle_check(skb,
 				    ugeth->ug_info->uf_info.max_rx_buf_length +
-				    UCC_GETH_RX_DATA_BUF_ALIGNMENT))
+				    UCC_GETH_RX_DATA_BUF_ALIGNMENT +
+				    UCC_GETH_RX_IP_ALIGNMENT))
 			__skb_queue_head(&ugeth->rx_recycle, skb);
 		else
 			dev_kfree_skb(skb);
diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h
index f71b3e7..aeb743c 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.h
+++ b/drivers/net/ethernet/freescale/ucc_geth.h
@@ -855,6 +855,7 @@ struct ucc_geth_hardware_statistics {
 #define UCC_GETH_RX_BD_RING_SIZE_ALIGNMENT	4
 #define UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT	32
 #define UCC_GETH_RX_DATA_BUF_ALIGNMENT		64
+#define UCC_GETH_RX_IP_ALIGNMENT		2 /* IP word alignment */
 
 #define UCC_GETH_TAD_EF                         0x80
 #define UCC_GETH_TAD_V                          0x40
-- 
1.7.8.6

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 3/5] ucc_geth: Fix two gcc warnings
  2012-09-18 16:56 [PATCH 1/5] ucc_geth: Reduce IRQ off in xmit path Joakim Tjernlund
  2012-09-18 16:56 ` [PATCH 2/5] ucc_geth: Word align Ethernet RX data Joakim Tjernlund
@ 2012-09-18 16:56 ` Joakim Tjernlund
  2012-10-03 13:17   ` Joakim Tjernlund
  2012-09-18 16:56 ` [PATCH 4/5] ucc_geth: Increase RX ring buffer from 32 to 64 Joakim Tjernlund
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 23+ messages in thread
From: Joakim Tjernlund @ 2012-09-18 16:56 UTC (permalink / raw)
  To: netdev; +Cc: Joakim Tjernlund

ucc_geth.c: In function 'ucc_geth_startup':
ucc_geth.c:3092:45: warning: comparison between 'enum qe_fltr_largest_external_tbl_lookup_key_size' and 'enum qe_fltr_tbl_lookup_key_size'

ucc_geth.c:3096:45: warning: comparison between 'enum qe_fltr_largest_external_tbl_lookup_key_size' and 'enum qe_fltr_tbl_lookup_key_size'

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
---
 drivers/net/ethernet/freescale/ucc_geth.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 5f1460a..7d60b95 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3074,11 +3074,11 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
 	if (ug_info->rxExtendedFiltering) {
 		size += THREAD_RX_PRAM_ADDITIONAL_FOR_EXTENDED_FILTERING;
 		if (ug_info->largestexternallookupkeysize ==
-		    QE_FLTR_TABLE_LOOKUP_KEY_SIZE_8_BYTES)
+		    QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_8_BYTES)
 			size +=
 			    THREAD_RX_PRAM_ADDITIONAL_FOR_EXTENDED_FILTERING_8;
 		if (ug_info->largestexternallookupkeysize ==
-		    QE_FLTR_TABLE_LOOKUP_KEY_SIZE_16_BYTES)
+		    QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_16_BYTES)
 			size +=
 			    THREAD_RX_PRAM_ADDITIONAL_FOR_EXTENDED_FILTERING_16;
 	}
-- 
1.7.8.6

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 4/5] ucc_geth: Increase RX ring buffer from 32 to 64
  2012-09-18 16:56 [PATCH 1/5] ucc_geth: Reduce IRQ off in xmit path Joakim Tjernlund
  2012-09-18 16:56 ` [PATCH 2/5] ucc_geth: Word align Ethernet RX data Joakim Tjernlund
  2012-09-18 16:56 ` [PATCH 3/5] ucc_geth: Fix two gcc warnings Joakim Tjernlund
@ 2012-09-18 16:56 ` Joakim Tjernlund
  2012-10-03 13:17   ` Joakim Tjernlund
  2012-09-18 16:56 ` [PATCH 5/5] ucc_geth: Add IRQ coalescing Joakim Tjernlund
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 23+ messages in thread
From: Joakim Tjernlund @ 2012-09-18 16:56 UTC (permalink / raw)
  To: netdev; +Cc: Joakim Tjernlund

We still see dropped pkgs in a busy network, this makes it better.

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
---
 drivers/net/ethernet/freescale/ucc_geth.h |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h
index aeb743c..b68637e 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.h
+++ b/drivers/net/ethernet/freescale/ucc_geth.h
@@ -878,7 +878,7 @@ struct ucc_geth_hardware_statistics {
 
 /* Driver definitions */
 #define TX_BD_RING_LEN                          0x10
-#define RX_BD_RING_LEN                          0x20
+#define RX_BD_RING_LEN                          0x40
 
 #define TX_RING_MOD_MASK(size)                  (size-1)
 #define RX_RING_MOD_MASK(size)                  (size-1)
-- 
1.7.8.6

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 5/5] ucc_geth: Add IRQ coalescing
  2012-09-18 16:56 [PATCH 1/5] ucc_geth: Reduce IRQ off in xmit path Joakim Tjernlund
                   ` (2 preceding siblings ...)
  2012-09-18 16:56 ` [PATCH 4/5] ucc_geth: Increase RX ring buffer from 32 to 64 Joakim Tjernlund
@ 2012-09-18 16:56 ` Joakim Tjernlund
  2012-10-03 13:18   ` Joakim Tjernlund
  2012-09-18 22:39 ` [PATCH 1/5] ucc_geth: Reduce IRQ off in xmit path Francois Romieu
  2012-09-19 22:34 ` Francois Romieu
  5 siblings, 1 reply; 23+ messages in thread
From: Joakim Tjernlund @ 2012-09-18 16:56 UTC (permalink / raw)
  To: netdev; +Cc: Joakim Tjernlund

Enable modest IRQ coalescing(4 pks) to reduce IRQ load.

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
---
 arch/powerpc/include/asm/qe.h             |    1 +
 drivers/net/ethernet/freescale/ucc_geth.c |   18 +++++++++++++++---
 drivers/net/ethernet/freescale/ucc_geth.h |   20 ++++++++++++++++++++
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/qe.h b/arch/powerpc/include/asm/qe.h
index 5e0b6d5..12a8ac8 100644
--- a/arch/powerpc/include/asm/qe.h
+++ b/arch/powerpc/include/asm/qe.h
@@ -373,6 +373,7 @@ enum comm_dir {
 #define QE_MCC_STOP_RX			0x00000009
 #define QE_ATM_TRANSMIT			0x0000000a
 #define QE_HPAC_CLEAR_ALL		0x0000000b
+#define QE_SET_LAST_RX_THLD		0x00000013
 #define QE_GRACEFUL_STOP_RX		0x0000001a
 #define QE_RESTART_RX			0x0000001b
 #define QE_HPAC_SET_PRIORITY		0x0000010b
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 7d60b95..772f796 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -124,7 +124,7 @@ static struct ucc_geth_info ugeth_primary_info = {
 	.ecamptr = ((uint32_t) NULL),
 	.eventRegMask = UCCE_OTHER,
 	.pausePeriod = 0xf000,
-	.interruptcoalescingmaxvalue = {1, 1, 1, 1, 1, 1, 1, 1},
+	.interruptcoalescingmaxvalue = {4, 4, 4, 4, 4, 4, 4, 4},
 	.bdRingLenTx = {
 			TX_BD_RING_LEN,
 			TX_BD_RING_LEN,
@@ -237,7 +237,7 @@ static struct sk_buff *get_new_skb(struct ucc_geth_private *ugeth,
 				     DMA_FROM_DEVICE));
 	skb_reserve(skb, UCC_GETH_RX_IP_ALIGNMENT);
 	out_be32((u32 __iomem *)bd,
-			(R_E | R_I | (in_be32((u32 __iomem*)bd) & R_W)));
+		 (R_E | (in_be32((u32 __iomem*)bd) & R_W)));
 
 	return skb;
 }
@@ -1606,6 +1606,7 @@ static void adjust_link(struct net_device *dev)
 	struct ucc_fast __iomem *uf_regs;
 	struct phy_device *phydev = ugeth->phydev;
 	int new_state = 0;
+	u32 cecr_subblock;
 
 	ug_regs = ugeth->ug_regs;
 	uf_regs = ugeth->uccf->uf_regs;
@@ -1657,6 +1658,17 @@ static void adjust_link(struct net_device *dev)
 						dev->name, phydev->speed);
 				break;
 			}
+			cecr_subblock =	ucc_fast_get_qe_cr_subblock(ugeth->ug_info->uf_info.ucc_num);
+			if (phydev->speed == SPEED_10)
+				qe_issue_cmd(QE_SET_LAST_RX_THLD, cecr_subblock,
+					     QE_CR_PROTOCOL_ETHERNET, UCC_10_TIME);
+			else if (phydev->speed == SPEED_100)
+				qe_issue_cmd(QE_SET_LAST_RX_THLD, cecr_subblock,
+					     QE_CR_PROTOCOL_ETHERNET, UCC_100_TIME);
+			else if (phydev->speed == SPEED_1000)
+				qe_issue_cmd(QE_SET_LAST_RX_THLD, cecr_subblock,
+					     QE_CR_PROTOCOL_ETHERNET, UCC_GBIT_TIME);
+
 			ugeth->oldspeed = phydev->speed;
 		}
 
@@ -2390,7 +2402,7 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth)
 		bd = ugeth->rxBd[j] = ugeth->p_rx_bd_ring[j];
 		for (i = 0; i < ug_info->bdRingLenRx[j]; i++) {
 			/* set bd status and length */
-			out_be32((u32 __iomem *)bd, R_I);
+			out_be32((u32 __iomem *)bd, 0);
 			/* clear bd buffer */
 			out_be32(&((struct qe_bd __iomem *)bd)->buf, 0);
 			bd += sizeof(struct qe_bd);
diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h
index b68637e..49165ce 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.h
+++ b/drivers/net/ethernet/freescale/ucc_geth.h
@@ -923,6 +923,26 @@ struct ucc_geth_hardware_statistics {
 #define UCC_GETH_MACCFG1_INIT                   0
 #define UCC_GETH_MACCFG2_INIT                   (MACCFG2_RESERVED_1)
 
+/*
+ * From QUICC Engine Block Reference Manual, Chap 8.9:
+ * This command is used to set a timeout value for the Ethernet receiver
+ * interrupt coalescing mechanism.
+ * The timeout period is measured from the time that the last Ethernet frame
+ * was received. When the timeout expires, an interrupt is issued to the CPU
+ * even if the interrupt coalescing counter did not reach its maximum value. The
+ * timeout value should be written in the two least significant bytes of CECDR,
+ * and it should be specified in terms of serial clocks.
+ * For example, a value of 1000 in CECDR sets the timeout period to 1000 serial
+ * clocks.
+ * -------------------------------------------------------------------
+ * GBIT = 125MHz => 8ns/tick, 8 bits / tick
+ * 100 = 25 MHz => 40ns/tick, 4 bits / tick
+ * 10 = 2.5 MHz => 400ns/tick, 4 bits / tick
+ */
+#define UCC_GBIT_TIME  64
+#define UCC_100_TIME   64
+#define UCC_10_TIME    8
+
 /* Ethernet Address Type. */
 enum enet_addr_type {
 	ENET_ADDR_TYPE_INDIVIDUAL,
-- 
1.7.8.6

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/5] ucc_geth: Reduce IRQ off in xmit path
  2012-09-18 16:56 [PATCH 1/5] ucc_geth: Reduce IRQ off in xmit path Joakim Tjernlund
                   ` (3 preceding siblings ...)
  2012-09-18 16:56 ` [PATCH 5/5] ucc_geth: Add IRQ coalescing Joakim Tjernlund
@ 2012-09-18 22:39 ` Francois Romieu
  2012-09-19  7:36   ` Joakim Tjernlund
  2012-09-19 22:34 ` Francois Romieu
  5 siblings, 1 reply; 23+ messages in thread
From: Francois Romieu @ 2012-09-18 22:39 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: netdev

Joakim Tjernlund <Joakim.Tjernlund@transmode.se> :
> Currently ucc_geth_start_xmit wraps IRQ off for the
> whole body just to be safe.
> Reduce the IRQ off period to a minimum.

The driver does not do much work in its irq handler. You may as well
convert it to the usual tg3-ish locking style (i.e. almost no locking).

-- 
Ueimor

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/5] ucc_geth: Reduce IRQ off in xmit path
  2012-09-18 22:39 ` [PATCH 1/5] ucc_geth: Reduce IRQ off in xmit path Francois Romieu
@ 2012-09-19  7:36   ` Joakim Tjernlund
  0 siblings, 0 replies; 23+ messages in thread
From: Joakim Tjernlund @ 2012-09-19  7:36 UTC (permalink / raw)
  To: Francois Romieu; +Cc: netdev

Francois Romieu <romieu@fr.zoreil.com> wrote on 2012/09/19 00:39:38:
>
> Joakim Tjernlund <Joakim.Tjernlund@transmode.se> :
> > Currently ucc_geth_start_xmit wraps IRQ off for the
> > whole body just to be safe.
> > Reduce the IRQ off period to a minimum.
>
> The driver does not do much work in its irq handler. You may as well
> convert it to the usual tg3-ish locking style (i.e. almost no locking).

You mean broadcom/tg3.c? It is a bit much to look at ATM for me and
there almost no locking with my patch also. Could possibly
be improved further but I am happy for now.

 Jocke

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/5] ucc_geth: Reduce IRQ off in xmit path
  2012-09-18 16:56 [PATCH 1/5] ucc_geth: Reduce IRQ off in xmit path Joakim Tjernlund
                   ` (4 preceding siblings ...)
  2012-09-18 22:39 ` [PATCH 1/5] ucc_geth: Reduce IRQ off in xmit path Francois Romieu
@ 2012-09-19 22:34 ` Francois Romieu
  2012-09-20  8:06   ` Joakim Tjernlund
  2012-09-20  8:17   ` [PATCH v2] " Joakim Tjernlund
  5 siblings, 2 replies; 23+ messages in thread
From: Francois Romieu @ 2012-09-19 22:34 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: netdev

Joakim Tjernlund <Joakim.Tjernlund@transmode.se> :
> Currently ucc_geth_start_xmit wraps IRQ off for the
> whole body just to be safe.
> Reduce the IRQ off period to a minimum.

It opens a window in ucc_geth_start_xmit where the skb slot in
ugeth->tx_skbuff[txQ] is set and T_RA has not been written into
the descriptor status. Consider a racing poll : the !skb test in
ucc_geth_tx may not work as expected.

-- 
Ueimor

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/5] ucc_geth: Reduce IRQ off in xmit path
  2012-09-19 22:34 ` Francois Romieu
@ 2012-09-20  8:06   ` Joakim Tjernlund
  2012-09-20  8:17   ` [PATCH v2] " Joakim Tjernlund
  1 sibling, 0 replies; 23+ messages in thread
From: Joakim Tjernlund @ 2012-09-20  8:06 UTC (permalink / raw)
  To: Francois Romieu; +Cc: netdev

Francois Romieu <romieu@fr.zoreil.com> wrote on 2012/09/20 00:34:16:
>
> Joakim Tjernlund <Joakim.Tjernlund@transmode.se> :
> > Currently ucc_geth_start_xmit wraps IRQ off for the
> > whole body just to be safe.
> > Reduce the IRQ off period to a minimum.
>
> It opens a window in ucc_geth_start_xmit where the skb slot in
> ugeth->tx_skbuff[txQ] is set and T_RA has not been written into
> the descriptor status. Consider a racing poll : the !skb test in
> ucc_geth_tx may not work as expected.

Right, good catch!

Surprisingly the driver never showed any malfunction even though I hit
it pretty hard.

I will send a V2 of this patch where I move the assignment inside the IRQ
off part:
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3200,8 +3200,6 @@ static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev)
        /* Start from the next BD that should be filled */
        bd = ugeth->txBd[txQ];
        bd_status = in_be32((u32 __iomem *)bd);
-       /* Save the skb pointer so we can free it later */
-       ugeth->tx_skbuff[txQ][ugeth->skb_curtx[txQ]] = skb;

        /* Update the current skb pointer (wrapping if this was the last) */
        ugeth->skb_curtx[txQ] =
@@ -3209,6 +3207,8 @@ static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev)
             1) & TX_RING_MOD_MASK(ugeth->ug_info->bdRingLenTx[txQ]);

        spin_lock_irqsave(&ugeth->lock, flags);
+       /* Save the skb pointer so we can free it later */
+       ugeth->tx_skbuff[txQ][ugeth->skb_curtx[txQ]] = skb;
        /* set up the buffer descriptor */
        out_be32(&((struct qe_bd __iomem *)bd)->buf,
                      dma_map_single(ugeth->dev, skb->data,

 Jocke

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v2] ucc_geth: Reduce IRQ off in xmit path
  2012-09-19 22:34 ` Francois Romieu
  2012-09-20  8:06   ` Joakim Tjernlund
@ 2012-09-20  8:17   ` Joakim Tjernlund
  2012-09-20 21:08     ` David Miller
  2012-09-21  9:11     ` [PATCH v3] ucc_geth: Lockless xmit Joakim Tjernlund
  1 sibling, 2 replies; 23+ messages in thread
From: Joakim Tjernlund @ 2012-09-20  8:17 UTC (permalink / raw)
  To: netdev, Francois Romieu; +Cc: Joakim Tjernlund

Currently ucc_geth_start_xmit wraps IRQ off for the
whole body just to be safe.
Reduce the IRQ off period to a minimum.

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
---

 v2: Move assignment of ugeth->tx_skbuff[txQ][ugeth->skb_curtx[txQ]]
     inside IRQ off section to prevent racing against
     ucc_geth_tx(). Spotted by Francois Romieu <romieu@fr.zoreil.com>

 drivers/net/ethernet/freescale/ucc_geth.c |   11 +++++------
 1 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 9ac14f8..0100bca 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3181,21 +3181,20 @@ static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	ugeth_vdbg("%s: IN", __func__);
 
-	spin_lock_irqsave(&ugeth->lock, flags);
-
 	dev->stats.tx_bytes += skb->len;
 
 	/* Start from the next BD that should be filled */
 	bd = ugeth->txBd[txQ];
 	bd_status = in_be32((u32 __iomem *)bd);
-	/* Save the skb pointer so we can free it later */
-	ugeth->tx_skbuff[txQ][ugeth->skb_curtx[txQ]] = skb;
 
 	/* Update the current skb pointer (wrapping if this was the last) */
 	ugeth->skb_curtx[txQ] =
 	    (ugeth->skb_curtx[txQ] +
 	     1) & TX_RING_MOD_MASK(ugeth->ug_info->bdRingLenTx[txQ]);
 
+	spin_lock_irqsave(&ugeth->lock, flags);
+	/* Save the skb pointer so we can free it later */
+	ugeth->tx_skbuff[txQ][ugeth->skb_curtx[txQ]] = skb;
 	/* set up the buffer descriptor */
 	out_be32(&((struct qe_bd __iomem *)bd)->buf,
 		      dma_map_single(ugeth->dev, skb->data,
@@ -3207,6 +3206,8 @@ static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* set bd status and length */
 	out_be32((u32 __iomem *)bd, bd_status);
+	spin_unlock_irqrestore(&ugeth->lock, flags);
+
 
 	/* Move to next BD in the ring */
 	if (!(bd_status & T_W))
@@ -3238,8 +3239,6 @@ static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	uccf = ugeth->uccf;
 	out_be16(uccf->p_utodr, UCC_FAST_TOD);
 #endif
-	spin_unlock_irqrestore(&ugeth->lock, flags);
-
 	return NETDEV_TX_OK;
 }
 
-- 
1.7.8.6

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] ucc_geth: Reduce IRQ off in xmit path
  2012-09-20  8:17   ` [PATCH v2] " Joakim Tjernlund
@ 2012-09-20 21:08     ` David Miller
  2012-09-21  9:00       ` Joakim Tjernlund
  2012-09-21  9:11     ` [PATCH v3] ucc_geth: Lockless xmit Joakim Tjernlund
  1 sibling, 1 reply; 23+ messages in thread
From: David Miller @ 2012-09-20 21:08 UTC (permalink / raw)
  To: Joakim.Tjernlund; +Cc: netdev, romieu

From: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Date: Thu, 20 Sep 2012 10:17:01 +0200

> Currently ucc_geth_start_xmit wraps IRQ off for the
> whole body just to be safe.
> Reduce the IRQ off period to a minimum.
> 
> Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> ---
> 
>  v2: Move assignment of ugeth->tx_skbuff[txQ][ugeth->skb_curtx[txQ]]
>      inside IRQ off section to prevent racing against
>      ucc_geth_tx(). Spotted by Francois Romieu <romieu@fr.zoreil.com>

I agree with Francois's initial analysis, and disagree with you're
response to him, wrt. the suggest to remove all locking entirely.

Unlike what you claim, there isn't much of a gain at all from merely
make the window of lock holding smaller, especially on the scale
in which you are doing it here.

Whereas removing the lock and the atomic completely, as tg3 does,
will give very significant performance gains.

The locking cost of grabbing the spinlock, and the memory transactions
associated with it, dominate.

Furthermore, even if the gains of your change are non-trivial, you
haven't documented it.  So unless you should some noticable gains from
this, it's just code masterbation as far as I'm concerned and I'm
therefore inclined to not apply patches like this.

TG3's core interrupt locking is not that difficult to understand and
replicate in other drivers, so I dismiss your attempts to avoid that
approach on difficulty grounds as well.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] ucc_geth: Reduce IRQ off in xmit path
  2012-09-20 21:08     ` David Miller
@ 2012-09-21  9:00       ` Joakim Tjernlund
  0 siblings, 0 replies; 23+ messages in thread
From: Joakim Tjernlund @ 2012-09-21  9:00 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, romieu

David Miller <davem@davemloft.net> wrote on 2012/09/20 23:08:52:
>
> From: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> Date: Thu, 20 Sep 2012 10:17:01 +0200
>
> > Currently ucc_geth_start_xmit wraps IRQ off for the
> > whole body just to be safe.
> > Reduce the IRQ off period to a minimum.
> >
> > Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> > ---
> >
> >  v2: Move assignment of ugeth->tx_skbuff[txQ][ugeth->skb_curtx[txQ]]
> >      inside IRQ off section to prevent racing against
> >      ucc_geth_tx(). Spotted by Francois Romieu <romieu@fr.zoreil.com>
>
> I agree with Francois's initial analysis, and disagree with you're
> response to him, wrt. the suggest to remove all locking entirely.
>
> Unlike what you claim, there isn't much of a gain at all from merely
> make the window of lock holding smaller, especially on the scale
> in which you are doing it here.
>
> Whereas removing the lock and the atomic completely, as tg3 does,
> will give very significant performance gains.
>
> The locking cost of grabbing the spinlock, and the memory transactions
> associated with it, dominate.
>
> Furthermore, even if the gains of your change are non-trivial, you
> haven't documented it.  So unless you should some noticable gains from
> this, it's just code masterbation as far as I'm concerned and I'm
> therefore inclined to not apply patches like this.
>
> TG3's core interrupt locking is not that difficult to understand and
> replicate in other drivers, so I dismiss your attempts to avoid that
> approach on difficulty grounds as well.

OK, I will give it a go. Got something working that I will send in shortly.
I hope the other patches were OK?

 Jocke

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v3] ucc_geth: Lockless xmit
  2012-09-20  8:17   ` [PATCH v2] " Joakim Tjernlund
  2012-09-20 21:08     ` David Miller
@ 2012-09-21  9:11     ` Joakim Tjernlund
  2012-09-21 12:51       ` Francois Romieu
  1 sibling, 1 reply; 23+ messages in thread
From: Joakim Tjernlund @ 2012-09-21  9:11 UTC (permalink / raw)
  To: Francois Romieu, netdev; +Cc: Joakim Tjernlund

Currently ucc_geth_start_xmit wraps IRQ off for the
whole body just to be safe. By rearranging the code a bit
one can avoid the lock completely.

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
---

 v2: Move assignment of ugeth->tx_skbuff[txQ][ugeth->skb_curtx[txQ]]
     inside IRQ off section to prevent racing against
     ucc_geth_tx(). Spotted by Francois Romieu <romieu@fr.zoreil.com>

 v3: Lockless xmit
     Here is my attemept to do lockless xmit. Thanks to
     Francois Romieu <romieu@fr.zoreil.com> for the idea.

 drivers/net/ethernet/freescale/ucc_geth.c |   17 ++++++++---------
 1 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 9ac14f8..040aa70 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3177,19 +3177,20 @@ static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	u8 __iomem *bd;			/* BD pointer */
 	u32 bd_status;
 	u8 txQ = 0;
-	unsigned long flags;
 
 	ugeth_vdbg("%s: IN", __func__);
 
-	spin_lock_irqsave(&ugeth->lock, flags);
-
 	dev->stats.tx_bytes += skb->len;
 
+	/* We are running in BH disabled context with netif_tx_lock
+	 * and TX reclaim runs via tp->napi.poll inside of a software
+	 * interrupt.  Furthermore, IRQ processing runs lockless so we have
+	 * no IRQ context deadlocks to worry about either.  Rejoice!
+	 */
+
 	/* Start from the next BD that should be filled */
 	bd = ugeth->txBd[txQ];
 	bd_status = in_be32((u32 __iomem *)bd);
-	/* Save the skb pointer so we can free it later */
-	ugeth->tx_skbuff[txQ][ugeth->skb_curtx[txQ]] = skb;
 
 	/* Update the current skb pointer (wrapping if this was the last) */
 	ugeth->skb_curtx[txQ] =
@@ -3207,6 +3208,8 @@ static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* set bd status and length */
 	out_be32((u32 __iomem *)bd, bd_status);
+	/* Save the skb pointer so we can free it later */
+	ugeth->tx_skbuff[txQ][ugeth->skb_curtx[txQ]] = skb;
 
 	/* Move to next BD in the ring */
 	if (!(bd_status & T_W))
@@ -3238,8 +3241,6 @@ static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	uccf = ugeth->uccf;
 	out_be16(uccf->p_utodr, UCC_FAST_TOD);
 #endif
-	spin_unlock_irqrestore(&ugeth->lock, flags);
-
 	return NETDEV_TX_OK;
 }
 
@@ -3387,10 +3388,8 @@ static int ucc_geth_poll(struct napi_struct *napi, int budget)
 	ug_info = ugeth->ug_info;
 
 	/* Tx event processing */
-	spin_lock(&ugeth->lock);
 	for (i = 0; i < ug_info->numQueuesTx; i++)
 		ucc_geth_tx(ugeth->ndev, i);
-	spin_unlock(&ugeth->lock);
 
 	howmany = 0;
 	for (i = 0; i < ug_info->numQueuesRx; i++)
-- 
1.7.8.6

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH v3] ucc_geth: Lockless xmit
  2012-09-21  9:11     ` [PATCH v3] ucc_geth: Lockless xmit Joakim Tjernlund
@ 2012-09-21 12:51       ` Francois Romieu
       [not found]         ` <OF6B2AFF0B.1C979828-ONC1257A80.00502DCF-C1257A80.00509C7E@LocalDomain>
  0 siblings, 1 reply; 23+ messages in thread
From: Francois Romieu @ 2012-09-21 12:51 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: netdev

Joakim Tjernlund <Joakim.Tjernlund@transmode.se> :
> Currently ucc_geth_start_xmit wraps IRQ off for the
> whole body just to be safe. By rearranging the code a bit
> one can avoid the lock completely.

Afaics you went a bit too lockless with the queueing disable / enable
logic. The hard_start_xmit handler is run in a locally softirq disabled
section but it will happily race with the napi handler on a different
CPU. Grep netif_tx_lock in tg3.c for it.

The Tx skb free logic probably requires some smp memory barriers as
well since the current skb is used by the ucc_geth driver to sync the
Tx xmit with the napi completion handler.

-- 
Ueimor

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3] ucc_geth: Lockless xmit
       [not found]             ` <20120921173535.GA27934@electric-eye.fr.zoreil.com>
@ 2012-09-23  9:39               ` Joakim Tjernlund
  2012-09-24 21:10                 ` Francois Romieu
  0 siblings, 1 reply; 23+ messages in thread
From: Joakim Tjernlund @ 2012-09-23  9:39 UTC (permalink / raw)
  To: Francois Romieu; +Cc: netdev

Francois Romieu <romieu@fr.zoreil.com> wrote on 2012/09/21 19:35:35:
>
> Please keep netdev in the Cc:.

Sorry, wrong reply button. Now added.

>
> Joakim Tjernlund <joakim.tjernlund@transmode.se> :
> [...]
> > This is what I could come up with, what do you think?
>
> In its current state the driver implicitely relies on the Tx xmit vs Tx
> completion exclusion to work. As long as they exclude each other the
> ugeth->tx_skbuff[txQ][...] and the "bd_status & T_R" states are consistent.
>
> This scheme can not work without locking. The skb alone won't provide a
> synchronization point.

I don't get it. The skb test is there just for one special case, when
the BD ring is empty the (bd_status & T_R) == 0 will be true as well so
one need something more than the bd_status test.

>
> An usual solution would be some skb dirtytx vs curtx comparison + smp
> barriers (Documentation/memory-barriers.txt).

You mean adding an extra test in addition to !skb? Something like:
if (!skb && ugeth->skb_dirtytx[txQ] == ugeth->skb_curtx[txQ])
	break;

>
> Sidenote: is there some reason why modulo (%) operations should be
> avoided on this platform ?

Not that I know. The original driver author did it this way.

>
> [...]
> > @@ -3380,8 +3380,12 @@ static int ucc_geth_tx(struct net_device *dev, u8 txQ)
> >                      1) & TX_RING_MOD_MASK(ugeth->ug_info->bdRingLenTx[txQ]);
> >
> >                 /* We freed a buffer, so now we can restart transmission */
> > -               if (netif_queue_stopped(dev))
> > -                       netif_wake_queue(dev);
> > +               if (netif_queue_stopped(dev)) {
> > +                       netif_tx_lock(dev);
> > +                       if (netif_queue_stopped(dev))
> > +                               netif_wake_queue(dev);
> > +                       netif_tx_unlock(dev);
> > +               }
>
> Without exclusion we don't know if it was stopped before or after the packet
> was freed. There must be some "are there available Tx slots ?" test in the
> locked section.

This makes sense, but stopping relies on ugeth->confBd[txQ] which is updated
last in ucc_geth_tx(). Looks a bit suboptimal though so it should probably
be changed.

>
> Btw the second netif_queue_stopped test can be removed if tx queueing can
> only be stopped in a single place (I did not check).

There is a netif_stop_queue in xmit and one in ucc_geth_close() so I guess
one can say there is only one place.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3] ucc_geth: Lockless xmit
  2012-09-23  9:39               ` Joakim Tjernlund
@ 2012-09-24 21:10                 ` Francois Romieu
  2012-09-25 14:09                   ` Joakim Tjernlund
  0 siblings, 1 reply; 23+ messages in thread
From: Francois Romieu @ 2012-09-24 21:10 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: netdev

Joakim Tjernlund <joakim.tjernlund@transmode.se> :
[...]
> I don't get it. The skb test is there just for one special case, when
> the BD ring is empty the (bd_status & T_R) == 0 will be true as well so
> one need something more than the bd_status test.

Sure but the converse is not true : (bd_status & T_R) == 0 && skb does not
mean that the skb has been sent. It happens when said skb is about to be
given to the hardware by hard_start_xmit as well.

-- 
Ueimor

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3] ucc_geth: Lockless xmit
  2012-09-24 21:10                 ` Francois Romieu
@ 2012-09-25 14:09                   ` Joakim Tjernlund
  0 siblings, 0 replies; 23+ messages in thread
From: Joakim Tjernlund @ 2012-09-25 14:09 UTC (permalink / raw)
  To: Francois Romieu; +Cc: netdev

Francois Romieu <romieu@fr.zoreil.com> wrote on 2012/09/24 23:10:14:
>
> Joakim Tjernlund <joakim.tjernlund@transmode.se> :
> [...]
> > I don't get it. The skb test is there just for one special case, when
> > the BD ring is empty the (bd_status & T_R) == 0 will be true as well so
> > one need something more than the bd_status test.
>
> Sure but the converse is not true : (bd_status & T_R) == 0 && skb does not
> mean that the skb has been sent. It happens when said skb is about to be
> given to the hardware by hard_start_xmit as well.

duhh, I was too tired when trying to make sense of smp & racing in general, thanks.

Will probably be some time before I get to this again due to other stuff though.
The other patches are independent of this one, I hope they are good/accepted?

  Jocke

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 2/5] ucc_geth: Word align Ethernet RX data
  2012-09-18 16:56 ` [PATCH 2/5] ucc_geth: Word align Ethernet RX data Joakim Tjernlund
@ 2012-10-03 13:17   ` Joakim Tjernlund
  0 siblings, 0 replies; 23+ messages in thread
From: Joakim Tjernlund @ 2012-10-03 13:17 UTC (permalink / raw)
  Cc: netdev

Ping? Got no comments and I can see it in net or net-next trees either.

Joakim Tjernlund <Joakim.Tjernlund@transmode.se> wrote on 2012/09/18 18:56:22:

> From: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> To: netdev@vger.kernel.org,
> Cc: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> Date: 2012/09/18 18:56
> Subject: [PATCH 2/5] ucc_geth: Word align Ethernet RX data
>
> UCC controller can shift received Ethernet frames 2 bytes into the buffer,
> making IP data word aligned, this patch enables that feature.
>
> Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> ---
>  drivers/net/ethernet/freescale/ucc_geth.c |   19 +++++++++++--------
>  drivers/net/ethernet/freescale/ucc_geth.h |    1 +
>  2 files changed, 12 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
> index e609c93..5f1460a 100644
> --- a/drivers/net/ethernet/freescale/ucc_geth.c
> +++ b/drivers/net/ethernet/freescale/ucc_geth.c
> @@ -160,6 +160,7 @@ static struct ucc_geth_info ugeth_primary_info = {
>     .numThreadsRx = UCC_GETH_NUM_OF_THREADS_1,
>     .riscTx = QE_RISC_ALLOCATION_RISC1_AND_RISC2,
>     .riscRx = QE_RISC_ALLOCATION_RISC1_AND_RISC2,
> +   .ipAddressAlignment = 1,
>  };
>
>  static struct ucc_geth_info ugeth_info[8];
> @@ -211,12 +212,13 @@ static struct sk_buff *get_new_skb(struct ucc_geth_private *ugeth,
>        u8 __iomem *bd)
>  {
>     struct sk_buff *skb = NULL;
> +   u16 buf_len = ugeth->ug_info->uf_info.max_rx_buf_length +
> +      UCC_GETH_RX_DATA_BUF_ALIGNMENT +
> +      UCC_GETH_RX_IP_ALIGNMENT;
>
>     skb = __skb_dequeue(&ugeth->rx_recycle);
>     if (!skb)
> -      skb = netdev_alloc_skb(ugeth->ndev,
> -                  ugeth->ug_info->uf_info.max_rx_buf_length +
> -                  UCC_GETH_RX_DATA_BUF_ALIGNMENT);
> +      skb = netdev_alloc_skb(ugeth->ndev, buf_len);
>     if (skb == NULL)
>        return NULL;
>
> @@ -231,10 +233,9 @@ static struct sk_buff *get_new_skb(struct ucc_geth_private *ugeth,
>     out_be32(&((struct qe_bd __iomem *)bd)->buf,
>              dma_map_single(ugeth->dev,
>                   skb->data,
> -                 ugeth->ug_info->uf_info.max_rx_buf_length +
> -                 UCC_GETH_RX_DATA_BUF_ALIGNMENT,
> +                 buf_len,
>                   DMA_FROM_DEVICE));
> -
> +   skb_reserve(skb, UCC_GETH_RX_IP_ALIGNMENT);
>     out_be32((u32 __iomem *)bd,
>           (R_E | R_I | (in_be32((u32 __iomem*)bd) & R_W)));
>
> @@ -1877,7 +1878,8 @@ static void ucc_geth_free_rx(struct ucc_geth_private *ugeth)
>                    in_be32(&((struct qe_bd __iomem *)bd)->buf),
>                    ugeth->ug_info->
>                    uf_info.max_rx_buf_length +
> -                  UCC_GETH_RX_DATA_BUF_ALIGNMENT,
> +                  UCC_GETH_RX_DATA_BUF_ALIGNMENT +
> +                  UCC_GETH_RX_IP_ALIGNMENT,
>                    DMA_FROM_DEVICE);
>                 dev_kfree_skb_any(
>                    ugeth->rx_skbuff[i][j]);
> @@ -3352,7 +3354,8 @@ static int ucc_geth_tx(struct net_device *dev, u8 txQ)
>        if (skb_queue_len(&ugeth->rx_recycle) < RX_BD_RING_LEN &&
>                skb_recycle_check(skb,
>                  ugeth->ug_info->uf_info.max_rx_buf_length +
> -                UCC_GETH_RX_DATA_BUF_ALIGNMENT))
> +                UCC_GETH_RX_DATA_BUF_ALIGNMENT +
> +                UCC_GETH_RX_IP_ALIGNMENT))
>           __skb_queue_head(&ugeth->rx_recycle, skb);
>        else
>           dev_kfree_skb(skb);
> diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h
> index f71b3e7..aeb743c 100644
> --- a/drivers/net/ethernet/freescale/ucc_geth.h
> +++ b/drivers/net/ethernet/freescale/ucc_geth.h
> @@ -855,6 +855,7 @@ struct ucc_geth_hardware_statistics {
>  #define UCC_GETH_RX_BD_RING_SIZE_ALIGNMENT   4
>  #define UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT   32
>  #define UCC_GETH_RX_DATA_BUF_ALIGNMENT      64
> +#define UCC_GETH_RX_IP_ALIGNMENT      2 /* IP word alignment */
>
>  #define UCC_GETH_TAD_EF                         0x80
>  #define UCC_GETH_TAD_V                          0x40
> --
> 1.7.8.6
>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/5] ucc_geth: Fix two gcc warnings
  2012-09-18 16:56 ` [PATCH 3/5] ucc_geth: Fix two gcc warnings Joakim Tjernlund
@ 2012-10-03 13:17   ` Joakim Tjernlund
  0 siblings, 0 replies; 23+ messages in thread
From: Joakim Tjernlund @ 2012-10-03 13:17 UTC (permalink / raw)
  Cc: netdev

Ping? Got no comments and I can see it in net or net-next trees either.

Joakim Tjernlund <Joakim.Tjernlund@transmode.se> wrote on 2012/09/18 18:56:23:

> From: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> To: netdev@vger.kernel.org,
> Cc: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> Date: 2012/09/18 18:56
> Subject: [PATCH 3/5] ucc_geth: Fix two gcc warnings
>
> ucc_geth.c: In function 'ucc_geth_startup':
> ucc_geth.c:3092:45: warning: comparison between 'enum qe_fltr_largest_external_tbl_lookup_key_size' and 'enum qe_fltr_tbl_lookup_key_size'
>
> ucc_geth.c:3096:45: warning: comparison between 'enum qe_fltr_largest_external_tbl_lookup_key_size' and 'enum qe_fltr_tbl_lookup_key_size'
>
> Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> ---
>  drivers/net/ethernet/freescale/ucc_geth.c |    4 ++--
>  1 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
> index 5f1460a..7d60b95 100644
> --- a/drivers/net/ethernet/freescale/ucc_geth.c
> +++ b/drivers/net/ethernet/freescale/ucc_geth.c
> @@ -3074,11 +3074,11 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
>     if (ug_info->rxExtendedFiltering) {
>        size += THREAD_RX_PRAM_ADDITIONAL_FOR_EXTENDED_FILTERING;
>        if (ug_info->largestexternallookupkeysize ==
> -          QE_FLTR_TABLE_LOOKUP_KEY_SIZE_8_BYTES)
> +          QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_8_BYTES)
>           size +=
>               THREAD_RX_PRAM_ADDITIONAL_FOR_EXTENDED_FILTERING_8;
>        if (ug_info->largestexternallookupkeysize ==
> -          QE_FLTR_TABLE_LOOKUP_KEY_SIZE_16_BYTES)
> +          QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_16_BYTES)
>           size +=
>               THREAD_RX_PRAM_ADDITIONAL_FOR_EXTENDED_FILTERING_16;
>     }
> --
> 1.7.8.6
>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 4/5] ucc_geth: Increase RX ring buffer from 32 to 64
  2012-09-18 16:56 ` [PATCH 4/5] ucc_geth: Increase RX ring buffer from 32 to 64 Joakim Tjernlund
@ 2012-10-03 13:17   ` Joakim Tjernlund
  2012-10-03 19:38     ` David Miller
  0 siblings, 1 reply; 23+ messages in thread
From: Joakim Tjernlund @ 2012-10-03 13:17 UTC (permalink / raw)
  Cc: netdev

Ping? Got no comments and I can see it in net or net-next trees either.

Joakim Tjernlund <Joakim.Tjernlund@transmode.se> wrote on 2012/09/18 18:56:24:

> From: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> To: netdev@vger.kernel.org,
> Cc: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> Date: 2012/09/18 18:56
> Subject: [PATCH 4/5] ucc_geth: Increase RX ring buffer from 32 to 64
>
> We still see dropped pkgs in a busy network, this makes it better.
>
> Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> ---
>  drivers/net/ethernet/freescale/ucc_geth.h |    2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
>
> diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h
> index aeb743c..b68637e 100644
> --- a/drivers/net/ethernet/freescale/ucc_geth.h
> +++ b/drivers/net/ethernet/freescale/ucc_geth.h
> @@ -878,7 +878,7 @@ struct ucc_geth_hardware_statistics {
>
>  /* Driver definitions */
>  #define TX_BD_RING_LEN                          0x10
> -#define RX_BD_RING_LEN                          0x20
> +#define RX_BD_RING_LEN                          0x40
>
>  #define TX_RING_MOD_MASK(size)                  (size-1)
>  #define RX_RING_MOD_MASK(size)                  (size-1)
> --
> 1.7.8.6
>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 5/5] ucc_geth: Add IRQ coalescing
  2012-09-18 16:56 ` [PATCH 5/5] ucc_geth: Add IRQ coalescing Joakim Tjernlund
@ 2012-10-03 13:18   ` Joakim Tjernlund
  0 siblings, 0 replies; 23+ messages in thread
From: Joakim Tjernlund @ 2012-10-03 13:18 UTC (permalink / raw)
  Cc: netdev

Ping? Got no comments and I can see it in net or net-next trees either.

Joakim Tjernlund <Joakim.Tjernlund@transmode.se> wrote on 2012/09/18 18:56:25:

> From: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> To: netdev@vger.kernel.org,
> Cc: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> Date: 2012/09/18 18:56
> Subject: [PATCH 5/5] ucc_geth: Add IRQ coalescing
>
> Enable modest IRQ coalescing(4 pks) to reduce IRQ load.
>
> Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> ---
>  arch/powerpc/include/asm/qe.h             |    1 +
>  drivers/net/ethernet/freescale/ucc_geth.c |   18 +++++++++++++++---
>  drivers/net/ethernet/freescale/ucc_geth.h |   20 ++++++++++++++++++++
>  3 files changed, 36 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/qe.h b/arch/powerpc/include/asm/qe.h
> index 5e0b6d5..12a8ac8 100644
> --- a/arch/powerpc/include/asm/qe.h
> +++ b/arch/powerpc/include/asm/qe.h
> @@ -373,6 +373,7 @@ enum comm_dir {
>  #define QE_MCC_STOP_RX         0x00000009
>  #define QE_ATM_TRANSMIT         0x0000000a
>  #define QE_HPAC_CLEAR_ALL      0x0000000b
> +#define QE_SET_LAST_RX_THLD      0x00000013
>  #define QE_GRACEFUL_STOP_RX      0x0000001a
>  #define QE_RESTART_RX         0x0000001b
>  #define QE_HPAC_SET_PRIORITY      0x0000010b
> diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
> index 7d60b95..772f796 100644
> --- a/drivers/net/ethernet/freescale/ucc_geth.c
> +++ b/drivers/net/ethernet/freescale/ucc_geth.c
> @@ -124,7 +124,7 @@ static struct ucc_geth_info ugeth_primary_info = {
>     .ecamptr = ((uint32_t) NULL),
>     .eventRegMask = UCCE_OTHER,
>     .pausePeriod = 0xf000,
> -   .interruptcoalescingmaxvalue = {1, 1, 1, 1, 1, 1, 1, 1},
> +   .interruptcoalescingmaxvalue = {4, 4, 4, 4, 4, 4, 4, 4},
>     .bdRingLenTx = {
>           TX_BD_RING_LEN,
>           TX_BD_RING_LEN,
> @@ -237,7 +237,7 @@ static struct sk_buff *get_new_skb(struct ucc_geth_private *ugeth,
>                   DMA_FROM_DEVICE));
>     skb_reserve(skb, UCC_GETH_RX_IP_ALIGNMENT);
>     out_be32((u32 __iomem *)bd,
> -         (R_E | R_I | (in_be32((u32 __iomem*)bd) & R_W)));
> +       (R_E | (in_be32((u32 __iomem*)bd) & R_W)));
>
>     return skb;
>  }
> @@ -1606,6 +1606,7 @@ static void adjust_link(struct net_device *dev)
>     struct ucc_fast __iomem *uf_regs;
>     struct phy_device *phydev = ugeth->phydev;
>     int new_state = 0;
> +   u32 cecr_subblock;
>
>     ug_regs = ugeth->ug_regs;
>     uf_regs = ugeth->uccf->uf_regs;
> @@ -1657,6 +1658,17 @@ static void adjust_link(struct net_device *dev)
>                    dev->name, phydev->speed);
>              break;
>           }
> +         cecr_subblock =   ucc_fast_get_qe_cr_subblock(ugeth->ug_info->uf_info.ucc_num);
> +         if (phydev->speed == SPEED_10)
> +            qe_issue_cmd(QE_SET_LAST_RX_THLD, cecr_subblock,
> +                    QE_CR_PROTOCOL_ETHERNET, UCC_10_TIME);
> +         else if (phydev->speed == SPEED_100)
> +            qe_issue_cmd(QE_SET_LAST_RX_THLD, cecr_subblock,
> +                    QE_CR_PROTOCOL_ETHERNET, UCC_100_TIME);
> +         else if (phydev->speed == SPEED_1000)
> +            qe_issue_cmd(QE_SET_LAST_RX_THLD, cecr_subblock,
> +                    QE_CR_PROTOCOL_ETHERNET, UCC_GBIT_TIME);
> +
>           ugeth->oldspeed = phydev->speed;
>        }
>
> @@ -2390,7 +2402,7 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth)
>        bd = ugeth->rxBd[j] = ugeth->p_rx_bd_ring[j];
>        for (i = 0; i < ug_info->bdRingLenRx[j]; i++) {
>           /* set bd status and length */
> -         out_be32((u32 __iomem *)bd, R_I);
> +         out_be32((u32 __iomem *)bd, 0);
>           /* clear bd buffer */
>           out_be32(&((struct qe_bd __iomem *)bd)->buf, 0);
>           bd += sizeof(struct qe_bd);
> diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h
> index b68637e..49165ce 100644
> --- a/drivers/net/ethernet/freescale/ucc_geth.h
> +++ b/drivers/net/ethernet/freescale/ucc_geth.h
> @@ -923,6 +923,26 @@ struct ucc_geth_hardware_statistics {
>  #define UCC_GETH_MACCFG1_INIT                   0
>  #define UCC_GETH_MACCFG2_INIT                   (MACCFG2_RESERVED_1)
>
> +/*
> + * From QUICC Engine Block Reference Manual, Chap 8.9:
> + * This command is used to set a timeout value for the Ethernet receiver
> + * interrupt coalescing mechanism.
> + * The timeout period is measured from the time that the last Ethernet frame
> + * was received. When the timeout expires, an interrupt is issued to the CPU
> + * even if the interrupt coalescing counter did not reach its maximum value. The
> + * timeout value should be written in the two least significant bytes of CECDR,
> + * and it should be specified in terms of serial clocks.
> + * For example, a value of 1000 in CECDR sets the timeout period to 1000 serial
> + * clocks.
> + * -------------------------------------------------------------------
> + * GBIT = 125MHz => 8ns/tick, 8 bits / tick
> + * 100 = 25 MHz => 40ns/tick, 4 bits / tick
> + * 10 = 2.5 MHz => 400ns/tick, 4 bits / tick
> + */
> +#define UCC_GBIT_TIME  64
> +#define UCC_100_TIME   64
> +#define UCC_10_TIME    8
> +
>  /* Ethernet Address Type. */
>  enum enet_addr_type {
>     ENET_ADDR_TYPE_INDIVIDUAL,
> --
> 1.7.8.6
>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 4/5] ucc_geth: Increase RX ring buffer from 32 to 64
  2012-10-03 13:17   ` Joakim Tjernlund
@ 2012-10-03 19:38     ` David Miller
  2012-10-03 20:13       ` Joakim Tjernlund
  0 siblings, 1 reply; 23+ messages in thread
From: David Miller @ 2012-10-03 19:38 UTC (permalink / raw)
  To: joakim.tjernlund; +Cc: netdev

From: Joakim Tjernlund <joakim.tjernlund@transmode.se>
Date: Wed, 3 Oct 2012 15:17:58 +0200

> Ping? Got no comments and I can see it in net or net-next trees either.

No patch will be applied to the tree when one of the indiviual patches
get feedback and request changes from you.

When an individual patch must be redone, you must resend the entire
series not just the individual patch which changed.

You also never need to ask the kind of question you are asking here,
you simply need to look into patch work to see what the state of
your patches is:

I maintain this state exactly so people don't need to waste precious
developer time asking "what is the state of my patch" like you are
making me do right here.

See:

http://patchwork.ozlabs.org/project/netdev/list/?state=*&q=ucc_geth

And as you can see this entire series is marked as either "RFC"
or "Changes Requested"

Even worse, you did this for not one but several of the patches you
posted.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 4/5] ucc_geth: Increase RX ring buffer from 32 to 64
  2012-10-03 19:38     ` David Miller
@ 2012-10-03 20:13       ` Joakim Tjernlund
  0 siblings, 0 replies; 23+ messages in thread
From: Joakim Tjernlund @ 2012-10-03 20:13 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

David Miller <davem@davemloft.net> wrote on 2012/10/03 21:38:55:
>
> From: Joakim Tjernlund <joakim.tjernlund@transmode.se>
> Date: Wed, 3 Oct 2012 15:17:58 +0200
>
> > Ping? Got no comments and I can see it in net or net-next trees either.
>
> No patch will be applied to the tree when one of the indiviual patches
> get feedback and request changes from you.
>
> When an individual patch must be redone, you must resend the entire
> series not just the individual patch which changed.

oh, I see what I did wrong. I sent them as series which may have internal dependencies.
However, each patch can be applied on its own.

>
> You also never need to ask the kind of question you are asking here,
> you simply need to look into patch work to see what the state of
> your patches is:
>
> I maintain this state exactly so people don't need to waste precious
> developer time asking "what is the state of my patch" like you are
> making me do right here.
>
> See:
>
> http://patchwork.ozlabs.org/project/netdev/list/?state=*&q=ucc_geth

Ahh, better bookmark this page for future use.

>
> And as you can see this entire series is marked as either "RFC"
> or "Changes Requested"

Yes, I see that now.
I guess I should resend each patch separately then?
(Possibly wait until I have figured out the lockless TX stuff)

>
> Even worse, you did this for not one but several of the patches you
> posted.

Sorry about that.

 Jocke

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2012-10-03 20:13 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-09-18 16:56 [PATCH 1/5] ucc_geth: Reduce IRQ off in xmit path Joakim Tjernlund
2012-09-18 16:56 ` [PATCH 2/5] ucc_geth: Word align Ethernet RX data Joakim Tjernlund
2012-10-03 13:17   ` Joakim Tjernlund
2012-09-18 16:56 ` [PATCH 3/5] ucc_geth: Fix two gcc warnings Joakim Tjernlund
2012-10-03 13:17   ` Joakim Tjernlund
2012-09-18 16:56 ` [PATCH 4/5] ucc_geth: Increase RX ring buffer from 32 to 64 Joakim Tjernlund
2012-10-03 13:17   ` Joakim Tjernlund
2012-10-03 19:38     ` David Miller
2012-10-03 20:13       ` Joakim Tjernlund
2012-09-18 16:56 ` [PATCH 5/5] ucc_geth: Add IRQ coalescing Joakim Tjernlund
2012-10-03 13:18   ` Joakim Tjernlund
2012-09-18 22:39 ` [PATCH 1/5] ucc_geth: Reduce IRQ off in xmit path Francois Romieu
2012-09-19  7:36   ` Joakim Tjernlund
2012-09-19 22:34 ` Francois Romieu
2012-09-20  8:06   ` Joakim Tjernlund
2012-09-20  8:17   ` [PATCH v2] " Joakim Tjernlund
2012-09-20 21:08     ` David Miller
2012-09-21  9:00       ` Joakim Tjernlund
2012-09-21  9:11     ` [PATCH v3] ucc_geth: Lockless xmit Joakim Tjernlund
2012-09-21 12:51       ` Francois Romieu
     [not found]         ` <OF6B2AFF0B.1C979828-ONC1257A80.00502DCF-C1257A80.00509C7E@LocalDomain>
     [not found]           ` <OF53C63B27.8D5BE7C3-ONC1257A80.00575156-C1257A80.005766FC@transmode.se>
     [not found]             ` <20120921173535.GA27934@electric-eye.fr.zoreil.com>
2012-09-23  9:39               ` Joakim Tjernlund
2012-09-24 21:10                 ` Francois Romieu
2012-09-25 14:09                   ` Joakim Tjernlund

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).