All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFT net-next] net: stmmac: Rework coalesce timer and fix multi-queue races
@ 2018-08-30 10:37 Jose Abreu
  2018-09-03  8:56 ` Jerome Brunet
  0 siblings, 1 reply; 11+ messages in thread
From: Jose Abreu @ 2018-08-30 10:37 UTC (permalink / raw)
  To: netdev
  Cc: Jose Abreu, Jerome Brunet, Martin Blumenstingl, David S. Miller,
	Joao Pinto, Giuseppe Cavallaro, Alexandre Torgue

[ As for now this is only for testing! ]

This follows David Miller advice and tries to fix coalesce timer in
multi-queue scenarios.

We are now using per-queue coalesce values and per-queue TX timer. This
assumes that tx_queues == rx_queues, which can not be necessarly true.
Official patch will need to have this fixed.

Coalesce timer default values was changed to 1ms and the coalesce frames
to 25.

Tested in B2B setup between XGMAC2 and GMAC5.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Cc: Jerome Brunet <jbrunet@baylibre.com>
Cc: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Joao Pinto <jpinto@synopsys.com>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/common.h      |   4 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac.h      |   6 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 117 +++++++++++++---------
 3 files changed, 75 insertions(+), 52 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 1854f270ad66..b1b305f8f414 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -258,10 +258,10 @@ struct stmmac_safety_stats {
 #define MAX_DMA_RIWT		0xff
 #define MIN_DMA_RIWT		0x20
 /* Tx coalesce parameters */
-#define STMMAC_COAL_TX_TIMER	40000
+#define STMMAC_COAL_TX_TIMER	1000
 #define STMMAC_MAX_COAL_TX_TICK	100000
 #define STMMAC_TX_MAX_FRAMES	256
-#define STMMAC_TX_FRAMES	64
+#define STMMAC_TX_FRAMES	25
 
 /* Packets types */
 enum packets_types {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 76649adf8fb0..db42a5f03a5f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -48,6 +48,9 @@ struct stmmac_tx_info {
 
 /* Frequently used values are kept adjacent for cache effect */
 struct stmmac_tx_queue {
+	u32 tx_count_frames;
+	int tx_timer_active;
+	struct timer_list txtimer;
 	u32 queue_index;
 	struct stmmac_priv *priv_data;
 	struct dma_extended_desc *dma_etx ____cacheline_aligned_in_smp;
@@ -109,15 +112,12 @@ struct stmmac_pps_cfg {
 
 struct stmmac_priv {
 	/* Frequently used values are kept adjacent for cache effect */
-	u32 tx_count_frames;
 	u32 tx_coal_frames;
 	u32 tx_coal_timer;
-	bool tx_timer_armed;
 
 	int tx_coalesce;
 	int hwts_tx_en;
 	bool tx_path_in_lpi_mode;
-	struct timer_list txtimer;
 	bool tso;
 
 	unsigned int dma_buf_sz;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index ff1ffb46198a..ae26a6e8608e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2034,7 +2034,6 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
 	u32 channels_to_check = tx_channel_count > rx_channel_count ?
 				tx_channel_count : rx_channel_count;
 	u32 chan;
-	bool poll_scheduled = false;
 	int status[max_t(u32, MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES)];
 
 	/* Make sure we never check beyond our status buffer. */
@@ -2058,7 +2057,6 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
 			if (likely(napi_schedule_prep(&rx_q->napi))) {
 				stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
 				__napi_schedule(&rx_q->napi);
-				poll_scheduled = true;
 			}
 		}
 	}
@@ -2067,21 +2065,13 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
 	 * If we didn't schedule poll, see if any DMA channel (used by tx) has a
 	 * completed transmission, if so, call stmmac_poll (once).
 	 */
-	if (!poll_scheduled) {
-		for (chan = 0; chan < tx_channel_count; chan++) {
-			if (status[chan] & handle_tx) {
-				/* It doesn't matter what rx queue we choose
-				 * here. We use 0 since it always exists.
-				 */
-				struct stmmac_rx_queue *rx_q =
-					&priv->rx_queue[0];
+	for (chan = 0; chan < tx_channel_count; chan++) {
+		if (status[chan] & handle_tx) {
+			struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
 
-				if (likely(napi_schedule_prep(&rx_q->napi))) {
-					stmmac_disable_dma_irq(priv,
-							priv->ioaddr, chan);
-					__napi_schedule(&rx_q->napi);
-				}
-				break;
+			if (likely(napi_schedule_prep(&rx_q->napi))) {
+				stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
+				__napi_schedule(&rx_q->napi);
 			}
 		}
 	}
@@ -2241,13 +2231,18 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
  */
 static void stmmac_tx_timer(struct timer_list *t)
 {
-	struct stmmac_priv *priv = from_timer(priv, t, txtimer);
-	u32 tx_queues_count = priv->plat->tx_queues_to_use;
-	u32 queue;
+	struct stmmac_tx_queue *tx_q = from_timer(tx_q, t, txtimer);
+	struct stmmac_priv *priv = tx_q->priv_data;
+	struct napi_struct *napi;
+
+	napi = &priv->rx_queue[tx_q->queue_index].napi;
 
-	/* let's scan all the tx queues */
-	for (queue = 0; queue < tx_queues_count; queue++)
-		stmmac_tx_clean(priv, queue);
+	if (napi_schedule_prep(napi)) {
+		stmmac_disable_dma_irq(priv, priv->ioaddr, tx_q->queue_index);
+		__napi_schedule(napi);
+	}
+
+	tx_q->tx_timer_active = 0;
 }
 
 /**
@@ -2260,11 +2255,17 @@ static void stmmac_tx_timer(struct timer_list *t)
  */
 static void stmmac_init_tx_coalesce(struct stmmac_priv *priv)
 {
+	u32 tx_channel_count = priv->plat->tx_queues_to_use;
+	u32 chan;
+
 	priv->tx_coal_frames = STMMAC_TX_FRAMES;
 	priv->tx_coal_timer = STMMAC_COAL_TX_TIMER;
-	timer_setup(&priv->txtimer, stmmac_tx_timer, 0);
-	priv->txtimer.expires = STMMAC_COAL_TIMER(priv->tx_coal_timer);
-	add_timer(&priv->txtimer);
+
+	for (chan = 0; chan < tx_channel_count; chan++) {
+		struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
+
+		timer_setup(&tx_q->txtimer, stmmac_tx_timer, 0);
+	}
 }
 
 static void stmmac_set_rings_length(struct stmmac_priv *priv)
@@ -2592,6 +2593,7 @@ static void stmmac_hw_teardown(struct net_device *dev)
 static int stmmac_open(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
+	u32 chan;
 	int ret;
 
 	stmmac_check_ether_addr(priv);
@@ -2688,7 +2690,9 @@ static int stmmac_open(struct net_device *dev)
 	if (dev->phydev)
 		phy_stop(dev->phydev);
 
-	del_timer_sync(&priv->txtimer);
+	for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
+		del_timer_sync(&priv->tx_queue[chan].txtimer);
+
 	stmmac_hw_teardown(dev);
 init_error:
 	free_dma_desc_resources(priv);
@@ -2708,6 +2712,7 @@ static int stmmac_open(struct net_device *dev)
 static int stmmac_release(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
+	u32 chan;
 
 	if (priv->eee_enabled)
 		del_timer_sync(&priv->eee_ctrl_timer);
@@ -2722,7 +2727,8 @@ static int stmmac_release(struct net_device *dev)
 
 	stmmac_disable_all_queues(priv);
 
-	del_timer_sync(&priv->txtimer);
+	for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
+		del_timer_sync(&priv->tx_queue[chan].txtimer);
 
 	/* Free the IRQ lines */
 	free_irq(dev->irq, dev);
@@ -2828,6 +2834,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	int tmp_pay_len = 0;
 	u32 pay_len, mss;
 	u8 proto_hdr_len;
+	bool tx_ic;
 	int i;
 
 	tx_q = &priv->tx_queue[queue];
@@ -2936,12 +2943,17 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	priv->xstats.tx_tso_nfrags += nfrags;
 
 	/* Manage tx mitigation */
-	priv->tx_count_frames += nfrags + 1;
-	if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
-		mod_timer(&priv->txtimer,
-			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
-	} else {
-		priv->tx_count_frames = 0;
+	tx_q->tx_count_frames += nfrags + 1;
+	if (!priv->tx_coal_frames)
+		tx_ic = false;
+	else if ((nfrags + 1) > priv->tx_coal_frames)
+		tx_ic = true;
+	else if ((tx_q->tx_count_frames % priv->tx_coal_frames) < (nfrags + 1))
+		tx_ic = true;
+	else
+		tx_ic = false;
+
+	if (tx_ic) {
 		stmmac_set_tx_ic(priv, desc);
 		priv->xstats.tx_set_ic_bit++;
 	}
@@ -2994,6 +3006,12 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
 
+	if (priv->tx_coal_timer && !tx_q->tx_timer_active) {
+		tx_q->tx_timer_active = 1;
+		mod_timer(&tx_q->txtimer,
+				STMMAC_COAL_TIMER(priv->tx_coal_timer));
+	}
+
 	return NETDEV_TX_OK;
 
 dma_map_err:
@@ -3024,6 +3042,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct stmmac_tx_queue *tx_q;
 	unsigned int enh_desc;
 	unsigned int des;
+	bool tx_ic;
 
 	tx_q = &priv->tx_queue[queue];
 
@@ -3146,17 +3165,19 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * This approach takes care about the fragments: desc is the first
 	 * element in case of no SG.
 	 */
-	priv->tx_count_frames += nfrags + 1;
-	if (likely(priv->tx_coal_frames > priv->tx_count_frames) &&
-	    !priv->tx_timer_armed) {
-		mod_timer(&priv->txtimer,
-			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
-		priv->tx_timer_armed = true;
-	} else {
-		priv->tx_count_frames = 0;
+	tx_q->tx_count_frames += nfrags + 1;
+	if (!priv->tx_coal_frames)
+		tx_ic = false;
+	else if ((nfrags + 1) > priv->tx_coal_frames)
+		tx_ic = true;
+	else if ((tx_q->tx_count_frames % priv->tx_coal_frames) < (nfrags + 1))
+		tx_ic = true;
+	else
+		tx_ic = false;
+
+	if (tx_ic) {
 		stmmac_set_tx_ic(priv, desc);
 		priv->xstats.tx_set_ic_bit++;
-		priv->tx_timer_armed = false;
 	}
 
 	skb_tx_timestamp(skb);
@@ -3204,6 +3225,12 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	stmmac_enable_dma_transmission(priv, priv->ioaddr);
 	stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
 
+	if (priv->tx_coal_timer && !tx_q->tx_timer_active) {
+		tx_q->tx_timer_active = 1;
+		mod_timer(&tx_q->txtimer,
+				STMMAC_COAL_TIMER(priv->tx_coal_timer));
+	}
+
 	return NETDEV_TX_OK;
 
 dma_map_err:
@@ -3522,16 +3549,12 @@ static int stmmac_poll(struct napi_struct *napi, int budget)
 	struct stmmac_rx_queue *rx_q =
 		container_of(napi, struct stmmac_rx_queue, napi);
 	struct stmmac_priv *priv = rx_q->priv_data;
-	u32 tx_count = priv->plat->tx_queues_to_use;
 	u32 chan = rx_q->queue_index;
 	int work_done = 0;
-	u32 queue;
 
 	priv->xstats.napi_poll++;
 
-	/* check all the queues */
-	for (queue = 0; queue < tx_count; queue++)
-		stmmac_tx_clean(priv, queue);
+	stmmac_tx_clean(priv, chan);
 
 	work_done = stmmac_rx(priv, budget, rx_q->queue_index);
 	if (work_done < budget) {
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [RFT net-next] net: stmmac: Rework coalesce timer and fix multi-queue races
  2018-08-30 10:37 [RFT net-next] net: stmmac: Rework coalesce timer and fix multi-queue races Jose Abreu
@ 2018-09-03  8:56 ` Jerome Brunet
  2018-09-03  9:36   ` Jose Abreu
  0 siblings, 1 reply; 11+ messages in thread
From: Jerome Brunet @ 2018-09-03  8:56 UTC (permalink / raw)
  To: Jose Abreu, netdev
  Cc: Martin Blumenstingl, David S. Miller, Joao Pinto,
	Giuseppe Cavallaro, Alexandre Torgue

On Thu, 2018-08-30 at 11:37 +0100, Jose Abreu wrote:
> [ As for now this is only for testing! ]
> 
> This follows David Miller advice and tries to fix coalesce timer in
> multi-queue scenarios.
> 
> We are now using per-queue coalesce values and per-queue TX timer. This
> assumes that tx_queues == rx_queues, which can not be necessarly true.
> Official patch will need to have this fixed.
> 
> Coalesce timer default values was changed to 1ms and the coalesce frames
> to 25.
> 
> Tested in B2B setup between XGMAC2 and GMAC5.

Tested on Amlogic meson-axg-s400. No regression seen so far.
(arch/arm64/boot/dts/amlogic/meson-axg-s400.dts)

As far as I understand from the device tree parsing, this platform (and all
other amlogic platforms) use single queue.

---

Jose,

On another topic doing iperf3 test on amlogic's devices we seen a strange
behavior.

Doing Tx or Rx test usually works fine (700MBps to 900MBps depending on the
platform). However, when doing both Rx and Tx at the same time, We see the Tx
throughput dropping significantly (~30MBps) and lot of TCP retries.

Would you any idea what might be our problem ? or how to start investigating
this ?

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFT net-next] net: stmmac: Rework coalesce timer and fix multi-queue races
  2018-09-03  8:56 ` Jerome Brunet
@ 2018-09-03  9:36   ` Jose Abreu
  2018-09-03 10:16     ` Jerome Brunet
  0 siblings, 1 reply; 11+ messages in thread
From: Jose Abreu @ 2018-09-03  9:36 UTC (permalink / raw)
  To: Jerome Brunet, Jose Abreu, netdev
  Cc: Martin Blumenstingl, David S. Miller, Joao Pinto,
	Giuseppe Cavallaro, Alexandre Torgue

Hi Jerome,

On 03-09-2018 09:56, Jerome Brunet wrote:
> On Thu, 2018-08-30 at 11:37 +0100, Jose Abreu wrote:
>> [ As for now this is only for testing! ]
>>
>> This follows David Miller advice and tries to fix coalesce timer in
>> multi-queue scenarios.
>>
>> We are now using per-queue coalesce values and per-queue TX timer. This
>> assumes that tx_queues == rx_queues, which can not be necessarly true.
>> Official patch will need to have this fixed.
>>
>> Coalesce timer default values was changed to 1ms and the coalesce frames
>> to 25.
>>
>> Tested in B2B setup between XGMAC2 and GMAC5.
> Tested on Amlogic meson-axg-s400. No regression seen so far.
> (arch/arm64/boot/dts/amlogic/meson-axg-s400.dts)
>
> As far as I understand from the device tree parsing, this platform (and all
> other amlogic platforms) use single queue.

Thanks for testing! I will send a formal patch once I get around
the problem of rx queues != tx queues.

>
> ---
>
> Jose,
>
> On another topic doing iperf3 test on amlogic's devices we seen a strange
> behavior.
>
> Doing Tx or Rx test usually works fine (700MBps to 900MBps depending on the
> platform). However, when doing both Rx and Tx at the same time, We see the Tx
> throughput dropping significantly (~30MBps) and lot of TCP retries.
>
> Would you any idea what might be our problem ? or how to start investigating
> this ?
>

I'm not able to reproduce this here but I'm using multiple queue.
I will try with single queue. In the meantime please try this
patch (it shall be applied directly on top of this RFT):


--->8
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index ae26a6e8608e..1407975320aa 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2210,8 +2210,7 @@ static int stmmac_init_dma_engine(struct
stmmac_priv *priv)
                stmmac_init_tx_chan(priv, priv->ioaddr,
priv->plat->dma_cfg,
                                    tx_q->dma_tx_phy, chan);
 
-               tx_q->tx_tail_addr = tx_q->dma_tx_phy +
-                           (DMA_TX_SIZE * sizeof(struct dma_desc));
+               tx_q->tx_tail_addr = tx_q->dma_tx_phy;
                stmmac_set_tx_tail_ptr(priv, priv->ioaddr,
                                       tx_q->tx_tail_addr, chan);
        }
@@ -3004,6 +3003,7 @@ static netdev_tx_t stmmac_tso_xmit(struct
sk_buff *skb, struct net_device *dev)
 
        netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue),
skb->len);
 
+       tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx *
sizeof(*desc));
        stmmac_set_tx_tail_ptr(priv, priv->ioaddr,
tx_q->tx_tail_addr, queue);
 
        if (priv->tx_coal_timer && !tx_q->tx_timer_active) {
@@ -3223,6 +3223,8 @@ static netdev_tx_t stmmac_xmit(struct
sk_buff *skb, struct net_device *dev)
        netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue),
skb->len);
 
        stmmac_enable_dma_transmission(priv, priv->ioaddr);
+
+       tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx *
sizeof(*desc));
        stmmac_set_tx_tail_ptr(priv, priv->ioaddr,
tx_q->tx_tail_addr, queue);
 
        if (priv->tx_coal_timer && !tx_q->tx_timer_active) {
--->8

Thanks and Best Regards,
Jose Miguel Abreu

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [RFT net-next] net: stmmac: Rework coalesce timer and fix multi-queue races
  2018-09-03  9:36   ` Jose Abreu
@ 2018-09-03 10:16     ` Jerome Brunet
  2018-09-03 11:47       ` Jose Abreu
  0 siblings, 1 reply; 11+ messages in thread
From: Jerome Brunet @ 2018-09-03 10:16 UTC (permalink / raw)
  To: Jose Abreu, netdev
  Cc: Martin Blumenstingl, David S. Miller, Joao Pinto,
	Giuseppe Cavallaro, Alexandre Torgue

On Mon, 2018-09-03 at 10:36 +0100, Jose Abreu wrote:
> Hi Jerome,
> 
> On 03-09-2018 09:56, Jerome Brunet wrote:
> > On Thu, 2018-08-30 at 11:37 +0100, Jose Abreu wrote:
> > > [ As for now this is only for testing! ]
> > > 
> > > This follows David Miller advice and tries to fix coalesce timer in
> > > multi-queue scenarios.
> > > 
> > > We are now using per-queue coalesce values and per-queue TX timer. This
> > > assumes that tx_queues == rx_queues, which can not be necessarly true.
> > > Official patch will need to have this fixed.
> > > 
> > > Coalesce timer default values was changed to 1ms and the coalesce frames
> > > to 25.
> > > 
> > > Tested in B2B setup between XGMAC2 and GMAC5.
> > 
> > Tested on Amlogic meson-axg-s400. No regression seen so far.
> > (arch/arm64/boot/dts/amlogic/meson-axg-s400.dts)
> > 
> > As far as I understand from the device tree parsing, this platform (and all
> > other amlogic platforms) use single queue.
> 
> Thanks for testing! I will send a formal patch once I get around
> the problem of rx queues != tx queues.
> 
> > 
> > ---
> > 
> > Jose,
> > 
> > On another topic doing iperf3 test on amlogic's devices we seen a strange
> > behavior.
> > 
> > Doing Tx or Rx test usually works fine (700MBps to 900MBps depending on the
> > platform). However, when doing both Rx and Tx at the same time, We see the Tx
> > throughput dropping significantly (~30MBps) and lot of TCP retries.
> > 
> > Would you any idea what might be our problem ? or how to start investigating
> > this ?
> > 
> 
> I'm not able to reproduce this here but I'm using multiple queue.
> I will try with single queue. In the meantime please try this
> patch (it shall be applied directly on top of this RFT):

No notable change. Rx is fine but Tx:
[  5]   3.00-4.00   sec  3.55 MBytes  29.8 Mbits/sec   51   12.7 KBytes

I suppose the problem as something to do with the retries. When doing Tx test
alone, we don't have such a things a throughput where we expect it to be.

By the way, your mailer (and its auto 80 column rule I suppose) made the patch
below a bit harder to apply

> 
> 
> --->8
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> index ae26a6e8608e..1407975320aa 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -2210,8 +2210,7 @@ static int stmmac_init_dma_engine(struct
> stmmac_priv *priv)
>                 stmmac_init_tx_chan(priv, priv->ioaddr,
> priv->plat->dma_cfg,
>                                     tx_q->dma_tx_phy, chan);
>  
> -               tx_q->tx_tail_addr = tx_q->dma_tx_phy +
> -                           (DMA_TX_SIZE * sizeof(struct dma_desc));
> +               tx_q->tx_tail_addr = tx_q->dma_tx_phy;
>                 stmmac_set_tx_tail_ptr(priv, priv->ioaddr,
>                                        tx_q->tx_tail_addr, chan);
>         }
> @@ -3004,6 +3003,7 @@ static netdev_tx_t stmmac_tso_xmit(struct
> sk_buff *skb, struct net_device *dev)
>  
>         netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue),
> skb->len);
>  
> +       tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx *
> sizeof(*desc));
>         stmmac_set_tx_tail_ptr(priv, priv->ioaddr,
> tx_q->tx_tail_addr, queue);
>  
>         if (priv->tx_coal_timer && !tx_q->tx_timer_active) {
> @@ -3223,6 +3223,8 @@ static netdev_tx_t stmmac_xmit(struct
> sk_buff *skb, struct net_device *dev)
>         netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue),
> skb->len);
>  
>         stmmac_enable_dma_transmission(priv, priv->ioaddr);
> +
> +       tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx *
> sizeof(*desc));
>         stmmac_set_tx_tail_ptr(priv, priv->ioaddr,
> tx_q->tx_tail_addr, queue);
>  
>         if (priv->tx_coal_timer && !tx_q->tx_timer_active) {
> --->8
> 
> Thanks and Best Regards,
> Jose Miguel Abreu

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFT net-next] net: stmmac: Rework coalesce timer and fix multi-queue races
  2018-09-03 10:16     ` Jerome Brunet
@ 2018-09-03 11:47       ` Jose Abreu
  2018-09-03 14:10         ` Jerome Brunet
  0 siblings, 1 reply; 11+ messages in thread
From: Jose Abreu @ 2018-09-03 11:47 UTC (permalink / raw)
  To: Jerome Brunet, Jose Abreu, netdev
  Cc: Martin Blumenstingl, David S. Miller, Joao Pinto,
	Giuseppe Cavallaro, Alexandre Torgue

On 03-09-2018 11:16, Jerome Brunet wrote:
> No notable change. Rx is fine but Tx:
> [  5]   3.00-4.00   sec  3.55 MBytes  29.8 Mbits/sec   51   12.7 KBytes
>
> I suppose the problem as something to do with the retries. When doing Tx test
> alone, we don't have such a things a throughput where we expect it to be.

Yeah, I just remembered you are not using GMAC4 so it wouldn't
make a difference. Is your version 3.710? If so please try adding
the following compatible to your DT bindings "snps,dwmac-3.710".

>
> By the way, your mailer (and its auto 80 column rule I suppose) made the patch
> below a bit harder to apply

Sorry. Next time I will send as attachment.

Thanks and Best Regards,
Jose Miguel Abreu

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFT net-next] net: stmmac: Rework coalesce timer and fix multi-queue races
  2018-09-03 11:47       ` Jose Abreu
@ 2018-09-03 14:10         ` Jerome Brunet
  2018-09-03 15:22           ` Jose Abreu
  0 siblings, 1 reply; 11+ messages in thread
From: Jerome Brunet @ 2018-09-03 14:10 UTC (permalink / raw)
  To: Jose Abreu, netdev
  Cc: Martin Blumenstingl, David S. Miller, Joao Pinto,
	Giuseppe Cavallaro, Alexandre Torgue

On Mon, 2018-09-03 at 12:47 +0100, Jose Abreu wrote:
> On 03-09-2018 11:16, Jerome Brunet wrote:
> > No notable change. Rx is fine but Tx:
> > [  5]   3.00-4.00   sec  3.55 MBytes  29.8 Mbits/sec   51   12.7 KBytes
> > 
> > I suppose the problem as something to do with the retries. When doing Tx test
> > alone, we don't have such a things a throughput where we expect it to be.
> 
> Yeah, I just remembered you are not using GMAC4 so it wouldn't
> make a difference. Is your version 3.710? If so please try adding
> the following compatible to your DT bindings "snps,dwmac-3.710".

According to the documentation, it is a 3.70a but I learn (the hard way) not to
trust the documentation too much. Is there anyway to make sure which version we
have. Like a register to read ?

Out of curiosity, I changed the compatible to "snps,dwmac-3.710" anyway. For
some reason, the MDIO bus failed to register with this. Since it is not the
documented version, I did not check why.

> 
> > 
> > By the way, your mailer (and its auto 80 column rule I suppose) made the patch
> > below a bit harder to apply
> 
> Sorry. Next time I will send as attachment.

No worries

> 
> Thanks and Best Regards,
> Jose Miguel Abreu

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFT net-next] net: stmmac: Rework coalesce timer and fix multi-queue races
  2018-09-03 14:10         ` Jerome Brunet
@ 2018-09-03 15:22           ` Jose Abreu
  2018-09-03 15:38             ` Jerome Brunet
  0 siblings, 1 reply; 11+ messages in thread
From: Jose Abreu @ 2018-09-03 15:22 UTC (permalink / raw)
  To: Jerome Brunet, Jose Abreu, netdev
  Cc: Martin Blumenstingl, David S. Miller, Joao Pinto,
	Giuseppe Cavallaro, Alexandre Torgue

On 03-09-2018 15:10, Jerome Brunet wrote:
> On Mon, 2018-09-03 at 12:47 +0100, Jose Abreu wrote:
>> On 03-09-2018 11:16, Jerome Brunet wrote:
>>> No notable change. Rx is fine but Tx:
>>> [  5]   3.00-4.00   sec  3.55 MBytes  29.8 Mbits/sec   51   12.7 KBytes
>>>
>>> I suppose the problem as something to do with the retries. When doing Tx test
>>> alone, we don't have such a things a throughput where we expect it to be.
>> Yeah, I just remembered you are not using GMAC4 so it wouldn't
>> make a difference. Is your version 3.710? If so please try adding
>> the following compatible to your DT bindings "snps,dwmac-3.710".
> According to the documentation, it is a 3.70a but I learn (the hard way) not to
> trust the documentation too much. Is there anyway to make sure which version we
> have. Like a register to read ?

It should be dumped at probe by a string like this one:

"User ID: 0xXY, Synopsys ID: 0xXZ"

>
> Out of curiosity, I changed the compatible to "snps,dwmac-3.710" anyway. For
> some reason, the MDIO bus failed to register with this. Since it is not the
> documented version, I did not check why.

No you can't change. You need to add it. So it should stay like this:

compatible = "amlogic,meson-gxbb-dwmac", "snps,dwmac",
"snps,dwmac-3.710";

Thanks and Best Regards,
Jose Miguel Abreu

>
>>> By the way, your mailer (and its auto 80 column rule I suppose) made the patch
>>> below a bit harder to apply
>> Sorry. Next time I will send as attachment.
> No worries
>
>> Thanks and Best Regards,
>> Jose Miguel Abreu
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFT net-next] net: stmmac: Rework coalesce timer and fix multi-queue races
  2018-09-03 15:22           ` Jose Abreu
@ 2018-09-03 15:38             ` Jerome Brunet
  2018-09-03 15:47               ` Jose Abreu
  0 siblings, 1 reply; 11+ messages in thread
From: Jerome Brunet @ 2018-09-03 15:38 UTC (permalink / raw)
  To: Jose Abreu, netdev
  Cc: Martin Blumenstingl, David S. Miller, Joao Pinto,
	Giuseppe Cavallaro, Alexandre Torgue

On Mon, 2018-09-03 at 16:22 +0100, Jose Abreu wrote:
> On 03-09-2018 15:10, Jerome Brunet wrote:
> > On Mon, 2018-09-03 at 12:47 +0100, Jose Abreu wrote:
> > > On 03-09-2018 11:16, Jerome Brunet wrote:
> > > > No notable change. Rx is fine but Tx:
> > > > [  5]   3.00-4.00   sec  3.55 MBytes  29.8 Mbits/sec   51   12.7 KBytes
> > > > 
> > > > I suppose the problem as something to do with the retries. When doing Tx test
> > > > alone, we don't have such a things a throughput where we expect it to be.
> > > 
> > > Yeah, I just remembered you are not using GMAC4 so it wouldn't
> > > make a difference. Is your version 3.710? If so please try adding
> > > the following compatible to your DT bindings "snps,dwmac-3.710".
> > 
> > According to the documentation, it is a 3.70a but I learn (the hard way) not to
> > trust the documentation too much. Is there anyway to make sure which version we
> > have. Like a register to read ?
> 
> It should be dumped at probe by a string like this one:
> 
> "User ID: 0xXY, Synopsys ID: 0xXZ"

User ID: 0x11, Synopsys ID: 0x37 ? What to does it map to ?

> 
> > 
> > Out of curiosity, I changed the compatible to "snps,dwmac-3.710" anyway. For
> > some reason, the MDIO bus failed to register with this. Since it is not the
> > documented version, I did not check why.
> 
> No you can't change. You need to add it. So it should stay like this:
> 
> compatible = "amlogic,meson-gxbb-dwmac", "snps,dwmac",
> "snps,dwmac-3.710";

Adding "snps,dwmac-3.710" does not change anything for me.
Having both Tx and Rx at the same time still wreck Tx throughput unfortunately 

> 
> Thanks and Best Regards,
> Jose Miguel Abreu
> 
> > 
> > > > By the way, your mailer (and its auto 80 column rule I suppose) made the patch
> > > > below a bit harder to apply
> > > 
> > > Sorry. Next time I will send as attachment.
> > 
> > No worries
> > 
> > > Thanks and Best Regards,
> > > Jose Miguel Abreu
> 
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFT net-next] net: stmmac: Rework coalesce timer and fix multi-queue races
  2018-09-03 15:38             ` Jerome Brunet
@ 2018-09-03 15:47               ` Jose Abreu
  2018-09-04  9:16                 ` Jerome Brunet
  0 siblings, 1 reply; 11+ messages in thread
From: Jose Abreu @ 2018-09-03 15:47 UTC (permalink / raw)
  To: Jerome Brunet, Jose Abreu, netdev
  Cc: Martin Blumenstingl, David S. Miller, Joao Pinto,
	Giuseppe Cavallaro, Alexandre Torgue

On 03-09-2018 16:38, Jerome Brunet wrote:
> On Mon, 2018-09-03 at 16:22 +0100, Jose Abreu wrote:
>> On 03-09-2018 15:10, Jerome Brunet wrote:
>>> On Mon, 2018-09-03 at 12:47 +0100, Jose Abreu wrote:
>>>> On 03-09-2018 11:16, Jerome Brunet wrote:
>>>>> No notable change. Rx is fine but Tx:
>>>>> [  5]   3.00-4.00   sec  3.55 MBytes  29.8 Mbits/sec   51   12.7 KBytes
>>>>>
>>>>> I suppose the problem as something to do with the retries. When doing Tx test
>>>>> alone, we don't have such a things a throughput where we expect it to be.
>>>> Yeah, I just remembered you are not using GMAC4 so it wouldn't
>>>> make a difference. Is your version 3.710? If so please try adding
>>>> the following compatible to your DT bindings "snps,dwmac-3.710".
>>> According to the documentation, it is a 3.70a but I learn (the hard way) not to
>>> trust the documentation too much. Is there anyway to make sure which version we
>>> have. Like a register to read ?
>> It should be dumped at probe by a string like this one:
>>
>> "User ID: 0xXY, Synopsys ID: 0xXZ"
> User ID: 0x11, Synopsys ID: 0x37 ? What to does it map to ?

Its 3.7. As for the User ID this can be changed by final HW team
so I can't confirm what it means.

>
>>> Out of curiosity, I changed the compatible to "snps,dwmac-3.710" anyway. For
>>> some reason, the MDIO bus failed to register with this. Since it is not the
>>> documented version, I did not check why.
>> No you can't change. You need to add it. So it should stay like this:
>>
>> compatible = "amlogic,meson-gxbb-dwmac", "snps,dwmac",
>> "snps,dwmac-3.710";
> Adding "snps,dwmac-3.710" does not change anything for me.
> Having both Tx and Rx at the same time still wreck Tx throughput unfortunately 

Okay, so you said that there are lots of retries: can you disable
COE at all ? (it should be something like: ethtool -K eth0 rx off
tx off).

Thanks and Best Regards,
Jose Miguel Abreu

>
>> Thanks and Best Regards,
>> Jose Miguel Abreu
>>
>>>>> By the way, your mailer (and its auto 80 column rule I suppose) made the patch
>>>>> below a bit harder to apply
>>>> Sorry. Next time I will send as attachment.
>>> No worries
>>>
>>>> Thanks and Best Regards,
>>>> Jose Miguel Abreu
>>
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFT net-next] net: stmmac: Rework coalesce timer and fix multi-queue races
  2018-09-03 15:47               ` Jose Abreu
@ 2018-09-04  9:16                 ` Jerome Brunet
  2018-09-04 10:00                   ` Jose Abreu
  0 siblings, 1 reply; 11+ messages in thread
From: Jerome Brunet @ 2018-09-04  9:16 UTC (permalink / raw)
  To: Jose Abreu, netdev
  Cc: Martin Blumenstingl, David S. Miller, Joao Pinto,
	Giuseppe Cavallaro, Alexandre Torgue

On Mon, 2018-09-03 at 16:47 +0100, Jose Abreu wrote:
> On 03-09-2018 16:38, Jerome Brunet wrote:
> > On Mon, 2018-09-03 at 16:22 +0100, Jose Abreu wrote:
> > > On 03-09-2018 15:10, Jerome Brunet wrote:
> > > > On Mon, 2018-09-03 at 12:47 +0100, Jose Abreu wrote:
> > > > > On 03-09-2018 11:16, Jerome Brunet wrote:
> > > > > > No notable change. Rx is fine but Tx:
> > > > > > [  5]   3.00-4.00   sec  3.55 MBytes  29.8 Mbits/sec   51   12.7 KBytes
> > > > > > 
> > > > > > I suppose the problem as something to do with the retries. When doing Tx test
> > > > > > alone, we don't have such a things a throughput where we expect it to be.
> > > > > 
> > > > > Yeah, I just remembered you are not using GMAC4 so it wouldn't
> > > > > make a difference. Is your version 3.710? If so please try adding
> > > > > the following compatible to your DT bindings "snps,dwmac-3.710".
> > > > 
> > > > According to the documentation, it is a 3.70a but I learn (the hard way) not to
> > > > trust the documentation too much. Is there anyway to make sure which version we
> > > > have. Like a register to read ?
> > > 
> > > It should be dumped at probe by a string like this one:
> > > 
> > > "User ID: 0xXY, Synopsys ID: 0xXZ"
> > 
> > User ID: 0x11, Synopsys ID: 0x37 ? What to does it map to ?
> 
> Its 3.7. As for the User ID this can be changed by final HW team
> so I can't confirm what it means.
> 

Is there anyway to know if it is a 3.70a or 3.71 ?

> > 
> > > > Out of curiosity, I changed the compatible to "snps,dwmac-3.710" anyway. For
> > > > some reason, the MDIO bus failed to register with this. Since it is not the
> > > > documented version, I did not check why.
> > > 
> > > No you can't change. You need to add it. So it should stay like this:
> > > 
> > > compatible = "amlogic,meson-gxbb-dwmac", "snps,dwmac",
> > > "snps,dwmac-3.710";
> > 
> > Adding "snps,dwmac-3.710" does not change anything for me.
> > Having both Tx and Rx at the same time still wreck Tx throughput unfortunately 
> 
> Okay, so you said that there are lots of retries: can you disable
> COE at all ? (it should be something like: ethtool -K eth0 rx off
> tx off).

Done but no change.

> 
> Thanks and Best Regards,
> Jose Miguel Abreu
> 
> > 
> > > Thanks and Best Regards,
> > > Jose Miguel Abreu
> > > 
> > > > > > By the way, your mailer (and its auto 80 column rule I suppose) made the patch
> > > > > > below a bit harder to apply
> > > > > 
> > > > > Sorry. Next time I will send as attachment.
> > > > 
> > > > No worries
> > > > 
> > > > > Thanks and Best Regards,
> > > > > Jose Miguel Abreu
> 
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFT net-next] net: stmmac: Rework coalesce timer and fix multi-queue races
  2018-09-04  9:16                 ` Jerome Brunet
@ 2018-09-04 10:00                   ` Jose Abreu
  0 siblings, 0 replies; 11+ messages in thread
From: Jose Abreu @ 2018-09-04 10:00 UTC (permalink / raw)
  To: Jerome Brunet, Jose Abreu, netdev
  Cc: Martin Blumenstingl, David S. Miller, Joao Pinto,
	Giuseppe Cavallaro, Alexandre Torgue



On 04-09-2018 10:16, Jerome Brunet wrote:
> On Mon, 2018-09-03 at 16:47 +0100, Jose Abreu wrote:
>> On 03-09-2018 16:38, Jerome Brunet wrote:
>>> On Mon, 2018-09-03 at 16:22 +0100, Jose Abreu wrote:
>>>> On 03-09-2018 15:10, Jerome Brunet wrote:
>>>>> On Mon, 2018-09-03 at 12:47 +0100, Jose Abreu wrote:
>>>>>> On 03-09-2018 11:16, Jerome Brunet wrote:
>>>>>>> No notable change. Rx is fine but Tx:
>>>>>>> [  5]   3.00-4.00   sec  3.55 MBytes  29.8 Mbits/sec   51   12.7 KBytes
>>>>>>>
>>>>>>> I suppose the problem as something to do with the retries. When doing Tx test
>>>>>>> alone, we don't have such a things a throughput where we expect it to be.
>>>>>> Yeah, I just remembered you are not using GMAC4 so it wouldn't
>>>>>> make a difference. Is your version 3.710? If so please try adding
>>>>>> the following compatible to your DT bindings "snps,dwmac-3.710".
>>>>> According to the documentation, it is a 3.70a but I learn (the hard way) not to
>>>>> trust the documentation too much. Is there anyway to make sure which version we
>>>>> have. Like a register to read ?
>>>> It should be dumped at probe by a string like this one:
>>>>
>>>> "User ID: 0xXY, Synopsys ID: 0xXZ"
>>> User ID: 0x11, Synopsys ID: 0x37 ? What to does it map to ?
>> Its 3.7. As for the User ID this can be changed by final HW team
>> so I can't confirm what it means.
>>
> Is there anyway to know if it is a 3.70a or 3.71 ?

If the user ID wasn't changed from default then its 3.71.

>
>>>>> Out of curiosity, I changed the compatible to "snps,dwmac-3.710" anyway. For
>>>>> some reason, the MDIO bus failed to register with this. Since it is not the
>>>>> documented version, I did not check why.
>>>> No you can't change. You need to add it. So it should stay like this:
>>>>
>>>> compatible = "amlogic,meson-gxbb-dwmac", "snps,dwmac",
>>>> "snps,dwmac-3.710";
>>> Adding "snps,dwmac-3.710" does not change anything for me.
>>> Having both Tx and Rx at the same time still wreck Tx throughput unfortunately 
>> Okay, so you said that there are lots of retries: can you disable
>> COE at all ? (it should be something like: ethtool -K eth0 rx off
>> tx off).
> Done but no change.

Ok. Are you able to analyze the sent / received packets using
pcap so that we can understand why there are lots of retries ?

Thanks and Best Regards,
Jose Miguel Abreu

>
>> Thanks and Best Regards,
>> Jose Miguel Abreu
>>
>>>> Thanks and Best Regards,
>>>> Jose Miguel Abreu
>>>>
>>>>>>> By the way, your mailer (and its auto 80 column rule I suppose) made the patch
>>>>>>> below a bit harder to apply
>>>>>> Sorry. Next time I will send as attachment.
>>>>> No worries
>>>>>
>>>>>> Thanks and Best Regards,
>>>>>> Jose Miguel Abreu
>>
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2018-09-04 14:25 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-08-30 10:37 [RFT net-next] net: stmmac: Rework coalesce timer and fix multi-queue races Jose Abreu
2018-09-03  8:56 ` Jerome Brunet
2018-09-03  9:36   ` Jose Abreu
2018-09-03 10:16     ` Jerome Brunet
2018-09-03 11:47       ` Jose Abreu
2018-09-03 14:10         ` Jerome Brunet
2018-09-03 15:22           ` Jose Abreu
2018-09-03 15:38             ` Jerome Brunet
2018-09-03 15:47               ` Jose Abreu
2018-09-04  9:16                 ` Jerome Brunet
2018-09-04 10:00                   ` Jose Abreu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.