From: Jose Abreu
Subject: Re: [net-next, v2, 1/2] net: stmmac: Rework coalesce timer and fix multi-queue races
Date: Mon, 10 Sep 2018 13:52:55 +0100
To: Neil Armstrong, Jose Abreu
Cc: Jerome Brunet, Martin Blumenstingl, "David S. Miller", Joao Pinto, "Giuseppe Cavallaro", Alexandre Torgue

Hi Neil,

On 10-09-2018 12:43, Neil Armstrong wrote:
> Hi Jose,
>
> On 10/09/2018 11:14, Jose Abreu wrote:
>> This follows David Miller's advice and tries to fix the coalesce timer in
>> multi-queue scenarios.
>>
>> We are now using per-queue coalesce values and a per-queue TX timer.
>>
>> The coalesce timer default value was changed to 1ms and the coalesce
>> frames to 25.
>>
>> Tested in a B2B setup between XGMAC2 and GMAC5.
>>
>> Signed-off-by: Jose Abreu
>> Cc: Jerome Brunet
>> Cc: Martin Blumenstingl
>> Cc: David S. Miller
>> Cc: Joao Pinto
>> Cc: Giuseppe Cavallaro
>> Cc: Alexandre Torgue
>> ---
>> Jerome,
> Jerome is on holidays...
>
>> Can you please test if this one is okay?
>
> I tested this patch on top of 4.18.7 with the previous patch (4ae0169fd1b3) reverted.
>
> The TX or RX stopped a few seconds after iperf3 started
> (iperf3 is running in server mode on the Amlogic A113D board):
>
> $ iperf3 -c 10.1.4.95
> Connecting to host 10.1.4.95, port 5201
> [  4] local 10.1.2.12 port 39906 connected to 10.1.4.95 port 5201
> [ ID] Interval           Transfer     Bandwidth       Retr  Cwnd
> [  4]   0.00-1.00   sec  56.2 MBytes   472 Mbits/sec    0    660 KBytes
> [  4]   1.00-2.00   sec  56.2 MBytes   472 Mbits/sec    0    660 KBytes
> [  4]   2.00-3.00   sec  38.8 MBytes   325 Mbits/sec    1   1.41 KBytes
> [  4]   3.00-4.00   sec  0.00 Bytes    0.00 bits/sec    1   1.41 KBytes
> [  4]   4.00-5.00   sec  0.00 Bytes    0.00 bits/sec    1   1.41 KBytes
> ^C[  4]   5.00-5.61   sec  0.00 Bytes   0.00 bits/sec   0   1.41 KBytes
> - - - - - - - - - - - - - - - - - - - - - - - - -
> [ ID] Interval           Transfer     Bandwidth       Retr
> [  4]   0.00-5.61   sec   151 MBytes   226 Mbits/sec    3             sender
> [  4]   0.00-5.61   sec  0.00 Bytes    0.00 bits/sec                  receiver
> iperf3: interrupt - the client has terminated
>
> $ iperf3 -c 10.1.4.95 -R
> Connecting to host 10.1.4.95, port 5201
> Reverse mode, remote host 10.1.4.95 is sending
> [  4] local 10.1.2.12 port 39894 connected to 10.1.4.95 port 5201
> [ ID] Interval           Transfer     Bandwidth
> [  4]   0.00-1.00   sec  82.2 MBytes   690 Mbits/sec
> [  4]   1.00-2.00   sec  82.6 MBytes   693 Mbits/sec
> [  4]   2.00-3.00   sec  24.2 MBytes   203 Mbits/sec
> [  4]   3.00-4.00   sec  0.00 Bytes    0.00 bits/sec
> [  4]   4.00-5.00   sec  0.00 Bytes    0.00 bits/sec
> ^C[  4]   5.00-5.53   sec  0.00 Bytes   0.00 bits/sec
> - - - - - - - - - - - - - - - - - - - - - - - - -
> [ ID] Interval           Transfer     Bandwidth
> [  4]   0.00-5.53   sec  0.00 Bytes    0.00 bits/sec                  sender
> [  4]   0.00-5.53   sec   189 MBytes   287 Mbits/sec                  receiver
> iperf3: interrupt - the client has terminated
>
> This works for hours without this patch applied.

Damn... I'm able to replicate the issue if I turn SMP off, but it's not
consistent... Can you please try the attached follow-up patch?
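In short, the follow-up keeps the per-queue timer but changes how it is used:
the timer is now armed at the top of the xmit path, before the
descriptor-availability check, so a stopped TX queue still gets cleaned when
the timer fires; the timer callback calls stmmac_tx_clean() directly instead
of scheduling TX NAPI; the per-run limit in stmmac_tx_clean() is dropped
again; and stmmac_tx_poll() now returns min(work_done, budget). The arming
helper it adds boils down to this (condensed excerpt of the attached
0001-fixup_coalesce.patch, see the full diff at the end of this mail):

static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue)
{
	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];

	/* Keep at most one pending coalesce timer per TX queue */
	if (tx_q->tx_timer_active)
		return;

	mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer));
	tx_q->tx_timer_active = true;
}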
It's been running for 10min now and I'm getting ~700Mbps on the same GMAC
as you have (3.71).

Thanks and Best Regards,
Jose Miguel Abreu

>
> Neil
>
>> Thanks and Best Regards,
>> Jose Miguel Abreu
>> ---
>>  drivers/net/ethernet/stmicro/stmmac/common.h      |   4 +-
>>  drivers/net/ethernet/stmicro/stmmac/stmmac.h      |   6 +-
>>  drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 207 ++++++++++++++--------
>>  3 files changed, 135 insertions(+), 82 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
>> index 1854f270ad66..b1b305f8f414 100644
>> --- a/drivers/net/ethernet/stmicro/stmmac/common.h
>> +++ b/drivers/net/ethernet/stmicro/stmmac/common.h
>> @@ -258,10 +258,10 @@ struct stmmac_safety_stats {
>>  #define MAX_DMA_RIWT		0xff
>>  #define MIN_DMA_RIWT		0x20
>>  /* Tx coalesce parameters */
>> -#define STMMAC_COAL_TX_TIMER	40000
>> +#define STMMAC_COAL_TX_TIMER	1000
>>  #define STMMAC_MAX_COAL_TX_TICK	100000
>>  #define STMMAC_TX_MAX_FRAMES	256
>> -#define STMMAC_TX_FRAMES	64
>> +#define STMMAC_TX_FRAMES	25
>>
>>  /* Packets types */
>>  enum packets_types {
>> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
>> index c0a855b7ab3b..957030cfb833 100644
>> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
>> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
>> @@ -48,6 +48,9 @@ struct stmmac_tx_info {
>>
>>  /* Frequently used values are kept adjacent for cache effect */
>>  struct stmmac_tx_queue {
>> +	u32 tx_count_frames;
>> +	int tx_timer_active;
>> +	struct timer_list txtimer;
>>  	u32 queue_index;
>>  	struct stmmac_priv *priv_data;
>>  	struct dma_extended_desc *dma_etx ____cacheline_aligned_in_smp;
>> @@ -59,6 +62,7 @@ struct stmmac_tx_queue {
>>  	dma_addr_t dma_tx_phy;
>>  	u32 tx_tail_addr;
>>  	u32 mss;
>> +	struct napi_struct napi ____cacheline_aligned_in_smp;
>>  };
>>
>>  struct stmmac_rx_queue {
>> @@ -109,14 +113,12 @@ struct stmmac_pps_cfg {
>>
>>  struct stmmac_priv {
>>  	/* Frequently used values are kept adjacent for cache effect */
>> -	u32 tx_count_frames;
>>  	u32 tx_coal_frames;
>>  	u32 tx_coal_timer;
>>
>>  	int tx_coalesce;
>>  	int hwts_tx_en;
>>  	bool tx_path_in_lpi_mode;
>> -	struct timer_list txtimer;
>>  	bool tso;
>>
>>  	unsigned int dma_buf_sz;
>> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
>> index 9f458bb16f2a..9809c2b319fe 100644
>> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
>> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
>> @@ -148,6 +148,7 @@ static void stmmac_verify_args(void)
>>  static void stmmac_disable_all_queues(struct stmmac_priv *priv)
>>  {
>>  	u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
>> +	u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
>>  	u32 queue;
>>
>>  	for (queue = 0; queue < rx_queues_cnt; queue++) {
>> @@ -155,6 +156,12 @@ static void stmmac_disable_all_queues(struct stmmac_priv *priv)
>>
>>  		napi_disable(&rx_q->napi);
>>  	}
>> +
>> +	for (queue = 0; queue < tx_queues_cnt; queue++) {
>> +		struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
>> +
>> +		napi_disable(&tx_q->napi);
>> +	}
>>  }
>>
>>  /**
>> @@ -164,6 +171,7 @@ static void stmmac_disable_all_queues(struct stmmac_priv *priv)
>>  static void stmmac_enable_all_queues(struct stmmac_priv *priv)
>>  {
>>  	u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
>> +	u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
>>  	u32 queue;
>>
>>  	for (queue = 0; queue < rx_queues_cnt; queue++) {
>> @@ -171,6 +179,12 @@ static void stmmac_enable_all_queues(struct stmmac_priv *priv)
>>
>>  		napi_enable(&rx_q->napi);
>>  	}
>> +
>> +	for (queue = 0; queue < tx_queues_cnt; queue++) {
>> +		struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
>> +
>> +		napi_enable(&tx_q->napi);
>> +	}
>>  }
>>
>>  /**
>> @@ -1843,7 +1857,8 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
>>   * @queue: TX queue index
>>   * Description: it reclaims the transmit resources after transmission completes.
>>   */
>> -static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
>> +static int stmmac_tx_clean(struct stmmac_priv *priv, int limit, u32 queue,
>> +			   bool *more)
>>  {
>>  	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
>>  	unsigned int bytes_compl = 0, pkts_compl = 0;
>> @@ -1851,10 +1866,13 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
>>
>>  	netif_tx_lock(priv->dev);
>>
>> +	if (more)
>> +		*more = false;
>> +
>>  	priv->xstats.tx_clean++;
>>
>>  	entry = tx_q->dirty_tx;
>> -	while (entry != tx_q->cur_tx) {
>> +	while ((entry != tx_q->cur_tx) && (pkts_compl < limit)) {
>>  		struct sk_buff *skb = tx_q->tx_skbuff[entry];
>>  		struct dma_desc *p;
>>  		int status;
>> @@ -1937,7 +1955,13 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
>>  		stmmac_enable_eee_mode(priv);
>>  		mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
>>  	}
>> +
>> +	if (more && (tx_q->dirty_tx != tx_q->cur_tx))
>> +		*more = true;
>> +
>>  	netif_tx_unlock(priv->dev);
>> +
>> +	return pkts_compl;
>>  }
>>
>>  /**
>> @@ -2020,6 +2044,34 @@ static bool stmmac_safety_feat_interrupt(struct stmmac_priv *priv)
>>  	return false;
>>  }
>>
>> +static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan)
>> +{
>> +	int status = stmmac_dma_interrupt_status(priv, priv->ioaddr,
>> +						 &priv->xstats, chan);
>> +
>> +	if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) {
>> +		struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
>> +
>> +		if (likely(napi_schedule_prep(&rx_q->napi))) {
>> +			stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
>> +			__napi_schedule(&rx_q->napi);
>> +		}
>> +	} else {
>> +		status &= ~handle_rx;
>> +	}
>> +
>> +	if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) {
>> +		struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
>> +
>> +		if (likely(napi_schedule_prep(&tx_q->napi)))
>> +			__napi_schedule(&tx_q->napi);
>> +	} else {
>> +		status &= ~handle_tx;
>> +	}
>> +
>> +	return status;
>> +}
>> +
>>  /**
>>   * stmmac_dma_interrupt - DMA ISR
>>   * @priv: driver private structure
>> @@ -2034,57 +2086,14 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
>>  	u32 channels_to_check = tx_channel_count > rx_channel_count ?
>>  		tx_channel_count : rx_channel_count;
>>  	u32 chan;
>> -	bool poll_scheduled = false;
>>  	int status[max_t(u32, MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES)];
>>
>>  	/* Make sure we never check beyond our status buffer. */
>>  	if (WARN_ON_ONCE(channels_to_check > ARRAY_SIZE(status)))
>>  		channels_to_check = ARRAY_SIZE(status);
>>
>> -	/* Each DMA channel can be used for rx and tx simultaneously, yet
>> -	 * napi_struct is embedded in struct stmmac_rx_queue rather than in a
>> -	 * stmmac_channel struct.
>> -	 * Because of this, stmmac_poll currently checks (and possibly wakes)
>> -	 * all tx queues rather than just a single tx queue.
>> -	 */
>>  	for (chan = 0; chan < channels_to_check; chan++)
>> -		status[chan] = stmmac_dma_interrupt_status(priv, priv->ioaddr,
>> -				&priv->xstats, chan);
>> -
>> -	for (chan = 0; chan < rx_channel_count; chan++) {
>> -		if (likely(status[chan] & handle_rx)) {
>> -			struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
>> -
>> -			if (likely(napi_schedule_prep(&rx_q->napi))) {
>> -				stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
>> -				__napi_schedule(&rx_q->napi);
>> -				poll_scheduled = true;
>> -			}
>> -		}
>> -	}
>> -
>> -	/* If we scheduled poll, we already know that tx queues will be checked.
>> -	 * If we didn't schedule poll, see if any DMA channel (used by tx) has a
>> -	 * completed transmission, if so, call stmmac_poll (once).
>> -	 */
>> -	if (!poll_scheduled) {
>> -		for (chan = 0; chan < tx_channel_count; chan++) {
>> -			if (status[chan] & handle_tx) {
>> -				/* It doesn't matter what rx queue we choose
>> -				 * here. We use 0 since it always exists.
>> -				 */
>> -				struct stmmac_rx_queue *rx_q =
>> -					&priv->rx_queue[0];
>> -
>> -				if (likely(napi_schedule_prep(&rx_q->napi))) {
>> -					stmmac_disable_dma_irq(priv,
>> -							priv->ioaddr, chan);
>> -					__napi_schedule(&rx_q->napi);
>> -				}
>> -				break;
>> -			}
>> -		}
>> -	}
>> +		status[chan] = stmmac_napi_check(priv, chan);
>>
>>  	for (chan = 0; chan < tx_channel_count; chan++) {
>>  		if (unlikely(status[chan] & tx_hard_error_bump_tc)) {
>> @@ -2241,13 +2250,11 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
>>   */
>>  static void stmmac_tx_timer(struct timer_list *t)
>>  {
>> -	struct stmmac_priv *priv = from_timer(priv, t, txtimer);
>> -	u32 tx_queues_count = priv->plat->tx_queues_to_use;
>> -	u32 queue;
>> +	struct stmmac_tx_queue *tx_q = from_timer(tx_q, t, txtimer);
>>
>> -	/* let's scan all the tx queues */
>> -	for (queue = 0; queue < tx_queues_count; queue++)
>> -		stmmac_tx_clean(priv, queue);
>> +	if (likely(napi_schedule_prep(&tx_q->napi)))
>> +		__napi_schedule(&tx_q->napi);
>> +	tx_q->tx_timer_active = 0;
>>  }
>>
>>  /**
>> @@ -2260,11 +2267,17 @@ static void stmmac_tx_timer(struct timer_list *t)
>>   */
>>  static void stmmac_init_tx_coalesce(struct stmmac_priv *priv)
>>  {
>> +	u32 tx_channel_count = priv->plat->tx_queues_to_use;
>> +	u32 chan;
>> +
>>  	priv->tx_coal_frames = STMMAC_TX_FRAMES;
>>  	priv->tx_coal_timer = STMMAC_COAL_TX_TIMER;
>> -	timer_setup(&priv->txtimer, stmmac_tx_timer, 0);
>> -	priv->txtimer.expires = STMMAC_COAL_TIMER(priv->tx_coal_timer);
>> -	add_timer(&priv->txtimer);
>> +
>> +	for (chan = 0; chan < tx_channel_count; chan++) {
>> +		struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
>> +
>> +		timer_setup(&tx_q->txtimer, stmmac_tx_timer, 0);
>> +	}
>>  }
>>
>>  static void stmmac_set_rings_length(struct stmmac_priv *priv)
>> @@ -2592,6 +2605,7 @@ static void stmmac_hw_teardown(struct net_device *dev)
>>  static int stmmac_open(struct net_device *dev)
>>  {
>>  	struct stmmac_priv *priv = netdev_priv(dev);
>> +	u32 chan;
>>  	int ret;
>>
>>  	stmmac_check_ether_addr(priv);
>> @@ -2688,7 +2702,9 @@ static int stmmac_open(struct net_device *dev)
>>  	if (dev->phydev)
>>  		phy_stop(dev->phydev);
>>
>> -	del_timer_sync(&priv->txtimer);
>> +	for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
>> +		del_timer_sync(&priv->tx_queue[chan].txtimer);
>> +
>>  	stmmac_hw_teardown(dev);
>> init_error:
>>  	free_dma_desc_resources(priv);
>> @@ -2708,6 +2724,7 @@ static int stmmac_open(struct net_device *dev)
>>  static int stmmac_release(struct net_device *dev)
>>  {
>>  	struct stmmac_priv *priv = netdev_priv(dev);
>> +	u32 chan;
>>
>>  	if (priv->eee_enabled)
>>  		del_timer_sync(&priv->eee_ctrl_timer);
>> @@ -2722,7 +2739,8 @@ static int stmmac_release(struct net_device *dev)
>>
>>  	stmmac_disable_all_queues(priv);
>>
>> -	del_timer_sync(&priv->txtimer);
>> +	for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
>> +		del_timer_sync(&priv->tx_queue[chan].txtimer);
>>
>>  	/* Free the IRQ lines */
>>  	free_irq(dev->irq, dev);
>> @@ -2936,14 +2954,11 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
>>  	priv->xstats.tx_tso_nfrags += nfrags;
>>
>>  	/* Manage tx mitigation */
>> -	priv->tx_count_frames += nfrags + 1;
>> -	if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
>> -		mod_timer(&priv->txtimer,
>> -			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
>> -	} else {
>> -		priv->tx_count_frames = 0;
>> +	tx_q->tx_count_frames += nfrags + 1;
>> +	if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
>>  		stmmac_set_tx_ic(priv, desc);
>>  		priv->xstats.tx_set_ic_bit++;
>> +		tx_q->tx_count_frames = 0;
>>  	}
>>
>>  	skb_tx_timestamp(skb);
>> @@ -2994,6 +3009,12 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
>>
>>  	stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
>>
>> +	if (priv->tx_coal_timer && !tx_q->tx_timer_active) {
>> +		tx_q->tx_timer_active = 1;
>> +		mod_timer(&tx_q->txtimer,
>> +			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
>> +	}
>> +
>>  	return NETDEV_TX_OK;
>>
>> dma_map_err:
>> @@ -3146,14 +3167,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
>>  	 * This approach takes care about the fragments: desc is the first
>>  	 * element in case of no SG.
>>  	 */
>> -	priv->tx_count_frames += nfrags + 1;
>> -	if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
>> -		mod_timer(&priv->txtimer,
>> -			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
>> -	} else {
>> -		priv->tx_count_frames = 0;
>> +	tx_q->tx_count_frames += nfrags + 1;
>> +	if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
>>  		stmmac_set_tx_ic(priv, desc);
>>  		priv->xstats.tx_set_ic_bit++;
>> +		tx_q->tx_count_frames = 0;
>>  	}
>>
>>  	skb_tx_timestamp(skb);
>> @@ -3199,8 +3217,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
>>  	netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
>>
>>  	stmmac_enable_dma_transmission(priv, priv->ioaddr);
>> +
>>  	stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
>>
>> +	if (priv->tx_coal_timer && !tx_q->tx_timer_active) {
>> +		tx_q->tx_timer_active = 1;
>> +		mod_timer(&tx_q->txtimer,
>> +			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
>> +	}
>> +
>>  	return NETDEV_TX_OK;
>>
>> dma_map_err:
>> @@ -3514,27 +3539,41 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
>>   * Description :
>>   * To look at the incoming frames and clear the tx resources.
>>   */
>> -static int stmmac_poll(struct napi_struct *napi, int budget)
>> +static int stmmac_rx_poll(struct napi_struct *napi, int budget)
>>  {
>>  	struct stmmac_rx_queue *rx_q =
>>  		container_of(napi, struct stmmac_rx_queue, napi);
>>  	struct stmmac_priv *priv = rx_q->priv_data;
>> -	u32 tx_count = priv->plat->tx_queues_to_use;
>>  	u32 chan = rx_q->queue_index;
>>  	int work_done = 0;
>> -	u32 queue;
>>
>>  	priv->xstats.napi_poll++;
>>
>> -	/* check all the queues */
>> -	for (queue = 0; queue < tx_count; queue++)
>> -		stmmac_tx_clean(priv, queue);
>> -
>>  	work_done = stmmac_rx(priv, budget, rx_q->queue_index);
>> +	if (work_done < budget && napi_complete_done(napi, work_done))
>> +		stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
>> +
>> +	return work_done;
>> +}
>> +
>> +static int stmmac_tx_poll(struct napi_struct *napi, int budget)
>> +{
>> +	struct stmmac_tx_queue *tx_q =
>> +		container_of(napi, struct stmmac_tx_queue, napi);
>> +	struct stmmac_priv *priv = tx_q->priv_data;
>> +	u32 chan = tx_q->queue_index;
>> +	int work_done = 0;
>> +	bool more;
>> +
>> +	priv->xstats.napi_poll++;
>> +
>> +	work_done = stmmac_tx_clean(priv, budget, chan, &more);
>>  	if (work_done < budget) {
>>  		napi_complete_done(napi, work_done);
>> -		stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
>> +		if (more)
>> +			napi_reschedule(napi);
>>  	}
>> +
>>  	return work_done;
>>  }
>>
>> @@ -4325,10 +4364,17 @@ int stmmac_dvr_probe(struct device *device,
>>  	for (queue = 0; queue < priv->plat->rx_queues_to_use; queue++) {
>>  		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
>>
>> -		netif_napi_add(ndev, &rx_q->napi, stmmac_poll,
>> +		netif_napi_add(ndev, &rx_q->napi, stmmac_rx_poll,
>>  			       (8 * priv->plat->rx_queues_to_use));
>>  	}
>>
>> +	for (queue = 0; queue < priv->plat->tx_queues_to_use; queue++) {
>> +		struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
>> +
>> +		netif_napi_add(ndev, &tx_q->napi, stmmac_tx_poll,
>> +			       (8 * priv->plat->tx_queues_to_use));
>> +	}
>> +
>>  	mutex_init(&priv->lock);
>>
>>  	/* If a specific clk_csr value is passed from the platform
>> @@ -4377,6 +4423,11 @@ int stmmac_dvr_probe(struct device *device,
>>
>>  		netif_napi_del(&rx_q->napi);
>>  	}
>> +	for (queue = 0; queue < priv->plat->tx_queues_to_use; queue++) {
>> +		struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
>> +
>> +		netif_napi_del(&tx_q->napi);
>> +	}
>>  error_hw_init:
>>  	destroy_workqueue(priv->wq);
>>  error_wq:

--------------AF16C4D943A9C74E62DDC80C
Content-Type: text/x-patch; name="0001-fixup_coalesce.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="0001-fixup_coalesce.patch"

From 35a5e33f4edcc663511d615e61a1ea119dfc77ee Mon Sep 17 00:00:00 2001
Message-Id: <35a5e33f4edcc663511d615e61a1ea119dfc77ee.1536583587.git.joabreu@synopsys.com>
From: Jose Abreu
Date: Mon, 10 Sep 2018 14:46:09 +0200
Subject: [PATCH] fixup_coalesce

Signed-off-by: Jose Abreu
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 39 +++++++++++++----------
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 97268769186e..7875e81966fb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1872,7 +1872,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int limit, u32 queue,
 	priv->xstats.tx_clean++;
 
 	entry = tx_q->dirty_tx;
-	while ((entry != tx_q->cur_tx) && (pkts_compl < limit)) {
+	while (entry != tx_q->cur_tx) {
 		struct sk_buff *skb = tx_q->tx_skbuff[entry];
 		struct dma_desc *p;
 		int status;
@@ -2241,6 +2241,17 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 	return ret;
 }
 
+static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue)
+{
+	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+	if (tx_q->tx_timer_active)
+		return;
+
+	mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer));
+	tx_q->tx_timer_active = true;
+}
+
 /**
  * stmmac_tx_timer - mitigation sw timer for tx.
  * @data: data pointer
@@ -2250,10 +2261,10 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 static void stmmac_tx_timer(struct timer_list *t)
 {
 	struct stmmac_tx_queue *tx_q = from_timer(tx_q, t, txtimer);
+	struct stmmac_priv *priv = tx_q->priv_data;
 
-	if (likely(napi_schedule_prep(&tx_q->napi)))
-		__napi_schedule(&tx_q->napi);
-	tx_q->tx_timer_active = 0;
+	stmmac_tx_clean(priv, ~0, tx_q->queue_index, NULL);
+	tx_q->tx_timer_active = false;
 }
 
 /**
@@ -2852,6 +2863,9 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Compute header lengths */
 	proto_hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 
+	/* Start coalesce timer earlier in case TX Queue is stopped */
+	stmmac_tx_timer_arm(priv, queue);
+
 	/* Desc availability based on threshold should be enough safe */
 	if (unlikely(stmmac_tx_avail(priv, queue) <
 		(((skb->len - proto_hdr_len) / TSO_MAX_BUFF_SIZE + 1)))) {
@@ -3009,12 +3023,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * sizeof(*desc));
 	stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
 
-	if (priv->tx_coal_timer && !tx_q->tx_timer_active) {
-		tx_q->tx_timer_active = 1;
-		mod_timer(&tx_q->txtimer,
-			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
-	}
-
 	return NETDEV_TX_OK;
 
 dma_map_err:
@@ -3054,6 +3062,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		return stmmac_tso_xmit(skb, dev);
 	}
 
+	/* Start coalesce timer earlier in case TX Queue is stopped */
+	stmmac_tx_timer_arm(priv, queue);
+
 	if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) {
 		if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, queue))) {
 			netif_tx_stop_queue(netdev_get_tx_queue(priv->dev,
@@ -3221,12 +3232,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * sizeof(*desc));
 	stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
 
-	if (priv->tx_coal_timer && !tx_q->tx_timer_active) {
-		tx_q->tx_timer_active = 1;
-		mod_timer(&tx_q->txtimer,
-			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
-	}
-
 	return NETDEV_TX_OK;
 
 dma_map_err:
@@ -3575,7 +3580,7 @@ static int stmmac_tx_poll(struct napi_struct *napi, int budget)
 			napi_reschedule(napi);
 	}
 
-	return work_done;
+	return min(work_done, budget);
 }
 
 /**
-- 
2.7.4

--------------AF16C4D943A9C74E62DDC80C--