From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756066AbbCPONQ (ORCPT ); Mon, 16 Mar 2015 10:13:16 -0400 Received: from mail.linuxfoundation.org ([140.211.169.12]:54786 "EHLO mail.linuxfoundation.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755959AbbCPONF (ORCPT ); Mon, 16 Mar 2015 10:13:05 -0400 From: Greg Kroah-Hartman To: linux-kernel@vger.kernel.org Cc: Greg Kroah-Hartman , stable@vger.kernel.org, Jaedon Shin , Florian Fainelli , "David S. Miller" Subject: [PATCH 3.19 023/177] net: bcmgenet: fix throughtput regression Date: Mon, 16 Mar 2015 15:07:10 +0100 Message-Id: <20150316140814.151191956@linuxfoundation.org> X-Mailer: git-send-email 2.3.3 In-Reply-To: <20150316140813.085032723@linuxfoundation.org> References: <20150316140813.085032723@linuxfoundation.org> User-Agent: quilt/0.64 MIME-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-15 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org 3.19-stable review patch. If anyone has any objections, please let me know. ------------------ From: Jaedon Shin [ Upstream commit 4092e6acf5cb16f56154e2dd22d647023dc3d646 ] This patch adds bcmgenet_tx_poll for the tx_rings. This can reduce the interrupt load and send xmit in network stack on time. This also separated for the completion of tx_ring16 from bcmgenet_poll. The bcmgenet_tx_reclaim of tx_ring[{0,1,2,3}] operative by an interrupt is to be not more than a certain number TxBDs. It is caused by too slowly reclaiming the transmitted skb. Therefore, performance degradation of xmit after 605ad7f ("tcp: refine TSO autosizing"). Signed-off-by: Jaedon Shin Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 113 +++++++++++++++++++------ drivers/net/ethernet/broadcom/genet/bcmgenet.h | 2 2 files changed, 88 insertions(+), 27 deletions(-) --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -971,13 +971,14 @@ static inline void bcmgenet_tx_ring_int_ } /* Unlocked version of the reclaim routine */ -static void __bcmgenet_tx_reclaim(struct net_device *dev, - struct bcmgenet_tx_ring *ring) +static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, + struct bcmgenet_tx_ring *ring) { struct bcmgenet_priv *priv = netdev_priv(dev); int last_tx_cn, last_c_index, num_tx_bds; struct enet_cb *tx_cb_ptr; struct netdev_queue *txq; + unsigned int pkts_compl = 0; unsigned int bds_compl; unsigned int c_index; @@ -1005,6 +1006,7 @@ static void __bcmgenet_tx_reclaim(struct tx_cb_ptr = ring->cbs + last_c_index; bds_compl = 0; if (tx_cb_ptr->skb) { + pkts_compl++; bds_compl = skb_shinfo(tx_cb_ptr->skb)->nr_frags + 1; dev->stats.tx_bytes += tx_cb_ptr->skb->len; dma_unmap_single(&dev->dev, @@ -1028,23 +1030,45 @@ static void __bcmgenet_tx_reclaim(struct last_c_index &= (num_tx_bds - 1); } - if (ring->free_bds > (MAX_SKB_FRAGS + 1)) - ring->int_disable(priv, ring); - - if (netif_tx_queue_stopped(txq)) - netif_tx_wake_queue(txq); + if (ring->free_bds > (MAX_SKB_FRAGS + 1)) { + if (netif_tx_queue_stopped(txq)) + netif_tx_wake_queue(txq); + } ring->c_index = c_index; + + return pkts_compl; } -static void bcmgenet_tx_reclaim(struct net_device *dev, +static unsigned int bcmgenet_tx_reclaim(struct net_device *dev, struct bcmgenet_tx_ring *ring) { + unsigned int released; unsigned long flags; spin_lock_irqsave(&ring->lock, flags); - __bcmgenet_tx_reclaim(dev, ring); + released = __bcmgenet_tx_reclaim(dev, ring); spin_unlock_irqrestore(&ring->lock, flags); + + return released; +} + +static int bcmgenet_tx_poll(struct napi_struct *napi, int budget) +{ + struct bcmgenet_tx_ring *ring = + container_of(napi, struct bcmgenet_tx_ring, napi); + unsigned int work_done = 0; + + work_done = bcmgenet_tx_reclaim(ring->priv->dev, ring); + + if (work_done == 0) { + napi_complete(napi); + ring->int_enable(ring->priv, ring); + + return 0; + } + + return budget; } static void bcmgenet_tx_reclaim_all(struct net_device *dev) @@ -1302,10 +1326,8 @@ static netdev_tx_t bcmgenet_xmit(struct bcmgenet_tdma_ring_writel(priv, ring->index, ring->prod_index, TDMA_PROD_INDEX); - if (ring->free_bds <= (MAX_SKB_FRAGS + 1)) { + if (ring->free_bds <= (MAX_SKB_FRAGS + 1)) netif_tx_stop_queue(txq); - ring->int_enable(priv, ring); - } out: spin_unlock_irqrestore(&ring->lock, flags); @@ -1621,6 +1643,7 @@ static int init_umac(struct bcmgenet_pri struct device *kdev = &priv->pdev->dev; int ret; u32 reg, cpu_mask_clear; + int index; dev_dbg(&priv->pdev->dev, "bcmgenet: init_umac\n"); @@ -1647,7 +1670,7 @@ static int init_umac(struct bcmgenet_pri bcmgenet_intr_disable(priv); - cpu_mask_clear = UMAC_IRQ_RXDMA_BDONE; + cpu_mask_clear = UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_TXDMA_BDONE; dev_dbg(kdev, "%s:Enabling RXDMA_BDONE interrupt\n", __func__); @@ -1674,6 +1697,10 @@ static int init_umac(struct bcmgenet_pri bcmgenet_intrl2_0_writel(priv, cpu_mask_clear, INTRL2_CPU_MASK_CLEAR); + for (index = 0; index < priv->hw_params->tx_queues; index++) + bcmgenet_intrl2_1_writel(priv, (1 << index), + INTRL2_CPU_MASK_CLEAR); + /* Enable rx/tx engine.*/ dev_dbg(kdev, "done init umac\n"); @@ -1693,6 +1720,8 @@ static void bcmgenet_init_tx_ring(struct unsigned int first_bd; spin_lock_init(&ring->lock); + ring->priv = priv; + netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); ring->index = index; if (index == DESC_INDEX) { ring->queue = 0; @@ -1738,6 +1767,17 @@ static void bcmgenet_init_tx_ring(struct TDMA_WRITE_PTR); bcmgenet_tdma_ring_writel(priv, index, end_ptr * words_per_bd - 1, DMA_END_ADDR); + + napi_enable(&ring->napi); +} + +static void bcmgenet_fini_tx_ring(struct bcmgenet_priv *priv, + unsigned int index) +{ + struct bcmgenet_tx_ring *ring = &priv->tx_rings[index]; + + napi_disable(&ring->napi); + netif_napi_del(&ring->napi); } /* Initialize a RDMA ring */ @@ -1907,7 +1947,7 @@ static int bcmgenet_dma_teardown(struct return ret; } -static void bcmgenet_fini_dma(struct bcmgenet_priv *priv) +static void __bcmgenet_fini_dma(struct bcmgenet_priv *priv) { int i; @@ -1926,6 +1966,18 @@ static void bcmgenet_fini_dma(struct bcm kfree(priv->tx_cbs); } +static void bcmgenet_fini_dma(struct bcmgenet_priv *priv) +{ + int i; + + bcmgenet_fini_tx_ring(priv, DESC_INDEX); + + for (i = 0; i < priv->hw_params->tx_queues; i++) + bcmgenet_fini_tx_ring(priv, i); + + __bcmgenet_fini_dma(priv); +} + /* init_edma: Initialize DMA control register */ static int bcmgenet_init_dma(struct bcmgenet_priv *priv) { @@ -1952,7 +2004,7 @@ static int bcmgenet_init_dma(struct bcmg priv->tx_cbs = kcalloc(priv->num_tx_bds, sizeof(struct enet_cb), GFP_KERNEL); if (!priv->tx_cbs) { - bcmgenet_fini_dma(priv); + __bcmgenet_fini_dma(priv); return -ENOMEM; } @@ -1975,9 +2027,6 @@ static int bcmgenet_poll(struct napi_str struct bcmgenet_priv, napi); unsigned int work_done; - /* tx reclaim */ - bcmgenet_tx_reclaim(priv->dev, &priv->tx_rings[DESC_INDEX]); - work_done = bcmgenet_desc_rx(priv, budget); /* Advancing our consumer index*/ @@ -2022,28 +2071,34 @@ static void bcmgenet_irq_task(struct wor static irqreturn_t bcmgenet_isr1(int irq, void *dev_id) { struct bcmgenet_priv *priv = dev_id; + struct bcmgenet_tx_ring *ring; unsigned int index; /* Save irq status for bottom-half processing. */ priv->irq1_stat = bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) & - ~priv->int1_mask; + ~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS); /* clear interrupts */ bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR); netif_dbg(priv, intr, priv->dev, "%s: IRQ=0x%x\n", __func__, priv->irq1_stat); + /* Check the MBDONE interrupts. * packet is done, reclaim descriptors */ - if (priv->irq1_stat & 0x0000ffff) { - index = 0; - for (index = 0; index < 16; index++) { - if (priv->irq1_stat & (1 << index)) - bcmgenet_tx_reclaim(priv->dev, - &priv->tx_rings[index]); + for (index = 0; index < priv->hw_params->tx_queues; index++) { + if (!(priv->irq1_stat & BIT(index))) + continue; + + ring = &priv->tx_rings[index]; + + if (likely(napi_schedule_prep(&ring->napi))) { + ring->int_disable(priv, ring); + __napi_schedule(&ring->napi); } } + return IRQ_HANDLED; } @@ -2075,8 +2130,12 @@ static irqreturn_t bcmgenet_isr0(int irq } if (priv->irq0_stat & (UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE)) { - /* Tx reclaim */ - bcmgenet_tx_reclaim(priv->dev, &priv->tx_rings[DESC_INDEX]); + struct bcmgenet_tx_ring *ring = &priv->tx_rings[DESC_INDEX]; + + if (likely(napi_schedule_prep(&ring->napi))) { + ring->int_disable(priv, ring); + __napi_schedule(&ring->napi); + } } if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R | UMAC_IRQ_PHY_DET_F | --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -520,6 +520,7 @@ struct bcmgenet_hw_params { struct bcmgenet_tx_ring { spinlock_t lock; /* ring lock */ + struct napi_struct napi; /* NAPI per tx queue */ unsigned int index; /* ring index */ unsigned int queue; /* queue index */ struct enet_cb *cbs; /* tx ring buffer control block*/ @@ -534,6 +535,7 @@ struct bcmgenet_tx_ring { struct bcmgenet_tx_ring *); void (*int_disable)(struct bcmgenet_priv *priv, struct bcmgenet_tx_ring *); + struct bcmgenet_priv *priv; }; /* device context */