linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] net: mv643xx_eth: improve performance
@ 2016-05-13 11:59 Jisheng Zhang
  2016-05-13 11:59 ` [PATCH 1/2] net: mv643xx_eth: use {readl|writel}_relaxed instead of readl/writel Jisheng Zhang
  2016-05-13 11:59 ` [PATCH 2/2] net: mv643xx_eth: use dma_wmb/rmb where appropriate Jisheng Zhang
  0 siblings, 2 replies; 6+ messages in thread
From: Jisheng Zhang @ 2016-05-13 11:59 UTC (permalink / raw)
  To: sebastian.hesselbarth, davem
  Cc: netdev, linux-kernel, linux-arm-kernel, Jisheng Zhang

This series improves the performance of the mv643xx_eth driver by using
{readl|writel}_relaxed and lighter-weight DMA memory barriers.

Since I have no mv643xx_eth platform, testing is appreciated!

Jisheng Zhang (2):
  net: mv643xx_eth: use {readl|writel}_relaxed instead of readl/writel
  net: mv643xx_eth: use dma_wmb/rmb where appropriate

 drivers/net/ethernet/marvell/mv643xx_eth.c | 43 +++++++++++++++---------------
 1 file changed, 22 insertions(+), 21 deletions(-)

-- 
2.8.1

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/2] net: mv643xx_eth: use {readl|writel}_relaxed instead of readl/writel
  2016-05-13 11:59 [PATCH 0/2] net: mv643xx_eth: improve performance Jisheng Zhang
@ 2016-05-13 11:59 ` Jisheng Zhang
  2016-05-13 12:09   ` Arnd Bergmann
  2016-05-13 11:59 ` [PATCH 2/2] net: mv643xx_eth: use dma_wmb/rmb where appropriate Jisheng Zhang
  1 sibling, 1 reply; 6+ messages in thread
From: Jisheng Zhang @ 2016-05-13 11:59 UTC (permalink / raw)
  To: sebastian.hesselbarth, davem
  Cc: netdev, linux-kernel, linux-arm-kernel, Jisheng Zhang

Since appropriate memory barriers are already there, use the relaxed
versions of the accessors to improve performance a bit.

Signed-off-by: Jisheng Zhang <jszhang@marvell.com>
---
 drivers/net/ethernet/marvell/mv643xx_eth.c | 33 +++++++++++++++---------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 5583118..c6d8124 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -433,22 +433,22 @@ struct mv643xx_eth_private {
 /* port register accessors **************************************************/
 static inline u32 rdl(struct mv643xx_eth_private *mp, int offset)
 {
-	return readl(mp->shared->base + offset);
+	return readl_relaxed(mp->shared->base + offset);
 }
 
 static inline u32 rdlp(struct mv643xx_eth_private *mp, int offset)
 {
-	return readl(mp->base + offset);
+	return readl_relaxed(mp->base + offset);
 }
 
 static inline void wrl(struct mv643xx_eth_private *mp, int offset, u32 data)
 {
-	writel(data, mp->shared->base + offset);
+	writel_relaxed(data, mp->shared->base + offset);
 }
 
 static inline void wrlp(struct mv643xx_eth_private *mp, int offset, u32 data)
 {
-	writel(data, mp->base + offset);
+	writel_relaxed(data, mp->base + offset);
 }
 
 
@@ -2642,10 +2642,10 @@ mv643xx_eth_conf_mbus_windows(struct mv643xx_eth_shared_private *msp,
 	int i;
 
 	for (i = 0; i < 6; i++) {
-		writel(0, base + WINDOW_BASE(i));
-		writel(0, base + WINDOW_SIZE(i));
+		writel_relaxed(0, base + WINDOW_BASE(i));
+		writel_relaxed(0, base + WINDOW_SIZE(i));
 		if (i < 4)
-			writel(0, base + WINDOW_REMAP_HIGH(i));
+			writel_relaxed(0, base + WINDOW_REMAP_HIGH(i));
 	}
 
 	win_enable = 0x3f;
@@ -2654,16 +2654,17 @@ mv643xx_eth_conf_mbus_windows(struct mv643xx_eth_shared_private *msp,
 	for (i = 0; i < dram->num_cs; i++) {
 		const struct mbus_dram_window *cs = dram->cs + i;
 
-		writel((cs->base & 0xffff0000) |
+		writel_relaxed((cs->base & 0xffff0000) |
 			(cs->mbus_attr << 8) |
 			dram->mbus_dram_target_id, base + WINDOW_BASE(i));
-		writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i));
+		writel_relaxed((cs->size - 1) & 0xffff0000,
+			base + WINDOW_SIZE(i));
 
 		win_enable &= ~(1 << i);
 		win_protect |= 3 << (2 * i);
 	}
 
-	writel(win_enable, base + WINDOW_BAR_ENABLE);
+	writel_relaxed(win_enable, base + WINDOW_BAR_ENABLE);
 	msp->win_protect = win_protect;
 }
 
@@ -2674,8 +2675,8 @@ static void infer_hw_params(struct mv643xx_eth_shared_private *msp)
 	 * [21:8], or a 16-bit coal limit in bits [25,21:7] of the
 	 * SDMA config register.
 	 */
-	writel(0x02000000, msp->base + 0x0400 + SDMA_CONFIG);
-	if (readl(msp->base + 0x0400 + SDMA_CONFIG) & 0x02000000)
+	writel_relaxed(0x02000000, msp->base + 0x0400 + SDMA_CONFIG);
+	if (readl_relaxed(msp->base + 0x0400 + SDMA_CONFIG) & 0x02000000)
 		msp->extended_rx_coal_limit = 1;
 	else
 		msp->extended_rx_coal_limit = 0;
@@ -2685,12 +2686,12 @@ static void infer_hw_params(struct mv643xx_eth_shared_private *msp)
 	 * yes, whether its associated registers are in the old or
 	 * the new place.
 	 */
-	writel(1, msp->base + 0x0400 + TX_BW_MTU_MOVED);
-	if (readl(msp->base + 0x0400 + TX_BW_MTU_MOVED) & 1) {
+	writel_relaxed(1, msp->base + 0x0400 + TX_BW_MTU_MOVED);
+	if (readl_relaxed(msp->base + 0x0400 + TX_BW_MTU_MOVED) & 1) {
 		msp->tx_bw_control = TX_BW_CONTROL_NEW_LAYOUT;
 	} else {
-		writel(7, msp->base + 0x0400 + TX_BW_RATE);
-		if (readl(msp->base + 0x0400 + TX_BW_RATE) & 7)
+		writel_relaxed(7, msp->base + 0x0400 + TX_BW_RATE);
+		if (readl_relaxed(msp->base + 0x0400 + TX_BW_RATE) & 7)
 			msp->tx_bw_control = TX_BW_CONTROL_OLD_LAYOUT;
 		else
 			msp->tx_bw_control = TX_BW_CONTROL_ABSENT;
-- 
2.8.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] net: mv643xx_eth: use dma_wmb/rmb where appropriate
  2016-05-13 11:59 [PATCH 0/2] net: mv643xx_eth: improve performance Jisheng Zhang
  2016-05-13 11:59 ` [PATCH 1/2] net: mv643xx_eth: use {readl|writel}_relaxed instead of readl/writel Jisheng Zhang
@ 2016-05-13 11:59 ` Jisheng Zhang
  1 sibling, 0 replies; 6+ messages in thread
From: Jisheng Zhang @ 2016-05-13 11:59 UTC (permalink / raw)
  To: sebastian.hesselbarth, davem
  Cc: netdev, linux-kernel, linux-arm-kernel, Jisheng Zhang

Update the mv643xx_eth driver to use dma_rmb()/dma_wmb() instead of the
heavier rmb()/wmb() barriers in order to improve performance.

Signed-off-by: Jisheng Zhang <jszhang@marvell.com>
---
 drivers/net/ethernet/marvell/mv643xx_eth.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index c6d8124..13b71e3 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -536,7 +536,7 @@ static int rxq_process(struct rx_queue *rxq, int budget)
 		cmd_sts = rx_desc->cmd_sts;
 		if (cmd_sts & BUFFER_OWNED_BY_DMA)
 			break;
-		rmb();
+		dma_rmb();
 
 		skb = rxq->rx_skb[rxq->rx_curr_desc];
 		rxq->rx_skb[rxq->rx_curr_desc] = NULL;
@@ -647,9 +647,9 @@ static int rxq_refill(struct rx_queue *rxq, int budget)
 						  DMA_FROM_DEVICE);
 		rx_desc->buf_size = size;
 		rxq->rx_skb[rx] = skb;
-		wmb();
+		dma_wmb();
 		rx_desc->cmd_sts = BUFFER_OWNED_BY_DMA | RX_ENABLE_INTERRUPT;
-		wmb();
+		dma_wmb();
 
 		/*
 		 * The hardware automatically prepends 2 bytes of
@@ -889,7 +889,7 @@ static int txq_submit_tso(struct tx_queue *txq, struct sk_buff *skb,
 	skb_tx_timestamp(skb);
 
 	/* ensure all other descriptors are written before first cmd_sts */
-	wmb();
+	dma_wmb();
 	first_tx_desc->cmd_sts = first_cmd_sts;
 
 	/* clear TX_END status */
@@ -994,7 +994,7 @@ static int txq_submit_skb(struct tx_queue *txq, struct sk_buff *skb,
 	skb_tx_timestamp(skb);
 
 	/* ensure all other descriptors are written before first cmd_sts */
-	wmb();
+	dma_wmb();
 	desc->cmd_sts = cmd_sts;
 
 	/* clear TX_END status */
-- 
2.8.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] net: mv643xx_eth: use {readl|writel}_relaxed instead of readl/writel
  2016-05-13 11:59 ` [PATCH 1/2] net: mv643xx_eth: use {readl|writel}_relaxed instead of readl/writel Jisheng Zhang
@ 2016-05-13 12:09   ` Arnd Bergmann
  2016-05-13 12:19     ` Jisheng Zhang
  0 siblings, 1 reply; 6+ messages in thread
From: Arnd Bergmann @ 2016-05-13 12:09 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: Jisheng Zhang, sebastian.hesselbarth, davem, netdev, linux-kernel

On Friday 13 May 2016 19:59:19 Jisheng Zhang wrote:
>  /* port register accessors **************************************************/
>  static inline u32 rdl(struct mv643xx_eth_private *mp, int offset)
>  {
> -	return readl(mp->shared->base + offset);
> +	return readl_relaxed(mp->shared->base + offset);
>  }
>  
>  static inline u32 rdlp(struct mv643xx_eth_private *mp, int offset)
>  {
> -	return readl(mp->base + offset);
> +	return readl_relaxed(mp->base + offset);
>  }

I'd recommend not changing these in general, but introducing new 'rdl_relaxed()'
and 'rdlp_relaxed()' etc variants that you use in the code that actually
is performance sensitive, but use the normal non-relaxed versions by
default.

Then add a comment to each use of the relaxed accessors on how you know
that it's safe for that caller. This usually is just needed for the xmit()
function and for the interrupt handler.
  
> @@ -2642,10 +2642,10 @@ mv643xx_eth_conf_mbus_windows(struct mv643xx_eth_shared_private *msp,
>  	int i;
>  
>  	for (i = 0; i < 6; i++) {
> -		writel(0, base + WINDOW_BASE(i));
> -		writel(0, base + WINDOW_SIZE(i));
> +		writel_relaxed(0, base + WINDOW_BASE(i));
> +		writel_relaxed(0, base + WINDOW_SIZE(i));
>  		if (i < 4)
> -			writel(0, base + WINDOW_REMAP_HIGH(i));
> +			writel_relaxed(0, base + WINDOW_REMAP_HIGH(i));
>  	}
>  
>  	win_enable = 0x3f;

Configuring the mbus for instance is clearly not an operation in which
performance matters at all, so better not touch that.

> @@ -2674,8 +2675,8 @@ static void infer_hw_params(struct mv643xx_eth_shared_private *msp)
>  	 * [21:8], or a 16-bit coal limit in bits [25,21:7] of the
>  	 * SDMA config register.
>  	 */
> -	writel(0x02000000, msp->base + 0x0400 + SDMA_CONFIG);
> -	if (readl(msp->base + 0x0400 + SDMA_CONFIG) & 0x02000000)
> +	writel_relaxed(0x02000000, msp->base + 0x0400 + SDMA_CONFIG);
> +	if (readl_relaxed(msp->base + 0x0400 + SDMA_CONFIG) & 0x02000000)
>  		msp->extended_rx_coal_limit = 1;
>  	else
>  		msp->extended_rx_coal_limit = 0;


This also seems to be configuration, rather than in the packet rx/tx hotpath.

	Arnd

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] net: mv643xx_eth: use {readl|writel}_relaxed instead of readl/writel
  2016-05-13 12:09   ` Arnd Bergmann
@ 2016-05-13 12:19     ` Jisheng Zhang
  2016-05-13 15:24       ` Andrew Lunn
  0 siblings, 1 reply; 6+ messages in thread
From: Jisheng Zhang @ 2016-05-13 12:19 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: linux-arm-kernel, sebastian.hesselbarth, davem, netdev, linux-kernel

Dear Arnd,

On Fri, 13 May 2016 14:09:43 +0200 Arnd Bergmann wrote:

> On Friday 13 May 2016 19:59:19 Jisheng Zhang wrote:
> >  /* port register accessors **************************************************/
> >  static inline u32 rdl(struct mv643xx_eth_private *mp, int offset)
> >  {
> > -	return readl(mp->shared->base + offset);
> > +	return readl_relaxed(mp->shared->base + offset);
> >  }
> >  
> >  static inline u32 rdlp(struct mv643xx_eth_private *mp, int offset)
> >  {
> > -	return readl(mp->base + offset);
> > +	return readl_relaxed(mp->base + offset);
> >  }  
> 
> I'd recommend not changing these in general, but introducing new 'rdl_relaxed()'
> and 'rdlp_relaxed()' etc variants that you use in the code that actually
> is performance sensitive, but use the normal non-relaxed versions by
> default.
> 
> Then add a comment to each use of the relaxed accessors on how you know
> that it's safe for that caller. This usually is just needed for the xmit()
> function and for the interrupt handler.

I get your points and I do think they make sense. But could we always use the
relaxed versions to save some lines of code? Although I have no mv643xx_eth
platform, I can confirm that similar relaxed-accessor changes in pxa168_eth
are safe, and this is what we do in our product kernel.

Above is just my humble opinion, comments are welcome.

Thanks,
Jisheng

>   
> > @@ -2642,10 +2642,10 @@ mv643xx_eth_conf_mbus_windows(struct mv643xx_eth_shared_private *msp,
> >  	int i;
> >  
> >  	for (i = 0; i < 6; i++) {
> > -		writel(0, base + WINDOW_BASE(i));
> > -		writel(0, base + WINDOW_SIZE(i));
> > +		writel_relaxed(0, base + WINDOW_BASE(i));
> > +		writel_relaxed(0, base + WINDOW_SIZE(i));
> >  		if (i < 4)
> > -			writel(0, base + WINDOW_REMAP_HIGH(i));
> > +			writel_relaxed(0, base + WINDOW_REMAP_HIGH(i));
> >  	}
> >  
> >  	win_enable = 0x3f;  
> 
> Configuring the mbus for instance is clearly not an operation in which
> performance matters at all, so better not touch that.
> 
> > @@ -2674,8 +2675,8 @@ static void infer_hw_params(struct mv643xx_eth_shared_private *msp)
> >  	 * [21:8], or a 16-bit coal limit in bits [25,21:7] of the
> >  	 * SDMA config register.
> >  	 */
> > -	writel(0x02000000, msp->base + 0x0400 + SDMA_CONFIG);
> > -	if (readl(msp->base + 0x0400 + SDMA_CONFIG) & 0x02000000)
> > +	writel_relaxed(0x02000000, msp->base + 0x0400 + SDMA_CONFIG);
> > +	if (readl_relaxed(msp->base + 0x0400 + SDMA_CONFIG) & 0x02000000)
> >  		msp->extended_rx_coal_limit = 1;
> >  	else
> >  		msp->extended_rx_coal_limit = 0;  
> 
> 
> This also seems to be configuration, rather than in the packet rx/tx hotpath.
> 
> 	Arnd

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] net: mv643xx_eth: use {readl|writel}_relaxed instead of readl/writel
  2016-05-13 12:19     ` Jisheng Zhang
@ 2016-05-13 15:24       ` Andrew Lunn
  0 siblings, 0 replies; 6+ messages in thread
From: Andrew Lunn @ 2016-05-13 15:24 UTC (permalink / raw)
  To: Jisheng Zhang
  Cc: Arnd Bergmann, netdev, linux-kernel, davem, linux-arm-kernel,
	sebastian.hesselbarth

On Fri, May 13, 2016 at 08:19:55PM +0800, Jisheng Zhang wrote:
> Dear Arnd,
> 
> On Fri, 13 May 2016 14:09:43 +0200 Arnd Bergmann wrote:
> 
> > On Friday 13 May 2016 19:59:19 Jisheng Zhang wrote:
> > >  /* port register accessors **************************************************/
> > >  static inline u32 rdl(struct mv643xx_eth_private *mp, int offset)
> > >  {
> > > -	return readl(mp->shared->base + offset);
> > > +	return readl_relaxed(mp->shared->base + offset);
> > >  }
> > >  
> > >  static inline u32 rdlp(struct mv643xx_eth_private *mp, int offset)
> > >  {
> > > -	return readl(mp->base + offset);
> > > +	return readl_relaxed(mp->base + offset);
> > >  }  
> > 
> > I'd recommend not changing these in general, but introducing new 'rdl_relaxed()'
> > and 'rdlp_relaxed()' etc variants that you use in the code that actually
> > is performance sensitive, but use the normal non-relaxed versions by
> > default.
> > 
> > Then add a comment to each use of the relaxed accessors on how you know
> > that it's safe for that caller. This usually is just needed for the xmit()
> > function and for the interrupt handler.
> 
> Got your points and I do think it makes sense. But could we always use the
> relaxed version to save some LoCs.

It is a trade-off between lines of code and hard-to-find bugs. Getting
this wrong can introduce subtle bugs.

Best be paranoid and only touch the fast path, where this actually
matters.

     Andrew

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2016-05-13 15:24 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-13 11:59 [PATCH 0/2] net: mv643xx_eth: improve performance Jisheng Zhang
2016-05-13 11:59 ` [PATCH 1/2] net: mv643xx_eth: use {readl|writel}_relaxed instead of readl/writel Jisheng Zhang
2016-05-13 12:09   ` Arnd Bergmann
2016-05-13 12:19     ` Jisheng Zhang
2016-05-13 15:24       ` Andrew Lunn
2016-05-13 11:59 ` [PATCH 2/2] net: mv643xx_eth: use dma_wmb/rmb where appropriate Jisheng Zhang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).