* [PATCH 4/4] ARM: l2x0: Optimise the range based operations
From: Santosh Shilimkar @ 2010-07-31 17:46 UTC
  To: linux-arm-kernel; +Cc: linux-omap, Santosh Shilimkar, Catalin Marinas

For big buffers which are in excess of the cache size, the maintenance
operations by PA are very slow. For such buffers the maintenance
operations can be sped up by using the WAY based method.
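For example, with a 1MB L2 cache and 32 byte lines (figures assumed
purely for illustration), cleaning a 1MB buffer by PA takes 32768
individual line operations, whereas the way-based clean is a single
CLEAN_WAY write followed by one poll of the way mask.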

Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/mm/cache-l2x0.c |   95 ++++++++++++++++++++++++++++------------------
 1 files changed, 58 insertions(+), 37 deletions(-)

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index b2938d4..c0d6108 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -116,6 +116,18 @@ static void l2x0_flush_all(void)
 	spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
+static void l2x0_clean_all(void)
+{
+	unsigned long flags;
+
+	/* clean all ways */
+	spin_lock_irqsave(&l2x0_lock, flags);
+	writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_WAY);
+	cache_wait(l2x0_base + L2X0_CLEAN_WAY, l2x0_way_mask);
+	cache_sync();
+	spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
 static void l2x0_inv_all(void)
 {
 	unsigned long flags;
@@ -171,54 +183,63 @@ static void l2x0_inv_range(unsigned long start, unsigned long end)
 
 static void l2x0_clean_range(unsigned long start, unsigned long end)
 {
-	void __iomem *base = l2x0_base;
-	unsigned long flags;
 
-	spin_lock_irqsave(&l2x0_lock, flags);
-	start &= ~(CACHE_LINE_SIZE - 1);
-	while (start < end) {
-		unsigned long blk_end = start + min(end - start, 4096UL);
+	if ((end - start) >= l2x0_size) {
+		l2x0_clean_all();
+	} else {
+		void __iomem *base = l2x0_base;
+		unsigned long flags, blk_end;
 
-		while (start < blk_end) {
-			l2x0_clean_line(start);
-			start += CACHE_LINE_SIZE;
-		}
-
-		if (blk_end < end) {
-			spin_unlock_irqrestore(&l2x0_lock, flags);
-			spin_lock_irqsave(&l2x0_lock, flags);
+		spin_lock_irqsave(&l2x0_lock, flags);
+		start &= ~(CACHE_LINE_SIZE - 1);
+		while (start < end) {
+			blk_end = start + min(end - start, 4096UL);
+
+			while (start < blk_end) {
+				l2x0_clean_line(start);
+				start += CACHE_LINE_SIZE;
+			}
+
+			if (blk_end < end) {
+				spin_unlock_irqrestore(&l2x0_lock, flags);
+				spin_lock_irqsave(&l2x0_lock, flags);
+			}
 		}
+		cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
+		cache_sync();
+		spin_unlock_irqrestore(&l2x0_lock, flags);
 	}
-	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
-	cache_sync();
-	spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
 static void l2x0_flush_range(unsigned long start, unsigned long end)
 {
-	void __iomem *base = l2x0_base;
-	unsigned long flags;
-
-	spin_lock_irqsave(&l2x0_lock, flags);
-	start &= ~(CACHE_LINE_SIZE - 1);
-	while (start < end) {
-		unsigned long blk_end = start + min(end - start, 4096UL);
-
-		debug_writel(0x03);
-		while (start < blk_end) {
-			l2x0_flush_line(start);
-			start += CACHE_LINE_SIZE;
-		}
-		debug_writel(0x00);
+	if ((end - start) >= l2x0_size) {
+		l2x0_flush_all();
+	} else {
+		void __iomem *base = l2x0_base;
+		unsigned long flags, blk_end;
 
-		if (blk_end < end) {
-			spin_unlock_irqrestore(&l2x0_lock, flags);
-			spin_lock_irqsave(&l2x0_lock, flags);
+		spin_lock_irqsave(&l2x0_lock, flags);
+		start &= ~(CACHE_LINE_SIZE - 1);
+		while (start < end) {
+			blk_end = start + min(end - start, 4096UL);
+
+			debug_writel(0x03);
+			while (start < blk_end) {
+				l2x0_flush_line(start);
+				start += CACHE_LINE_SIZE;
+			}
+			debug_writel(0x00);
+
+			if (blk_end < end) {
+				spin_unlock_irqrestore(&l2x0_lock, flags);
+				spin_lock_irqsave(&l2x0_lock, flags);
+			}
 		}
+		cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1);
+		cache_sync();
+		spin_unlock_irqrestore(&l2x0_lock, flags);
 	}
-	cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1);
-	cache_sync();
-	spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
 static void l2x0_disable(void)
-- 
1.6.0.4



* Re: [PATCH 4/4] ARM: l2x0: Optimise the range based operations
From: Catalin Marinas @ 2010-09-06 10:26 UTC
  To: Santosh Shilimkar; +Cc: linux-arm-kernel, linux-omap

On Sat, 2010-07-31 at 23:16 +0530, Santosh Shilimkar wrote:
> For big buffers which are in excess of the cache size, the maintenance
> operations by PA are very slow. For such buffers the maintenance
> operations can be sped up by using the WAY based method.
> 
> Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> ---
>  arch/arm/mm/cache-l2x0.c |   95 ++++++++++++++++++++++++++++------------------
>  1 files changed, 58 insertions(+), 37 deletions(-)
> 
> diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
> index b2938d4..c0d6108 100644
> --- a/arch/arm/mm/cache-l2x0.c
> +++ b/arch/arm/mm/cache-l2x0.c
> @@ -116,6 +116,18 @@ static void l2x0_flush_all(void)
>  	spin_unlock_irqrestore(&l2x0_lock, flags);
>  }
>  
> +static void l2x0_clean_all(void)
> +{
> +	unsigned long flags;
> +
> +	/* clean all ways */
> +	spin_lock_irqsave(&l2x0_lock, flags);
> +	writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_WAY);
> +	cache_wait(l2x0_base + L2X0_CLEAN_WAY, l2x0_way_mask);

If you base this on top of my PL310 optimisation, you should use
cache_wait_way(), as cache_wait() becomes a no-op.
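
A minimal sketch of the function on top of that series, assuming
cache_wait_way() keeps the (reg, mask) arguments used there:

	static void l2x0_clean_all(void)
	{
		unsigned long flags;

		/* clean all ways and poll until the way mask clears */
		spin_lock_irqsave(&l2x0_lock, flags);
		writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_WAY);
		cache_wait_way(l2x0_base + L2X0_CLEAN_WAY, l2x0_way_mask);
		cache_sync();
		spin_unlock_irqrestore(&l2x0_lock, flags);
	}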

>  static void l2x0_clean_range(unsigned long start, unsigned long end)
>  {
> -	void __iomem *base = l2x0_base;
> -	unsigned long flags;
>  
> -	spin_lock_irqsave(&l2x0_lock, flags);
> -	start &= ~(CACHE_LINE_SIZE - 1);
> -	while (start < end) {
> -		unsigned long blk_end = start + min(end - start, 4096UL);
> +	if ((end - start) >= l2x0_size) {
> +		l2x0_clean_all();
> +	} else {
> +		void __iomem *base = l2x0_base;
> +		unsigned long flags, blk_end;

Minor thing - the patch may be cleaner if we use a 'return' instead of
'else' to avoid further indentation (same for the other functions):

	if ((end - start) >= l2x0_size) {
		l2x0_clean_all();
		return;
	}
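
The whole of l2x0_clean_range() would then stay at one indentation
level; a sketch based purely on the hunk above:

	static void l2x0_clean_range(unsigned long start, unsigned long end)
	{
		void __iomem *base = l2x0_base;
		unsigned long flags;

		if ((end - start) >= l2x0_size) {
			l2x0_clean_all();
			return;
		}

		spin_lock_irqsave(&l2x0_lock, flags);
		start &= ~(CACHE_LINE_SIZE - 1);
		while (start < end) {
			unsigned long blk_end = start + min(end - start, 4096UL);

			while (start < blk_end) {
				l2x0_clean_line(start);
				start += CACHE_LINE_SIZE;
			}

			if (blk_end < end) {
				/* release the lock briefly to bound IRQ-off latency */
				spin_unlock_irqrestore(&l2x0_lock, flags);
				spin_lock_irqsave(&l2x0_lock, flags);
			}
		}
		cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
		cache_sync();
		spin_unlock_irqrestore(&l2x0_lock, flags);
	}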

Otherwise the patch looks fine to me.

-- 
Catalin



* RE: [PATCH 4/4] ARM: l2x0: Optimise the range based operations
From: Shilimkar, Santosh @ 2010-09-07  7:18 UTC
  To: Catalin Marinas; +Cc: linux-arm-kernel, linux-omap

> -----Original Message-----
> From: Catalin Marinas [mailto:catalin.marinas@arm.com]
> Sent: Monday, September 06, 2010 3:57 PM
> To: Shilimkar, Santosh
> Cc: linux-arm-kernel@lists.infradead.org; linux-omap@vger.kernel.org
> Subject: Re: [PATCH 4/4] ARM: l2x0: Optimise the range based operations
> 
> On Sat, 2010-07-31 at 23:16 +0530, Santosh Shilimkar wrote:
> > For big buffers which are in excess of the cache size, the maintenance
> > operations by PA are very slow. For such buffers the maintenance
> > operations can be sped up by using the WAY based method.
> >
> > Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
> > Cc: Catalin Marinas <catalin.marinas@arm.com>
> > ---
> >  arch/arm/mm/cache-l2x0.c |   95 ++++++++++++++++++++++++++++-----------
> -------
> >  1 files changed, 58 insertions(+), 37 deletions(-)
> >
> > diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
> > index b2938d4..c0d6108 100644
> > --- a/arch/arm/mm/cache-l2x0.c
> > +++ b/arch/arm/mm/cache-l2x0.c
> > @@ -116,6 +116,18 @@ static void l2x0_flush_all(void)
> >  	spin_unlock_irqrestore(&l2x0_lock, flags);
> >  }
> >
> > +static void l2x0_clean_all(void)
> > +{
> > +	unsigned long flags;
> > +
> > +	/* clean all ways */
> > +	spin_lock_irqsave(&l2x0_lock, flags);
> > +	writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_WAY);
> > +	cache_wait(l2x0_base + L2X0_CLEAN_WAY, l2x0_way_mask);
> 
> In case you'll base this on top of my PL310 optimisation, you should use
> cache_wait_way() as the cache_wait() becomes a no-op.
> 
Yep. Will fix that while rebasing.

Will post the full series, with Thomas's two patches included,
on top of your "[PATCH 0/9] Various patches for 2.6.37-rc1" series,
which is already in linux-next.

Regards,
Santosh

