All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/3] Ordered I/O accessors
@ 2010-07-09 11:07 Catalin Marinas
  2010-07-09 11:08 ` [PATCH v2 1/3] ARM: Introduce *_relaxed() " Catalin Marinas
                   ` (3 more replies)
  0 siblings, 4 replies; 23+ messages in thread
From: Catalin Marinas @ 2010-07-09 11:07 UTC (permalink / raw)
  To: linux-arm-kernel

Hi,

This updated series fixes a bug in the L2x0 code causing a deadlock
(thanks to Adrian Kwong at Trident). The solution is to convert the
entire cache-l2x0.c file to the *_relaxed() I/O accessors.


Catalin Marinas (3):
      ARM: Introduce *_relaxed() I/O accessors
      ARM: Convert L2x0 to use the IO relaxed operations for cache sync
      ARM: Add barriers to the I/O accessors if ARM_DMA_MEM_BUFFERABLE


 arch/arm/include/asm/io.h |   39 +++++++++++++++++++++++++++------------
 arch/arm/mm/cache-l2x0.c  |   26 +++++++++++++-------------
 2 files changed, 40 insertions(+), 25 deletions(-)

-- 
Catalin

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v2 1/3] ARM: Introduce *_relaxed() I/O accessors
  2010-07-09 11:07 [PATCH v2 0/3] Ordered I/O accessors Catalin Marinas
@ 2010-07-09 11:08 ` Catalin Marinas
  2010-07-09 16:08   ` Arnd Bergmann
  2010-07-09 11:08 ` [PATCH v2 2/3] ARM: Convert L2x0 to use the IO relaxed operations for cache sync Catalin Marinas
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 23+ messages in thread
From: Catalin Marinas @ 2010-07-09 11:08 UTC (permalink / raw)
  To: linux-arm-kernel

This patch introduces readl*_relaxed()/write*_relaxed() as the main I/O
accessors (when __mem_pci is defined). The standard read*()/write*()
macros are now based on the relaxed accessors.

This patch is in preparation for a subsequent patch which adds barriers
to the I/O accessors.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/include/asm/io.h |   29 +++++++++++++++++------------
 1 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index c980156..97fb9aa 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -179,25 +179,30 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
  * IO port primitives for more information.
  */
 #ifdef __mem_pci
-#define readb(c) ({ __u8  __v = __raw_readb(__mem_pci(c)); __v; })
-#define readw(c) ({ __u16 __v = le16_to_cpu((__force __le16) \
+#define readb_relaxed(c) ({ u8  __v = __raw_readb(__mem_pci(c)); __v; })
+#define readw_relaxed(c) ({ u16 __v = le16_to_cpu((__force __le16) \
 					__raw_readw(__mem_pci(c))); __v; })
-#define readl(c) ({ __u32 __v = le32_to_cpu((__force __le32) \
+#define readl_relaxed(c) ({ u32 __v = le32_to_cpu((__force __le32) \
 					__raw_readl(__mem_pci(c))); __v; })
-#define readb_relaxed(addr) readb(addr)
-#define readw_relaxed(addr) readw(addr)
-#define readl_relaxed(addr) readl(addr)
+
+#define writeb_relaxed(v,c)	__raw_writeb(v,__mem_pci(c))
+#define writew_relaxed(v,c)	__raw_writew((__force u16) \
+					cpu_to_le16(v),__mem_pci(c))
+#define writel_relaxed(v,c)	__raw_writel((__force u32) \
+					cpu_to_le32(v),__mem_pci(c))
+
+#define readb(c)		readb_relaxed(c)
+#define readw(c)		readw_relaxed(c)
+#define readl(c)		readl_relaxed(c)
+
+#define writeb(v,c)		writeb_relaxed(v,c)
+#define writew(v,c)		writew_relaxed(v,c)
+#define writel(v,c)		writel_relaxed(v,c)
 
 #define readsb(p,d,l)		__raw_readsb(__mem_pci(p),d,l)
 #define readsw(p,d,l)		__raw_readsw(__mem_pci(p),d,l)
 #define readsl(p,d,l)		__raw_readsl(__mem_pci(p),d,l)
 
-#define writeb(v,c)		__raw_writeb(v,__mem_pci(c))
-#define writew(v,c)		__raw_writew((__force __u16) \
-					cpu_to_le16(v),__mem_pci(c))
-#define writel(v,c)		__raw_writel((__force __u32) \
-					cpu_to_le32(v),__mem_pci(c))
-
 #define writesb(p,d,l)		__raw_writesb(__mem_pci(p),d,l)
 #define writesw(p,d,l)		__raw_writesw(__mem_pci(p),d,l)
 #define writesl(p,d,l)		__raw_writesl(__mem_pci(p),d,l)

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v2 2/3] ARM: Convert L2x0 to use the IO relaxed operations for cache sync
  2010-07-09 11:07 [PATCH v2 0/3] Ordered I/O accessors Catalin Marinas
  2010-07-09 11:08 ` [PATCH v2 1/3] ARM: Introduce *_relaxed() " Catalin Marinas
@ 2010-07-09 11:08 ` Catalin Marinas
  2010-07-09 11:08 ` [PATCH v2 3/3] ARM: Add barriers to the I/O accessors if ARM_DMA_MEM_BUFFERABLE Catalin Marinas
  2010-07-09 11:29 ` [PATCH v2 0/3] Ordered I/O accessors Russell King - ARM Linux
  3 siblings, 0 replies; 23+ messages in thread
From: Catalin Marinas @ 2010-07-09 11:08 UTC (permalink / raw)
  To: linux-arm-kernel

This patch is in preparation for a subsequent patch which adds barriers
to the I/O accessors. Since the mandatory barriers may do an L2 cache
sync, this patch avoids a recursive call into l2x0_cache_sync() via the
write*() accessors and wmb().

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/mm/cache-l2x0.c |   26 +++++++++++++-------------
 1 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 9819869..532c35c 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -32,14 +32,14 @@ static uint32_t l2x0_way_mask;	/* Bitmask of active ways */
 static inline void cache_wait(void __iomem *reg, unsigned long mask)
 {
 	/* wait for the operation to complete */
-	while (readl(reg) & mask)
+	while (readl_relaxed(reg) & mask)
 		;
 }
 
 static inline void cache_sync(void)
 {
 	void __iomem *base = l2x0_base;
-	writel(0, base + L2X0_CACHE_SYNC);
+	writel_relaxed(0, base + L2X0_CACHE_SYNC);
 	cache_wait(base + L2X0_CACHE_SYNC, 1);
 }
 
@@ -47,14 +47,14 @@ static inline void l2x0_clean_line(unsigned long addr)
 {
 	void __iomem *base = l2x0_base;
 	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
-	writel(addr, base + L2X0_CLEAN_LINE_PA);
+	writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA);
 }
 
 static inline void l2x0_inv_line(unsigned long addr)
 {
 	void __iomem *base = l2x0_base;
 	cache_wait(base + L2X0_INV_LINE_PA, 1);
-	writel(addr, base + L2X0_INV_LINE_PA);
+	writel_relaxed(addr, base + L2X0_INV_LINE_PA);
 }
 
 #ifdef CONFIG_PL310_ERRATA_588369
@@ -75,9 +75,9 @@ static inline void l2x0_flush_line(unsigned long addr)
 
 	/* Clean by PA followed by Invalidate by PA */
 	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
-	writel(addr, base + L2X0_CLEAN_LINE_PA);
+	writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA);
 	cache_wait(base + L2X0_INV_LINE_PA, 1);
-	writel(addr, base + L2X0_INV_LINE_PA);
+	writel_relaxed(addr, base + L2X0_INV_LINE_PA);
 }
 #else
 
@@ -90,7 +90,7 @@ static inline void l2x0_flush_line(unsigned long addr)
 {
 	void __iomem *base = l2x0_base;
 	cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1);
-	writel(addr, base + L2X0_CLEAN_INV_LINE_PA);
+	writel_relaxed(addr, base + L2X0_CLEAN_INV_LINE_PA);
 }
 #endif
 
@@ -109,7 +109,7 @@ static inline void l2x0_inv_all(void)
 
 	/* invalidate all ways */
 	spin_lock_irqsave(&l2x0_lock, flags);
-	writel(l2x0_way_mask, l2x0_base + L2X0_INV_WAY);
+	writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY);
 	cache_wait(l2x0_base + L2X0_INV_WAY, l2x0_way_mask);
 	cache_sync();
 	spin_unlock_irqrestore(&l2x0_lock, flags);
@@ -215,8 +215,8 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask)
 
 	l2x0_base = base;
 
-	cache_id = readl(l2x0_base + L2X0_CACHE_ID);
-	aux = readl(l2x0_base + L2X0_AUX_CTRL);
+	cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
+	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
 
 	/* Determine the number of ways */
 	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
@@ -245,17 +245,17 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask)
 	 * If you are booting from non-secure mode
 	 * accessing the below registers will fault.
 	 */
-	if (!(readl(l2x0_base + L2X0_CTRL) & 1)) {
+	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) {
 
 		/* l2x0 controller is disabled */
 		aux &= aux_mask;
 		aux |= aux_val;
-		writel(aux, l2x0_base + L2X0_AUX_CTRL);
+		writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL);
 
 		l2x0_inv_all();
 
 		/* enable L2X0 */
-		writel(1, l2x0_base + L2X0_CTRL);
+		writel_relaxed(1, l2x0_base + L2X0_CTRL);
 	}
 
 	outer_cache.inv_range = l2x0_inv_range;

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v2 3/3] ARM: Add barriers to the I/O accessors if ARM_DMA_MEM_BUFFERABLE
  2010-07-09 11:07 [PATCH v2 0/3] Ordered I/O accessors Catalin Marinas
  2010-07-09 11:08 ` [PATCH v2 1/3] ARM: Introduce *_relaxed() " Catalin Marinas
  2010-07-09 11:08 ` [PATCH v2 2/3] ARM: Convert L2x0 to use the IO relaxed operations for cache sync Catalin Marinas
@ 2010-07-09 11:08 ` Catalin Marinas
  2010-07-09 11:41   ` Catalin Marinas
  2010-07-09 11:29 ` [PATCH v2 0/3] Ordered I/O accessors Russell King - ARM Linux
  3 siblings, 1 reply; 23+ messages in thread
From: Catalin Marinas @ 2010-07-09 11:08 UTC (permalink / raw)
  To: linux-arm-kernel

When the coherent DMA buffers are mapped as Normal Non-cacheable
(ARM_DMA_MEM_BUFFERABLE enabled), buffer accesses are no longer ordered
with Device memory accesses causing failures in device drivers that do
not use the mandatory memory barriers before starting a DMA transfer.
LKML discussions led to the conclusion that such barriers have to be
added to the I/O accessors:

http://thread.gmane.org/gmane.linux.kernel/683509/focus=686153
http://thread.gmane.org/gmane.linux.ide/46414
http://thread.gmane.org/gmane.linux.kernel.cross-arch/5250

This patch introduces a wmb() barrier to the write*() I/O accessors to
handle the situations where Normal Non-cacheable writes are still in the
processor (or L2 cache controller) write buffer before a DMA transfer
command is issued. For the read*() accessors, a rmb() is introduced
after the I/O to avoid speculative loads where the driver polls for a
DMA transfer ready bit.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/include/asm/io.h |   10 ++++++++++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 97fb9aa..8f3edef 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -191,6 +191,15 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
 #define writel_relaxed(v,c)	__raw_writel((__force u32) \
 					cpu_to_le32(v),__mem_pci(c))
 
+#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
+#define readb(c)		({ u8  __v = readb_relaxed(c); rmb(); __v; })
+#define readw(c)		({ u16 __v = readw_relaxed(c); rmb(); __v; })
+#define readl(c)		({ u32 __v = readl_relaxed(c); rmb(); __v; })
+
+#define writeb(v,c)		do { wmb(); writeb_relaxed(v,c); } while (0)
+#define writew(v,c)		do { wmb(); writew_relaxed(v,c); } while (0)
+#define writel(v,c)		do { wmb(); writel_relaxed(v,c); } while (0)
+#else
 #define readb(c)		readb_relaxed(c)
 #define readw(c)		readw_relaxed(c)
 #define readl(c)		readl_relaxed(c)
@@ -198,6 +207,7 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
 #define writeb(v,c)		writeb_relaxed(v,c)
 #define writew(v,c)		writew_relaxed(v,c)
 #define writel(v,c)		writel_relaxed(v,c)
+#endif
 
 #define readsb(p,d,l)		__raw_readsb(__mem_pci(p),d,l)
 #define readsw(p,d,l)		__raw_readsw(__mem_pci(p),d,l)

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v2 0/3] Ordered I/O accessors
  2010-07-09 11:07 [PATCH v2 0/3] Ordered I/O accessors Catalin Marinas
                   ` (2 preceding siblings ...)
  2010-07-09 11:08 ` [PATCH v2 3/3] ARM: Add barriers to the I/O accessors if ARM_DMA_MEM_BUFFERABLE Catalin Marinas
@ 2010-07-09 11:29 ` Russell King - ARM Linux
  3 siblings, 0 replies; 23+ messages in thread
From: Russell King - ARM Linux @ 2010-07-09 11:29 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Jul 09, 2010 at 12:07:56PM +0100, Catalin Marinas wrote:
> This updated series fixes a bug in the L2x0 code causing a deadlock
> (thanks to Adrian Kwong at Trident). The solution is to convert the
> entire cache-l2x0.c file to the *_relaxed() I/O accessors.

Ok.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v2 3/3] ARM: Add barriers to the I/O accessors if ARM_DMA_MEM_BUFFERABLE
  2010-07-09 11:08 ` [PATCH v2 3/3] ARM: Add barriers to the I/O accessors if ARM_DMA_MEM_BUFFERABLE Catalin Marinas
@ 2010-07-09 11:41   ` Catalin Marinas
  2010-07-09 12:16     ` Russell King - ARM Linux
  0 siblings, 1 reply; 23+ messages in thread
From: Catalin Marinas @ 2010-07-09 11:41 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 2010-07-09 at 12:08 +0100, Catalin Marinas wrote:
> diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
> index 97fb9aa..8f3edef 100644
> --- a/arch/arm/include/asm/io.h
> +++ b/arch/arm/include/asm/io.h
> @@ -191,6 +191,15 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
>  #define writel_relaxed(v,c)    __raw_writel((__force u32) \
>                                         cpu_to_le32(v),__mem_pci(c))
> 
> +#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
> +#define readb(c)               ({ u8  __v = readb_relaxed(c); rmb(); __v; })
> +#define readw(c)               ({ u16 __v = readw_relaxed(c); rmb(); __v; })
> +#define readl(c)               ({ u32 __v = readl_relaxed(c); rmb(); __v; })
> +
> +#define writeb(v,c)            do { wmb(); writeb_relaxed(v,c); } while (0)
> +#define writew(v,c)            do { wmb(); writew_relaxed(v,c); } while (0)
> +#define writel(v,c)            do { wmb(); writel_relaxed(v,c); } while (0)

This part requires a small change. It looks like the e1000e driver
doesn't like the do {...} while (0) constructs in the write*() accessors
(the E1000_WRITE_REG_ARRAY macro). Thanks to Giuseppe for finding this.


diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 8f3edef..b572986 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -196,9 +196,9 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
 #define readw(c)		({ u16 __v = readw_relaxed(c); rmb(); __v; })
 #define readl(c)		({ u32 __v = readl_relaxed(c); rmb(); __v; })
 
-#define writeb(v,c)		do { wmb(); writeb_relaxed(v,c); } while (0)
-#define writew(v,c)		do { wmb(); writew_relaxed(v,c); } while (0)
-#define writel(v,c)		do { wmb(); writel_relaxed(v,c); } while (0)
+#define writeb(v,c)		({ wmb(); writeb_relaxed(v,c); })
+#define writew(v,c)		({ wmb(); writew_relaxed(v,c); })
+#define writel(v,c)		({ wmb(); writel_relaxed(v,c); })
 #else
 #define readb(c)		readb_relaxed(c)
 #define readw(c)		readw_relaxed(c)


-- 
Catalin

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v2 3/3] ARM: Add barriers to the I/O accessors if ARM_DMA_MEM_BUFFERABLE
  2010-07-09 11:41   ` Catalin Marinas
@ 2010-07-09 12:16     ` Russell King - ARM Linux
  2010-07-09 13:02       ` [PATCH v2 3/3] ARM: Add barriers to the I/O accessors ifARM_DMA_MEM_BUFFERABLE Catalin Marinas
  0 siblings, 1 reply; 23+ messages in thread
From: Russell King - ARM Linux @ 2010-07-09 12:16 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Jul 09, 2010 at 12:41:36PM +0100, Catalin Marinas wrote:
> This part requires a small change. It looks like the e1000e driver
> doesn't like the do {...} while (0) constructs in the write*() accessors
> (the E1000_WRITE_REG_ARRAY macro). Thanks to Giuseppe for finding this.

Hmm.  I think those additional parens in drivers/net/e1000e/hw.h should
be killed off instead.  They aren't serving any useful purpose.

In fact, this is potentially dangerous - it causes write[bwl]() not to
be 'void' - in that if you do use write[bwl]() in an expression that
wants its return value, write[bwl]() will generate a read.  (While you
might not use the value, you're relying on the compiler being able to
spot that you're not using the value.)

Keeping the do { } while() there prevents write[bwl]() being used as
an expression, and therefore avoids nasty surprises when the compiler
decides to read the register after writing.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v2 3/3] ARM: Add barriers to the I/O accessors ifARM_DMA_MEM_BUFFERABLE
  2010-07-09 12:16     ` Russell King - ARM Linux
@ 2010-07-09 13:02       ` Catalin Marinas
  2010-07-09 14:21         ` [PATCH v2 3/3] ARM: Add barriers to the I/O accessorsifARM_DMA_MEM_BUFFERABLE Catalin Marinas
  0 siblings, 1 reply; 23+ messages in thread
From: Catalin Marinas @ 2010-07-09 13:02 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 2010-07-09 at 13:16 +0100, Russell King - ARM Linux wrote:
> On Fri, Jul 09, 2010 at 12:41:36PM +0100, Catalin Marinas wrote:
> > This part requires a small change. It looks like the e1000e driver
> > doesn't like the do {...} while (0) constructs in the write*() accessors
> > (the E1000_WRITE_REG_ARRAY macro). Thanks to Giuseppe for finding this.
> 
> Hmm.  I think those additional parens in drivers/net/e1000e/hw.h should
> be killed off instead.  They aren't serving any useful purpose.
> 
> In fact, this is potentially dangerous - it causes write[bwl]() not to
> be 'void' - in that if you do use write[bwl]() in an expression that
> wants its return value, write[bwl]() will generate a read.  (While you
> might not use the value, you're relying on the compiler being able to
> spot that you're not using the value.)
> 
> Keeping the do { } while() there prevents write[bwl]() being used as
> an expression, and therefore avoids nasty surprises when the compiler
> decides to read the register after writing.

I haven't thought of that but yes, it could cause problems. An better
solution may be to use a "static inline void writel()" function. If you
are ok with this, I'll post a new set of patches.

-- 
Catalin

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v2 3/3] ARM: Add barriers to the I/O accessorsifARM_DMA_MEM_BUFFERABLE
  2010-07-09 13:02       ` [PATCH v2 3/3] ARM: Add barriers to the I/O accessors ifARM_DMA_MEM_BUFFERABLE Catalin Marinas
@ 2010-07-09 14:21         ` Catalin Marinas
  2010-07-09 14:34           ` Russell King - ARM Linux
  0 siblings, 1 reply; 23+ messages in thread
From: Catalin Marinas @ 2010-07-09 14:21 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 2010-07-09 at 14:02 +0100, Catalin Marinas wrote:
> On Fri, 2010-07-09 at 13:16 +0100, Russell King - ARM Linux wrote:
> > On Fri, Jul 09, 2010 at 12:41:36PM +0100, Catalin Marinas wrote:
> > > This part requires a small change. It looks like the e1000e driver
> > > doesn't like the do {...} while (0) constructs in the write*() accessors
> > > (the E1000_WRITE_REG_ARRAY macro). Thanks to Giuseppe for finding this.
> >
> > Hmm.  I think those additional parens in drivers/net/e1000e/hw.h should
> > be killed off instead.  They aren't serving any useful purpose.
> >
> > In fact, this is potentially dangerous - it causes write[bwl]() not to
> > be 'void' - in that if you do use write[bwl]() in an expression that
> > wants its return value, write[bwl]() will generate a read.  (While you
> > might not use the value, you're relying on the compiler being able to
> > spot that you're not using the value.)
> >
> > Keeping the do { } while() there prevents write[bwl]() being used as
> > an expression, and therefore avoids nasty surprises when the compiler
> > decides to read the register after writing.
> 
> I haven't thought of that but yes, it could cause problems. An better
> solution may be to use a "static inline void writel()" function. If you
> are ok with this, I'll post a new set of patches.

The patch below changes the __raw_* accessors to inline functions. Do
you see any problem with this? This would be needed even without the I/O
ordering patches.


ARM: Convert the read[bwl]/write[bwl] I/O accessors to inline functions

From: Catalin Marinas <catalin.marinas@arm.com>

The write* accessors conversion from macros ensures that the return type
is always void. For completeness, the read* macros are also converted to
inline functions.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/include/asm/io.h |   34 ++++++++++++++++++++++++++++------
 1 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index c980156..b5be0ec 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -46,13 +46,35 @@ extern void __raw_readsb(const void __iomem *addr, void *data, int bytelen);
 extern void __raw_readsw(const void __iomem *addr, void *data, int wordlen);
 extern void __raw_readsl(const void __iomem *addr, void *data, int longlen);
 
-#define __raw_writeb(v,a)	(__chk_io_ptr(a), *(volatile unsigned char __force  *)(a) = (v))
-#define __raw_writew(v,a)	(__chk_io_ptr(a), *(volatile unsigned short __force *)(a) = (v))
-#define __raw_writel(v,a)	(__chk_io_ptr(a), *(volatile unsigned int __force   *)(a) = (v))
+static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
+{
+	*(volatile u8 __force *)addr = val;
+}
+
+static inline void __raw_writew(u16 val, volatile void __iomem *addr)
+{
+	*(volatile u16 __force *)addr = val;
+}
+
+static inline void __raw_writel(u32 val, volatile void __iomem *addr)
+{
+	*(volatile u32 __force *)addr = val;
+}
 
-#define __raw_readb(a)		(__chk_io_ptr(a), *(volatile unsigned char __force  *)(a))
-#define __raw_readw(a)		(__chk_io_ptr(a), *(volatile unsigned short __force *)(a))
-#define __raw_readl(a)		(__chk_io_ptr(a), *(volatile unsigned int __force   *)(a))
+static inline u8 __raw_readb(const volatile void __iomem *addr)
+{
+	return *(const volatile u8 __force *)addr;
+}
+
+static inline u16 __raw_readw(const volatile void __iomem *addr)
+{
+	return *(const volatile u16 __force *)addr;
+}
+
+static inline u32 __raw_readl(const volatile void __iomem *addr)
+{
+	return *(const volatile u32 __force *)addr;
+}
 
 /*
  * Architecture ioremap implementation.



-- 
Catalin

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v2 3/3] ARM: Add barriers to the I/O accessorsifARM_DMA_MEM_BUFFERABLE
  2010-07-09 14:21         ` [PATCH v2 3/3] ARM: Add barriers to the I/O accessorsifARM_DMA_MEM_BUFFERABLE Catalin Marinas
@ 2010-07-09 14:34           ` Russell King - ARM Linux
  2010-07-09 15:02             ` [PATCH v2 3/3] ARM: Add barriers to the I/OaccessorsifARM_DMA_MEM_BUFFERABLE Catalin Marinas
  0 siblings, 1 reply; 23+ messages in thread
From: Russell King - ARM Linux @ 2010-07-09 14:34 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Jul 09, 2010 at 03:21:23PM +0100, Catalin Marinas wrote:
> The patch below changes the __raw_* accessors to inline functions. Do
> you see any problem with this? This would be needed even without the I/O
> ordering patches.

I'd suggest you try building a kernel before and after this patch and
compare the sizes - and then look at the disassembly associated with
these accessors.

Certainly previous gcc versions generated useless instruction overhead
when these were inline functions - partly down to it wanting to make
sure things passed to/from __raw_writeb() and __raw_readb() were
absolutely definitely only 8-bit in size, and partly because this
seems to destroy GCC's ability to spot commonalities between multiple
accesses.

That's the reason why I've never pushed a patch which causes read[bwl]
and write[bwl] to check their arguments a little more strictly - I
found that it caused gcc to generate worse code for these accessors.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v2 3/3] ARM: Add barriers to the I/OaccessorsifARM_DMA_MEM_BUFFERABLE
  2010-07-09 14:34           ` Russell King - ARM Linux
@ 2010-07-09 15:02             ` Catalin Marinas
  0 siblings, 0 replies; 23+ messages in thread
From: Catalin Marinas @ 2010-07-09 15:02 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 2010-07-09 at 15:34 +0100, Russell King - ARM Linux wrote:
> On Fri, Jul 09, 2010 at 03:21:23PM +0100, Catalin Marinas wrote:
> > The patch below changes the __raw_* accessors to inline functions. Do
> > you see any problem with this? This would be needed even without the I/O
> > ordering patches.
> 
> I'd suggest you try building a kernel before and after this patch and
> compare the sizes - and then look at the disassembly associated with
> these accessors.

I tried and the difference is quite big - nearly 78KB for a Thumb-2
kernel (with relatively new compiler). So I'll keep the original do
{...} while (0) implementation and propose a patch for the e1000.

-- 
Catalin

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v2 1/3] ARM: Introduce *_relaxed() I/O accessors
  2010-07-09 11:08 ` [PATCH v2 1/3] ARM: Introduce *_relaxed() " Catalin Marinas
@ 2010-07-09 16:08   ` Arnd Bergmann
  2010-07-09 16:53     ` Catalin Marinas
  2010-07-09 18:24     ` Russell King - ARM Linux
  0 siblings, 2 replies; 23+ messages in thread
From: Arnd Bergmann @ 2010-07-09 16:08 UTC (permalink / raw)
  To: linux-arm-kernel

On Friday 09 July 2010, Catalin Marinas wrote:
> This patch introduces readl*_relaxed()/write*_relaxed() as the main I/O
> accessors (when __mem_pci is defined). The standard read*()/write*()
> macros are now based on the relaxed accessors.

Are these new macros valid for both PCI and non-PCI mmio addresses?
The way I understand it, the regular readl/writel family is only
valid for __iomem addresses in PCI BARs, while anything else
has to go through either ioread32/iowrite32 or something arch
specific.

Does this mean we also need an ioread32_releaxed etc?

	Arnd

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v2 1/3] ARM: Introduce *_relaxed() I/O accessors
  2010-07-09 16:08   ` Arnd Bergmann
@ 2010-07-09 16:53     ` Catalin Marinas
  2010-07-09 17:17       ` Arnd Bergmann
  2010-07-09 18:24     ` Russell King - ARM Linux
  1 sibling, 1 reply; 23+ messages in thread
From: Catalin Marinas @ 2010-07-09 16:53 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 2010-07-09 at 17:08 +0100, Arnd Bergmann wrote:
> On Friday 09 July 2010, Catalin Marinas wrote:
> > This patch introduces readl*_relaxed()/write*_relaxed() as the main I/O
> > accessors (when __mem_pci is defined). The standard read*()/write*()
> > macros are now based on the relaxed accessors.
> 
> Are these new macros valid for both PCI and non-PCI mmio addresses?
> The way I understand it, the regular readl/writel family is only
> valid for __iomem addresses in PCI BARs, while anything else
> has to go through either ioread32/iowrite32 or something arch
> specific.

On ARM we seem to use the readl/writel accessors for a lot more than the
PCI (pretty much anything that is ioremap'ed). Not sure why but this has
been the case for a long time.

> Does this mean we also need an ioread32_releaxed etc?

Most memory ordering problems that I've seen were with PCI devices and
DMA coherent buffers. For other DMA engines used together with
ioread*(), we could indeed introduce ioread32_relaxed(). But I'm not
sure there are so many (a quick grep for dma_alloc_coherent and iowrite
shows about 14 drivers).

Thanks.

-- 
Catalin

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v2 1/3] ARM: Introduce *_relaxed() I/O accessors
  2010-07-09 16:53     ` Catalin Marinas
@ 2010-07-09 17:17       ` Arnd Bergmann
  0 siblings, 0 replies; 23+ messages in thread
From: Arnd Bergmann @ 2010-07-09 17:17 UTC (permalink / raw)
  To: linux-arm-kernel

On Friday 09 July 2010, Catalin Marinas wrote: 
> On Fri, 2010-07-09 at 17:08 +0100, Arnd Bergmann wrote:
> > 
> > Are these new macros valid for both PCI and non-PCI mmio addresses?
> > The way I understand it, the regular readl/writel family is only
> > valid for __iomem addresses in PCI BARs, while anything else
> > has to go through either ioread32/iowrite32 or something arch
> > specific.
>
> On ARM we seem to use the readl/writel accessors for a lot more than the
> PCI (pretty much anything that is ioremap'ed). Not sure why but this has
> been the case for a long time.

Ok.

> > Does this mean we also need an ioread32_releaxed etc?
> 
> Most memory ordering problems that I've seen were with PCI devices and
> DMA coherent buffers. For other DMA engines used together with
> ioread*(), we could indeed introduce ioread32_relaxed(). But I'm not
> sure there are so many (a quick grep for dma_alloc_coherent and iowrite
> shows about 14 drivers).

Well, if only PCI devices really need the strict ordering, that may be
a good reason to use a less strict I/O accessor for non-PCI devices.

I think the 14 drivers you mentioned are mostly PCI driver as well.
dma_alloc_coherent and iowrite are both defined to do the right
thing for any possible bus, while pci_alloc_consistent and writel
are doing the same thing on PCI but are undefined (or arch specific
if you want) for other kinds of devices.

It would probably be reasonable to define iowrite as writel for PCI
and as write_relaxed for other buses, requiring manual synchronization
for other kinds of DMA. You could detect this at ioremap time and then
use one of the bits in the __iomem token to decide if the strict ordering
is necessary.

	Arnd

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v2 1/3] ARM: Introduce *_relaxed() I/O accessors
  2010-07-09 16:08   ` Arnd Bergmann
  2010-07-09 16:53     ` Catalin Marinas
@ 2010-07-09 18:24     ` Russell King - ARM Linux
  2010-07-09 19:30       ` Arnd Bergmann
  1 sibling, 1 reply; 23+ messages in thread
From: Russell King - ARM Linux @ 2010-07-09 18:24 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Jul 09, 2010 at 06:08:01PM +0200, Arnd Bergmann wrote:
> On Friday 09 July 2010, Catalin Marinas wrote:
> > This patch introduces readl*_relaxed()/write*_relaxed() as the main I/O
> > accessors (when __mem_pci is defined). The standard read*()/write*()
> > macros are now based on the relaxed accessors.
> 
> Are these new macros valid for both PCI and non-PCI mmio addresses?
> The way I understand it, the regular readl/writel family is only
> valid for __iomem addresses in PCI BARs, while anything else
> has to go through either ioread32/iowrite32 or something arch
> specific.
> 
> Does this mean we also need an ioread32_releaxed etc?

Only if you want to deal with PCI IO accesses as well.  The
ioread*/iowrite* interfaces are more complex implementations than
plain read/write[bwl], because they have to work out whether the
void __iomem * cookie relates to an ioremapped cookie or a PCI IO
cookie.  (That's the only reason to use the io* variants - if you
want a driver which can portably access its registers via either
PCI MEM or PCI IO access methods.)

Plain read/write[bwl] are much simpler, and on ARM are just pointer
dereferences (and now with an additional barrier), so we allow their
use in architecture specific drivers.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v2 1/3] ARM: Introduce *_relaxed() I/O accessors
  2010-07-09 18:24     ` Russell King - ARM Linux
@ 2010-07-09 19:30       ` Arnd Bergmann
  2010-07-09 22:31         ` Catalin Marinas
  0 siblings, 1 reply; 23+ messages in thread
From: Arnd Bergmann @ 2010-07-09 19:30 UTC (permalink / raw)
  To: linux-arm-kernel

On Friday 09 July 2010 20:24:17 Russell King - ARM Linux wrote:
> On Fri, Jul 09, 2010 at 06:08:01PM +0200, Arnd Bergmann wrote:
> > On Friday 09 July 2010, Catalin Marinas wrote:
> > > This patch introduces readl*_relaxed()/write*_relaxed() as the main I/O
> > > accessors (when __mem_pci is defined). The standard read*()/write*()
> > > macros are now based on the relaxed accessors.
> > 
> > Are these new macros valid for both PCI and non-PCI mmio addresses?
> > The way I understand it, the regular readl/writel family is only
> > valid for __iomem addresses in PCI BARs, while anything else
> > has to go through either ioread32/iowrite32 or something arch
> > specific.
> > 
> > Does this mean we also need an ioread32_releaxed etc?
> 
> Only if you want to deal with PCI IO accesses as well.  The
> ioread*/iowrite* interfaces are more complex implementations than
> plain read/write[bwl], because they have to work out whether the
> void __iomem * cookie relates to an ioremapped cookie or a PCI IO
> cookie.  (That's the only reason to use the io* variants - if you
> want a driver which can portably access its registers via either
> PCI MEM or PCI IO access methods.)

Right. IMHO the PCI IO variants should get the same barriers that
Catalin is introducing in the PCI MEM variants. The ordering requirements
for IO accesses are stricter than those for MEM, the main difference
being that MEM writes are posted while IO writes are synchronizing.

Thinking about it from this angle, I'm not even sure that x86 compatibility
requires arm to add wmb() after writel(). IIRC, PCI memory space writes are
required to be ordered with regard to each other, but not necessarily
with regard to other CPU instructions or DMA transfers, unlike memory
space reads and IO space read/write accesses.

> Plain read/write[bwl] are much simpler, and on ARM are just pointer
> dereferences (and now with an additional barrier), so we allow their
> use in architecture specific drivers.

Yes, that makes sense.

	Arnd

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v2 1/3] ARM: Introduce *_relaxed() I/O accessors
  2010-07-09 19:30       ` Arnd Bergmann
@ 2010-07-09 22:31         ` Catalin Marinas
  2010-07-12 11:39           ` Arnd Bergmann
  0 siblings, 1 reply; 23+ messages in thread
From: Catalin Marinas @ 2010-07-09 22:31 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 2010-07-09 at 20:30 +0100, Arnd Bergmann wrote:
> On Friday 09 July 2010 20:24:17 Russell King - ARM Linux wrote:
> > On Fri, Jul 09, 2010 at 06:08:01PM +0200, Arnd Bergmann wrote:
> > > On Friday 09 July 2010, Catalin Marinas wrote:
> > > > This patch introduces readl*_relaxed()/write*_relaxed() as the main I/O
> > > > accessors (when __mem_pci is defined). The standard read*()/write*()
> > > > macros are now based on the relaxed accessors.
> > >
> > > Are these new macros valid for both PCI and non-PCI mmio addresses?
> > > The way I understand it, the regular readl/writel family is only
> > > valid for __iomem addresses in PCI BARs, while anything else
> > > has to go through either ioread32/iowrite32 or something arch
> > > specific.
> > >
> > > Does this mean we also need an ioread32_releaxed etc?
> >
> > Only if you want to deal with PCI IO accesses as well.  The
> > ioread*/iowrite* interfaces are more complex implementations than
> > plain read/write[bwl], because they have to work out whether the
> > void __iomem * cookie relates to an ioremapped cookie or a PCI IO
> > cookie.  (That's the only reason to use the io* variants - if you
> > want a driver which can portably access its registers via either
> > PCI MEM or PCI IO access methods.)
> 
> Right. IMHO the PCI IO variants should get the same barriers that
> Catalin is introducing in the PCI MEM variants. The ordering requirements
> for IO accesses are stricter than those for MEM, the main difference
> being that MEM writes are posted while IO writes are synchronizing.

Linux docs don't clearly define the ordering requirements. There are
various e-mail threads but not a finalised document. In principle,
trying to be as close to x86 as possible, in which case we probably need
barriers here as well.

> Thinking about it from this angle, I'm not even sure that x86 compatibility
> requires arm to add wmb() after writel(). IIRC, PCI memory space writes are
> required to be ordered with regard to each other, but not necessarily
> with regard to other CPU instructions or DMA transfers, unlike memory
> space reads and IO space read/write accesses.

We don't need a wmb() after writel(), my patch only adds wmb() before
writel(). We need previous DMA buffer writes to be visible before
writel(), otherwise we get corrupted DMA transfers.

There were past discussions about writel() and spin_unlock() ordering.
For PCI, people introduced mmiowb(). In general, that's not needed on
ARM as the DMB in spin_unlock() should in general suffice (but it may
depend on whether the bus gets notified about the DMB).

-- 
Catalin

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v2 1/3] ARM: Introduce *_relaxed() I/O accessors
  2010-07-09 22:31         ` Catalin Marinas
@ 2010-07-12 11:39           ` Arnd Bergmann
  2010-07-12 11:50             ` Jamie Lokier
  2010-07-12 12:00             ` Catalin Marinas
  0 siblings, 2 replies; 23+ messages in thread
From: Arnd Bergmann @ 2010-07-12 11:39 UTC (permalink / raw)
  To: linux-arm-kernel

On Saturday 10 July 2010, Catalin Marinas wrote:
> > Right. IMHO the PCI IO variants should get the same barriers that
> > Catalin is introducing in the PCI MEM variants. The ordering requirements
> > for IO accesses are stricter than those for MEM, the main difference
> > being that MEM writes are posted while IO writes are synchronizing.
> 
> Linux docs don't clearly define the ordering requirements. There are
> various e-mail threads but not a finalised document. In principle,
> trying to be as close to x86 as possible, in which case we probably need
> barriers here as well.

Right, emulating x86 behaviour for any PCI access with inl/readl/ioread32
and their counterparts seems to be the only practical answer.
It would be nice to have an architecture independent way of doing non-PCI
register access, and more importantly relaxed accesses to those.

Note that no other architecture currently defines write*_relaxed(), only
read*_relaxed(). Maybe we should add them everywhere, not just on ARM.

> > Thinking about it from this angle, I'm not even sure that x86 compatibility
> > requires arm to add wmb() after writel(). IIRC, PCI memory space writes are
> > required to be ordered with regard to each other, but not necessarily
> > with regard to other CPU instructions or DMA transfers, unlike memory
> > space reads and IO space read/write accesses.
> 
> We don't need a wmb() after writel(), my patch only adds wmb() before
> writel(). We need previous DMA buffer writes to be visible before
> writel(), otherwise we get corrupted DMA transfers.

Ah, that's right: writel and outl both need the barrier before the access,
but writel will never need a barrier after the access.
The x86 variant of outl also has the implicit ordering after the access,
but I'm not sure if we need to emulate that. I can't currently think
of a case where it's strictly required because any later access to the same
PCI function will be ordered anyway.

	Arnd

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v2 1/3] ARM: Introduce *_relaxed() I/O accessors
  2010-07-12 11:39           ` Arnd Bergmann
@ 2010-07-12 11:50             ` Jamie Lokier
  2010-07-12 11:53               ` Catalin Marinas
  2010-07-12 12:00             ` Catalin Marinas
  1 sibling, 1 reply; 23+ messages in thread
From: Jamie Lokier @ 2010-07-12 11:50 UTC (permalink / raw)
  To: linux-arm-kernel

Arnd Bergmann wrote:
> Ah, that's right: writel and outl both need the barrier before the access,
> but writel will never need a barrier after the access.
> The x86 variant of outl also has the implicit ordering after the access,
> but I'm not sure if we need to emulate that. I can't currently think
> of a case where it's strictly required because any later access to the same
> PCI function will be ordered anyway.

What about those ARMs which can buffer a write for an indefinite period?
Do any drivers expect writes to be posted in a reasonably short time?

-- Jamie

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v2 1/3] ARM: Introduce *_relaxed() I/O accessors
  2010-07-12 11:50             ` Jamie Lokier
@ 2010-07-12 11:53               ` Catalin Marinas
  2010-07-12 12:46                 ` Jamie Lokier
  0 siblings, 1 reply; 23+ messages in thread
From: Catalin Marinas @ 2010-07-12 11:53 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, 2010-07-12 at 12:50 +0100, Jamie Lokier wrote:
> Arnd Bergmann wrote:
> > Ah, that's right: writel and outl both need the barrier before the access,
> > but writel will never need a barrier after the access.
> > The x86 variant of outl also has the implicit ordering after the access,
> > but I'm not sure if we need to emulate that. I can't currently think
> > of a case where it's strictly required because any later access to the same
> > PCI function will be ordered anyway.
> 
> What about those ARMs which can buffer a write for an indefinite period?
> Do any drivers expect writes to be posted in a reasonably short time?

Writing to any device is not guaranteed to succeed (i.e. change the
state of the device) in a certain amount of time (this is probably the
case on x86 as well). If you need this certainty in the code, you do a
read back from the device. Since Device memory accesses are ordered in
ARM, we don't need additional barriers for such situations.

-- 
Catalin

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v2 1/3] ARM: Introduce *_relaxed() I/O accessors
  2010-07-12 11:39           ` Arnd Bergmann
  2010-07-12 11:50             ` Jamie Lokier
@ 2010-07-12 12:00             ` Catalin Marinas
  1 sibling, 0 replies; 23+ messages in thread
From: Catalin Marinas @ 2010-07-12 12:00 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, 2010-07-12 at 12:39 +0100, Arnd Bergmann wrote:
> On Saturday 10 July 2010, Catalin Marinas wrote:
> > > Right. IMHO the PCI IO variants should get the same barriers that
> > > Catalin is introducing in the PCI MEM variants. The ordering requirements
> > > for IO accesses are stricter than those for MEM, the main difference
> > > being that MEM writes are posted while IO writes are synchronizing.
> >
> > Linux docs don't clearly define the ordering requirements. There are
> > various e-mail threads but not a finalised document. In principle,
> > trying to be as close to x86 as possible, in which case we probably need
> > barriers here as well.
> 
> Right, emulating x86 behaviour for any PCI access with inl/readl/ioread32
> and their counterparts seems to be the only practical answer.
> It would be nice to have an architecture independent way of doing non-PCI
> register access, and more importantly relaxed accesses to those.
> 
> Note that no other architecture currently defines write*_relaxed(), only
> read*_relaxed(). 

That's why I only changed the ARM-specific cache-l2x0.c file

> Maybe we should add them everywhere, not just on ARM.

Yes, this was suggested in 2008 and again recently though no-one pushed
for this. I'm happy to do it but it will probably have to wait until
August (going on holiday at the end of the next week). Since we already
have read*_relaxed(), I don't see a problem with pushing such patches.

> > > Thinking about it from this angle, I'm not even sure that x86 compatibility
> > > requires arm to add wmb() after writel(). IIRC, PCI memory space writes are
> > > required to be ordered with regard to each other, but not necessarily
> > > with regard to other CPU instructions or DMA transfers, unlike memory
> > > space reads and IO space read/write accesses.
> >
> > We don't need a wmb() after writel(), my patch only adds wmb() before
> > writel(). We need previous DMA buffer writes to be visible before
> > writel(), otherwise we get corrupted DMA transfers.
> 
> Ah, that's right: writel and outl both need the barrier before the access,
> but writel will never need a barrier after the access.
> The x86 variant of outl also has the implicit ordering after the access,
> but I'm not sure if we need to emulate that. I can't currently think
> of a case where it's strictly required because any later access to the same
> PCI function will be ordered anyway.

As I replied to Jamie, even if we guarantee that an outl() leaves the
CPU before a write to Normal Non-cacheable memory, we cannot guarantee
that the device changed its state as a result of outl(). That's device
specific and outside of the CPU control.

-- 
Catalin

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v2 1/3] ARM: Introduce *_relaxed() I/O accessors
  2010-07-12 11:53               ` Catalin Marinas
@ 2010-07-12 12:46                 ` Jamie Lokier
  2010-07-13 15:21                   ` Catalin Marinas
  0 siblings, 1 reply; 23+ messages in thread
From: Jamie Lokier @ 2010-07-12 12:46 UTC (permalink / raw)
  To: linux-arm-kernel

Catalin Marinas wrote:
> On Mon, 2010-07-12 at 12:50 +0100, Jamie Lokier wrote:
> > Arnd Bergmann wrote:
> > > Ah, that's right: writel and outl both need the barrier before the access,
> > > but writel will never need a barrier after the access.
> > > The x86 variant of outl also has the implicit ordering after the access,
> > > but I'm not sure if we need to emulate that. I can't currently think
> > > of a case where it's strictly required because any later access to the same
> > > PCI function will be ordered anyway.
> > 
> > What about those ARMs which can buffer a write for an indefinite period?
> > Do any drivers expect writes to be posted in a reasonably short time?
> 
> Writing to any device is not guaranteed to succeed (i.e. change the
> state of the device) in a certain amount of time (this is probably the
> case on x86 as well). If you need this certainty in the code, you do a
> read back from the device. Since Device memory accesses are ordered in
> ARM, we don't need additional barriers for such situations.

There's a time between a "certain amount" and "infinite".

I'm pretty sure the write buffering time for x86 is guaranteed to be
finite and quite short (without specifying exactly how short - just
like instructions don't specify how long they take to execute), as in
it's just a buffer, whose delay is a similar order of magnitude to bus
transactions.  It's not a cache.

A recent commit changes ARM cpu_relax() because it can keep writes
buffered indefinitely.  The commit message does say it's because reads
are prioritised over writes, so perhaps that's only an issue in loops
which use cpu_relax() anyway.

If any chips buffer PCI writes indefinitely outside a
cpu_relax()-using loop, I wouldn't be surprised to see some drivers
work incorrectly.  Not *break* exactly, but things like actions
getting delayed until something else happens, or reduced performance.

-- Jamie

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v2 1/3] ARM: Introduce *_relaxed() I/O accessors
  2010-07-12 12:46                 ` Jamie Lokier
@ 2010-07-13 15:21                   ` Catalin Marinas
  0 siblings, 0 replies; 23+ messages in thread
From: Catalin Marinas @ 2010-07-13 15:21 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, 2010-07-12 at 13:46 +0100, Jamie Lokier wrote:
> Catalin Marinas wrote:
> > On Mon, 2010-07-12 at 12:50 +0100, Jamie Lokier wrote:
> > > Arnd Bergmann wrote:
> > > > Ah, that's right: writel and outl both need the barrier before the access,
> > > > but writel will never need a barrier after the access.
> > > > The x86 variant of outl also has the implicit ordering after the access,
> > > > but I'm not sure if we need to emulate that. I can't currently think
> > > > of a case where it's strictly required because any later access to the same
> > > > PCI function will be ordered anyway.
> > >
> > > What about those ARMs which can buffer a write for an indefinite period?
> > > Do any drivers expect writes to be posted in a reasonably short time?
> >
> > Writing to any device is not guaranteed to succeed (i.e. change the
> > state of the device) in a certain amount of time (this is probably the
> > case on x86 as well). If you need this certainty in the code, you do a
> > read back from the device. Since Device memory accesses are ordered in
> > ARM, we don't need additional barriers for such situations.
> 
> There's a time between a "certain amount" and "infinite".
> 
> I'm pretty sure the write buffering time for x86 is guaranteed to be
> finite and quite short (without specifying exactly how short - just
> like instructions don't specify how long they take to execute), as in
> it's just a buffer, whose delay is a similar order of magnitude to bus
> transactions.  It's not a cache.
> 
> A recent commit changes ARM cpu_relax() because it can keep writes
> buffered indefinitely.  The commit message does say it's because reads
> are prioritised over writes, so perhaps that's only an issue in loops
> which use cpu_relax() anyway.

That was an issue visible on ARM11MPCore. On ARMv7 cores, it is
mandatory for the write buffer to drain in a finite amount of time
(though I'm not sure how long this could be). The cpu_relax() problem
that we've seen won't happen on ARMv7 cores. Pre-v6 processors have a
stronger memory order model, especially in relation to uncached DMA
buffers.

-- 
Catalin

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2010-07-13 15:21 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-07-09 11:07 [PATCH v2 0/3] Ordered I/O accessors Catalin Marinas
2010-07-09 11:08 ` [PATCH v2 1/3] ARM: Introduce *_relaxed() " Catalin Marinas
2010-07-09 16:08   ` Arnd Bergmann
2010-07-09 16:53     ` Catalin Marinas
2010-07-09 17:17       ` Arnd Bergmann
2010-07-09 18:24     ` Russell King - ARM Linux
2010-07-09 19:30       ` Arnd Bergmann
2010-07-09 22:31         ` Catalin Marinas
2010-07-12 11:39           ` Arnd Bergmann
2010-07-12 11:50             ` Jamie Lokier
2010-07-12 11:53               ` Catalin Marinas
2010-07-12 12:46                 ` Jamie Lokier
2010-07-13 15:21                   ` Catalin Marinas
2010-07-12 12:00             ` Catalin Marinas
2010-07-09 11:08 ` [PATCH v2 2/3] ARM: Convert L2x0 to use the IO relaxed operations for cache sync Catalin Marinas
2010-07-09 11:08 ` [PATCH v2 3/3] ARM: Add barriers to the I/O accessors if ARM_DMA_MEM_BUFFERABLE Catalin Marinas
2010-07-09 11:41   ` Catalin Marinas
2010-07-09 12:16     ` Russell King - ARM Linux
2010-07-09 13:02       ` [PATCH v2 3/3] ARM: Add barriers to the I/O accessors ifARM_DMA_MEM_BUFFERABLE Catalin Marinas
2010-07-09 14:21         ` [PATCH v2 3/3] ARM: Add barriers to the I/O accessorsifARM_DMA_MEM_BUFFERABLE Catalin Marinas
2010-07-09 14:34           ` Russell King - ARM Linux
2010-07-09 15:02             ` [PATCH v2 3/3] ARM: Add barriers to the I/OaccessorsifARM_DMA_MEM_BUFFERABLE Catalin Marinas
2010-07-09 11:29 ` [PATCH v2 0/3] Ordered I/O accessors Russell King - ARM Linux

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.