* [PATCH 1/4] powerpc/64: flush_inval_dcache_range() becomes flush_dcache_range()
@ 2019-05-14 9:05 ` Christophe Leroy
0 siblings, 0 replies; 29+ messages in thread
From: Christophe Leroy @ 2019-05-14 9:05 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
Oliver O'Halloran, Segher Boessenkool
Cc: linux-kernel, linuxppc-dev
On most arches having function flush_dcache_range(), including PPC32,
this function does a writeback and invalidation of the cache bloc.
On PPC64, flush_dcache_range() only does a writeback while
flush_inval_dcache_range() does the invalidation in addition.
In addition it looks like within arch/powerpc/, there are no PPC64
platforms using flush_dcache_range()
This patch drops the existing 64 bits version of flush_dcache_range()
and renames flush_inval_dcache_range() into flush_dcache_range().
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/cacheflush.h | 1 -
arch/powerpc/kernel/misc_64.S | 27 ++-------------------------
arch/powerpc/lib/pmem.c | 8 ++++----
arch/powerpc/mm/mem.c | 4 ++--
arch/powerpc/sysdev/dart_iommu.c | 2 +-
drivers/macintosh/smu.c | 4 ++--
6 files changed, 11 insertions(+), 35 deletions(-)
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index d5a8d7bf0759..e9a40b110f1d 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -109,7 +109,6 @@ static inline void invalidate_dcache_range(unsigned long start,
#endif /* CONFIG_PPC32 */
#ifdef CONFIG_PPC64
extern void flush_dcache_range(unsigned long start, unsigned long stop);
-extern void flush_inval_dcache_range(unsigned long start, unsigned long stop);
#endif
#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 262ba9481781..a4fd536efb44 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -121,31 +121,8 @@ EXPORT_SYMBOL(flush_icache_range)
*
* flush all bytes from start to stop-1 inclusive
*/
-_GLOBAL_TOC(flush_dcache_range)
-/*
- * Flush the data cache to memory
- *
- * Different systems have different cache line sizes
- */
- ld r10,PPC64_CACHES@toc(r2)
- lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
- addi r5,r7,-1
- andc r6,r3,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5 /* ensure we get enough */
- lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */
- srw. r8,r8,r9 /* compute line count */
- beqlr /* nothing to do? */
- mtctr r8
-0: dcbst 0,r6
- add r6,r6,r7
- bdnz 0b
- sync
- blr
-EXPORT_SYMBOL(flush_dcache_range)
-
-_GLOBAL(flush_inval_dcache_range)
+_GLOBAL_TOC(flush_dcache_range)
ld r10,PPC64_CACHES@toc(r2)
lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
addi r5,r7,-1
@@ -164,7 +141,7 @@ _GLOBAL(flush_inval_dcache_range)
sync
isync
blr
-
+EXPORT_SYMBOL(flush_dcache_range)
/*
* Flush a particular page from the data cache to RAM.
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
index 53c018762e1c..36e08bf850e0 100644
--- a/arch/powerpc/lib/pmem.c
+++ b/arch/powerpc/lib/pmem.c
@@ -23,14 +23,14 @@
void arch_wb_cache_pmem(void *addr, size_t size)
{
unsigned long start = (unsigned long) addr;
- flush_inval_dcache_range(start, start + size);
+ flush_dcache_range(start, start + size);
}
EXPORT_SYMBOL(arch_wb_cache_pmem);
void arch_invalidate_pmem(void *addr, size_t size)
{
unsigned long start = (unsigned long) addr;
- flush_inval_dcache_range(start, start + size);
+ flush_dcache_range(start, start + size);
}
EXPORT_SYMBOL(arch_invalidate_pmem);
@@ -43,7 +43,7 @@ long __copy_from_user_flushcache(void *dest, const void __user *src,
unsigned long copied, start = (unsigned long) dest;
copied = __copy_from_user(dest, src, size);
- flush_inval_dcache_range(start, start + size);
+ flush_dcache_range(start, start + size);
return copied;
}
@@ -53,7 +53,7 @@ void *memcpy_flushcache(void *dest, const void *src, size_t size)
unsigned long start = (unsigned long) dest;
memcpy(dest, src, size);
- flush_inval_dcache_range(start, start + size);
+ flush_dcache_range(start, start + size);
return dest;
}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index cd525d709072..39e66f033995 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -125,7 +125,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altm
start, start + size, rc);
return -EFAULT;
}
- flush_inval_dcache_range(start, start + size);
+ flush_dcache_range(start, start + size);
return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
}
@@ -153,7 +153,7 @@ int __ref arch_remove_memory(int nid, u64 start, u64 size,
/* Remove htab bolted mappings for this section of memory */
start = (unsigned long)__va(start);
- flush_inval_dcache_range(start, start + size);
+ flush_dcache_range(start, start + size);
ret = remove_section_mapping(start, start + size);
/* Ensure all vmalloc mappings are flushed in case they also
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index 2a751795ec1e..bc259a8d3f2d 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -158,7 +158,7 @@ static void dart_cache_sync(unsigned int *base, unsigned int count)
unsigned int tmp;
/* Perform a standard cache flush */
- flush_inval_dcache_range(start, end);
+ flush_dcache_range(start, end);
/*
* Perform the sequence described in the CPC925 manual to
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 6a844125cf2d..97758eed03f2 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -133,7 +133,7 @@ static void smu_start_cmd(void)
/* Flush command and data to RAM */
faddr = (unsigned long)smu->cmd_buf;
fend = faddr + smu->cmd_buf->length + 2;
- flush_inval_dcache_range(faddr, fend);
+ flush_dcache_range(faddr, fend);
/* We also disable NAP mode for the duration of the command
@@ -195,7 +195,7 @@ static irqreturn_t smu_db_intr(int irq, void *arg)
* reply length (it's only 2 cache lines anyway)
*/
faddr = (unsigned long)smu->cmd_buf;
- flush_inval_dcache_range(faddr, faddr + 256);
+ flush_dcache_range(faddr, faddr + 256);
/* Now check ack */
ack = (~cmd->cmd) & 0xff;
--
2.13.3
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH 1/4] powerpc/64: flush_inval_dcache_range() becomes flush_dcache_range()
@ 2019-05-14 9:05 ` Christophe Leroy
0 siblings, 0 replies; 29+ messages in thread
From: Christophe Leroy @ 2019-05-14 9:05 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
Oliver O'Halloran, Segher Boessenkool
Cc: linuxppc-dev, linux-kernel
On most arches having function flush_dcache_range(), including PPC32,
this function does a writeback and invalidation of the cache bloc.
On PPC64, flush_dcache_range() only does a writeback while
flush_inval_dcache_range() does the invalidation in addition.
In addition it looks like within arch/powerpc/, there are no PPC64
platforms using flush_dcache_range()
This patch drops the existing 64 bits version of flush_dcache_range()
and renames flush_inval_dcache_range() into flush_dcache_range().
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/cacheflush.h | 1 -
arch/powerpc/kernel/misc_64.S | 27 ++-------------------------
arch/powerpc/lib/pmem.c | 8 ++++----
arch/powerpc/mm/mem.c | 4 ++--
arch/powerpc/sysdev/dart_iommu.c | 2 +-
drivers/macintosh/smu.c | 4 ++--
6 files changed, 11 insertions(+), 35 deletions(-)
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index d5a8d7bf0759..e9a40b110f1d 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -109,7 +109,6 @@ static inline void invalidate_dcache_range(unsigned long start,
#endif /* CONFIG_PPC32 */
#ifdef CONFIG_PPC64
extern void flush_dcache_range(unsigned long start, unsigned long stop);
-extern void flush_inval_dcache_range(unsigned long start, unsigned long stop);
#endif
#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 262ba9481781..a4fd536efb44 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -121,31 +121,8 @@ EXPORT_SYMBOL(flush_icache_range)
*
* flush all bytes from start to stop-1 inclusive
*/
-_GLOBAL_TOC(flush_dcache_range)
-/*
- * Flush the data cache to memory
- *
- * Different systems have different cache line sizes
- */
- ld r10,PPC64_CACHES@toc(r2)
- lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
- addi r5,r7,-1
- andc r6,r3,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5 /* ensure we get enough */
- lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */
- srw. r8,r8,r9 /* compute line count */
- beqlr /* nothing to do? */
- mtctr r8
-0: dcbst 0,r6
- add r6,r6,r7
- bdnz 0b
- sync
- blr
-EXPORT_SYMBOL(flush_dcache_range)
-
-_GLOBAL(flush_inval_dcache_range)
+_GLOBAL_TOC(flush_dcache_range)
ld r10,PPC64_CACHES@toc(r2)
lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
addi r5,r7,-1
@@ -164,7 +141,7 @@ _GLOBAL(flush_inval_dcache_range)
sync
isync
blr
-
+EXPORT_SYMBOL(flush_dcache_range)
/*
* Flush a particular page from the data cache to RAM.
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
index 53c018762e1c..36e08bf850e0 100644
--- a/arch/powerpc/lib/pmem.c
+++ b/arch/powerpc/lib/pmem.c
@@ -23,14 +23,14 @@
void arch_wb_cache_pmem(void *addr, size_t size)
{
unsigned long start = (unsigned long) addr;
- flush_inval_dcache_range(start, start + size);
+ flush_dcache_range(start, start + size);
}
EXPORT_SYMBOL(arch_wb_cache_pmem);
void arch_invalidate_pmem(void *addr, size_t size)
{
unsigned long start = (unsigned long) addr;
- flush_inval_dcache_range(start, start + size);
+ flush_dcache_range(start, start + size);
}
EXPORT_SYMBOL(arch_invalidate_pmem);
@@ -43,7 +43,7 @@ long __copy_from_user_flushcache(void *dest, const void __user *src,
unsigned long copied, start = (unsigned long) dest;
copied = __copy_from_user(dest, src, size);
- flush_inval_dcache_range(start, start + size);
+ flush_dcache_range(start, start + size);
return copied;
}
@@ -53,7 +53,7 @@ void *memcpy_flushcache(void *dest, const void *src, size_t size)
unsigned long start = (unsigned long) dest;
memcpy(dest, src, size);
- flush_inval_dcache_range(start, start + size);
+ flush_dcache_range(start, start + size);
return dest;
}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index cd525d709072..39e66f033995 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -125,7 +125,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altm
start, start + size, rc);
return -EFAULT;
}
- flush_inval_dcache_range(start, start + size);
+ flush_dcache_range(start, start + size);
return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
}
@@ -153,7 +153,7 @@ int __ref arch_remove_memory(int nid, u64 start, u64 size,
/* Remove htab bolted mappings for this section of memory */
start = (unsigned long)__va(start);
- flush_inval_dcache_range(start, start + size);
+ flush_dcache_range(start, start + size);
ret = remove_section_mapping(start, start + size);
/* Ensure all vmalloc mappings are flushed in case they also
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index 2a751795ec1e..bc259a8d3f2d 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -158,7 +158,7 @@ static void dart_cache_sync(unsigned int *base, unsigned int count)
unsigned int tmp;
/* Perform a standard cache flush */
- flush_inval_dcache_range(start, end);
+ flush_dcache_range(start, end);
/*
* Perform the sequence described in the CPC925 manual to
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 6a844125cf2d..97758eed03f2 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -133,7 +133,7 @@ static void smu_start_cmd(void)
/* Flush command and data to RAM */
faddr = (unsigned long)smu->cmd_buf;
fend = faddr + smu->cmd_buf->length + 2;
- flush_inval_dcache_range(faddr, fend);
+ flush_dcache_range(faddr, fend);
/* We also disable NAP mode for the duration of the command
@@ -195,7 +195,7 @@ static irqreturn_t smu_db_intr(int irq, void *arg)
* reply length (it's only 2 cache lines anyway)
*/
faddr = (unsigned long)smu->cmd_buf;
- flush_inval_dcache_range(faddr, faddr + 256);
+ flush_dcache_range(faddr, faddr + 256);
/* Now check ack */
ack = (~cmd->cmd) & 0xff;
--
2.13.3
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH 2/4] powerpc/32: activate ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE
2019-05-14 9:05 ` Christophe Leroy
@ 2019-05-14 9:05 ` Christophe Leroy
-1 siblings, 0 replies; 29+ messages in thread
From: Christophe Leroy @ 2019-05-14 9:05 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
Oliver O'Halloran, Segher Boessenkool
Cc: linux-kernel, linuxppc-dev
PPC32 also have flush_dcache_range() so it can also support
ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE without changes.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/Kconfig | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index d7996cfaceca..cf6e30f637be 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -127,13 +127,13 @@ config PPC
select ARCH_HAS_KCOV
select ARCH_HAS_MMIOWB if PPC64
select ARCH_HAS_PHYS_TO_DMA
- select ARCH_HAS_PMEM_API if PPC64
+ select ARCH_HAS_PMEM_API
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_MEMBARRIER_CALLBACKS
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
- select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64
+ select ARCH_HAS_UACCESS_FLUSHCACHE
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAS_ZONE_DEVICE if PPC_BOOK3S_64
select ARCH_HAVE_NMI_SAFE_CMPXCHG
--
2.13.3
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH 2/4] powerpc/32: activate ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE
@ 2019-05-14 9:05 ` Christophe Leroy
0 siblings, 0 replies; 29+ messages in thread
From: Christophe Leroy @ 2019-05-14 9:05 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
Oliver O'Halloran, Segher Boessenkool
Cc: linuxppc-dev, linux-kernel
PPC32 also have flush_dcache_range() so it can also support
ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE without changes.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/Kconfig | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index d7996cfaceca..cf6e30f637be 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -127,13 +127,13 @@ config PPC
select ARCH_HAS_KCOV
select ARCH_HAS_MMIOWB if PPC64
select ARCH_HAS_PHYS_TO_DMA
- select ARCH_HAS_PMEM_API if PPC64
+ select ARCH_HAS_PMEM_API
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_MEMBARRIER_CALLBACKS
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
- select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64
+ select ARCH_HAS_UACCESS_FLUSHCACHE
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAS_ZONE_DEVICE if PPC_BOOK3S_64
select ARCH_HAVE_NMI_SAFE_CMPXCHG
--
2.13.3
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH 3/4] powerpc/32: define helpers to get L1 cache sizes.
2019-05-14 9:05 ` Christophe Leroy
@ 2019-05-14 9:05 ` Christophe Leroy
-1 siblings, 0 replies; 29+ messages in thread
From: Christophe Leroy @ 2019-05-14 9:05 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
Oliver O'Halloran, Segher Boessenkool
Cc: linux-kernel, linuxppc-dev
This patch defines C helpers to retrieve the size of
cache blocks and uses them in the cacheflush functions.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/cache.h | 16 ++++++++++++++--
arch/powerpc/include/asm/cacheflush.h | 24 +++++++++++++++---------
2 files changed, 29 insertions(+), 11 deletions(-)
diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 40ea5b3781c6..0009a0a82e86 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -33,7 +33,8 @@
#define IFETCH_ALIGN_BYTES (1 << IFETCH_ALIGN_SHIFT)
-#if defined(__powerpc64__) && !defined(__ASSEMBLY__)
+#if !defined(__ASSEMBLY__)
+#ifdef CONFIG_PPC64
struct ppc_cache_info {
u32 size;
@@ -53,7 +54,18 @@ struct ppc64_caches {
};
extern struct ppc64_caches ppc64_caches;
-#endif /* __powerpc64__ && ! __ASSEMBLY__ */
+#else
+static inline u32 l1_cache_shift(void)
+{
+ return L1_CACHE_SHIFT;
+}
+
+static inline u32 l1_cache_bytes(void)
+{
+ return L1_CACHE_BYTES;
+}
+#endif
+#endif /* ! __ASSEMBLY__ */
#if defined(__ASSEMBLY__)
/*
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index e9a40b110f1d..d405f18441cd 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -64,11 +64,13 @@ static inline void __flush_dcache_icache_phys(unsigned long physaddr)
*/
static inline void flush_dcache_range(unsigned long start, unsigned long stop)
{
- void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
- unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
+ unsigned long shift = l1_cache_shift();
+ unsigned long bytes = l1_cache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
unsigned long i;
- for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
+ for (i = 0; i < size >> shift; i++, addr += bytes)
dcbf(addr);
mb(); /* sync */
}
@@ -80,11 +82,13 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop)
*/
static inline void clean_dcache_range(unsigned long start, unsigned long stop)
{
- void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
- unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
+ unsigned long shift = l1_cache_shift();
+ unsigned long bytes = l1_cache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
unsigned long i;
- for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
+ for (i = 0; i < size >> shift; i++, addr += bytes)
dcbst(addr);
mb(); /* sync */
}
@@ -97,11 +101,13 @@ static inline void clean_dcache_range(unsigned long start, unsigned long stop)
static inline void invalidate_dcache_range(unsigned long start,
unsigned long stop)
{
- void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
- unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
+ unsigned long shift = l1_cache_shift();
+ unsigned long bytes = l1_cache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
unsigned long i;
- for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
+ for (i = 0; i < size >> shift; i++, addr += bytes)
dcbi(addr);
mb(); /* sync */
}
--
2.13.3
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH 3/4] powerpc/32: define helpers to get L1 cache sizes.
@ 2019-05-14 9:05 ` Christophe Leroy
0 siblings, 0 replies; 29+ messages in thread
From: Christophe Leroy @ 2019-05-14 9:05 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
Oliver O'Halloran, Segher Boessenkool
Cc: linuxppc-dev, linux-kernel
This patch defines C helpers to retrieve the size of
cache blocks and uses them in the cacheflush functions.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/cache.h | 16 ++++++++++++++--
arch/powerpc/include/asm/cacheflush.h | 24 +++++++++++++++---------
2 files changed, 29 insertions(+), 11 deletions(-)
diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 40ea5b3781c6..0009a0a82e86 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -33,7 +33,8 @@
#define IFETCH_ALIGN_BYTES (1 << IFETCH_ALIGN_SHIFT)
-#if defined(__powerpc64__) && !defined(__ASSEMBLY__)
+#if !defined(__ASSEMBLY__)
+#ifdef CONFIG_PPC64
struct ppc_cache_info {
u32 size;
@@ -53,7 +54,18 @@ struct ppc64_caches {
};
extern struct ppc64_caches ppc64_caches;
-#endif /* __powerpc64__ && ! __ASSEMBLY__ */
+#else
+static inline u32 l1_cache_shift(void)
+{
+ return L1_CACHE_SHIFT;
+}
+
+static inline u32 l1_cache_bytes(void)
+{
+ return L1_CACHE_BYTES;
+}
+#endif
+#endif /* ! __ASSEMBLY__ */
#if defined(__ASSEMBLY__)
/*
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index e9a40b110f1d..d405f18441cd 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -64,11 +64,13 @@ static inline void __flush_dcache_icache_phys(unsigned long physaddr)
*/
static inline void flush_dcache_range(unsigned long start, unsigned long stop)
{
- void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
- unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
+ unsigned long shift = l1_cache_shift();
+ unsigned long bytes = l1_cache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
unsigned long i;
- for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
+ for (i = 0; i < size >> shift; i++, addr += bytes)
dcbf(addr);
mb(); /* sync */
}
@@ -80,11 +82,13 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop)
*/
static inline void clean_dcache_range(unsigned long start, unsigned long stop)
{
- void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
- unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
+ unsigned long shift = l1_cache_shift();
+ unsigned long bytes = l1_cache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
unsigned long i;
- for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
+ for (i = 0; i < size >> shift; i++, addr += bytes)
dcbst(addr);
mb(); /* sync */
}
@@ -97,11 +101,13 @@ static inline void clean_dcache_range(unsigned long start, unsigned long stop)
static inline void invalidate_dcache_range(unsigned long start,
unsigned long stop)
{
- void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
- unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
+ unsigned long shift = l1_cache_shift();
+ unsigned long bytes = l1_cache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
unsigned long i;
- for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
+ for (i = 0; i < size >> shift; i++, addr += bytes)
dcbi(addr);
mb(); /* sync */
}
--
2.13.3
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
2019-05-14 9:05 ` Christophe Leroy
@ 2019-05-14 9:05 ` Christophe Leroy
-1 siblings, 0 replies; 29+ messages in thread
From: Christophe Leroy @ 2019-05-14 9:05 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
Oliver O'Halloran, Segher Boessenkool
Cc: linux-kernel, linuxppc-dev
This patch drops the assembly PPC64 version of flush_dcache_range()
and re-uses the PPC32 static inline version.
With GCC 8.1, the following code is generated:
void flush_test(unsigned long start, unsigned long stop)
{
flush_dcache_range(start, stop);
}
0000000000000130 <.flush_test>:
130: 3d 22 00 00 addis r9,r2,0
132: R_PPC64_TOC16_HA .data+0x8
134: 81 09 00 00 lwz r8,0(r9)
136: R_PPC64_TOC16_LO .data+0x8
138: 3d 22 00 00 addis r9,r2,0
13a: R_PPC64_TOC16_HA .data+0xc
13c: 80 e9 00 00 lwz r7,0(r9)
13e: R_PPC64_TOC16_LO .data+0xc
140: 7d 48 00 d0 neg r10,r8
144: 7d 43 18 38 and r3,r10,r3
148: 7c 00 04 ac hwsync
14c: 4c 00 01 2c isync
150: 39 28 ff ff addi r9,r8,-1
154: 7c 89 22 14 add r4,r9,r4
158: 7c 83 20 50 subf r4,r3,r4
15c: 7c 89 3c 37 srd. r9,r4,r7
160: 41 82 00 1c beq 17c <.flush_test+0x4c>
164: 7d 29 03 a6 mtctr r9
168: 60 00 00 00 nop
16c: 60 00 00 00 nop
170: 7c 00 18 ac dcbf 0,r3
174: 7c 63 42 14 add r3,r3,r8
178: 42 00 ff f8 bdnz 170 <.flush_test+0x40>
17c: 7c 00 04 ac hwsync
180: 4c 00 01 2c isync
184: 4e 80 00 20 blr
188: 60 00 00 00 nop
18c: 60 00 00 00 nop
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/cache.h | 10 ++++++++++
arch/powerpc/include/asm/cacheflush.h | 14 ++++++++------
arch/powerpc/kernel/misc_64.S | 29 -----------------------------
3 files changed, 18 insertions(+), 35 deletions(-)
diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 0009a0a82e86..45e3137ccd71 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -54,6 +54,16 @@ struct ppc64_caches {
};
extern struct ppc64_caches ppc64_caches;
+
+static inline u32 l1_cache_shift(void)
+{
+ return ppc64_caches.l1d.log_block_size;
+}
+
+static inline u32 l1_cache_bytes(void)
+{
+ return ppc64_caches.l1d.block_size;
+}
#else
static inline u32 l1_cache_shift(void)
{
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index d405f18441cd..3cd7ce3dec8b 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -57,7 +57,6 @@ static inline void __flush_dcache_icache_phys(unsigned long physaddr)
}
#endif
-#ifdef CONFIG_PPC32
/*
* Write any modified data cache blocks out to memory and invalidate them.
* Does not invalidate the corresponding instruction cache blocks.
@@ -70,9 +69,17 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop)
unsigned long size = stop - (unsigned long)addr + (bytes - 1);
unsigned long i;
+ if (IS_ENABLED(CONFIG_PPC64)) {
+ mb(); /* sync */
+ isync();
+ }
+
for (i = 0; i < size >> shift; i++, addr += bytes)
dcbf(addr);
mb(); /* sync */
+
+ if (IS_ENABLED(CONFIG_PPC64))
+ isync();
}
/*
@@ -112,11 +119,6 @@ static inline void invalidate_dcache_range(unsigned long start,
mb(); /* sync */
}
-#endif /* CONFIG_PPC32 */
-#ifdef CONFIG_PPC64
-extern void flush_dcache_range(unsigned long start, unsigned long stop);
-#endif
-
#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
do { \
memcpy(dst, src, len); \
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index a4fd536efb44..1b0a42c50ef1 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -115,35 +115,6 @@ _ASM_NOKPROBE_SYMBOL(flush_icache_range)
EXPORT_SYMBOL(flush_icache_range)
/*
- * Like above, but only do the D-cache.
- *
- * flush_dcache_range(unsigned long start, unsigned long stop)
- *
- * flush all bytes from start to stop-1 inclusive
- */
-
-_GLOBAL_TOC(flush_dcache_range)
- ld r10,PPC64_CACHES@toc(r2)
- lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
- addi r5,r7,-1
- andc r6,r3,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5 /* ensure we get enough */
- lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */
- srw. r8,r8,r9 /* compute line count */
- beqlr /* nothing to do? */
- sync
- isync
- mtctr r8
-0: dcbf 0,r6
- add r6,r6,r7
- bdnz 0b
- sync
- isync
- blr
-EXPORT_SYMBOL(flush_dcache_range)
-
-/*
* Flush a particular page from the data cache to RAM.
* Note: this is necessary because the instruction cache does *not*
* snoop from the data cache.
--
2.13.3
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
@ 2019-05-14 9:05 ` Christophe Leroy
0 siblings, 0 replies; 29+ messages in thread
From: Christophe Leroy @ 2019-05-14 9:05 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
Oliver O'Halloran, Segher Boessenkool
Cc: linuxppc-dev, linux-kernel
This patch drops the assembly PPC64 version of flush_dcache_range()
and re-uses the PPC32 static inline version.
With GCC 8.1, the following code is generated:
void flush_test(unsigned long start, unsigned long stop)
{
flush_dcache_range(start, stop);
}
0000000000000130 <.flush_test>:
130: 3d 22 00 00 addis r9,r2,0
132: R_PPC64_TOC16_HA .data+0x8
134: 81 09 00 00 lwz r8,0(r9)
136: R_PPC64_TOC16_LO .data+0x8
138: 3d 22 00 00 addis r9,r2,0
13a: R_PPC64_TOC16_HA .data+0xc
13c: 80 e9 00 00 lwz r7,0(r9)
13e: R_PPC64_TOC16_LO .data+0xc
140: 7d 48 00 d0 neg r10,r8
144: 7d 43 18 38 and r3,r10,r3
148: 7c 00 04 ac hwsync
14c: 4c 00 01 2c isync
150: 39 28 ff ff addi r9,r8,-1
154: 7c 89 22 14 add r4,r9,r4
158: 7c 83 20 50 subf r4,r3,r4
15c: 7c 89 3c 37 srd. r9,r4,r7
160: 41 82 00 1c beq 17c <.flush_test+0x4c>
164: 7d 29 03 a6 mtctr r9
168: 60 00 00 00 nop
16c: 60 00 00 00 nop
170: 7c 00 18 ac dcbf 0,r3
174: 7c 63 42 14 add r3,r3,r8
178: 42 00 ff f8 bdnz 170 <.flush_test+0x40>
17c: 7c 00 04 ac hwsync
180: 4c 00 01 2c isync
184: 4e 80 00 20 blr
188: 60 00 00 00 nop
18c: 60 00 00 00 nop
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/cache.h | 10 ++++++++++
arch/powerpc/include/asm/cacheflush.h | 14 ++++++++------
arch/powerpc/kernel/misc_64.S | 29 -----------------------------
3 files changed, 18 insertions(+), 35 deletions(-)
diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 0009a0a82e86..45e3137ccd71 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -54,6 +54,16 @@ struct ppc64_caches {
};
extern struct ppc64_caches ppc64_caches;
+
+static inline u32 l1_cache_shift(void)
+{
+ return ppc64_caches.l1d.log_block_size;
+}
+
+static inline u32 l1_cache_bytes(void)
+{
+ return ppc64_caches.l1d.block_size;
+}
#else
static inline u32 l1_cache_shift(void)
{
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index d405f18441cd..3cd7ce3dec8b 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -57,7 +57,6 @@ static inline void __flush_dcache_icache_phys(unsigned long physaddr)
}
#endif
-#ifdef CONFIG_PPC32
/*
* Write any modified data cache blocks out to memory and invalidate them.
* Does not invalidate the corresponding instruction cache blocks.
@@ -70,9 +69,17 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop)
unsigned long size = stop - (unsigned long)addr + (bytes - 1);
unsigned long i;
+ if (IS_ENABLED(CONFIG_PPC64)) {
+ mb(); /* sync */
+ isync();
+ }
+
for (i = 0; i < size >> shift; i++, addr += bytes)
dcbf(addr);
mb(); /* sync */
+
+ if (IS_ENABLED(CONFIG_PPC64))
+ isync();
}
/*
@@ -112,11 +119,6 @@ static inline void invalidate_dcache_range(unsigned long start,
mb(); /* sync */
}
-#endif /* CONFIG_PPC32 */
-#ifdef CONFIG_PPC64
-extern void flush_dcache_range(unsigned long start, unsigned long stop);
-#endif
-
#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
do { \
memcpy(dst, src, len); \
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index a4fd536efb44..1b0a42c50ef1 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -115,35 +115,6 @@ _ASM_NOKPROBE_SYMBOL(flush_icache_range)
EXPORT_SYMBOL(flush_icache_range)
/*
- * Like above, but only do the D-cache.
- *
- * flush_dcache_range(unsigned long start, unsigned long stop)
- *
- * flush all bytes from start to stop-1 inclusive
- */
-
-_GLOBAL_TOC(flush_dcache_range)
- ld r10,PPC64_CACHES@toc(r2)
- lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
- addi r5,r7,-1
- andc r6,r3,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5 /* ensure we get enough */
- lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */
- srw. r8,r8,r9 /* compute line count */
- beqlr /* nothing to do? */
- sync
- isync
- mtctr r8
-0: dcbf 0,r6
- add r6,r6,r7
- bdnz 0b
- sync
- isync
- blr
-EXPORT_SYMBOL(flush_dcache_range)
-
-/*
* Flush a particular page from the data cache to RAM.
* Note: this is necessary because the instruction cache does *not*
* snoop from the data cache.
--
2.13.3
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH 1/4] powerpc/64: flush_inval_dcache_range() becomes flush_dcache_range()
2019-05-14 9:05 ` Christophe Leroy
` (3 preceding siblings ...)
(?)
@ 2019-07-08 1:19 ` Michael Ellerman
-1 siblings, 0 replies; 29+ messages in thread
From: Michael Ellerman @ 2019-07-08 1:19 UTC (permalink / raw)
To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
Oliver O'Halloran, Segher Boessenkool
Cc: linuxppc-dev, linux-kernel
On Tue, 2019-05-14 at 09:05:13 UTC, Christophe Leroy wrote:
> On most arches having function flush_dcache_range(), including PPC32,
> this function does a writeback and invalidation of the cache bloc.
>
> On PPC64, flush_dcache_range() only does a writeback while
> flush_inval_dcache_range() does the invalidation in addition.
>
> In addition it looks like within arch/powerpc/, there are no PPC64
> platforms using flush_dcache_range()
>
> This patch drops the existing 64 bits version of flush_dcache_range()
> and renames flush_inval_dcache_range() into flush_dcache_range().
>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Applied to powerpc next, thanks.
https://git.kernel.org/powerpc/c/1cfb725fb1899dc6fdc88f8b5354a65e8ad260c6
cheers
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
2019-05-14 9:05 ` Christophe Leroy
(?)
@ 2019-07-08 1:19 ` Michael Ellerman
-1 siblings, 0 replies; 29+ messages in thread
From: Michael Ellerman @ 2019-07-08 1:19 UTC (permalink / raw)
To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
Oliver O'Halloran, Segher Boessenkool
Cc: linuxppc-dev, linux-kernel
On Tue, 2019-05-14 at 09:05:16 UTC, Christophe Leroy wrote:
> This patch drops the assembly PPC64 version of flush_dcache_range()
> and re-uses the PPC32 static inline version.
>
> With GCC 8.1, the following code is generated:
>
> void flush_test(unsigned long start, unsigned long stop)
> {
> flush_dcache_range(start, stop);
> }
>
> 0000000000000130 <.flush_test>:
> 130: 3d 22 00 00 addis r9,r2,0
> 132: R_PPC64_TOC16_HA .data+0x8
> 134: 81 09 00 00 lwz r8,0(r9)
> 136: R_PPC64_TOC16_LO .data+0x8
> 138: 3d 22 00 00 addis r9,r2,0
> 13a: R_PPC64_TOC16_HA .data+0xc
> 13c: 80 e9 00 00 lwz r7,0(r9)
> 13e: R_PPC64_TOC16_LO .data+0xc
> 140: 7d 48 00 d0 neg r10,r8
> 144: 7d 43 18 38 and r3,r10,r3
> 148: 7c 00 04 ac hwsync
> 14c: 4c 00 01 2c isync
> 150: 39 28 ff ff addi r9,r8,-1
> 154: 7c 89 22 14 add r4,r9,r4
> 158: 7c 83 20 50 subf r4,r3,r4
> 15c: 7c 89 3c 37 srd. r9,r4,r7
> 160: 41 82 00 1c beq 17c <.flush_test+0x4c>
> 164: 7d 29 03 a6 mtctr r9
> 168: 60 00 00 00 nop
> 16c: 60 00 00 00 nop
> 170: 7c 00 18 ac dcbf 0,r3
> 174: 7c 63 42 14 add r3,r3,r8
> 178: 42 00 ff f8 bdnz 170 <.flush_test+0x40>
> 17c: 7c 00 04 ac hwsync
> 180: 4c 00 01 2c isync
> 184: 4e 80 00 20 blr
> 188: 60 00 00 00 nop
> 18c: 60 00 00 00 nop
>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Applied to powerpc next, thanks.
https://git.kernel.org/powerpc/c/22e9c88d486a0536d337d6e0973968be0a4cd4b2
cheers
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/4] powerpc/32: define helpers to get L1 cache sizes.
2019-05-14 9:05 ` Christophe Leroy
(?)
@ 2019-07-08 1:19 ` Michael Ellerman
-1 siblings, 0 replies; 29+ messages in thread
From: Michael Ellerman @ 2019-07-08 1:19 UTC (permalink / raw)
To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
Oliver O'Halloran, Segher Boessenkool
Cc: linuxppc-dev, linux-kernel
On Tue, 2019-05-14 at 09:05:15 UTC, Christophe Leroy wrote:
> This patch defines C helpers to retrieve the size of
> cache blocks and uses them in the cacheflush functions.
>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Applied to powerpc next, thanks.
https://git.kernel.org/powerpc/c/d98fc70fc139b72ae098d24fde42ad70c8ff2f81
cheers
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
2019-05-14 9:05 ` Christophe Leroy
@ 2019-07-08 14:21 ` Aneesh Kumar K.V
-1 siblings, 0 replies; 29+ messages in thread
From: Aneesh Kumar K.V @ 2019-07-08 14:21 UTC (permalink / raw)
To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
Michael Ellerman, Oliver O'Halloran, Segher Boessenkool
Cc: linux-kernel, linuxppc-dev
Christophe Leroy <christophe.leroy@c-s.fr> writes:
> This patch drops the assembly PPC64 version of flush_dcache_range()
> and re-uses the PPC32 static inline version.
>
> With GCC 8.1, the following code is generated:
>
> void flush_test(unsigned long start, unsigned long stop)
> {
> flush_dcache_range(start, stop);
> }
>
> 0000000000000130 <.flush_test>:
> 130: 3d 22 00 00 addis r9,r2,0
> 132: R_PPC64_TOC16_HA .data+0x8
> 134: 81 09 00 00 lwz r8,0(r9)
> 136: R_PPC64_TOC16_LO .data+0x8
> 138: 3d 22 00 00 addis r9,r2,0
> 13a: R_PPC64_TOC16_HA .data+0xc
> 13c: 80 e9 00 00 lwz r7,0(r9)
> 13e: R_PPC64_TOC16_LO .data+0xc
> 140: 7d 48 00 d0 neg r10,r8
> 144: 7d 43 18 38 and r3,r10,r3
> 148: 7c 00 04 ac hwsync
> 14c: 4c 00 01 2c isync
> 150: 39 28 ff ff addi r9,r8,-1
> 154: 7c 89 22 14 add r4,r9,r4
> 158: 7c 83 20 50 subf r4,r3,r4
> 15c: 7c 89 3c 37 srd. r9,r4,r7
> 160: 41 82 00 1c beq 17c <.flush_test+0x4c>
> 164: 7d 29 03 a6 mtctr r9
> 168: 60 00 00 00 nop
> 16c: 60 00 00 00 nop
> 170: 7c 00 18 ac dcbf 0,r3
> 174: 7c 63 42 14 add r3,r3,r8
> 178: 42 00 ff f8 bdnz 170 <.flush_test+0x40>
> 17c: 7c 00 04 ac hwsync
> 180: 4c 00 01 2c isync
> 184: 4e 80 00 20 blr
> 188: 60 00 00 00 nop
> 18c: 60 00 00 00 nop
>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
> arch/powerpc/include/asm/cache.h | 10 ++++++++++
> arch/powerpc/include/asm/cacheflush.h | 14 ++++++++------
> arch/powerpc/kernel/misc_64.S | 29 -----------------------------
> 3 files changed, 18 insertions(+), 35 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
> index 0009a0a82e86..45e3137ccd71 100644
> --- a/arch/powerpc/include/asm/cache.h
> +++ b/arch/powerpc/include/asm/cache.h
> @@ -54,6 +54,16 @@ struct ppc64_caches {
> };
>
> extern struct ppc64_caches ppc64_caches;
> +
> +static inline u32 l1_cache_shift(void)
> +{
> + return ppc64_caches.l1d.log_block_size;
> +}
> +
> +static inline u32 l1_cache_bytes(void)
> +{
> + return ppc64_caches.l1d.block_size;
> +}
> #else
> static inline u32 l1_cache_shift(void)
> {
> diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
> index d405f18441cd..3cd7ce3dec8b 100644
> --- a/arch/powerpc/include/asm/cacheflush.h
> +++ b/arch/powerpc/include/asm/cacheflush.h
> @@ -57,7 +57,6 @@ static inline void __flush_dcache_icache_phys(unsigned long physaddr)
> }
> #endif
>
> -#ifdef CONFIG_PPC32
> /*
> * Write any modified data cache blocks out to memory and invalidate them.
> * Does not invalidate the corresponding instruction cache blocks.
> @@ -70,9 +69,17 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop)
> unsigned long size = stop - (unsigned long)addr + (bytes - 1);
> unsigned long i;
>
> + if (IS_ENABLED(CONFIG_PPC64)) {
> + mb(); /* sync */
> + isync();
> + }
> +
> for (i = 0; i < size >> shift; i++, addr += bytes)
> dcbf(addr);
> mb(); /* sync */
> +
> + if (IS_ENABLED(CONFIG_PPC64))
> + isync();
> }
Was checking with Michael about why we need that extra isync. Michael
pointed this came via
https://github.com/mpe/linux-fullhistory/commit/faa5ee3743ff9b6df9f9a03600e34fdae596cfb2#diff-67c7ffa8e420c7d4206cae4a9e888e14
for 970 which doesn't have coherent icache. So possibly isync there is
to flush the prefetch instructions? But even so we would need an icbi
there before that isync.
So overall wondering why we need that extra barriers there.
>
> /*
> @@ -112,11 +119,6 @@ static inline void invalidate_dcache_range(unsigned long start,
> mb(); /* sync */
> }
>
-aneesh
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
@ 2019-07-08 14:21 ` Aneesh Kumar K.V
0 siblings, 0 replies; 29+ messages in thread
From: Aneesh Kumar K.V @ 2019-07-08 14:21 UTC (permalink / raw)
To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
Michael Ellerman, Oliver O'Halloran, Segher Boessenkool
Cc: linuxppc-dev, linux-kernel
Christophe Leroy <christophe.leroy@c-s.fr> writes:
> This patch drops the assembly PPC64 version of flush_dcache_range()
> and re-uses the PPC32 static inline version.
>
> With GCC 8.1, the following code is generated:
>
> void flush_test(unsigned long start, unsigned long stop)
> {
> flush_dcache_range(start, stop);
> }
>
> 0000000000000130 <.flush_test>:
> 130: 3d 22 00 00 addis r9,r2,0
> 132: R_PPC64_TOC16_HA .data+0x8
> 134: 81 09 00 00 lwz r8,0(r9)
> 136: R_PPC64_TOC16_LO .data+0x8
> 138: 3d 22 00 00 addis r9,r2,0
> 13a: R_PPC64_TOC16_HA .data+0xc
> 13c: 80 e9 00 00 lwz r7,0(r9)
> 13e: R_PPC64_TOC16_LO .data+0xc
> 140: 7d 48 00 d0 neg r10,r8
> 144: 7d 43 18 38 and r3,r10,r3
> 148: 7c 00 04 ac hwsync
> 14c: 4c 00 01 2c isync
> 150: 39 28 ff ff addi r9,r8,-1
> 154: 7c 89 22 14 add r4,r9,r4
> 158: 7c 83 20 50 subf r4,r3,r4
> 15c: 7c 89 3c 37 srd. r9,r4,r7
> 160: 41 82 00 1c beq 17c <.flush_test+0x4c>
> 164: 7d 29 03 a6 mtctr r9
> 168: 60 00 00 00 nop
> 16c: 60 00 00 00 nop
> 170: 7c 00 18 ac dcbf 0,r3
> 174: 7c 63 42 14 add r3,r3,r8
> 178: 42 00 ff f8 bdnz 170 <.flush_test+0x40>
> 17c: 7c 00 04 ac hwsync
> 180: 4c 00 01 2c isync
> 184: 4e 80 00 20 blr
> 188: 60 00 00 00 nop
> 18c: 60 00 00 00 nop
>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
> arch/powerpc/include/asm/cache.h | 10 ++++++++++
> arch/powerpc/include/asm/cacheflush.h | 14 ++++++++------
> arch/powerpc/kernel/misc_64.S | 29 -----------------------------
> 3 files changed, 18 insertions(+), 35 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
> index 0009a0a82e86..45e3137ccd71 100644
> --- a/arch/powerpc/include/asm/cache.h
> +++ b/arch/powerpc/include/asm/cache.h
> @@ -54,6 +54,16 @@ struct ppc64_caches {
> };
>
> extern struct ppc64_caches ppc64_caches;
> +
> +static inline u32 l1_cache_shift(void)
> +{
> + return ppc64_caches.l1d.log_block_size;
> +}
> +
> +static inline u32 l1_cache_bytes(void)
> +{
> + return ppc64_caches.l1d.block_size;
> +}
> #else
> static inline u32 l1_cache_shift(void)
> {
> diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
> index d405f18441cd..3cd7ce3dec8b 100644
> --- a/arch/powerpc/include/asm/cacheflush.h
> +++ b/arch/powerpc/include/asm/cacheflush.h
> @@ -57,7 +57,6 @@ static inline void __flush_dcache_icache_phys(unsigned long physaddr)
> }
> #endif
>
> -#ifdef CONFIG_PPC32
> /*
> * Write any modified data cache blocks out to memory and invalidate them.
> * Does not invalidate the corresponding instruction cache blocks.
> @@ -70,9 +69,17 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop)
> unsigned long size = stop - (unsigned long)addr + (bytes - 1);
> unsigned long i;
>
> + if (IS_ENABLED(CONFIG_PPC64)) {
> + mb(); /* sync */
> + isync();
> + }
> +
> for (i = 0; i < size >> shift; i++, addr += bytes)
> dcbf(addr);
> mb(); /* sync */
> +
> + if (IS_ENABLED(CONFIG_PPC64))
> + isync();
> }
Was checking with Michael about why we need that extra isync. Michael
pointed this came via
https://github.com/mpe/linux-fullhistory/commit/faa5ee3743ff9b6df9f9a03600e34fdae596cfb2#diff-67c7ffa8e420c7d4206cae4a9e888e14
for 970 which doesn't have coherent icache. So possibly isync there is
to flush the prefetch instructions? But even so we would need an icbi
there before that isync.
So overall wondering why we need that extra barriers there.
>
> /*
> @@ -112,11 +119,6 @@ static inline void invalidate_dcache_range(unsigned long start,
> mb(); /* sync */
> }
>
-aneesh
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
2019-07-08 14:21 ` Aneesh Kumar K.V
@ 2019-07-09 2:20 ` Oliver O'Halloran
-1 siblings, 0 replies; 29+ messages in thread
From: Oliver O'Halloran @ 2019-07-09 2:20 UTC (permalink / raw)
To: Aneesh Kumar K.V
Cc: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
Michael Ellerman, Segher Boessenkool, linuxppc-dev,
Linux Kernel Mailing List
On Tue, Jul 9, 2019 at 12:22 AM Aneesh Kumar K.V
<aneesh.kumar@linux.ibm.com> wrote:
>
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>
> > *snip*
> > + if (IS_ENABLED(CONFIG_PPC64))
> > + isync();
> > }
>
>
> Was checking with Michael about why we need that extra isync. Michael
> pointed this came via
>
> https://github.com/mpe/linux-fullhistory/commit/faa5ee3743ff9b6df9f9a03600e34fdae596cfb2#diff-67c7ffa8e420c7d4206cae4a9e888e14
>
> for 970 which doesn't have coherent icache. So possibly isync there is
> to flush the prefetch instructions? But even so we would need an icbi
> there before that isync.
I don't think it's that, there's some magic in flush_icache_range() to
handle dropping prefetched instructions on 970.
> So overall wondering why we need that extra barriers there.
I think the isync is needed there because the architecture only
requires sync to provide ordering. A sync alone doesn't guarantee the
dcbfs have actually completed so the isync is necessary to ensure the
flushed cache lines are back in memory. That said, as far as I know
all the IBM book3s chips from power4 onwards will wait for pending
dcbfs when they hit a sync, but that might change in the future.
If it's a problem we could add a cpu-feature section around the isync
to no-op it in the common case. However, when I had a look with perf
it always showed that the sync was the hotspot so I don't think it'll
help much.
Oliver
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
@ 2019-07-09 2:20 ` Oliver O'Halloran
0 siblings, 0 replies; 29+ messages in thread
From: Oliver O'Halloran @ 2019-07-09 2:20 UTC (permalink / raw)
To: Aneesh Kumar K.V; +Cc: Linux Kernel Mailing List, Paul Mackerras, linuxppc-dev
On Tue, Jul 9, 2019 at 12:22 AM Aneesh Kumar K.V
<aneesh.kumar@linux.ibm.com> wrote:
>
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>
> > *snip*
> > + if (IS_ENABLED(CONFIG_PPC64))
> > + isync();
> > }
>
>
> Was checking with Michael about why we need that extra isync. Michael
> pointed this came via
>
> https://github.com/mpe/linux-fullhistory/commit/faa5ee3743ff9b6df9f9a03600e34fdae596cfb2#diff-67c7ffa8e420c7d4206cae4a9e888e14
>
> for 970 which doesn't have coherent icache. So possibly isync there is
> to flush the prefetch instructions? But even so we would need an icbi
> there before that isync.
I don't think it's that, there's some magic in flush_icache_range() to
handle dropping prefetched instructions on 970.
> So overall wondering why we need that extra barriers there.
I think the isync is needed there because the architecture only
requires sync to provide ordering. A sync alone doesn't guarantee the
dcbfs have actually completed so the isync is necessary to ensure the
flushed cache lines are back in memory. That said, as far as I know
all the IBM book3s chips from power4 onwards will wait for pending
dcbfs when they hit a sync, but that might change in the future.
If it's a problem we could add a cpu-feature section around the isync
to no-op it in the common case. However, when I had a look with perf
it always showed that the sync was the hotspot so I don't think it'll
help much.
Oliver
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
2019-07-09 2:20 ` Oliver O'Halloran
@ 2019-07-09 2:51 ` Aneesh Kumar K.V
-1 siblings, 0 replies; 29+ messages in thread
From: Aneesh Kumar K.V @ 2019-07-09 2:51 UTC (permalink / raw)
To: Oliver O'Halloran
Cc: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
Michael Ellerman, Segher Boessenkool, linuxppc-dev,
Linux Kernel Mailing List
On 7/9/19 7:50 AM, Oliver O'Halloran wrote:
> On Tue, Jul 9, 2019 at 12:22 AM Aneesh Kumar K.V
> <aneesh.kumar@linux.ibm.com> wrote:
>>
>> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>>
>>> *snip*
>>> + if (IS_ENABLED(CONFIG_PPC64))
>>> + isync();
>>> }
>>
>>
>> Was checking with Michael about why we need that extra isync. Michael
>> pointed this came via
>>
>> https://github.com/mpe/linux-fullhistory/commit/faa5ee3743ff9b6df9f9a03600e34fdae596cfb2#diff-67c7ffa8e420c7d4206cae4a9e888e14
>>
>> for 970 which doesn't have coherent icache. So possibly isync there is
>> to flush the prefetch instructions? But even so we would need an icbi
>> there before that isync.
>
> I don't think it's that, there's some magic in flush_icache_range() to
> handle dropping prefetched instructions on 970.
>
>> So overall wondering why we need that extra barriers there.
>
> I think the isync is needed there because the architecture only
> requires sync to provide ordering. A sync alone doesn't guarantee the
> dcbfs have actually completed so the isync is necessary to ensure the
> flushed cache lines are back in memory. That said, as far as I know
> all the IBM book3s chips from power4 onwards will wait for pending
> dcbfs when they hit a sync, but that might change in the future.
>
ISA doesn't list that as the sequence. Only place where isync was
mentioned was w.r.t icbi where want to discards the prefetch.
> If it's a problem we could add a cpu-feature section around the isync
> to no-op it in the common case. However, when I had a look with perf
> it always showed that the sync was the hotspot so I don't think it'll
> help much.
>
What about the preceding barriers (sync; isync;) before dcbf? Why are
they needed?
-aneesh
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
@ 2019-07-09 2:51 ` Aneesh Kumar K.V
0 siblings, 0 replies; 29+ messages in thread
From: Aneesh Kumar K.V @ 2019-07-09 2:51 UTC (permalink / raw)
To: Oliver O'Halloran
Cc: Linux Kernel Mailing List, Paul Mackerras, linuxppc-dev
On 7/9/19 7:50 AM, Oliver O'Halloran wrote:
> On Tue, Jul 9, 2019 at 12:22 AM Aneesh Kumar K.V
> <aneesh.kumar@linux.ibm.com> wrote:
>>
>> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>>
>>> *snip*
>>> + if (IS_ENABLED(CONFIG_PPC64))
>>> + isync();
>>> }
>>
>>
>> Was checking with Michael about why we need that extra isync. Michael
>> pointed this came via
>>
>> https://github.com/mpe/linux-fullhistory/commit/faa5ee3743ff9b6df9f9a03600e34fdae596cfb2#diff-67c7ffa8e420c7d4206cae4a9e888e14
>>
>> for 970 which doesn't have coherent icache. So possibly isync there is
>> to flush the prefetch instructions? But even so we would need an icbi
>> there before that isync.
>
> I don't think it's that, there's some magic in flush_icache_range() to
> handle dropping prefetched instructions on 970.
>
>> So overall wondering why we need that extra barriers there.
>
> I think the isync is needed there because the architecture only
> requires sync to provide ordering. A sync alone doesn't guarantee the
> dcbfs have actually completed so the isync is necessary to ensure the
> flushed cache lines are back in memory. That said, as far as I know
> all the IBM book3s chips from power4 onwards will wait for pending
> dcbfs when they hit a sync, but that might change in the future.
>
ISA doesn't list that as the sequence. Only place where isync was
mentioned was w.r.t icbi where want to discards the prefetch.
> If it's a problem we could add a cpu-feature section around the isync
> to no-op it in the common case. However, when I had a look with perf
> it always showed that the sync was the hotspot so I don't think it'll
> help much.
>
What about the preceding barriers (sync; isync;) before dcbf? Why are
they needed?
-aneesh
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
2019-07-09 2:51 ` Aneesh Kumar K.V
@ 2019-07-09 5:29 ` Oliver O'Halloran
-1 siblings, 0 replies; 29+ messages in thread
From: Oliver O'Halloran @ 2019-07-09 5:29 UTC (permalink / raw)
To: Aneesh Kumar K.V
Cc: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
Michael Ellerman, Segher Boessenkool, linuxppc-dev,
Linux Kernel Mailing List
On Tue, Jul 9, 2019 at 12:52 PM Aneesh Kumar K.V
<aneesh.kumar@linux.ibm.com> wrote:
>
> On 7/9/19 7:50 AM, Oliver O'Halloran wrote:
> > On Tue, Jul 9, 2019 at 12:22 AM Aneesh Kumar K.V
> > <aneesh.kumar@linux.ibm.com> wrote:
> >>
> >> Christophe Leroy <christophe.leroy@c-s.fr> writes:
> >>
> >>> *snip*
> >>> + if (IS_ENABLED(CONFIG_PPC64))
> >>> + isync();
> >>> }
> >>
> >>
> >> Was checking with Michael about why we need that extra isync. Michael
> >> pointed this came via
> >>
> >> https://github.com/mpe/linux-fullhistory/commit/faa5ee3743ff9b6df9f9a03600e34fdae596cfb2#diff-67c7ffa8e420c7d4206cae4a9e888e14
> >>
> >> for 970 which doesn't have coherent icache. So possibly isync there is
> >> to flush the prefetch instructions? But even so we would need an icbi
> >> there before that isync.
> >
> > I don't think it's that, there's some magic in flush_icache_range() to
> > handle dropping prefetched instructions on 970.
> >
> >> So overall wondering why we need that extra barriers there.
> >
> > I think the isync is needed there because the architecture only
> > requires sync to provide ordering. A sync alone doesn't guarantee the
> > dcbfs have actually completed so the isync is necessary to ensure the
> > flushed cache lines are back in memory. That said, as far as I know
> > all the IBM book3s chips from power4 onwards will wait for pending
> > dcbfs when they hit a sync, but that might change in the future.
> >
>
> ISA doesn't list that as the sequence. Only place where isync was
> mentioned was w.r.t icbi where want to discards the prefetch.
doesn't list that as the sequence for what?
> > If it's a problem we could add a cpu-feature section around the isync
> > to no-op it in the common case. However, when I had a look with perf
> > it always showed that the sync was the hotspot so I don't think it'll
> > help much.
> >
>
> What about the preceding barriers (sync; isync;) before dcbf? Why are
> they needed?
Dunno, the sync might just be to ensure ordering between prior stores
and the dcbf.
>
> -aneesh
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
@ 2019-07-09 5:29 ` Oliver O'Halloran
0 siblings, 0 replies; 29+ messages in thread
From: Oliver O'Halloran @ 2019-07-09 5:29 UTC (permalink / raw)
To: Aneesh Kumar K.V; +Cc: Linux Kernel Mailing List, Paul Mackerras, linuxppc-dev
On Tue, Jul 9, 2019 at 12:52 PM Aneesh Kumar K.V
<aneesh.kumar@linux.ibm.com> wrote:
>
> On 7/9/19 7:50 AM, Oliver O'Halloran wrote:
> > On Tue, Jul 9, 2019 at 12:22 AM Aneesh Kumar K.V
> > <aneesh.kumar@linux.ibm.com> wrote:
> >>
> >> Christophe Leroy <christophe.leroy@c-s.fr> writes:
> >>
> >>> *snip*
> >>> + if (IS_ENABLED(CONFIG_PPC64))
> >>> + isync();
> >>> }
> >>
> >>
> >> Was checking with Michael about why we need that extra isync. Michael
> >> pointed this came via
> >>
> >> https://github.com/mpe/linux-fullhistory/commit/faa5ee3743ff9b6df9f9a03600e34fdae596cfb2#diff-67c7ffa8e420c7d4206cae4a9e888e14
> >>
> >> for 970 which doesn't have coherent icache. So possibly isync there is
> >> to flush the prefetch instructions? But even so we would need an icbi
> >> there before that isync.
> >
> > I don't think it's that, there's some magic in flush_icache_range() to
> > handle dropping prefetched instructions on 970.
> >
> >> So overall wondering why we need that extra barriers there.
> >
> > I think the isync is needed there because the architecture only
> > requires sync to provide ordering. A sync alone doesn't guarantee the
> > dcbfs have actually completed so the isync is necessary to ensure the
> > flushed cache lines are back in memory. That said, as far as I know
> > all the IBM book3s chips from power4 onwards will wait for pending
> > dcbfs when they hit a sync, but that might change in the future.
> >
>
> ISA doesn't list that as the sequence. Only place where isync was
> mentioned was w.r.t icbi where want to discards the prefetch.
doesn't list that as the sequence for what?
> > If it's a problem we could add a cpu-feature section around the isync
> > to no-op it in the common case. However, when I had a look with perf
> > it always showed that the sync was the hotspot so I don't think it'll
> > help much.
> >
>
> What about the preceding barriers (sync; isync;) before dcbf? Why are
> they needed?
Dunno, the sync might just be to ensure ordering between prior stores
and the dcbf.
>
> -aneesh
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
2019-07-09 2:51 ` Aneesh Kumar K.V
@ 2019-07-09 17:04 ` Segher Boessenkool
-1 siblings, 0 replies; 29+ messages in thread
From: Segher Boessenkool @ 2019-07-09 17:04 UTC (permalink / raw)
To: Aneesh Kumar K.V
Cc: Oliver O'Halloran, Christophe Leroy, Benjamin Herrenschmidt,
Paul Mackerras, Michael Ellerman, linuxppc-dev,
Linux Kernel Mailing List
On Tue, Jul 09, 2019 at 08:21:54AM +0530, Aneesh Kumar K.V wrote:
> On 7/9/19 7:50 AM, Oliver O'Halloran wrote:
> >I don't think it's that, there's some magic in flush_icache_range() to
> >handle dropping prefetched instructions on 970.
> >
> >>So overall wondering why we need that extra barriers there.
> >
> >I think the isync is needed there because the architecture only
> >requires sync to provide ordering. A sync alone doesn't guarantee the
> >dcbfs have actually completed so the isync is necessary to ensure the
> >flushed cache lines are back in memory. That said, as far as I know
> >all the IBM book3s chips from power4 onwards will wait for pending
> >dcbfs when they hit a sync, but that might change in the future.
> >
>
> ISA doesn't list that as the sequence. Only place where isync was
> mentioned was w.r.t icbi where want to discards the prefetch.
You need an isync to guarantee all icbi insns before the isync have been
performed before any code after the isync is fetched. Killing the
prefetch is just part of it.
> >If it's a problem we could add a cpu-feature section around the isync
> >to no-op it in the common case. However, when I had a look with perf
> >it always showed that the sync was the hotspot so I don't think it'll
> >help much.
>
> What about the preceding barriers (sync; isync;) before dcbf? Why are
> they needed?
This isn't very generic code. The code seems to be trying to do
coherency in software. Like you needed to do for DART on U3/U4, or for
some of the PMU/SMU communication -- both are through main memory, but
both are not cache coherent. Which means all rules go out of the
window.
To do this properly you need some platform-specific code, for example
to kill hardware and software prefetch streams. Or hope^Wguarantee
those never touch your communication buffers.
I recommend you keep the original function, maybe with a more specific
name, for the DART etc. code; and have all normal(*) dcbf users use a
new more normal function, with just a single sync instruction.
Segher
(*) As far as anything using dcbf can be called "normal"!
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
@ 2019-07-09 17:04 ` Segher Boessenkool
0 siblings, 0 replies; 29+ messages in thread
From: Segher Boessenkool @ 2019-07-09 17:04 UTC (permalink / raw)
To: Aneesh Kumar K.V
Cc: Linux Kernel Mailing List, Oliver O'Halloran, Paul Mackerras,
linuxppc-dev
On Tue, Jul 09, 2019 at 08:21:54AM +0530, Aneesh Kumar K.V wrote:
> On 7/9/19 7:50 AM, Oliver O'Halloran wrote:
> >I don't think it's that, there's some magic in flush_icache_range() to
> >handle dropping prefetched instructions on 970.
> >
> >>So overall wondering why we need that extra barriers there.
> >
> >I think the isync is needed there because the architecture only
> >requires sync to provide ordering. A sync alone doesn't guarantee the
> >dcbfs have actually completed so the isync is necessary to ensure the
> >flushed cache lines are back in memory. That said, as far as I know
> >all the IBM book3s chips from power4 onwards will wait for pending
> >dcbfs when they hit a sync, but that might change in the future.
> >
>
> ISA doesn't list that as the sequence. Only place where isync was
> mentioned was w.r.t icbi where want to discards the prefetch.
You need an isync to guarantee all icbi insns before the isync have been
performed before any code after the isync is fetched. Killing the
prefetch is just part of it.
> >If it's a problem we could add a cpu-feature section around the isync
> >to no-op it in the common case. However, when I had a look with perf
> >it always showed that the sync was the hotspot so I don't think it'll
> >help much.
>
> What about the preceding barriers (sync; isync;) before dcbf? Why are
> they needed?
This isn't very generic code. The code seems to be trying to do
coherency in software. Like you needed to do for DART on U3/U4, or for
some of the PMU/SMU communication -- both are through main memory, but
both are not cache coherent. Which means all rules go out of the
window.
To do this properly you need some platform-specific code, for example
to kill hardware and software prefetch streams. Or hope^Wguarantee
those never touch your communication buffers.
I recommend you keep the original function, maybe with a more specific
name, for the DART etc. code; and have all normal(*) dcbf users use a
new more normal function, with just a single sync instruction.
Segher
(*) As far as anything using dcbf can be called "normal"!
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 2/4] powerpc/32: activate ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE
2019-05-14 9:05 ` Christophe Leroy
@ 2019-07-15 6:49 ` Michael Ellerman
-1 siblings, 0 replies; 29+ messages in thread
From: Michael Ellerman @ 2019-07-15 6:49 UTC (permalink / raw)
To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
Oliver O'Halloran, Segher Boessenkool
Cc: linux-kernel, linuxppc-dev
Christophe Leroy <christophe.leroy@c-s.fr> writes:
> PPC32 also have flush_dcache_range() so it can also support
> ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE without changes.
>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
> arch/powerpc/Kconfig | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index d7996cfaceca..cf6e30f637be 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -127,13 +127,13 @@ config PPC
> select ARCH_HAS_KCOV
> select ARCH_HAS_MMIOWB if PPC64
> select ARCH_HAS_PHYS_TO_DMA
> - select ARCH_HAS_PMEM_API if PPC64
> + select ARCH_HAS_PMEM_API
> select ARCH_HAS_PTE_SPECIAL
> select ARCH_HAS_MEMBARRIER_CALLBACKS
> select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
> select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
> select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
> - select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64
> + select ARCH_HAS_UACCESS_FLUSHCACHE
> select ARCH_HAS_UBSAN_SANITIZE_ALL
> select ARCH_HAS_ZONE_DEVICE if PPC_BOOK3S_64
> select ARCH_HAVE_NMI_SAFE_CMPXCHG
This didn't build for me, probably due to something that's changed in
the long period between you posting it and me applying it?
corenet32_smp_defconfig:
powerpc64-unknown-linux-gnu-ld: lib/iov_iter.o: in function `_copy_from_iter_flushcache':
powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `memcpy_page_flushcache'
powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `memcpy_flushcache'
powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `__copy_from_user_flushcache'
powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `memcpy_flushcache'
cheers
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 2/4] powerpc/32: activate ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE
@ 2019-07-15 6:49 ` Michael Ellerman
0 siblings, 0 replies; 29+ messages in thread
From: Michael Ellerman @ 2019-07-15 6:49 UTC (permalink / raw)
To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
Oliver O'Halloran, Segher Boessenkool
Cc: linuxppc-dev, linux-kernel
Christophe Leroy <christophe.leroy@c-s.fr> writes:
> PPC32 also have flush_dcache_range() so it can also support
> ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE without changes.
>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
> arch/powerpc/Kconfig | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index d7996cfaceca..cf6e30f637be 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -127,13 +127,13 @@ config PPC
> select ARCH_HAS_KCOV
> select ARCH_HAS_MMIOWB if PPC64
> select ARCH_HAS_PHYS_TO_DMA
> - select ARCH_HAS_PMEM_API if PPC64
> + select ARCH_HAS_PMEM_API
> select ARCH_HAS_PTE_SPECIAL
> select ARCH_HAS_MEMBARRIER_CALLBACKS
> select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
> select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
> select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
> - select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64
> + select ARCH_HAS_UACCESS_FLUSHCACHE
> select ARCH_HAS_UBSAN_SANITIZE_ALL
> select ARCH_HAS_ZONE_DEVICE if PPC_BOOK3S_64
> select ARCH_HAVE_NMI_SAFE_CMPXCHG
This didn't build for me, probably due to something that's changed in
the long period between you posting it and me applying it?
corenet32_smp_defconfig:
powerpc64-unknown-linux-gnu-ld: lib/iov_iter.o: in function `_copy_from_iter_flushcache':
powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `memcpy_page_flushcache'
powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `memcpy_flushcache'
powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `__copy_from_user_flushcache'
powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `memcpy_flushcache'
cheers
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 2/4] powerpc/32: activate ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE
2019-07-15 6:49 ` Michael Ellerman
@ 2019-07-15 7:02 ` Oliver O'Halloran
-1 siblings, 0 replies; 29+ messages in thread
From: Oliver O'Halloran @ 2019-07-15 7:02 UTC (permalink / raw)
To: Michael Ellerman
Cc: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
Segher Boessenkool, Linux Kernel Mailing List, linuxppc-dev
On Mon, Jul 15, 2019 at 4:49 PM Michael Ellerman <mpe@ellerman.id.au> wrote:
>
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
> > PPC32 also have flush_dcache_range() so it can also support
> > ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE without changes.
> >
> > Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> > ---
> > arch/powerpc/Kconfig | 4 ++--
> > 1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> > index d7996cfaceca..cf6e30f637be 100644
> > --- a/arch/powerpc/Kconfig
> > +++ b/arch/powerpc/Kconfig
> > @@ -127,13 +127,13 @@ config PPC
> > select ARCH_HAS_KCOV
> > select ARCH_HAS_MMIOWB if PPC64
> > select ARCH_HAS_PHYS_TO_DMA
> > - select ARCH_HAS_PMEM_API if PPC64
> > + select ARCH_HAS_PMEM_API
> > select ARCH_HAS_PTE_SPECIAL
> > select ARCH_HAS_MEMBARRIER_CALLBACKS
> > select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
> > select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
> > select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
> > - select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64
> > + select ARCH_HAS_UACCESS_FLUSHCACHE
> > select ARCH_HAS_UBSAN_SANITIZE_ALL
> > select ARCH_HAS_ZONE_DEVICE if PPC_BOOK3S_64
> > select ARCH_HAVE_NMI_SAFE_CMPXCHG
>
> This didn't build for me, probably due to something that's changed in
> the long period between you posting it and me applying it?
>
> corenet32_smp_defconfig:
>
> powerpc64-unknown-linux-gnu-ld: lib/iov_iter.o: in function `_copy_from_iter_flushcache':
> powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `memcpy_page_flushcache'
> powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `memcpy_flushcache'
> powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `__copy_from_user_flushcache'
> powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `memcpy_flushcache'
I think lib/pmem.c just needs to be moved out of obj64-y.
>
> cheers
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 2/4] powerpc/32: activate ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE
@ 2019-07-15 7:02 ` Oliver O'Halloran
0 siblings, 0 replies; 29+ messages in thread
From: Oliver O'Halloran @ 2019-07-15 7:02 UTC (permalink / raw)
To: Michael Ellerman; +Cc: Linux Kernel Mailing List, Paul Mackerras, linuxppc-dev
On Mon, Jul 15, 2019 at 4:49 PM Michael Ellerman <mpe@ellerman.id.au> wrote:
>
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
> > PPC32 also have flush_dcache_range() so it can also support
> > ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE without changes.
> >
> > Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> > ---
> > arch/powerpc/Kconfig | 4 ++--
> > 1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> > index d7996cfaceca..cf6e30f637be 100644
> > --- a/arch/powerpc/Kconfig
> > +++ b/arch/powerpc/Kconfig
> > @@ -127,13 +127,13 @@ config PPC
> > select ARCH_HAS_KCOV
> > select ARCH_HAS_MMIOWB if PPC64
> > select ARCH_HAS_PHYS_TO_DMA
> > - select ARCH_HAS_PMEM_API if PPC64
> > + select ARCH_HAS_PMEM_API
> > select ARCH_HAS_PTE_SPECIAL
> > select ARCH_HAS_MEMBARRIER_CALLBACKS
> > select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
> > select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
> > select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
> > - select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64
> > + select ARCH_HAS_UACCESS_FLUSHCACHE
> > select ARCH_HAS_UBSAN_SANITIZE_ALL
> > select ARCH_HAS_ZONE_DEVICE if PPC_BOOK3S_64
> > select ARCH_HAVE_NMI_SAFE_CMPXCHG
>
> This didn't build for me, probably due to something that's changed in
> the long period between you posting it and me applying it?
>
> corenet32_smp_defconfig:
>
> powerpc64-unknown-linux-gnu-ld: lib/iov_iter.o: in function `_copy_from_iter_flushcache':
> powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `memcpy_page_flushcache'
> powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `memcpy_flushcache'
> powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `__copy_from_user_flushcache'
> powerpc64-unknown-linux-gnu-ld: /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined reference to `memcpy_flushcache'
I think lib/pmem.c just needs to be moved out of obj64-y.
>
> cheers
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 2/4] powerpc/32: activate ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE
2019-07-15 6:49 ` Michael Ellerman
@ 2019-07-15 17:03 ` Christophe Leroy
-1 siblings, 0 replies; 29+ messages in thread
From: Christophe Leroy @ 2019-07-15 17:03 UTC (permalink / raw)
To: Michael Ellerman
Cc: linuxppc-dev, linux-kernel, Segher Boessenkool,
Oliver O'Halloran, Paul Mackerras, Benjamin Herrenschmidt
Michael Ellerman <mpe@ellerman.id.au> a écrit :
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>> PPC32 also have flush_dcache_range() so it can also support
>> ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE without changes.
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>> ---
>> arch/powerpc/Kconfig | 4 ++--
>> 1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index d7996cfaceca..cf6e30f637be 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -127,13 +127,13 @@ config PPC
>> select ARCH_HAS_KCOV
>> select ARCH_HAS_MMIOWB if PPC64
>> select ARCH_HAS_PHYS_TO_DMA
>> - select ARCH_HAS_PMEM_API if PPC64
>> + select ARCH_HAS_PMEM_API
>> select ARCH_HAS_PTE_SPECIAL
>> select ARCH_HAS_MEMBARRIER_CALLBACKS
>> select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
>> select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) &&
>> !RELOCATABLE && !HIBERNATION)
>> select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
>> - select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64
>> + select ARCH_HAS_UACCESS_FLUSHCACHE
>> select ARCH_HAS_UBSAN_SANITIZE_ALL
>> select ARCH_HAS_ZONE_DEVICE if PPC_BOOK3S_64
>> select ARCH_HAVE_NMI_SAFE_CMPXCHG
>
> This didn't build for me, probably due to something that's changed in
> the long period between you posting it and me applying it?
>
> corenet32_smp_defconfig:
>
> powerpc64-unknown-linux-gnu-ld: lib/iov_iter.o: in function
> `_copy_from_iter_flushcache':
> powerpc64-unknown-linux-gnu-ld:
> /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined
> reference to `memcpy_page_flushcache'
> powerpc64-unknown-linux-gnu-ld:
> /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined
> reference to `memcpy_flushcache'
> powerpc64-unknown-linux-gnu-ld:
> /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined
> reference to `__copy_from_user_flushcache'
> powerpc64-unknown-linux-gnu-ld:
> /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined
> reference to `memcpy_flushcache'
>
Looks like arch/powerpc/lib/Makefile only builds pmem.o for ppc64
Moving it from obj64-y to obj-y should do it.
I'll update the patch when I'm back in two weeks unless you can fix it before.
Christophe
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 2/4] powerpc/32: activate ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE
@ 2019-07-15 17:03 ` Christophe Leroy
0 siblings, 0 replies; 29+ messages in thread
From: Christophe Leroy @ 2019-07-15 17:03 UTC (permalink / raw)
To: Michael Ellerman
Cc: linux-kernel, Oliver O'Halloran, Paul Mackerras, linuxppc-dev
Michael Ellerman <mpe@ellerman.id.au> a écrit :
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>> PPC32 also have flush_dcache_range() so it can also support
>> ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE without changes.
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>> ---
>> arch/powerpc/Kconfig | 4 ++--
>> 1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index d7996cfaceca..cf6e30f637be 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -127,13 +127,13 @@ config PPC
>> select ARCH_HAS_KCOV
>> select ARCH_HAS_MMIOWB if PPC64
>> select ARCH_HAS_PHYS_TO_DMA
>> - select ARCH_HAS_PMEM_API if PPC64
>> + select ARCH_HAS_PMEM_API
>> select ARCH_HAS_PTE_SPECIAL
>> select ARCH_HAS_MEMBARRIER_CALLBACKS
>> select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
>> select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) &&
>> !RELOCATABLE && !HIBERNATION)
>> select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
>> - select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64
>> + select ARCH_HAS_UACCESS_FLUSHCACHE
>> select ARCH_HAS_UBSAN_SANITIZE_ALL
>> select ARCH_HAS_ZONE_DEVICE if PPC_BOOK3S_64
>> select ARCH_HAVE_NMI_SAFE_CMPXCHG
>
> This didn't build for me, probably due to something that's changed in
> the long period between you posting it and me applying it?
>
> corenet32_smp_defconfig:
>
> powerpc64-unknown-linux-gnu-ld: lib/iov_iter.o: in function
> `_copy_from_iter_flushcache':
> powerpc64-unknown-linux-gnu-ld:
> /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined
> reference to `memcpy_page_flushcache'
> powerpc64-unknown-linux-gnu-ld:
> /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined
> reference to `memcpy_flushcache'
> powerpc64-unknown-linux-gnu-ld:
> /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined
> reference to `__copy_from_user_flushcache'
> powerpc64-unknown-linux-gnu-ld:
> /scratch/michael/build/maint/build~/../lib/iov_iter.c:825: undefined
> reference to `memcpy_flushcache'
>
Looks like arch/powerpc/lib/Makefile only builds pmem.o for ppc64
Moving it from obj64-y to obj-y should do it.
I'll update the patch when I'm back in two weeks unless you can fix it before.
Christophe
^ permalink raw reply [flat|nested] 29+ messages in thread
* powerpc flush_inval_dcache_range() was buggy until v5.3-rc1 (was Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range())
2019-05-14 9:05 ` Christophe Leroy
@ 2019-08-08 6:53 ` Michael Ellerman
-1 siblings, 0 replies; 29+ messages in thread
From: Michael Ellerman @ 2019-08-08 6:53 UTC (permalink / raw)
To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
Oliver O'Halloran, Segher Boessenkool
Cc: linux-kernel, linuxppc-dev, Aneesh Kumar K.V, Alastair D'Silva
[ deliberately broke threading so this doesn't get buried ]
Christophe Leroy <christophe.leroy@c-s.fr> writes:
> diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
> index a4fd536efb44..1b0a42c50ef1 100644
> --- a/arch/powerpc/kernel/misc_64.S
> +++ b/arch/powerpc/kernel/misc_64.S
> @@ -115,35 +115,6 @@ _ASM_NOKPROBE_SYMBOL(flush_icache_range)
> EXPORT_SYMBOL(flush_icache_range)
>
> /*
> - * Like above, but only do the D-cache.
> - *
> - * flush_dcache_range(unsigned long start, unsigned long stop)
> - *
> - * flush all bytes from start to stop-1 inclusive
> - */
> -
> -_GLOBAL_TOC(flush_dcache_range)
> - ld r10,PPC64_CACHES@toc(r2)
> - lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
> - addi r5,r7,-1
> - andc r6,r3,r5 /* round low to line bdy */
> - subf r8,r6,r4 /* compute length */
> - add r8,r8,r5 /* ensure we get enough */
> - lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */
> - srw. r8,r8,r9 /* compute line count */
^
> - beqlr /* nothing to do? */
Alastair noticed that this was a 32-bit right shift.
Meaning if you called flush_dcache_range() with a range larger than 4GB,
it did nothing and returned.
That code (which was previously called flush_inval_dcache_range()) was
merged back in 2005:
https://github.com/mpe/linux-fullhistory/commit/faa5ee3743ff9b6df9f9a03600e34fdae596cfb2#diff-67c7ffa8e420c7d4206cae4a9e888e14
Back then it was only used by the smu.c driver, which presumably wasn't
flushing more than 4GB.
Over time it grew more users:
v4.17 (Apr 2018): fb5924fddf9e ("powerpc/mm: Flush cache on memory hot(un)plug")
v4.15 (Nov 2017): 6c44741d75a2 ("powerpc/lib: Implement UACCESS_FLUSHCACHE API")
v4.15 (Nov 2017): 32ce3862af3c ("powerpc/lib: Implement PMEM API")
v4.8 (Jul 2016): c40785ad305b ("powerpc/dart: Use a cachable DART")
The DART case doesn't matter, but the others probably could. I assume
the lack of bug reports is due to the fact that pmem stuff is still in
development and the lack of flushing usually doesn't actually matter? Or
are people flushing/hotplugging < 4G at a time?
Anyway we probably want to backport the fix below to various places?
cheers
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 1ad4089dd110..802f5abbf061 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -148,7 +148,7 @@ _GLOBAL(flush_inval_dcache_range)
subf r8,r6,r4 /* compute length */
add r8,r8,r5 /* ensure we get enough */
lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */
- srw. r8,r8,r9 /* compute line count */
+ srd. r8,r8,r9 /* compute line count */
beqlr /* nothing to do? */
sync
isync
^ permalink raw reply related [flat|nested] 29+ messages in thread
* powerpc flush_inval_dcache_range() was buggy until v5.3-rc1 (was Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range())
@ 2019-08-08 6:53 ` Michael Ellerman
0 siblings, 0 replies; 29+ messages in thread
From: Michael Ellerman @ 2019-08-08 6:53 UTC (permalink / raw)
To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
Oliver O'Halloran, Segher Boessenkool
Cc: Aneesh Kumar K.V, linuxppc-dev, linux-kernel, Alastair D'Silva
[ deliberately broke threading so this doesn't get buried ]
Christophe Leroy <christophe.leroy@c-s.fr> writes:
> diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
> index a4fd536efb44..1b0a42c50ef1 100644
> --- a/arch/powerpc/kernel/misc_64.S
> +++ b/arch/powerpc/kernel/misc_64.S
> @@ -115,35 +115,6 @@ _ASM_NOKPROBE_SYMBOL(flush_icache_range)
> EXPORT_SYMBOL(flush_icache_range)
>
> /*
> - * Like above, but only do the D-cache.
> - *
> - * flush_dcache_range(unsigned long start, unsigned long stop)
> - *
> - * flush all bytes from start to stop-1 inclusive
> - */
> -
> -_GLOBAL_TOC(flush_dcache_range)
> - ld r10,PPC64_CACHES@toc(r2)
> - lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
> - addi r5,r7,-1
> - andc r6,r3,r5 /* round low to line bdy */
> - subf r8,r6,r4 /* compute length */
> - add r8,r8,r5 /* ensure we get enough */
> - lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */
> - srw. r8,r8,r9 /* compute line count */
^
> - beqlr /* nothing to do? */
Alastair noticed that this was a 32-bit right shift.
Meaning if you called flush_dcache_range() with a range larger than 4GB,
it did nothing and returned.
That code (which was previously called flush_inval_dcache_range()) was
merged back in 2005:
https://github.com/mpe/linux-fullhistory/commit/faa5ee3743ff9b6df9f9a03600e34fdae596cfb2#diff-67c7ffa8e420c7d4206cae4a9e888e14
Back then it was only used by the smu.c driver, which presumably wasn't
flushing more than 4GB.
Over time it grew more users:
v4.17 (Apr 2018): fb5924fddf9e ("powerpc/mm: Flush cache on memory hot(un)plug")
v4.15 (Nov 2017): 6c44741d75a2 ("powerpc/lib: Implement UACCESS_FLUSHCACHE API")
v4.15 (Nov 2017): 32ce3862af3c ("powerpc/lib: Implement PMEM API")
v4.8 (Jul 2016): c40785ad305b ("powerpc/dart: Use a cachable DART")
The DART case doesn't matter, but the others probably could. I assume
the lack of bug reports is due to the fact that pmem stuff is still in
development and the lack of flushing usually doesn't actually matter? Or
are people flushing/hotplugging < 4G at a time?
Anyway we probably want to backport the fix below to various places?
cheers
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 1ad4089dd110..802f5abbf061 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -148,7 +148,7 @@ _GLOBAL(flush_inval_dcache_range)
subf r8,r6,r4 /* compute length */
add r8,r8,r5 /* ensure we get enough */
lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */
- srw. r8,r8,r9 /* compute line count */
+ srd. r8,r8,r9 /* compute line count */
beqlr /* nothing to do? */
sync
isync
^ permalink raw reply related [flat|nested] 29+ messages in thread
end of thread, other threads:[~2019-08-08 6:55 UTC | newest]
Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-14 9:05 [PATCH 1/4] powerpc/64: flush_inval_dcache_range() becomes flush_dcache_range() Christophe Leroy
2019-05-14 9:05 ` Christophe Leroy
2019-05-14 9:05 ` [PATCH 2/4] powerpc/32: activate ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE Christophe Leroy
2019-05-14 9:05 ` Christophe Leroy
2019-07-15 6:49 ` Michael Ellerman
2019-07-15 6:49 ` Michael Ellerman
2019-07-15 7:02 ` Oliver O'Halloran
2019-07-15 7:02 ` Oliver O'Halloran
2019-07-15 17:03 ` Christophe Leroy
2019-07-15 17:03 ` Christophe Leroy
2019-05-14 9:05 ` [PATCH 3/4] powerpc/32: define helpers to get L1 cache sizes Christophe Leroy
2019-05-14 9:05 ` Christophe Leroy
2019-07-08 1:19 ` Michael Ellerman
2019-05-14 9:05 ` [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range() Christophe Leroy
2019-05-14 9:05 ` Christophe Leroy
2019-07-08 1:19 ` Michael Ellerman
2019-07-08 14:21 ` Aneesh Kumar K.V
2019-07-08 14:21 ` Aneesh Kumar K.V
2019-07-09 2:20 ` Oliver O'Halloran
2019-07-09 2:20 ` Oliver O'Halloran
2019-07-09 2:51 ` Aneesh Kumar K.V
2019-07-09 2:51 ` Aneesh Kumar K.V
2019-07-09 5:29 ` Oliver O'Halloran
2019-07-09 5:29 ` Oliver O'Halloran
2019-07-09 17:04 ` Segher Boessenkool
2019-07-09 17:04 ` Segher Boessenkool
2019-08-08 6:53 ` powerpc flush_inval_dcache_range() was buggy until v5.3-rc1 (was Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()) Michael Ellerman
2019-08-08 6:53 ` Michael Ellerman
2019-07-08 1:19 ` [PATCH 1/4] powerpc/64: flush_inval_dcache_range() becomes flush_dcache_range() Michael Ellerman
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.